diff --git a/cpp/bench_mark/bench_mark_src/bench_conf.h b/cpp/bench_mark/bench_mark_src/bench_conf.h index 9d961d729..f2a4de754 100644 --- a/cpp/bench_mark/bench_mark_src/bench_conf.h +++ b/cpp/bench_mark/bench_mark_src/bench_conf.h @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + #include namespace bench { @@ -23,4 +24,5 @@ int LOOP_NUM = 100000; int THREAD_NUM = 1; int TIMESERIES_NUM = 50; std::vector TYPE_LIST = {0, 0, 1, 0, 1}; -} // namespace bench \ No newline at end of file +} // namespace bench + diff --git a/cpp/examples/c_examples/c_examples.h b/cpp/examples/c_examples/c_examples.h index d29090cb2..a04051385 100644 --- a/cpp/examples/c_examples/c_examples.h +++ b/cpp/examples/c_examples/c_examples.h @@ -17,7 +17,6 @@ * under the License. */ - #include "cwrapper/TsFile-cwrapper.h" #ifdef __cplusplus @@ -29,4 +28,5 @@ ErrorCode read_tsfile(); #ifdef __cplusplus } -#endif \ No newline at end of file +#endif + diff --git a/cpp/pom.xml b/cpp/pom.xml index 3390c0f43..653246167 100644 --- a/cpp/pom.xml +++ b/cpp/pom.xml @@ -22,7 +22,7 @@ org.apache.tsfile tsfile-parent - 1.0.1-SNAPSHOT + 1.2.0-SNAPSHOT tsfile-cpp pom diff --git a/cpp/src/reader/query_data_set.h b/cpp/src/reader/query_data_set.h index 858427660..abea04af8 100644 --- a/cpp/src/reader/query_data_set.h +++ b/cpp/src/reader/query_data_set.h @@ -34,3 +34,4 @@ class QueryDataSet { } // namespace storage #endif // READER_QUERY_DATA_SET_H + diff --git a/java/common/pom.xml b/java/common/pom.xml index 91b24366f..fe503903c 100644 --- a/java/common/pom.xml +++ b/java/common/pom.xml @@ -24,7 +24,7 @@ org.apache.tsfile tsfile-java - 1.0.1-SNAPSHOT + 1.2.0-SNAPSHOT common TsFile: Java: Common diff --git a/java/common/src/main/java/org/apache/tsfile/block/column/Column.java b/java/common/src/main/java/org/apache/tsfile/block/column/Column.java index 161a2c8eb..f92d3b5c0 100644 --- a/java/common/src/main/java/org/apache/tsfile/block/column/Column.java +++ 
b/java/common/src/main/java/org/apache/tsfile/block/column/Column.java @@ -23,6 +23,8 @@ import org.apache.tsfile.utils.Binary; import org.apache.tsfile.utils.TsPrimitiveType; +import java.util.Arrays; + public interface Column { /** Get the data type. */ @@ -124,6 +126,14 @@ default TsPrimitiveType getTsPrimitiveType(int position) { /** Returns the array to determine whether each position of the column is null or not. */ boolean[] isNull(); + /** + * Set the given range as null. + * + * @param start start position (inclusive) + * @param end end position (exclusive) + */ + void setNull(int start, int end); + /** Returns the number of positions in this block. */ int getPositionCount(); @@ -164,4 +174,14 @@ default TsPrimitiveType getTsPrimitiveType(int position) { void reverse(); int getInstanceSize(); + + void setPositionCount(int count); + + default void reset() { + setPositionCount(0); + final boolean[] isNulls = isNull(); + if (isNulls != null) { + Arrays.fill(isNulls, false); + } + } } diff --git a/java/examples/pom.xml b/java/examples/pom.xml index 8f215aaa2..b27fd5bf9 100644 --- a/java/examples/pom.xml +++ b/java/examples/pom.xml @@ -24,7 +24,7 @@ org.apache.tsfile tsfile-java - 1.0.1-SNAPSHOT + 1.2.0-SNAPSHOT examples pom @@ -38,7 +38,7 @@ org.apache.tsfile tsfile - 1.0.1-SNAPSHOT + 1.2.0-SNAPSHOT diff --git a/java/examples/src/main/java/org/apache/tsfile/TsFileWriteAlignedWithTSRecord.java b/java/examples/src/main/java/org/apache/tsfile/TsFileWriteAlignedWithTSRecord.java index d5a5c523d..f7d1bc7b4 100644 --- a/java/examples/src/main/java/org/apache/tsfile/TsFileWriteAlignedWithTSRecord.java +++ b/java/examples/src/main/java/org/apache/tsfile/TsFileWriteAlignedWithTSRecord.java @@ -55,7 +55,7 @@ public static void main(String[] args) throws IOException { } try (TsFileWriter tsFileWriter = new TsFileWriter(f)) { - List measurementSchemas = new ArrayList<>(); + List measurementSchemas = new ArrayList<>(); measurementSchemas.add( new 
MeasurementSchema(Constant.SENSOR_1, TSDataType.INT64, TSEncoding.RLE)); measurementSchemas.add( @@ -84,7 +84,7 @@ public static void main(String[] args) throws IOException { private static void writeAligned( TsFileWriter tsFileWriter, String deviceId, - List schemas, + List schemas, long rowSize, long startTime, long startValue) diff --git a/java/examples/src/main/java/org/apache/tsfile/TsFileWriteAlignedWithTablet.java b/java/examples/src/main/java/org/apache/tsfile/TsFileWriteAlignedWithTablet.java index 7f7f6f363..73704ca06 100644 --- a/java/examples/src/main/java/org/apache/tsfile/TsFileWriteAlignedWithTablet.java +++ b/java/examples/src/main/java/org/apache/tsfile/TsFileWriteAlignedWithTablet.java @@ -26,6 +26,7 @@ import org.apache.tsfile.read.common.Path; import org.apache.tsfile.write.TsFileWriter; import org.apache.tsfile.write.record.Tablet; +import org.apache.tsfile.write.schema.IMeasurementSchema; import org.apache.tsfile.write.schema.MeasurementSchema; import org.slf4j.Logger; @@ -56,7 +57,8 @@ public static void main(String[] args) throws IOException { } try (TsFileWriter tsFileWriter = new TsFileWriter(f)) { - List measurementSchemas = new ArrayList<>(); + + List measurementSchemas = new ArrayList<>(); measurementSchemas.add( new MeasurementSchema(Constant.SENSOR_1, TSDataType.INT64, TSEncoding.PLAIN)); measurementSchemas.add( @@ -87,7 +89,7 @@ public static void main(String[] args) throws IOException { private static void writeAlignedWithTablet( TsFileWriter tsFileWriter, String deviceId, - List schemas, + List schemas, long rowNum, long startTime, long startValue) @@ -127,7 +129,7 @@ private static void writeNonAlignedWithTablet(TsFileWriter tsFileWriter) tsFileWriter.registerTimeseries( new Path(DEVICE_2), new MeasurementSchema(SENSOR_2, TSDataType.INT64, TSEncoding.RLE)); // construct Tablet - List measurementSchemas = new ArrayList<>(); + List measurementSchemas = new ArrayList<>(); measurementSchemas.add(new MeasurementSchema(SENSOR_1, 
TSDataType.INT64, TSEncoding.RLE)); measurementSchemas.add(new MeasurementSchema(SENSOR_2, TSDataType.INT64, TSEncoding.RLE)); Tablet tablet = new Tablet(DEVICE_2, measurementSchemas); diff --git a/java/examples/src/main/java/org/apache/tsfile/TsFileWriteWithTSRecord.java b/java/examples/src/main/java/org/apache/tsfile/TsFileWriteWithTSRecord.java index 4234c830e..79f9fd527 100644 --- a/java/examples/src/main/java/org/apache/tsfile/TsFileWriteWithTSRecord.java +++ b/java/examples/src/main/java/org/apache/tsfile/TsFileWriteWithTSRecord.java @@ -56,7 +56,7 @@ public static void main(String[] args) { } try (TsFileWriter tsFileWriter = new TsFileWriter(f)) { - List schemas = new ArrayList<>(); + List schemas = new ArrayList<>(); schemas.add(new MeasurementSchema(Constant.SENSOR_1, TSDataType.INT64, TSEncoding.RLE)); schemas.add(new MeasurementSchema(Constant.SENSOR_2, TSDataType.INT64, TSEncoding.RLE)); schemas.add(new MeasurementSchema(Constant.SENSOR_3, TSDataType.INT64, TSEncoding.RLE)); @@ -80,7 +80,7 @@ public static void main(String[] args) { private static void write( TsFileWriter tsFileWriter, String deviceId, - List schemas, + List schemas, long rowSize, long startTime, long startValue) diff --git a/java/examples/src/main/java/org/apache/tsfile/TsFileWriteWithTablet.java b/java/examples/src/main/java/org/apache/tsfile/TsFileWriteWithTablet.java index a9c572e20..3ab95a187 100644 --- a/java/examples/src/main/java/org/apache/tsfile/TsFileWriteWithTablet.java +++ b/java/examples/src/main/java/org/apache/tsfile/TsFileWriteWithTablet.java @@ -51,7 +51,7 @@ public static void main(String[] args) { } try (TsFileWriter tsFileWriter = new TsFileWriter(f)) { - List measurementSchemas = new ArrayList<>(); + List measurementSchemas = new ArrayList<>(); measurementSchemas.add( new MeasurementSchema(Constant.SENSOR_1, TSDataType.INT64, TSEncoding.PLAIN)); measurementSchemas.add( @@ -81,7 +81,7 @@ public static void main(String[] args) { private static void writeWithTablet( 
TsFileWriter tsFileWriter, String deviceId, - List schemas, + List schemas, long rowNum, long startTime, long startValue) diff --git a/java/pom.xml b/java/pom.xml index 9f207cd77..24b2cb59a 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -24,16 +24,17 @@ org.apache.tsfile tsfile-parent - 1.0.1-SNAPSHOT + 1.2.0-SNAPSHOT tsfile-java - 1.0.1-SNAPSHOT + 1.2.0-SNAPSHOT pom TsFile: Java common tsfile examples + tools diff --git a/java/tools/README-zh.md b/java/tools/README-zh.md new file mode 100644 index 000000000..e0b8d7d3a --- /dev/null +++ b/java/tools/README-zh.md @@ -0,0 +1,126 @@ + + +[English](./README.md) | [中文](./README-zh.md) +# TsFile Tools 手册 +## 简介 + +## 开发 + +### 前置条件 + +构建 Java 版的 TsFile Tools,必须要安装以下依赖: + +1. Java >= 1.8 (1.8, 11 到 17 都经过验证. 请确保设置了环境变量). +2. Maven >= 3.6 (如果要从源代码编译TsFile). + + +### 使用 maven 构建 + +``` +mvn clean package -P with-java -DskipTests +``` + +### 安装到本地机器 + +``` +mvn install -P with-java -DskipTests +``` + +## schema 定义 + +| 参数 | 说明 | 是否必填 | 默认值 | +|------------|--------------------------|------|------| +| table_name | 表名 | 是 | | +| time_precision | 时间精度(可选值有:ms/us/ns) | 否 | ms | +| has_header | 是否包含表头 (可选值有:true/false) | 否 | true | +| separator | 行内分隔符(可选值有:, /tab/ ;) | 否 | , | +| null_format | 空值 | 否 | | +| id_columns | 主键列,支持cvs中不存在的列做为层级 | 否 | | +| time_column | 时间列 | 是 | | +| csv_columns | 按照顺序与csv列一一对应 | 是 | | + +说明: + +id_columns 按照顺序进行设置值,支持csv 文件中不存在的列作为层级 +例如csv 只有a,b,c,d,time五列则 +id_columns +a1 default aa +a +其中a1 不在csv列,为虚拟列,默认值为aa + +csv_columns 之后的内容为值列的定义,每一行的第一个字段为在tsfile中的测点名,第二个字段为类型 +当csv中某一列不需要写入 tsfile时,可以设置为 SKIP +例: +csv_columns +地区 TEXT, +厂号 TEXT, +设备号 TEXT, +SKIP, +SKIP, +时间 INT64, +温度 FLOAT, +排量 DOUBLE, + +### 数据示例 +csv 文件内容 +``` +地区, 厂号, 设备号, 型号, 维修周期, 时间, 温度, 排量 +河北, 1001, 1, 10, 1, 1, 80.0, 1000.0 +河北, 1001, 1, 10, 1, 4, 80.0, 1000.0 +河北, 1002, 7, 5, 2, 1, 90.0, 1200.0 +``` +schema 定义 + +``` +table_name=root.db1 +time_precision=ms +has_header=true +separator=, +null_format=\N + + +id_columns +集团 
DEFAULT 大唐 +地区 +厂号 +设备号 + +time_column=时间 + +csv_columns +地区 TEXT, +厂号 TEXT, +设备号 TEXT, +SKIP, +SKIP, +时间 INT64, +温度 FLOAT, +排量 DOUBLE, +``` +## 命令 + +``` +csv2tsfile.sh --source ./xxx/xxx --target /xxx/xxx --fail_dir /xxx/xxx +csv2tsfile.bat --source ./xxx/xxx --target /xxx/xxx --fail_dir /xxx/xxx +``` + + diff --git a/java/tools/README.md b/java/tools/README.md new file mode 100644 index 000000000..97858fde5 --- /dev/null +++ b/java/tools/README.md @@ -0,0 +1,127 @@ + + +[English](./README.md) | [Chinese](./README-zh.md) +# TsFile Tools Manual +## Introduction + +## Development + +### Prerequisites + +To build the Java version of TsFile Tools, you must have the following dependencies installed: + +1. Java >= 1.8 (1.8, 11 to 17 are verified. Make sure the environment variable is set). +2. Maven >= 3.6 (if you are compiling TsFile from source). + +### Build with Maven + +```sh +mvn clean package -P with-java -DskipTests +``` + +### Install to local machine + +``` +mvn install -P with-java -DskipTests +``` + +## schema 定义 +| Parameter | Description | Required | Default | +|----------------|--------------------------|----------|------| +| table_name | Table name | Yes | | +| time_precision | Time precision (options: ms/us/ns) | No | ms | +| has_header | Whether it contains a header (options: true/false) | No | true | +| separator | Delimiter (options: , /tab/ ;) | No | , | +| null_format | Null value | No | | +| id_columns | Primary key columns, supports columns not in the CSV as hierarchy | No | | +| time_column | Time column | Yes | | +| csv_columns | Corresponding columns in the CSV in order | Yes | | + +Explanation: + +The "id_columns" sets values in order and supports using columns that do not exist in the CSV file as levels. 
+For example, if the CSV file has only five columns: "a", "b", "c", "d", and "time", +id_columns +a1 default aa +a +Among them, a1 is not in the CSV column and is a virtual column with a default value of aa + +The content after csv_columns is the definition of the value column, with the first field in each row being the measurement point name in tsfile and the second field being the type +When a column in CSV does not need to be written to tsfile, it can be set to SKIP. + +Example: +csv_columns +Region TEXT, +Factory Number TEXT, +Device Number TEXT, +SKIP, +SKIP, +Time INT64, +Temperature FLOAT, +Emission DOUBLE, + +Data Example +CSV file content: + +### sample data + +CSV file content: +``` +Region,FactoryNumber,DeviceNumber,Model,MaintenanceCycle,Time,Temperature,Emission +hebei,1001, 1,10,1,1,80.0,1000.0 +hebei,1001,1,10,1,4,80.0,1000.0 +hebei,1002,7,5,2,1,90.0,1200.0 +``` +Schema definition + +``` +table_name=root.db1 +time_precision=ms +has_header=true +separator=, +null_format=\N + + +id_columns +Group DEFAULT Datang +Region +FactoryNumber +DeviceNumber + +time_column=Time + +csv_columns +RegionTEXT, +FactoryNumber TEXT, +DeviceNumber TEXT, +SKIP, +SKIP, +Time INT64, +Temperature FLOAT, +Emission DOUBLE, +``` +## Commands + +``` +csv2tsfile.sh --source ./xxx/xxx --target /xxx/xxx --fail_dir /xxx/xxx +csv2tsfile.bat --source ./xxx/xxx --target /xxx/xxx --fail_dir /xxx/xxx +``` diff --git a/java/tools/pom.xml b/java/tools/pom.xml new file mode 100644 index 000000000..d46e695c7 --- /dev/null +++ b/java/tools/pom.xml @@ -0,0 +1,92 @@ + + + + 4.0.0 + + org.apache.tsfile + tsfile-java + 1.2.0-SNAPSHOT + + tools + TsFile: Java: Tools + + + org.apache.tsfile + common + 1.2.0-SNAPSHOT + + + commons-cli + commons-cli + 1.5.0 + + + commons-io + commons-io + + + org.apache.tsfile + tsfile + 1.2.0-SNAPSHOT + + + org.slf4j + slf4j-api + + + junit + junit + test + + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + + server-assembly + + single + + package + + 
+ src/assembly/tools.xml + + false + + + org.apache.tsfile.tools.TsFileTool + true + true + + + + + + + + + diff --git a/java/tools/src/assembly/resources/conf/logback-cvs2tsfile.xml b/java/tools/src/assembly/resources/conf/logback-cvs2tsfile.xml new file mode 100644 index 000000000..d4d2ed8fb --- /dev/null +++ b/java/tools/src/assembly/resources/conf/logback-cvs2tsfile.xml @@ -0,0 +1,49 @@ + + + + + System.out + + %d [%t] %-5p %C{25}:%L - %m %n + utf-8 + + + ERROR + + + + UTF-8 + ${TSFILE_HOME}/logs/log_tools.log + + ${TSFILE_HOME}/logs/log-tools-%d{yyyyMMdd}.log.gz + 30 + + true + + %d [%t] %-5p %C{25}:%L - %m %n + utf-8 + + + INFO + + + + + + + diff --git a/java/tools/src/assembly/resources/tools/csv2tsfile.bat b/java/tools/src/assembly/resources/tools/csv2tsfile.bat new file mode 100644 index 000000000..d8bc1dff4 --- /dev/null +++ b/java/tools/src/assembly/resources/tools/csv2tsfile.bat @@ -0,0 +1,50 @@ +@REM +@REM Licensed to the Apache Software Foundation (ASF) under one +@REM or more contributor license agreements. See the NOTICE file +@REM distributed with this work for additional information +@REM regarding copyright ownership. The ASF licenses this file +@REM to you under the Apache License, Version 2.0 (the +@REM "License"); you may not use this file except in compliance +@REM with the License. You may obtain a copy of the License at +@REM +@REM http://www.apache.org/licenses/LICENSE-2.0 +@REM +@REM Unless required by applicable law or agreed to in writing, +@REM software distributed under the License is distributed on an +@REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@REM KIND, either express or implied. See the License for the +@REM specific language governing permissions and limitations +@REM under the License. +@REM + +@echo off +setlocal enabledelayedexpansion + +if "%OS%" == "Windows_NT" setlocal + +pushd %~dp0.. 
+if NOT DEFINED TSFILE_HOME set TSFILE_HOME=%CD% +popd + +set JAVA_OPTS=-ea^ + -DTSFILE_HOME="%TSFILE_HOME%" + +if NOT DEFINED JAVA_HOME goto :err + +echo ------------------------------------------ +echo Starting Csv to TsFile Script +echo ------------------------------------------ + +set CLASSPATH="%TSFILE_HOME%\lib\*" +if NOT DEFINED MAIN_CLASS set MAIN_CLASS=org.apache.tsfile.tools.TsFileTool + +set TSFILE_CONF=%TSFILE_HOME%\conf +set "tsfile_params=-Dlogback.configurationFile=!IOTDB_CLI_CONF!\logback-cvs2tsfile.xml" +start /B "" cmd /C "("%JAVA_HOME%\bin\java" -DTSFILE_HOME=!TSFILE_HOME! !tsfile_params! !JAVA_OPTS! -cp !CLASSPATH! !MAIN_CLASS! %*) > nul 2>&1" +exit /b + + +:err +echo JAVA_HOME environment variable must be set! +set ret_code=1 +exit /b \ No newline at end of file diff --git a/java/tools/src/assembly/resources/tools/csv2tsfile.sh b/java/tools/src/assembly/resources/tools/csv2tsfile.sh new file mode 100644 index 000000000..972c9160d --- /dev/null +++ b/java/tools/src/assembly/resources/tools/csv2tsfile.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +echo ------------------------------------------ +echo Starting Csv to TsFile Script +echo ------------------------------------------ + +if [ -z "${TSFILE_HOME}" ]; then + export TSFILE_HOME="$(cd "`dirname "$0"`"/..; pwd)" +fi + +if [ -n "$JAVA_HOME" ]; then + for java in "$JAVA_HOME"/bin/amd64/java "$JAVA_HOME"/bin/java; do + if [ -x "$java" ]; then + JAVA="$java" + break + fi + done +else + JAVA=java +fi + +if [ -z $JAVA ] ; then + echo Unable to find java executable. Check JAVA_HOME and PATH environment variables. > /dev/stderr + exit 1; +fi + + +CLASSPATH=${TSFILE_HOME}/lib/* + +MAIN_CLASS=org.apache.tsfile.tools.TsFileTool + +TSFILE_CONF=${TSFILE_HOME}/conf +tsfile_params="-Dlogback.configurationFile=${TSFILE_CONF}/logback-cvs2tsfile.xml" + +exec "$JAVA" -DTSFILE_HOME=${TSFILE_HOME} $tsfile_params -cp "$CLASSPATH" "$MAIN_CLASS" "$@" \ No newline at end of file diff --git a/java/tools/src/assembly/tools.xml b/java/tools/src/assembly/tools.xml new file mode 100644 index 000000000..ccd960516 --- /dev/null +++ b/java/tools/src/assembly/tools.xml @@ -0,0 +1,54 @@ + + + + tools + + dir + zip + + false + + + lib + + + + + src/assembly/resources + ${file.separator} + + + + + ${maven.multiModuleProjectDirectory}/java/tools/src/assembly/resources/conf/logback-cvs2tsfile.xml + conf/logback-cvs2tsfile.xml + + + ${maven.multiModuleProjectDirectory}/java/tools/src/assembly/resources/tools/csv2tsfile.sh + tools/csv2tsfile.sh + + + ${maven.multiModuleProjectDirectory}/java/tools/src/assembly/resources/tools/csv2tsfile.bat + tools/csv2tsfile.bat + + + diff --git a/java/tools/src/main/java/org/apache/tsfile/tools/DateTimeUtils.java b/java/tools/src/main/java/org/apache/tsfile/tools/DateTimeUtils.java new file mode 100644 index 000000000..04452d3f5 --- /dev/null +++ b/java/tools/src/main/java/org/apache/tsfile/tools/DateTimeUtils.java @@ -0,0 +1,514 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.tsfile.tools; + +import java.time.DateTimeException; +import java.time.Instant; +import java.time.ZoneId; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; +import java.time.format.DateTimeParseException; +import java.time.format.SignStyle; +import java.time.temporal.ChronoField; + +public class DateTimeUtils { + + private DateTimeUtils() { + // forbidding instantiation + } + + public static final DateTimeFormatter ISO_LOCAL_DATE_WIDTH_1_2; + + static { + ISO_LOCAL_DATE_WIDTH_1_2 = + new DateTimeFormatterBuilder() + .appendValue(ChronoField.YEAR, 4, 19, SignStyle.NEVER) + .appendLiteral('-') + .appendValue(ChronoField.MONTH_OF_YEAR, 1, 2, SignStyle.NEVER) + .appendLiteral('-') + .appendValue(ChronoField.DAY_OF_MONTH, 1, 2, SignStyle.NEVER) + .toFormatter(); + } + + /** such as '2011/12/03'. 
*/ + public static final DateTimeFormatter ISO_LOCAL_DATE_WITH_SLASH; + + static { + ISO_LOCAL_DATE_WITH_SLASH = + new DateTimeFormatterBuilder() + .appendValue(ChronoField.YEAR, 4, 19, SignStyle.NEVER) + .appendLiteral('/') + .appendValue(ChronoField.MONTH_OF_YEAR, 1, 2, SignStyle.NEVER) + .appendLiteral('/') + .appendValue(ChronoField.DAY_OF_MONTH, 1, 2, SignStyle.NEVER) + .toFormatter(); + } + + /** such as '2011.12.03'. */ + public static final DateTimeFormatter ISO_LOCAL_DATE_WITH_DOT; + + static { + ISO_LOCAL_DATE_WITH_DOT = + new DateTimeFormatterBuilder() + .appendValue(ChronoField.YEAR, 4, 19, SignStyle.NEVER) + .appendLiteral('.') + .appendValue(ChronoField.MONTH_OF_YEAR, 1, 2, SignStyle.NEVER) + .appendLiteral('.') + .appendValue(ChronoField.DAY_OF_MONTH, 1, 2, SignStyle.NEVER) + .toFormatter(); + } + + /** such as '10:15:30' or '10:15:30.123'. */ + public static final DateTimeFormatter ISO_LOCAL_TIME_WITH_MS; + + static { + ISO_LOCAL_TIME_WITH_MS = + new DateTimeFormatterBuilder() + .appendValue(ChronoField.HOUR_OF_DAY, 2) + .appendLiteral(':') + .appendValue(ChronoField.MINUTE_OF_HOUR, 2) + .appendLiteral(':') + .appendValue(ChronoField.SECOND_OF_MINUTE, 2) + .optionalStart() + .appendFraction(ChronoField.MILLI_OF_SECOND, 0, 3, true) + .toFormatter(); + } + + /** such as '10:15:30' or '10:15:30.123456'. */ + public static final DateTimeFormatter ISO_LOCAL_TIME_WITH_US; + + static { + ISO_LOCAL_TIME_WITH_US = + new DateTimeFormatterBuilder() + .appendValue(ChronoField.HOUR_OF_DAY, 2) + .appendLiteral(':') + .appendValue(ChronoField.MINUTE_OF_HOUR, 2) + .appendLiteral(':') + .appendValue(ChronoField.SECOND_OF_MINUTE, 2) + .optionalStart() + .appendFraction(ChronoField.MICRO_OF_SECOND, 0, 6, true) + .toFormatter(); + } + + /** such as '10:15:30' or '10:15:30.123456789'. 
*/ + public static final DateTimeFormatter ISO_LOCAL_TIME_WITH_NS; + + static { + ISO_LOCAL_TIME_WITH_NS = + new DateTimeFormatterBuilder() + .appendValue(ChronoField.HOUR_OF_DAY, 2) + .appendLiteral(':') + .appendValue(ChronoField.MINUTE_OF_HOUR, 2) + .appendLiteral(':') + .appendValue(ChronoField.SECOND_OF_MINUTE, 2) + .optionalStart() + .appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true) + .optionalEnd() + .toFormatter(); + } + + /** such as '2011-12-03T10:15:30+01:00' or '2011-12-03T10:15:30.123+01:00'. */ + public static final DateTimeFormatter ISO_OFFSET_DATE_TIME_WITH_MS; + + static { + ISO_OFFSET_DATE_TIME_WITH_MS = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(ISO_LOCAL_DATE_WIDTH_1_2) + .appendLiteral('T') + .append(ISO_LOCAL_TIME_WITH_MS) + .appendOffsetId() + .toFormatter(); + } + + /** such as '2011-12-03T10:15:30+01:00' or '2011-12-03T10:15:30.123456+01:00'. */ + public static final DateTimeFormatter ISO_OFFSET_DATE_TIME_WITH_US; + + static { + ISO_OFFSET_DATE_TIME_WITH_US = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(ISO_LOCAL_DATE_WIDTH_1_2) + .appendLiteral('T') + .append(ISO_LOCAL_TIME_WITH_US) + .appendOffsetId() + .toFormatter(); + } + + /** such as '2011-12-03T10:15:30+01:00' or '2011-12-03T10:15:30.123456789+01:00'. */ + public static final DateTimeFormatter ISO_OFFSET_DATE_TIME_WITH_NS; + + static { + ISO_OFFSET_DATE_TIME_WITH_NS = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(ISO_LOCAL_DATE_WIDTH_1_2) + .appendLiteral('T') + .append(ISO_LOCAL_TIME_WITH_NS) + .appendOffsetId() + .toFormatter(); + } + + /** such as '2011/12/03T10:15:30+01:00' or '2011/12/03T10:15:30.123+01:00'. 
*/ + public static final DateTimeFormatter ISO_OFFSET_DATE_TIME_WITH_SLASH; + + static { + ISO_OFFSET_DATE_TIME_WITH_SLASH = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(ISO_LOCAL_DATE_WITH_SLASH) + .appendLiteral('T') + .append(ISO_LOCAL_TIME_WITH_MS) + .appendOffsetId() + .toFormatter(); + } + + /** such as '2011/12/03T10:15:30+01:00' or '2011/12/03T10:15:30.123456+01:00'. */ + public static final DateTimeFormatter ISO_OFFSET_DATE_TIME_WITH_SLASH_US; + + static { + ISO_OFFSET_DATE_TIME_WITH_SLASH_US = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(ISO_LOCAL_DATE_WITH_SLASH) + .appendLiteral('T') + .append(ISO_LOCAL_TIME_WITH_US) + .appendOffsetId() + .toFormatter(); + } + + /** such as '2011/12/03T10:15:30+01:00' or '2011/12/03T10:15:30.123456789+01:00'. */ + public static final DateTimeFormatter ISO_OFFSET_DATE_TIME_WITH_SLASH_NS; + + static { + ISO_OFFSET_DATE_TIME_WITH_SLASH_NS = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(ISO_LOCAL_DATE_WITH_SLASH) + .appendLiteral('T') + .append(ISO_LOCAL_TIME_WITH_NS) + .appendOffsetId() + .toFormatter(); + } + + /** such as '2011.12.03T10:15:30+01:00' or '2011.12.03T10:15:30.123+01:00'. */ + public static final DateTimeFormatter ISO_OFFSET_DATE_TIME_WITH_DOT; + + static { + ISO_OFFSET_DATE_TIME_WITH_DOT = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(ISO_LOCAL_DATE_WITH_DOT) + .appendLiteral('T') + .append(ISO_LOCAL_TIME_WITH_MS) + .appendOffsetId() + .toFormatter(); + } + + /** such as '2011.12.03T10:15:30+01:00' or '2011.12.03T10:15:30.123456+01:00'. 
*/ + public static final DateTimeFormatter ISO_OFFSET_DATE_TIME_WITH_DOT_US; + + static { + ISO_OFFSET_DATE_TIME_WITH_DOT_US = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(ISO_LOCAL_DATE_WITH_DOT) + .appendLiteral('T') + .append(ISO_LOCAL_TIME_WITH_US) + .appendOffsetId() + .toFormatter(); + } + + /** such as '2011.12.03T10:15:30+01:00' or '2011.12.03T10:15:30.123456789+01:00'. */ + public static final DateTimeFormatter ISO_OFFSET_DATE_TIME_WITH_DOT_NS; + + static { + ISO_OFFSET_DATE_TIME_WITH_DOT_NS = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(ISO_LOCAL_DATE_WITH_DOT) + .appendLiteral('T') + .append(ISO_LOCAL_TIME_WITH_NS) + .appendOffsetId() + .toFormatter(); + } + + /** such as '2011-12-03 10:15:30+01:00' or '2011-12-03 10:15:30.123+01:00'. */ + public static final DateTimeFormatter ISO_OFFSET_DATE_TIME_WITH_SPACE; + + static { + ISO_OFFSET_DATE_TIME_WITH_SPACE = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(DateTimeFormatter.ISO_LOCAL_DATE) + .appendLiteral(' ') + .append(ISO_LOCAL_TIME_WITH_MS) + .appendOffsetId() + .toFormatter(); + } + + /** such as '2011-12-03 10:15:30+01:00' or '2011-12-03 10:15:30.123456+01:00'. */ + public static final DateTimeFormatter ISO_OFFSET_DATE_TIME_WITH_SPACE_US; + + static { + ISO_OFFSET_DATE_TIME_WITH_SPACE_US = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(DateTimeFormatter.ISO_LOCAL_DATE) + .appendLiteral(' ') + .append(ISO_LOCAL_TIME_WITH_US) + .appendOffsetId() + .toFormatter(); + } + + /** such as '2011-12-03 10:15:30+01:00' or '2011-12-03 10:15:30.123456789+01:00'. 
*/ + public static final DateTimeFormatter ISO_OFFSET_DATE_TIME_WITH_SPACE_NS; + + static { + ISO_OFFSET_DATE_TIME_WITH_SPACE_NS = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(DateTimeFormatter.ISO_LOCAL_DATE) + .appendLiteral(' ') + .append(ISO_LOCAL_TIME_WITH_NS) + .appendOffsetId() + .toFormatter(); + } + + /** such as '2011/12/03 10:15:30+01:00' or '2011/12/03 10:15:30.123+01:00'. */ + public static final DateTimeFormatter ISO_OFFSET_DATE_TIME_WITH_SLASH_WITH_SPACE; + + static { + ISO_OFFSET_DATE_TIME_WITH_SLASH_WITH_SPACE = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(ISO_LOCAL_DATE_WITH_SLASH) + .appendLiteral(' ') + .append(ISO_LOCAL_TIME_WITH_MS) + .appendOffsetId() + .toFormatter(); + } + + /** such as '2011/12/03 10:15:30+01:00' or '2011/12/03 10:15:30.123456+01:00'. */ + public static final DateTimeFormatter ISO_OFFSET_DATE_TIME_WITH_SLASH_WITH_SPACE_US; + + static { + ISO_OFFSET_DATE_TIME_WITH_SLASH_WITH_SPACE_US = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(ISO_LOCAL_DATE_WITH_SLASH) + .appendLiteral(' ') + .append(ISO_LOCAL_TIME_WITH_US) + .appendOffsetId() + .toFormatter(); + } + + /** such as '2011/12/03 10:15:30+01:00' or '2011/12/03 10:15:30.123456789+01:00'. */ + public static final DateTimeFormatter ISO_OFFSET_DATE_TIME_WITH_SLASH_WITH_SPACE_NS; + + static { + ISO_OFFSET_DATE_TIME_WITH_SLASH_WITH_SPACE_NS = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(ISO_LOCAL_DATE_WITH_SLASH) + .appendLiteral(' ') + .append(ISO_LOCAL_TIME_WITH_NS) + .appendOffsetId() + .toFormatter(); + } + + /** such as '2011.12.03 10:15:30+01:00' or '2011.12.03 10:15:30.123+01:00'. 
*/ + public static final DateTimeFormatter ISO_OFFSET_DATE_TIME_WITH_DOT_WITH_SPACE; + + static { + ISO_OFFSET_DATE_TIME_WITH_DOT_WITH_SPACE = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(ISO_LOCAL_DATE_WITH_DOT) + .appendLiteral(' ') + .append(ISO_LOCAL_TIME_WITH_MS) + .appendOffsetId() + .toFormatter(); + } + + /** such as '2011.12.03 10:15:30+01:00' or '2011.12.03 10:15:30.123456+01:00'. */ + public static final DateTimeFormatter ISO_OFFSET_DATE_TIME_WITH_DOT_WITH_SPACE_US; + + static { + ISO_OFFSET_DATE_TIME_WITH_DOT_WITH_SPACE_US = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(ISO_LOCAL_DATE_WITH_DOT) + .appendLiteral(' ') + .append(ISO_LOCAL_TIME_WITH_US) + .appendOffsetId() + .toFormatter(); + } + + /** such as '2011.12.03 10:15:30+01:00' or '2011.12.03 10:15:30.123456789+01:00'. */ + public static final DateTimeFormatter ISO_OFFSET_DATE_TIME_WITH_DOT_WITH_SPACE_NS; + + static { + ISO_OFFSET_DATE_TIME_WITH_DOT_WITH_SPACE_NS = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(ISO_LOCAL_DATE_WITH_DOT) + .appendLiteral(' ') + .append(ISO_LOCAL_TIME_WITH_NS) + .appendOffsetId() + .toFormatter(); + } + + public static final DateTimeFormatter formatter = + new DateTimeFormatterBuilder() + /** + * The ISO date-time formatter that formats or parses a date-time with an offset, such as + * '2011-12-03T10:15:30+01:00' or '2011-12-03T10:15:30.123+01:00'. + */ + .appendOptional(ISO_OFFSET_DATE_TIME_WITH_MS) + + /** such as '2011-12-03T10:15:30+01:00' or '2011-12-03T10:15:30.123456+01:00'. */ + .appendOptional(ISO_OFFSET_DATE_TIME_WITH_US) + + /** such as '2011-12-03T10:15:30+01:00' or '2011-12-03T10:15:30.123456789+01:00'. */ + .appendOptional(ISO_OFFSET_DATE_TIME_WITH_NS) + + /** such as '2011/12/03T10:15:30+01:00' or '2011/12/03T10:15:30.123+01:00'. */ + .appendOptional(ISO_OFFSET_DATE_TIME_WITH_SLASH) + + /** such as '2011/12/03T10:15:30+01:00' or '2011/12/03T10:15:30.123456+01:00'. 
*/ + .appendOptional(ISO_OFFSET_DATE_TIME_WITH_SLASH_US) + + /** such as '2011/12/03T10:15:30+01:00' or '2011/12/03T10:15:30.123456789+01:00'. */ + .appendOptional(ISO_OFFSET_DATE_TIME_WITH_SLASH_NS) + + /** such as '2011.12.03T10:15:30+01:00' or '2011.12.03T10:15:30.123+01:00'. */ + .appendOptional(ISO_OFFSET_DATE_TIME_WITH_DOT) + + /** such as '2011.12.03T10:15:30+01:00' or '2011.12.03T10:15:30.123456+01:00'. */ + .appendOptional(ISO_OFFSET_DATE_TIME_WITH_DOT_US) + + /** such as '2011.12.03T10:15:30+01:00' or '2011.12.03T10:15:30.123456789+01:00'. */ + .appendOptional(ISO_OFFSET_DATE_TIME_WITH_DOT_NS) + + /** such as '2011-12-03 10:15:30+01:00' or '2011-12-03 10:15:30.123+01:00'. */ + .appendOptional(ISO_OFFSET_DATE_TIME_WITH_SPACE) + + /** such as '2011-12-03 10:15:30+01:00' or '2011-12-03 10:15:30.123456+01:00'. */ + .appendOptional(ISO_OFFSET_DATE_TIME_WITH_SPACE_US) + + /** such as '2011-12-03 10:15:30+01:00' or '2011-12-03 10:15:30.123456789+01:00'. */ + .appendOptional(ISO_OFFSET_DATE_TIME_WITH_SPACE_NS) + + /** such as '2011/12/03 10:15:30+01:00' or '2011/12/03 10:15:30.123+01:00'. */ + .appendOptional(ISO_OFFSET_DATE_TIME_WITH_SLASH_WITH_SPACE) + + /** such as '2011/12/03 10:15:30+01:00' or '2011/12/03 10:15:30.123456+01:00'. */ + .appendOptional(ISO_OFFSET_DATE_TIME_WITH_SLASH_WITH_SPACE_US) + + /** such as '2011/12/03 10:15:30+01:00' or '2011/12/03 10:15:30.123456789+01:00'. */ + .appendOptional(ISO_OFFSET_DATE_TIME_WITH_SLASH_WITH_SPACE_NS) + + /** such as '2011.12.03 10:15:30+01:00' or '2011.12.03 10:15:30.123+01:00'. */ + .appendOptional(ISO_OFFSET_DATE_TIME_WITH_DOT_WITH_SPACE) + + /** such as '2011.12.03 10:15:30+01:00' or '2011.12.03 10:15:30.123456+01:00'. */ + .appendOptional(ISO_OFFSET_DATE_TIME_WITH_DOT_WITH_SPACE_US) + + /** such as '2011.12.03 10:15:30+01:00' or '2011.12.03 10:15:30.123456789+01:00'. 
*/ + .appendOptional(ISO_OFFSET_DATE_TIME_WITH_DOT_WITH_SPACE_NS) + .toFormatter(); + + public static long convertTimestampOrDatetimeStrToLongWithDefaultZone( + String timeStr, String timestampPrecision) { + try { + return Long.parseLong(timeStr); + } catch (NumberFormatException e) { + return DateTimeUtils.convertDatetimeStrToLong( + timeStr, ZoneId.systemDefault(), timestampPrecision); + } + } + + public static long convertDatetimeStrToLong(String str, ZoneId zoneId) { + return convertDatetimeStrToLong(str, toZoneOffset(zoneId), 0, "ms"); + } + + public static long convertDatetimeStrToLong( + String str, ZoneId zoneId, String timestampPrecision) { + return convertDatetimeStrToLong(str, toZoneOffset(zoneId), 0, timestampPrecision); + } + + public static long getInstantWithPrecision(String str, String timestampPrecision) { + try { + ZonedDateTime zonedDateTime = ZonedDateTime.parse(str, formatter); + Instant instant = zonedDateTime.toInstant(); + if ("us".equals(timestampPrecision)) { + if (instant.getEpochSecond() < 0 && instant.getNano() > 0) { + // adjustment can reduce the loss of the division + long millis = Math.multiplyExact(instant.getEpochSecond() + 1, 1000_000L); + long adjustment = instant.getNano() / 1000 - 1L; + return Math.addExact(millis, adjustment); + } else { + long millis = Math.multiplyExact(instant.getEpochSecond(), 1000_000L); + return Math.addExact(millis, instant.getNano() / 1000); + } + } else if ("ns".equals(timestampPrecision)) { + long millis = Math.multiplyExact(instant.getEpochSecond(), 1000_000_000L); + return Math.addExact(millis, instant.getNano()); + } + return instant.toEpochMilli(); + } catch (DateTimeParseException e) { + throw new RuntimeException(e.getMessage()); + } + } + + /** convert date time string to millisecond, microsecond or nanosecond. 
*/ + public static long convertDatetimeStrToLong( + String str, ZoneOffset offset, int depth, String timestampPrecision) { + if (depth >= 2) { + throw new DateTimeException( + String.format( + "Failed to convert %s to millisecond, zone offset is %s, " + + "please input like 2011-12-03T10:15:30 or 2011-12-03T10:15:30+01:00", + str, offset)); + } + if (str.contains("Z")) { + return convertDatetimeStrToLong( + str.substring(0, str.indexOf('Z')) + "+00:00", offset, depth, timestampPrecision); + } else if (str.length() == 10) { + return convertDatetimeStrToLong(str + "T00:00:00", offset, depth, timestampPrecision); + } else if (str.length() - str.lastIndexOf('+') != 6 + && str.length() - str.lastIndexOf('-') != 6) { + return convertDatetimeStrToLong(str + offset, offset, depth + 1, timestampPrecision); + } else if (str.contains("[") || str.contains("]")) { + throw new DateTimeException( + String.format( + "%s with [time-region] at end is not supported now, " + + "please input like 2011-12-03T10:15:30 or 2011-12-03T10:15:30+01:00", + str)); + } + return getInstantWithPrecision(str, timestampPrecision); + } + + public static ZoneOffset toZoneOffset(ZoneId zoneId) { + return zoneId.getRules().getOffset(Instant.now()); + } +} diff --git a/java/tools/src/main/java/org/apache/tsfile/tools/SchemaParser.java b/java/tools/src/main/java/org/apache/tsfile/tools/SchemaParser.java new file mode 100644 index 000000000..0bb9816e5 --- /dev/null +++ b/java/tools/src/main/java/org/apache/tsfile/tools/SchemaParser.java @@ -0,0 +1,264 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.tsfile.tools; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class SchemaParser { + + public static class Schema { + String tableName; + String timePrecision; + boolean hasHeader = true; + String separator; + String nullFormat; + String timeColumn; + int timeColumnIndex = -1; + List idColumns = new ArrayList<>(); + List csvColumns = new ArrayList<>(); + + @Override + public String toString() { + return "Schema{" + + "tableName='" + + tableName + + '\'' + + ", timePrecision='" + + timePrecision + + '\'' + + ", hasHeader=" + + hasHeader + + ", separator='" + + separator + + '\'' + + ", nullFormat='" + + nullFormat + + '\'' + + ", timeColumn='" + + timeColumn + + '\'' + + ", idColumns=" + + idColumns + + ", csvColumns=" + + csvColumns + + '}'; + } + } + + public static class Column { + String name; + String type; + + boolean isSkip; + + public Column(String name, String type) { + this.name = name; + this.isSkip = false; + this.type = type; + } + + public Column(String name) { + this.name = name; + this.isSkip = true; + } + + @Override + public String toString() { + return "Column{" + + "name='" + + name + + '\'' + + ", type='" + + type + + '\'' + + ", isSkip=" + + isSkip + + '}'; + } + } + + public static class IDColumns { + String name; + boolean isDefault; + String defaultValue; + int csvColumnIndex = -1; + boolean isExistCsvColumn; + + public IDColumns(String name, boolean isDefault, String defaultValue) { + this.name 
= name; + this.isDefault = isDefault; + if (isDefault) { + this.defaultValue = defaultValue; + this.isExistCsvColumn = false; + } + } + + public IDColumns(String name) { + this.name = name; + this.isDefault = false; + this.isExistCsvColumn = true; + } + + @Override + public String toString() { + return "IDColumns{" + + "name='" + + name + + '\'' + + ", isDefault=" + + isDefault + + ", defaultValue='" + + defaultValue + + '\'' + + ", isExistCsvColumn=" + + isExistCsvColumn + + ", csvColumnIndex=" + + csvColumnIndex + + '}'; + } + } + + public static Schema parseSchema(String filePath) throws IOException { + Schema schema = new Schema(); + try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) { + String line; + boolean readingIdColumns = false; + boolean readingCsvColumns = false; + int timeIndex = 0; + while ((line = reader.readLine()) != null) { + line = line.trim(); + if (line.isEmpty() || line.startsWith("//")) { + continue; + } + if (line.startsWith("table_name=")) { + schema.tableName = extractValue(line); + } else if (line.startsWith("time_precision=")) { + schema.timePrecision = extractValue(line); + } else if (line.startsWith("has_header=")) { + schema.hasHeader = Boolean.parseBoolean(extractValue(line)); + } else if (line.startsWith("separator=")) { + schema.separator = extractValue(line); + } else if (line.startsWith("null_format=")) { + schema.nullFormat = extractValue(line); + } else if (line.startsWith("time_column=")) { + schema.timeColumn = extractValue(line); + } else if (line.equals("id_columns")) { + readingIdColumns = true; + readingCsvColumns = false; + } else if (line.equals("csv_columns")) { + readingIdColumns = false; + readingCsvColumns = true; + } else if (readingIdColumns) { + parseIdColumns(line, schema); + } else if (readingCsvColumns) { + parseCsvColumns(line, schema, timeIndex); + timeIndex++; + } + } + addIdColumnsIndex(schema); + } + validateParams(schema); + if (schema.separator.equals("tab")) { + 
schema.separator = "\t"; + } + return schema; + } + + private static String extractValue(String line) { + int index = line.indexOf('='); + return line.substring(index + 1); + } + + private static void parseIdColumns(String line, Schema schema) { + String[] parts = line.split(" "); + if (parts.length == 3) { + schema.idColumns.add( + new IDColumns( + parts[0].trim(), parts[1].trim().equalsIgnoreCase("DEFAULT"), parts[2].trim())); + } else if (parts.length == 1) { + schema.idColumns.add(new IDColumns(parts[0].trim())); + } else { + throw new IllegalArgumentException("The data format of id_columns is incorrect"); + } + } + + private static void addIdColumnsIndex(Schema schema) { + List idColumnsList = schema.idColumns; + List columnList = schema.csvColumns; + for (IDColumns idColumn : idColumnsList) { + if (!idColumn.isDefault) { + for (int j = 0; j < columnList.size(); j++) { + if (columnList.get(j).name.equals(idColumn.name)) { + idColumn.csvColumnIndex = j; + break; + } + } + } + } + } + + private static void parseCsvColumns(String line, Schema schema, int timeIndex) { + String[] parts = line.split(" "); + String columnName = parts[0].trim(); + + if (parts.length == 2) { + String dataType = parts[1].trim(); + if (dataType.endsWith(",") || dataType.endsWith(";")) { + dataType = dataType.substring(0, dataType.length() - 1); + } + if (columnName.equals(schema.timeColumn)) { + schema.timeColumnIndex = timeIndex; + } + schema.csvColumns.add(new Column(columnName, dataType)); + } else if (parts.length == 1) { + if (columnName.endsWith(",") || columnName.endsWith(";")) { + columnName = columnName.substring(0, columnName.length() - 1); + } + schema.csvColumns.add(new Column(columnName)); + } else { + throw new IllegalArgumentException("The data format of csv_columns is incorrect"); + } + } + + private static void validateParams(SchemaParser.Schema schema) { + if (!schema.timePrecision.equals("us") + && !schema.timePrecision.equals("ms") + && 
!schema.timePrecision.equals("ns")) { + throw new IllegalArgumentException("timePrecision must be us,ms or ns"); + } + if (!schema.separator.equals(",") + && !schema.separator.equals("tab") + && !schema.separator.equals(";")) { + throw new IllegalArgumentException("separator must be \",\", tab, or \";\""); + } + if (schema.timeColumnIndex < 0) { + throw new IllegalArgumentException("time_column is required"); + } + if (schema.tableName.isEmpty()) { + throw new IllegalArgumentException("table_name is required"); + } + if (schema.csvColumns.isEmpty()) { + throw new IllegalArgumentException("csv_columns is required"); + } + } +} diff --git a/java/tools/src/main/java/org/apache/tsfile/tools/TsFileTool.java b/java/tools/src/main/java/org/apache/tsfile/tools/TsFileTool.java new file mode 100644 index 000000000..0d5c1cfe9 --- /dev/null +++ b/java/tools/src/main/java/org/apache/tsfile/tools/TsFileTool.java @@ -0,0 +1,498 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.tsfile.tools; + +import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.file.metadata.TableSchema; +import org.apache.tsfile.file.metadata.enums.CompressionType; +import org.apache.tsfile.file.metadata.enums.TSEncoding; +import org.apache.tsfile.utils.Binary; +import org.apache.tsfile.write.TsFileWriter; +import org.apache.tsfile.write.record.Tablet; +import org.apache.tsfile.write.schema.IMeasurementSchema; +import org.apache.tsfile.write.schema.MeasurementSchema; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.io.FilenameUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +public class TsFileTool { + private static int THREAD_COUNT = 8; + // Default value 256MB + private static long CHUNK_SIZE_BYTE = 1024 * 1024 * 256; + private static String outputDirectoryStr = ""; + private static String inputDirectoryStr = ""; + private static String failedDirectoryStr = "failed"; + private static String schemaPathStr = ""; + + private static SchemaParser.Schema schema = null; + + private static final Logger LOGGER = LoggerFactory.getLogger(TsFileTool.class); + + public static void main(String[] args) { + if 
(System.getenv("TSFILE_HOME") != null) { + System.setProperty("TSFILE_HOME", System.getenv("TSFILE_HOME")); + } + parseCommandLineParams(args); + if (!validateParams()) { + return; + } + createDir(); + try { + schema = SchemaParser.parseSchema(schemaPathStr); + } catch (Exception e) { + LOGGER.error("Failed to parse schema file: " + schemaPathStr, e); + System.exit(1); + } + File inputDirectory = new File(inputDirectoryStr); + + ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); + + try { + processDirectory(inputDirectory, executor); + } finally { + executor.shutdown(); + try { + executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS); + } catch (InterruptedException e) { + LOGGER.error("Failed to await termination", e); + } + } + } + + private static TableSchema genTableSchema( + List idColumnList, + List columnList, + String tableName, + Map defaultMap) { + List measurementSchemas = new ArrayList<>(); + List columnTypes = new ArrayList<>(); + List idSchemaList = new ArrayList<>(); + for (SchemaParser.IDColumns idSchema : idColumnList) { + if (idSchema.isDefault) { + defaultMap.put(idSchema.name, idSchema.defaultValue); + } + idSchemaList.add(idSchema.name); + measurementSchemas.add( + new MeasurementSchema( + idSchema.name, TSDataType.TEXT, TSEncoding.PLAIN, CompressionType.UNCOMPRESSED)); + columnTypes.add(Tablet.ColumnType.ID); + } + List newColumnList = new ArrayList<>(); + + for (SchemaParser.Column column : columnList) { + if (!column.isSkip + && !idSchemaList.contains(column.name) + && !column.name.equals(schema.timeColumn)) { + newColumnList.add(column); + } + } + + for (SchemaParser.Column column : newColumnList) { + measurementSchemas.add( + new MeasurementSchema( + column.name, + TSDataType.valueOf(column.type), + TSEncoding.PLAIN, + CompressionType.UNCOMPRESSED)); + columnTypes.add(Tablet.ColumnType.MEASUREMENT); + } + return new TableSchema(tableName, measurementSchemas, columnTypes); + } + + private static boolean 
writeTsFile(String fileName, List lineList) { + final File tsFile = new File(outputDirectoryStr, fileName); + TsFileWriter writer = null; + try { + writer = new TsFileWriter(tsFile); + writer.setGenerateTableSchema(true); + Map defaultMap = new HashMap<>(); + TableSchema tableSchema = + genTableSchema(schema.idColumns, schema.csvColumns, schema.tableName, defaultMap); + writer.registerTableSchema(tableSchema); + Tablet tablet = genTablet(tableSchema, lineList, defaultMap); + if (tablet != null) { + writer.writeTable(tablet); + return true; + } else { + return false; + } + } catch (Exception e) { + e.printStackTrace(); + LOGGER.error("Failed to write file: " + tsFile); + return false; + } finally { + if (writer != null) { + try { + writer.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + } + } + + private static Tablet genTablet( + TableSchema tableSchema, List lineList, Map defaultMap) { + int num = lineList.size(); + Tablet tablet = + new Tablet( + tableSchema.getTableName(), + tableSchema.getColumnSchemas(), + tableSchema.getColumnTypes(), + num); + + Map map = new HashMap<>(); + for (int i = 0; i < schema.csvColumns.size(); i++) { + SchemaParser.Column column = schema.csvColumns.get(i); + map.put(column.name, i); + } + try { + for (int i = 0; i < num; i++) { + String line = lineList.get(i); + String[] lineArray = line.split(schema.separator); + long timestamp = + DateTimeUtils.convertTimestampOrDatetimeStrToLongWithDefaultZone( + lineArray[schema.timeColumnIndex], schema.timePrecision); + + tablet.addTimestamp(i, timestamp); + List columnSchemas = tableSchema.getColumnSchemas(); + for (int j = 0; j < columnSchemas.size(); j++) { + IMeasurementSchema columnSchema = columnSchemas.get(j); + if (defaultMap.get(columnSchema.getMeasurementId()) != null) { + tablet.addValue( + columnSchema.getMeasurementId(), + i, + defaultMap.get(columnSchema.getMeasurementId())); + } else { + String value = lineArray[map.get(columnSchema.getMeasurementId())]; + if 
(value.equals(schema.nullFormat)) { + value = null; + } + tablet.addValue( + columnSchema.getMeasurementId(), + i, + getValue(columnSchema.getType(), value, tableSchema.getColumnTypes().get(j))); + } + } + } + tablet.rowSize = num; + return tablet; + } catch (Exception e) { + LOGGER.error("Failed to parse csv file"); + } + return null; + } + + public static Object getValue(TSDataType dataType, String i, Tablet.ColumnType columnType) { + switch (dataType) { + case INT64: + return Long.valueOf(i); + case INT32: + return Integer.valueOf(i); + case BOOLEAN: + return Boolean.valueOf(i); + case TEXT: + if (columnType.equals(Tablet.ColumnType.MEASUREMENT)) { + return new Binary(String.valueOf(i), StandardCharsets.UTF_8); + } else { + return String.valueOf(i); + } + case FLOAT: + return Float.valueOf(i); + case DOUBLE: + return Double.valueOf(i); + default: + return i; + } + } + + private static void processDirectory(File directory, ExecutorService executor) { + if (directory.isFile()) { + processFile(directory, executor); + } else { + File[] files = directory.listFiles(); + if (files != null) { + for (File file : files) { + if (file.isDirectory()) { + processDirectory(file, executor); + } else if (file.isFile() && file.getName().endsWith(".csv")) { + processFile(file, executor); + } + } + } + } + } + + private static void cpFile(String sourceFilePath, String targetDirectoryPath) { + try { + Files.createDirectories(Paths.get(targetDirectoryPath)); + Path sourcePath = Paths.get(sourceFilePath); + Path targetPath = Paths.get(targetDirectoryPath, sourcePath.getFileName().toString()); + Files.copy(sourcePath, targetPath, StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + LOGGER.error("Failed to copy file: " + sourceFilePath, e); + } + } + + private static void processFile(File inputFile, ExecutorService executor) { + AtomicInteger fileCounter = new AtomicInteger(1); + String fileName = FilenameUtils.getBaseName(inputFile.getName()); + String fileAbsolutePath = 
inputFile.getAbsolutePath(); + try (BufferedReader reader = + new BufferedReader( + new InputStreamReader( + Files.newInputStream(inputFile.toPath()), StandardCharsets.UTF_8))) { + String line; + long currentChunkSize = 0; + int chunkLines = 0; + int index = 0; + List lineList = new ArrayList<>(); + boolean isSingleFile = true; + while ((line = reader.readLine()) != null) { + if (index == 0) { + if (schema.timeColumnIndex == -1) { + LOGGER.error(inputFile.getAbsolutePath() + " not found:" + schema.timeColumn); + cpFile(inputFile.getAbsolutePath(), failedDirectoryStr); + break; + } + String[] csvCloumns = line.split(schema.separator); + if (csvCloumns.length != schema.csvColumns.size()) { + LOGGER.error( + "The number of columns defined in the schema file is not equal to the number of columns in the csv file(" + + inputFile.getAbsolutePath() + + ")."); + cpFile(inputFile.getAbsolutePath(), failedDirectoryStr); + break; + } + } + + if (schema.hasHeader && index == 0) { + index++; + continue; + } + index++; + byte[] lineBytes = line.getBytes(StandardCharsets.UTF_8); + long lineSize = lineBytes.length; + if (currentChunkSize + lineSize > CHUNK_SIZE_BYTE) { + isSingleFile = false; + if (chunkLines > 0) { + submitChunk( + lineList, + fileCounter.getAndIncrement(), + executor, + fileName, + isSingleFile, + fileAbsolutePath); + lineList = new ArrayList<>(); + currentChunkSize = 0; + chunkLines = 0; + } else { + lineList.add(line); + submitChunk( + lineList, + fileCounter.getAndIncrement(), + executor, + fileName, + isSingleFile, + fileAbsolutePath); + lineList = new ArrayList<>(); + currentChunkSize = 0; + chunkLines = 0; + } + } + lineList.add(line); + currentChunkSize += lineSize; + chunkLines++; + } + if (lineList.size() > 0) { + submitChunk( + lineList, + fileCounter.getAndIncrement(), + executor, + fileName, + isSingleFile, + fileAbsolutePath); + } + + } catch (IOException e) { + e.printStackTrace(); + } + } + + private static void submitChunk( + List lineList, + int 
fileNumber, + ExecutorService executor, + String fileName, + boolean isSingleFile, + String fileAbsolutePath) { + executor.submit( + () -> { + boolean isSuccess; + if (isSingleFile) { + isSuccess = writeTsFile(fileName + ".tsfile", lineList); + } else { + isSuccess = writeTsFile(fileName + "_" + fileNumber + ".tsfile", lineList); + } + if (!isSuccess) { + cpFile(fileAbsolutePath, failedDirectoryStr); + } + }); + } + + private static void printHelp(Options options) { + HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp("csv2tsfile.sh/csv2tsfile.bat", options); + } + + private static void parseCommandLineParams(String[] args) { + Options options = new Options(); + options.addOption("s", "source", true, "Input directory"); + options.addOption("t", "target", true, "Output directory"); + options.addOption("fd", "fail_dir", true, "Failed file directory"); + options.addOption("b", "block_size", true, "Block size default value 256M"); + options.addOption("tn", "thread_num", true, "Thread number"); + options.addOption("schema", "schema", true, "Schema file path"); + options.addOption("h", "help", false, "Show help"); + + try { + CommandLineParser parser = new DefaultParser(); + CommandLine cmd = parser.parse(options, args); + + if (cmd.hasOption("h")) { + printHelp(options); + return; + } + + if (cmd.hasOption("s")) { + inputDirectoryStr = cmd.getOptionValue("s"); + } + if (cmd.hasOption("t")) { + outputDirectoryStr = cmd.getOptionValue("t"); + } + if (cmd.hasOption("fd")) { + failedDirectoryStr = cmd.getOptionValue("fd"); + } + if (cmd.hasOption("b")) { + CHUNK_SIZE_BYTE = parseBlockSize(cmd.getOptionValue("b")); + } + if (cmd.hasOption("tn")) { + THREAD_COUNT = Integer.parseInt(cmd.getOptionValue("tn")); + } + if (cmd.hasOption("schema")) { + schemaPathStr = cmd.getOptionValue("schema"); + } + + if (failedDirectoryStr == null || failedDirectoryStr.equals("")) { + failedDirectoryStr = "failed"; + } + } catch (ParseException e) { + e.printStackTrace(); + } 
+ } + + private static long parseBlockSize(String blockSizeValue) { + long size; + blockSizeValue = blockSizeValue.toUpperCase(); + + if (blockSizeValue.endsWith("K")) { + size = Long.parseLong(blockSizeValue.substring(0, blockSizeValue.length() - 1)) * 1024; + } else if (blockSizeValue.endsWith("M")) { + size = Long.parseLong(blockSizeValue.substring(0, blockSizeValue.length() - 1)) * 1024 * 1024; + } else if (blockSizeValue.endsWith("G")) { + size = + Long.parseLong(blockSizeValue.substring(0, blockSizeValue.length() - 1)) + * 1024 + * 1024 + * 1024; + } else if (blockSizeValue.endsWith("T") || blockSizeValue.endsWith("B")) { + throw new IllegalArgumentException("block_size only supports units of K, M, G, or numbers"); + } else { + size = Long.parseLong(blockSizeValue); + } + + return size; + } + + private static void createDir() { + File targetDir = new File(outputDirectoryStr); + if (!targetDir.exists()) { + targetDir.mkdirs(); + } + if (failedDirectoryStr != null) { + File failDirFile = new File(failedDirectoryStr); + if (!failDirFile.exists()) { + failDirFile.mkdirs(); + } + } + } + + private static boolean validateParams() { + if (inputDirectoryStr == null + || inputDirectoryStr.isEmpty() + || outputDirectoryStr == null + || outputDirectoryStr.isEmpty() + || schemaPathStr == null + || schemaPathStr.isEmpty()) { + LOGGER.error("Missing required parameters. Please provide --source, --target, and --schema."); + return false; + } + File sourceDir = new File(inputDirectoryStr); + if (!sourceDir.exists()) { + LOGGER.error("Source directory(" + sourceDir + ") does not exist."); + return false; + } + File schemaFile = new File(schemaPathStr); + if (!schemaFile.exists()) { + LOGGER.error("Schema file(" + schemaPathStr + ") does not exist."); + return false; + } + if (THREAD_COUNT <= 0) { + LOGGER.error("Invalid thread number. 
Thread number must be greater than 0."); + return false; + } + + return true; + } +} diff --git a/java/tools/src/test/java/org/apache/tsfile/tools/TsfiletoolsTest.java b/java/tools/src/test/java/org/apache/tsfile/tools/TsfiletoolsTest.java new file mode 100644 index 000000000..455ab1c5a --- /dev/null +++ b/java/tools/src/test/java/org/apache/tsfile/tools/TsfiletoolsTest.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tsfile.tools; + +import org.apache.tsfile.read.TsFileSequenceReader; +import org.apache.tsfile.read.common.block.TsBlock; +import org.apache.tsfile.read.controller.CachedChunkLoaderImpl; +import org.apache.tsfile.read.controller.MetadataQuerierByFileImpl; +import org.apache.tsfile.read.query.executor.TableQueryExecutor; +import org.apache.tsfile.read.reader.block.TsBlockReader; + +import org.apache.commons.io.FileUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class TsfiletoolsTest { + private final String testDir = "target" + File.separator + "csvTest"; + private final String csvFile = testDir + File.separator + "data.csv"; + private final String schemaFile = testDir + File.separator + "schemaFile.txt"; + + float[] tmpResult2 = new float[20]; + float[] tmpResult3 = new float[20]; + float[] tmpResult5 = new float[20]; + + @Before + public void setUp() { + new File(testDir).mkdirs(); + genCsvFile(20); + genSchemaFile(); + } + + public void genSchemaFile() { + try (BufferedWriter writer = new BufferedWriter(new FileWriter(schemaFile))) { + writer.write("table_name=root.db1"); + writer.newLine(); + writer.write("time_precision=ms"); + writer.newLine(); + writer.write("has_header=true"); + writer.newLine(); + writer.write("separator=,"); + writer.newLine(); + writer.write("null_format=\\N"); + writer.newLine(); + writer.newLine(); + writer.write("time_column=time"); + writer.newLine(); + writer.write("csv_columns"); + writer.newLine(); + writer.write("time INT64,"); + writer.newLine(); + writer.write("tmp2 FLOAT,"); + writer.newLine(); + writer.write("tmp3 FLOAT,"); + writer.newLine(); + writer.write("SKIP,"); + 
writer.newLine(); + writer.write("tmp5 FLOAT"); + } catch (IOException e) { + e.printStackTrace(); + } + } + + public void genCsvFile(int rows) { + + try (BufferedWriter writer = new BufferedWriter(new FileWriter(csvFile))) { + writer.write("time,tmp2,tmp3,tmp4,tmp5"); + writer.newLine(); + Random random = new Random(); + long timestamp = System.currentTimeMillis(); + + for (int i = 0; i < rows; i++) { + timestamp = timestamp + i; + float tmp2 = random.nextFloat(); + float tmp3 = random.nextFloat(); + float tmp4 = random.nextFloat(); + float tmp5 = random.nextFloat(); + tmpResult2[i] = tmp2; + tmpResult3[i] = tmp3; + tmpResult5[i] = tmp5; + writer.write(timestamp + "," + tmp2 + "," + tmp3 + "," + tmp4 + "," + tmp5); + writer.newLine(); + } + } catch (IOException e) { + e.printStackTrace(); + } + } + + @After + public void tearDown() throws Exception { + FileUtils.deleteDirectory(new File(testDir)); + } + + @Test + public void testCsvToTsfile() throws Exception { + String scFilePath = new File(schemaFile).getAbsolutePath(); + String csvFilePath = new File(csvFile).getAbsolutePath(); + String targetPath = new File(testDir).getAbsolutePath(); + String dataTsfilePath = new File(targetPath + File.separator + "data.tsfile").getAbsolutePath(); + String[] args = new String[] {"-s" + csvFilePath, "-schema" + scFilePath, "-t" + targetPath}; + TsFileTool.main(args); + List columns = new ArrayList<>(); + columns.add("tmp2"); + columns.add("tmp3"); + columns.add("tmp5"); + try (TsFileSequenceReader sequenceReader = new TsFileSequenceReader(dataTsfilePath)) { + TableQueryExecutor tableQueryExecutor = + new TableQueryExecutor( + new MetadataQuerierByFileImpl(sequenceReader), + new CachedChunkLoaderImpl(sequenceReader), + TableQueryExecutor.TableQueryOrdering.DEVICE); + final TsBlockReader reader = tableQueryExecutor.query("root.db1", columns, null, null, null); + assertTrue(reader.hasNext()); + int cnt = 0; + while (reader.hasNext()) { + final TsBlock result = reader.next(); + 
float[] floats_tmp2 = result.getColumn(0).getFloats(); + float[] floats_tmp3 = result.getColumn(1).getFloats(); + float[] floats_tmp5 = result.getColumn(2).getFloats(); + for (int i = 0; i < 20; i++) { + assertEquals(tmpResult2[i], floats_tmp2[i], 0); + assertEquals(tmpResult3[i], floats_tmp3[i], 0); + assertEquals(tmpResult5[i], floats_tmp5[i], 0); + } + cnt += result.getPositionCount(); + } + assertEquals(20, cnt); + } + } +} diff --git a/java/tsfile/pom.xml b/java/tsfile/pom.xml index b07965c8b..8ab97881a 100644 --- a/java/tsfile/pom.xml +++ b/java/tsfile/pom.xml @@ -24,7 +24,7 @@ org.apache.tsfile tsfile-java - 1.0.1-SNAPSHOT + 1.2.0-SNAPSHOT tsfile TsFile: Java: TsFile @@ -38,7 +38,7 @@ org.apache.tsfile common - 1.0.1-SNAPSHOT + 1.2.0-SNAPSHOT com.github.luben @@ -213,6 +213,14 @@ + + org.apache.maven.plugins + maven-compiler-plugin + + 8 + 8 + + diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/cache/LRUCache.java b/java/tsfile/src/main/java/org/apache/tsfile/common/cache/LRUCache.java index 9b79825be..a00469a52 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/common/cache/LRUCache.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/common/cache/LRUCache.java @@ -51,6 +51,10 @@ public synchronized T get(K key) throws IOException { } } + public boolean containsKey(K key) { + return cache.containsKey(key); + } + @Override public synchronized void clear() { cache.clear(); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/conf/TSFileConfig.java b/java/tsfile/src/main/java/org/apache/tsfile/common/conf/TSFileConfig.java index 5c0b719a8..1cdd59e37 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/common/conf/TSFileConfig.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/common/conf/TSFileConfig.java @@ -65,7 +65,9 @@ public class TSFileConfig implements Serializable { public static final String VERSION_NUMBER_V1 = "000001"; /** version number is changed to use 1 byte to represent since version 3. 
*/ - public static final byte VERSION_NUMBER = 0x03; + public static final byte VERSION_NUMBER_V3 = 0x03; + + public static final byte VERSION_NUMBER = 0x04; /** Bloom filter constrain. */ public static final double MIN_BLOOM_FILTER_ERROR_RATE = 0.01; @@ -75,6 +77,9 @@ public class TSFileConfig implements Serializable { /** The primitive array capacity threshold. */ public static final int ARRAY_CAPACITY_THRESHOLD = 1000; + // TODO: configurable but unchangeable + public static int DEFAULT_SEGMENT_NUM_FOR_TABLE_NAME = 3; + /** Memory size threshold for flushing to disk, default value is 128MB. */ private int groupSizeInByte = 128 * 1024 * 1024; diff --git a/java/tsfile/src/main/java/org/apache/tsfile/compatibility/BufferDeserializer.java b/java/tsfile/src/main/java/org/apache/tsfile/compatibility/BufferDeserializer.java new file mode 100644 index 000000000..2bda70630 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/compatibility/BufferDeserializer.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tsfile.compatibility; + +import java.nio.ByteBuffer; + +public interface BufferDeserializer { + T deserialize(ByteBuffer buffer, DeserializeConfig context); +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/compatibility/CompatibilityUtils.java b/java/tsfile/src/main/java/org/apache/tsfile/compatibility/CompatibilityUtils.java new file mode 100644 index 000000000..de01351f5 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/compatibility/CompatibilityUtils.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.compatibility; + +import org.apache.tsfile.file.metadata.MetadataIndexNode; +import org.apache.tsfile.file.metadata.PlainDeviceID; +import org.apache.tsfile.file.metadata.TsFileMetadata; +import org.apache.tsfile.utils.BloomFilter; +import org.apache.tsfile.utils.ReadWriteForEncodingUtils; +import org.apache.tsfile.utils.ReadWriteIOUtils; + +import java.nio.ByteBuffer; +import java.util.Collections; + +public class CompatibilityUtils { + + private CompatibilityUtils() { + // util class + } + + public static DeserializeConfig v3DeserializeConfig = new DeserializeConfig(); + + static { + v3DeserializeConfig.tsFileMetadataBufferDeserializer = + CompatibilityUtils::deserializeTsFileMetadataFromV3; + v3DeserializeConfig.deviceIDBufferDeserializer = + ((buffer, context) -> { + final PlainDeviceID deviceID = PlainDeviceID.deserialize(buffer); + return deviceID.convertToStringArrayDeviceId(); + }); + } + + public static TsFileMetadata deserializeTsFileMetadataFromV3( + ByteBuffer buffer, DeserializeConfig context) { + TsFileMetadata fileMetaData = new TsFileMetadata(); + + // metadataIndex + MetadataIndexNode metadataIndexNode = + context.deviceMetadataIndexNodeBufferDeserializer.deserialize(buffer, context); + fileMetaData.setTableMetadataIndexNodeMap(Collections.singletonMap("", metadataIndexNode)); + + // metaOffset + long metaOffset = ReadWriteIOUtils.readLong(buffer); + fileMetaData.setMetaOffset(metaOffset); + + // read bloom filter + if (buffer.hasRemaining()) { + byte[] bytes = ReadWriteIOUtils.readByteBufferWithSelfDescriptionLength(buffer); + int filterSize = 
ReadWriteForEncodingUtils.readUnsignedVarInt(buffer); + int hashFunctionSize = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer); + fileMetaData.setBloomFilter( + BloomFilter.buildBloomFilter(bytes, filterSize, hashFunctionSize)); + } + + return fileMetaData; + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/compatibility/DeserializeConfig.java b/java/tsfile/src/main/java/org/apache/tsfile/compatibility/DeserializeConfig.java new file mode 100644 index 000000000..b1c4f18ff --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/compatibility/DeserializeConfig.java @@ -0,0 +1,103 @@ +package org.apache.tsfile.compatibility; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import org.apache.tsfile.file.IMetadataIndexEntry; +import org.apache.tsfile.file.metadata.DeviceMetadataIndexEntry; +import org.apache.tsfile.file.metadata.IDeviceID; +import org.apache.tsfile.file.metadata.MeasurementMetadataIndexEntry; +import org.apache.tsfile.file.metadata.MetadataIndexNode; +import org.apache.tsfile.file.metadata.StringArrayDeviceID; +import org.apache.tsfile.file.metadata.TableSchema; +import org.apache.tsfile.file.metadata.TsFileMetadata; +import org.apache.tsfile.write.schema.MeasurementSchema; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; + +public class DeserializeConfig { + public BufferDeserializer tsFileMetadataBufferDeserializer = + TsFileMetadata::deserializeFrom; + + public BufferDeserializer deviceMetadataIndexNodeBufferDeserializer = + (buffer, context) -> MetadataIndexNode.deserializeFrom(buffer, true, context); + public BufferDeserializer measurementMetadataIndexNodeBufferDeserializer = + (buffer, context) -> MetadataIndexNode.deserializeFrom(buffer, false, context); + public BufferDeserializer deviceMetadataIndexEntryBufferDeserializer = + DeviceMetadataIndexEntry::deserializeFrom; + public BufferDeserializer measurementMetadataIndexEntryBufferDeserializer = + ((buffer, context) -> MeasurementMetadataIndexEntry.deserializeFrom(buffer)); + + public BufferDeserializer tableSchemaBufferDeserializer = TableSchema::deserialize; + public BufferDeserializer measurementSchemaBufferDeserializer = + ((buffer, context) -> MeasurementSchema.deserializeFrom(buffer)); + + public BufferDeserializer deviceIDBufferDeserializer = + ((buffer, context) -> StringArrayDeviceID.deserialize(buffer)); + + // stream deserializers + public StreamDeserializer deviceMetadataIndexNodeStreamDeserializer = + (stream, context) -> MetadataIndexNode.deserializeFrom(stream, true, context); + public StreamDeserializer measurementMetadataIndexNodeStreamDeserializer = + (stream, context) -> 
MetadataIndexNode.deserializeFrom(stream, false, context); + public StreamDeserializer deviceMetadataIndexEntryStreamDeserializer = + DeviceMetadataIndexEntry::deserializeFrom; + public StreamDeserializer measurementMetadataIndexEntryStreamDeserializer = + ((stream, context) -> MeasurementMetadataIndexEntry.deserializeFrom(stream)); + + public StreamDeserializer deviceIDStreamDeserializer = + ((stream, context) -> StringArrayDeviceID.deserialize(stream)); + + public MetadataIndexNode deserializeMetadataIndexNode(ByteBuffer buffer, boolean isDeviceLevel) { + if (isDeviceLevel) { + return deviceMetadataIndexNodeBufferDeserializer.deserialize(buffer, this); + } else { + return measurementMetadataIndexNodeBufferDeserializer.deserialize(buffer, this); + } + } + + public IMetadataIndexEntry deserializeMetadataIndexEntry( + ByteBuffer buffer, boolean isDeviceLevel) { + if (isDeviceLevel) { + return deviceMetadataIndexEntryBufferDeserializer.deserialize(buffer, this); + } else { + return measurementMetadataIndexEntryBufferDeserializer.deserialize(buffer, this); + } + } + + public MetadataIndexNode deserializeMetadataIndexNode(InputStream stream, boolean isDeviceLevel) + throws IOException { + if (isDeviceLevel) { + return deviceMetadataIndexNodeStreamDeserializer.deserialize(stream, this); + } else { + return measurementMetadataIndexNodeStreamDeserializer.deserialize(stream, this); + } + } + + public IMetadataIndexEntry deserializeMetadataIndexEntry( + InputStream stream, boolean isDeviceLevel) throws IOException { + if (isDeviceLevel) { + return deviceMetadataIndexEntryStreamDeserializer.deserialize(stream, this); + } else { + return measurementMetadataIndexEntryStreamDeserializer.deserialize(stream, this); + } + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/compatibility/StreamDeserializer.java b/java/tsfile/src/main/java/org/apache/tsfile/compatibility/StreamDeserializer.java new file mode 100644 index 000000000..b4870ff8f --- /dev/null +++ 
b/java/tsfile/src/main/java/org/apache/tsfile/compatibility/StreamDeserializer.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.compatibility; + +import java.io.IOException; +import java.io.InputStream; + +public interface StreamDeserializer { + T deserialize(InputStream inputStream, DeserializeConfig context) throws IOException; +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/exception/IllegalDeviceIDException.java b/java/tsfile/src/main/java/org/apache/tsfile/exception/IllegalDeviceIDException.java new file mode 100644 index 000000000..73b82be5f --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/exception/IllegalDeviceIDException.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.exception; + +public class IllegalDeviceIDException extends RuntimeException { + + public IllegalDeviceIDException(String message) { + super(message); + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/exception/TsFileRuntimeException.java b/java/tsfile/src/main/java/org/apache/tsfile/exception/TsFileRuntimeException.java index 9bbd16e81..fe9fa934c 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/exception/TsFileRuntimeException.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/exception/TsFileRuntimeException.java @@ -30,4 +30,8 @@ public class TsFileRuntimeException extends RuntimeException { public TsFileRuntimeException(String message) { super(message); } + + public TsFileRuntimeException(Throwable cause) { + super(cause); + } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/exception/read/FileVersionTooOldException.java b/java/tsfile/src/main/java/org/apache/tsfile/exception/read/FileVersionTooOldException.java new file mode 100644 index 000000000..2176f7ffc --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/exception/read/FileVersionTooOldException.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.exception.read; + +import java.io.IOException; + +public class FileVersionTooOldException extends IOException { + + public FileVersionTooOldException(byte currentVersion, byte minimumVersion) { + super( + String.format( + "The current version %d is too old, please at least upgrade to %d", + currentVersion, minimumVersion)); + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/exception/read/NoColumnException.java b/java/tsfile/src/main/java/org/apache/tsfile/exception/read/NoColumnException.java new file mode 100644 index 000000000..ee227092f --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/exception/read/NoColumnException.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tsfile.exception.read; + +public class NoColumnException extends ReadProcessException { + + public NoColumnException(String columnName) { + super(String.format("No column: %s", columnName)); + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/exception/read/ReadProcessException.java b/java/tsfile/src/main/java/org/apache/tsfile/exception/read/ReadProcessException.java new file mode 100644 index 000000000..7bd7c77e2 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/exception/read/ReadProcessException.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tsfile.exception.read; + +public class ReadProcessException extends Exception { + + public ReadProcessException(String message) { + super(message); + } + + public ReadProcessException(String message, Throwable cause) { + super(message, cause); + } + + public ReadProcessException(Throwable cause) { + super(cause); + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/exception/read/UnsupportedOrderingException.java b/java/tsfile/src/main/java/org/apache/tsfile/exception/read/UnsupportedOrderingException.java new file mode 100644 index 000000000..6b7120c79 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/exception/read/UnsupportedOrderingException.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tsfile.exception.read; + +public class UnsupportedOrderingException extends ReadProcessException { + + public UnsupportedOrderingException(String ordering) { + super(String.format("Unsupported ordering: %s", ordering)); + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/exception/write/ConflictDataTypeException.java b/java/tsfile/src/main/java/org/apache/tsfile/exception/write/ConflictDataTypeException.java new file mode 100644 index 000000000..da0cee540 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/exception/write/ConflictDataTypeException.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tsfile.exception.write; + +import org.apache.tsfile.enums.TSDataType; + +public class ConflictDataTypeException extends WriteProcessException { + + public ConflictDataTypeException(TSDataType writing, TSDataType registered) { + super( + String.format("Conflict data type: %s (writing) and %s (registered)", writing, registered)); + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/exception/write/NoDeviceException.java b/java/tsfile/src/main/java/org/apache/tsfile/exception/write/NoDeviceException.java new file mode 100644 index 000000000..d72cb829b --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/exception/write/NoDeviceException.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.exception.write; + +/** This exception means it can not find the measurement while writing a TSRecord. 
*/ +public class NoDeviceException extends WriteProcessException { + + private static final long serialVersionUID = -5599767368831572747L; + + public NoDeviceException(String columnName) { + super(String.format("No device for %s", columnName)); + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/exception/write/NoMeasurementException.java b/java/tsfile/src/main/java/org/apache/tsfile/exception/write/NoMeasurementException.java index 5e5745e2f..b7d3f3a98 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/exception/write/NoMeasurementException.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/exception/write/NoMeasurementException.java @@ -24,7 +24,7 @@ public class NoMeasurementException extends WriteProcessException { private static final long serialVersionUID = -5599767368831572747L; - public NoMeasurementException(String msg) { - super(msg); + public NoMeasurementException(String columnName) { + super(String.format("No measurement for %s", columnName)); } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/exception/write/NoTableException.java b/java/tsfile/src/main/java/org/apache/tsfile/exception/write/NoTableException.java new file mode 100644 index 000000000..5098916d0 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/exception/write/NoTableException.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.exception.write; + +public class NoTableException extends WriteProcessException { + + public NoTableException(String tableName) { + super(String.format("Table %s not found", tableName)); + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/IMetadataIndexEntry.java b/java/tsfile/src/main/java/org/apache/tsfile/file/IMetadataIndexEntry.java index cc1cea3bd..4a6f08c28 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/file/IMetadataIndexEntry.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/IMetadataIndexEntry.java @@ -33,4 +33,6 @@ public interface IMetadataIndexEntry { Comparable getCompareKey(); boolean isDeviceLevel(); + + int serializedSize(); } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/header/ChunkGroupHeader.java b/java/tsfile/src/main/java/org/apache/tsfile/file/header/ChunkGroupHeader.java index 8e327af61..7a424f89d 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/file/header/ChunkGroupHeader.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/header/ChunkGroupHeader.java @@ -19,10 +19,8 @@ package org.apache.tsfile.file.header; -import org.apache.tsfile.common.conf.TSFileConfig; import org.apache.tsfile.file.MetaMarker; import org.apache.tsfile.file.metadata.IDeviceID; -import org.apache.tsfile.file.metadata.PlainDeviceID; import org.apache.tsfile.read.reader.TsFileInput; import org.apache.tsfile.utils.ReadWriteForEncodingUtils; import org.apache.tsfile.utils.ReadWriteIOUtils; @@ -56,8 +54,7 @@ public int getSerializedSize() { private int 
getSerializedSize(IDeviceID deviceID) { // TODO: add an interface in IDeviceID - int length = - ((PlainDeviceID) deviceID).toStringID().getBytes(TSFileConfig.STRING_CHARSET).length; + int length = deviceID.serializedSize(); return Byte.BYTES + ReadWriteForEncodingUtils.varIntSize(length) + length; } @@ -77,11 +74,9 @@ public static ChunkGroupHeader deserializeFrom(InputStream inputStream, boolean } // TODO: add an interface in IDeviceID - String deviceID = ReadWriteIOUtils.readVarIntString(inputStream); - if (deviceID == null || deviceID.isEmpty()) { - throw new IOException("DeviceId is empty"); - } - return new ChunkGroupHeader(new PlainDeviceID(deviceID)); + final IDeviceID deviceID = + IDeviceID.Deserializer.DEFAULT_DESERIALIZER.deserializeFrom(inputStream); + return new ChunkGroupHeader(deviceID); } /** @@ -96,9 +91,11 @@ public static ChunkGroupHeader deserializeFrom(TsFileInput input, long offset, b if (!markerRead) { offsetVar++; } - // TODO: add an interface in IDeviceID - String deviceID = input.readVarIntString(offsetVar); - return new ChunkGroupHeader(new PlainDeviceID(deviceID)); + input.position(offsetVar); + final InputStream inputStream = input.wrapAsInputStream(); + final IDeviceID deviceID = + IDeviceID.Deserializer.DEFAULT_DESERIALIZER.deserializeFrom(inputStream); + return new ChunkGroupHeader(deviceID); } public IDeviceID getDeviceID() { @@ -115,7 +112,7 @@ public IDeviceID getDeviceID() { public int serializeTo(OutputStream outputStream) throws IOException { int length = 0; length += ReadWriteIOUtils.write(MARKER, outputStream); - length += ReadWriteIOUtils.writeVar(((PlainDeviceID) deviceID).toStringID(), outputStream); + length += deviceID.serialize(outputStream); return length; } @@ -123,7 +120,7 @@ public int serializeTo(OutputStream outputStream) throws IOException { public String toString() { return "ChunkGroupHeader{" + "deviceID='" - + ((PlainDeviceID) deviceID).toStringID() + + deviceID + '\'' + ", serializedSize=" + serializedSize 
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/header/PageHeader.java b/java/tsfile/src/main/java/org/apache/tsfile/file/header/PageHeader.java index 70387419f..78d80f32f 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/file/header/PageHeader.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/header/PageHeader.java @@ -107,6 +107,12 @@ public long getNumOfValues() { return statistics.getCount(); } + public int getSerializedSize() { + return ReadWriteForEncodingUtils.varIntSize(uncompressedSize) + + ReadWriteForEncodingUtils.varIntSize(compressedSize) + + (statistics != null ? statistics.getSerializedSize() : 0); + } + @Override public Statistics getStatistics() { return statistics; diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/ChunkMetadata.java b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/ChunkMetadata.java index e491d7bb2..5ce097fed 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/ChunkMetadata.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/ChunkMetadata.java @@ -20,11 +20,14 @@ package org.apache.tsfile.file.metadata; import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.file.metadata.enums.CompressionType; +import org.apache.tsfile.file.metadata.enums.TSEncoding; import org.apache.tsfile.file.metadata.statistics.Statistics; import org.apache.tsfile.read.common.TimeRange; import org.apache.tsfile.read.controller.IChunkLoader; import org.apache.tsfile.utils.RamUsageEstimator; import org.apache.tsfile.utils.ReadWriteIOUtils; +import org.apache.tsfile.write.schema.MeasurementSchema; import java.io.IOException; import java.io.InputStream; @@ -55,6 +58,10 @@ public class ChunkMetadata implements IChunkMetadata { private TSDataType tsDataType; + // the following two are not serialized and only used during write + private TSEncoding encoding; + private CompressionType compressionType; + /** * version is used to define the order of 
operations(insertion, deletion, update). version is set * according to its belonging ChunkGroup only when being queried, so it is not persisted. @@ -93,12 +100,16 @@ public ChunkMetadata() {} public ChunkMetadata( String measurementUid, TSDataType tsDataType, + TSEncoding encoding, + CompressionType compressionType, long fileOffset, Statistics statistics) { this.measurementUid = measurementUid; this.tsDataType = tsDataType; this.offsetOfChunkHeader = fileOffset; this.statistics = statistics; + this.encoding = encoding; + this.compressionType = compressionType; } // won't clone deleteIntervalList & modified @@ -111,6 +122,8 @@ public ChunkMetadata(ChunkMetadata other) { this.isSeq = other.isSeq; this.isClosed = other.isClosed; this.mask = other.mask; + this.encoding = other.encoding; + this.compressionType = other.compressionType; } @Override @@ -171,6 +184,14 @@ public int serializeTo(OutputStream outputStream, boolean serializeStatistic) th return byteLen; } + public int serializedSize(boolean includeStatistics) { + int cnt = Long.BYTES; // offsetOfChunkHeader + if (includeStatistics) { + cnt += statistics.getSerializedSize(); + } + return cnt; + } + /** * deserialize from ByteBuffer. 
* @@ -384,4 +405,9 @@ public Optional> getMeasurementStatistics( public boolean hasNullValue(int measurementIndex) { return false; } + + // TODO: replaces fields in this class with MeasurementSchema + public MeasurementSchema toMeasurementSchema() { + return new MeasurementSchema(measurementUid, tsDataType, encoding, compressionType); + } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/DeviceMetadataIndexEntry.java b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/DeviceMetadataIndexEntry.java index 9993cb7ac..3c7c146e2 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/DeviceMetadataIndexEntry.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/DeviceMetadataIndexEntry.java @@ -19,6 +19,7 @@ package org.apache.tsfile.file.metadata; +import org.apache.tsfile.compatibility.DeserializeConfig; import org.apache.tsfile.file.IMetadataIndexEntry; import org.apache.tsfile.utils.ReadWriteIOUtils; @@ -28,6 +29,7 @@ import java.nio.ByteBuffer; public class DeviceMetadataIndexEntry implements IMetadataIndexEntry { + private IDeviceID deviceID; private long offset; @@ -72,19 +74,25 @@ public boolean isDeviceLevel() { return true; } - public static DeviceMetadataIndexEntry deserializeFrom(ByteBuffer buffer) { - IDeviceID device = IDeviceID.deserializeFrom(buffer); + public static DeviceMetadataIndexEntry deserializeFrom( + ByteBuffer buffer, DeserializeConfig context) { + IDeviceID device = context.deviceIDBufferDeserializer.deserialize(buffer, context); long offset = ReadWriteIOUtils.readLong(buffer); return new DeviceMetadataIndexEntry(device, offset); } - public static DeviceMetadataIndexEntry deserializeFrom(InputStream inputStream) - throws IOException { - IDeviceID device = IDeviceID.deserializeFrom(inputStream); + public static DeviceMetadataIndexEntry deserializeFrom( + InputStream inputStream, DeserializeConfig config) throws IOException { + IDeviceID device = 
config.deviceIDStreamDeserializer.deserialize(inputStream, config); long offset = ReadWriteIOUtils.readLong(inputStream); return new DeviceMetadataIndexEntry(device, offset); } + @Override + public int serializedSize() { + return deviceID.serializedSize() + Long.BYTES; // offset + } + @Override public String toString() { return "<" + deviceID + "," + offset + ">"; diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/IDeviceID.java b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/IDeviceID.java index 04d73c19a..87e04cc06 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/IDeviceID.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/IDeviceID.java @@ -19,16 +19,23 @@ package org.apache.tsfile.file.metadata; +import org.apache.tsfile.common.constant.TsFileConstant; import org.apache.tsfile.utils.Accountable; -import org.apache.tsfile.utils.ReadWriteIOUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.io.Serializable; import java.nio.ByteBuffer; /** Device id interface. */ -public interface IDeviceID extends Comparable, Accountable { +public interface IDeviceID extends Comparable, Accountable, Serializable { + + Logger LOGGER = LoggerFactory.getLogger(IDeviceID.class); int serialize(ByteBuffer byteBuffer); @@ -38,11 +45,126 @@ public interface IDeviceID extends Comparable, Accountable { boolean isEmpty(); - static IDeviceID deserializeFrom(ByteBuffer byteBuffer) { - return new PlainDeviceID(ReadWriteIOUtils.readVarIntString(byteBuffer)); + /** + * @return the table name associated with the device. 
For a path-DeviceId, like "root.a.b.c.d", it + * is converted according to a fixed rule, like assuming the first three levels ("root.a.b") + * as the table name; for a tuple-deviceId, like "(table1, beijing, turbine)", it is the first + * element in the deviceId, namely "table1". + */ + String getTableName(); + + /** + * @return how many segments this DeviceId consists of. For a path-DeviceId, like "root.a.b.c.d", + * it is 5; fot a tuple-DeviceId, like "(table1, beijing, turbine)", it is 3. + */ + int segmentNum(); + + /** + * @param i the sequence number of the segment that should be returned. + * @return i-th segment in this DeviceId. + * @throws ArrayIndexOutOfBoundsException if i >= segmentNum(). + */ + Object segment(int i); + + default int serializedSize() { + LOGGER.debug( + "Using default inefficient implementation of serialized size by {}", this.getClass()); + try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + serialize(baos); + return baos.size(); + } catch (IOException e) { + LOGGER.error("Failed to serialize device ID: {}", this, e); + return -1; + } + } + + default boolean startWith(String prefix) { + return startWith(prefix, false); + } + + /** + * @param prefix prefix to be matched, with "." as segment separator + * @param matchEntireSegment if true, the prefix should match entire segments. E.g., "root.a.b" + * matches ("root", "a", "bb") if matchEntireSegment is false; but it mismatches when + * matchEntireSegment is true. 
+ * @return true if the prefix can be matched, false other with + */ + default boolean startWith(String prefix, boolean matchEntireSegment) { + int currSegment = 0; + int matchedPos = 0; + while (currSegment < segmentNum()) { + String segmentString = segment(currSegment).toString(); + String remainingPrefix = prefix.substring(matchedPos); + if (segmentString.startsWith(remainingPrefix)) { + // ("root.a.b","c","d") matches "root.a", "root.a.b", "root.a.b.c", "root.a.b.c.d" + if (matchEntireSegment) { + // ("root.a.b","c","d") matches "root.a.b", "root.a.b.c", "root.a.b.c.d" + return segmentString.equals(remainingPrefix); + } + return true; + } + if (!remainingPrefix.startsWith(segmentString)) { + // ("root.a.b","c","d") mismatches "root.b", "root.a.b.d", "root.a.b.c.e" + return false; + } + // the current segment is fully matched + matchedPos += segmentString.length(); + // check path separator + if (prefix.charAt(matchedPos) != TsFileConstant.PATH_SEPARATOR_CHAR) { + // ("root.a.b","c","d") mismatches "root.a.bb", "root.a.b.cc", "root.a.b.c.dd" + return false; + } + // path separator is matched, move to the next segment + matchedPos++; + currSegment++; + } + // ("root.a.b","c","d") mismatches "root.a.b.c.d.e" + return false; + } + + default Object[] getSegments() { + final Object[] segments = new Object[segmentNum()]; + for (int i = 0; i < segmentNum(); i++) { + segments[i] = segment(i); + } + return segments; } - static IDeviceID deserializeFrom(InputStream inputStream) throws IOException { - return new PlainDeviceID(ReadWriteIOUtils.readVarIntString(inputStream)); + default boolean matchDatabaseName(String databaseName) { + String tableName = getTableName(); + if (tableName.startsWith(databaseName) + // root.aa.bb matches root.aa.bb + && (tableName.length() == databaseName.length() + || + // root.aa.bb matches root.aa + tableName.charAt(databaseName.length()) == TsFileConstant.PATH_SEPARATOR_CHAR)) { + return true; + } + // root.aa mismatches root.a + return 
startWith(databaseName, true); + } + + interface Deserializer { + + IDeviceID deserializeFrom(ByteBuffer byteBuffer); + + IDeviceID deserializeFrom(InputStream inputStream) throws IOException; + + Deserializer DEFAULT_DESERIALIZER = StringArrayDeviceID.getDESERIALIZER(); + } + + interface Factory { + + IDeviceID create(String deviceIdString); + + /** + * The first segment is the table name and the rests are id columns. + * + * @param segments Example: ["table0", "id0", "id1"] + * @return a deviceId corresponding to the segments + */ + IDeviceID create(String[] segments); + + Factory DEFAULT_FACTORY = StringArrayDeviceID.getFACTORY(); } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/LogicalTableSchema.java b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/LogicalTableSchema.java new file mode 100644 index 000000000..2db594735 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/LogicalTableSchema.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.file.metadata; + +import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.file.metadata.enums.CompressionType; +import org.apache.tsfile.file.metadata.enums.TSEncoding; +import org.apache.tsfile.write.record.Tablet.ColumnType; +import org.apache.tsfile.write.schema.IMeasurementSchema; +import org.apache.tsfile.write.schema.MeasurementSchema; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; + +/** + * TableSchema for devices with path-based DeviceIds. It generates the Id columns based on the max + * level of paths. + */ +public class LogicalTableSchema extends TableSchema { + + private int maxLevel; + + public LogicalTableSchema(String tableName) { + super(tableName); + } + + @Override + public void update(ChunkGroupMetadata chunkGroupMetadata) { + super.update(chunkGroupMetadata); + this.maxLevel = Math.max(this.maxLevel, chunkGroupMetadata.getDevice().segmentNum()); + } + + private List generateIdColumns() { + List generatedIdColumns = new ArrayList<>(); + // level 0 is table name, not id column + for (int i = 1; i < maxLevel; i++) { + generatedIdColumns.add( + new MeasurementSchema( + "__level" + i, TSDataType.STRING, TSEncoding.PLAIN, CompressionType.UNCOMPRESSED)); + } + return generatedIdColumns; + } + + public void finalizeColumnSchema() { + if (!updatable) { + return; + } + + List allColumns = new ArrayList<>(generateIdColumns()); + List allColumnTypes = ColumnType.nCopy(ColumnType.ID, allColumns.size()); + allColumns.addAll(columnSchemas); + allColumnTypes.addAll(columnTypes); + columnSchemas = allColumns; + columnTypes = 
allColumnTypes; + updatable = false; + } + + @Override + public int serialize(OutputStream out) throws IOException { + finalizeColumnSchema(); + return super.serialize(out); + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/MeasurementMetadataIndexEntry.java b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/MeasurementMetadataIndexEntry.java index fbb4b4a37..4a794cfd3 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/MeasurementMetadataIndexEntry.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/MeasurementMetadataIndexEntry.java @@ -20,6 +20,7 @@ package org.apache.tsfile.file.metadata; import org.apache.tsfile.file.IMetadataIndexEntry; +import org.apache.tsfile.utils.ReadWriteForEncodingUtils; import org.apache.tsfile.utils.ReadWriteIOUtils; import java.io.IOException; @@ -86,6 +87,11 @@ public static MeasurementMetadataIndexEntry deserializeFrom(InputStream inputStr return new MeasurementMetadataIndexEntry(name, offset); } + @Override + public int serializedSize() { + return ReadWriteForEncodingUtils.varIntStringSize(name) + Long.BYTES; // offset + } + @Override public String toString() { return "<" + name + "," + offset + ">"; diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/MetadataIndexConstructor.java b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/MetadataIndexConstructor.java index 3941ec4f4..b1afd6797 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/MetadataIndexConstructor.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/MetadataIndexConstructor.java @@ -88,6 +88,17 @@ public static MetadataIndexNode constructMetadataIndex( return checkAndBuildLevelIndex(deviceMetadataIndexMap, out); } + public static Map> splitDeviceByTable( + Map deviceMetadataIndexMap) { + Map> result = new TreeMap<>(); + for (Entry entry : deviceMetadataIndexMap.entrySet()) { + IDeviceID deviceID = entry.getKey(); + String tableName 
= deviceID.getTableName(); + result.computeIfAbsent(tableName, tName -> new TreeMap<>()).put(deviceID, entry.getValue()); + } + return result; + } + public static MetadataIndexNode checkAndBuildLevelIndex( Map deviceMetadataIndexMap, TsFileOutput out) throws IOException { diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/MetadataIndexNode.java b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/MetadataIndexNode.java index 2034ad4f2..693b4ffb1 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/MetadataIndexNode.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/MetadataIndexNode.java @@ -21,6 +21,7 @@ import org.apache.tsfile.common.conf.TSFileConfig; import org.apache.tsfile.common.conf.TSFileDescriptor; +import org.apache.tsfile.compatibility.DeserializeConfig; import org.apache.tsfile.file.IMetadataIndexEntry; import org.apache.tsfile.file.metadata.enums.MetadataIndexNodeType; import org.apache.tsfile.utils.Pair; @@ -98,15 +99,12 @@ public int serializeTo(OutputStream outputStream) throws IOException { return byteLen; } - public static MetadataIndexNode deserializeFrom(ByteBuffer buffer, boolean isDeviceLevel) { + public static MetadataIndexNode deserializeFrom( + ByteBuffer buffer, boolean isDeviceLevel, DeserializeConfig context) { List children = new ArrayList<>(); int size = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer); for (int i = 0; i < size; i++) { - if (isDeviceLevel) { - children.add(DeviceMetadataIndexEntry.deserializeFrom(buffer)); - } else { - children.add(MeasurementMetadataIndexEntry.deserializeFrom(buffer)); - } + children.add(context.deserializeMetadataIndexEntry(buffer, isDeviceLevel)); } long offset = ReadWriteIOUtils.readLong(buffer); MetadataIndexNodeType nodeType = @@ -114,16 +112,12 @@ public static MetadataIndexNode deserializeFrom(ByteBuffer buffer, boolean isDev return new MetadataIndexNode(children, offset, nodeType); } - public static MetadataIndexNode 
deserializeFrom(InputStream inputStream, boolean isDeviceLevel) - throws IOException { + public static MetadataIndexNode deserializeFrom( + InputStream inputStream, boolean isDeviceLevel, DeserializeConfig config) throws IOException { List children = new ArrayList<>(); int size = ReadWriteForEncodingUtils.readUnsignedVarInt(inputStream); for (int i = 0; i < size; i++) { - if (isDeviceLevel) { - children.add(DeviceMetadataIndexEntry.deserializeFrom(inputStream)); - } else { - children.add(MeasurementMetadataIndexEntry.deserializeFrom(inputStream)); - } + children.add(config.deserializeMetadataIndexEntry(inputStream, isDeviceLevel)); } long offset = ReadWriteIOUtils.readLong(inputStream); MetadataIndexNodeType nodeType = diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/PlainDeviceID.java b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/PlainDeviceID.java index eb922521e..f36255fe5 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/PlainDeviceID.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/PlainDeviceID.java @@ -19,23 +19,30 @@ package org.apache.tsfile.file.metadata; +import org.apache.tsfile.common.conf.TSFileConfig; +import org.apache.tsfile.common.constant.TsFileConstant; import org.apache.tsfile.utils.RamUsageEstimator; import org.apache.tsfile.utils.ReadWriteIOUtils; import java.io.IOException; +import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteBuffer; import java.util.Objects; import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfCharArray; +// TODO: rename to PathDeviceID (countering TupleDeviceID or ArrayDeviceID) /** Using device id path as id. 
*/ public class PlainDeviceID implements IDeviceID { private static final long INSTANCE_SIZE = RamUsageEstimator.shallowSizeOfInstance(PlainDeviceID.class) + + RamUsageEstimator.shallowSizeOfInstance(String.class) + RamUsageEstimator.shallowSizeOfInstance(String.class); private final String deviceID; + private String tableName; + private String[] segments; public PlainDeviceID(String deviceID) { this.deviceID = deviceID; @@ -59,16 +66,16 @@ public int hashCode() { } public String toString() { - return "PlainDeviceID{" + "deviceID='" + deviceID + '\'' + '}'; + return deviceID; } public String toStringID() { - return deviceID; + return toString(); } @Override public int serialize(ByteBuffer byteBuffer) { - return ReadWriteIOUtils.write(deviceID, byteBuffer); + return ReadWriteIOUtils.writeVar(deviceID, byteBuffer); } @Override @@ -88,11 +95,27 @@ public boolean isEmpty() { @Override public long ramBytesUsed() { - return INSTANCE_SIZE + sizeOfCharArray(deviceID.length()); + long size = INSTANCE_SIZE; + size += sizeOfCharArray(deviceID.length()); + if (tableName != null) { + size += sizeOfCharArray(tableName.length()); + } + if (segments != null) { + size += RamUsageEstimator.sizeOf(segments); + } + return size; } public static PlainDeviceID deserialize(ByteBuffer byteBuffer) { - return new PlainDeviceID(ReadWriteIOUtils.readString(byteBuffer)); + return new PlainDeviceID(ReadWriteIOUtils.readVarIntString(byteBuffer)); + } + + public static PlainDeviceID deserialize(InputStream inputStream) throws IOException { + return new PlainDeviceID(ReadWriteIOUtils.readVarIntString(inputStream)); + } + + public StringArrayDeviceID convertToStringArrayDeviceId() { + return new StringArrayDeviceID(deviceID); } @Override @@ -102,4 +125,64 @@ public int compareTo(IDeviceID other) { } return deviceID.compareTo(((PlainDeviceID) other).deviceID); } + + @Override + public String getTableName() { + if (tableName != null) { + return tableName; + } + + int lastSeparatorPos = -1; + int 
separatorNum = 0; + + for (int i = 0; i < deviceID.length(); i++) { + if (deviceID.charAt(i) == TsFileConstant.PATH_SEPARATOR_CHAR) { + lastSeparatorPos = i; + separatorNum++; + if (separatorNum == TSFileConfig.DEFAULT_SEGMENT_NUM_FOR_TABLE_NAME) { + break; + } + } + } + if (lastSeparatorPos == -1) { + // not find even one separator, probably during a test, use the deviceId as the tableName + tableName = deviceID; + } else { + // use the first DEFAULT_SEGMENT_NUM_FOR_TABLE_NAME segments or all segments but the last + // one as the table name + tableName = deviceID.substring(0, lastSeparatorPos); + } + + return tableName; + } + + @Override + public int segmentNum() { + if (segments != null) { + return segments.length; + } + segments = deviceID.split(TsFileConstant.PATH_SEPARATER_NO_REGEX); + return segments.length; + } + + @Override + public String segment(int i) { + if (i >= segmentNum()) { + throw new ArrayIndexOutOfBoundsException(i); + } + return segments[i]; + } + + public static class Factory implements IDeviceID.Factory { + + @Override + public IDeviceID create(String deviceIdString) { + return new PlainDeviceID(deviceIdString); + } + + @Override + public IDeviceID create(String[] segments) { + return new PlainDeviceID(String.join(TsFileConstant.PATH_SEPARATOR, segments)); + } + } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/StringArrayDeviceID.java b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/StringArrayDeviceID.java new file mode 100644 index 000000000..020b54897 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/StringArrayDeviceID.java @@ -0,0 +1,304 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.file.metadata; + +import org.apache.tsfile.common.conf.TSFileConfig; +import org.apache.tsfile.exception.IllegalDeviceIDException; +import org.apache.tsfile.exception.TsFileRuntimeException; +import org.apache.tsfile.read.common.parser.PathNodesGenerator; +import org.apache.tsfile.utils.RamUsageEstimator; +import org.apache.tsfile.utils.ReadWriteForEncodingUtils; +import org.apache.tsfile.utils.ReadWriteIOUtils; +import org.apache.tsfile.utils.WriteUtils; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +import static org.apache.tsfile.common.constant.TsFileConstant.PATH_SEPARATOR; + +public class StringArrayDeviceID implements IDeviceID { + + private int serializedSize = -1; + + private static final Deserializer DESERIALIZER = + new Deserializer() { + @Override + public IDeviceID deserializeFrom(ByteBuffer byteBuffer) { + return deserialize(byteBuffer); + } + + @Override + public IDeviceID deserializeFrom(InputStream inputStream) throws IOException { + return deserialize(inputStream); + } + }; + + private static final Factory FACTORY = + new Factory() { + @Override + public IDeviceID create(String deviceIdString) { + return new StringArrayDeviceID(deviceIdString); + 
} + + @Override + public IDeviceID create(String[] segments) { + return new StringArrayDeviceID(segments); + } + }; + + private static final long INSTANCE_SIZE = + RamUsageEstimator.shallowSizeOfInstance(StringArrayDeviceID.class); + + // TODO: change to Object[] and rename to just ArrayDeviceID + // or we can just use a tuple like Relational DB. + private final String[] segments; + + public StringArrayDeviceID(String... segments) { + this.segments = formalize(segments); + } + + public StringArrayDeviceID(String deviceIdString) { + this.segments = splitDeviceIdString(deviceIdString); + } + + private String[] formalize(String[] segments) { + // remove tailing nulls + int i = segments.length - 1; + for (; i >= 0; i--) { + if (segments[i] != null) { + break; + } + } + if (i < 0) { + throw new IllegalDeviceIDException("All segments are null"); + } + if (i != segments.length - 1) { + segments = Arrays.copyOf(segments, i + 1); + } + return segments; + } + + @SuppressWarnings("java:S125") // confusing comments with codes + private static String[] splitDeviceIdString(String deviceIdString) { + List splits = Arrays.asList(PathNodesGenerator.splitPathToNodes(deviceIdString)); + int segmentCnt = splits.size(); + + String tableName; + String[] segments; + // assuming DEFAULT_SEGMENT_NUM_FOR_TABLE_NAME = 3 + if (segmentCnt == 1) { + // "root" -> {"root"} + segments = new String[1]; + segments[0] = splits.get(0); + } else if (segmentCnt < TSFileConfig.DEFAULT_SEGMENT_NUM_FOR_TABLE_NAME + 1) { + // "root.a" -> {"root", "a"} + // "root.a.b" -> {"root.a", "b"} + tableName = + segmentCnt == 1 ? 
"" : String.join(PATH_SEPARATOR, splits.subList(0, segmentCnt - 1)); + segments = new String[2]; + segments[0] = tableName; + segments[1] = splits.get(segmentCnt - 1); + } else { + // "root.a.b.c" -> {"root.a.b", "c"} + // "root.a.b.c.d" -> {"root.a.b", "c", "d"} + tableName = + String.join( + PATH_SEPARATOR, splits.subList(0, TSFileConfig.DEFAULT_SEGMENT_NUM_FOR_TABLE_NAME)); + String[] idSegments = + splits + .subList(TSFileConfig.DEFAULT_SEGMENT_NUM_FOR_TABLE_NAME, splits.size()) + .toArray(new String[0]); + segments = new String[idSegments.length + 1]; + segments[0] = tableName; + System.arraycopy(idSegments, 0, segments, 1, idSegments.length); + } + + return segments; + } + + public static Deserializer getDESERIALIZER() { + return DESERIALIZER; + } + + public static Factory getFACTORY() { + return FACTORY; + } + + @Override + public int serialize(ByteBuffer byteBuffer) { + int cnt = 0; + cnt += ReadWriteForEncodingUtils.writeUnsignedVarInt(segments.length, byteBuffer); + for (String segment : segments) { + cnt += ReadWriteIOUtils.writeVar(segment, byteBuffer); + } + return cnt; + } + + @Override + public int serialize(OutputStream outputStream) throws IOException { + int cnt = 0; + cnt += ReadWriteForEncodingUtils.writeUnsignedVarInt(segments.length, outputStream); + for (String segment : segments) { + cnt += ReadWriteIOUtils.writeVar(segment, outputStream); + } + return cnt; + } + + public static StringArrayDeviceID deserialize(ByteBuffer byteBuffer) { + final int cnt = ReadWriteForEncodingUtils.readUnsignedVarInt(byteBuffer); + if (cnt == 0) { + return new StringArrayDeviceID(new String[] {""}); + } + + String[] segments = new String[cnt]; + for (int i = 0; i < cnt; i++) { + segments[i] = ReadWriteIOUtils.readVarIntString(byteBuffer); + } + return new StringArrayDeviceID(segments); + } + + public static StringArrayDeviceID deserialize(InputStream stream) throws IOException { + final int cnt = ReadWriteForEncodingUtils.readUnsignedVarInt(stream); + if (cnt == 
0) { + return new StringArrayDeviceID(new String[] {""}); + } + + String[] segments = new String[cnt]; + for (int i = 0; i < cnt; i++) { + segments[i] = ReadWriteIOUtils.readVarIntString(stream); + } + return new StringArrayDeviceID(segments); + } + + @Override + public byte[] getBytes() { + ByteArrayOutputStream publicBAOS = new ByteArrayOutputStream(256); + try { + serialize(publicBAOS); + } catch (IOException e) { + throw new TsFileRuntimeException(e); + } + return publicBAOS.toByteArray(); + } + + @Override + public boolean isEmpty() { + return segments == null || segments.length == 0; + } + + @Override + public String getTableName() { + return segments[0]; + } + + @Override + public int segmentNum() { + return segments.length; + } + + @Override + public String segment(int i) { + return segments[i]; + } + + @Override + public int compareTo(IDeviceID o) { + int thisSegmentNum = segmentNum(); + int otherSegmentNum = o.segmentNum(); + for (int i = 0; i < thisSegmentNum; i++) { + if (i >= otherSegmentNum) { + // the other ID is a prefix of this one + return 1; + } + final int comp = + Objects.compare(this.segment(i), ((String) o.segment(i)), WriteUtils::compareStrings); + if (comp != 0) { + // the partial comparison has a result + return comp; + } + } + + if (thisSegmentNum < otherSegmentNum) { + // this ID is a prefix of the other one + return -1; + } + + // two ID equal + return 0; + } + + @Override + public long ramBytesUsed() { + return INSTANCE_SIZE + RamUsageEstimator.sizeOf(segments); + } + + @Override + public int serializedSize() { + if (serializedSize != -1) { + return serializedSize; + } + + int cnt = ReadWriteForEncodingUtils.varIntSize(segments.length); + for (String segment : segments) { + if (segment != null) { + byte[] bytes = segment.getBytes(TSFileConfig.STRING_CHARSET); + cnt += ReadWriteForEncodingUtils.varIntSize(bytes.length); + cnt += bytes.length; + } else { + cnt += ReadWriteForEncodingUtils.varIntSize(ReadWriteIOUtils.NO_BYTE_TO_READ); + } 
+ } + serializedSize = cnt; + return cnt; + } + + @Override + public String toString() { + return String.join(".", segments); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + StringArrayDeviceID deviceID = (StringArrayDeviceID) o; + return Objects.deepEquals(segments, deviceID.segments); + } + + @Override + public int hashCode() { + return Arrays.hashCode(segments); + } + + @Override + public String[] getSegments() { + return segments; + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TableSchema.java b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TableSchema.java new file mode 100644 index 000000000..474c4a032 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TableSchema.java @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.file.metadata; + +import org.apache.tsfile.compatibility.DeserializeConfig; +import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.utils.ReadWriteForEncodingUtils; +import org.apache.tsfile.utils.ReadWriteIOUtils; +import org.apache.tsfile.write.record.Tablet.ColumnType; +import org.apache.tsfile.write.schema.IMeasurementSchema; +import org.apache.tsfile.write.schema.MeasurementSchema; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +public class TableSchema { + // the tableName is not serialized since the TableSchema is always stored in a Map, from whose + // key the tableName can be known + protected String tableName; + protected List columnSchemas; + protected List columnTypes; + protected boolean updatable = false; + + // columnName -> pos in columnSchemas; + private Map columnPosIndex; + + public TableSchema(String tableName) { + this.tableName = tableName; + this.columnSchemas = new ArrayList<>(); + this.columnTypes = new ArrayList<>(); + this.updatable = true; + } + + public TableSchema( + String tableName, List columnSchemas, List columnTypes) { + this.tableName = tableName; + this.columnSchemas = columnSchemas; + this.columnTypes = columnTypes; + } + + public Map getColumnPosIndex() { + if (columnPosIndex == null) { + columnPosIndex = new HashMap<>(); + } + return columnPosIndex; + } + + public int findColumnIndex(String columnName) { + return getColumnPosIndex() + 
.computeIfAbsent( + columnName, + colName -> { + for (int i = 0; i < columnSchemas.size(); i++) { + if (columnSchemas.get(i).getMeasurementId().equals(columnName)) { + return i; + } + } + return -1; + }); + } + + public IMeasurementSchema findColumnSchema(String columnName) { + final int columnIndex = findColumnIndex(columnName); + return columnIndex >= 0 ? columnSchemas.get(columnIndex) : null; + } + + public void update(ChunkGroupMetadata chunkGroupMetadata) { + if (!updatable) { + return; + } + + for (ChunkMetadata chunkMetadata : chunkGroupMetadata.getChunkMetadataList()) { + int columnIndex = findColumnIndex(chunkMetadata.getMeasurementUid()); + // if the measurement is not found in the column list, add it + if (columnIndex == -1) { + columnSchemas.add(chunkMetadata.toMeasurementSchema()); + columnTypes.add(ColumnType.MEASUREMENT); + getColumnPosIndex().put(chunkMetadata.getMeasurementUid(), columnSchemas.size() - 1); + } else { + final IMeasurementSchema originSchema = columnSchemas.get(columnIndex); + if (originSchema.getType() != chunkMetadata.getDataType()) { + originSchema.setType(TSDataType.STRING); + } + } + } + } + + public List getColumnSchemas() { + return columnSchemas; + } + + public List getColumnTypes() { + return columnTypes; + } + + public int serialize(OutputStream out) throws IOException { + int cnt = 0; + if (columnSchemas != null) { + cnt += ReadWriteForEncodingUtils.writeUnsignedVarInt(columnSchemas.size(), out); + for (int i = 0; i < columnSchemas.size(); i++) { + IMeasurementSchema columnSchema = columnSchemas.get(i); + ColumnType columnType = columnTypes.get(i); + cnt += columnSchema.serializeTo(out); + cnt += ReadWriteIOUtils.write(columnType.ordinal(), out); + } + } else { + cnt += ReadWriteForEncodingUtils.writeUnsignedVarInt(0, out); + } + + return cnt; + } + + public int serializedSize() { + try { + return serialize(new ByteArrayOutputStream()); + } catch (IOException e) { + return -1; + } + } + + public static TableSchema 
deserialize(ByteBuffer buffer, DeserializeConfig context) { + final int columnCnt = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer); + List measurementSchemas = new ArrayList<>(columnCnt); + List columnTypes = new ArrayList<>(); + for (int i = 0; i < columnCnt; i++) { + MeasurementSchema measurementSchema = + context.measurementSchemaBufferDeserializer.deserialize(buffer, context); + measurementSchemas.add(measurementSchema); + columnTypes.add(ColumnType.values()[buffer.getInt()]); + } + return new TableSchema(null, measurementSchemas, columnTypes); + } + + public String getTableName() { + return tableName; + } + + public void setTableName(String tableName) { + this.tableName = tableName; + } + + @Override + public String toString() { + return "TableSchema{" + + "tableName='" + + tableName + + '\'' + + ", columnSchemas=" + + columnSchemas + + ", columnTypes=" + + columnTypes + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof TableSchema)) { + return false; + } + TableSchema that = (TableSchema) o; + return Objects.equals(tableName, that.tableName) + && Objects.equals(columnSchemas, that.columnSchemas) + && Objects.equals(columnTypes, that.columnTypes); + } + + @Override + public int hashCode() { + return Objects.hash(tableName, columnSchemas, columnTypes); + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TimeseriesMetadata.java b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TimeseriesMetadata.java index 17a326e01..e59fd9dc2 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TimeseriesMetadata.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TimeseriesMetadata.java @@ -19,6 +19,7 @@ package org.apache.tsfile.file.metadata; +import org.apache.tsfile.common.conf.TSFileConfig; import org.apache.tsfile.enums.TSDataType; import org.apache.tsfile.file.metadata.statistics.Statistics; import 
org.apache.tsfile.read.controller.IChunkMetadataLoader; @@ -356,6 +357,21 @@ public long getRetainedSizeInBytes() { return retainedSize; } + public int serializedSizeWithoutMetadata() { + final byte[] bytes = measurementId.getBytes(TSFileConfig.STRING_CHARSET); + return Byte.BYTES + + // metadata type + ReadWriteForEncodingUtils.varIntSize(bytes.length) + + // measurementId size + bytes.length + + // measurmentId bytes + Byte.BYTES + + // data type + ReadWriteForEncodingUtils.uVarIntSize(chunkMetaDataListDataSize) + + // ChunkMetadata num + statistics.getSerializedSize(); // statistic + } + @Override public String toString() { return "TimeseriesMetadata{" diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TsFileMetadata.java b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TsFileMetadata.java index 68bd6dd64..ec3878c27 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TsFileMetadata.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TsFileMetadata.java @@ -19,6 +19,7 @@ package org.apache.tsfile.file.metadata; +import org.apache.tsfile.compatibility.DeserializeConfig; import org.apache.tsfile.utils.BloomFilter; import org.apache.tsfile.utils.ReadWriteForEncodingUtils; import org.apache.tsfile.utils.ReadWriteIOUtils; @@ -26,6 +27,10 @@ import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; +import java.util.TreeMap; /** TSFileMetaData collects all metadata info and saves in its data structure. 
*/ public class TsFileMetadata { @@ -34,10 +39,14 @@ public class TsFileMetadata { private BloomFilter bloomFilter; // List of - private MetadataIndexNode metadataIndex; + private Map tableMetadataIndexNodeMap; + private Map tableSchemaMap; + private Map tsFileProperties; // offset of MetaMarker.SEPARATOR private long metaOffset; + // offset from MetaMarker.SEPARATOR (exclusive) to tsFileProperties + private int propertiesOffset; /** * deserialize data from the buffer. @@ -45,11 +54,31 @@ public class TsFileMetadata { * @param buffer -buffer use to deserialize * @return -a instance of TsFileMetaData */ - public static TsFileMetadata deserializeFrom(ByteBuffer buffer) { + public static TsFileMetadata deserializeFrom(ByteBuffer buffer, DeserializeConfig context) { TsFileMetadata fileMetaData = new TsFileMetadata(); + int startPos = buffer.position(); // metadataIndex - fileMetaData.metadataIndex = MetadataIndexNode.deserializeFrom(buffer, true); + int tableIndexNodeNum = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer); + Map tableIndexNodeMap = new TreeMap<>(); + for (int i = 0; i < tableIndexNodeNum; i++) { + String tableName = ReadWriteIOUtils.readVarIntString(buffer); + MetadataIndexNode metadataIndexNode = + context.deviceMetadataIndexNodeBufferDeserializer.deserialize(buffer, context); + tableIndexNodeMap.put(tableName, metadataIndexNode); + } + fileMetaData.setTableMetadataIndexNodeMap(tableIndexNodeMap); + + // tableSchemas + int tableSchemaNum = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer); + Map tableSchemaMap = new HashMap<>(); + for (int i = 0; i < tableSchemaNum; i++) { + String tableName = ReadWriteIOUtils.readVarIntString(buffer); + TableSchema tableSchema = context.tableSchemaBufferDeserializer.deserialize(buffer, context); + tableSchema.setTableName(tableName); + tableSchemaMap.put(tableName, tableSchema); + } + fileMetaData.setTableSchemaMap(tableSchemaMap); // metaOffset long metaOffset = ReadWriteIOUtils.readLong(buffer); @@ -58,9 
+87,25 @@ public static TsFileMetadata deserializeFrom(ByteBuffer buffer) { // read bloom filter if (buffer.hasRemaining()) { byte[] bytes = ReadWriteIOUtils.readByteBufferWithSelfDescriptionLength(buffer); - int filterSize = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer); - int hashFunctionSize = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer); - fileMetaData.bloomFilter = BloomFilter.buildBloomFilter(bytes, filterSize, hashFunctionSize); + if (bytes.length != 0) { + int filterSize = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer); + int hashFunctionSize = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer); + fileMetaData.bloomFilter = + BloomFilter.buildBloomFilter(bytes, filterSize, hashFunctionSize); + } + } + + fileMetaData.propertiesOffset = buffer.position() - startPos; + + if (buffer.hasRemaining()) { + int propertiesSize = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer); + Map propertiesMap = new HashMap<>(); + for (int i = 0; i < propertiesSize; i++) { + String key = ReadWriteIOUtils.readVarIntString(buffer); + String value = ReadWriteIOUtils.readVarIntString(buffer); + propertiesMap.put(key, value); + } + fileMetaData.tsFileProperties = propertiesMap; } return fileMetaData; @@ -84,15 +129,45 @@ public void setBloomFilter(BloomFilter bloomFilter) { public int serializeTo(OutputStream outputStream) throws IOException { int byteLen = 0; - // metadataIndex - if (metadataIndex != null) { - byteLen += metadataIndex.serializeTo(outputStream); + if (tableMetadataIndexNodeMap != null) { + byteLen += + ReadWriteForEncodingUtils.writeUnsignedVarInt( + tableMetadataIndexNodeMap.size(), outputStream); + for (Entry entry : tableMetadataIndexNodeMap.entrySet()) { + byteLen += ReadWriteIOUtils.writeVar(entry.getKey(), outputStream); + byteLen += entry.getValue().serializeTo(outputStream); + } } else { - byteLen += ReadWriteIOUtils.write(0, outputStream); + byteLen += ReadWriteForEncodingUtils.writeUnsignedVarInt(0, outputStream); + } + + if 
(tableSchemaMap != null) { + byteLen += ReadWriteForEncodingUtils.writeUnsignedVarInt(tableSchemaMap.size(), outputStream); + for (Entry entry : tableSchemaMap.entrySet()) { + byteLen += ReadWriteIOUtils.writeVar(entry.getKey(), outputStream); + byteLen += entry.getValue().serialize(outputStream); + } + } else { + byteLen += ReadWriteForEncodingUtils.writeUnsignedVarInt(0, outputStream); } // metaOffset byteLen += ReadWriteIOUtils.write(metaOffset, outputStream); + if (bloomFilter != null) { + byteLen += serializeBloomFilter(outputStream, bloomFilter); + } else { + byteLen += ReadWriteIOUtils.write(0, outputStream); + } + + byteLen += + ReadWriteForEncodingUtils.writeVarInt( + tsFileProperties != null ? tsFileProperties.size() : 0, outputStream); + if (tsFileProperties != null) { + for (Entry entry : tsFileProperties.entrySet()) { + byteLen += ReadWriteIOUtils.writeVar(entry.getKey(), outputStream); + byteLen += ReadWriteIOUtils.writeVar(entry.getValue(), outputStream); + } + } return byteLen; } @@ -118,11 +193,28 @@ public void setMetaOffset(long metaOffset) { this.metaOffset = metaOffset; } - public MetadataIndexNode getMetadataIndex() { - return metadataIndex; + public void setTableMetadataIndexNodeMap( + Map tableMetadataIndexNodeMap) { + this.tableMetadataIndexNodeMap = tableMetadataIndexNodeMap; + } + + public void setTableSchemaMap(Map tableSchemaMap) { + this.tableSchemaMap = tableSchemaMap; + } + + public Map getTableMetadataIndexNodeMap() { + return tableMetadataIndexNodeMap; + } + + public MetadataIndexNode getTableMetadataIndexNode(String tableName) { + MetadataIndexNode metadataIndexNode = tableMetadataIndexNodeMap.get(tableName); + if (metadataIndexNode == null) { + metadataIndexNode = tableMetadataIndexNodeMap.get(""); + } + return metadataIndexNode; } - public void setMetadataIndex(MetadataIndexNode metadataIndex) { - this.metadataIndex = metadataIndex; + public Map getTableSchemaMap() { + return tableSchemaMap; } } diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/read/TsFileSequenceReader.java b/java/tsfile/src/main/java/org/apache/tsfile/read/TsFileSequenceReader.java index fa9b8ab30..b93dbb3d0 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/TsFileSequenceReader.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/TsFileSequenceReader.java @@ -22,12 +22,15 @@ import org.apache.tsfile.common.conf.TSFileConfig; import org.apache.tsfile.common.conf.TSFileDescriptor; import org.apache.tsfile.common.constant.TsFileConstant; +import org.apache.tsfile.compatibility.CompatibilityUtils; +import org.apache.tsfile.compatibility.DeserializeConfig; import org.apache.tsfile.compress.IUnCompressor; import org.apache.tsfile.encoding.decoder.Decoder; import org.apache.tsfile.enums.TSDataType; import org.apache.tsfile.exception.StopReadTsFileByInterruptException; import org.apache.tsfile.exception.TsFileRuntimeException; import org.apache.tsfile.exception.TsFileStatisticsMistakesException; +import org.apache.tsfile.exception.read.FileVersionTooOldException; import org.apache.tsfile.file.IMetadataIndexEntry; import org.apache.tsfile.file.MetaMarker; import org.apache.tsfile.file.header.ChunkGroupHeader; @@ -43,7 +46,6 @@ import org.apache.tsfile.file.metadata.ITimeSeriesMetadata; import org.apache.tsfile.file.metadata.MeasurementMetadataIndexEntry; import org.apache.tsfile.file.metadata.MetadataIndexNode; -import org.apache.tsfile.file.metadata.PlainDeviceID; import org.apache.tsfile.file.metadata.TimeseriesMetadata; import org.apache.tsfile.file.metadata.TsFileMetadata; import org.apache.tsfile.file.metadata.enums.CompressionType; @@ -67,6 +69,7 @@ import org.apache.tsfile.utils.TsPrimitiveType; import org.apache.tsfile.write.schema.IMeasurementSchema; import org.apache.tsfile.write.schema.MeasurementSchema; +import org.apache.tsfile.write.schema.Schema; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -79,12 +82,12 @@ import java.util.Collections; import 
java.util.Comparator; import java.util.HashMap; -import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.NoSuchElementException; import java.util.Objects; import java.util.Queue; @@ -120,6 +123,10 @@ public class TsFileSequenceReader implements AutoCloseable { private long minPlanIndex = Long.MAX_VALUE; private long maxPlanIndex = Long.MIN_VALUE; + private byte fileVersion; + + private DeserializeConfig deserializeConfig = new DeserializeConfig(); + /** * Create a file reader of the given file. The reader will read the tail of the file to get the * file metadata size.Then the reader will skip the first @@ -208,7 +215,32 @@ public TsFileSequenceReader(TsFileInput input, long fileMetadataPos, int fileMet this.fileMetadataSize = fileMetadataSize; } + private void loadFileVersion() throws IOException { + tsFileInput.position(TSFileConfig.MAGIC_STRING.getBytes(TSFileConfig.STRING_CHARSET).length); + final ByteBuffer buffer = ByteBuffer.allocate(1); + tsFileInput.read(buffer); + buffer.flip(); + fileVersion = buffer.get(); + + checkFileVersion(); + configDeserializer(); + } + + private void configDeserializer() throws IOException { + if (fileVersion == TSFileConfig.VERSION_NUMBER_V3) { + deserializeConfig = CompatibilityUtils.v3DeserializeConfig; + } + } + + private void checkFileVersion() throws FileVersionTooOldException { + if (TSFileConfig.VERSION_NUMBER - fileVersion > 1) { + throw new FileVersionTooOldException(fileVersion, (byte) (TSFileConfig.VERSION_NUMBER - 1)); + } + } + public void loadMetadataSize() throws IOException { + loadFileVersion(); + ByteBuffer metadataSize = ByteBuffer.allocate(Integer.BYTES); if (readTailMagic().equals(TSFileConfig.MAGIC_STRING)) { tsFileInput.read( @@ -299,7 +331,8 @@ public TsFileMetadata readFileMetadata() throws IOException { synchronized (this) { if (tsFileMetaData == null) { 
tsFileMetaData = - TsFileMetadata.deserializeFrom(readData(fileMetadataPos, fileMetadataSize)); + deserializeConfig.tsFileMetadataBufferDeserializer.deserialize( + readData(fileMetadataPos, fileMetadataSize), deserializeConfig); } } } @@ -376,7 +409,8 @@ private Map readDeviceMetadataFromDisk(IDeviceID dev public TimeseriesMetadata readTimeseriesMetadata( IDeviceID device, String measurement, boolean ignoreNotExists) throws IOException { readFileMetadata(); - MetadataIndexNode deviceMetadataIndexNode = tsFileMetaData.getMetadataIndex(); + MetadataIndexNode deviceMetadataIndexNode = + tsFileMetaData.getTableMetadataIndexNode(device.getTableName()); Pair metadataIndexPair = getMetadataAndEndOffsetOfDeviceNode(deviceMetadataIndexNode, device, true); if (metadataIndexPair == null) { @@ -389,7 +423,9 @@ public TimeseriesMetadata readTimeseriesMetadata( MetadataIndexNode metadataIndexNode = deviceMetadataIndexNode; if (!metadataIndexNode.getNodeType().equals(MetadataIndexNodeType.LEAF_MEASUREMENT)) { try { - metadataIndexNode = MetadataIndexNode.deserializeFrom(buffer, false); + metadataIndexNode = + deserializeConfig.measurementMetadataIndexNodeBufferDeserializer.deserialize( + buffer, deserializeConfig); } catch (Exception e) { logger.error(METADATA_INDEX_NODE_DESERIALIZE_ERROR, file); throw e; @@ -436,26 +472,29 @@ public TimeseriesMetadata readTimeseriesMetadata( public ITimeSeriesMetadata readITimeseriesMetadata(Path path, boolean ignoreNotExists) throws IOException { readFileMetadata(); - MetadataIndexNode deviceMetadataIndexNode = tsFileMetaData.getMetadataIndex(); + MetadataIndexNode deviceMetadataIndexNode = + tsFileMetaData.getTableMetadataIndexNode(path.getIDeviceID().getTableName()); Pair metadataIndexPair = getMetadataAndEndOffsetOfDeviceNode(deviceMetadataIndexNode, path.getIDeviceID(), true); if (metadataIndexPair == null) { if (ignoreNotExists) { return null; } - throw new IOException("Device {" + path.getDevice() + "} is not in tsFileMetaData"); + throw 
new IOException("Device {" + path.getDeviceString() + "} is not in tsFileMetaData"); } ByteBuffer buffer = readData(metadataIndexPair.left.getOffset(), metadataIndexPair.right); MetadataIndexNode metadataIndexNode; TimeseriesMetadata firstTimeseriesMetadata; try { // next layer MeasurementNode of the specific DeviceNode - metadataIndexNode = MetadataIndexNode.deserializeFrom(buffer, false); + metadataIndexNode = + deserializeConfig.measurementMetadataIndexNodeBufferDeserializer.deserialize( + buffer, deserializeConfig); } catch (Exception e) { logger.error(METADATA_INDEX_NODE_DESERIALIZE_ERROR, file); throw e; } - firstTimeseriesMetadata = tryToGetFirstTimeseriesMetadata(metadataIndexNode); + firstTimeseriesMetadata = getTimeColumnMetadata(metadataIndexNode); metadataIndexPair = getMetadataAndEndOffsetOfMeasurementNode(metadataIndexNode, path.getMeasurement(), false); @@ -543,7 +582,8 @@ public List readTimeseriesMetadata( private Pair getLeafMetadataIndexPair( IDeviceID device, String measurement) throws IOException { readFileMetadata(); - MetadataIndexNode deviceMetadataIndexNode = tsFileMetaData.getMetadataIndex(); + MetadataIndexNode deviceMetadataIndexNode = + tsFileMetaData.getTableMetadataIndexNode(device.getTableName()); Pair metadataIndexPair = getMetadataAndEndOffsetOfDeviceNode(deviceMetadataIndexNode, device, true); if (metadataIndexPair == null) { @@ -553,7 +593,9 @@ private Pair getLeafMetadataIndexPair( MetadataIndexNode metadataIndexNode = deviceMetadataIndexNode; if (!metadataIndexNode.getNodeType().equals(MetadataIndexNodeType.LEAF_MEASUREMENT)) { try { - metadataIndexNode = MetadataIndexNode.deserializeFrom(buffer, false); + metadataIndexNode = + deserializeConfig.measurementMetadataIndexNodeBufferDeserializer.deserialize( + buffer, deserializeConfig); } catch (Exception e) { logger.error(METADATA_INDEX_NODE_DESERIALIZE_ERROR, file); throw e; @@ -564,84 +606,192 @@ private Pair getLeafMetadataIndexPair( return metadataIndexPair; } - // This 
method is only used for TsFile - public List readITimeseriesMetadata( - IDeviceID device, Set measurements) throws IOException { + private MetadataIndexNode getTableRootNode(String tableName) throws IOException { + MetadataIndexNode metadataIndexNode = tsFileMetaData.getTableMetadataIndexNode(tableName); + if (metadataIndexNode == null && fileVersion < TSFileConfig.VERSION_NUMBER) { + // this file if from an old version, and all its metadata should have an anonymous root + metadataIndexNode = tsFileMetaData.getTableMetadataIndexNode(""); + } + return metadataIndexNode; + } + + /** + * Searching from the start node and try to find the root node of the deviceID. + * + * @param deviceID desired device + * @param startNode start of the search, if not provided, start from the table root + * @return MetadataIndexNode which is the root of deviceID + */ + private MetadataIndexNode getDeviceRootNode(IDeviceID deviceID, MetadataIndexNode startNode) + throws IOException { readFileMetadata(); - MetadataIndexNode deviceMetadataIndexNode = tsFileMetaData.getMetadataIndex(); - Pair metadataIndexPair = - getMetadataAndEndOffsetOfDeviceNode(deviceMetadataIndexNode, device, false); - if (metadataIndexPair == null) { - return Collections.emptyList(); + startNode = startNode != null ? 
startNode : getTableRootNode(deviceID.getTableName()); + if (startNode == null) { + return null; } - List resultTimeseriesMetadataList = new ArrayList<>(); - List measurementList = new ArrayList<>(measurements); - Set measurementsHadFound = new HashSet<>(); - // the content of next Layer MeasurementNode of the specific device's DeviceNode - ByteBuffer buffer = readData(metadataIndexPair.left.getOffset(), metadataIndexPair.right); - Pair measurementMetadataIndexPair = metadataIndexPair; - List timeseriesMetadataList = new ArrayList<>(); - // next layer MeasurementNode of the specific DeviceNode MetadataIndexNode measurementMetadataIndexNode; - try { - measurementMetadataIndexNode = MetadataIndexNode.deserializeFrom(buffer, false); - } catch (Exception e) { - logger.error(METADATA_INDEX_NODE_DESERIALIZE_ERROR, file); - throw e; + ByteBuffer buffer; + if (startNode.isDeviceLevel()) { + Pair metadataIndexPair = + getMetadataAndEndOffsetOfDeviceNode(startNode, deviceID, true); + if (metadataIndexPair == null) { + return null; + } + + // the content of next Layer MeasurementNode of the specific device's DeviceNode + buffer = readData(metadataIndexPair.left.getOffset(), metadataIndexPair.right); + // next layer MeasurementNode of the specific DeviceNode + try { + measurementMetadataIndexNode = + deserializeConfig.measurementMetadataIndexNodeBufferDeserializer.deserialize( + buffer, deserializeConfig); + } catch (Exception e) { + logger.error(METADATA_INDEX_NODE_DESERIALIZE_ERROR, file); + throw e; + } + } else { + measurementMetadataIndexNode = startNode; } - // Get the first timeseriesMetadata of the device - TimeseriesMetadata firstTimeseriesMetadata = - tryToGetFirstTimeseriesMetadata(measurementMetadataIndexNode); + return measurementMetadataIndexNode; + } - for (int i = 0; i < measurementList.size(); i++) { - if (measurementsHadFound.contains(measurementList.get(i))) { - continue; + /** + * Read the TimeseriesMetadata of the given measurement and its successors from 
the index node + * into the list. + * + * @param timeseriesMetadataList result holder + * @param node index node to be read from + * @param measurement the desired measurement + * @return true if the measurement exists + * @throws IOException when read fails + */ + public boolean readITimeseriesMetadata( + List timeseriesMetadataList, MetadataIndexNode node, String measurement) + throws IOException { + Pair measurementMetadataIndexPair = + getMetadataAndEndOffsetOfMeasurementNode(node, measurement, false); + + if (measurementMetadataIndexPair == null) { + return false; + } + // the content of TimeseriesNode of the specific MeasurementLeafNode + ByteBuffer buffer = + readData(measurementMetadataIndexPair.left.getOffset(), measurementMetadataIndexPair.right); + while (buffer.hasRemaining()) { + try { + timeseriesMetadataList.add(TimeseriesMetadata.deserializeFrom(buffer, true)); + } catch (Exception e) { + logger.error( + "Something error happened while deserializing TimeseriesMetadata of file {}", file); + throw e; } - timeseriesMetadataList.clear(); - measurementMetadataIndexPair = - getMetadataAndEndOffsetOfMeasurementNode( - measurementMetadataIndexNode, measurementList.get(i), false); + } + return true; + } + + /** + * Read TimeSeriesMetadata of the given device. This method is only used for TsFile. + * + * @param device deviceId to be read + * @param measurements measurements to be read + * @param root search start node, if not provided, use the root node of the table of the device + * @param mergeAlignedSeries see @return + * @return when the device is not aligned, or mergeAlignedSeries is false, each result correspond + * to one series in the provided measurements (if exists); otherwise, all columns in the + * aligned device will be merged into one AlignedTimeSeriesMetadata. 
+ * @throws IOException if read fails + */ + public List readITimeseriesMetadata( + IDeviceID device, + Set measurements, + MetadataIndexNode root, + boolean mergeAlignedSeries) + throws IOException { + // find the index node associated with the device + final MetadataIndexNode measurementMetadataIndexNode = getDeviceRootNode(device, root); + if (measurementMetadataIndexNode == null) { + return Collections.emptyList(); + } + + // Get the time column metadata if the device is aligned + TimeseriesMetadata timeColumnMetadata = getTimeColumnMetadata(measurementMetadataIndexNode); + List valueTimeseriesMetadataList = + timeColumnMetadata != null ? new ArrayList<>() : null; + + List resultTimeseriesMetadataList = new ArrayList<>(); + List measurementList = new ArrayList<>(measurements); + measurementList.sort(null); + boolean[] measurementFound = new boolean[measurements.size()]; + int measurementFoundCnt = 0; - if (measurementMetadataIndexPair == null) { + List timeseriesMetadataList = new ArrayList<>(); + for (int i = 0; + i < measurementList.size() && measurementFoundCnt < measurementList.size(); + i++) { + final String measurementName = measurementList.get(i); + timeseriesMetadataList.clear(); + // read the leaf node that may contain the i-th measurement into a list + if (measurementFound[i] + || !readITimeseriesMetadata( + timeseriesMetadataList, measurementMetadataIndexNode, measurementName)) { continue; } - // the content of TimeseriesNode of the specific MeasurementLeafNode - buffer = - readData( - measurementMetadataIndexPair.left.getOffset(), measurementMetadataIndexPair.right); - while (buffer.hasRemaining()) { - try { - timeseriesMetadataList.add(TimeseriesMetadata.deserializeFrom(buffer, true)); - } catch (Exception e) { - logger.error( - "Something error happened while deserializing TimeseriesMetadata of file {}", file); - throw e; - } + // in the list, search for the all measurements that are not found + measurementFoundCnt += + searchInTimeseriesList( + 
measurementList, + i, + measurementFound, + timeseriesMetadataList, + resultTimeseriesMetadataList, + timeColumnMetadata, + valueTimeseriesMetadataList, + mergeAlignedSeries); + } + if (valueTimeseriesMetadataList != null && !valueTimeseriesMetadataList.isEmpty()) { + resultTimeseriesMetadataList.add( + new AlignedTimeSeriesMetadata(timeColumnMetadata, valueTimeseriesMetadataList)); + } + return resultTimeseriesMetadataList; + } + + private int searchInTimeseriesList( + List measurementList, + int startIndex, + boolean[] measurementFound, + List timeseriesMetadataList, + List resultTimeseriesMetadataList, + TimeseriesMetadata timeColumnMetadata, + List valueTimeseriesMetadataList, + boolean mergeAlignedSeries) { + int numOfFoundMeasurements = 0; + for (int j = startIndex; j < measurementList.size(); j++) { + int searchResult; + if (measurementFound[j] + || (searchResult = + binarySearchInTimeseriesMetadataList( + timeseriesMetadataList, measurementList.get(j))) + < 0) { + continue; } - for (int j = i; j < measurementList.size(); j++) { - String current = measurementList.get(j); - if (!measurementsHadFound.contains(current)) { - int searchResult = binarySearchInTimeseriesMetadataList(timeseriesMetadataList, current); - if (searchResult >= 0) { - if (firstTimeseriesMetadata != null) { - List valueTimeseriesMetadataList = new ArrayList<>(); - valueTimeseriesMetadataList.add(timeseriesMetadataList.get(searchResult)); - resultTimeseriesMetadataList.add( - new AlignedTimeSeriesMetadata( - firstTimeseriesMetadata, valueTimeseriesMetadataList)); - } else { - resultTimeseriesMetadataList.add(timeseriesMetadataList.get(searchResult)); - } - measurementsHadFound.add(current); - } - } - if (measurementsHadFound.size() == measurements.size()) { - return resultTimeseriesMetadataList; + + final TimeseriesMetadata valueColumnMetadata = timeseriesMetadataList.get(searchResult); + if (timeColumnMetadata != null) { + if (!mergeAlignedSeries) { + resultTimeseriesMetadataList.add( + 
new AlignedTimeSeriesMetadata( + timeColumnMetadata, Collections.singletonList(valueColumnMetadata))); + } else if (valueTimeseriesMetadataList != null) { + valueTimeseriesMetadataList.add(valueColumnMetadata); } + } else { + resultTimeseriesMetadataList.add(valueColumnMetadata); } + measurementFound[j] = true; + numOfFoundMeasurements++; } - return resultTimeseriesMetadataList; + return numOfFoundMeasurements; } protected int binarySearchInTimeseriesMetadataList( @@ -669,7 +819,12 @@ public List getAllDevices() throws IOException { if (tsFileMetaData == null) { readFileMetadata(); } - return getAllDevices(tsFileMetaData.getMetadataIndex()); + List deviceIDS = new ArrayList<>(); + for (Entry entry : + tsFileMetaData.getTableMetadataIndexNodeMap().entrySet()) { + deviceIDS.addAll(getAllDevices(entry.getValue())); + } + return deviceIDS; } private List getAllDevices(MetadataIndexNode metadataIndexNode) throws IOException { @@ -690,7 +845,9 @@ private List getAllDevices(MetadataIndexNode metadataIndexNode) throw endOffset = metadataIndexNode.getChildren().get(i + 1).getOffset(); } ByteBuffer buffer = readData(metadataIndexNode.getChildren().get(i).getOffset(), endOffset); - MetadataIndexNode node = MetadataIndexNode.deserializeFrom(buffer, true); + MetadataIndexNode node = + deserializeConfig.deviceMetadataIndexNodeBufferDeserializer.deserialize( + buffer, deserializeConfig); deviceList.addAll(getAllDevices(node)); } return deviceList; @@ -705,15 +862,16 @@ public TsFileDeviceIterator getAllDevicesIteratorWithIsAligned() throws IOExcept readFileMetadata(); Queue> queue = new LinkedList<>(); List leafDeviceNodeOffsets = new ArrayList<>(); - MetadataIndexNode metadataIndexNode = tsFileMetaData.getMetadataIndex(); - if (metadataIndexNode.getNodeType().equals(MetadataIndexNodeType.LEAF_DEVICE)) { - // the first node of index tree is device leaf node, then get the devices directly - getDevicesOfLeafNode(metadataIndexNode, queue); - } else { - // get all device leaf node 
offset - getAllDeviceLeafNodeOffset(metadataIndexNode, leafDeviceNodeOffsets); + for (MetadataIndexNode metadataIndexNode : + tsFileMetaData.getTableMetadataIndexNodeMap().values()) { + if (metadataIndexNode.getNodeType().equals(MetadataIndexNodeType.LEAF_DEVICE)) { + // the first node of index tree is device leaf node, then get the devices directly + getDevicesOfLeafNode(metadataIndexNode, queue); + } else { + // get all device leaf node offset + getAllDeviceLeafNodeOffset(metadataIndexNode, leafDeviceNodeOffsets); + } } - return new TsFileDeviceIterator(this, leafDeviceNodeOffsets, queue); } @@ -729,7 +887,9 @@ public void getDevicesAndEntriesOfOneLeafNode( throws IOException { try { ByteBuffer nextBuffer = readData(startOffset, endOffset); - MetadataIndexNode deviceLeafNode = MetadataIndexNode.deserializeFrom(nextBuffer, true); + MetadataIndexNode deviceLeafNode = + deserializeConfig.deviceMetadataIndexNodeBufferDeserializer.deserialize( + nextBuffer, deserializeConfig); getDevicesOfLeafNode(deviceLeafNode, measurementNodeOffsetQueue); } catch (StopReadTsFileByInterruptException e) { throw e; @@ -799,7 +959,9 @@ private void getAllDeviceLeafNodeOffset( } ByteBuffer nextBuffer = readData(startOffset, endOffset); getAllDeviceLeafNodeOffset( - MetadataIndexNode.deserializeFrom(nextBuffer, true), leafDeviceNodeOffsets); + deserializeConfig.deviceMetadataIndexNodeBufferDeserializer.deserialize( + nextBuffer, deserializeConfig), + leafDeviceNodeOffsets); } } catch (StopReadTsFileByInterruptException e) { throw e; @@ -859,17 +1021,19 @@ public List getAllPaths() throws IOException { public Iterator> getPathsIterator() throws IOException { readFileMetadata(); - MetadataIndexNode metadataIndexNode = tsFileMetaData.getMetadataIndex(); - List metadataIndexEntryList = metadataIndexNode.getChildren(); Queue>> queue = new LinkedList<>(); - for (int i = 0; i < metadataIndexEntryList.size(); i++) { - IMetadataIndexEntry metadataIndexEntry = metadataIndexEntryList.get(i); - long 
endOffset = metadataIndexNode.getEndOffset(); - if (i != metadataIndexEntryList.size() - 1) { - endOffset = metadataIndexEntryList.get(i + 1).getOffset(); + for (MetadataIndexNode metadataIndexNode : + tsFileMetaData.getTableMetadataIndexNodeMap().values()) { + List metadataIndexEntryList = metadataIndexNode.getChildren(); + for (int i = 0; i < metadataIndexEntryList.size(); i++) { + IMetadataIndexEntry metadataIndexEntry = metadataIndexEntryList.get(i); + long endOffset = metadataIndexNode.getEndOffset(); + if (i != metadataIndexEntryList.size() - 1) { + endOffset = metadataIndexEntryList.get(i + 1).getOffset(); + } + ByteBuffer buffer = readData(metadataIndexEntry.getOffset(), endOffset); + getAllPaths(metadataIndexEntry, buffer, null, metadataIndexNode.getNodeType(), queue); } - ByteBuffer buffer = readData(metadataIndexEntry.getOffset(), endOffset); - getAllPaths(metadataIndexEntry, buffer, null, metadataIndexNode.getNodeType(), queue); } return new Iterator>() { @Override @@ -915,7 +1079,8 @@ private void getAllPaths( } boolean currentChildLevelIsDevice = MetadataIndexNodeType.INTERNAL_DEVICE.equals(type); MetadataIndexNode metadataIndexNode = - MetadataIndexNode.deserializeFrom(buffer, currentChildLevelIsDevice); + deserializeConfig.deserializeMetadataIndexNode(buffer, currentChildLevelIsDevice); + int metadataIndexListSize = metadataIndexNode.getChildren().size(); for (int i = 0; i < metadataIndexListSize; i++) { long startOffset = metadataIndexNode.getChildren().get(i).getOffset(); @@ -954,8 +1119,7 @@ public boolean isAlignedDevice(MetadataIndexNode measurementNode) { return "".equals(((MeasurementMetadataIndexEntry) entry).getName()); } - TimeseriesMetadata tryToGetFirstTimeseriesMetadata(MetadataIndexNode measurementNode) - throws IOException { + TimeseriesMetadata getTimeColumnMetadata(MetadataIndexNode measurementNode) throws IOException { // Not aligned timeseries if (!isAlignedDevice(measurementNode)) { return null; @@ -980,8 +1144,10 @@ 
TimeseriesMetadata tryToGetFirstTimeseriesMetadata(MetadataIndexNode measurement readData( measurementNode.getChildren().get(0).getOffset(), measurementNode.getChildren().get(1).getOffset()); - MetadataIndexNode metadataIndexNode = MetadataIndexNode.deserializeFrom(buffer, false); - return tryToGetFirstTimeseriesMetadata(metadataIndexNode); + MetadataIndexNode metadataIndexNode = + deserializeConfig.measurementMetadataIndexNodeBufferDeserializer.deserialize( + buffer, deserializeConfig); + return getTimeColumnMetadata(metadataIndexNode); } return null; } @@ -1030,7 +1196,8 @@ TimeseriesMetadata tryToGetFirstTimeseriesMetadata(MetadataIndexNode measurement } else { // internal measurement node MetadataIndexNode nextLayerMeasurementNode = - MetadataIndexNode.deserializeFrom(nextBuffer, false); + deserializeConfig.measurementMetadataIndexNodeBufferDeserializer.deserialize( + nextBuffer, deserializeConfig); timeseriesMetadataOffsetMap.putAll( getTimeseriesMetadataOffsetByDevice( nextLayerMeasurementNode, excludedMeasurementIds, needChunkMetadata)); @@ -1087,7 +1254,8 @@ public void getDeviceTimeseriesMetadata( } else { // internal measurement node MetadataIndexNode nextLayerMeasurementNode = - MetadataIndexNode.deserializeFrom(nextBuffer, false); + deserializeConfig.measurementMetadataIndexNodeBufferDeserializer.deserialize( + nextBuffer, deserializeConfig); getDeviceTimeseriesMetadata( timeseriesMetadataList, nextLayerMeasurementNode, @@ -1130,7 +1298,8 @@ private void generateMetadataIndex( } boolean currentChildLevelIsDevice = MetadataIndexNodeType.INTERNAL_DEVICE.equals(type); MetadataIndexNode metadataIndexNode = - MetadataIndexNode.deserializeFrom(buffer, currentChildLevelIsDevice); + deserializeConfig.deserializeMetadataIndexNode(buffer, currentChildLevelIsDevice); + int metadataIndexListSize = metadataIndexNode.getChildren().size(); for (int i = 0; i < metadataIndexListSize; i++) { long endOffset = metadataIndexNode.getEndOffset(); @@ -1196,7 +1365,7 @@ private 
void generateMetadataIndexUsingTsFileInput( } boolean currentChildLevelIsDevice = MetadataIndexNodeType.INTERNAL_DEVICE.equals(type); MetadataIndexNode metadataIndexNode = - MetadataIndexNode.deserializeFrom( + deserializeConfig.deserializeMetadataIndexNode( tsFileInput.wrapAsInputStream(), currentChildLevelIsDevice); int metadataIndexListSize = metadataIndexNode.getChildren().size(); for (int i = 0; i < metadataIndexListSize; i++) { @@ -1229,41 +1398,45 @@ public Map> getAllTimeseriesMetadata( readFileMetadata(); } Map> timeseriesMetadataMap = new HashMap<>(); - MetadataIndexNode metadataIndexNode = tsFileMetaData.getMetadataIndex(); - List metadataIndexEntryList = metadataIndexNode.getChildren(); - for (int i = 0; i < metadataIndexEntryList.size(); i++) { - IMetadataIndexEntry metadataIndexEntry = metadataIndexEntryList.get(i); - long endOffset = metadataIndexNode.getEndOffset(); - if (i != metadataIndexEntryList.size() - 1) { - endOffset = metadataIndexEntryList.get(i + 1).getOffset(); - } - if (endOffset - metadataIndexEntry.getOffset() < Integer.MAX_VALUE) { - ByteBuffer buffer = readData(metadataIndexEntry.getOffset(), endOffset); - generateMetadataIndex( - metadataIndexEntry, - buffer, - null, - metadataIndexNode.getNodeType(), - timeseriesMetadataMap, - needChunkMetadata); - } else { - generateMetadataIndexUsingTsFileInput( - metadataIndexNode.getChildren().get(i), - metadataIndexNode.getChildren().get(i).getOffset(), - endOffset, - null, - metadataIndexNode.getNodeType(), - timeseriesMetadataMap, - needChunkMetadata); + for (MetadataIndexNode metadataIndexNode : + tsFileMetaData.getTableMetadataIndexNodeMap().values()) { + List metadataIndexEntryList = metadataIndexNode.getChildren(); + for (int i = 0; i < metadataIndexEntryList.size(); i++) { + IMetadataIndexEntry metadataIndexEntry = metadataIndexEntryList.get(i); + long endOffset = metadataIndexNode.getEndOffset(); + if (i != metadataIndexEntryList.size() - 1) { + endOffset = 
metadataIndexEntryList.get(i + 1).getOffset(); + } + if (endOffset - metadataIndexEntry.getOffset() < Integer.MAX_VALUE) { + ByteBuffer buffer = readData(metadataIndexEntry.getOffset(), endOffset); + generateMetadataIndex( + metadataIndexEntry, + buffer, + null, + metadataIndexNode.getNodeType(), + timeseriesMetadataMap, + needChunkMetadata); + } else { + generateMetadataIndexUsingTsFileInput( + metadataIndexNode.getChildren().get(i), + metadataIndexNode.getChildren().get(i).getOffset(), + endOffset, + null, + metadataIndexNode.getNodeType(), + timeseriesMetadataMap, + needChunkMetadata); + } } } + return timeseriesMetadataMap; } /* This method will only deserialize the TimeseriesMetadata, not including chunk metadata list */ private List getDeviceTimeseriesMetadataWithoutChunkMetadata(IDeviceID device) throws IOException { - MetadataIndexNode metadataIndexNode = tsFileMetaData.getMetadataIndex(); + MetadataIndexNode metadataIndexNode = + tsFileMetaData.getTableMetadataIndexNode(device.getTableName()); Pair metadataIndexPair = getMetadataAndEndOffsetOfDeviceNode(metadataIndexNode, device, true); if (metadataIndexPair == null) { @@ -1288,7 +1461,8 @@ private List getDeviceTimeseriesMetadataWithoutChunkMetadata /* This method will not only deserialize the TimeseriesMetadata, but also all the chunk metadata list meanwhile. 
*/ private List getDeviceTimeseriesMetadata(IDeviceID device) throws IOException { - MetadataIndexNode metadataIndexNode = tsFileMetaData.getMetadataIndex(); + MetadataIndexNode metadataIndexNode = + tsFileMetaData.getTableMetadataIndexNode(device.getTableName()); Pair metadataIndexPair = getMetadataAndEndOffsetOfDeviceNode(metadataIndexNode, device, true); if (metadataIndexPair == null) { @@ -1321,6 +1495,9 @@ private List getDeviceTimeseriesMetadata(IDeviceID device) */ protected Pair getMetadataAndEndOffsetOfDeviceNode( MetadataIndexNode metadataIndex, IDeviceID deviceID, boolean exactSearch) throws IOException { + if (metadataIndex == null) { + return null; + } if (MetadataIndexNodeType.INTERNAL_MEASUREMENT.equals(metadataIndex.getNodeType()) || MetadataIndexNodeType.LEAF_MEASUREMENT.equals(metadataIndex.getNodeType())) { throw new IllegalArgumentException(); @@ -1331,7 +1508,10 @@ protected Pair getMetadataAndEndOffsetOfDeviceNode( metadataIndex.getChildIndexEntry(deviceID, false); ByteBuffer buffer = readData(childIndexEntry.left.getOffset(), childIndexEntry.right); return getMetadataAndEndOffsetOfDeviceNode( - MetadataIndexNode.deserializeFrom(buffer, true), deviceID, exactSearch); + deserializeConfig.deviceMetadataIndexNodeBufferDeserializer.deserialize( + buffer, deserializeConfig), + deviceID, + exactSearch); } else { return metadataIndex.getChildIndexEntry(deviceID, exactSearch); } @@ -1364,7 +1544,10 @@ protected Pair getMetadataAndEndOffsetOfMeasurementNo metadataIndex.getChildIndexEntry(measurement, false); ByteBuffer buffer = readData(childIndexEntry.left.getOffset(), childIndexEntry.right); return getMetadataAndEndOffsetOfMeasurementNode( - MetadataIndexNode.deserializeFrom(buffer, false), measurement, exactSearch); + deserializeConfig.measurementMetadataIndexNodeBufferDeserializer.deserialize( + buffer, deserializeConfig), + measurement, + exactSearch); } else { return metadataIndex.getChildIndexEntry(measurement, exactSearch); } @@ -1528,7 +1711,6 
@@ public Chunk readMemChunk(CachedChunkLoaderImpl.ChunkCacheKey chunkCacheKey) thr * * @param timeseriesMetadata timeseries' metadata * @return a pair of {@link CompressionType} and {@link TSEncoding} of given timeseries. - * @throws IOException */ public Pair readTimeseriesCompressionTypeAndEncoding( TimeseriesMetadata timeseriesMetadata) throws IOException { @@ -1711,9 +1893,7 @@ public int readRaw(long position, int length, ByteBuffer target) throws IOExcept */ @SuppressWarnings("squid:S3776") // Suppress high Cognitive Complexity warning public long selfCheck( - Map newSchema, - List chunkGroupMetadataList, - boolean fastFinish) + Schema schema, List chunkGroupMetadataList, boolean fastFinish) throws IOException { File checkFile = FSFactoryProducer.getFSFactory().getFile(this.file); long fileSize; @@ -1929,7 +2109,13 @@ public long selfCheck( valueColumn2TimeBatchIndex.put(chunkHeader.getMeasurementID(), timeBatchIndex + 1); } currentChunk = - new ChunkMetadata(measurementID, dataType, fileOffsetOfChunk, chunkStatistics); + new ChunkMetadata( + measurementID, + dataType, + chunkHeader.getEncodingType(), + chunkHeader.getCompressionType(), + fileOffsetOfChunk, + chunkStatistics); chunkMetadataList.add(currentChunk); break; case MetaMarker.CHUNK_GROUP_HEADER: @@ -1938,10 +2124,9 @@ public long selfCheck( truncatedSize = this.position() - 1; if (lastDeviceId != null) { // schema of last chunk group - if (newSchema != null) { + if (schema != null) { for (IMeasurementSchema tsSchema : measurementSchemaList) { - newSchema.putIfAbsent( - new Path(lastDeviceId, tsSchema.getMeasurementId(), true), tsSchema); + schema.registerTimeseries(lastDeviceId, tsSchema); } } measurementSchemaList = new ArrayList<>(); @@ -1959,10 +2144,9 @@ public long selfCheck( truncatedSize = this.position() - 1; if (lastDeviceId != null) { // schema of last chunk group - if (newSchema != null) { + if (schema != null) { for (IMeasurementSchema tsSchema : measurementSchemaList) { - 
newSchema.putIfAbsent( - new Path(lastDeviceId, tsSchema.getMeasurementId(), true), tsSchema); + schema.registerTimeseries(lastDeviceId, tsSchema); } } measurementSchemaList = new ArrayList<>(); @@ -1982,10 +2166,9 @@ public long selfCheck( // ChunkGroupFooter is complete. if (lastDeviceId != null) { // schema of last chunk group - if (newSchema != null) { + if (schema != null) { for (IMeasurementSchema tsSchema : measurementSchemaList) { - newSchema.putIfAbsent( - new Path(lastDeviceId, tsSchema.getMeasurementId(), true), tsSchema); + schema.registerTimeseries(lastDeviceId, tsSchema); } } // last chunk group Metadata @@ -2003,6 +2186,10 @@ public long selfCheck( this.position(), e.getMessage()); } + + for (ChunkGroupMetadata chunkGroupMetadata : chunkGroupMetadataList) { + schema.updateTableSchema(chunkGroupMetadata); + } // Despite the completeness of the data section, we will discard current FileMetadata // so that we can continue to write data into this tsfile. return truncatedSize; @@ -2153,13 +2340,12 @@ public long checkChunkAndPagesStatistics(IChunkMetadata chunkMetadata) throws IO /** * get ChunkMetaDatas of given path, and throw exception if path not exists * - * @param path timeseries path * @return List of ChunkMetaData */ - public List getChunkMetadataList(Path path, boolean ignoreNotExists) - throws IOException { + public List getChunkMetadataList( + IDeviceID deviceID, String measurement, boolean ignoreNotExists) throws IOException { TimeseriesMetadata timeseriesMetaData = - readTimeseriesMetadata(path.getIDeviceID(), path.getMeasurement(), ignoreNotExists); + readTimeseriesMetadata(deviceID, measurement, ignoreNotExists); if (timeseriesMetaData == null) { return Collections.emptyList(); } @@ -2168,6 +2354,12 @@ public List getChunkMetadataList(Path path, boolean ignoreNotExis return chunkMetadataList; } + @Deprecated + public List getChunkMetadataList(Path path, boolean ignoreNotExists) + throws IOException { + return 
getChunkMetadataList(path.getIDeviceID(), path.getMeasurement(), ignoreNotExists); + } + // This method is only used for TsFile public List getIChunkMetadataList(Path path) throws IOException { ITimeSeriesMetadata timeseriesMetaData = readITimeseriesMetadata(path, true); @@ -2179,8 +2371,36 @@ public List getIChunkMetadataList(Path path) throws IOException return chunkMetadataList; } + public List getIChunkMetadataList(IDeviceID deviceID, String measurementName) + throws IOException { + List timeseriesMetaData = + readITimeseriesMetadata(deviceID, Collections.singleton(measurementName), null, false); + if (timeseriesMetaData == null || timeseriesMetaData.isEmpty()) { + return Collections.emptyList(); + } + List chunkMetadataList = readIChunkMetaDataList(timeseriesMetaData.get(0)); + chunkMetadataList.sort(Comparator.comparingLong(IChunkMetadata::getStartTime)); + return chunkMetadataList; + } + + public List> getIChunkMetadataList( + IDeviceID deviceID, Set measurementNames, MetadataIndexNode root) throws IOException { + List timeseriesMetaData = + readITimeseriesMetadata(deviceID, measurementNames, root, true); + if (timeseriesMetaData == null || timeseriesMetaData.isEmpty()) { + return Collections.emptyList(); + } + List> results = new ArrayList<>(timeseriesMetaData.size()); + for (ITimeSeriesMetadata timeseriesMetaDatum : timeseriesMetaData) { + List chunkMetadataList = readIChunkMetaDataList(timeseriesMetaDatum); + chunkMetadataList.sort(Comparator.comparingLong(IChunkMetadata::getStartTime)); + results.add(chunkMetadataList); + } + return results; + } + public List getChunkMetadataList(Path path) throws IOException { - return getChunkMetadataList(path, false); + return getChunkMetadataList(path, true); } /** @@ -2191,7 +2411,8 @@ public List getChunkMetadataList(Path path) throws IOException { */ public List getAlignedChunkMetadata(IDeviceID device) throws IOException { readFileMetadata(); - MetadataIndexNode deviceMetadataIndexNode = 
tsFileMetaData.getMetadataIndex(); + MetadataIndexNode deviceMetadataIndexNode = + tsFileMetaData.getTableMetadataIndexNode(device.getTableName()); Pair metadataIndexPair = getMetadataAndEndOffsetOfDeviceNode(deviceMetadataIndexNode, device, true); if (metadataIndexPair == null) { @@ -2201,7 +2422,9 @@ public List getAlignedChunkMetadata(IDeviceID device) thro MetadataIndexNode metadataIndexNode; try { // next layer MeasurementNode of the specific DeviceNode - metadataIndexNode = MetadataIndexNode.deserializeFrom(buffer, false); + metadataIndexNode = + deserializeConfig.measurementMetadataIndexNodeBufferDeserializer.deserialize( + buffer, deserializeConfig); } catch (Exception e) { logger.error(METADATA_INDEX_NODE_DESERIALIZE_ERROR, file); throw e; @@ -2218,7 +2441,7 @@ public List getAlignedChunkMetadata(IDeviceID device) thro */ public List getAlignedChunkMetadataByMetadataIndexNode( IDeviceID device, MetadataIndexNode metadataIndexNode) throws IOException { - TimeseriesMetadata firstTimeseriesMetadata = tryToGetFirstTimeseriesMetadata(metadataIndexNode); + TimeseriesMetadata firstTimeseriesMetadata = getTimeColumnMetadata(metadataIndexNode); if (firstTimeseriesMetadata == null) { throw new IOException("Timeseries of device {" + device + "} are not aligned"); } @@ -2326,7 +2549,7 @@ public Map getFullPathDataTypeMap() throws IOException { Map timeseriesMetadataMap = readDeviceMetadata(device); for (TimeseriesMetadata timeseriesMetadata : timeseriesMetadataMap.values()) { result.put( - ((PlainDeviceID) device).toStringID() + device.toString() + TsFileConstant.PATH_SEPARATOR + timeseriesMetadata.getMeasurementId(), timeseriesMetadata.getTsDataType()); @@ -2376,7 +2599,18 @@ public List getDeviceNameInRange(long start, long end) throws IOExcep */ public MetadataIndexNode readMetadataIndexNode( long startOffset, long endOffset, boolean isDeviceLevel) throws IOException { - return MetadataIndexNode.deserializeFrom(readData(startOffset, endOffset), isDeviceLevel); + 
MetadataIndexNode metadataIndexNode; + final ByteBuffer buffer = readData(startOffset, endOffset); + if (isDeviceLevel) { + metadataIndexNode = + deserializeConfig.deviceMetadataIndexNodeBufferDeserializer.deserialize( + buffer, deserializeConfig); + } else { + metadataIndexNode = + deserializeConfig.measurementMetadataIndexNodeBufferDeserializer.deserialize( + buffer, deserializeConfig); + } + return metadataIndexNode; } /** @@ -2433,7 +2667,8 @@ public Iterator>> getMeasurementChunkMetadataLis IDeviceID device) throws IOException { readFileMetadata(); - MetadataIndexNode metadataIndexNode = tsFileMetaData.getMetadataIndex(); + MetadataIndexNode metadataIndexNode = + tsFileMetaData.getTableMetadataIndexNode(device.getTableName()); Pair metadataIndexPair = getMetadataAndEndOffsetOfDeviceNode(metadataIndexNode, device, true); @@ -2453,7 +2688,10 @@ public LinkedHashMap> next() { } ByteBuffer buffer = readData(metadataIndexPair.left.getOffset(), metadataIndexPair.right); - MetadataIndexNode firstMeasurementNode = MetadataIndexNode.deserializeFrom(buffer, false); + + MetadataIndexNode firstMeasurementNode = + deserializeConfig.measurementMetadataIndexNodeBufferDeserializer.deserialize( + buffer, deserializeConfig); return getMeasurementChunkMetadataListMapIterator(firstMeasurementNode); } @@ -2523,7 +2761,9 @@ private void collectEachLeafMeasurementNodeOffsetRange( continue; } collectEachLeafMeasurementNodeOffsetRange( - MetadataIndexNode.deserializeFrom(readData(startOffset, endOffset), false), queue); + deserializeConfig.measurementMetadataIndexNodeBufferDeserializer.deserialize( + readData(startOffset, endOffset), deserializeConfig), + queue); } } catch (StopReadTsFileByInterruptException e) { throw e; @@ -2550,4 +2790,8 @@ public boolean equals(Object o) { public int hashCode() { return Objects.hash(file); } + + public DeserializeConfig getDeserializeContext() { + return deserializeConfig; + } } diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/read/TsFileSequenceReaderTimeseriesMetadataIterator.java b/java/tsfile/src/main/java/org/apache/tsfile/read/TsFileSequenceReaderTimeseriesMetadataIterator.java index 2fc83d954..9fa1c3115 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/TsFileSequenceReaderTimeseriesMetadataIterator.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/TsFileSequenceReaderTimeseriesMetadataIterator.java @@ -19,6 +19,7 @@ package org.apache.tsfile.read; +import org.apache.tsfile.compatibility.DeserializeConfig; import org.apache.tsfile.exception.TsFileSequenceReaderTimeseriesMetadataIteratorException; import org.apache.tsfile.file.IMetadataIndexEntry; import org.apache.tsfile.file.metadata.DeviceMetadataIndexEntry; @@ -50,6 +51,7 @@ public class TsFileSequenceReaderTimeseriesMetadataIterator private final Deque metadataIndexEntryStack = new ArrayDeque<>(); private IDeviceID currentDeviceId; private int currentTimeseriesMetadataCount = 0; + private DeserializeConfig deserializeConfig; public TsFileSequenceReaderTimeseriesMetadataIterator( TsFileSequenceReader reader, boolean needChunkMetadata, int timeseriesBatchReadNumber) @@ -57,20 +59,23 @@ public TsFileSequenceReaderTimeseriesMetadataIterator( this.reader = reader; this.needChunkMetadata = needChunkMetadata; this.timeseriesBatchReadNumber = timeseriesBatchReadNumber; + this.deserializeConfig = new DeserializeConfig(); if (this.reader.tsFileMetaData == null) { this.reader.readFileMetadata(); } - final MetadataIndexNode metadataIndexNode = reader.tsFileMetaData.getMetadataIndex(); - long curEntryEndOffset = metadataIndexNode.getEndOffset(); - List metadataIndexEntryList = metadataIndexNode.getChildren(); + for (MetadataIndexNode metadataIndexNode : + reader.tsFileMetaData.getTableMetadataIndexNodeMap().values()) { + long curEntryEndOffset = metadataIndexNode.getEndOffset(); + List metadataIndexEntryList = metadataIndexNode.getChildren(); - for (int i = 
metadataIndexEntryList.size() - 1; i >= 0; i--) { - metadataIndexEntryStack.push( - new MetadataIndexEntryInfo( - metadataIndexEntryList.get(i), metadataIndexNode.getNodeType(), curEntryEndOffset)); - curEntryEndOffset = metadataIndexEntryList.get(i).getOffset(); + for (int i = metadataIndexEntryList.size() - 1; i >= 0; i--) { + metadataIndexEntryStack.push( + new MetadataIndexEntryInfo( + metadataIndexEntryList.get(i), metadataIndexNode.getNodeType(), curEntryEndOffset)); + curEntryEndOffset = metadataIndexEntryList.get(i).getOffset(); + } } } @@ -222,7 +227,7 @@ private void deserializeInternalNode( boolean currentChildLevelIsDevice = MetadataIndexNodeType.INTERNAL_DEVICE.equals(type); final MetadataIndexNode metadataIndexNode = - MetadataIndexNode.deserializeFrom( + deserializeConfig.deserializeMetadataIndexNode( reader.readData(metadataIndexEntry.getOffset(), endOffset), currentChildLevelIsDevice); MetadataIndexNodeType metadataIndexNodeType = metadataIndexNode.getNodeType(); List children = metadataIndexNode.getChildren(); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/Path.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/Path.java index 3a682bf10..52968495d 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/Path.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/Path.java @@ -22,9 +22,9 @@ import org.apache.tsfile.common.constant.TsFileConstant; import org.apache.tsfile.exception.PathParseException; import org.apache.tsfile.file.metadata.IDeviceID; -import org.apache.tsfile.file.metadata.PlainDeviceID; +import org.apache.tsfile.file.metadata.IDeviceID.Deserializer; +import org.apache.tsfile.file.metadata.IDeviceID.Factory; import org.apache.tsfile.read.common.parser.PathNodesGenerator; -import org.apache.tsfile.utils.PublicBAOS; import org.apache.tsfile.utils.ReadWriteIOUtils; import org.apache.commons.lang3.StringUtils; @@ -46,7 +46,7 @@ public class Path implements Serializable, 
Comparable { private static final long serialVersionUID = 3405277066329298200L; private String measurement; - protected String device; + protected IDeviceID device; protected String fullPath; private static final String ILLEGAL_PATH_ARGUMENT = "Path parameter is null"; @@ -54,7 +54,8 @@ public Path() {} // Only used for test public Path(IDeviceID deviceID) { - this(((PlainDeviceID) deviceID).toStringID()); + this.device = deviceID; + this.fullPath = deviceID.toString(); } /** @@ -79,25 +80,29 @@ public Path(String pathSc, boolean needSplit) { if (!needSplit) { // no split, we don't use antlr to check here. fullPath = pathSc; + device = Factory.DEFAULT_FACTORY.create(pathSc); } else { if (pathSc.length() > 0) { String[] nodes = PathNodesGenerator.splitPathToNodes(pathSc); - device = ""; if (nodes.length > 1) { - device = transformNodesToString(nodes, nodes.length - 1); + device = Factory.DEFAULT_FACTORY.create(transformNodesToString(nodes, nodes.length - 1)); + } else { + device = Factory.DEFAULT_FACTORY.create(""); } measurement = nodes[nodes.length - 1]; - fullPath = transformNodesToString(nodes, nodes.length); + fullPath = this.device.toString() + TsFileConstant.PATH_SEPARATOR + measurement; } else { fullPath = pathSc; - device = ""; + device = Factory.DEFAULT_FACTORY.create(""); measurement = pathSc; } } } public Path(IDeviceID device, String measurement, boolean needCheck) { - this(((PlainDeviceID) device).toStringID(), measurement, needCheck); + this.device = device; + this.measurement = measurement; + this.fullPath = this.device.toString() + TsFileConstant.PATH_SEPARATOR + measurement; } /** @@ -113,7 +118,7 @@ public Path(String device, String measurement, boolean needCheck) { } if (!needCheck) { this.measurement = measurement; - this.device = device; + this.device = Factory.DEFAULT_FACTORY.create(device); this.fullPath = device + TsFileConstant.PATH_SEPARATOR + measurement; return; } @@ -121,22 +126,23 @@ public Path(String device, String measurement, 
boolean needCheck) { if (!StringUtils.isEmpty(device) && !StringUtils.isEmpty(measurement)) { String path = device + TsFileConstant.PATH_SEPARATOR + measurement; String[] nodes = PathNodesGenerator.splitPathToNodes(path); - this.device = transformNodesToString(nodes, nodes.length - 1); + this.device = Factory.DEFAULT_FACTORY.create(transformNodesToString(nodes, nodes.length - 1)); this.measurement = nodes[nodes.length - 1]; - this.fullPath = transformNodesToString(nodes, nodes.length); + this.fullPath = this.device.toString() + TsFileConstant.PATH_SEPARATOR + measurement; } else if (!StringUtils.isEmpty(device)) { String[] deviceNodes = PathNodesGenerator.splitPathToNodes(device); - this.device = transformNodesToString(deviceNodes, deviceNodes.length); + this.device = + Factory.DEFAULT_FACTORY.create(transformNodesToString(deviceNodes, deviceNodes.length)); this.measurement = measurement; // for aligned path, sensor name for time column is "" this.fullPath = device + TsFileConstant.PATH_SEPARATOR + measurement; } else if (!StringUtils.isEmpty(measurement)) { String[] measurementNodes = PathNodesGenerator.splitPathToNodes(measurement); this.measurement = transformNodesToString(measurementNodes, measurementNodes.length); - this.device = device; + this.device = Factory.DEFAULT_FACTORY.create(device); this.fullPath = measurement; } else { - this.device = device; + this.device = Factory.DEFAULT_FACTORY.create(device); this.measurement = measurement; this.fullPath = ""; } @@ -146,12 +152,12 @@ public String getFullPath() { return fullPath; } - public String getDevice() { - return device; + public String getDeviceString() { + return device.toString(); } public IDeviceID getIDeviceID() { - return new PlainDeviceID(getDevice()); + return device; } public String getMeasurement() { @@ -210,11 +216,6 @@ public void serialize(OutputStream stream) throws IOException { serializeWithoutType(stream); } - public void serialize(PublicBAOS stream) throws IOException { - 
ReadWriteIOUtils.write((byte) 3, stream); // org.apache.iotdb.db.metadata.path#PathType - serializeWithoutType(stream); - } - protected void serializeWithoutType(ByteBuffer byteBuffer) { if (measurement == null) { ReadWriteIOUtils.write((byte) 0, byteBuffer); @@ -226,7 +227,7 @@ protected void serializeWithoutType(ByteBuffer byteBuffer) { ReadWriteIOUtils.write((byte) 0, byteBuffer); } else { ReadWriteIOUtils.write((byte) 1, byteBuffer); - ReadWriteIOUtils.write(device, byteBuffer); + device.serialize(byteBuffer); } if (fullPath == null) { ReadWriteIOUtils.write((byte) 0, byteBuffer); @@ -247,28 +248,7 @@ protected void serializeWithoutType(OutputStream stream) throws IOException { ReadWriteIOUtils.write((byte) 0, stream); } else { ReadWriteIOUtils.write((byte) 1, stream); - ReadWriteIOUtils.write(device, stream); - } - if (fullPath == null) { - ReadWriteIOUtils.write((byte) 0, stream); - } else { - ReadWriteIOUtils.write((byte) 1, stream); - ReadWriteIOUtils.write(fullPath, stream); - } - } - - protected void serializeWithoutType(PublicBAOS stream) throws IOException { - if (measurement == null) { - ReadWriteIOUtils.write((byte) 0, stream); - } else { - ReadWriteIOUtils.write((byte) 1, stream); - ReadWriteIOUtils.write(measurement, stream); - } - if (device == null) { - ReadWriteIOUtils.write((byte) 0, stream); - } else { - ReadWriteIOUtils.write((byte) 1, stream); - ReadWriteIOUtils.write(device, stream); + device.serialize(stream); } if (fullPath == null) { ReadWriteIOUtils.write((byte) 0, stream); @@ -283,7 +263,8 @@ public static Path deserialize(ByteBuffer byteBuffer) { byte isNull = ReadWriteIOUtils.readByte(byteBuffer); path.measurement = isNull == 0 ? null : ReadWriteIOUtils.readString(byteBuffer); isNull = ReadWriteIOUtils.readByte(byteBuffer); - path.device = isNull == 0 ? null : ReadWriteIOUtils.readString(byteBuffer); + path.device = + isNull == 0 ? 
null : Deserializer.DEFAULT_DESERIALIZER.deserializeFrom(byteBuffer); isNull = ReadWriteIOUtils.readByte(byteBuffer); path.fullPath = isNull == 0 ? null : ReadWriteIOUtils.readString(byteBuffer); return path; diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java index 14072b7a8..c607e9067 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java @@ -20,16 +20,20 @@ package org.apache.tsfile.read.common.block; import org.apache.tsfile.block.column.Column; +import org.apache.tsfile.file.metadata.TableSchema; import org.apache.tsfile.read.TimeValuePair; import org.apache.tsfile.read.common.IBatchDataIterator; +import org.apache.tsfile.read.common.block.column.ColumnFactory; import org.apache.tsfile.read.common.block.column.TimeColumn; import org.apache.tsfile.read.reader.IPointReader; import org.apache.tsfile.utils.RamUsageEstimator; import org.apache.tsfile.utils.TsPrimitiveType; import org.apache.tsfile.write.UnSupportedDataTypeException; +import org.apache.tsfile.write.schema.IMeasurementSchema; import java.util.Arrays; import java.util.Iterator; +import java.util.List; import java.util.NoSuchElementException; import static java.lang.String.format; @@ -52,16 +56,16 @@ public class TsBlock { * doesn't defensively copy the valueColumns */ public static TsBlock wrapBlocksWithoutCopy( - int positionCount, TimeColumn timeColumn, Column[] valueColumns) { + int positionCount, Column timeColumn, Column[] valueColumns) { return new TsBlock(false, positionCount, timeColumn, valueColumns); } - private final TimeColumn timeColumn; + private final Column timeColumn; private final Column[] valueColumns; /** How many rows in current TsBlock */ - private final int positionCount; + private int positionCount; private volatile long retainedSizeInBytes = -1; @@ 
-69,19 +73,16 @@ public TsBlock(int positionCount) { this(false, positionCount, null, EMPTY_COLUMNS); } - public TsBlock(TimeColumn timeColumn, Column... valueColumns) { + public TsBlock(Column timeColumn, Column... valueColumns) { this(true, determinePositionCount(valueColumns), timeColumn, valueColumns); } - public TsBlock(int positionCount, TimeColumn timeColumn, Column... valueColumns) { + public TsBlock(int positionCount, Column timeColumn, Column... valueColumns) { this(true, positionCount, timeColumn, valueColumns); } private TsBlock( - boolean columnsCopyRequired, - int positionCount, - TimeColumn timeColumn, - Column[] valueColumns) { + boolean columnsCopyRequired, int positionCount, Column timeColumn, Column[] valueColumns) { requireNonNull(valueColumns, "blocks is null"); this.positionCount = positionCount; this.timeColumn = timeColumn; @@ -98,12 +99,16 @@ public int getPositionCount() { return positionCount; } + public void setPositionCount(int positionCount) { + this.positionCount = positionCount; + } + public long getStartTime() { - return timeColumn.getStartTime(); + return timeColumn.getLong(0); } public long getEndTime() { - return timeColumn.getEndTime(); + return timeColumn.getLong(positionCount - 1); } public boolean isEmpty() { @@ -135,7 +140,7 @@ public TsBlock getRegion(int positionOffset, int length) { slicedColumns[i] = valueColumns[i].getRegion(positionOffset, length); } return wrapBlocksWithoutCopy( - length, (TimeColumn) timeColumn.getRegion(positionOffset, length), slicedColumns); + length, timeColumn.getRegion(positionOffset, length), slicedColumns); } /** @@ -166,7 +171,7 @@ public TsBlock subTsBlock(int fromIndex) { if (fromIndex > positionCount) { throw new IllegalArgumentException("FromIndex of subTsBlock cannot over positionCount."); } - TimeColumn subTimeColumn = (TimeColumn) timeColumn.subColumn(fromIndex); + Column subTimeColumn = timeColumn.subColumn(fromIndex); Column[] subValueColumns = new Column[valueColumns.length]; for 
(int i = 0; i < subValueColumns.length; i++) { subValueColumns[i] = valueColumns[i].subColumn(fromIndex); @@ -186,7 +191,7 @@ public int getValueColumnCount() { return valueColumns.length; } - public TimeColumn getTimeColumn() { + public Column getTimeColumn() { return timeColumn; } @@ -248,6 +253,17 @@ public TsBlockAlignedRowIterator getTsBlockAlignedRowIterator() { return new TsBlockAlignedRowIterator(0); } + public void reset() { + if (positionCount == 0) { + return; + } + positionCount = 0; + timeColumn.reset(); + for (Column valueColumn : valueColumns) { + valueColumn.reset(); + } + } + public class TsBlockSingleColumnIterator implements IPointReader, IBatchDataIterator { private int rowIndex; @@ -523,7 +539,11 @@ private static int determinePositionCount(Column... columns) { } public void update(int updateIdx, TsBlock sourceTsBlock, int sourceIndex) { - timeColumn.getTimes()[updateIdx] = sourceTsBlock.getTimeByIndex(sourceIndex); + timeColumn.getLongs()[updateIdx] = sourceTsBlock.getTimeByIndex(sourceIndex); + updateWithoutTimeColumn(updateIdx, sourceTsBlock, sourceIndex); + } + + public void updateWithoutTimeColumn(int updateIdx, TsBlock sourceTsBlock, int sourceIndex) { for (int i = 0; i < getValueColumnCount(); i++) { if (sourceTsBlock.getValueColumns()[i].isNull(sourceIndex)) { valueColumns[i].isNull()[updateIdx] = true; @@ -570,4 +590,27 @@ public void update(int updateIdx, TsBlock sourceTsBlock, int sourceIndex) { } } } + + public static TsBlock buildTsBlock(List columnNames, TableSchema schema, int blockSize) { + Column timeColumn = new TimeColumn(blockSize); + Column[] columns = new Column[columnNames.size()]; + for (int i = 0; i < columnNames.size(); i++) { + final String columnName = columnNames.get(i); + final IMeasurementSchema columnSchema = schema.findColumnSchema(columnName); + columns[i] = ColumnFactory.create(columnSchema.getType(), blockSize); + } + return new TsBlock(timeColumn, columns); + } + + /** + * For each column, if its 
positionCount < this. positionCount, add nulls at the end of the + * column. + */ + public void fillTrailingNulls() { + for (Column valueColumn : valueColumns) { + if (valueColumn.getPositionCount() < this.positionCount) { + valueColumn.setNull(valueColumn.getPositionCount(), this.positionCount); + } + } + } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java index 568c222c7..918551d9b 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java @@ -310,6 +310,10 @@ public long getRetainedSizeInBytes() { public TsBlock build() { TimeColumn timeColumn = (TimeColumn) timeColumnBuilder.build(); + return build(timeColumn); + } + + public TsBlock build(Column timeColumn) { if (timeColumn.getPositionCount() != declaredPositions) { throw new IllegalStateException( format( diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockUtil.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockUtil.java index f0dd15513..106d568e3 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockUtil.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockUtil.java @@ -19,8 +19,8 @@ package org.apache.tsfile.read.common.block; +import org.apache.tsfile.block.column.Column; import org.apache.tsfile.read.common.TimeRange; -import org.apache.tsfile.read.common.block.column.TimeColumn; import org.apache.tsfile.read.filter.basic.Filter; import org.apache.tsfile.read.reader.series.PaginationController; @@ -41,7 +41,7 @@ public static TsBlock skipPointsOutOfTimeRange( // else, find the index of first less than or equal to targetTime public static int getFirstConditionIndex( TsBlock tsBlock, TimeRange targetTimeRange, boolean ascending) { - TimeColumn 
timeColumn = tsBlock.getTimeColumn(); + Column timeColumn = tsBlock.getTimeColumn(); long targetTime = ascending ? targetTimeRange.getMin() : targetTimeRange.getMax(); int left = 0; int right = timeColumn.getPositionCount() - 1; @@ -49,19 +49,19 @@ public static int getFirstConditionIndex( while (left < right) { mid = (left + right) >> 1; - if (timeColumn.getLongWithoutCheck(mid) < targetTime) { + if (timeColumn.getLong(mid) < targetTime) { if (ascending) { left = mid + 1; } else { right = mid; } - } else if (timeColumn.getLongWithoutCheck(mid) > targetTime) { + } else if (timeColumn.getLong(mid) > targetTime) { if (ascending) { right = mid; } else { left = mid + 1; } - } else if (timeColumn.getLongWithoutCheck(mid) == targetTime) { + } else if (timeColumn.getLong(mid) == targetTime) { return mid; } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java index beb48c28b..0be88efe2 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java @@ -39,12 +39,16 @@ public class BinaryColumn implements Column { (int) RamUsageEstimator.shallowSizeOfInstance(BinaryColumn.class); private final int arrayOffset; - private final int positionCount; - private final boolean[] valueIsNull; + private int positionCount; + private boolean[] valueIsNull; private final Binary[] values; private final long retainedSizeInBytes; + public BinaryColumn(int initialCapacity) { + this(0, 0, null, new Binary[initialCapacity]); + } + public BinaryColumn(int positionCount, Optional valueIsNull, Binary[] values) { this(0, positionCount, valueIsNull.orElse(null), values); } @@ -197,4 +201,17 @@ public void reverse() { public int getInstanceSize() { return INSTANCE_SIZE; } + + @Override + public void setPositionCount(int count) { + 
positionCount = count; + } + + @Override + public void setNull(int start, int end) { + if (valueIsNull == null) { + valueIsNull = new boolean[values.length]; + } + Arrays.fill(valueIsNull, start, end, true); + } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java index f74bcd8ee..d534a845a 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java @@ -38,12 +38,16 @@ public class BooleanColumn implements Column { public static final int SIZE_IN_BYTES_PER_POSITION = Byte.BYTES + Byte.BYTES; private final int arrayOffset; - private final int positionCount; - private final boolean[] valueIsNull; + private int positionCount; + private boolean[] valueIsNull; private final boolean[] values; private final long retainedSizeInBytes; + public BooleanColumn(int initialCapacity) { + this(0, 0, null, new boolean[initialCapacity]); + } + public BooleanColumn(int positionCount, Optional valueIsNull, boolean[] values) { this(0, positionCount, valueIsNull.orElse(null), values); } @@ -195,4 +199,17 @@ public void reverse() { public int getInstanceSize() { return INSTANCE_SIZE; } + + @Override + public void setPositionCount(int count) { + positionCount = count; + } + + @Override + public void setNull(int start, int end) { + if (valueIsNull == null) { + valueIsNull = new boolean[values.length]; + } + Arrays.fill(valueIsNull, start, end, true); + } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnEncoder.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnEncoder.java index b65841b85..9f8965e43 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnEncoder.java +++ 
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnEncoder.java @@ -28,11 +28,6 @@ public interface ColumnEncoder { - /** Read a time column from the specified input. */ - default TimeColumn readTimeColumn(ByteBuffer input, int positionCount) { - throw new UnsupportedOperationException(); - } - /** Read a column from the specified input. */ Column readColumn(ByteBuffer input, TSDataType dataType, int positionCount); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnFactory.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnFactory.java new file mode 100644 index 000000000..42984c3d5 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnFactory.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tsfile.read.common.block.column; + +import org.apache.tsfile.block.column.Column; +import org.apache.tsfile.enums.TSDataType; + +public class ColumnFactory { + public static Column create(TSDataType dataType, int initialCapacity) { + switch (dataType) { + case INT64: + return new LongColumn(initialCapacity); + case DOUBLE: + return new DoubleColumn(initialCapacity); + case FLOAT: + return new FloatColumn(initialCapacity); + case TEXT: + return new BinaryColumn(initialCapacity); + case INT32: + return new IntColumn(initialCapacity); + case BOOLEAN: + return new BooleanColumn(initialCapacity); + default: + throw new IllegalArgumentException("Unsupported data type: " + dataType); + } + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java index 0311830b0..c065d98d5 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java @@ -39,12 +39,16 @@ public class DoubleColumn implements Column { public static final int SIZE_IN_BYTES_PER_POSITION = Double.BYTES + Byte.BYTES; private final int arrayOffset; - private final int positionCount; - private final boolean[] valueIsNull; + private int positionCount; + private boolean[] valueIsNull; private final double[] values; private final long retainedSizeInBytes; + public DoubleColumn(int initialCapacity) { + this(0, 0, null, new double[initialCapacity]); + } + public DoubleColumn(int positionCount, Optional valueIsNull, double[] values) { this(0, positionCount, valueIsNull.orElse(null), values); } @@ -196,4 +200,17 @@ public void reverse() { public int getInstanceSize() { return INSTANCE_SIZE; } + + @Override + public void setPositionCount(int count) { + positionCount = count; + } + + @Override + public void setNull(int start, int end) 
{ + if (valueIsNull == null) { + valueIsNull = new boolean[values.length]; + } + Arrays.fill(valueIsNull, start, end, true); + } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java index 449212ebc..563067368 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java @@ -39,12 +39,16 @@ public class FloatColumn implements Column { public static final int SIZE_IN_BYTES_PER_POSITION = Float.BYTES + Byte.BYTES; private final int arrayOffset; - private final int positionCount; - private final boolean[] valueIsNull; + private int positionCount; + private boolean[] valueIsNull; private final float[] values; private final long retainedSizeInBytes; + public FloatColumn(int initialCapacity) { + this(0, 0, null, new float[initialCapacity]); + } + public FloatColumn(int positionCount, Optional valueIsNull, float[] values) { this(0, positionCount, valueIsNull.orElse(null), values); } @@ -195,4 +199,17 @@ public void reverse() { public int getInstanceSize() { return INSTANCE_SIZE; } + + @Override + public void setPositionCount(int count) { + positionCount = count; + } + + @Override + public void setNull(int start, int end) { + if (valueIsNull == null) { + valueIsNull = new boolean[values.length]; + } + Arrays.fill(valueIsNull, start, end, true); + } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/Int64ArrayColumnEncoder.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/Int64ArrayColumnEncoder.java index 58cfbebf9..446adaf47 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/Int64ArrayColumnEncoder.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/Int64ArrayColumnEncoder.java @@ -28,28 +28,6 @@ 
public class Int64ArrayColumnEncoder implements ColumnEncoder { - @Override - public TimeColumn readTimeColumn(ByteBuffer input, int positionCount) { - - // Serialized data layout: - // +---------------+-----------------+-------------+ - // | may have null | null indicators | values | - // +---------------+-----------------+-------------+ - // | byte | list[byte] | list[int64] | - // +---------------+-----------------+-------------+ - - boolean[] nullIndicators = ColumnEncoder.deserializeNullIndicators(input, positionCount); - long[] values = new long[positionCount]; - if (nullIndicators == null) { - for (int i = 0; i < positionCount; i++) { - values[i] = input.getLong(); - } - return new TimeColumn(0, positionCount, values); - } else { - throw new IllegalArgumentException("TimeColumn should not contain null values."); - } - } - @Override public Column readColumn(ByteBuffer input, TSDataType dataType, int positionCount) { diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java index 0254690b6..2c3c35c12 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java @@ -39,12 +39,16 @@ public class IntColumn implements Column { public static final int SIZE_IN_BYTES_PER_POSITION = Integer.BYTES + Byte.BYTES; private final int arrayOffset; - private final int positionCount; - private final boolean[] valueIsNull; + private int positionCount; + private boolean[] valueIsNull; private final int[] values; private final long retainedSizeInBytes; + public IntColumn(int initialCapacity) { + this(0, 0, null, new int[initialCapacity]); + } + public IntColumn(int positionCount, Optional valueIsNull, int[] values) { this(0, positionCount, valueIsNull.orElse(null), values); } @@ -195,4 +199,17 @@ public void reverse() { public int 
getInstanceSize() { return INSTANCE_SIZE; } + + @Override + public void setPositionCount(int count) { + this.positionCount = count; + } + + @Override + public void setNull(int start, int end) { + if (valueIsNull == null) { + valueIsNull = new boolean[values.length]; + } + Arrays.fill(valueIsNull, start, end, true); + } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java index 34cc7dc4e..1c022369d 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java @@ -39,12 +39,16 @@ public class LongColumn implements Column { public static final int SIZE_IN_BYTES_PER_POSITION = Long.BYTES + Byte.BYTES; private final int arrayOffset; - private final int positionCount; - private final boolean[] valueIsNull; + private int positionCount; + private boolean[] valueIsNull; private final long[] values; private final long retainedSizeInBytes; + public LongColumn(int initialCapacity) { + this(0, 0, null, new long[initialCapacity]); + } + public LongColumn(int positionCount, Optional valueIsNull, long[] values) { this(0, positionCount, valueIsNull.orElse(null), values); } @@ -195,4 +199,17 @@ public void reverse() { public int getInstanceSize() { return INSTANCE_SIZE; } + + @Override + public void setPositionCount(int count) { + this.positionCount = count; + } + + @Override + public void setNull(int start, int end) { + if (valueIsNull == null) { + valueIsNull = new boolean[values.length]; + } + Arrays.fill(valueIsNull, start, end, true); + } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java index 8b7e2152a..9b2a7bccd 100644 --- 
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java @@ -36,7 +36,7 @@ public class NullColumn implements Column { private static final int INSTANCE_SIZE = (int) RamUsageEstimator.shallowSizeOfInstance(BooleanColumn.class); - private final int positionCount; + private int positionCount; private final long retainedSizeInBytes; @@ -140,4 +140,12 @@ public static Column create(TSDataType dataType, int positionCount) { public int getInstanceSize() { return INSTANCE_SIZE; } + + @Override + public void setPositionCount(int count) { + this.positionCount = count; + } + + @Override + public void setNull(int start, int end) {} } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java index a78e2daae..d20cc4436 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java @@ -37,7 +37,7 @@ public class RunLengthEncodedColumn implements Column { (int) RamUsageEstimator.shallowSizeOfInstance(RunLengthEncodedColumn.class); private final Column value; - private final int positionCount; + private int positionCount; public RunLengthEncodedColumn(Column value, int positionCount) { requireNonNull(value, "value is null"); @@ -230,4 +230,14 @@ public void reverse() { public int getInstanceSize() { return INSTANCE_SIZE; } + + @Override + public void setPositionCount(int count) { + this.positionCount = count; + } + + @Override + public void setNull(int start, int end) { + value.setNull(start, end); + } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java 
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java index 1783f6a34..d681b079f 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java @@ -31,16 +31,20 @@ public class TimeColumn implements Column { private static final int INSTANCE_SIZE = - (int) RamUsageEstimator.shallowSizeOfInstance(LongColumn.class); + (int) RamUsageEstimator.shallowSizeOfInstance(TimeColumn.class); public static final int SIZE_IN_BYTES_PER_POSITION = Long.BYTES; private final int arrayOffset; - private final int positionCount; + private int positionCount; private final long[] values; private final long retainedSizeInBytes; + public TimeColumn(int initialCapacity) { + this(0, 0, new long[initialCapacity]); + } + public TimeColumn(int positionCount, long[] values) { this(0, positionCount, values); } @@ -78,10 +82,6 @@ public long getLong(int position) { return values[position + arrayOffset]; } - public long getLongWithoutCheck(int position) { - return values[position + arrayOffset]; - } - @Override public Object getObject(int position) { return getLong(position); @@ -180,4 +180,12 @@ public long[] getLongs() { public int getInstanceSize() { return INSTANCE_SIZE; } + + @Override + public void setPositionCount(int count) { + this.positionCount = count; + } + + @Override + public void setNull(int start, int end) {} } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java index 0ef002567..e887a5773 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java @@ -67,8 +67,10 @@ public TsBlock deserialize(ByteBuffer byteBuffer) { } // Time column.
- TimeColumn timeColumn = - ColumnEncoderFactory.get(columnEncodings.get(0)).readTimeColumn(byteBuffer, positionCount); + // TODO: a TimeColumn will be deserialized as a LongColumn + Column timeColumn = + ColumnEncoderFactory.get(columnEncodings.get(0)) + .readColumn(byteBuffer, TSDataType.INT64, positionCount); // Value columns Column[] valueColumns = new Column[valueColumnCount]; diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/parser/PathNodesGenerator.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/parser/PathNodesGenerator.java index 1d2d14ae5..7c54598a5 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/parser/PathNodesGenerator.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/parser/PathNodesGenerator.java @@ -41,6 +41,9 @@ private PathNodesGenerator() { public static String[] splitPathToNodes(String path) throws PathParseException { try { + if (path.isEmpty()) { + return new String[] {path}; + } return invokeParser(path); } catch (ParseCancellationException e) { throw new PathParseException(path); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/AbstractIntType.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/AbstractIntType.java new file mode 100644 index 000000000..8425a8793 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/AbstractIntType.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.read.common.type; + +import org.apache.tsfile.block.column.Column; +import org.apache.tsfile.block.column.ColumnBuilder; +import org.apache.tsfile.read.common.block.column.IntColumnBuilder; + +import java.util.Collections; +import java.util.List; + +public abstract class AbstractIntType extends AbstractType { + + @Override + public int getInt(Column c, int position) { + return c.getInt(position); + } + + @Override + public long getLong(Column c, int position) { + return c.getInt(position); + } + + @Override + public float getFloat(Column c, int position) { + return c.getInt(position); + } + + @Override + public double getDouble(Column c, int position) { + return c.getInt(position); + } + + @Override + public void writeInt(ColumnBuilder builder, int value) { + builder.writeInt(value); + } + + @Override + public void writeLong(ColumnBuilder builder, long value) { + builder.writeInt((int) value); + } + + @Override + public void writeFloat(ColumnBuilder builder, float value) { + builder.writeInt((int) value); + } + + @Override + public void writeDouble(ColumnBuilder builder, double value) { + builder.writeInt((int) value); + } + + @Override + public ColumnBuilder createColumnBuilder(int expectedEntries) { + return new IntColumnBuilder(null, expectedEntries); + } + + @Override + public boolean isComparable() { + return true; + } + + @Override + public boolean isOrderable() { + return true; + } + + @Override + public List getTypeParameters() { + return Collections.emptyList(); + } +} diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/AbstractLongType.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/AbstractLongType.java new file mode 100644 index 000000000..e8e1ecbc3 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/AbstractLongType.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tsfile.read.common.type; + +import org.apache.tsfile.block.column.Column; +import org.apache.tsfile.block.column.ColumnBuilder; +import org.apache.tsfile.read.common.block.column.LongColumnBuilder; + +import java.util.Collections; +import java.util.List; + +public abstract class AbstractLongType extends AbstractType { + + @Override + public int getInt(Column c, int position) { + return (int) c.getLong(position); + } + + @Override + public long getLong(Column c, int position) { + return c.getLong(position); + } + + @Override + public float getFloat(Column c, int position) { + return c.getLong(position); + } + + @Override + public double getDouble(Column c, int position) { + return c.getLong(position); + } + + @Override + public void writeInt(ColumnBuilder builder, int value) { + builder.writeLong(value); + } + + @Override + public void writeLong(ColumnBuilder builder, long value) { + builder.writeLong(value); + } + + @Override + public void writeFloat(ColumnBuilder builder, float value) { + builder.writeLong((long) value); + } + + @Override + public void writeDouble(ColumnBuilder builder, double value) { + builder.writeLong((long) value); + } + + @Override + public ColumnBuilder createColumnBuilder(int expectedEntries) { + return new LongColumnBuilder(null, expectedEntries); + } + + @Override + public boolean isComparable() { + return true; + } + + @Override + public boolean isOrderable() { + return true; + } + + @Override + public List getTypeParameters() { + return Collections.emptyList(); + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/AbstractType.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/AbstractType.java new file mode 100644 index 000000000..937b7fdcc --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/AbstractType.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.read.common.type; + +public abstract class AbstractType implements Type { + + @Override + public String toString() { + return getDisplayName(); + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/AbstractVarcharType.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/AbstractVarcharType.java new file mode 100644 index 000000000..d09ed07c6 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/AbstractVarcharType.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.read.common.type; + +import org.apache.tsfile.block.column.Column; +import org.apache.tsfile.block.column.ColumnBuilder; +import org.apache.tsfile.read.common.block.column.BinaryColumnBuilder; +import org.apache.tsfile.utils.Binary; + +import java.util.Collections; +import java.util.List; + +public abstract class AbstractVarcharType extends AbstractType { + @Override + public Binary getBinary(Column c, int position) { + return c.getBinary(position); + } + + @Override + public void writeBinary(ColumnBuilder builder, Binary value) { + builder.writeBinary(value); + } + + @Override + public ColumnBuilder createColumnBuilder(int expectedEntries) { + return new BinaryColumnBuilder(null, expectedEntries); + } + + @Override + public boolean isComparable() { + return true; + } + + @Override + public boolean isOrderable() { + return true; + } + + @Override + public List getTypeParameters() { + return Collections.emptyList(); + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/BinaryType.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/BinaryType.java index bcbd2a0b9..68c874998 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/BinaryType.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/BinaryType.java @@ -19,37 +19,22 @@ package org.apache.tsfile.read.common.type; -import org.apache.tsfile.block.column.Column; -import org.apache.tsfile.block.column.ColumnBuilder; -import org.apache.tsfile.read.common.block.column.BinaryColumnBuilder; -import org.apache.tsfile.utils.Binary; - -public class BinaryType implements Type { - private static final BinaryType INSTANCE = new BinaryType(); +public class BinaryType extends AbstractVarcharType { + public static final BinaryType TEXT = new BinaryType(); private BinaryType() {} @Override - public Binary 
getBinary(Column c, int position) { - return c.getBinary(position); - } - - @Override - public void writeBinary(ColumnBuilder builder, Binary value) { - builder.writeBinary(value); - } - - @Override - public ColumnBuilder createColumnBuilder(int expectedEntries) { - return new BinaryColumnBuilder(null, expectedEntries); + public TypeEnum getTypeEnum() { + return TypeEnum.TEXT; } @Override - public TypeEnum getTypeEnum() { - return TypeEnum.BINARY; + public String getDisplayName() { + return "TEXT"; } public static BinaryType getInstance() { - return INSTANCE; + return TEXT; } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/BlobType.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/BlobType.java new file mode 100644 index 000000000..d02ca7168 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/BlobType.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tsfile.read.common.type; + +import org.apache.tsfile.block.column.Column; +import org.apache.tsfile.block.column.ColumnBuilder; +import org.apache.tsfile.read.common.block.column.BinaryColumnBuilder; +import org.apache.tsfile.utils.Binary; + +import java.util.Collections; +import java.util.List; + +public class BlobType extends AbstractType { + + public static final BlobType BLOB = new BlobType(); + + private BlobType() {} + + @Override + public Binary getBinary(Column c, int position) { + return c.getBinary(position); + } + + @Override + public void writeBinary(ColumnBuilder builder, Binary value) { + builder.writeBinary(value); + } + + @Override + public ColumnBuilder createColumnBuilder(int expectedEntries) { + return new BinaryColumnBuilder(null, expectedEntries); + } + + @Override + public TypeEnum getTypeEnum() { + return TypeEnum.BLOB; + } + + @Override + public String getDisplayName() { + return "BLOB"; + } + + @Override + public boolean isComparable() { + return true; + } + + @Override + public boolean isOrderable() { + return true; + } + + @Override + public List getTypeParameters() { + return Collections.emptyList(); + } + + public static BlobType getInstance() { + return BLOB; + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/BooleanType.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/BooleanType.java index 67cf86cca..e00d7ecb8 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/BooleanType.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/BooleanType.java @@ -23,9 +23,12 @@ import org.apache.tsfile.block.column.ColumnBuilder; import org.apache.tsfile.read.common.block.column.BooleanColumnBuilder; -public class BooleanType implements Type { +import java.util.Collections; +import java.util.List; - private static final BooleanType INSTANCE = new BooleanType(); +public class BooleanType extends AbstractType { + + public static final 
BooleanType BOOLEAN = new BooleanType(); private BooleanType() {} @@ -49,7 +52,27 @@ public TypeEnum getTypeEnum() { return TypeEnum.BOOLEAN; } + @Override + public String getDisplayName() { + return "BOOLEAN"; + } + + @Override + public boolean isComparable() { + return true; + } + + @Override + public boolean isOrderable() { + return true; + } + + @Override + public List getTypeParameters() { + return Collections.emptyList(); + } + public static BooleanType getInstance() { - return INSTANCE; + return BOOLEAN; } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/DateType.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/DateType.java new file mode 100644 index 000000000..88d17e242 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/DateType.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tsfile.read.common.type; + +public class DateType extends AbstractIntType { + + public static final DateType DATE = new DateType(); + + private DateType() {} + + @Override + public TypeEnum getTypeEnum() { + return TypeEnum.DATE; + } + + @Override + public String getDisplayName() { + return "DATE"; + } + + public static DateType getInstance() { + return DATE; + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/DoubleType.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/DoubleType.java index 317dfd96a..6e325c066 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/DoubleType.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/DoubleType.java @@ -23,9 +23,12 @@ import org.apache.tsfile.block.column.ColumnBuilder; import org.apache.tsfile.read.common.block.column.DoubleColumnBuilder; -public class DoubleType implements Type { +import java.util.Collections; +import java.util.List; - private static final DoubleType INSTANCE = new DoubleType(); +public class DoubleType extends AbstractType { + + public static final DoubleType DOUBLE = new DoubleType(); private DoubleType() {} @@ -79,7 +82,27 @@ public TypeEnum getTypeEnum() { return TypeEnum.DOUBLE; } + @Override + public String getDisplayName() { + return "DOUBLE"; + } + + @Override + public boolean isComparable() { + return true; + } + + @Override + public boolean isOrderable() { + return true; + } + + @Override + public List getTypeParameters() { + return Collections.emptyList(); + } + public static DoubleType getInstance() { - return INSTANCE; + return DOUBLE; } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/FloatType.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/FloatType.java index 51e7e5b23..6417f0f52 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/FloatType.java +++ 
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/FloatType.java @@ -23,9 +23,12 @@ import org.apache.tsfile.block.column.ColumnBuilder; import org.apache.tsfile.read.common.block.column.FloatColumnBuilder; -public class FloatType implements Type { +import java.util.Collections; +import java.util.List; - private static final FloatType INSTANCE = new FloatType(); +public class FloatType extends AbstractType { + + public static final FloatType FLOAT = new FloatType(); private FloatType() {} @@ -79,7 +82,27 @@ public TypeEnum getTypeEnum() { return TypeEnum.FLOAT; } + @Override + public String getDisplayName() { + return "FLOAT"; + } + + @Override + public boolean isComparable() { + return true; + } + + @Override + public boolean isOrderable() { + return true; + } + + @Override + public List getTypeParameters() { + return Collections.emptyList(); + } + public static FloatType getInstance() { - return INSTANCE; + return FLOAT; } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/IntType.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/IntType.java index 1cead88dd..e4d36c2a3 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/IntType.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/IntType.java @@ -19,67 +19,23 @@ package org.apache.tsfile.read.common.type; -import org.apache.tsfile.block.column.Column; -import org.apache.tsfile.block.column.ColumnBuilder; -import org.apache.tsfile.read.common.block.column.IntColumnBuilder; +public class IntType extends AbstractIntType { -public class IntType implements Type { - - private static final IntType INSTANCE = new IntType(); + public static final IntType INT32 = new IntType(); private IntType() {} @Override - public int getInt(Column c, int position) { - return c.getInt(position); - } - - @Override - public long getLong(Column c, int position) { - return c.getInt(position); - } - - @Override - public float getFloat(Column c, 
int position) { - return c.getInt(position); - } - - @Override - public double getDouble(Column c, int position) { - return c.getInt(position); - } - - @Override - public void writeInt(ColumnBuilder builder, int value) { - builder.writeInt(value); - } - - @Override - public void writeLong(ColumnBuilder builder, long value) { - builder.writeInt((int) value); - } - - @Override - public void writeFloat(ColumnBuilder builder, float value) { - builder.writeInt((int) value); - } - - @Override - public void writeDouble(ColumnBuilder builder, double value) { - builder.writeInt((int) value); - } - - @Override - public ColumnBuilder createColumnBuilder(int expectedEntries) { - return new IntColumnBuilder(null, expectedEntries); + public TypeEnum getTypeEnum() { + return TypeEnum.INT32; } @Override - public TypeEnum getTypeEnum() { - return TypeEnum.INT32; + public String getDisplayName() { + return "INT32"; } public static IntType getInstance() { - return INSTANCE; + return INT32; } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/LongType.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/LongType.java index 7be7b792c..a1ed2a1c9 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/LongType.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/LongType.java @@ -19,67 +19,23 @@ package org.apache.tsfile.read.common.type; -import org.apache.tsfile.block.column.Column; -import org.apache.tsfile.block.column.ColumnBuilder; -import org.apache.tsfile.read.common.block.column.LongColumnBuilder; +public class LongType extends AbstractLongType { -public class LongType implements Type { - - private static final LongType INSTANCE = new LongType(); + public static final LongType INT64 = new LongType(); private LongType() {} @Override - public int getInt(Column c, int position) { - return (int) c.getLong(position); - } - - @Override - public long getLong(Column c, int position) { - return 
c.getLong(position); - } - - @Override - public float getFloat(Column c, int position) { - return c.getLong(position); - } - - @Override - public double getDouble(Column c, int position) { - return c.getLong(position); - } - - @Override - public void writeInt(ColumnBuilder builder, int value) { - builder.writeLong(value); - } - - @Override - public void writeLong(ColumnBuilder builder, long value) { - builder.writeLong(value); - } - - @Override - public void writeFloat(ColumnBuilder builder, float value) { - builder.writeLong((long) value); - } - - @Override - public void writeDouble(ColumnBuilder builder, double value) { - builder.writeLong((long) value); - } - - @Override - public ColumnBuilder createColumnBuilder(int expectedEntries) { - return new LongColumnBuilder(null, expectedEntries); + public TypeEnum getTypeEnum() { + return TypeEnum.INT64; } @Override - public TypeEnum getTypeEnum() { - return TypeEnum.INT64; + public String getDisplayName() { + return "INT64"; } public static LongType getInstance() { - return INSTANCE; + return INT64; } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/RowType.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/RowType.java new file mode 100644 index 000000000..0c5dabbc5 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/RowType.java @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.read.common.type; + +import org.apache.tsfile.block.column.ColumnBuilder; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; + +import static java.util.Objects.requireNonNull; +import static org.apache.tsfile.read.common.type.TypeEnum.ROW; + +public class RowType extends AbstractType { + + private final List fields; + private final List fieldTypes; + private final boolean comparable; + private final boolean orderable; + + private RowType(List originalFields) { + + this.fields = new ArrayList<>(originalFields); + this.fieldTypes = fields.stream().map(Field::getType).collect(Collectors.toList()); + + this.comparable = fields.stream().allMatch(field -> field.getType().isComparable()); + this.orderable = fields.stream().allMatch(field -> field.getType().isOrderable()); + } + + public static RowType from(List fields) { + return new RowType(fields); + } + + public static RowType anonymous(List types) { + List fields = + types.stream().map(type -> new Field(Optional.empty(), type)).collect(Collectors.toList()); + + return new RowType(fields); + } + + public static RowType rowType(Field... field) { + return from(Arrays.asList(field)); + } + + public static RowType anonymousRow(Type... 
types) { + return anonymous(Arrays.asList(types)); + } + + // Only RowParametricType.createType should call this method + public static RowType createWithTypeSignature(List fields) { + return new RowType(fields); + } + + public static Field field(String name, Type type) { + return new Field(Optional.of(name), type); + } + + public static Field field(Type type) { + return new Field(Optional.empty(), type); + } + + @Override + public ColumnBuilder createColumnBuilder(int expectedEntries) { + throw new UnsupportedOperationException(); + } + + @Override + public TypeEnum getTypeEnum() { + return ROW; + } + + @Override + public String getDisplayName() { + // Convert to standard sql name + StringBuilder result = new StringBuilder(); + result.append("ROW").append('('); + for (Field field : fields) { + String typeDisplayName = field.getType().getDisplayName(); + if (field.getName().isPresent()) { + // TODO: names are already canonicalized, so they should be printed as delimited identifiers + result.append(field.getName().get()).append(' ').append(typeDisplayName); + } else { + result.append(typeDisplayName); + } + result.append(", "); + } + result.setLength(result.length() - 2); + result.append(')'); + return result.toString(); + } + + @Override + public List getTypeParameters() { + return fieldTypes; + } + + public List getFields() { + return fields; + } + + public static class Field { + private final Type type; + private final Optional name; + + public Field(Optional name, Type type) { + this.type = requireNonNull(type, "type is null"); + this.name = requireNonNull(name, "name is null"); + } + + public Type getType() { + return type; + } + + public Optional getName() { + return name; + } + } + + @Override + public boolean isComparable() { + return comparable; + } + + @Override + public boolean isOrderable() { + return orderable; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + 
return false; + } + RowType rowType = (RowType) o; + return comparable == rowType.comparable + && orderable == rowType.orderable + && Objects.equals(fields, rowType.fields) + && Objects.equals(fieldTypes, rowType.fieldTypes); + } + + @Override + public int hashCode() { + return Objects.hash(fields, fieldTypes, comparable, orderable); + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/StringType.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/StringType.java new file mode 100644 index 000000000..725ae9e3b --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/StringType.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tsfile.read.common.type; + +public class StringType extends AbstractVarcharType { + + public static final StringType STRING = new StringType(); + + private StringType() {} + + @Override + public TypeEnum getTypeEnum() { + return TypeEnum.STRING; + } + + @Override + public String getDisplayName() { + return "STRING"; + } + + public static StringType getInstance() { + return STRING; + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/TimestampType.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/TimestampType.java new file mode 100644 index 000000000..c4e5e8434 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/TimestampType.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tsfile.read.common.type; + +public class TimestampType extends AbstractLongType { + + public static final TimestampType TIMESTAMP = new TimestampType(); + + private TimestampType() {} + + @Override + public TypeEnum getTypeEnum() { + return TypeEnum.TIMESTAMP; + } + + @Override + public String getDisplayName() { + return "TIMESTAMP"; + } + + public static TimestampType getInstance() { + return TIMESTAMP; + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/Type.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/Type.java index 118b83e7a..cb6b60944 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/Type.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/Type.java @@ -23,6 +23,8 @@ import org.apache.tsfile.block.column.ColumnBuilder; import org.apache.tsfile.utils.Binary; +import java.util.List; + public interface Type { /** Gets a boolean at {@code position}. */ @@ -102,4 +104,16 @@ default void writeObject(ColumnBuilder builder, Object value) { ColumnBuilder createColumnBuilder(int expectedEntries); TypeEnum getTypeEnum(); + + /** Returns the name of this type that should be displayed to end-users. */ + String getDisplayName(); + + /** True if the type supports equalTo and hash. */ + boolean isComparable(); + + /** True if the type supports compareTo. */ + boolean isOrderable(); + + /** For parameterized types returns the list of parameters. 
*/ + List getTypeParameters(); } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/TypeEnum.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/TypeEnum.java index 5a4489de8..184a64af4 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/TypeEnum.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/TypeEnum.java @@ -30,5 +30,13 @@ public enum TypeEnum { BOOLEAN, - BINARY + TEXT, + + ROW, + + UNKNOWN, + TIMESTAMP, + DATE, + BLOB, + STRING } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/TypeFactory.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/TypeFactory.java index 72946ee83..41474542c 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/TypeFactory.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/TypeFactory.java @@ -29,11 +29,37 @@ private TypeFactory() { public static Type getType(TSDataType tsDataType) { switch (tsDataType) { - case INT32: case DATE: + return DateType.getInstance(); + case INT32: return IntType.getInstance(); case INT64: + return LongType.getInstance(); case TIMESTAMP: + return TimestampType.getInstance(); + case FLOAT: + return FloatType.getInstance(); + case DOUBLE: + return DoubleType.getInstance(); + case BOOLEAN: + return BooleanType.getInstance(); + case TEXT: + return BinaryType.getInstance(); + case STRING: + return StringType.getInstance(); + case BLOB: + return BlobType.getInstance(); + default: + throw new UnsupportedOperationException( + String.format("Invalid TSDataType for TypeFactory: %s", tsDataType)); + } + } + + public static Type getType(TypeEnum typeEnum) { + switch (typeEnum) { + case INT32: + return IntType.getInstance(); + case INT64: return LongType.getInstance(); case FLOAT: return FloatType.getInstance(); @@ -42,12 +68,20 @@ public static Type getType(TSDataType tsDataType) { case BOOLEAN: return BooleanType.getInstance(); case TEXT: + return 
BinaryType.getInstance(); + case UNKNOWN: + return UnknownType.getInstance(); + case DATE: + return DateType.getInstance(); + case TIMESTAMP: + return TimestampType.getInstance(); case BLOB: + return BlobType.getInstance(); case STRING: - return BinaryType.getInstance(); + return StringType.getInstance(); default: throw new UnsupportedOperationException( - String.format("Invalid TSDataType for TypeFactory: %s", tsDataType)); + String.format("Invalid TypeEnum for TypeFactory: %s", typeEnum)); } } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/UnknownType.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/UnknownType.java new file mode 100644 index 000000000..818019cf7 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/type/UnknownType.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tsfile.read.common.type; + +import org.apache.tsfile.block.column.Column; +import org.apache.tsfile.block.column.ColumnBuilder; +import org.apache.tsfile.read.common.block.column.BooleanColumnBuilder; + +import java.util.Collections; +import java.util.List; + +import static org.apache.tsfile.utils.Preconditions.checkArgument; + +public class UnknownType implements Type { + public static final UnknownType UNKNOWN = new UnknownType(); + public static final String NAME = "unknown"; + + private UnknownType() {} + + @Override + public void writeBoolean(ColumnBuilder columnBuilder, boolean value) { + // Ideally, this function should never be invoked for the unknown type. + // However, some logic (e.g. AbstractMinMaxBy) relies on writing a default value before the null + // check. + checkArgument(!value); + columnBuilder.appendNull(); + } + + @Override + public boolean getBoolean(Column column, int position) { + // Ideally, this function should never be invoked for the unknown type. + // However, some logic relies on having a default value before the null check. 
+ checkArgument(column.isNull(position)); + return false; + } + + @Override + public ColumnBuilder createColumnBuilder(int expectedEntries) { + return new BooleanColumnBuilder(null, expectedEntries); + } + + @Override + public TypeEnum getTypeEnum() { + return TypeEnum.UNKNOWN; + } + + @Override + public String getDisplayName() { + return NAME; + } + + @Override + public boolean isComparable() { + return true; + } + + @Override + public boolean isOrderable() { + return true; + } + + @Override + public List getTypeParameters() { + return Collections.emptyList(); + } + + public static UnknownType getInstance() { + return UNKNOWN; + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/controller/DeviceMetaIterator.java b/java/tsfile/src/main/java/org/apache/tsfile/read/controller/DeviceMetaIterator.java new file mode 100644 index 000000000..4445bf9be --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/controller/DeviceMetaIterator.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.tsfile.read.controller; + +import org.apache.tsfile.file.IMetadataIndexEntry; +import org.apache.tsfile.file.metadata.DeviceMetadataIndexEntry; +import org.apache.tsfile.file.metadata.IDeviceID; +import org.apache.tsfile.file.metadata.MetadataIndexNode; +import org.apache.tsfile.file.metadata.enums.MetadataIndexNodeType; +import org.apache.tsfile.read.TsFileSequenceReader; +import org.apache.tsfile.read.expression.ExpressionTree; +import org.apache.tsfile.utils.Pair; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayDeque; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Queue; + +public class DeviceMetaIterator implements Iterator> { + + private static final Logger LOGGER = LoggerFactory.getLogger(DeviceMetaIterator.class); + private final TsFileSequenceReader tsFileSequenceReader; + private final Queue metadataIndexNodes = new ArrayDeque<>(); + private final Queue> resultCache = new ArrayDeque<>(); + private final ExpressionTree idFilter; + + public DeviceMetaIterator( + TsFileSequenceReader tsFileSequenceReader, + MetadataIndexNode metadataIndexNode, + ExpressionTree idFilter) { + this.tsFileSequenceReader = tsFileSequenceReader; + this.metadataIndexNodes.add(metadataIndexNode); + this.idFilter = idFilter; + } + + @Override + public boolean hasNext() { + if (!resultCache.isEmpty()) { + return true; + } + try { + loadResults(); + } catch (IOException e) { + LOGGER.error("Failed to load device meta data", e); + return false; + } + + return !resultCache.isEmpty(); + } + + private void loadLeafDevice(MetadataIndexNode currentNode) throws IOException { + List leafChildren = currentNode.getChildren(); + for (int i = 0; i < leafChildren.size(); i++) { + IMetadataIndexEntry child = leafChildren.get(i); + final IDeviceID deviceID = ((DeviceMetadataIndexEntry) child).getDeviceID(); + if (idFilter != null && 
!idFilter.satisfy(deviceID)) { + continue; + } + + long startOffset = child.getOffset(); + long endOffset = + i < leafChildren.size() - 1 + ? leafChildren.get(i + 1).getOffset() + : currentNode.getEndOffset(); + final MetadataIndexNode childNode = + tsFileSequenceReader.readMetadataIndexNode(startOffset, endOffset, false); + resultCache.add(new Pair<>(deviceID, childNode)); + } + } + + private void loadInternalNode(MetadataIndexNode currentNode) throws IOException { + List internalChildren = currentNode.getChildren(); + for (int i = 0; i < internalChildren.size(); i++) { + IMetadataIndexEntry child = internalChildren.get(i); + long startOffset = child.getOffset(); + long endOffset = + i < internalChildren.size() - 1 + ? internalChildren.get(i + 1).getOffset() + : currentNode.getEndOffset(); + final MetadataIndexNode childNode = + tsFileSequenceReader.readMetadataIndexNode(startOffset, endOffset, true); + metadataIndexNodes.add(childNode); + } + } + + private void loadResults() throws IOException { + while (!metadataIndexNodes.isEmpty()) { + final MetadataIndexNode currentNode = metadataIndexNodes.poll(); + final MetadataIndexNodeType nodeType = currentNode.getNodeType(); + switch (nodeType) { + case LEAF_DEVICE: + loadLeafDevice(currentNode); + if (!resultCache.isEmpty()) { + return; + } + case INTERNAL_DEVICE: + loadInternalNode(currentNode); + break; + default: + throw new IOException("A non-device node detected: " + currentNode); + } + } + } + + @Override + public Pair next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + return resultCache.poll(); + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/controller/IMetadataQuerier.java b/java/tsfile/src/main/java/org/apache/tsfile/read/controller/IMetadataQuerier.java index 3aa63a6b5..b6d9654ac 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/controller/IMetadataQuerier.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/controller/IMetadataQuerier.java @@ 
-22,18 +22,35 @@ import org.apache.tsfile.enums.TSDataType; import org.apache.tsfile.exception.write.NoMeasurementException; import org.apache.tsfile.file.metadata.IChunkMetadata; +import org.apache.tsfile.file.metadata.IDeviceID; +import org.apache.tsfile.file.metadata.MetadataIndexNode; import org.apache.tsfile.file.metadata.TsFileMetadata; import org.apache.tsfile.read.common.Path; import org.apache.tsfile.read.common.TimeRange; +import org.apache.tsfile.read.expression.ExpressionTree; +import org.apache.tsfile.utils.Pair; import java.io.IOException; +import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; public interface IMetadataQuerier { List getChunkMetaDataList(Path path) throws IOException; + /** + * @param deviceID the deviceID to be queried + * @param measurementNames the measurementNames to be queried + * @param measurementNode nullable, if provided, the search will start from the node + * @return each list is the ChunkMetadata of those timeseries who exists + * @throws IOException if IO error occurs + */ + List> getChunkMetadataLists( + IDeviceID deviceID, Set measurementNames, MetadataIndexNode measurementNode) + throws IOException; + Map> getChunkMetaDataMap(List paths) throws IOException; TsFileMetadata getWholeFileMetadata(); @@ -65,4 +82,7 @@ List convertSpace2TimePartition( /** clear caches (if used) to release memory. 
*/ void clear(); + + Iterator> deviceIterator( + MetadataIndexNode root, ExpressionTree idFilter); } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/controller/MetadataQuerierByFileImpl.java b/java/tsfile/src/main/java/org/apache/tsfile/read/controller/MetadataQuerierByFileImpl.java index 64d62442b..00e3ae1c9 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/controller/MetadataQuerierByFileImpl.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/controller/MetadataQuerierByFileImpl.java @@ -26,12 +26,15 @@ import org.apache.tsfile.file.metadata.IChunkMetadata; import org.apache.tsfile.file.metadata.IDeviceID; import org.apache.tsfile.file.metadata.ITimeSeriesMetadata; +import org.apache.tsfile.file.metadata.MetadataIndexNode; import org.apache.tsfile.file.metadata.TimeseriesMetadata; import org.apache.tsfile.file.metadata.TsFileMetadata; import org.apache.tsfile.read.TsFileSequenceReader; import org.apache.tsfile.read.TsFileSequenceReader.LocateStatus; import org.apache.tsfile.read.common.Path; import org.apache.tsfile.read.common.TimeRange; +import org.apache.tsfile.read.expression.ExpressionTree; +import org.apache.tsfile.utils.Pair; import java.io.IOException; import java.util.ArrayList; @@ -39,6 +42,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -52,8 +56,8 @@ public class MetadataQuerierByFileImpl implements IMetadataQuerier { private TsFileMetadata fileMetaData; - // TimeseriesPath -> List - private LRUCache> chunkMetaDataCache; + // (deviceId, measurementId) -> List + private LRUCache, List> deviceIdChunkMetadataCache; private TsFileSequenceReader tsFileReader; @@ -61,10 +65,11 @@ public class MetadataQuerierByFileImpl implements IMetadataQuerier { public MetadataQuerierByFileImpl(TsFileSequenceReader tsFileReader) throws IOException { this.tsFileReader = tsFileReader; 
this.fileMetaData = tsFileReader.readFileMetadata(); - chunkMetaDataCache = - new LRUCache>(CACHED_ENTRY_NUMBER) { + deviceIdChunkMetadataCache = + new LRUCache, List>(CACHED_ENTRY_NUMBER) { @Override - public List loadObjectByKey(Path key) throws IOException { + protected List loadObjectByKey(Pair key) + throws IOException { return loadChunkMetadata(key); } }; @@ -72,7 +77,37 @@ public List loadObjectByKey(Path key) throws IOException { @Override public List getChunkMetaDataList(Path timeseriesPath) throws IOException { - return new ArrayList<>(chunkMetaDataCache.get(timeseriesPath)); + return new ArrayList<>( + deviceIdChunkMetadataCache.get( + new Pair<>(timeseriesPath.getIDeviceID(), timeseriesPath.getMeasurement()))); + } + + public List> getChunkMetadataLists( + IDeviceID deviceID, Set measurementNames, MetadataIndexNode measurementNode) + throws IOException { + List> results = new ArrayList<>(measurementNames.size()); + final Iterator iterator = measurementNames.iterator(); + // use cache when possible + while (iterator.hasNext()) { + final String measurementName = iterator.next(); + // check first to avoid loading + final Pair key = new Pair<>(deviceID, measurementName); + if (deviceIdChunkMetadataCache.containsKey(key)) { + final List metadataList = deviceIdChunkMetadataCache.get(key); + results.add(metadataList); + iterator.remove(); + } + } + // the remaining is not in the cache, search them in file + final List> iChunkMetadataList = + tsFileReader.getIChunkMetadataList(deviceID, measurementNames, measurementNode); + for (List metadataList : iChunkMetadataList) { + final String measurementUid = metadataList.get(0).getMeasurementUid(); + // cache the result + deviceIdChunkMetadataCache.put(new Pair<>(deviceID, measurementUid), metadataList); + results.add(metadataList); + } + return results; } @Override @@ -118,7 +153,7 @@ public void loadChunkMetaDatas(List paths) throws IOException { } List timeseriesMetaDataList = - 
tsFileReader.readITimeseriesMetadata(selectedDevice, selectedMeasurements); + tsFileReader.readITimeseriesMetadata(selectedDevice, selectedMeasurements, null, false); for (ITimeSeriesMetadata timeseriesMetadata : timeseriesMetaDataList) { List chunkMetadataList = tsFileReader.readIChunkMetaDataList(timeseriesMetadata); @@ -132,8 +167,8 @@ public void loadChunkMetaDatas(List paths) throws IOException { } else { measurementId = ((TimeseriesMetadata) timeseriesMetadata).getMeasurementId(); } - this.chunkMetaDataCache.put( - new Path(selectedDevice, measurementId, true), chunkMetadataList); + this.deviceIdChunkMetadataCache.put( + new Pair<>(selectedDevice, measurementId), chunkMetadataList); count += chunkMetadataList.size(); if (count == CACHED_ENTRY_NUMBER) { enough = true; @@ -156,6 +191,10 @@ private List loadChunkMetadata(Path path) throws IOException { return tsFileReader.getIChunkMetadataList(path); } + private List loadChunkMetadata(Pair key) throws IOException { + return tsFileReader.getIChunkMetadataList(key.getLeft(), key.right); + } + @Override @SuppressWarnings("squid:S3776") // Suppress high Cognitive Complexity warning public List convertSpace2TimePartition( @@ -253,6 +292,12 @@ public static LocateStatus checkLocateStatus( @Override public void clear() { - chunkMetaDataCache.clear(); + deviceIdChunkMetadataCache.clear(); + } + + @Override + public Iterator> deviceIterator( + MetadataIndexNode root, ExpressionTree idFilter) { + return new DeviceMetaIterator(tsFileReader, root, idFilter); } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/expression/ExpressionTree.java b/java/tsfile/src/main/java/org/apache/tsfile/read/expression/ExpressionTree.java new file mode 100644 index 000000000..fa0b11389 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/expression/ExpressionTree.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.read.expression; + +import org.apache.tsfile.read.filter.basic.Filter; + +public interface ExpressionTree { + boolean satisfy(Object value); + + Filter toFilter(); +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/filter/factory/FilterFactory.java b/java/tsfile/src/main/java/org/apache/tsfile/read/filter/factory/FilterFactory.java index 1fa9b4e01..f29fc7eda 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/filter/factory/FilterFactory.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/filter/factory/FilterFactory.java @@ -24,6 +24,8 @@ import org.apache.tsfile.read.filter.operator.Not; import org.apache.tsfile.read.filter.operator.Or; +import java.util.List; + import static org.apache.tsfile.utils.Preconditions.checkArgument; public class FilterFactory { @@ -43,6 +45,14 @@ public static Filter and(Filter left, Filter right) { return new And(left, right); } + public static Filter and(List filterList) { + And result = new And(filterList.get(0), filterList.get(1)); + for (int i = 2, size = filterList.size(); i < size; i++) { + result = new And(result, filterList.get(i)); + } + return result; + } + public static Filter or(Filter left, Filter right) { if (left == null && right == null) { return null; @@ -54,6
+64,14 @@ public static Filter or(Filter left, Filter right) { return new Or(left, right); } + public static Filter or(List filterList) { + Or result = new Or(filterList.get(0), filterList.get(1)); + for (int i = 2, size = filterList.size(); i < size; i++) { + result = new Or(result, filterList.get(i)); + } + return result; + } + public static Not not(Filter filter) { checkArgument(filter != null, "filter cannot be null"); return new Not(filter); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/query/executor/TableQueryExecutor.java b/java/tsfile/src/main/java/org/apache/tsfile/read/query/executor/TableQueryExecutor.java new file mode 100644 index 000000000..9cce7c78f --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/query/executor/TableQueryExecutor.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tsfile.read.query.executor; + +import org.apache.tsfile.exception.read.NoColumnException; +import org.apache.tsfile.exception.read.ReadProcessException; +import org.apache.tsfile.exception.read.UnsupportedOrderingException; +import org.apache.tsfile.file.metadata.MetadataIndexNode; +import org.apache.tsfile.file.metadata.TableSchema; +import org.apache.tsfile.file.metadata.TsFileMetadata; +import org.apache.tsfile.read.controller.IChunkLoader; +import org.apache.tsfile.read.controller.IMetadataQuerier; +import org.apache.tsfile.read.expression.ExpressionTree; +import org.apache.tsfile.read.query.executor.task.DeviceTaskIterator; +import org.apache.tsfile.read.reader.block.DeviceOrderedTsBlockReader; +import org.apache.tsfile.read.reader.block.TsBlockReader; +import org.apache.tsfile.read.reader.block.TsBlockReader.EmptyTsBlockReader; +import org.apache.tsfile.write.record.Tablet.ColumnType; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class TableQueryExecutor { + + private IMetadataQuerier metadataQuerier; + private IChunkLoader chunkLoader; + private TableQueryOrdering tableQueryOrdering; + private int blockSize = 1024; + + public TableQueryExecutor( + IMetadataQuerier metadataQuerier, + IChunkLoader chunkLoader, + TableQueryOrdering tableQueryOrdering) { + this.metadataQuerier = metadataQuerier; + this.chunkLoader = chunkLoader; + this.tableQueryOrdering = tableQueryOrdering; + } + + /** + * @param tableName table to query + * @param columns columns to query (ID or MEASUREMENT) + * @param timeFilter time predicate + * @param idFilter id predicate + * @param measurementFilter measurement predicate + * @return an iterator of TsBlocks + * @throws ReadProcessException if the read process fails + */ + public TsBlockReader query( + String tableName, + List columns, + ExpressionTree timeFilter, 
+ ExpressionTree idFilter, + ExpressionTree measurementFilter) + throws ReadProcessException { + TsFileMetadata fileMetadata = metadataQuerier.getWholeFileMetadata(); + MetadataIndexNode tableRoot = fileMetadata.getTableMetadataIndexNode(tableName); + TableSchema tableSchema = fileMetadata.getTableSchemaMap().get(tableName); + if (tableRoot == null || tableSchema == null) { + return new EmptyTsBlockReader(); + } + + ColumnMapping columnMapping = new ColumnMapping(); + for (int i = 0; i < columns.size(); i++) { + String column = columns.get(i); + columnMapping.add(column, i, tableSchema); + } + columnMapping.add(measurementFilter); + + DeviceTaskIterator deviceTaskIterator = + new DeviceTaskIterator( + columns, tableRoot, columnMapping, metadataQuerier, idFilter, tableSchema); + switch (tableQueryOrdering) { + case DEVICE: + return new DeviceOrderedTsBlockReader( + deviceTaskIterator, + metadataQuerier, + chunkLoader, + timeFilter, + measurementFilter, + blockSize); + case TIME: + default: + throw new UnsupportedOrderingException(tableQueryOrdering.toString()); + } + } + + public class ColumnMapping { + /** + * The same column may occur multiple times in a query, but we surely do not want to read it + * redundantly. This mapping is used to put data of the same series into multiple columns. 
+ */ + private Map> columnPosMap = new HashMap<>(); + + private Set idColumns = new HashSet<>(); + private Set measurementColumns = new HashSet<>(); + + public void add(String columnName, int i, TableSchema schema) throws NoColumnException { + final int columnIndex = schema.findColumnIndex(columnName); + if (columnIndex < 0) { + throw new NoColumnException(columnName); + } + + final ColumnType columnType = schema.getColumnTypes().get(columnIndex); + columnPosMap.computeIfAbsent(columnName, k -> new ArrayList<>()).add(i); + if (columnType.equals(ColumnType.ID)) { + idColumns.add(columnName); + } else { + measurementColumns.add(columnName); + } + } + + public void add(ExpressionTree measurementFilter) { + // TODO: get measurements in the filter and add them to measurementColumns + } + + public List getColumnPos(String columnName) { + return columnPosMap.getOrDefault(columnName, Collections.emptyList()); + } + + public boolean isId(String columnName) { + return idColumns.contains(columnName); + } + + public boolean isMeasurement(String columnName) { + return measurementColumns.contains(columnName); + } + + public Set getIdColumns() { + return idColumns; + } + + public Set getMeasurementColumns() { + return measurementColumns; + } + } + + public enum TableQueryOrdering { + TIME, + DEVICE + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/query/executor/task/DeviceQueryTask.java b/java/tsfile/src/main/java/org/apache/tsfile/read/query/executor/task/DeviceQueryTask.java new file mode 100644 index 000000000..e88218e62 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/query/executor/task/DeviceQueryTask.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.read.query.executor.task; + +import org.apache.tsfile.file.metadata.IDeviceID; +import org.apache.tsfile.file.metadata.MetadataIndexNode; +import org.apache.tsfile.file.metadata.TableSchema; +import org.apache.tsfile.read.query.executor.TableQueryExecutor.ColumnMapping; + +import java.util.List; + +public class DeviceQueryTask { + private final IDeviceID deviceID; + private final List columnNames; + private final ColumnMapping columnMapping; + private final MetadataIndexNode indexRoot; + private final TableSchema tableSchema; + + public DeviceQueryTask( + IDeviceID deviceID, + List columnNames, + ColumnMapping columnMapping, + MetadataIndexNode indexRoot, + TableSchema tableSchema) { + this.deviceID = deviceID; + this.columnNames = columnNames; + this.columnMapping = columnMapping; + this.indexRoot = indexRoot; + this.tableSchema = tableSchema; + } + + public IDeviceID getDeviceID() { + return deviceID; + } + + public List getColumnNames() { + return columnNames; + } + + public ColumnMapping getColumnMapping() { + return columnMapping; + } + + public MetadataIndexNode getIndexRoot() { + return indexRoot; + } + + public TableSchema getTableSchema() { + return tableSchema; + } + + @Override + public String toString() { + return "DeviceQueryTask{" + "deviceID=" + deviceID + ", columnNames=" + columnNames + '}'; + } +} diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/read/query/executor/task/DeviceTaskIterator.java b/java/tsfile/src/main/java/org/apache/tsfile/read/query/executor/task/DeviceTaskIterator.java new file mode 100644 index 000000000..8ee446503 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/query/executor/task/DeviceTaskIterator.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tsfile.read.query.executor.task; + +import org.apache.tsfile.file.metadata.IDeviceID; +import org.apache.tsfile.file.metadata.MetadataIndexNode; +import org.apache.tsfile.file.metadata.TableSchema; +import org.apache.tsfile.read.controller.IMetadataQuerier; +import org.apache.tsfile.read.expression.ExpressionTree; +import org.apache.tsfile.read.query.executor.TableQueryExecutor.ColumnMapping; +import org.apache.tsfile.utils.Pair; + +import java.util.Iterator; +import java.util.List; + +public class DeviceTaskIterator implements Iterator { + private List columnNames; + private ColumnMapping columnMapping; + private TableSchema tableSchema; + private Iterator> deviceMetaIterator; + + public DeviceTaskIterator( + List columnNames, + MetadataIndexNode indexRoot, + ColumnMapping columnMapping, + IMetadataQuerier metadataQuerier, + ExpressionTree idFilter, + TableSchema tableSchema) { + this.columnNames = columnNames; + this.columnMapping = columnMapping; + this.deviceMetaIterator = metadataQuerier.deviceIterator(indexRoot, idFilter); + this.tableSchema = tableSchema; + } + + @Override + public boolean hasNext() { + return deviceMetaIterator.hasNext(); + } + + @Override + public DeviceQueryTask next() { + final Pair next = deviceMetaIterator.next(); + return new DeviceQueryTask(next.left, columnNames, columnMapping, next.right, tableSchema); + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/reader/block/DeviceOrderedTsBlockReader.java b/java/tsfile/src/main/java/org/apache/tsfile/read/reader/block/DeviceOrderedTsBlockReader.java new file mode 100644 index 000000000..30ddca8f4 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/reader/block/DeviceOrderedTsBlockReader.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.read.reader.block; + +import org.apache.tsfile.read.common.block.TsBlock; +import org.apache.tsfile.read.controller.IChunkLoader; +import org.apache.tsfile.read.controller.IMetadataQuerier; +import org.apache.tsfile.read.expression.ExpressionTree; +import org.apache.tsfile.read.query.executor.task.DeviceQueryTask; +import org.apache.tsfile.read.query.executor.task.DeviceTaskIterator; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.NoSuchElementException; + +public class DeviceOrderedTsBlockReader implements TsBlockReader { + + private static final Logger LOGGER = LoggerFactory.getLogger(DeviceOrderedTsBlockReader.class); + private final DeviceTaskIterator taskIterator; + private final IMetadataQuerier metadataQuerier; + private final IChunkLoader chunkLoader; + private final int blockSize; + private SingleDeviceTsBlockReader currentReader; + private ExpressionTree timeFilter; + private ExpressionTree measurementFilter; + + public DeviceOrderedTsBlockReader( + DeviceTaskIterator taskIterator, + IMetadataQuerier metadataQuerier, + IChunkLoader chunkLoader, + ExpressionTree timeFilter, + ExpressionTree measurementFilter, + int blockSize) { + this.taskIterator = taskIterator; + this.metadataQuerier = metadataQuerier; + this.chunkLoader = chunkLoader; + this.blockSize = blockSize; + 
this.timeFilter = timeFilter; + this.measurementFilter = measurementFilter; + } + + @Override + public boolean hasNext() { + if (currentReader != null && currentReader.hasNext()) { + return true; + } + while (taskIterator.hasNext()) { + final DeviceQueryTask nextTask = taskIterator.next(); + try { + currentReader = + new SingleDeviceTsBlockReader( + nextTask, metadataQuerier, chunkLoader, blockSize, timeFilter, measurementFilter); + } catch (IOException e) { + LOGGER.error("Failed to construct reader for {}", nextTask, e); + } + if (currentReader.hasNext()) { + return true; + } + } + return false; + } + + @Override + public TsBlock next() throws IOException { + if (!hasNext()) { + throw new NoSuchElementException(); + } + return currentReader.next(); + } + + @Override + public void close() throws Exception { + if (currentReader != null) { + currentReader.close(); + } + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/reader/block/SingleDeviceTsBlockReader.java b/java/tsfile/src/main/java/org/apache/tsfile/read/reader/block/SingleDeviceTsBlockReader.java new file mode 100644 index 000000000..473c3127c --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/reader/block/SingleDeviceTsBlockReader.java @@ -0,0 +1,408 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.read.reader.block; + +import org.apache.tsfile.block.column.Column; +import org.apache.tsfile.file.metadata.AlignedChunkMetadata; +import org.apache.tsfile.file.metadata.IChunkMetadata; +import org.apache.tsfile.read.common.BatchData; +import org.apache.tsfile.read.common.block.TsBlock; +import org.apache.tsfile.read.controller.IChunkLoader; +import org.apache.tsfile.read.controller.IMetadataQuerier; +import org.apache.tsfile.read.expression.ExpressionTree; +import org.apache.tsfile.read.filter.basic.Filter; +import org.apache.tsfile.read.query.executor.task.DeviceQueryTask; +import org.apache.tsfile.read.reader.series.AbstractFileSeriesReader; +import org.apache.tsfile.read.reader.series.FileSeriesReader; +import org.apache.tsfile.utils.Binary; +import org.apache.tsfile.utils.TsPrimitiveType; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.NoSuchElementException; + +public class SingleDeviceTsBlockReader implements TsBlockReader { + + private static final Logger LOGGER = LoggerFactory.getLogger(SingleDeviceTsBlockReader.class); + private final DeviceQueryTask task; + private final ExpressionTree measurementExpression; + private final int blockSize; + + private final TsBlock currentBlock; + private boolean lastBlockReturned = true; + private final Map measureColumnContextMap; + private final Map idColumnContextMap; + + private long nextTime; + + public SingleDeviceTsBlockReader( + DeviceQueryTask task, + IMetadataQuerier metadataQuerier, + IChunkLoader chunkLoader, + int blockSize, + ExpressionTree timeExpression, + ExpressionTree measurementFilter) + throws 
IOException { + this.task = task; + this.blockSize = blockSize; + this.measurementExpression = measurementFilter; + + this.currentBlock = + TsBlock.buildTsBlock(task.getColumnNames(), task.getTableSchema(), blockSize); + this.measureColumnContextMap = new HashMap<>(); + this.idColumnContextMap = new HashMap<>(); + + final List> chunkMetadataLists = + metadataQuerier.getChunkMetadataLists( + task.getDeviceID(), + task.getColumnMapping().getMeasurementColumns(), + task.getIndexRoot()); + + Filter timeFilter = timeExpression == null ? null : timeExpression.toFilter(); + for (List chunkMetadataList : chunkMetadataLists) { + constructColumnContext(chunkMetadataList, chunkLoader, timeFilter); + } + + for (String idColumn : task.getColumnMapping().getIdColumns()) { + final List columnPosInResult = task.getColumnMapping().getColumnPos(idColumn); + // the first segment in DeviceId is the table name + final int columnPosInId = task.getTableSchema().findColumnIndex(idColumn) + 1; + idColumnContextMap.put(idColumn, new IdColumnContext(columnPosInResult, columnPosInId)); + } + } + + private void constructColumnContext( + List chunkMetadataList, IChunkLoader chunkLoader, Filter timeFilter) + throws IOException { + if (chunkMetadataList.isEmpty()) { + return; + } + final IChunkMetadata chunkMetadata = chunkMetadataList.get(0); + AbstractFileSeriesReader seriesReader = + new FileSeriesReader(chunkLoader, chunkMetadataList, timeFilter); + if (seriesReader.hasNextBatch()) { + if (chunkMetadata instanceof AlignedChunkMetadata) { + final List currentChunkMeasurementNames = + seriesReader.getCurrentChunkMeasurementNames(); + List> posInResult = new ArrayList<>(); + for (String currentChunkMeasurementName : currentChunkMeasurementNames) { + posInResult.add(task.getColumnMapping().getColumnPos(currentChunkMeasurementName)); + } + measureColumnContextMap.put( + "", + new VectorMeasurementColumnContext( + posInResult, seriesReader.nextBatch(), seriesReader)); + } else { + final String 
measurementUid = chunkMetadata.getMeasurementUid(); + measureColumnContextMap.put( + measurementUid, + new SingleMeasurementColumnContext( + measurementUid, + task.getColumnMapping().getColumnPos(measurementUid), + seriesReader.nextBatch(), + seriesReader)); + } + } + } + + @Override + public boolean hasNext() { + if (!lastBlockReturned) { + return true; + } + + if (measureColumnContextMap.isEmpty()) { + return false; + } + + currentBlock.reset(); + nextTime = Long.MAX_VALUE; + List minTimeColumns = new ArrayList<>(); + + while (currentBlock.getPositionCount() < blockSize) { + // find the minimum time among the batches and the associated columns + for (Entry entry : measureColumnContextMap.entrySet()) { + final BatchData batchData = entry.getValue().currentBatch; + final long currentTime = batchData.currentTime(); + if (nextTime > currentTime) { + nextTime = currentTime; + minTimeColumns.clear(); + minTimeColumns.add(entry.getValue()); + } else if (nextTime == currentTime) { + minTimeColumns.add(entry.getValue()); + } + } + + try { + fillMeasurements(minTimeColumns); + } catch (IOException e) { + LOGGER.error("Cannot fill measurements", e); + return false; + } + + // all columns have exhausted + if (measureColumnContextMap.isEmpty()) { + break; + } + } + + if (currentBlock.getPositionCount() > 0) { + fillIds(); + currentBlock.fillTrailingNulls(); + lastBlockReturned = false; + return true; + } + + return false; + } + + private void fillIds() { + for (Entry entry : idColumnContextMap.entrySet()) { + final IdColumnContext idColumnContext = entry.getValue(); + for (Integer pos : idColumnContext.posInResult) { + final Column column = currentBlock.getColumn(pos); + fillIdColumn( + column, + task.getDeviceID().segment(idColumnContext.posInDeviceId), + 0, + currentBlock.getPositionCount()); + } + } + } + + private void fillMeasurements(List minTimeColumns) throws IOException { + if (measurementExpression == null || measurementExpression.satisfy(this)) { + // use the time 
to fill the block + final int positionCount = currentBlock.getPositionCount(); + currentBlock.getTimeColumn().getLongs()[positionCount] = nextTime; + // project the value columns to the result + for (final MeasurementColumnContext columnContext : minTimeColumns) { + columnContext.fillInto(currentBlock, positionCount); + advanceColumn(columnContext.currentBatch, columnContext); + } + currentBlock.setPositionCount(positionCount + 1); + } else { + for (final MeasurementColumnContext columnContext : minTimeColumns) { + final BatchData batchData = columnContext.currentBatch; + advanceColumn(batchData, columnContext); + } + } + } + + private void advanceColumn(BatchData batchData, MeasurementColumnContext columnContext) + throws IOException { + batchData.next(); + if (!batchData.hasCurrent()) { + // get next batch of the column + if (columnContext.seriesReader.hasNextBatch()) { + columnContext.currentBatch = columnContext.seriesReader.nextBatch(); + } else { + // no more data in this column + columnContext.removeFrom(measureColumnContextMap); + } + } + } + + private void fillIdColumn(Column column, Object val, int startPos, int endPos) { + switch (column.getDataType()) { + case TEXT: + if (val instanceof String) { + val = new Binary(((String) val), StandardCharsets.UTF_8); + } + Arrays.fill(column.getBinaries(), startPos, endPos, val); + break; + case BOOLEAN: + Arrays.fill(column.getBooleans(), startPos, endPos, ((boolean) val)); + break; + case INT32: + Arrays.fill(column.getInts(), startPos, endPos, ((int) val)); + break; + case INT64: + Arrays.fill(column.getLongs(), startPos, endPos, ((long) val)); + break; + case FLOAT: + Arrays.fill(column.getFloats(), startPos, endPos, ((float) val)); + break; + case DOUBLE: + Arrays.fill(column.getDoubles(), startPos, endPos, ((double) val)); + break; + default: + throw new IllegalArgumentException("Unsupported data type: " + column.getDataType()); + } + column.setPositionCount(endPos); + } + + private static void 
fillSingleMeasurementColumn(Column column, BatchData batchData, int pos) { + switch (batchData.getDataType()) { + case BOOLEAN: + column.getBooleans()[pos] = batchData.getBoolean(); + break; + case DOUBLE: + column.getDoubles()[pos] = batchData.getDouble(); + break; + case FLOAT: + column.getFloats()[pos] = batchData.getFloat(); + break; + case INT32: + column.getInts()[pos] = batchData.getInt(); + break; + case TEXT: + column.getBinaries()[pos] = batchData.getBinary(); + break; + case INT64: + column.getLongs()[pos] = batchData.getLong(); + break; + default: + throw new IllegalArgumentException("Unsupported data type: " + batchData.getDataType()); + } + column.setPositionCount(pos + 1); + } + + @Override + public TsBlock next() throws IOException { + if (!hasNext()) { + throw new NoSuchElementException(); + } + lastBlockReturned = true; + return currentBlock; + } + + @Override + public void close() throws Exception { + // nothing to be done + } + + public abstract static class MeasurementColumnContext { + + protected BatchData currentBatch; + protected final AbstractFileSeriesReader seriesReader; + + protected MeasurementColumnContext( + AbstractFileSeriesReader seriesReader, BatchData currentBatch) { + this.seriesReader = seriesReader; + this.currentBatch = currentBatch; + } + + abstract void removeFrom(Map columnContextMap); + + abstract void fillInto(TsBlock block, int position); + } + + // gather necessary fields in this class to avoid redundant map access + public static class SingleMeasurementColumnContext extends MeasurementColumnContext { + + private final String columnName; + private final List posInResult; + + public SingleMeasurementColumnContext( + String columnName, + List posInResult, + BatchData currentBatch, + AbstractFileSeriesReader seriesReader) { + super(seriesReader, currentBatch); + this.columnName = columnName; + this.posInResult = posInResult; + } + + @Override + void removeFrom(Map columnContextMap) { + columnContextMap.remove(columnName); 
+ } + + @Override + void fillInto(TsBlock block, int position) { + for (Integer pos : posInResult) { + final Column column = block.getColumn(pos); + fillSingleMeasurementColumn(column, currentBatch, position); + } + } + } + + public static class VectorMeasurementColumnContext extends MeasurementColumnContext { + + private final List> posInResult; + + public VectorMeasurementColumnContext( + List> posInResult, + BatchData currentBatch, + AbstractFileSeriesReader seriesReader) { + super(seriesReader, currentBatch); + this.posInResult = posInResult; + } + + @Override + void removeFrom(Map columnContextMap) { + columnContextMap.remove(""); + } + + @Override + void fillInto(TsBlock block, int blockRowNum) { + final TsPrimitiveType[] vector = currentBatch.getVector(); + for (int i = 0; i < vector.length; i++) { + final TsPrimitiveType value = vector[i]; + final List columnPositions = posInResult.get(i); + for (Integer pos : columnPositions) { + switch (value.getDataType()) { + case TEXT: + block.getColumn(pos).getBinaries()[blockRowNum] = value.getBinary(); + break; + case INT32: + block.getColumn(pos).getInts()[blockRowNum] = value.getInt(); + break; + case INT64: + block.getColumn(pos).getLongs()[blockRowNum] = value.getLong(); + break; + case BOOLEAN: + block.getColumn(pos).getBooleans()[blockRowNum] = value.getBoolean(); + break; + case FLOAT: + block.getColumn(pos).getFloats()[blockRowNum] = value.getFloat(); + break; + case DOUBLE: + block.getColumn(pos).getDoubles()[blockRowNum] = value.getDouble(); + break; + default: + throw new IllegalArgumentException("Unsupported data type: " + value.getDataType()); + } + } + } + } + } + + public static class IdColumnContext { + + private final List posInResult; + private final int posInDeviceId; + + public IdColumnContext(List posInResult, int posInDeviceId) { + this.posInResult = posInResult; + this.posInDeviceId = posInDeviceId; + } + } +} diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/read/reader/block/TsBlockReader.java b/java/tsfile/src/main/java/org/apache/tsfile/read/reader/block/TsBlockReader.java new file mode 100644 index 000000000..a63c07179 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/reader/block/TsBlockReader.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tsfile.read.reader.block; + +import org.apache.tsfile.read.common.block.TsBlock; + +import java.io.IOException; + +public interface TsBlockReader extends AutoCloseable { + boolean hasNext(); + + TsBlock next() throws IOException; + + class EmptyTsBlockReader implements TsBlockReader { + + @Override + public boolean hasNext() { + return false; + } + + @Override + public TsBlock next() throws IOException { + return null; + } + + @Override + public void close() throws Exception { + // nothing to be done + } + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/reader/series/AbstractFileSeriesReader.java b/java/tsfile/src/main/java/org/apache/tsfile/read/reader/series/AbstractFileSeriesReader.java index edb8c05e7..735b44c9b 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/reader/series/AbstractFileSeriesReader.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/reader/series/AbstractFileSeriesReader.java @@ -27,6 +27,7 @@ import org.apache.tsfile.read.reader.IChunkReader; import java.io.IOException; +import java.util.ArrayList; import java.util.List; /** Series reader is used to query one series of one tsfile. 
*/ @@ -35,6 +36,7 @@ public abstract class AbstractFileSeriesReader implements IBatchReader { protected IChunkLoader chunkLoader; protected List chunkMetadataList; protected IChunkReader chunkReader; + protected List currentChunkMeasurementNames = new ArrayList<>(); private int chunkToRead; protected Filter filter; @@ -91,4 +93,8 @@ public void close() throws IOException { private IChunkMetadata nextChunkMeta() { return chunkMetadataList.get(chunkToRead++); } + + public List getCurrentChunkMeasurementNames() { + return currentChunkMeasurementNames; + } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/reader/series/FileSeriesReader.java b/java/tsfile/src/main/java/org/apache/tsfile/read/reader/series/FileSeriesReader.java index 1704d553f..c6cb00d96 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/reader/series/FileSeriesReader.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/reader/series/FileSeriesReader.java @@ -45,9 +45,11 @@ public FileSeriesReader( @Override protected void initChunkReader(IChunkMetadata chunkMetaData) throws IOException { + currentChunkMeasurementNames.clear(); if (chunkMetaData instanceof ChunkMetadata) { Chunk chunk = chunkLoader.loadChunk((ChunkMetadata) chunkMetaData); this.chunkReader = new ChunkReader(chunk, filter); + currentChunkMeasurementNames.add(chunkMetaData.getMeasurementUid()); } else { AlignedChunkMetadata alignedChunkMetadata = (AlignedChunkMetadata) chunkMetaData; Chunk timeChunk = @@ -55,6 +57,7 @@ protected void initChunkReader(IChunkMetadata chunkMetaData) throws IOException List valueChunkList = new ArrayList<>(); for (IChunkMetadata metadata : alignedChunkMetadata.getValueChunkMetadataList()) { valueChunkList.add(chunkLoader.loadChunk((ChunkMetadata) metadata)); + currentChunkMeasurementNames.add(metadata.getMeasurementUid()); } this.chunkReader = new AlignedChunkReader(timeChunk, valueChunkList, filter); } diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/utils/MeasurementGroup.java b/java/tsfile/src/main/java/org/apache/tsfile/utils/MeasurementGroup.java index 996e3476d..84f95f24c 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/utils/MeasurementGroup.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/utils/MeasurementGroup.java @@ -18,7 +18,7 @@ */ package org.apache.tsfile.utils; -import org.apache.tsfile.write.schema.MeasurementSchema; +import org.apache.tsfile.write.schema.IMeasurementSchema; import java.io.Serializable; import java.util.HashMap; @@ -27,22 +27,22 @@ public class MeasurementGroup implements Serializable { private boolean isAligned; - private Map measurementSchemaMap; + private Map measurementSchemaMap; public MeasurementGroup(boolean isAligned) { this.isAligned = isAligned; measurementSchemaMap = new HashMap<>(); } - public MeasurementGroup(boolean isAligned, List measurementSchemas) { + public MeasurementGroup(boolean isAligned, List measurementSchemas) { this.isAligned = isAligned; measurementSchemaMap = new HashMap<>(); - for (MeasurementSchema schema : measurementSchemas) { + for (IMeasurementSchema schema : measurementSchemas) { measurementSchemaMap.put(schema.getMeasurementId(), schema); } } - public MeasurementGroup(boolean isAligned, Map measurementSchemaMap) { + public MeasurementGroup(boolean isAligned, Map measurementSchemaMap) { this.isAligned = isAligned; this.measurementSchemaMap = measurementSchemaMap; } @@ -55,11 +55,11 @@ public void setAligned(boolean aligned) { isAligned = aligned; } - public Map getMeasurementSchemaMap() { + public Map getMeasurementSchemaMap() { return measurementSchemaMap; } - public void setMeasurementSchemaMap(Map measurementSchemaMap) { + public void setMeasurementSchemaMap(Map measurementSchemaMap) { this.measurementSchemaMap = measurementSchemaMap; } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/utils/ReadWriteForEncodingUtils.java 
b/java/tsfile/src/main/java/org/apache/tsfile/utils/ReadWriteForEncodingUtils.java index 7d7412231..15940db5f 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/utils/ReadWriteForEncodingUtils.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/utils/ReadWriteForEncodingUtils.java @@ -18,6 +18,8 @@ */ package org.apache.tsfile.utils; +import org.apache.tsfile.common.conf.TSFileConfig; + import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; @@ -237,6 +239,16 @@ public static int varIntSize(int value) { return position; } + public static int varIntStringSize(String str) { + final byte[] bytes = str.getBytes(TSFileConfig.STRING_CHARSET); + return varIntSize(bytes.length) + bytes.length; + } + + public static int intStringSize(String str) { + final byte[] bytes = str.getBytes(TSFileConfig.STRING_CHARSET); + return Integer.BYTES + bytes.length; + } + /** * Returns the encoding size in bytes of its input value. * diff --git a/java/tsfile/src/main/java/org/apache/tsfile/utils/ReadWriteIOUtils.java b/java/tsfile/src/main/java/org/apache/tsfile/utils/ReadWriteIOUtils.java index 6e55e760c..e4133cc7a 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/utils/ReadWriteIOUtils.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/utils/ReadWriteIOUtils.java @@ -64,7 +64,7 @@ public class ReadWriteIOUtils { public static final int FLOAT_LEN = 4; public static final float BIT_LEN = 0.125F; - private static final int NO_BYTE_TO_READ = -1; + public static final int NO_BYTE_TO_READ = -1; private static final byte[] magicStringBytes; @@ -378,7 +378,7 @@ public static int write(String s, OutputStream outputStream) throws IOException return len; } - byte[] bytes = s.getBytes(); + byte[] bytes = s.getBytes(TSFileConfig.STRING_CHARSET); len += write(bytes.length, outputStream); outputStream.write(bytes); len += bytes.length; @@ -414,7 +414,7 @@ public static int write(String s, ByteBuffer buffer) { return write(NO_BYTE_TO_READ, 
buffer); } int len = 0; - byte[] bytes = s.getBytes(); + byte[] bytes = s.getBytes(TSFileConfig.STRING_CHARSET); len += write(bytes.length, buffer); buffer.put(bytes); len += bytes.length; diff --git a/java/tsfile/src/main/java/org/apache/tsfile/utils/TsFileGeneratorUtils.java b/java/tsfile/src/main/java/org/apache/tsfile/utils/TsFileGeneratorUtils.java index ba1b5e317..ff7cb33cc 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/utils/TsFileGeneratorUtils.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/utils/TsFileGeneratorUtils.java @@ -57,7 +57,7 @@ public class TsFileGeneratorUtils { public static void writeWithTsRecord( TsFileWriter tsFileWriter, String deviceId, - List schemas, + List schemas, long rowSize, long startTime, long startValue, @@ -110,7 +110,7 @@ public static void writeWithTsRecord( public static void writeWithTablet( TsFileWriter tsFileWriter, String deviceId, - List schemas, + List schemas, long rowNum, long startTime, long startValue, @@ -173,7 +173,7 @@ public static File generateMixTsFile( TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(pageSize); try (TsFileWriter tsFileWriter = new TsFileWriter(file)) { // register align timeseries - List alignedMeasurementSchemas = new ArrayList<>(); + List alignedMeasurementSchemas = new ArrayList<>(); for (int i = 0; i < measurementNum; i++) { alignedMeasurementSchemas.add( new MeasurementSchema("s" + i, TSDataType.INT64, TSEncoding.PLAIN)); @@ -196,7 +196,7 @@ public static File generateMixTsFile( } // register nonAlign timeseries - List measurementSchemas = new ArrayList<>(); + List measurementSchemas = new ArrayList<>(); for (int i = 0; i < measurementNum; i++) { measurementSchemas.add( new MeasurementSchema("s" + i, TSDataType.INT64, TSEncoding.PLAIN)); @@ -245,7 +245,7 @@ public static File generateAlignedTsFile( TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(pageSize); try (TsFileWriter tsFileWriter = new TsFileWriter(file)) { // register 
align timeseries - List alignedMeasurementSchemas = new ArrayList<>(); + List alignedMeasurementSchemas = new ArrayList<>(); for (int i = 0; i < measurementNum; i++) { alignedMeasurementSchemas.add( new MeasurementSchema("s" + i, getDataType(i), TSEncoding.PLAIN)); @@ -290,7 +290,7 @@ public static File generateNonAlignedTsFile( TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(pageSize); try (TsFileWriter tsFileWriter = new TsFileWriter(file)) { // register nonAlign timeseries - List measurementSchemas = new ArrayList<>(); + List measurementSchemas = new ArrayList<>(); for (int i = 0; i < measurementNum; i++) { measurementSchemas.add(new MeasurementSchema("s" + i, getDataType(i), TSEncoding.PLAIN)); } @@ -334,7 +334,7 @@ public static File generateAlignedTsFileWithTextValues( TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(pageSize); try (TsFileWriter tsFileWriter = new TsFileWriter(file)) { // register align timeseries - List alignedMeasurementSchemas = new ArrayList<>(); + List alignedMeasurementSchemas = new ArrayList<>(); for (int i = 0; i < measurementIndex.size(); i++) { alignedMeasurementSchemas.add( new MeasurementSchema( @@ -392,7 +392,7 @@ public static File generateNonAlignedTsFileWithTextValues( TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(pageSize); try (TsFileWriter tsFileWriter = new TsFileWriter(file)) { // register nonAlign timeseries - List measurementSchemas = new ArrayList<>(); + List measurementSchemas = new ArrayList<>(); for (int i = 0; i < measurementIndex.size(); i++) { measurementSchemas.add( new MeasurementSchema( diff --git a/java/tsfile/src/main/java/org/apache/tsfile/utils/TsFileSketchTool.java b/java/tsfile/src/main/java/org/apache/tsfile/utils/TsFileSketchTool.java new file mode 100644 index 000000000..e7ce355af --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/utils/TsFileSketchTool.java @@ -0,0 +1,730 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.utils; + +import org.apache.tsfile.common.conf.TSFileConfig; +import org.apache.tsfile.file.IMetadataIndexEntry; +import org.apache.tsfile.file.MetaMarker; +import org.apache.tsfile.file.header.ChunkGroupHeader; +import org.apache.tsfile.file.header.PageHeader; +import org.apache.tsfile.file.metadata.ChunkGroupMetadata; +import org.apache.tsfile.file.metadata.ChunkMetadata; +import org.apache.tsfile.file.metadata.DeviceMetadataIndexEntry; +import org.apache.tsfile.file.metadata.IDeviceID; +import org.apache.tsfile.file.metadata.MetadataIndexNode; +import org.apache.tsfile.file.metadata.TableSchema; +import org.apache.tsfile.file.metadata.TimeseriesMetadata; +import org.apache.tsfile.file.metadata.TsFileMetadata; +import org.apache.tsfile.file.metadata.enums.MetadataIndexNodeType; +import org.apache.tsfile.fileSystem.FSFactoryProducer; +import org.apache.tsfile.read.TsFileCheckStatus; +import org.apache.tsfile.read.TsFileSequenceReader; +import org.apache.tsfile.read.common.Chunk; +import org.apache.tsfile.read.common.Path; +import org.apache.tsfile.write.schema.Schema; + +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; +import 
java.nio.BufferOverflowException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.TreeMap; + +public class TsFileSketchTool { + + private String filename; + private PrintWriter pw; + private TsFileSketchToolReader reader; + private String splitStr; // for split different part of TsFile + TsFileMetadata tsFileMetaData; + List allChunkGroupMetadata; + + public static void main(String[] args) throws IOException { + Pair fileNames = checkArgs(args); + String filename = fileNames.left; + String outFile = fileNames.right; + System.out.println("TsFile path:" + filename); + System.out.println("Sketch save path:" + outFile); + new TsFileSketchTool(filename, outFile).run(); + } + + /** + * construct TsFileSketchTool + * + * @param filename input file path + * @param outFile output file path + */ + public TsFileSketchTool(String filename, String outFile) { + try { + this.filename = filename; + pw = new PrintWriter(new FileWriter(outFile)); + reader = new TsFileSketchToolReader(filename); + StringBuilder str1 = new StringBuilder(); + for (int i = 0; i < 21; i++) { + str1.append("|"); + } + splitStr = str1.toString(); + // get metadata information + tsFileMetaData = reader.readFileMetadata(); + allChunkGroupMetadata = new ArrayList<>(); + if (reader.selfCheck(new Schema(), allChunkGroupMetadata, false) + != TsFileCheckStatus.COMPLETE_FILE) { + throw new IOException( + String.format("Cannot load file %s because the file has crashed.", filename)); + } + } catch (IOException e) { + e.printStackTrace(); + } + } + + /** entry of tool */ + public void run() throws IOException { + long length = FSFactoryProducer.getFSFactory().getFile(filename).length(); + printlnBoth( + pw, "-------------------------------- TsFile Sketch --------------------------------"); + printlnBoth(pw, "file path: " + filename); + printlnBoth(pw, "file length: " + length); + + // 
print file information + printFileInfo(); + + // print chunk + printChunk(allChunkGroupMetadata); + + // metadata begins + if (tsFileMetaData.getTableMetadataIndexNodeMap().isEmpty()) { + printlnBoth( + pw, String.format("%20s", reader.getFileMetadataPos() - 1) + "|\t" + "[marker] 2"); + } else { + printlnBoth( + pw, + String.format("%20s", reader.readFileMetadata().getMetaOffset()) + "|\t" + "[marker] 2"); + } + + // get all TimerseriesIndex + Map> timeseriesMetadataMap = + reader.getAllTimeseriesMetadataWithOffset(); + + // get all IndexOfTimerseriesIndex (excluding the root node in TsFileMetadata) + TreeMap metadataIndexNodeMap = new TreeMap<>(); + List treeOutputStringBuffer = new ArrayList<>(); + for (Entry entry : + tsFileMetaData.getTableMetadataIndexNodeMap().entrySet()) { + treeOutputStringBuffer.add(entry.getKey()); + loadIndexTree(entry.getValue(), metadataIndexNodeMap, treeOutputStringBuffer, 0); + } + + // iterate timeseriesMetadataMap and metadataIndexNodeMap to print info in increasing order of + // position + Iterator>> ite1 = + timeseriesMetadataMap.entrySet().iterator(); + Iterator> ite2 = metadataIndexNodeMap.entrySet().iterator(); + Entry> value1 = (ite1.hasNext() ? ite1.next() : null); + Entry value2 = (ite2.hasNext() ? ite2.next() : null); + while (value1 != null || value2 != null) { + if (value2 == null || (value1 != null && value1.getKey().compareTo(value2.getKey()) <= 0)) { + printTimeseriesIndex(value1.getKey(), value1.getValue()); + value1 = (ite1.hasNext() ? ite1.next() : null); + } else { + printIndexOfTimerseriesIndex(value2.getKey(), value2.getValue()); + value2 = (ite2.hasNext() ? 
ite2.next() : null); + } + } + + // print TsFile Metadata + printTsFileMetadata(tsFileMetaData); + + printlnBoth(pw, String.format("%20s", length) + "|\tEND of TsFile"); + printlnBoth( + pw, + "---------------------------- IndexOfTimerseriesIndex Tree -----------------------------"); + // print index tree + for (String str : treeOutputStringBuffer) { + printlnBoth(pw, str); + } + printlnBoth( + pw, + "---------------------------------- TsFile Sketch End ----------------------------------"); + reader.close(); + pw.close(); + } + + public void close() throws IOException { + reader.close(); + pw.close(); + } + + private void printTsFileMetadata(TsFileMetadata tsFileMetaData) { + try { + long pos = reader.getFileMetadataPos(); + printlnBoth(pw, splitStr + " [TsFileMetadata] begins"); + + // metadataIndex + printlnBoth( + pw, + String.format("%20s", pos) + + "|\tTableIndexRootCnt=" + + tsFileMetaData.getTableMetadataIndexNodeMap().size()); + pos += Integer.BYTES; + for (Entry entry : + tsFileMetaData.getTableMetadataIndexNodeMap().entrySet()) { + printlnBoth( + pw, + String.format("%20s", pos) + + "|\t[Table Name] " + + entry.getKey() + + ", size=" + + ReadWriteForEncodingUtils.intStringSize(entry.getKey())); + pos += ReadWriteForEncodingUtils.intStringSize(entry.getKey()); + pos = printIndexOfTimerseriesIndex(pos, entry.getValue()); + } + + // table schema + printlnBoth( + pw, + String.format("%20s", pos) + + "|\tTableSchemaCnt=" + + tsFileMetaData.getTableSchemaMap().size()); + pos += Integer.BYTES; + for (Entry entry : tsFileMetaData.getTableSchemaMap().entrySet()) { + final String tableName = entry.getKey(); + final TableSchema tableSchema = entry.getValue(); + + final int serializedSize = tableSchema.serializedSize(); + printlnBoth( + pw, + String.format("%20s", pos) + + "|\t[TableSchema] " + + tableSchema + + ", size=" + + serializedSize); + pos += ReadWriteForEncodingUtils.intStringSize(tableName) + serializedSize; + } + + // metaOffset + printlnBoth( + pw, 
String.format("%20s", pos) + "|\t[Meta Offset] " + tsFileMetaData.getMetaOffset()); + pos += Long.BYTES; + + // bloom filter + + BloomFilter bloomFilter = tsFileMetaData.getBloomFilter(); + if (bloomFilter != null) { + final int length = bloomFilter.serialize().length; + printlnBoth( + pw, + String.format("%20s", pos) + + "|\t[Bloom Filter Size] " + + "bit vector byte array length=" + + length + + bloomFilter.getHashFunctionSize()); + pos += Integer.BYTES; + printlnBoth( + pw, + String.format("%20s", pos) + + "|\t[Bloom Filter] " + + ", filterCapacity=" + + bloomFilter.getSize() + + ", hashFunctionSize=" + + bloomFilter.getHashFunctionSize()); + pos += length; + } + + printlnBoth(pw, splitStr + " [TsFileMetadata] ends"); + + printlnBoth( + pw, + String.format("%20s", (reader.getFileMetadataPos() + reader.getTsFileMetadataSize())) + + "|\t[TsFileMetadataSize] " + + reader.getTsFileMetadataSize()); + + printlnBoth( + pw, + String.format("%20s", reader.getFileMetadataPos() + reader.getTsFileMetadataSize() + 4) + + "|\t[magic tail] " + + reader.readTailMagic()); + } catch (IOException e) { + e.printStackTrace(); + } + } + + private long printIndexOfTimerseriesIndex(long pos, MetadataIndexNode metadataIndexNode) { + printlnBoth(pw, String.format("%20s", pos) + "|\t[MetadataIndexNode]"); + printlnBoth( + pw, + String.format("%20s", pos) + "|\t\t childrenCnt=" + metadataIndexNode.getChildren().size()); + pos += ReadWriteForEncodingUtils.uVarIntSize(metadataIndexNode.getChildren().size()); + for (IMetadataIndexEntry metadataIndexEntry : metadataIndexNode.getChildren()) { + printlnBoth( + pw, + String.format("%20s", pos) + + "|\t\t<" + + metadataIndexEntry.getCompareKey() + + ", " + + metadataIndexEntry.getOffset() + + ">"); + pos += metadataIndexEntry.serializedSize(); + } + printlnBoth( + pw, String.format("%20s", pos) + "|\t\tendOffset=" + metadataIndexNode.getEndOffset()); + pos += Long.BYTES; + printlnBoth( + pw, String.format("%20s", pos) + "|\t\tnodeType=" + 
metadataIndexNode.getNodeType()); + pos += Byte.BYTES; + return pos; + } + + private void printFileInfo() { + try { + printlnBoth(pw, ""); + printlnBoth(pw, String.format("%20s", "POSITION") + "|\tCONTENT"); + printlnBoth(pw, String.format("%20s", "--------") + " \t-------"); + printlnBoth(pw, String.format("%20d", 0) + "|\t[magic head] " + reader.readHeadMagic()); + printlnBoth( + pw, + String.format("%20d", TSFileConfig.MAGIC_STRING.getBytes().length) + + "|\t[version number] " + + reader.readVersionNumber()); + } catch (IOException e) { + e.printStackTrace(); + } + } + + private void printChunk(List allChunkGroupMetadata) { + try { + long nextChunkGroupHeaderPos = + (long) TSFileConfig.MAGIC_STRING.getBytes().length + Byte.BYTES; + // ChunkGroup begins + for (ChunkGroupMetadata chunkGroupMetadata : allChunkGroupMetadata) { + printlnBoth( + pw, + splitStr + + " [ChunkGroup] of " + + chunkGroupMetadata.getDevice() + + ", num of Chunks:" + + chunkGroupMetadata.getChunkMetadataList().size()); + // chunkGroupHeader begins + long offset = nextChunkGroupHeaderPos; + printlnBoth(pw, String.format("%20s", offset) + "|\t[ChunkGroup Header]"); + ChunkGroupHeader chunkGroupHeader = + reader.readChunkGroupHeader(nextChunkGroupHeaderPos, false); + printlnBoth(pw, String.format("%20s", offset) + "|\t\t[marker] 0"); + printlnBoth( + pw, + String.format("%20s", offset + 1) + + "|\t\t[deviceID] " + + chunkGroupHeader.getDeviceID() + + " size=" + + chunkGroupHeader.getDeviceID().serializedSize()); + // chunk begins + for (ChunkMetadata chunkMetadata : chunkGroupMetadata.getChunkMetadataList()) { + offset = chunkMetadata.getOffsetOfChunkHeader(); + Chunk chunk = reader.readMemChunk(chunkMetadata); + printlnBoth( + pw, + String.format("%20d", offset) + + "|\t[Chunk] of " + + new Path( + chunkGroupHeader.getDeviceID(), chunkMetadata.getMeasurementUid(), false) + + ", " + + chunkMetadata.getStatistics()); + printlnBoth( + pw, + String.format("%20s", offset) + + "|\t\t[Chunk Header] " 
+ + "marker=" + + chunk.getHeader().getChunkType() + + ", measurementID=" + + chunk.getHeader().getMeasurementID() + + ", dataSize=" + + chunk.getHeader().getDataSize() + + ", dataType=" + + chunk.getHeader().getDataType() + + ", compressionType=" + + chunk.getHeader().getCompressionType() + + ", encodingType=" + + chunk.getHeader().getEncodingType() + + ", size=" + + chunk.getHeader().getSerializedSize()); + offset += chunk.getHeader().getSerializedSize(); + PageHeader pageHeader; + if (((byte) (chunk.getHeader().getChunkType() & 0x3F)) + == MetaMarker.ONLY_ONE_PAGE_CHUNK_HEADER) { + pageHeader = PageHeader.deserializeFrom(chunk.getData(), chunkMetadata.getStatistics()); + printlnBoth( + pw, + String.format("%20s", offset) + + "|\t\t\t[Page Header] " + + " HeaderSize:" + + (pageHeader.getSerializedSize() + - pageHeader.getStatistics().getSerializedSize()) + + ", UncompressedSize:" + + pageHeader.getUncompressedSize() + + ", CompressedSize:" + + pageHeader.getCompressedSize()); + offset += + pageHeader.getSerializedSize() - pageHeader.getStatistics().getSerializedSize(); + printlnBoth( + pw, + String.format("%20s", offset) + + "|\t\t\t[Page Data] " + + " Size:" + + pageHeader.getCompressedSize()); + } else { // more than one page in this chunk + ByteBuffer chunkDataBuffer = chunk.getData(); + + int pageID = 0; + while (chunkDataBuffer.remaining() > 0) { + pageID++; + // deserialize a PageHeader from chunkDataBuffer + pageHeader = + PageHeader.deserializeFrom(chunkDataBuffer, chunk.getHeader().getDataType()); + // skip the compressed bytes + chunkDataBuffer.position(chunkDataBuffer.position() + pageHeader.getCompressedSize()); + // print page info + printlnBoth( + pw, + String.format("%20s", offset) + + String.format("|\t\t\t[PageHeader-%s] ", pageID) + + " HeaderSize:" + + pageHeader.getSerializedSize() + + ", UncompressedSize:" + + pageHeader.getUncompressedSize() + + ", CompressedSize:" + + pageHeader.getCompressedSize() + + ", " + + pageHeader.getStatistics()); 
+ offset += pageHeader.getSerializedSize(); + printlnBoth( + pw, + String.format("%20s", offset) + + String.format("|\t\t\t[Page-%s] ", pageID) + + ", CompressedSize:" + + pageHeader.getCompressedSize() + + ", " + + pageHeader.getStatistics()); + offset += pageHeader.getCompressedSize(); + } + } + nextChunkGroupHeaderPos = + chunkMetadata.getOffsetOfChunkHeader() + + chunk.getHeader().getSerializedSize() + + chunk.getHeader().getDataSize(); + } + reader.position(nextChunkGroupHeaderPos); + byte marker = reader.readMarker(); + switch (marker) { + case MetaMarker.CHUNK_GROUP_HEADER: + // do nothing + break; + case MetaMarker.OPERATION_INDEX_RANGE: + // skip the PlanIndex + nextChunkGroupHeaderPos += 16; + break; + } + + printlnBoth(pw, splitStr + " [ChunkGroup] of " + chunkGroupMetadata.getDevice() + " ends"); + } + } catch (IOException e) { + e.printStackTrace(); + } + } + + private void printTimeseriesIndex(long pos, Pair timeseriesMetadata) { + try { + printlnBoth( + pw, + String.format("%20s", pos) + + "|\t[TimeseriesMetadata] of " + + timeseriesMetadata.left + + ", tsDataType:" + + timeseriesMetadata.right.getTsDataType() + + ", sizeWithoutChunkMetadata:" + + timeseriesMetadata.right.serializedSizeWithoutMetadata() + + ", " + + timeseriesMetadata.right.getStatistics()); + pos += timeseriesMetadata.getRight().serializedSizeWithoutMetadata(); + List chunkMetadataList = reader.getChunkMetadataList(timeseriesMetadata.left); + for (int i = 0; i < chunkMetadataList.size(); i++) { + ChunkMetadata chunkMetadata = chunkMetadataList.get(i); + final int serializedSize = chunkMetadata.serializedSize(i != 0); + printlnBoth( + pw, + String.format("%20s", pos) + + "|\t\t[ChunkMetadata] " + + "offset=" + + chunkMetadata.getOffsetOfChunkHeader() + + ", size=" + + serializedSize); + pos += serializedSize; + } + } catch (IOException e) { + e.printStackTrace(); + } + } + + /** + * load by dfs, and sort by TreeMap + * + * @param metadataIndexNode current node + * @param 
metadataIndexNodeMap result map, key is offset + * @param treeOutputStringBuffer result list, string is index tree + * @param deep current deep + */ + private void loadIndexTree( + MetadataIndexNode metadataIndexNode, + TreeMap metadataIndexNodeMap, + List treeOutputStringBuffer, + int deep) + throws IOException { + StringBuilder tableWriter = new StringBuilder("\t"); + for (int i = 0; i < deep; i++) { + tableWriter.append("\t\t"); + } + treeOutputStringBuffer.add( + tableWriter + "[MetadataIndex:" + metadataIndexNode.getNodeType() + "]"); + for (int i = 0; i < metadataIndexNode.getChildren().size(); i++) { + IMetadataIndexEntry metadataIndexEntry = metadataIndexNode.getChildren().get(i); + + treeOutputStringBuffer.add( + tableWriter + + "└──────[" + + metadataIndexEntry.getCompareKey() + + "," + + metadataIndexEntry.getOffset() + + "]"); + if (!metadataIndexNode.getNodeType().equals(MetadataIndexNodeType.LEAF_MEASUREMENT)) { + long endOffset = metadataIndexNode.getEndOffset(); + if (i != metadataIndexNode.getChildren().size() - 1) { + endOffset = metadataIndexNode.getChildren().get(i + 1).getOffset(); + } + boolean currentChildLevelIsDevice = + MetadataIndexNodeType.INTERNAL_DEVICE.equals(metadataIndexNode.getNodeType()); + MetadataIndexNode subNode = + reader.readMetadataIndexNode( + metadataIndexEntry.getOffset(), endOffset, currentChildLevelIsDevice); + metadataIndexNodeMap.put(metadataIndexEntry.getOffset(), subNode); + loadIndexTree(subNode, metadataIndexNodeMap, treeOutputStringBuffer, deep + 1); + } + } + } + + private void printlnBoth(PrintWriter pw, String str) { + System.out.println(str); + pw.println(str); + } + + private static Pair checkArgs(String[] args) { + String filename = "test.tsfile"; + String outFile = "TsFile_sketch_view.txt"; + if (args.length == 1) { + filename = args[0]; + } else if (args.length == 2) { + filename = args[0]; + outFile = args[1]; + } + return new Pair<>(filename, outFile); + } + + private class TsFileSketchToolReader 
extends TsFileSequenceReader { + + public TsFileSketchToolReader(String file) throws IOException { + super(file); + } + + /** + * Traverse the metadata index from MetadataIndexEntry to get TimeseriesMetadatas + * + * @param metadataIndex MetadataIndexEntry + * @param buffer byte buffer + * @param deviceId String + * @param timeseriesMetadataMap map: deviceId -> timeseriesMetadata list + * @param needChunkMetadata deserialize chunk metadata list or not + */ + private void generateMetadataIndexWithOffset( + long startOffset, + IMetadataIndexEntry metadataIndex, + ByteBuffer buffer, + IDeviceID deviceId, + MetadataIndexNodeType type, + Map> timeseriesMetadataMap, + boolean needChunkMetadata) + throws IOException { + try { + if (type.equals(MetadataIndexNodeType.LEAF_MEASUREMENT)) { + while (buffer.hasRemaining()) { + long pos = startOffset + buffer.position(); + TimeseriesMetadata timeseriesMetadata = + TimeseriesMetadata.deserializeFrom(buffer, needChunkMetadata); + timeseriesMetadataMap.put( + pos, + new Pair<>( + new Path(deviceId, timeseriesMetadata.getMeasurementId(), true), + timeseriesMetadata)); + } + } else { + // deviceId should be determined by LEAF_DEVICE node + if (type.equals(MetadataIndexNodeType.LEAF_DEVICE)) { + deviceId = ((DeviceMetadataIndexEntry) metadataIndex).getDeviceID(); + } + boolean currentChildLevelIsDevice = MetadataIndexNodeType.INTERNAL_DEVICE.equals(type); + MetadataIndexNode metadataIndexNode = + reader + .getDeserializeContext() + .deserializeMetadataIndexNode(buffer, currentChildLevelIsDevice); + int metadataIndexListSize = metadataIndexNode.getChildren().size(); + for (int i = 0; i < metadataIndexListSize; i++) { + long endOffset = metadataIndexNode.getEndOffset(); + if (i != metadataIndexListSize - 1) { + endOffset = metadataIndexNode.getChildren().get(i + 1).getOffset(); + } + if (endOffset - metadataIndexNode.getChildren().get(i).getOffset() + < Integer.MAX_VALUE) { + ByteBuffer nextBuffer = + 
readData(metadataIndexNode.getChildren().get(i).getOffset(), endOffset); + generateMetadataIndexWithOffset( + metadataIndexNode.getChildren().get(i).getOffset(), + metadataIndexNode.getChildren().get(i), + nextBuffer, + deviceId, + metadataIndexNode.getNodeType(), + timeseriesMetadataMap, + needChunkMetadata); + } else { + // when the buffer length is over than Integer.MAX_VALUE, + // using tsFileInput to get timeseriesMetadataList + generateMetadataIndexWithOffsetUsingTsFileInput( + metadataIndexNode.getChildren().get(i).getOffset(), + endOffset, + metadataIndexNode.getChildren().get(i), + deviceId, + metadataIndexNode.getNodeType(), + timeseriesMetadataMap, + needChunkMetadata); + } + } + } + } catch (BufferOverflowException e) { + throw e; + } + } + + /** + * Traverse the metadata index from MetadataIndexEntry to get TimeseriesMetadatas + * + * @param metadataIndex MetadataIndexEntry + * @param deviceId String + * @param timeseriesMetadataMap map: deviceId -> timeseriesMetadata list + * @param needChunkMetadata deserialize chunk metadata list or not + */ + private void generateMetadataIndexWithOffsetUsingTsFileInput( + long start, + long end, + IMetadataIndexEntry metadataIndex, + IDeviceID deviceId, + MetadataIndexNodeType type, + Map> timeseriesMetadataMap, + boolean needChunkMetadata) + throws IOException { + try { + tsFileInput.position(start); + if (type.equals(MetadataIndexNodeType.LEAF_MEASUREMENT)) { + while (tsFileInput.position() < end) { + long pos = tsFileInput.position(); + TimeseriesMetadata timeseriesMetadata = + TimeseriesMetadata.deserializeFrom(tsFileInput, needChunkMetadata); + timeseriesMetadataMap.put( + pos, + new Pair<>( + new Path(deviceId, timeseriesMetadata.getMeasurementId(), true), + timeseriesMetadata)); + } + } else { + // deviceId should be determined by LEAF_DEVICE node + if (type.equals(MetadataIndexNodeType.LEAF_DEVICE)) { + deviceId = ((DeviceMetadataIndexEntry) metadataIndex).getDeviceID(); + } + boolean isDeviceLevel = 
MetadataIndexNodeType.INTERNAL_DEVICE.equals(type); + MetadataIndexNode metadataIndexNode = + reader + .getDeserializeContext() + .deserializeMetadataIndexNode(tsFileInput.wrapAsInputStream(), isDeviceLevel); + int metadataIndexListSize = metadataIndexNode.getChildren().size(); + for (int i = 0; i < metadataIndexListSize; i++) { + long endOffset = metadataIndexNode.getEndOffset(); + if (i != metadataIndexListSize - 1) { + endOffset = metadataIndexNode.getChildren().get(i + 1).getOffset(); + } + generateMetadataIndexWithOffsetUsingTsFileInput( + metadataIndexNode.getChildren().get(i).getOffset(), + endOffset, + metadataIndexNode.getChildren().get(i), + deviceId, + metadataIndexNode.getNodeType(), + timeseriesMetadataMap, + needChunkMetadata); + } + } + } catch (BufferOverflowException e) { + throw e; + } + } + + public Map> getAllTimeseriesMetadataWithOffset() + throws IOException { + if (tsFileMetaData == null) { + readFileMetadata(); + } + Map> timeseriesMetadataMap = new TreeMap<>(); + for (Entry entry : + tsFileMetaData.getTableMetadataIndexNodeMap().entrySet()) { + List metadataIndexEntryList = entry.getValue().getChildren(); + for (int i = 0; i < metadataIndexEntryList.size(); i++) { + IMetadataIndexEntry metadataIndexEntry = metadataIndexEntryList.get(i); + long endOffset = entry.getValue().getEndOffset(); + if (i != metadataIndexEntryList.size() - 1) { + endOffset = metadataIndexEntryList.get(i + 1).getOffset(); + } + ByteBuffer buffer = readData(metadataIndexEntry.getOffset(), endOffset); + generateMetadataIndexWithOffset( + metadataIndexEntry.getOffset(), + metadataIndexEntry, + buffer, + null, + entry.getValue().getNodeType(), + timeseriesMetadataMap, + false); + } + } + + return timeseriesMetadataMap; + } + } + + // for test + protected List getAllChunkGroupMetadata() { + return allChunkGroupMetadata; + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/utils/WriteUtils.java b/java/tsfile/src/main/java/org/apache/tsfile/utils/WriteUtils.java 
new file mode 100644 index 000000000..542d252fd --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/utils/WriteUtils.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.utils; + +import org.apache.tsfile.file.metadata.IDeviceID; +import org.apache.tsfile.write.record.Tablet; + +import java.util.ArrayList; +import java.util.List; + +public class WriteUtils { + + /** + * A Tablet for the table-view insert interface may contain more than one device. This method + * splits a Tablet by different deviceIds so that the caller can insert them device-by-device. 
+ * + * @return (deviceId, endRowNum) pairs + */ + public static List> splitTabletByDevice(Tablet tablet) { + List> result = new ArrayList<>(); + IDeviceID lastDeviceID = null; + for (int i = 0; i < tablet.rowSize; i++) { + final IDeviceID currDeviceID = tablet.getDeviceID(i); + if (!currDeviceID.equals(lastDeviceID)) { + if (lastDeviceID != null) { + result.add(new Pair<>(lastDeviceID, i)); + } + lastDeviceID = currDeviceID; + } + } + result.add(new Pair<>(lastDeviceID, tablet.rowSize)); + return result; + } + + public static int compareStrings(String a, String b) { + if (a == null && b == null) { + return 0; + } + if (a == null) { + return -1; + } + if (b == null) { + return 1; + } + return a.compareTo(b); + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/write/TsFileWriter.java b/java/tsfile/src/main/java/org/apache/tsfile/write/TsFileWriter.java index f7c2f2934..f3cc2036e 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/write/TsFileWriter.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/write/TsFileWriter.java @@ -20,12 +20,17 @@ import org.apache.tsfile.common.conf.TSFileConfig; import org.apache.tsfile.common.conf.TSFileDescriptor; +import org.apache.tsfile.exception.write.ConflictDataTypeException; +import org.apache.tsfile.exception.write.NoDeviceException; import org.apache.tsfile.exception.write.NoMeasurementException; +import org.apache.tsfile.exception.write.NoTableException; import org.apache.tsfile.exception.write.WriteProcessException; import org.apache.tsfile.file.metadata.IDeviceID; -import org.apache.tsfile.file.metadata.PlainDeviceID; +import org.apache.tsfile.file.metadata.TableSchema; import org.apache.tsfile.read.common.Path; import org.apache.tsfile.utils.MeasurementGroup; +import org.apache.tsfile.utils.Pair; +import org.apache.tsfile.utils.WriteUtils; import org.apache.tsfile.write.chunk.AlignedChunkGroupWriterImpl; import org.apache.tsfile.write.chunk.IChunkGroupWriter; import 
org.apache.tsfile.write.chunk.NonAlignedChunkGroupWriterImpl; @@ -33,9 +38,7 @@ import org.apache.tsfile.write.record.Tablet; import org.apache.tsfile.write.record.datapoint.DataPoint; import org.apache.tsfile.write.schema.IMeasurementSchema; -import org.apache.tsfile.write.schema.MeasurementSchema; import org.apache.tsfile.write.schema.Schema; -import org.apache.tsfile.write.schema.VectorMeasurementSchema; import org.apache.tsfile.write.writer.RestorableTsFileIOWriter; import org.apache.tsfile.write.writer.TsFileIOWriter; import org.apache.tsfile.write.writer.TsFileOutput; @@ -49,6 +52,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.TreeMap; /** * TsFileWriter is the entrance for writing processing. It receives a record and send it to @@ -62,9 +66,6 @@ public class TsFileWriter implements AutoCloseable { protected static final TSFileConfig config = TSFileDescriptor.getInstance().getConfig(); private static final Logger LOG = LoggerFactory.getLogger(TsFileWriter.class); - /** schema of this TsFile. */ - protected final Schema schema; - /** IO writer of this TsFile. */ private final TsFileIOWriter fileWriter; @@ -86,13 +87,15 @@ public class TsFileWriter implements AutoCloseable { */ private boolean isUnseq = false; - private Map groupWriters = new HashMap<>(); + private Map groupWriters = new TreeMap<>(); /** min value of threshold of data points num check. */ private long recordCountForNextMemCheck = 100; private long chunkGroupSizeThreshold; + private boolean isTableWriteAligned = true; + /** * init this TsFileWriter. 
* @@ -126,7 +129,6 @@ public TsFileWriter(File file, Schema schema) throws IOException { * * @param output the TsFileOutput of the file to be written by this TsFileWriter * @param schema the schema of this TsFile - * @throws IOException */ public TsFileWriter(TsFileOutput output, Schema schema) throws IOException { this(new TsFileIOWriter(output), schema, TSFileDescriptor.getInstance().getConfig()); @@ -159,41 +161,10 @@ protected TsFileWriter(TsFileIOWriter fileWriter, Schema schema, TSFileConfig co this.fileWriter = fileWriter; if (fileWriter instanceof RestorableTsFileIOWriter) { - Map schemaMap = - ((RestorableTsFileIOWriter) fileWriter).getKnownSchema(); - Map measurementGroupMap = new HashMap<>(); - for (Map.Entry entry : schemaMap.entrySet()) { - IMeasurementSchema measurementSchema = entry.getValue(); - if (measurementSchema instanceof VectorMeasurementSchema) { - MeasurementGroup group = - measurementGroupMap.getOrDefault( - new Path(entry.getKey().getDevice()), new MeasurementGroup(true)); - List measurementList = measurementSchema.getSubMeasurementsList(); - for (int i = 0; i < measurementList.size(); i++) { - group - .getMeasurementSchemaMap() - .put( - measurementList.get(i), - new MeasurementSchema( - measurementList.get(i), - measurementSchema.getSubMeasurementsTSDataTypeList().get(i), - measurementSchema.getSubMeasurementsTSEncodingList().get(i))); - } - measurementGroupMap.put(new Path(entry.getKey().getDevice()), group); - } else { - MeasurementGroup group = - measurementGroupMap.getOrDefault( - new Path(entry.getKey().getDevice()), new MeasurementGroup(false)); - group - .getMeasurementSchemaMap() - .put(measurementSchema.getMeasurementId(), (MeasurementSchema) measurementSchema); - measurementGroupMap.put(new Path(entry.getKey().getDevice()), group); - } - } - this.schema = new Schema(measurementGroupMap); - } else { - this.schema = schema; + schema = ((RestorableTsFileIOWriter) fileWriter).getKnownSchema(); } + fileWriter.setSchema(schema); + 
this.pageSize = conf.getPageSizeInByte(); this.chunkGroupSizeThreshold = conf.getGroupSizeInByte(); config.setTSFileStorageFs(conf.getTSFileStorageFs()); @@ -207,52 +178,50 @@ protected TsFileWriter(TsFileIOWriter fileWriter, Schema schema, TSFileConfig co } public void registerSchemaTemplate( - String templateName, Map template, boolean isAligned) { - schema.registerSchemaTemplate(templateName, new MeasurementGroup(isAligned, template)); + String templateName, Map template, boolean isAligned) { + getSchema().registerSchemaTemplate(templateName, new MeasurementGroup(isAligned, template)); } /** * This method is used to register all timeseries in the specified template under the specified * device. - * - * @param deviceId - * @param templateName - * @throws WriteProcessException */ - public void registerDevice(String deviceId, String templateName) throws WriteProcessException { - if (!schema.getSchemaTemplates().containsKey(templateName)) { + public void registerDevice(String deviceIdString, String templateName) + throws WriteProcessException { + IDeviceID deviceID = IDeviceID.Factory.DEFAULT_FACTORY.create(deviceIdString); + if (!getSchema().getSchemaTemplates().containsKey(templateName)) { throw new WriteProcessException("given template is not existed! " + templateName); } - if (schema.getRegisteredTimeseriesMap().containsKey(new Path(deviceId))) { + if (getSchema().getRegisteredTimeseriesMap().containsKey(deviceID)) { throw new WriteProcessException( "this device " - + deviceId + + deviceIdString + " has been registered, you can only use registerDevice method to register empty device."); } - schema.registerDevice(deviceId, templateName); + getSchema().registerDevice(deviceID, templateName); } - /** - * Register nonAligned timeseries by single. 
- * - * @param devicePath - * @param measurementSchema - * @throws WriteProcessException - */ - public void registerTimeseries(Path devicePath, MeasurementSchema measurementSchema) + @Deprecated + public void registerTimeseries(Path devicePath, IMeasurementSchema measurementSchema) + throws WriteProcessException { + registerTimeseries(devicePath.getIDeviceID(), measurementSchema); + } + + /** Register nonAligned timeseries by single. */ + public void registerTimeseries(IDeviceID deviceID, IMeasurementSchema measurementSchema) throws WriteProcessException { MeasurementGroup measurementGroup; - if (schema.containsDevice(devicePath)) { - measurementGroup = schema.getSeriesSchema(devicePath); + if (getSchema().containsDevice(deviceID)) { + measurementGroup = getSchema().getSeriesSchema(deviceID); if (measurementGroup.isAligned()) { throw new WriteProcessException( - "given device " + devicePath + " has been registered for aligned timeseries."); + "given device " + deviceID + " has been registered for aligned timeseries."); } else if (measurementGroup .getMeasurementSchemaMap() .containsKey(measurementSchema.getMeasurementId())) { throw new WriteProcessException( "given nonAligned timeseries " - + (devicePath + "." + measurementSchema.getMeasurementId()) + + (deviceID + "." + measurementSchema.getMeasurementId()) + " has been registered."); } } else { @@ -261,44 +230,42 @@ public void registerTimeseries(Path devicePath, MeasurementSchema measurementSch measurementGroup .getMeasurementSchemaMap() .put(measurementSchema.getMeasurementId(), measurementSchema); - schema.registerMeasurementGroup(devicePath, measurementGroup); + getSchema().registerMeasurementGroup(deviceID, measurementGroup); } - /** - * Register nonAligned timeseries by groups. 
- * - * @param devicePath - * @param measurementSchemas - */ - public void registerTimeseries(Path devicePath, List measurementSchemas) { - for (MeasurementSchema schema : measurementSchemas) { + @Deprecated + /** Register nonAligned timeseries by groups. */ + public void registerTimeseries(Path devicePath, List measurementSchemas) { + for (IMeasurementSchema schema : measurementSchemas) { try { - registerTimeseries(devicePath, schema); + registerTimeseries(devicePath.getIDeviceID(), schema); } catch (WriteProcessException e) { LOG.warn(e.getMessage()); } } } + public void registerAlignedTimeseries( + Path devicePath, List measurementSchemas) throws WriteProcessException { + registerAlignedTimeseries(devicePath.getIDeviceID(), measurementSchemas); + } + /** * Register aligned timeseries. Once the device is registered for aligned timeseries, it cannot be * expanded. - * - * @param devicePath - * @param measurementSchemas - * @throws WriteProcessException */ - public void registerAlignedTimeseries(Path devicePath, List measurementSchemas) + public void registerAlignedTimeseries( + IDeviceID deviceID, List measurementSchemas) throws WriteProcessException { - if (schema.containsDevice(devicePath)) { - if (schema.getSeriesSchema(devicePath).isAligned()) { + if (getSchema().containsDevice(deviceID)) { + if (getSchema().getSeriesSchema(deviceID).isAligned()) { throw new WriteProcessException( "given device " - + devicePath + + deviceID + " has been registered for aligned timeseries and should not be expanded."); } else { throw new WriteProcessException( - "given device " + devicePath + " has been registered for nonAligned timeseries."); + "given device " + deviceID + " has been registered for nonAligned timeseries."); } } MeasurementGroup measurementGroup = new MeasurementGroup(true); @@ -308,83 +275,98 @@ public void registerAlignedTimeseries(Path devicePath, List m .getMeasurementSchemaMap() .put(measurementSchema.getMeasurementId(), measurementSchema); }); - 
schema.registerMeasurementGroup(devicePath, measurementGroup); + getSchema().registerMeasurementGroup(deviceID, measurementGroup); } private boolean checkIsTimeseriesExist(TSRecord record, boolean isAligned) throws WriteProcessException, IOException { // initial ChunkGroupWriter of this device in the TSRecord - IChunkGroupWriter groupWriter = - tryToInitialGroupWriter(new PlainDeviceID(record.deviceId), isAligned); + final IDeviceID deviceID = record.deviceId; + IChunkGroupWriter groupWriter = tryToInitialGroupWriter(deviceID, isAligned); // initial all SeriesWriters of measurements in this TSRecord - Path devicePath = new Path(record.deviceId); - List measurementSchemas; - if (schema.containsDevice(devicePath)) { + List measurementSchemas; + if (getSchema().containsDevice(deviceID)) { measurementSchemas = checkIsAllMeasurementsInGroup( - record.dataPointList, schema.getSeriesSchema(devicePath), isAligned); + record.dataPointList, getSchema().getSeriesSchema(deviceID), isAligned); if (isAligned) { for (IMeasurementSchema s : measurementSchemas) { - if (flushedMeasurementsInDeviceMap.containsKey( - new PlainDeviceID(devicePath.getFullPath())) - && !flushedMeasurementsInDeviceMap - .get(new PlainDeviceID(devicePath.getFullPath())) - .contains(s.getMeasurementId())) { + if (flushedMeasurementsInDeviceMap.containsKey(deviceID) + && !flushedMeasurementsInDeviceMap.get(deviceID).contains(s.getMeasurementId())) { throw new WriteProcessException( "TsFile has flushed chunk group and should not add new measurement " + s.getMeasurementId() + " in device " - + devicePath.getFullPath()); + + deviceID); } } } groupWriter.tryToAddSeriesWriter(measurementSchemas); - } else if (schema.getSchemaTemplates() != null && schema.getSchemaTemplates().size() == 1) { + } else if (getSchema().getSchemaTemplates() != null + && getSchema().getSchemaTemplates().size() == 1) { // use the default template without needing to register device MeasurementGroup measurementGroup = - 
schema.getSchemaTemplates().entrySet().iterator().next().getValue(); + getSchema().getSchemaTemplates().entrySet().iterator().next().getValue(); measurementSchemas = checkIsAllMeasurementsInGroup(record.dataPointList, measurementGroup, isAligned); groupWriter.tryToAddSeriesWriter(measurementSchemas); } else { - throw new NoMeasurementException("input devicePath is invalid: " + devicePath); + throw new NoDeviceException(deviceID.toString()); } return true; } + private void checkIsTableExist(Tablet tablet) throws WriteProcessException { + String tableName = tablet.getTableName(); + final TableSchema tableSchema = getSchema().getTableSchemaMap().get(tableName); + if (tableSchema == null) { + throw new NoTableException(tableName); + } + + for (IMeasurementSchema writingColumnSchema : tablet.getSchemas()) { + final int columnIndex = tableSchema.findColumnIndex(writingColumnSchema.getMeasurementId()); + if (columnIndex < 0) { + throw new NoMeasurementException(writingColumnSchema.getMeasurementId()); + } + final IMeasurementSchema registeredColumnSchema = + tableSchema.getColumnSchemas().get(columnIndex); + if (!writingColumnSchema.getType().equals(registeredColumnSchema.getType())) { + throw new ConflictDataTypeException( + writingColumnSchema.getType(), registeredColumnSchema.getType()); + } + } + } + private void checkIsTimeseriesExist(Tablet tablet, boolean isAligned) throws WriteProcessException, IOException { - IChunkGroupWriter groupWriter = - tryToInitialGroupWriter(new PlainDeviceID(tablet.deviceId), isAligned); + final IDeviceID deviceID = IDeviceID.Factory.DEFAULT_FACTORY.create(tablet.getDeviceId()); + IChunkGroupWriter groupWriter = tryToInitialGroupWriter(deviceID, isAligned); - Path devicePath = new Path(tablet.deviceId); - List schemas = tablet.getSchemas(); - if (schema.containsDevice(devicePath)) { - checkIsAllMeasurementsInGroup(schema.getSeriesSchema(devicePath), schemas, isAligned); + List schemas = tablet.getSchemas(); + if 
(getSchema().containsDevice(deviceID)) { + checkIsAllMeasurementsInGroup(getSchema().getSeriesSchema(deviceID), schemas, isAligned); if (isAligned) { for (IMeasurementSchema s : schemas) { - if (flushedMeasurementsInDeviceMap.containsKey( - new PlainDeviceID(devicePath.getFullPath())) - && !flushedMeasurementsInDeviceMap - .get(new PlainDeviceID(devicePath.getFullPath())) - .contains(s.getMeasurementId())) { + if (flushedMeasurementsInDeviceMap.containsKey(deviceID) + && !flushedMeasurementsInDeviceMap.get(deviceID).contains(s.getMeasurementId())) { throw new WriteProcessException( "TsFile has flushed chunk group and should not add new measurement " + s.getMeasurementId() + " in device " - + devicePath.getFullPath()); + + deviceID); } } } groupWriter.tryToAddSeriesWriter(schemas); - } else if (schema.getSchemaTemplates() != null && schema.getSchemaTemplates().size() == 1) { + } else if (getSchema().getSchemaTemplates() != null + && getSchema().getSchemaTemplates().size() == 1) { MeasurementGroup measurementGroup = - schema.getSchemaTemplates().entrySet().iterator().next().getValue(); + getSchema().getSchemaTemplates().entrySet().iterator().next().getValue(); checkIsAllMeasurementsInGroup(measurementGroup, schemas, isAligned); groupWriter.tryToAddSeriesWriter(schemas); } else { - throw new NoMeasurementException("input devicePath is invalid: " + devicePath); + throw new NoDeviceException(deviceID.toString()); } } @@ -392,31 +374,23 @@ private void checkIsTimeseriesExist(Tablet tablet, boolean isAligned) * If it's aligned, then all measurementSchemas should be contained in the measurementGroup, or it * will throw exception. If it's nonAligned, then remove the measurementSchema that is not * contained in the measurementGroup. 
- * - * @param measurementGroup - * @param measurementSchemas - * @param isAligned - * @throws NoMeasurementException */ private void checkIsAllMeasurementsInGroup( MeasurementGroup measurementGroup, - List measurementSchemas, + List measurementSchemas, boolean isAligned) throws NoMeasurementException { if (isAligned && !measurementGroup.isAligned()) { - throw new NoMeasurementException("no aligned timeseries is registered in the group."); + throw new NoMeasurementException("aligned"); } else if (!isAligned && measurementGroup.isAligned()) { - throw new NoMeasurementException("no nonAligned timeseries is registered in the group."); + throw new NoMeasurementException("nonAligned"); } - for (MeasurementSchema measurementSchema : measurementSchemas) { + for (IMeasurementSchema measurementSchema : measurementSchemas) { if (!measurementGroup .getMeasurementSchemaMap() .containsKey(measurementSchema.getMeasurementId())) { if (isAligned) { - throw new NoMeasurementException( - "measurement " - + measurementSchema.getMeasurementId() - + " is not registered or in the default template"); + throw new NoMeasurementException(measurementSchema.getMeasurementId()); } else { measurementSchemas.remove(measurementSchema); } @@ -424,31 +398,20 @@ private void checkIsAllMeasurementsInGroup( } } - /** - * Check whether all measurements of dataPoints list are in the measurementGroup. - * - * @param dataPoints - * @param measurementGroup - * @param isAligned - * @return - * @throws NoMeasurementException - */ - private List checkIsAllMeasurementsInGroup( + /** Check whether all measurements of dataPoints list are in the measurementGroup. 
*/ + private List checkIsAllMeasurementsInGroup( List dataPoints, MeasurementGroup measurementGroup, boolean isAligned) throws NoMeasurementException { if (isAligned && !measurementGroup.isAligned()) { - throw new NoMeasurementException("no aligned timeseries is registered in the group."); + throw new NoMeasurementException("aligned"); } else if (!isAligned && measurementGroup.isAligned()) { - throw new NoMeasurementException("no nonAligned timeseries is registered in the group."); + throw new NoMeasurementException("nonAligned"); } - List schemas = new ArrayList<>(); + List schemas = new ArrayList<>(); for (DataPoint dataPoint : dataPoints) { if (!measurementGroup.getMeasurementSchemaMap().containsKey(dataPoint.getMeasurementId())) { if (isAligned) { - throw new NoMeasurementException( - "aligned measurement " - + dataPoint.getMeasurementId() - + " is not registered or in the default template"); + throw new NoMeasurementException(dataPoint.getMeasurementId()); } else { LOG.warn( "Ignore nonAligned measurement " @@ -463,8 +426,8 @@ private List checkIsAllMeasurementsInGroup( } private IChunkGroupWriter tryToInitialGroupWriter(IDeviceID deviceId, boolean isAligned) { - IChunkGroupWriter groupWriter; - if (!groupWriters.containsKey(deviceId)) { + IChunkGroupWriter groupWriter = groupWriters.get(deviceId); + if (groupWriter == null) { if (isAligned) { groupWriter = new AlignedChunkGroupWriterImpl(deviceId); if (!isUnseq) { // Sequence File @@ -480,8 +443,6 @@ private IChunkGroupWriter tryToInitialGroupWriter(IDeviceID deviceId, boolean is } } groupWriters.put(deviceId, groupWriter); - } else { - groupWriter = groupWriters.get(deviceId); } return groupWriter; } @@ -496,19 +457,13 @@ private IChunkGroupWriter tryToInitialGroupWriter(IDeviceID deviceId, boolean is */ public boolean write(TSRecord record) throws IOException, WriteProcessException { checkIsTimeseriesExist(record, false); - recordCount += - groupWriters - .get(new PlainDeviceID(record.deviceId)) - 
.write(record.time, record.dataPointList); + recordCount += groupWriters.get(record.deviceId).write(record.time, record.dataPointList); return checkMemorySizeAndMayFlushChunks(); } public boolean writeAligned(TSRecord record) throws IOException, WriteProcessException { checkIsTimeseriesExist(record, true); - recordCount += - groupWriters - .get(new PlainDeviceID(record.deviceId)) - .write(record.time, record.dataPointList); + recordCount += groupWriters.get(record.deviceId).write(record.time, record.dataPointList); return checkMemorySizeAndMayFlushChunks(); } @@ -523,7 +478,10 @@ public boolean write(Tablet tablet) throws IOException, WriteProcessException { // make sure the ChunkGroupWriter for this Tablet exist checkIsTimeseriesExist(tablet, false); // get corresponding ChunkGroupWriter and write this Tablet - recordCount += groupWriters.get(new PlainDeviceID(tablet.deviceId)).write(tablet); + recordCount += + groupWriters + .get(IDeviceID.Factory.DEFAULT_FACTORY.create(tablet.getDeviceId())) + .write(tablet); return checkMemorySizeAndMayFlushChunks(); } @@ -531,12 +489,15 @@ public boolean writeAligned(Tablet tablet) throws IOException, WriteProcessExcep // make sure the ChunkGroupWriter for this Tablet exist checkIsTimeseriesExist(tablet, true); // get corresponding ChunkGroupWriter and write this Tablet - recordCount += groupWriters.get(new PlainDeviceID(tablet.deviceId)).write(tablet); + recordCount += + groupWriters + .get(IDeviceID.Factory.DEFAULT_FACTORY.create(tablet.getDeviceId())) + .write(tablet); return checkMemorySizeAndMayFlushChunks(); } /** - * calculate total memory size occupied by all ChunkGroupWriter instances currently. + * calculate total memory size occupied by all ChunkGroupWriter instances currently. 
* * @return total memory size used */ @@ -650,4 +611,81 @@ public void close() throws IOException { public TsFileIOWriter getIOWriter() { return this.fileWriter; } + + public Schema getSchema() { + return fileWriter.getSchema(); + } + + /** + * Write the tablet into the TsFile with the table-view. The method will try to split the tablet + * by device. If you know the device association within the tablet, please use writeTable(Tablet + * tablet, List> deviceIdEndIndexPairs). One typical case where the other + * method should be used is that all rows in the tablet belong to the same device. + * + * @param tablet data to write + * @return true if a flush is triggered after write, false otherwise + * @throws IOException if the file cannot be written + * @throws WriteProcessException if the schema is not registered first + */ + public boolean writeTable(Tablet tablet) throws IOException, WriteProcessException { + return writeTable(tablet, null); + } + + /** + * Write the tablet into the TsFile with the table-view. + * + * @param tablet data to write + * @param deviceIdEndIndexPairs each deviceId and its end row number in row order. For example, if + * the first three rows belong to device ("table1", "d1"), the next five rows belong to device + * ("table1", "d2"), and the last two rows belong to device ("table1", "d3"), then the list + * will be [(("table1", "d1"), 3), (("table1", "d2"), 8), (("table1", "d3"), 10)]. If the list + * is not provided, the method will try to split the tablet. 
+ * @return true if a flush is triggered after write, false otherwise + * @throws IOException if the file cannot be written + * @throws WriteProcessException if the schema is not registered first + */ + public boolean writeTable(Tablet tablet, List> deviceIdEndIndexPairs) + throws IOException, WriteProcessException { + // make sure the ChunkGroupWriter for this Tablet exist and there is no type conflict + checkIsTableExist(tablet); + // split the tablet by deviceId + if (deviceIdEndIndexPairs == null) { + deviceIdEndIndexPairs = WriteUtils.splitTabletByDevice(tablet); + } + + int startIndex = 0; + for (Pair pair : deviceIdEndIndexPairs) { + // get corresponding ChunkGroupWriter and write this Tablet + recordCount += + tryToInitialGroupWriter(pair.left, isTableWriteAligned) + .write( + tablet, + startIndex, + pair.right, + tablet.getIdColumnRange(), + tablet.getSchemas().size()); + startIndex = pair.right; + } + return checkMemorySizeAndMayFlushChunks(); + } + + public boolean isTableWriteAligned() { + return isTableWriteAligned; + } + + public void setTableWriteAligned(boolean tableWriteAligned) { + isTableWriteAligned = tableWriteAligned; + } + + public void registerTableSchema(TableSchema tableSchema) { + getSchema().registerTableSchema(tableSchema); + } + + public boolean isGenerateTableSchemaForTree() { + return getIOWriter().isGenerateTableSchema(); + } + + public void setGenerateTableSchema(boolean generateTableSchema) { + this.getIOWriter().setGenerateTableSchema(generateTableSchema); + } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/write/chunk/AlignedChunkGroupWriterImpl.java b/java/tsfile/src/main/java/org/apache/tsfile/write/chunk/AlignedChunkGroupWriterImpl.java index ecd974f83..8cba8f6a1 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/write/chunk/AlignedChunkGroupWriterImpl.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/write/chunk/AlignedChunkGroupWriterImpl.java @@ -25,7 +25,6 @@ import 
org.apache.tsfile.enums.TSDataType; import org.apache.tsfile.exception.write.WriteProcessException; import org.apache.tsfile.file.metadata.IDeviceID; -import org.apache.tsfile.file.metadata.PlainDeviceID; import org.apache.tsfile.file.metadata.enums.CompressionType; import org.apache.tsfile.file.metadata.enums.TSEncoding; import org.apache.tsfile.utils.Binary; @@ -33,7 +32,7 @@ import org.apache.tsfile.write.UnSupportedDataTypeException; import org.apache.tsfile.write.record.Tablet; import org.apache.tsfile.write.record.datapoint.DataPoint; -import org.apache.tsfile.write.schema.MeasurementSchema; +import org.apache.tsfile.write.schema.IMeasurementSchema; import org.apache.tsfile.write.writer.TsFileIOWriter; import org.slf4j.Logger; @@ -72,9 +71,16 @@ public AlignedChunkGroupWriterImpl(IDeviceID deviceId) { } @Override - public void tryToAddSeriesWriter(MeasurementSchema measurementSchema) throws IOException { - if (!valueChunkWriterMap.containsKey(measurementSchema.getMeasurementId())) { - ValueChunkWriter valueChunkWriter = + public void tryToAddSeriesWriter(IMeasurementSchema measurementSchema) throws IOException { + tryToAddSeriesWriterInternal(measurementSchema); + } + + public ValueChunkWriter tryToAddSeriesWriterInternal(IMeasurementSchema measurementSchema) + throws IOException { + ValueChunkWriter valueChunkWriter = + valueChunkWriterMap.get(measurementSchema.getMeasurementId()); + if (valueChunkWriter == null) { + valueChunkWriter = new ValueChunkWriter( measurementSchema.getMeasurementId(), measurementSchema.getCompressor(), @@ -84,11 +90,12 @@ public void tryToAddSeriesWriter(MeasurementSchema measurementSchema) throws IOE valueChunkWriterMap.put(measurementSchema.getMeasurementId(), valueChunkWriter); tryToAddEmptyPageAndData(valueChunkWriter); } + return valueChunkWriter; } @Override - public void tryToAddSeriesWriter(List measurementSchemas) throws IOException { - for (MeasurementSchema schema : measurementSchemas) { + public void 
tryToAddSeriesWriter(List measurementSchemas) throws IOException { + for (IMeasurementSchema schema : measurementSchemas) { if (!valueChunkWriterMap.containsKey(schema.getMeasurementId())) { ValueChunkWriter valueChunkWriter = new ValueChunkWriter( @@ -157,30 +164,45 @@ public int write(long time, List data) throws WriteProcessException, } @Override - public int write(Tablet tablet) throws WriteProcessException, IOException { + public int write(Tablet tablet) throws IOException, WriteProcessException { + return write(tablet, 0, tablet.rowSize, 0, tablet.getSchemas().size()); + } + + public int write(Tablet tablet, int startRowIndex, int endRowIndex) + throws IOException, WriteProcessException { + return write(tablet, startRowIndex, endRowIndex, 0, tablet.getSchemas().size()); + } + + @Override + public int write( + Tablet tablet, int startRowIndex, int endRowIndex, int startColIndex, int endColIndex) + throws WriteProcessException, IOException { int pointCount = 0; - List measurementSchemas = tablet.getSchemas(); + List measurementSchemas = tablet.getSchemas(); List emptyValueChunkWriters = new ArrayList<>(); + // TODO: should we allow duplicated measurements in a Tablet? 
Set existingMeasurements = measurementSchemas.stream() - .map(MeasurementSchema::getMeasurementId) + .map(IMeasurementSchema::getMeasurementId) .collect(Collectors.toSet()); for (Map.Entry entry : valueChunkWriterMap.entrySet()) { if (!existingMeasurements.contains(entry.getKey())) { emptyValueChunkWriters.add(entry.getValue()); } } - for (int row = 0; row < tablet.rowSize; row++) { + // TODO: changing to a column-first style by calculating the remaining page space of each + // column firsts + for (int row = startRowIndex; row < endRowIndex; row++) { long time = tablet.timestamps[row]; checkIsHistoryData(time); - for (int columnIndex = 0; columnIndex < measurementSchemas.size(); columnIndex++) { + for (int columnIndex = startColIndex; columnIndex < endColIndex; columnIndex++) { boolean isNull = tablet.bitMaps != null && tablet.bitMaps[columnIndex] != null && tablet.bitMaps[columnIndex].isMarked(row); // check isNull by bitMap in tablet ValueChunkWriter valueChunkWriter = - valueChunkWriterMap.get(measurementSchemas.get(columnIndex).getMeasurementId()); + tryToAddSeriesWriterInternal(measurementSchemas.get(columnIndex)); switch (measurementSchemas.get(columnIndex).getType()) { case BOOLEAN: valueChunkWriter.write(time, ((boolean[]) tablet.values[columnIndex])[row], isNull); @@ -216,6 +238,8 @@ public int write(Tablet tablet) throws WriteProcessException, IOException { measurementSchemas.get(columnIndex).getType())); } } + // TODO: we can write the null columns after whole insertion, according to the point number + // in the time chunk before and after, no need to do it in a row-by-row manner if (!emptyValueChunkWriters.isEmpty()) { writeEmptyDataInOneRow(emptyValueChunkWriters); } @@ -340,7 +364,7 @@ private void checkIsHistoryData(long time) throws WriteProcessException { if (time <= lastTime) { throw new WriteProcessException( "Not allowed to write out-of-order data in timeseries " - + ((PlainDeviceID) deviceId).toStringID() + + deviceId + 
TsFileConstant.PATH_SEPARATOR + "" + ", time should later than " diff --git a/java/tsfile/src/main/java/org/apache/tsfile/write/chunk/IChunkGroupWriter.java b/java/tsfile/src/main/java/org/apache/tsfile/write/chunk/IChunkGroupWriter.java index c9ca7a922..bc7bc12c4 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/write/chunk/IChunkGroupWriter.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/write/chunk/IChunkGroupWriter.java @@ -21,7 +21,7 @@ import org.apache.tsfile.exception.write.WriteProcessException; import org.apache.tsfile.write.record.Tablet; import org.apache.tsfile.write.record.datapoint.DataPoint; -import org.apache.tsfile.write.schema.MeasurementSchema; +import org.apache.tsfile.write.schema.IMeasurementSchema; import org.apache.tsfile.write.writer.TsFileIOWriter; import java.io.IOException; @@ -52,13 +52,19 @@ public interface IChunkGroupWriter { */ int write(Tablet tablet) throws WriteProcessException, IOException; + int write(Tablet table, int startRowIndex, int endRowIndex) + throws WriteProcessException, IOException; + + int write(Tablet table, int startRowIndex, int endRowIndex, int startColIndex, int endColIndex) + throws WriteProcessException, IOException; + /** * flushing method for serializing to local file system or HDFS. Implemented by * ChunkWriterImpl.writeToFileWriter(). * * @param tsfileWriter - TSFileIOWriter - * @throws IOException exception in IO * @return current ChunkGroupDataSize + * @throws IOException exception in IO */ long flushToFileWriter(TsFileIOWriter tsfileWriter) throws IOException; @@ -76,15 +82,13 @@ public interface IChunkGroupWriter { * * @param measurementSchema a measurement descriptor containing the message of the series */ - void tryToAddSeriesWriter(MeasurementSchema measurementSchema) throws IOException; + void tryToAddSeriesWriter(IMeasurementSchema measurementSchema) throws IOException; /** * given a measurement descriptor list, create corresponding writers and put into this * ChunkGroupWriter. 
- * - * @param measurementSchemas */ - void tryToAddSeriesWriter(List measurementSchemas) throws IOException; + void tryToAddSeriesWriter(List measurementSchemas) throws IOException; /** * get the serialized size of current chunkGroup header + all chunks. Notice, the value does not diff --git a/java/tsfile/src/main/java/org/apache/tsfile/write/chunk/NonAlignedChunkGroupWriterImpl.java b/java/tsfile/src/main/java/org/apache/tsfile/write/chunk/NonAlignedChunkGroupWriterImpl.java index 2cb3c1e47..3c453a300 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/write/chunk/NonAlignedChunkGroupWriterImpl.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/write/chunk/NonAlignedChunkGroupWriterImpl.java @@ -22,14 +22,12 @@ import org.apache.tsfile.enums.TSDataType; import org.apache.tsfile.exception.write.WriteProcessException; import org.apache.tsfile.file.metadata.IDeviceID; -import org.apache.tsfile.file.metadata.PlainDeviceID; import org.apache.tsfile.utils.Binary; import org.apache.tsfile.utils.DateUtils; import org.apache.tsfile.write.UnSupportedDataTypeException; import org.apache.tsfile.write.record.Tablet; import org.apache.tsfile.write.record.datapoint.DataPoint; import org.apache.tsfile.write.schema.IMeasurementSchema; -import org.apache.tsfile.write.schema.MeasurementSchema; import org.apache.tsfile.write.writer.TsFileIOWriter; import org.slf4j.Logger; @@ -60,14 +58,14 @@ public NonAlignedChunkGroupWriterImpl(IDeviceID deviceId) { } @Override - public void tryToAddSeriesWriter(MeasurementSchema schema) { + public void tryToAddSeriesWriter(IMeasurementSchema schema) { if (!chunkWriters.containsKey(schema.getMeasurementId())) { this.chunkWriters.put(schema.getMeasurementId(), new ChunkWriterImpl(schema)); } } @Override - public void tryToAddSeriesWriter(List schemas) { + public void tryToAddSeriesWriter(List schemas) { for (IMeasurementSchema schema : schemas) { if (!chunkWriters.containsKey(schema.getMeasurementId())) { 
this.chunkWriters.put(schema.getMeasurementId(), new ChunkWriterImpl(schema)); @@ -92,14 +90,26 @@ public int write(long time, List data) throws IOException, WriteProce } @Override - public int write(Tablet tablet) throws WriteProcessException { + public int write(Tablet tablet) throws IOException, WriteProcessException { + return write(tablet, 0, tablet.rowSize, 0, tablet.getSchemas().size()); + } + + public int write(Tablet tablet, int startRowIndex, int endRowIndex) + throws IOException, WriteProcessException { + return write(tablet, startRowIndex, endRowIndex, 0, tablet.getSchemas().size()); + } + + @Override + public int write( + Tablet tablet, int startRowIndex, int endRowIndex, int startColIndex, int endColIndex) + throws WriteProcessException, IOException { int maxPointCount = 0, pointCount; - List timeseries = tablet.getSchemas(); - for (int column = 0; column < timeseries.size(); column++) { + List timeseries = tablet.getSchemas(); + for (int column = startColIndex; column < endColIndex; column++) { String measurementId = timeseries.get(column).getMeasurementId(); TSDataType tsDataType = timeseries.get(column).getType(); pointCount = 0; - for (int row = 0; row < tablet.rowSize; row++) { + for (int row = startRowIndex; row < endRowIndex; row++) { // check isNull in tablet if (tablet.bitMaps != null && tablet.bitMaps[column] != null @@ -191,7 +201,7 @@ private void checkIsHistoryData(String measurementId, long time) throws WritePro if (time <= lastTimeMap.getOrDefault(measurementId, -1L)) { throw new WriteProcessException( "Not allowed to write out-of-order data in timeseries " - + ((PlainDeviceID) deviceId).toStringID() + + deviceId + TsFileConstant.PATH_SEPARATOR + measurementId + ", time should later than " diff --git a/java/tsfile/src/main/java/org/apache/tsfile/write/record/TSRecord.java b/java/tsfile/src/main/java/org/apache/tsfile/write/record/TSRecord.java index 6c81a7d27..53f685193 100644 --- 
a/java/tsfile/src/main/java/org/apache/tsfile/write/record/TSRecord.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/write/record/TSRecord.java @@ -19,7 +19,7 @@ package org.apache.tsfile.write.record; import org.apache.tsfile.file.metadata.IDeviceID; -import org.apache.tsfile.file.metadata.PlainDeviceID; +import org.apache.tsfile.file.metadata.IDeviceID.Factory; import org.apache.tsfile.utils.StringContainer; import org.apache.tsfile.write.record.datapoint.DataPoint; @@ -36,7 +36,7 @@ public class TSRecord { public long time; /** deviceId of this TSRecord. */ - public String deviceId; + public IDeviceID deviceId; /** all value of this TSRecord. */ public List dataPointList = new ArrayList<>(); @@ -49,12 +49,12 @@ public class TSRecord { */ public TSRecord(long timestamp, String deviceId) { this.time = timestamp; - this.deviceId = deviceId; + this.deviceId = Factory.DEFAULT_FACTORY.create(deviceId); } public TSRecord(long timestamp, IDeviceID deviceId) { this.time = timestamp; - this.deviceId = ((PlainDeviceID) deviceId).toStringID(); + this.deviceId = deviceId; } public void setTime(long timestamp) { diff --git a/java/tsfile/src/main/java/org/apache/tsfile/write/record/Tablet.java b/java/tsfile/src/main/java/org/apache/tsfile/write/record/Tablet.java index d88a4ab9f..7b5b9910b 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/write/record/Tablet.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/write/record/Tablet.java @@ -21,6 +21,8 @@ import org.apache.tsfile.common.conf.TSFileConfig; import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.file.metadata.IDeviceID; +import org.apache.tsfile.file.metadata.StringArrayDeviceID; import org.apache.tsfile.utils.Binary; import org.apache.tsfile.utils.BitMap; import org.apache.tsfile.utils.BytesUtils; @@ -28,6 +30,7 @@ import org.apache.tsfile.utils.PublicBAOS; import org.apache.tsfile.utils.ReadWriteIOUtils; import org.apache.tsfile.write.UnSupportedDataTypeException; +import 
org.apache.tsfile.write.schema.IMeasurementSchema; import org.apache.tsfile.write.schema.MeasurementSchema; import java.io.DataOutputStream; @@ -55,11 +58,20 @@ public class Tablet { private static final int DEFAULT_SIZE = 1024; private static final String NOT_SUPPORT_DATATYPE = "Data type %s is not supported."; - /** DeviceId of this {@link Tablet} */ - public String deviceId; + /** DeviceId if using tree-view interfaces or TableName when using table-view interfaces. */ + private String insertTargetName; /** The list of {@link MeasurementSchema}s for creating the {@link Tablet} */ - private List schemas; + private List schemas; + + /** + * Marking the type of each column, namely ID or MEASUREMENT. Notice: the ID columns must be the + * FIRST ones. + */ + private List columnTypes; + + /** Columns in [0, idColumnRange) are all ID columns. */ + private int idColumnRange; /** MeasurementId->indexOf({@link MeasurementSchema}) */ private final Map measurementIndex; @@ -83,12 +95,25 @@ public class Tablet { * Return a {@link Tablet} with default specified row number. This is the standard constructor * (all Tablet should be the same size). 
* - * @param deviceId the name of the device specified to be written in + * @param insertTargetName the name of the device specified to be written in * @param schemas the list of {@link MeasurementSchema}s for creating the tablet, only * measurementId and type take effects */ - public Tablet(String deviceId, List schemas) { - this(deviceId, schemas, DEFAULT_SIZE); + public Tablet(String insertTargetName, List schemas) { + this(insertTargetName, schemas, DEFAULT_SIZE); + } + + public Tablet(String insertTargetName, List schemas, int maxRowNumber) { + this( + insertTargetName, + schemas, + ColumnType.nCopy(ColumnType.MEASUREMENT, schemas.size()), + maxRowNumber); + } + + public Tablet( + String insertTargetName, List schemas, List columnTypes) { + this(insertTargetName, schemas, columnTypes, DEFAULT_SIZE); } /** @@ -96,14 +121,19 @@ public Tablet(String deviceId, List schemas) { * constructor directly for testing purposes. {@link Tablet} should normally always be default * size. * - * @param deviceId the name of the device specified to be written in + * @param insertTargetName the name of the device specified to be written in * @param schemas the list of {@link MeasurementSchema}s for creating the row batch, only * measurementId and type take effects * @param maxRowNumber the maximum number of rows for this tablet */ - public Tablet(String deviceId, List schemas, int maxRowNumber) { - this.deviceId = deviceId; + public Tablet( + String insertTargetName, + List schemas, + List columnTypes, + int maxRowNumber) { + this.insertTargetName = insertTargetName; this.schemas = new ArrayList<>(schemas); + setColumnTypes(columnTypes); this.maxRowNumber = maxRowNumber; measurementIndex = new HashMap<>(); constructMeasurementIndexMap(); @@ -117,7 +147,7 @@ public Tablet(String deviceId, List schemas, int maxRowNumber * Return a {@link Tablet} with specified timestamps and values. Only call this constructor * directly for Trigger. 
* - * @param deviceId the name of the device specified to be written in + * @param insertTargetName the name of the device specified to be written in * @param schemas the list of {@link MeasurementSchema}s for creating the row batch, only * measurementId and type take effects * @param timestamps given timestamps @@ -126,14 +156,33 @@ public Tablet(String deviceId, List schemas, int maxRowNumber * @param maxRowNumber the maximum number of rows for this {@link Tablet} */ public Tablet( - String deviceId, - List schemas, + String insertTargetName, + List schemas, long[] timestamps, Object[] values, BitMap[] bitMaps, int maxRowNumber) { - this.deviceId = deviceId; + this( + insertTargetName, + schemas, + ColumnType.nCopy(ColumnType.MEASUREMENT, schemas.size()), + timestamps, + values, + bitMaps, + maxRowNumber); + } + + public Tablet( + String insertTargetName, + List schemas, + List columnTypes, + long[] timestamps, + Object[] values, + BitMap[] bitMaps, + int maxRowNumber) { + this.insertTargetName = insertTargetName; this.schemas = schemas; + setColumnTypes(columnTypes); this.timestamps = timestamps; this.values = values; this.bitMaps = bitMaps; @@ -146,17 +195,17 @@ public Tablet( private void constructMeasurementIndexMap() { int indexInSchema = 0; - for (MeasurementSchema schema : schemas) { + for (IMeasurementSchema schema : schemas) { measurementIndex.put(schema.getMeasurementId(), indexInSchema); indexInSchema++; } } - public void setDeviceId(String deviceId) { - this.deviceId = deviceId; + public void setInsertTargetName(String insertTargetName) { + this.insertTargetName = insertTargetName; } - public void setSchemas(List schemas) { + public void setSchemas(List schemas) { this.schemas = schemas; } @@ -173,7 +222,7 @@ public void addTimestamp(int rowIndex, long timestamp) { public void addValue(String measurementId, int rowIndex, Object value) { int indexOfSchema = measurementIndex.get(measurementId); - MeasurementSchema measurementSchema = 
schemas.get(indexOfSchema); + IMeasurementSchema measurementSchema = schemas.get(indexOfSchema); addValueOfDataType(measurementSchema.getType(), rowIndex, indexOfSchema, value); } @@ -194,24 +243,19 @@ private void addValueOfDataType( switch (dataType) { case TEXT: case STRING: + case BLOB: { Binary[] sensor = (Binary[]) values[indexOfSchema]; if (value instanceof Binary) { - sensor[rowIndex] = (Binary) value; + sensor[rowIndex] = value != null ? (Binary) value : Binary.EMPTY_VALUE; } else { sensor[rowIndex] = value != null - ? new Binary((String) value, TSFileConfig.STRING_CHARSET) + ? new Binary(((String) value).getBytes(TSFileConfig.STRING_CHARSET)) : Binary.EMPTY_VALUE; } break; } - case BLOB: - { - Binary[] sensor = (Binary[]) values[indexOfSchema]; - sensor[rowIndex] = value != null ? (Binary) value : Binary.EMPTY_VALUE; - break; - } case FLOAT: { float[] sensor = (float[]) values[indexOfSchema]; @@ -254,7 +298,7 @@ private void addValueOfDataType( } } - public List getSchemas() { + public List getSchemas() { return schemas; } @@ -285,14 +329,16 @@ private void createColumns() { // value column values = new Object[valueColumnsSize]; int columnIndex = 0; - for (MeasurementSchema schema : schemas) { + for (int i = 0; i < schemas.size(); i++) { + IMeasurementSchema schema = schemas.get(i); + ColumnType columnType = columnTypes.get(i); TSDataType dataType = schema.getType(); - values[columnIndex] = createValueColumnOfDataType(dataType); + values[columnIndex] = createValueColumnOfDataType(dataType, columnType); columnIndex++; } } - private Object createValueColumnOfDataType(TSDataType dataType) { + private Object createValueColumnOfDataType(TSDataType dataType, ColumnType columnType) { Object valueColumn; switch (dataType) { @@ -336,7 +382,7 @@ public int getTimeBytesSize() { public int getTotalValueOccupation() { int valueOccupation = 0; int columnIndex = 0; - for (MeasurementSchema schema : schemas) { + for (IMeasurementSchema schema : schemas) { valueOccupation 
+= calOccupationOfOneColumn(schema.getType(), columnIndex); columnIndex++; } @@ -394,7 +440,7 @@ public ByteBuffer serialize() throws IOException { } public void serialize(DataOutputStream stream) throws IOException { - ReadWriteIOUtils.write(deviceId, stream); + ReadWriteIOUtils.write(insertTargetName, stream); ReadWriteIOUtils.write(rowSize, stream); writeMeasurementSchemas(stream); writeTimes(stream); @@ -407,12 +453,15 @@ private void writeMeasurementSchemas(DataOutputStream stream) throws IOException ReadWriteIOUtils.write(BytesUtils.boolToByte(schemas != null), stream); if (schemas != null) { ReadWriteIOUtils.write(schemas.size(), stream); - for (MeasurementSchema schema : schemas) { + for (int i = 0; i < schemas.size(); i++) { + IMeasurementSchema schema = schemas.get(i); + ColumnType columnType = columnTypes.get(i); if (schema == null) { ReadWriteIOUtils.write(BytesUtils.boolToByte(false), stream); } else { ReadWriteIOUtils.write(BytesUtils.boolToByte(true), stream); schema.serializeTo(stream); + ReadWriteIOUtils.write((byte) columnType.ordinal(), stream); } } } @@ -450,12 +499,13 @@ private void writeValues(DataOutputStream stream) throws IOException { if (values != null) { int size = (schemas == null ? 
0 : schemas.size()); for (int i = 0; i < size; i++) { - serializeColumn(schemas.get(i).getType(), values[i], stream); + serializeColumn(schemas.get(i).getType(), values[i], stream, columnTypes.get(i)); } } } - private void serializeColumn(TSDataType dataType, Object column, DataOutputStream stream) + private void serializeColumn( + TSDataType dataType, Object column, DataOutputStream stream, ColumnType columnType) throws IOException { ReadWriteIOUtils.write(BytesUtils.boolToByte(column != null), stream); @@ -499,8 +549,8 @@ private void serializeColumn(TSDataType dataType, Object column, DataOutputStrea } break; case TEXT: - case BLOB: case STRING: + case BLOB: Binary[] binaryValues = (Binary[]) column; for (int j = 0; j < rowSize; j++) { ReadWriteIOUtils.write(BytesUtils.boolToByte(binaryValues[j] != null), stream); @@ -523,7 +573,8 @@ public static Tablet deserialize(ByteBuffer byteBuffer) { // deserialize schemas int schemaSize = 0; - List schemas = new ArrayList<>(); + List schemas = new ArrayList<>(); + List columnTypes = new ArrayList<>(); boolean isSchemasNotNull = BytesUtils.byteToBool(ReadWriteIOUtils.readByte(byteBuffer)); if (isSchemasNotNull) { schemaSize = ReadWriteIOUtils.readInt(byteBuffer); @@ -531,6 +582,7 @@ public static Tablet deserialize(ByteBuffer byteBuffer) { boolean hasSchema = BytesUtils.byteToBool(ReadWriteIOUtils.readByte(byteBuffer)); if (hasSchema) { schemas.add(MeasurementSchema.deserializeFrom(byteBuffer)); + columnTypes.add(ColumnType.values()[byteBuffer.get()]); } } } @@ -553,14 +605,14 @@ public static Tablet deserialize(ByteBuffer byteBuffer) { // deserialize values TSDataType[] dataTypes = - schemas.stream().map(MeasurementSchema::getType).toArray(TSDataType[]::new); + schemas.stream().map(IMeasurementSchema::getType).toArray(TSDataType[]::new); Object[] values = new Object[schemaSize]; boolean isValuesNotNull = BytesUtils.byteToBool(ReadWriteIOUtils.readByte(byteBuffer)); if (isValuesNotNull) { - values = 
readTabletValuesFromBuffer(byteBuffer, dataTypes, schemaSize, rowSize); + values = readTabletValuesFromBuffer(byteBuffer, dataTypes, columnTypes, schemaSize, rowSize); } - Tablet tablet = new Tablet(deviceId, schemas, times, values, bitMaps, rowSize); + Tablet tablet = new Tablet(deviceId, schemas, columnTypes, times, values, bitMaps, rowSize); tablet.constructMeasurementIndexMap(); return tablet; } @@ -581,11 +633,16 @@ public static BitMap[] readBitMapsFromBuffer(ByteBuffer byteBuffer, int columns) /** * @param byteBuffer data values + * @param columnTypes * @param columns column number */ @SuppressWarnings("squid:S3776") // Suppress high Cognitive Complexity warning public static Object[] readTabletValuesFromBuffer( - ByteBuffer byteBuffer, TSDataType[] types, int columns, int rowSize) { + ByteBuffer byteBuffer, + TSDataType[] types, + List columnTypes, + int columns, + int rowSize) { Object[] values = new Object[columns]; for (int i = 0; i < columns; i++) { boolean isValueColumnsNotNull = BytesUtils.byteToBool(ReadWriteIOUtils.readByte(byteBuffer)); @@ -637,8 +694,8 @@ public static Object[] readTabletValuesFromBuffer( values[i] = doubleValues; break; case TEXT: - case BLOB: case STRING: + case BLOB: Binary[] binaryValues = new Binary[rowSize]; for (int index = 0; index < rowSize; index++) { boolean isNotNull = BytesUtils.byteToBool(ReadWriteIOUtils.readByte(byteBuffer)); @@ -661,10 +718,11 @@ public static Object[] readTabletValuesFromBuffer( } /** - * Note that the function will judge 2 {@link Tablet}s to be equal when their contents are logically the - * same. Namely, a {@link Tablet} with {@link BitMap} "null" may be equal to another {@link Tablet} with 3 columns and - * {@link BitMap "[null, null, null]", and a {@link Tablet} with rowSize 2 is judged identical to other {@link Tablet}s - * regardless of any timeStamps with indexes larger than or equal to 2. 
+ * Note that the function will judge 2 {@link Tablet}s to be equal when their contents are + * logically the same. Namely, a {@link Tablet} with {@link BitMap} "null" may be equal to another + * {@link Tablet} with 3 columns and + * {@link BitMap "[null, null, null]", and a {@link Tablet} with rowSize 2 is judged identical to + * other {@link Tablet}s regardless of any timeStamps with indexes larger than or equal to 2. * * @param o the tablet to compare * @return {@code true} if the tablets are logically equal @@ -681,8 +739,9 @@ public boolean equals(Object o) { boolean flag = that.rowSize == rowSize - && Objects.equals(that.deviceId, deviceId) + && Objects.equals(that.insertTargetName, insertTargetName) && Objects.equals(that.schemas, schemas) + && Objects.equals(that.columnTypes, columnTypes) && Objects.equals(that.measurementIndex, measurementIndex); if (!flag) { return false; @@ -792,8 +851,8 @@ public boolean equals(Object o) { } break; case TEXT: - case BLOB: case STRING: + case BLOB: Binary[] thisBinaryValues = (Binary[]) values[i]; Binary[] thatBinaryValues = (Binary[]) thatValues[i]; if (thisBinaryValues.length < rowSize || thatBinaryValues.length < rowSize) { @@ -858,4 +917,117 @@ private boolean isBitMapsEqual(BitMap[] thisBitMaps, BitMap[] thatBitMaps, int c } return true; } + + public boolean isNull(int i, int j) { + return bitMaps != null && bitMaps[j] != null && !bitMaps[j].isMarked(i); + } + + /** + * @param i row number + * @param j column number + * @return the string format of the i-th value in the j-th column. 
+ */ + public Object getValue(int i, int j) { + if (isNull(i, j)) { + return null; + } + switch (schemas.get(j).getType()) { + case BLOB: + case TEXT: + case STRING: + return ((Binary[]) values[j])[i]; + case INT32: + return ((int[]) values[j])[i]; + case FLOAT: + return ((float[]) values[j])[i]; + case DOUBLE: + return ((double[]) values[j])[i]; + case BOOLEAN: + return ((boolean[]) values[j])[i]; + case INT64: + return ((long[]) values[j])[i]; + default: + throw new IllegalArgumentException("Unsupported type: " + schemas.get(j).getType()); + } + } + + /** + * Only used when the tablet is used for table-view interfaces。 + * + * @param i a row number. + * @return the IDeviceID of the i-th row. + */ + public IDeviceID getDeviceID(int i) { + String[] idArray = new String[idColumnRange + 1]; + idArray[0] = insertTargetName; + for (int j = 0; j < idColumnRange; j++) { + final Object value = getValue(i, j); + idArray[j + 1] = value != null ? value.toString() : null; + } + return new StringArrayDeviceID(idArray); + } + + public int getIdColumnRange() { + return idColumnRange; + } + + public void setColumnTypes(List columnTypes) { + this.columnTypes = columnTypes; + idColumnRange = 0; + for (ColumnType columnType : columnTypes) { + if (columnType.equals(ColumnType.MEASUREMENT)) { + break; + } + idColumnRange++; + } + } + + public enum ColumnType { + ID, + MEASUREMENT, + ATTRIBUTE; + + public static List nCopy(ColumnType type, int n) { + List result = new ArrayList<>(n); + for (int i = 0; i < n; i++) { + result.add(type); + } + return result; + } + } + + /** + * A tree-interface. + * + * @return the insertTargetName as the deviceId + */ + public String getDeviceId() { + return insertTargetName; + } + + /** + * A tree-interface. 
+ * + * @param deviceId set the deviceId as the insertTargetName + */ + public void setDeviceId(String deviceId) { + this.insertTargetName = deviceId; + } + + public String getTableName() { + return insertTargetName; + } + + /** + * A table-interface. + * + * @param tableName set the tableName as the insertTargetName + */ + public void setTableName(String tableName) { + this.insertTargetName = tableName; + } + + public List getColumnTypes() { + return columnTypes; + } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/write/schema/Schema.java b/java/tsfile/src/main/java/org/apache/tsfile/write/schema/Schema.java index 54ab66635..c2ada77bf 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/write/schema/Schema.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/write/schema/Schema.java @@ -18,6 +18,10 @@ */ package org.apache.tsfile.write.schema; +import org.apache.tsfile.file.metadata.ChunkGroupMetadata; +import org.apache.tsfile.file.metadata.IDeviceID; +import org.apache.tsfile.file.metadata.LogicalTableSchema; +import org.apache.tsfile.file.metadata.TableSchema; import org.apache.tsfile.read.common.Path; import org.apache.tsfile.utils.MeasurementGroup; @@ -33,32 +37,45 @@ public class Schema implements Serializable { /** - * Path (devicePath) -> measurementSchema By default, use the LinkedHashMap to store the order of + * IDeviceID -> measurementSchema By default, use the LinkedHashMap to store the order of * insertion */ - private final Map registeredTimeseries; + private Map registeredTimeseries; /** template name -> (measurement -> MeasurementSchema) */ private Map schemaTemplates; + private Map tableSchemaMap = new HashMap<>(); + private boolean enabledUpdateSchema = true; + public Schema() { this.registeredTimeseries = new LinkedHashMap<>(); } - public Schema(Map knownSchema) { + public Schema(Map knownSchema) { this.registeredTimeseries = knownSchema; } + @Deprecated + public void registerTimeseries(Path devicePath, IMeasurementSchema 
measurementSchema) { + registerTimeseries(devicePath.getIDeviceID(), measurementSchema); + } + // This method can only register nonAligned timeseries. - public void registerTimeseries(Path devicePath, MeasurementSchema measurementSchema) { + public void registerTimeseries(IDeviceID deviceID, IMeasurementSchema measurementSchema) { MeasurementGroup group = - registeredTimeseries.getOrDefault(devicePath, new MeasurementGroup(false)); + registeredTimeseries.getOrDefault(deviceID, new MeasurementGroup(false)); group.getMeasurementSchemaMap().put(measurementSchema.getMeasurementId(), measurementSchema); - this.registeredTimeseries.put(devicePath, group); + this.registeredTimeseries.put(deviceID, group); } + @Deprecated public void registerMeasurementGroup(Path devicePath, MeasurementGroup measurementGroup) { - this.registeredTimeseries.put(devicePath, measurementGroup); + this.registeredTimeseries.put(devicePath.getIDeviceID(), measurementGroup); + } + + public void registerMeasurementGroup(IDeviceID deviceID, MeasurementGroup measurementGroup) { + this.registeredTimeseries.put(deviceID, measurementGroup); } public void registerSchemaTemplate(String templateName, MeasurementGroup measurementGroup) { @@ -68,6 +85,10 @@ public void registerSchemaTemplate(String templateName, MeasurementGroup measure this.schemaTemplates.put(templateName, measurementGroup); } + public void registerTableSchema(TableSchema tableSchema) { + tableSchemaMap.put(tableSchema.getTableName(), tableSchema); + } + /** If template does not exist, an nonAligned timeseries is created by default */ public void extendTemplate(String templateName, MeasurementSchema descriptor) { if (schemaTemplates == null) { @@ -80,17 +101,26 @@ public void extendTemplate(String templateName, MeasurementSchema descriptor) { this.schemaTemplates.put(templateName, measurementGroup); } - public void registerDevice(String deviceId, String templateName) { + public void registerDevice(String deviceIdString, String templateName) 
{ + registerDevice(IDeviceID.Factory.DEFAULT_FACTORY.create(deviceIdString), templateName); + } + + public void registerDevice(IDeviceID deviceId, String templateName) { if (!schemaTemplates.containsKey(templateName)) { return; } - Map template = + Map template = schemaTemplates.get(templateName).getMeasurementSchemaMap(); boolean isAligned = schemaTemplates.get(templateName).isAligned(); - registerMeasurementGroup(new Path(deviceId), new MeasurementGroup(isAligned, template)); + registerMeasurementGroup(deviceId, new MeasurementGroup(isAligned, template)); } + @Deprecated public MeasurementGroup getSeriesSchema(Path devicePath) { + return registeredTimeseries.get(devicePath.getIDeviceID()); + } + + public MeasurementGroup getSeriesSchema(IDeviceID devicePath) { return registeredTimeseries.get(devicePath); } @@ -99,12 +129,38 @@ public Map getSchemaTemplates() { } /** check if this schema contains a measurement named measurementId. */ - public boolean containsDevice(Path devicePath) { + public boolean containsDevice(IDeviceID devicePath) { return registeredTimeseries.containsKey(devicePath); } + public void setRegisteredTimeseries(Map registeredTimeseries) { + this.registeredTimeseries = registeredTimeseries; + } + // for test - public Map getRegisteredTimeseriesMap() { + public Map getRegisteredTimeseriesMap() { return registeredTimeseries; } + + public void updateTableSchema(ChunkGroupMetadata chunkGroupMetadata) { + if (!enabledUpdateSchema) { + return; + } + IDeviceID deviceID = chunkGroupMetadata.getDevice(); + String tableName = deviceID.getTableName(); + TableSchema tableSchema = tableSchemaMap.computeIfAbsent(tableName, LogicalTableSchema::new); + tableSchema.update(chunkGroupMetadata); + } + + public Map getTableSchemaMap() { + return tableSchemaMap; + } + + public boolean isEnabledUpdateSchema() { + return enabledUpdateSchema; + } + + public void setEnabledUpdateSchema(boolean enabledUpdateSchema) { + this.enabledUpdateSchema = enabledUpdateSchema; + } } 
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/write/writer/RestorableTsFileIOWriter.java b/java/tsfile/src/main/java/org/apache/tsfile/write/writer/RestorableTsFileIOWriter.java index 74d84fb25..20837085f 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/write/writer/RestorableTsFileIOWriter.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/write/writer/RestorableTsFileIOWriter.java @@ -29,6 +29,7 @@ import org.apache.tsfile.read.TsFileSequenceReader; import org.apache.tsfile.read.common.Path; import org.apache.tsfile.write.schema.IMeasurementSchema; +import org.apache.tsfile.write.schema.Schema; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -59,7 +60,7 @@ public class RestorableTsFileIOWriter extends TsFileIOWriter { private static final Logger logger = LoggerFactory.getLogger("FileMonitor"); private long truncatedSize = -1; - private Map knownSchemas = new HashMap<>(); + private Schema schema = new Schema(); private int lastFlushedChunkGroupIndex = 0; @@ -108,8 +109,8 @@ public RestorableTsFileIOWriter(File file, boolean truncate) throws IOException if (file.exists()) { try (TsFileSequenceReader reader = new TsFileSequenceReader(file.getAbsolutePath(), false)) { - - truncatedSize = reader.selfCheck(knownSchemas, chunkGroupMetadataList, true); + schema.setEnabledUpdateSchema(false); + truncatedSize = reader.selfCheck(schema, chunkGroupMetadataList, true); minPlanIndex = reader.getMinPlanIndex(); maxPlanIndex = reader.getMaxPlanIndex(); if (truncatedSize == TsFileCheckStatus.COMPLETE_FILE) { @@ -167,8 +168,8 @@ long getTruncatedSize() { return truncatedSize; } - public Map getKnownSchema() { - return knownSchemas; + public Schema getKnownSchema() { + return schema; } /** @@ -279,7 +280,7 @@ private List getAppendedRowMetadata() { } public void addSchema(Path path, IMeasurementSchema schema) { - knownSchemas.put(path, schema); + this.schema.registerTimeseries(path.getIDeviceID(), schema); } @Override diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/write/writer/TsFileIOWriter.java b/java/tsfile/src/main/java/org/apache/tsfile/write/writer/TsFileIOWriter.java index ef9515c57..ffd30048f 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/write/writer/TsFileIOWriter.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/write/writer/TsFileIOWriter.java @@ -45,6 +45,7 @@ import org.apache.tsfile.utils.Pair; import org.apache.tsfile.utils.PublicBAOS; import org.apache.tsfile.utils.ReadWriteIOUtils; +import org.apache.tsfile.write.schema.Schema; import org.apache.tsfile.write.writer.tsmiterator.TSMIterator; import org.apache.commons.io.FileUtils; @@ -61,12 +62,14 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Queue; import java.util.TreeMap; import static org.apache.tsfile.file.metadata.MetadataIndexConstructor.addCurrentIndexNodeToQueue; import static org.apache.tsfile.file.metadata.MetadataIndexConstructor.checkAndBuildLevelIndex; import static org.apache.tsfile.file.metadata.MetadataIndexConstructor.generateRootNode; +import static org.apache.tsfile.file.metadata.MetadataIndexConstructor.splitDeviceByTable; /** * TsFileIOWriter is used to construct metadata and write data stored in memory to output stream. @@ -84,6 +87,9 @@ public class TsFileIOWriter implements AutoCloseable { VERSION_NUMBER_BYTE = TSFileConfig.VERSION_NUMBER; } + /** schema of this TsFile. */ + protected Schema schema = new Schema(); + protected TsFileOutput out; protected boolean canWrite = true; protected File file; @@ -116,6 +122,8 @@ public class TsFileIOWriter implements AutoCloseable { private volatile int chunkMetadataCount = 0; public static final String CHUNK_METADATA_TEMP_FILE_SUFFIX = ".meta"; + private boolean generateTableSchema = false; + /** empty construct function. 
*/ protected TsFileIOWriter() {} @@ -189,8 +197,13 @@ public void endChunkGroup() throws IOException { if (currentChunkGroupDeviceId == null || chunkMetadataList.isEmpty()) { return; } - chunkGroupMetadataList.add( - new ChunkGroupMetadata(currentChunkGroupDeviceId, chunkMetadataList)); + + ChunkGroupMetadata chunkGroupMetadata = + new ChunkGroupMetadata(currentChunkGroupDeviceId, chunkMetadataList); + if (generateTableSchema) { + getSchema().updateTableSchema(chunkGroupMetadata); + } + chunkGroupMetadataList.add(chunkGroupMetadata); currentChunkGroupDeviceId = null; chunkMetadataList = null; out.flush(); @@ -229,7 +242,13 @@ public void startFlushChunk( throws IOException { currentChunkMetadata = - new ChunkMetadata(measurementId, tsDataType, out.getPosition(), statistics); + new ChunkMetadata( + measurementId, + tsDataType, + encodingType, + compressionCodecName, + out.getPosition(), + statistics); currentChunkMetadata.setMask((byte) mask); ChunkHeader header = @@ -251,6 +270,8 @@ public void writeChunk(Chunk chunk, ChunkMetadata chunkMetadata) throws IOExcept new ChunkMetadata( chunkHeader.getMeasurementID(), chunkHeader.getDataType(), + chunkHeader.getEncodingType(), + chunkHeader.getCompressionType(), out.getPosition(), chunkMetadata.getStatistics()); chunkHeader.serializeTo(out.wrapAsStream()); @@ -273,7 +294,13 @@ public void writeEmptyValueChunk( Statistics statistics) throws IOException { currentChunkMetadata = - new ChunkMetadata(measurementId, tsDataType, out.getPosition(), statistics); + new ChunkMetadata( + measurementId, + tsDataType, + encodingType, + compressionType, + out.getPosition(), + statistics); currentChunkMetadata.setMask(TsFileConstant.VALUE_COLUMN_MASK); ChunkHeader emptyChunkHeader = new ChunkHeader( @@ -294,6 +321,8 @@ public void writeChunk(Chunk chunk) throws IOException { new ChunkMetadata( chunkHeader.getMeasurementID(), chunkHeader.getDataType(), + chunkHeader.getEncodingType(), + chunkHeader.getCompressionType(), 
out.getPosition(), chunk.getChunkStatistic()); chunkHeader.serializeTo(out.wrapAsStream()); @@ -386,7 +415,6 @@ private void readChunkMetadataAndConstructIndexTree() throws IOException { // build bloom filter filter.add(currentPath.getFullPath()); // construct the index tree node for the series - currentDevice = currentPath.getIDeviceID(); if (!currentDevice.equals(prevDevice)) { if (prevDevice != null) { @@ -428,14 +456,22 @@ private void readChunkMetadataAndConstructIndexTree() throws IOException { measurementMetadataIndexQueue, out, MetadataIndexNodeType.INTERNAL_MEASUREMENT)); } - MetadataIndexNode metadataIndex = checkAndBuildLevelIndex(deviceMetadataIndexMap, out); + Map> tableDeviceNodesMap = + splitDeviceByTable(deviceMetadataIndexMap); + + // build an index root for each table + Map tableNodesMap = new TreeMap<>(); + for (Entry> entry : tableDeviceNodesMap.entrySet()) { + tableNodesMap.put(entry.getKey(), checkAndBuildLevelIndex(entry.getValue(), out)); + } TsFileMetadata tsFileMetadata = new TsFileMetadata(); - tsFileMetadata.setMetadataIndex(metadataIndex); + tsFileMetadata.setTableMetadataIndexNodeMap(tableNodesMap); + tsFileMetadata.setTableSchemaMap(schema.getTableSchemaMap()); tsFileMetadata.setMetaOffset(metaOffset); + tsFileMetadata.setBloomFilter(filter); int size = tsFileMetadata.serializeTo(out.wrapAsStream()); - size += tsFileMetadata.serializeBloomFilter(out.wrapAsStream(), filter); // write TsFileMetaData size ReadWriteIOUtils.write(size, out.wrapAsStream()); @@ -654,12 +690,12 @@ private int writeChunkMetadataToTempFile( int writtenSize = 0; // [DeviceId] measurementId datatype size chunkMetadataBuffer if (lastSerializePath == null - || !seriesPath.getDevice().equals(lastSerializePath.getDevice())) { + || !seriesPath.getIDeviceID().equals(lastSerializePath.getIDeviceID())) { // mark the end position of last device endPosInCMTForDevice.add(tempOutput.getPosition()); // serialize the device // for each device, we only serialize it once, in 
order to save io - writtenSize += ReadWriteIOUtils.write(seriesPath.getDevice(), tempOutput.wrapAsStream()); + writtenSize += seriesPath.getIDeviceID().serialize(tempOutput.wrapAsStream()); } if (isNewPath && !iChunkMetadataList.isEmpty()) { // serialize the public info of this measurement @@ -691,4 +727,20 @@ public void flush() throws IOException { public TsFileOutput getTsFileOutput() { return this.out; } + + public Schema getSchema() { + return schema; + } + + public void setSchema(Schema schema) { + this.schema = schema; + } + + public boolean isGenerateTableSchema() { + return generateTableSchema; + } + + public void setGenerateTableSchema(boolean generateTableSchema) { + this.generateTableSchema = generateTableSchema; + } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/write/writer/TsFileIOWriterEndFileTest.java b/java/tsfile/src/main/java/org/apache/tsfile/write/writer/TsFileIOWriterEndFileTest.java index 27e9e104d..f10d82ed3 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/write/writer/TsFileIOWriterEndFileTest.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/write/writer/TsFileIOWriterEndFileTest.java @@ -20,7 +20,7 @@ package org.apache.tsfile.write.writer; import org.apache.tsfile.enums.TSDataType; -import org.apache.tsfile.file.metadata.PlainDeviceID; +import org.apache.tsfile.file.metadata.IDeviceID; import org.apache.tsfile.file.metadata.enums.CompressionType; import org.apache.tsfile.file.metadata.enums.TSEncoding; import org.apache.tsfile.write.chunk.ChunkWriterImpl; @@ -32,7 +32,7 @@ public class TsFileIOWriterEndFileTest { public static void main(String[] args) throws Exception { try (TsFileIOWriter writer = new TsFileIOWriter(new File("test.tsfile"))) { for (int deviceIndex = 0; deviceIndex < 1000; deviceIndex++) { - writer.startChunkGroup(new PlainDeviceID("root.sg.d" + deviceIndex)); + writer.startChunkGroup(IDeviceID.Factory.DEFAULT_FACTORY.create("root.sg.d" + deviceIndex)); for (int seriesIndex = 0; seriesIndex < 
1000; seriesIndex++) { ChunkWriterImpl chunkWriter = new ChunkWriterImpl( diff --git a/java/tsfile/src/main/java/org/apache/tsfile/write/writer/tsmiterator/DiskTSMIterator.java b/java/tsfile/src/main/java/org/apache/tsfile/write/writer/tsmiterator/DiskTSMIterator.java index 32a32340c..fd413b43a 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/write/writer/tsmiterator/DiskTSMIterator.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/write/writer/tsmiterator/DiskTSMIterator.java @@ -23,6 +23,8 @@ import org.apache.tsfile.file.metadata.ChunkGroupMetadata; import org.apache.tsfile.file.metadata.ChunkMetadata; import org.apache.tsfile.file.metadata.IChunkMetadata; +import org.apache.tsfile.file.metadata.IDeviceID; +import org.apache.tsfile.file.metadata.IDeviceID.Deserializer; import org.apache.tsfile.file.metadata.TimeseriesMetadata; import org.apache.tsfile.read.common.Path; import org.apache.tsfile.read.reader.LocalTsFileInput; @@ -54,7 +56,7 @@ public class DiskTSMIterator extends TSMIterator { private long fileLength = 0; private long currentPos = 0; private long nextEndPosForDevice = 0; - private String currentDevice; + private IDeviceID currentDevice; private boolean remainsInFile = true; protected DiskTSMIterator( @@ -94,7 +96,7 @@ public Pair next() { private Pair getTimeSerisMetadataFromFile() throws IOException { if (currentPos == nextEndPosForDevice) { // deserialize the current device name - currentDevice = ReadWriteIOUtils.readString(input.wrapAsInputStream()); + currentDevice = Deserializer.DEFAULT_DESERIALIZER.deserializeFrom(input.wrapAsInputStream()); nextEndPosForDevice = endPosForEachDevice.size() > 0 ? 
endPosForEachDevice.removeFirst() : fileLength; } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/write/writer/tsmiterator/TSMIterator.java b/java/tsfile/src/main/java/org/apache/tsfile/write/writer/tsmiterator/TSMIterator.java index 123618fb9..620cc2daa 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/write/writer/tsmiterator/TSMIterator.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/write/writer/tsmiterator/TSMIterator.java @@ -23,7 +23,6 @@ import org.apache.tsfile.file.metadata.ChunkMetadata; import org.apache.tsfile.file.metadata.IChunkMetadata; import org.apache.tsfile.file.metadata.IDeviceID; -import org.apache.tsfile.file.metadata.PlainDeviceID; import org.apache.tsfile.file.metadata.TimeseriesMetadata; import org.apache.tsfile.file.metadata.statistics.Statistics; import org.apache.tsfile.read.common.Path; @@ -121,10 +120,7 @@ public static List>> sortChunkMetadata( chunkMetadataMap .get(chunkGroupMetadata.getDevice()) .computeIfAbsent( - new Path( - ((PlainDeviceID) chunkGroupMetadata.getDevice()).toStringID(), - chunkMetadata.getMeasurementUid(), - false), + new Path(chunkGroupMetadata.getDevice(), chunkMetadata.getMeasurementUid(), false), x -> new ArrayList<>()) .add(chunkMetadata); } @@ -134,10 +130,7 @@ public static List>> sortChunkMetadata( chunkMetadataMap .computeIfAbsent(currentDevice, x -> new TreeMap<>()) .computeIfAbsent( - new Path( - ((PlainDeviceID) currentDevice).toStringID(), - chunkMetadata.getMeasurementUid(), - false), + new Path(currentDevice, chunkMetadata.getMeasurementUid(), false), x -> new ArrayList<>()) .add(chunkMetadata); } diff --git a/java/tsfile/src/test/java/org/apache/tsfile/common/block/TsBlockSerdeTest.java b/java/tsfile/src/test/java/org/apache/tsfile/common/block/TsBlockSerdeTest.java index 640259509..cfedb3809 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/common/block/TsBlockSerdeTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/common/block/TsBlockSerdeTest.java @@ 
-145,7 +145,6 @@ public void testSerializeAndDeserialize2() { output.rewind(); TsBlock deserializedTsBlock = tsBlockSerde.deserialize(output); - assertEquals(tsBlock.getRetainedSizeInBytes(), deserializedTsBlock.getRetainedSizeInBytes()); } catch (IOException e) { e.printStackTrace(); fail(); @@ -181,7 +180,6 @@ public void testSerializeAndDeserialize3() { output.rewind(); TsBlock deserializedTsBlock = tsBlockSerde.deserialize(output); - assertEquals(tsBlock.getRetainedSizeInBytes(), deserializedTsBlock.getRetainedSizeInBytes()); } catch (IOException e) { e.printStackTrace(); fail(); diff --git a/java/tsfile/src/test/java/org/apache/tsfile/compatibility/CompatibilityTest.java b/java/tsfile/src/test/java/org/apache/tsfile/compatibility/CompatibilityTest.java new file mode 100644 index 000000000..f12d2a2e4 --- /dev/null +++ b/java/tsfile/src/test/java/org/apache/tsfile/compatibility/CompatibilityTest.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tsfile.compatibility; + +import org.apache.tsfile.read.TsFileReader; +import org.apache.tsfile.read.TsFileSequenceReader; +import org.apache.tsfile.read.common.Path; +import org.apache.tsfile.read.common.RowRecord; +import org.apache.tsfile.read.expression.QueryExpression; +import org.apache.tsfile.read.query.dataset.QueryDataSet; + +import org.junit.Test; + +import java.io.File; +import java.io.IOException; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +public class CompatibilityTest { + + private String fileName = + "target" + File.separator + "test-classes" + File.separator + "v3TsFile"; + + /** The file is generated by the TsFileWriterTest version 3. */ + @Test + public void testReadV3() { + readOneRow(); + } + + private void readOneRow() { + readOneRow(5); + } + + private void readOneRow(int s2Value) { + try { + TsFileReader tsFileReader = new TsFileReader(new TsFileSequenceReader(fileName)); + QueryDataSet dataSet = + tsFileReader.query( + QueryExpression.create() + .addSelectedPath(new Path("d1", "s1", true)) + .addSelectedPath(new Path("d1", "s2", true)) + .addSelectedPath(new Path("d1", "s3", true))); + while (dataSet.hasNext()) { + RowRecord result = dataSet.next(); + assertEquals(2, result.getFields().size()); + assertEquals(10000, result.getTimestamp()); + assertEquals(5.0f, result.getFields().get(0).getFloatV(), 0.00001); + assertEquals(s2Value, result.getFields().get(1).getIntV()); + } + tsFileReader.close(); + } catch (IOException e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } +} diff --git a/java/tsfile/src/test/java/org/apache/tsfile/exception/TsFileExceptionTest.java b/java/tsfile/src/test/java/org/apache/tsfile/exception/TsFileExceptionTest.java index 2132d999b..74bfbc240 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/exception/TsFileExceptionTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/exception/TsFileExceptionTest.java @@ -140,7 +140,7 @@ 
public void testUnSupportFilterDataTypeException() { @Test public void testNoMeasurementException() { NoMeasurementException e = new NoMeasurementException(MOCK); - assertEquals(MOCK, e.getMessage()); + assertEquals("No measurement for mock", e.getMessage()); } @Test diff --git a/java/tsfile/src/test/java/org/apache/tsfile/file/metadata/IDeviceIDTest.java b/java/tsfile/src/test/java/org/apache/tsfile/file/metadata/IDeviceIDTest.java new file mode 100644 index 000000000..29c8dd7f2 --- /dev/null +++ b/java/tsfile/src/test/java/org/apache/tsfile/file/metadata/IDeviceIDTest.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tsfile.file.metadata; + +import org.apache.tsfile.exception.IllegalDeviceIDException; +import org.apache.tsfile.file.metadata.IDeviceID.Deserializer; +import org.apache.tsfile.file.metadata.IDeviceID.Factory; + +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; + +public class IDeviceIDTest { + + @Test + public void testStartWith() { + IDeviceID deviceID = Factory.DEFAULT_FACTORY.create("root.a.b.c.d"); + assertTrue(deviceID.startWith("root.a")); + assertTrue(deviceID.startWith("root.a.")); + assertTrue(deviceID.startWith("root.a.b")); + assertTrue(deviceID.startWith("root.a.b.")); + assertTrue(deviceID.startWith("root.a.b.c")); + assertTrue(deviceID.startWith("root.a.b.c.d")); + + assertFalse(deviceID.startWith("root.b")); + assertFalse(deviceID.startWith("root.a.b.d")); + assertFalse(deviceID.startWith("root.a.b.c.e")); + + assertFalse(deviceID.startWith("root.a.bb")); + assertFalse(deviceID.startWith("root.a.b.cc")); + assertFalse(deviceID.startWith("root.a.b.c.dd")); + + assertFalse(deviceID.startWith("root.a.b.c..")); + assertFalse(deviceID.startWith("root.a.b.c.d.")); + assertFalse(deviceID.startWith("root.a.b.c.d.e")); + assertFalse(deviceID.startWith("root.a..b.c")); + + deviceID = Factory.DEFAULT_FACTORY.create("root.aaaa.b.c.d"); + assertTrue(deviceID.startWith("root.a")); + } + + @Test + public void testMatchDatabaseName() { + IDeviceID deviceID = Factory.DEFAULT_FACTORY.create("root.a.b.c.d"); + assertTrue(deviceID.matchDatabaseName("root.a")); + assertFalse(deviceID.matchDatabaseName("root.a.")); + assertTrue(deviceID.matchDatabaseName("root.a.b")); + assertFalse(deviceID.matchDatabaseName("root.a.b.")); + assertTrue(deviceID.matchDatabaseName("root.a.b.c")); + 
assertTrue(deviceID.matchDatabaseName("root.a.b.c.d")); + + assertFalse(deviceID.matchDatabaseName("root.b")); + assertFalse(deviceID.matchDatabaseName("root.a.b.d")); + assertFalse(deviceID.matchDatabaseName("root.a.b.c.e")); + + assertFalse(deviceID.matchDatabaseName("root.a.bb")); + assertFalse(deviceID.matchDatabaseName("root.a.b.cc")); + assertFalse(deviceID.matchDatabaseName("root.a.b.c.dd")); + + assertFalse(deviceID.matchDatabaseName("root.a.b.c..")); + assertFalse(deviceID.matchDatabaseName("root.a.b.c.d.")); + assertFalse(deviceID.matchDatabaseName("root.a.b.c.d.e")); + assertFalse(deviceID.matchDatabaseName("root.a..b.c")); + + deviceID = Factory.DEFAULT_FACTORY.create("root.aaaa.b.c.d"); + assertFalse(deviceID.matchDatabaseName("root.a")); + } + + @Test + public void testWithNull() { + // auto adding tailing null for one segment id + IDeviceID deviceID = Factory.DEFAULT_FACTORY.create(new String[] {"table1"}); + assertEquals(1, deviceID.segmentNum()); + assertEquals("table1", deviceID.segment(0)); + + // removing tailing null + deviceID = Factory.DEFAULT_FACTORY.create(new String[] {"table1", "a", null, null}); + assertEquals(2, deviceID.segmentNum()); + assertEquals("table1", deviceID.segment(0)); + assertEquals("a", deviceID.segment(1)); + + // removing tailing null ant not leaving the last null + deviceID = Factory.DEFAULT_FACTORY.create(new String[] {"table1", null, null, null}); + assertEquals(1, deviceID.segmentNum()); + assertEquals("table1", deviceID.segment(0)); + + // all null + assertThrows( + IllegalDeviceIDException.class, + () -> Factory.DEFAULT_FACTORY.create(new String[] {null, null, null, null})); + assertThrows( + IllegalDeviceIDException.class, () -> Factory.DEFAULT_FACTORY.create(new String[] {})); + } + + @Test + public void testSerialize() throws IOException { + testSerialize(Factory.DEFAULT_FACTORY.create("root")); + testSerialize(Factory.DEFAULT_FACTORY.create("root.a")); + 
testSerialize(Factory.DEFAULT_FACTORY.create("root.a.b")); + testSerialize(Factory.DEFAULT_FACTORY.create("root.a.b.c")); + testSerialize(Factory.DEFAULT_FACTORY.create("root.a.b.c.d")); + testSerialize(Factory.DEFAULT_FACTORY.create(new String[] {"root", "a", null, "c", "d"})); + } + + private void testSerialize(IDeviceID deviceID) throws IOException { + ByteBuffer buffer = ByteBuffer.allocate(deviceID.serializedSize()); + deviceID.serialize(buffer); + buffer.flip(); + IDeviceID deserialized = Deserializer.DEFAULT_DESERIALIZER.deserializeFrom(buffer); + assertEquals(deserialized, deviceID); + + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + deviceID.serialize(byteArrayOutputStream); + assertEquals(deviceID.serializedSize(), byteArrayOutputStream.size()); + buffer = ByteBuffer.wrap(byteArrayOutputStream.toByteArray()); + deserialized = Deserializer.DEFAULT_DESERIALIZER.deserializeFrom(buffer); + assertEquals(deserialized, deviceID); + } +} diff --git a/java/tsfile/src/test/java/org/apache/tsfile/file/metadata/TsFileMetadataTest.java b/java/tsfile/src/test/java/org/apache/tsfile/file/metadata/TsFileMetadataTest.java index b10712fe5..cfac37e82 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/file/metadata/TsFileMetadataTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/file/metadata/TsFileMetadataTest.java @@ -18,6 +18,7 @@ */ package org.apache.tsfile.file.metadata; +import org.apache.tsfile.compatibility.DeserializeConfig; import org.apache.tsfile.constant.TestConstant; import org.apache.tsfile.file.metadata.utils.TestHelper; import org.apache.tsfile.file.metadata.utils.Utils; @@ -60,13 +61,14 @@ public void testWriteFileMetaData() { private TsFileMetadata deSerialized() { FileInputStream fileInputStream = null; TsFileMetadata metaData = null; + DeserializeConfig deserializeConfig = new DeserializeConfig(); try { fileInputStream = new FileInputStream(new File(PATH)); FileChannel channel = fileInputStream.getChannel(); 
ByteBuffer buffer = ByteBuffer.allocate((int) channel.size()); channel.read(buffer); buffer.rewind(); - metaData = TsFileMetadata.deserializeFrom(buffer); + metaData = TsFileMetadata.deserializeFrom(buffer, deserializeConfig); return metaData; } catch (IOException e) { e.printStackTrace(); diff --git a/java/tsfile/src/test/java/org/apache/tsfile/file/metadata/utils/TestHelper.java b/java/tsfile/src/test/java/org/apache/tsfile/file/metadata/utils/TestHelper.java index 65eec6abc..c9ebdec91 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/file/metadata/utils/TestHelper.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/file/metadata/utils/TestHelper.java @@ -21,7 +21,8 @@ import org.apache.tsfile.enums.TSDataType; import org.apache.tsfile.file.header.PageHeader; import org.apache.tsfile.file.header.PageHeaderTest; -import org.apache.tsfile.file.metadata.MeasurementMetadataIndexEntry; +import org.apache.tsfile.file.metadata.DeviceMetadataIndexEntry; +import org.apache.tsfile.file.metadata.IDeviceID.Factory; import org.apache.tsfile.file.metadata.MetadataIndexNode; import org.apache.tsfile.file.metadata.TimeseriesMetadata; import org.apache.tsfile.file.metadata.TsFileMetadata; @@ -33,19 +34,24 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Collections; public class TestHelper { + public static final String TEST_TABLE_NAME = "test_table"; + public static TsFileMetadata createSimpleFileMetaData() { TsFileMetadata metaData = new TsFileMetadata(); - metaData.setMetadataIndex(generateMetaDataIndex()); + metaData.setTableMetadataIndexNodeMap( + Collections.singletonMap(TEST_TABLE_NAME, generateMetaDataIndex())); return metaData; } private static MetadataIndexNode generateMetaDataIndex() { - MetadataIndexNode metaDataIndex = new MetadataIndexNode(MetadataIndexNodeType.LEAF_MEASUREMENT); + MetadataIndexNode metaDataIndex = new MetadataIndexNode(MetadataIndexNodeType.LEAF_DEVICE); for (int i = 0; i < 5; i++) { - 
metaDataIndex.addEntry(new MeasurementMetadataIndexEntry("d" + i, (long) i * 5)); + metaDataIndex.addEntry( + new DeviceMetadataIndexEntry(Factory.DEFAULT_FACTORY.create("d" + i), (long) i * 5)); } return metaDataIndex; } diff --git a/java/tsfile/src/test/java/org/apache/tsfile/file/metadata/utils/Utils.java b/java/tsfile/src/test/java/org/apache/tsfile/file/metadata/utils/Utils.java index 103a84c8c..91c2cbf22 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/file/metadata/utils/Utils.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/file/metadata/utils/Utils.java @@ -51,9 +51,13 @@ public static boolean isTwoObjectsNotNULL(Object objectA, Object objectB, String public static boolean isFileMetaDataEqual(TsFileMetadata metadata1, TsFileMetadata metadata2) { if (Utils.isTwoObjectsNotNULL(metadata1, metadata2, "File MetaData")) { if (Utils.isTwoObjectsNotNULL( - metadata1.getMetadataIndex(), metadata2.getMetadataIndex(), "Metadata Index")) { - MetadataIndexNode metaDataIndex1 = metadata1.getMetadataIndex(); - MetadataIndexNode metaDataIndex2 = metadata2.getMetadataIndex(); + metadata1.getTableMetadataIndexNode(TestHelper.TEST_TABLE_NAME), + metadata2.getTableMetadataIndexNode(TestHelper.TEST_TABLE_NAME), + "Metadata " + "Index")) { + MetadataIndexNode metaDataIndex1 = + metadata1.getTableMetadataIndexNode(TestHelper.TEST_TABLE_NAME); + MetadataIndexNode metaDataIndex2 = + metadata2.getTableMetadataIndexNode(TestHelper.TEST_TABLE_NAME); return metaDataIndex1.getChildren().size() == metaDataIndex2.getChildren().size(); } } diff --git a/java/tsfile/src/test/java/org/apache/tsfile/read/GetAllDevicesTest.java b/java/tsfile/src/test/java/org/apache/tsfile/read/GetAllDevicesTest.java index 15660d45d..aef1129b3 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/read/GetAllDevicesTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/read/GetAllDevicesTest.java @@ -22,7 +22,6 @@ import org.apache.tsfile.common.conf.TSFileConfig; import 
org.apache.tsfile.common.conf.TSFileDescriptor; import org.apache.tsfile.file.metadata.IDeviceID; -import org.apache.tsfile.file.metadata.PlainDeviceID; import org.apache.tsfile.utils.FileGenerator; import org.junit.After; @@ -31,6 +30,7 @@ import org.junit.Test; import java.io.IOException; +import java.util.ArrayList; import java.util.List; public class GetAllDevicesTest { @@ -73,14 +73,20 @@ public void testGetAllDevices4() throws IOException { public void testGetAllDevices(int deviceNum, int measurementNum) throws IOException { FileGenerator.generateFile(10000, deviceNum, measurementNum); + try (TsFileSequenceReader fileReader = new TsFileSequenceReader(FILE_PATH)) { + List sortedDeviceIds = new ArrayList<>(); + for (int i = 0; i < deviceNum; i++) { + sortedDeviceIds.add( + IDeviceID.Factory.DEFAULT_FACTORY.create( + "d" + FileGenerator.generateIndexString(i, deviceNum))); + } + sortedDeviceIds.sort(null); List devices = fileReader.getAllDevices(); Assert.assertEquals(deviceNum, devices.size()); for (int i = 0; i < deviceNum; i++) { - Assert.assertEquals( - new PlainDeviceID("d" + FileGenerator.generateIndexString(i, deviceNum)), - devices.get(i)); + Assert.assertEquals(sortedDeviceIds.get(i), devices.get(i)); } FileGenerator.after(); diff --git a/java/tsfile/src/test/java/org/apache/tsfile/read/MeasurementChunkMetadataListMapIteratorTest.java b/java/tsfile/src/test/java/org/apache/tsfile/read/MeasurementChunkMetadataListMapIteratorTest.java index 42a98d5dd..0dd91bf2a 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/read/MeasurementChunkMetadataListMapIteratorTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/read/MeasurementChunkMetadataListMapIteratorTest.java @@ -24,8 +24,6 @@ import org.apache.tsfile.file.metadata.ChunkMetadata; import org.apache.tsfile.file.metadata.IChunkMetadata; import org.apache.tsfile.file.metadata.IDeviceID; -import org.apache.tsfile.file.metadata.PlainDeviceID; -import org.apache.tsfile.read.common.Path; import 
org.apache.tsfile.utils.FileGenerator; import org.junit.After; @@ -135,7 +133,7 @@ public void testCorrectness(int deviceNum, int measurementNum) throws IOExceptio expectedDeviceMeasurementChunkMetadataListMap .computeIfAbsent(device, d -> new HashMap<>()) .computeIfAbsent(measurement, m -> new ArrayList<>()) - .addAll(fileReader.getChunkMetadataList(new Path(device, measurement, true))); + .addAll(fileReader.getChunkMetadataList(device, measurement, false)); } } @@ -158,7 +156,8 @@ public void testCorrectness(int deviceNum, int measurementNum) throws IOExceptio // test not exist device Iterator>> iterator = - fileReader.getMeasurementChunkMetadataListMapIterator(new PlainDeviceID("dd")); + fileReader.getMeasurementChunkMetadataListMapIterator( + IDeviceID.Factory.DEFAULT_FACTORY.create("dd")); Assert.assertFalse(iterator.hasNext()); } diff --git a/java/tsfile/src/test/java/org/apache/tsfile/read/TimePlainEncodeReadTest.java b/java/tsfile/src/test/java/org/apache/tsfile/read/TimePlainEncodeReadTest.java index 402ddf3ba..a38419157 100755 --- a/java/tsfile/src/test/java/org/apache/tsfile/read/TimePlainEncodeReadTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/read/TimePlainEncodeReadTest.java @@ -50,9 +50,11 @@ public class TimePlainEncodeReadTest { private static final String fileName = FileGenerator.outputDataFile; private static TsFileReader roTsFile = null; + private String timeEncoder = null; @Before public void prepare() throws IOException { + timeEncoder = TSFileDescriptor.getInstance().getConfig().getTimeEncoder(); TSFileDescriptor.getInstance().getConfig().setTimeEncoder("PLAIN"); FileGenerator.generateFile(); TsFileSequenceReader reader = new TsFileSequenceReader(fileName); @@ -65,6 +67,7 @@ public void after() throws IOException { roTsFile.close(); } FileGenerator.after(); + TSFileDescriptor.getInstance().getConfig().setTimeEncoder(timeEncoder); } @Test diff --git a/java/tsfile/src/test/java/org/apache/tsfile/read/TsFileReaderTest.java 
b/java/tsfile/src/test/java/org/apache/tsfile/read/TsFileReaderTest.java index 19e48a0cf..58526ccdc 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/read/TsFileReaderTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/read/TsFileReaderTest.java @@ -26,7 +26,6 @@ import org.apache.tsfile.file.metadata.AlignedChunkMetadata; import org.apache.tsfile.file.metadata.IChunkMetadata; import org.apache.tsfile.file.metadata.IDeviceID; -import org.apache.tsfile.file.metadata.PlainDeviceID; import org.apache.tsfile.file.metadata.enums.CompressionType; import org.apache.tsfile.file.metadata.enums.TSEncoding; import org.apache.tsfile.read.common.Path; @@ -82,7 +81,7 @@ public void multiPagesTest() throws IOException, WriteProcessException { Path path = new Path("t", "id", true); tsFileWriter.registerTimeseries( - new Path(path.getDevice()), + new Path(path.getIDeviceID()), new MeasurementSchema("id", TSDataType.INT32, TSEncoding.PLAIN, CompressionType.LZ4)); for (int i = 0; i < 11000000; i++) { @@ -489,7 +488,7 @@ public void testGetAlignedChunkMetadata() throws IOException { String filePath = TsFileGeneratorForTest.alignedOutputDataFile; try (TsFileSequenceReader reader = new TsFileSequenceReader(filePath)) { // query for non-exist device - IDeviceID d3 = new PlainDeviceID("d3"); + IDeviceID d3 = IDeviceID.Factory.DEFAULT_FACTORY.create("d3"); try { reader.getAlignedChunkMetadata(d3); } catch (IOException e) { @@ -497,7 +496,7 @@ public void testGetAlignedChunkMetadata() throws IOException { } // query for non-aligned device - IDeviceID d2 = new PlainDeviceID("d2"); + IDeviceID d2 = IDeviceID.Factory.DEFAULT_FACTORY.create("d2"); try { reader.getAlignedChunkMetadata(d2); } catch (IOException e) { @@ -507,7 +506,7 @@ public void testGetAlignedChunkMetadata() throws IOException { String[] expected = new String[] {"s1", "s2", "s3", "s4"}; List chunkMetadataList = - reader.getAlignedChunkMetadata(new PlainDeviceID("d1")); + 
reader.getAlignedChunkMetadata(IDeviceID.Factory.DEFAULT_FACTORY.create("d1")); AlignedChunkMetadata alignedChunkMetadata = chunkMetadataList.get(0); Assert.assertEquals("", alignedChunkMetadata.getTimeChunkMetadata().getMeasurementUid()); int i = 0; diff --git a/java/tsfile/src/test/java/org/apache/tsfile/read/TsFileSequenceReaderTest.java b/java/tsfile/src/test/java/org/apache/tsfile/read/TsFileSequenceReaderTest.java index 0e4f5957d..e2198583f 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/read/TsFileSequenceReaderTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/read/TsFileSequenceReaderTest.java @@ -21,6 +21,7 @@ import org.apache.tsfile.common.conf.TSFileConfig; import org.apache.tsfile.common.conf.TSFileDescriptor; +import org.apache.tsfile.constant.TestConstant; import org.apache.tsfile.enums.TSDataType; import org.apache.tsfile.exception.write.WriteProcessException; import org.apache.tsfile.file.MetaMarker; @@ -28,15 +29,21 @@ import org.apache.tsfile.file.header.ChunkHeader; import org.apache.tsfile.file.header.PageHeader; import org.apache.tsfile.file.metadata.ChunkMetadata; +import org.apache.tsfile.file.metadata.IChunkMetadata; import org.apache.tsfile.file.metadata.IDeviceID; -import org.apache.tsfile.file.metadata.PlainDeviceID; +import org.apache.tsfile.file.metadata.IDeviceID.Factory; import org.apache.tsfile.file.metadata.enums.TSEncoding; import org.apache.tsfile.read.common.Path; +import org.apache.tsfile.utils.BloomFilter; import org.apache.tsfile.utils.FileGenerator; import org.apache.tsfile.utils.Pair; import org.apache.tsfile.utils.TsFileGeneratorUtils; import org.apache.tsfile.write.TsFileWriter; +import org.apache.tsfile.write.record.TSRecord; +import org.apache.tsfile.write.record.datapoint.DoubleDataPoint; +import org.apache.tsfile.write.schema.IMeasurementSchema; import org.apache.tsfile.write.schema.MeasurementSchema; +import org.apache.tsfile.write.schema.Schema; import org.junit.After; import org.junit.Assert; @@ 
-51,6 +58,10 @@ import java.util.List; import java.util.Map; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + public class TsFileSequenceReaderTest { private static final String FILE_PATH = FileGenerator.outputDataFile; @@ -119,7 +130,7 @@ public void testReadChunkMetadataInDevice() throws IOException { // test for exist device "d2" Map> chunkMetadataMap = - reader.readChunkMetadataInDevice(new PlainDeviceID("d2")); + reader.readChunkMetadataInDevice(IDeviceID.Factory.DEFAULT_FACTORY.create("d2")); int[] res = new int[] {20, 75, 100, 13}; Assert.assertEquals(4, chunkMetadataMap.size()); @@ -134,10 +145,48 @@ public void testReadChunkMetadataInDevice() throws IOException { } // test for non-exist device "d3" - Assert.assertTrue(reader.readChunkMetadataInDevice(new PlainDeviceID("d3")).isEmpty()); + assertTrue( + reader.readChunkMetadataInDevice(IDeviceID.Factory.DEFAULT_FACTORY.create("d3")).isEmpty()); reader.close(); } + @Test + public void testReadChunkMetadataInSimilarDevice() throws IOException, WriteProcessException { + File testFile = new File(TestConstant.BASE_OUTPUT_PATH + "test.tsfile"); + try (TsFileWriter writer = new TsFileWriter(testFile)) { + IDeviceID deviceID = Factory.DEFAULT_FACTORY.create("root.topic1"); + writer.registerTimeseries(deviceID, new MeasurementSchema("s", TSDataType.DOUBLE)); + TSRecord record = new TSRecord(0, deviceID); + record.addTuple(new DoubleDataPoint("s", 0.0)); + writer.write(record); + } + + TsFileSequenceReader reader = new TsFileSequenceReader(testFile.getAbsolutePath()); + + List iChunkMetadataList = + reader.getIChunkMetadataList(Factory.DEFAULT_FACTORY.create("root.topic2"), "s"); + assertTrue(iChunkMetadataList.isEmpty()); + } + + @Test + public void testReadBloomFilter() throws IOException, WriteProcessException { + File testFile = new File(TestConstant.BASE_OUTPUT_PATH + "test.tsfile"); + IDeviceID deviceID = 
Factory.DEFAULT_FACTORY.create("root.topic1"); + try (TsFileWriter writer = new TsFileWriter(testFile)) { + writer.registerTimeseries(deviceID, new MeasurementSchema("s", TSDataType.DOUBLE)); + TSRecord record = new TSRecord(0, deviceID); + record.addTuple(new DoubleDataPoint("s", 0.0)); + writer.write(record); + } + + TsFileSequenceReader reader = new TsFileSequenceReader(testFile.getAbsolutePath()); + BloomFilter bloomFilter = reader.readBloomFilter(); + assertNotNull(bloomFilter); + assertTrue(bloomFilter.contains(deviceID.toString() + ".s")); + assertFalse( + bloomFilter.contains(Factory.DEFAULT_FACTORY.create("root.topic2").toString() + ".s")); + } + @Test public void testReadEmptyPageInSelfCheck() throws IOException, WriteProcessException { int oldMaxPagePointNum = @@ -148,14 +197,14 @@ public void testReadEmptyPageInSelfCheck() throws IOException, WriteProcessExcep // create tsfile with empty page try (TsFileWriter tsFileWriter = new TsFileWriter(testFile)) { // register aligned timeseries - List alignedMeasurementSchemas = new ArrayList<>(); + List alignedMeasurementSchemas = new ArrayList<>(); alignedMeasurementSchemas.add( new MeasurementSchema("s1", TSDataType.INT64, TSEncoding.PLAIN)); alignedMeasurementSchemas.add( new MeasurementSchema("s2", TSDataType.INT64, TSEncoding.PLAIN)); tsFileWriter.registerAlignedTimeseries(new Path("d1"), alignedMeasurementSchemas); - List writeMeasurementScheams = new ArrayList<>(); + List writeMeasurementScheams = new ArrayList<>(); // only write s1 writeMeasurementScheams.add(alignedMeasurementSchemas.get(0)); TsFileGeneratorUtils.writeWithTsRecord( @@ -173,7 +222,7 @@ public void testReadEmptyPageInSelfCheck() throws IOException, WriteProcessExcep try (TsFileSequenceReader reader = new TsFileSequenceReader(FILE_PATH)) { Assert.assertEquals( TsFileCheckStatus.COMPLETE_FILE, - reader.selfCheck(new HashMap<>(), new ArrayList<>(), false)); + reader.selfCheck(new Schema(), new ArrayList<>(), false)); } } } diff --git 
a/java/tsfile/src/test/java/org/apache/tsfile/read/common/PathTest.java b/java/tsfile/src/test/java/org/apache/tsfile/read/common/PathTest.java index 8df524160..6ea9defa5 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/read/common/PathTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/read/common/PathTest.java @@ -19,114 +19,132 @@ package org.apache.tsfile.read.common; import org.apache.tsfile.exception.PathParseException; +import org.apache.tsfile.file.metadata.IDeviceID; +import org.apache.tsfile.file.metadata.IDeviceID.Deserializer; +import org.apache.tsfile.file.metadata.IDeviceID.Factory; +import org.apache.tsfile.file.metadata.StringArrayDeviceID; import org.apache.tsfile.read.common.parser.PathVisitor; import org.junit.Assert; import org.junit.Test; +import java.nio.ByteBuffer; + +import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; public class PathTest { + + @Test + public void testDeviceIdWithNull() { + IDeviceID deviceID = Factory.DEFAULT_FACTORY.create(new String[] {"table1", null, "id2"}); + ByteBuffer buffer = ByteBuffer.allocate(128); + deviceID.serialize(buffer); + buffer.flip(); + IDeviceID deserialized = Deserializer.DEFAULT_DESERIALIZER.deserializeFrom(buffer); + assertEquals(deviceID, deserialized); + } + @Test public void testLegalPath() { // empty path Path a = new Path("", true); - Assert.assertEquals("", a.getDevice()); + Assert.assertEquals(new StringArrayDeviceID(""), a.getIDeviceID()); Assert.assertEquals("", a.getMeasurement()); // empty device Path b = new Path("s1", true); Assert.assertEquals("s1", b.getMeasurement()); - Assert.assertEquals("", b.getDevice()); + Assert.assertEquals(new StringArrayDeviceID(""), b.getIDeviceID()); // normal node Path c = new Path("root.sg.a", true); - Assert.assertEquals("root.sg", c.getDevice()); + Assert.assertEquals("root.sg", c.getDeviceString()); Assert.assertEquals("a", c.getMeasurement()); // quoted node Path d = new Path("root.sg.`a.b`", true); - 
Assert.assertEquals("root.sg", d.getDevice()); + Assert.assertEquals("root.sg", d.getDeviceString()); Assert.assertEquals("`a.b`", d.getMeasurement()); Path e = new Path("root.sg.`a.``b`", true); - Assert.assertEquals("root.sg", e.getDevice()); + Assert.assertEquals("root.sg", e.getDeviceString()); Assert.assertEquals("`a.``b`", e.getMeasurement()); Path f = new Path("root.`sg\"`.`a.``b`", true); - Assert.assertEquals("root.`sg\"`", f.getDevice()); + Assert.assertEquals("root.`sg\"`", f.getDeviceString()); Assert.assertEquals("`a.``b`", f.getMeasurement()); Path g = new Path("root.sg.`a.b\\\\`", true); - Assert.assertEquals("root.sg", g.getDevice()); + Assert.assertEquals("root.sg", g.getDeviceString()); Assert.assertEquals("`a.b\\\\`", g.getMeasurement()); // quoted node of digits Path h = new Path("root.sg.`111`", true); - Assert.assertEquals("root.sg", h.getDevice()); + Assert.assertEquals("root.sg", h.getDeviceString()); Assert.assertEquals("`111`", h.getMeasurement()); // quoted node of key word Path i = new Path("root.sg.`select`", true); - Assert.assertEquals("root.sg", i.getDevice()); + Assert.assertEquals("root.sg", i.getDeviceString()); Assert.assertEquals("select", i.getMeasurement()); // wildcard Path j = new Path("root.sg.`a*b`", true); - Assert.assertEquals("root.sg", j.getDevice()); + Assert.assertEquals("root.sg", j.getDeviceString()); Assert.assertEquals("`a*b`", j.getMeasurement()); Path k = new Path("root.sg.*", true); - Assert.assertEquals("root.sg", k.getDevice()); + Assert.assertEquals("root.sg", k.getDeviceString()); Assert.assertEquals("*", k.getMeasurement()); Path l = new Path("root.sg.**", true); - Assert.assertEquals("root.sg", l.getDevice()); + Assert.assertEquals("root.sg", l.getDeviceString()); Assert.assertEquals("**", l.getMeasurement()); // raw key word Path m = new Path("root.sg.select", true); - Assert.assertEquals("root.sg", m.getDevice()); + Assert.assertEquals("root.sg", m.getDeviceString()); Assert.assertEquals("select", 
m.getMeasurement()); Path n = new Path("root.sg.device", true); - Assert.assertEquals("root.sg", n.getDevice()); + Assert.assertEquals("root.sg", n.getDeviceString()); Assert.assertEquals("device", n.getMeasurement()); Path o = new Path("root.sg.drop_trigger", true); - Assert.assertEquals("root.sg", o.getDevice()); + Assert.assertEquals("root.sg", o.getDeviceString()); Assert.assertEquals("drop_trigger", o.getMeasurement()); Path p = new Path("root.sg.and", true); - Assert.assertEquals("root.sg", p.getDevice()); + Assert.assertEquals("root.sg", p.getDeviceString()); Assert.assertEquals("and", p.getMeasurement()); p = new Path("root.sg.or", true); - Assert.assertEquals("root.sg", p.getDevice()); + Assert.assertEquals("root.sg", p.getDeviceString()); Assert.assertEquals("or", p.getMeasurement()); p = new Path("root.sg.not", true); - Assert.assertEquals("root.sg", p.getDevice()); + Assert.assertEquals("root.sg", p.getDeviceString()); Assert.assertEquals("not", p.getMeasurement()); p = new Path("root.sg.null", true); - Assert.assertEquals("root.sg", p.getDevice()); + Assert.assertEquals("root.sg", p.getDeviceString()); Assert.assertEquals("null", p.getMeasurement()); p = new Path("root.sg.contains", true); - Assert.assertEquals("root.sg", p.getDevice()); + Assert.assertEquals("root.sg", p.getDeviceString()); Assert.assertEquals("contains", p.getMeasurement()); p = new Path("root.sg.`0000`", true); - Assert.assertEquals("root.sg", p.getDevice()); + Assert.assertEquals("root.sg", p.getDeviceString()); Assert.assertEquals("`0000`", p.getMeasurement()); p = new Path("root.sg.`0e38`", true); - Assert.assertEquals("root.sg", p.getDevice()); + Assert.assertEquals("root.sg", p.getDeviceString()); Assert.assertEquals("`0e38`", p.getMeasurement()); p = new Path("root.sg.`00.12`", true); - Assert.assertEquals("root.sg", p.getDevice()); + Assert.assertEquals("root.sg", p.getDeviceString()); Assert.assertEquals("`00.12`", p.getMeasurement()); } diff --git 
a/java/tsfile/src/test/java/org/apache/tsfile/read/reader/AlignedChunkReaderWithoutStatisticsTest.java b/java/tsfile/src/test/java/org/apache/tsfile/read/reader/AlignedChunkReaderWithoutStatisticsTest.java index b3dfc5d5e..510ac1fee 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/read/reader/AlignedChunkReaderWithoutStatisticsTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/read/reader/AlignedChunkReaderWithoutStatisticsTest.java @@ -25,7 +25,7 @@ import org.apache.tsfile.file.metadata.AlignedChunkMetadata; import org.apache.tsfile.file.metadata.ChunkMetadata; import org.apache.tsfile.file.metadata.IChunkMetadata; -import org.apache.tsfile.file.metadata.PlainDeviceID; +import org.apache.tsfile.file.metadata.IDeviceID.Factory; import org.apache.tsfile.read.TsFileSequenceReader; import org.apache.tsfile.read.common.Chunk; import org.apache.tsfile.read.reader.chunk.AlignedChunkReader; @@ -98,7 +98,7 @@ public void testChunkReaderWithoutStatistics() throws IOException { for (int i = 0; i < deviceNum; i++) { final List chunkMetadataList = tsFileSequenceReader.getAlignedChunkMetadata( - new PlainDeviceID(testStorageGroup + PATH_SEPARATOR + "d1000" + i)); + Factory.DEFAULT_FACTORY.create(testStorageGroup + PATH_SEPARATOR + "d1000" + i)); for (final AlignedChunkMetadata chunkMetadata : chunkMetadataList) { Chunk timeChunk = tsFileSequenceReader.readMemChunk( diff --git a/java/tsfile/src/test/java/org/apache/tsfile/tableview/PerformanceTest.java b/java/tsfile/src/test/java/org/apache/tsfile/tableview/PerformanceTest.java new file mode 100644 index 000000000..f19ed613c --- /dev/null +++ b/java/tsfile/src/test/java/org/apache/tsfile/tableview/PerformanceTest.java @@ -0,0 +1,372 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.tableview; + +import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.exception.read.ReadProcessException; +import org.apache.tsfile.exception.write.WriteProcessException; +import org.apache.tsfile.file.metadata.IDeviceID; +import org.apache.tsfile.file.metadata.IDeviceID.Factory; +import org.apache.tsfile.file.metadata.StringArrayDeviceID; +import org.apache.tsfile.file.metadata.TableSchema; +import org.apache.tsfile.file.metadata.enums.CompressionType; +import org.apache.tsfile.file.metadata.enums.TSEncoding; +import org.apache.tsfile.read.TsFileSequenceReader; +import org.apache.tsfile.read.common.Path; +import org.apache.tsfile.read.common.block.TsBlock; +import org.apache.tsfile.read.controller.CachedChunkLoaderImpl; +import org.apache.tsfile.read.controller.MetadataQuerierByFileImpl; +import org.apache.tsfile.read.expression.QueryExpression; +import org.apache.tsfile.read.query.dataset.QueryDataSet; +import org.apache.tsfile.read.query.executor.QueryExecutor; +import org.apache.tsfile.read.query.executor.TableQueryExecutor; +import org.apache.tsfile.read.query.executor.TableQueryExecutor.TableQueryOrdering; +import org.apache.tsfile.read.query.executor.TsFileExecutor; +import org.apache.tsfile.read.reader.block.TsBlockReader; +import org.apache.tsfile.utils.Pair; +import org.apache.tsfile.write.TsFileWriter; +import 
org.apache.tsfile.write.record.Tablet; +import org.apache.tsfile.write.record.Tablet.ColumnType; +import org.apache.tsfile.write.schema.IMeasurementSchema; +import org.apache.tsfile.write.schema.MeasurementSchema; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +import static org.junit.Assert.assertTrue; + +public class PerformanceTest { + + private final String testDir = "target" + File.separator + "tableViewTest"; + private final int idSchemaCnt = 3; + private final int measurementSchemaCnt = 100; + private final int tableCnt = 100; + private final int devicePerTable = 10; + private final int pointPerSeries = 100; + private final int tabletCnt = 100; + + private List idSchemas; + private List measurementSchemas; + + private List registerTimeList = new ArrayList<>(); + private List writeTimeList = new ArrayList<>(); + private List closeTimeList = new ArrayList<>(); + private List queryTimeList = new ArrayList<>(); + private List fileSizeList = new ArrayList<>(); + + public static void main(String[] args) throws Exception { + final PerformanceTest test = new PerformanceTest(); + test.initSchemas(); + + int repetitionCnt = 10; + for (int i = 0; i < repetitionCnt; i++) { + test.testTable(); + // test.testTree(); + } + + final double registerTime = + test.registerTimeList.subList(repetitionCnt / 2, repetitionCnt).stream() + .mapToLong(l -> l) + .average() + .orElse(0.0f); + final double writeTime = + test.writeTimeList.subList(repetitionCnt / 2, repetitionCnt).stream() + .mapToLong(l -> l) + .average() + .orElse(0.0f); + final double closeTime = + test.closeTimeList.subList(repetitionCnt / 2, repetitionCnt).stream() + .mapToLong(l -> l) + .average() + .orElse(0.0f); + final double queryTime = + test.queryTimeList.subList(repetitionCnt / 2, repetitionCnt).stream() + .mapToLong(l -> l) + .average() + .orElse(0.0f); + final double fileSize = + 
test.fileSizeList.subList(repetitionCnt / 2, repetitionCnt).stream() + .mapToLong(l -> l) + .average() + .orElse(0.0f); + System.out.printf( + "Register %fns, write %fns, close %fns, query %fns, fileSize %f %n", + registerTime, writeTime, closeTime, queryTime, fileSize); + } + + private void initSchemas() { + idSchemas = new ArrayList<>(idSchemaCnt); + for (int i = 0; i < idSchemaCnt; i++) { + idSchemas.add( + new MeasurementSchema( + "id" + i, TSDataType.TEXT, TSEncoding.PLAIN, CompressionType.UNCOMPRESSED)); + } + + measurementSchemas = new ArrayList<>(); + for (int i = 0; i < measurementSchemaCnt; i++) { + measurementSchemas.add( + new MeasurementSchema( + "s" + i, TSDataType.INT64, TSEncoding.GORILLA, CompressionType.LZ4)); + } + } + + private void testTree() throws IOException, WriteProcessException { + long registerTimeSum = 0; + long writeTimeSum = 0; + long closeTimeSum = 0; + long queryTimeSum = 0; + long startTime; + final File file = initFile(); + TsFileWriter tsFileWriter = new TsFileWriter(file); + try { + startTime = System.nanoTime(); + registerTree(tsFileWriter); + registerTimeSum = System.nanoTime() - startTime; + Tablet tablet = initTreeTablet(); + for (int tableNum = 0; tableNum < tableCnt; tableNum++) { + for (int deviceNum = 0; deviceNum < devicePerTable; deviceNum++) { + for (int tabletNum = 0; tabletNum < tabletCnt; tabletNum++) { + fillTreeTablet(tablet, tableNum, deviceNum, tabletNum); + startTime = System.nanoTime(); + tsFileWriter.writeAligned(tablet); + writeTimeSum += System.nanoTime() - startTime; + } + } + } + } finally { + startTime = System.nanoTime(); + tsFileWriter.close(); + closeTimeSum = System.nanoTime() - startTime; + } + long fileSize = file.length(); + + startTime = System.nanoTime(); + try (TsFileSequenceReader sequenceReader = new TsFileSequenceReader(file.getAbsolutePath())) { + QueryExecutor queryExecutor = + new TsFileExecutor( + new MetadataQuerierByFileImpl(sequenceReader), + new 
CachedChunkLoaderImpl(sequenceReader)); + + List selectedSeries = new ArrayList<>(); + for (int i = 0; i < measurementSchemaCnt; i++) { + for (int j = 0; j < devicePerTable; j++) { + selectedSeries.add(new Path(genTreeDeviceId(tableCnt / 2, j), "s" + i, false)); + } + } + final QueryExpression queryExpression = QueryExpression.create(selectedSeries, null); + final QueryDataSet queryDataSet = queryExecutor.execute(queryExpression); + int cnt = 0; + while (queryDataSet.hasNext()) { + queryDataSet.next(); + cnt++; + } + } + queryTimeSum = System.nanoTime() - startTime; + file.delete(); + + System.out.printf( + "Tree register %dns, write %dns, close %dns, query %dns, fileSize %d %n", + registerTimeSum, writeTimeSum, closeTimeSum, queryTimeSum, fileSize); + registerTimeList.add(registerTimeSum); + writeTimeList.add(writeTimeSum); + closeTimeList.add(closeTimeSum); + queryTimeList.add(queryTimeSum); + fileSizeList.add(fileSize); + } + + private void testTable() throws IOException, WriteProcessException, ReadProcessException { + long registerTimeSum = 0; + long writeTimeSum = 0; + long closeTimeSum = 0; + long queryTimeSum = 0; + long startTime; + final File file = initFile(); + TsFileWriter tsFileWriter = new TsFileWriter(file); + try { + startTime = System.nanoTime(); + registerTable(tsFileWriter); + registerTimeSum = System.nanoTime() - startTime; + Tablet tablet = initTableTablet(); + for (int tableNum = 0; tableNum < tableCnt; tableNum++) { + for (int deviceNum = 0; deviceNum < devicePerTable; deviceNum++) { + for (int tabletNum = 0; tabletNum < tabletCnt; tabletNum++) { + fillTableTablet(tablet, tableNum, deviceNum, tabletNum); + startTime = System.nanoTime(); + tsFileWriter.writeTable( + tablet, + Collections.singletonList(new Pair<>(tablet.getDeviceID(0), tablet.rowSize))); + writeTimeSum += System.nanoTime() - startTime; + } + } + } + } finally { + startTime = System.nanoTime(); + tsFileWriter.close(); + closeTimeSum = System.nanoTime() - startTime; + } + long 
fileSize = file.length(); + + startTime = System.nanoTime(); + try (TsFileSequenceReader sequenceReader = new TsFileSequenceReader(file.getAbsolutePath())) { + TableQueryExecutor tableQueryExecutor = + new TableQueryExecutor( + new MetadataQuerierByFileImpl(sequenceReader), + new CachedChunkLoaderImpl(sequenceReader), + TableQueryOrdering.DEVICE); + + List columns = + measurementSchemas.stream() + .map(IMeasurementSchema::getMeasurementId) + .collect(Collectors.toList()); + TsBlockReader reader = + tableQueryExecutor.query(genTableName(tableCnt / 2), columns, null, null, null); + assertTrue(reader.hasNext()); + int cnt = 0; + while (reader.hasNext()) { + final TsBlock result = reader.next(); + cnt += result.getPositionCount(); + } + } + file.delete(); + queryTimeSum = System.nanoTime() - startTime; + + System.out.printf( + "Table register %dns, write %dns, close %dns, query %dns, fileSize %d %n", + registerTimeSum, writeTimeSum, closeTimeSum, queryTimeSum, fileSize); + registerTimeList.add(registerTimeSum); + writeTimeList.add(writeTimeSum); + closeTimeList.add(closeTimeSum); + queryTimeList.add(queryTimeSum); + fileSizeList.add(fileSize); + } + + private File initFile() throws IOException { + File dir = new File(testDir); + dir.mkdirs(); + return new File(dir, "testTsFile"); + } + + private Tablet initTreeTablet() { + return new Tablet(null, measurementSchemas, pointPerSeries); + } + + private void fillTreeTablet(Tablet tablet, int tableNum, int deviceNum, int tabletNum) { + tablet.setDeviceId(genTreeDeviceId(tableNum, deviceNum).toString()); + for (int i = 0; i < measurementSchemaCnt; i++) { + long[] values = (long[]) tablet.values[i]; + for (int valNum = 0; valNum < pointPerSeries; valNum++) { + values[valNum] = (long) tabletNum * pointPerSeries + valNum; + } + } + for (int valNum = 0; valNum < pointPerSeries; valNum++) { + tablet.timestamps[valNum] = (long) tabletNum * pointPerSeries + valNum; + } + tablet.rowSize = pointPerSeries; + } + + private Tablet 
initTableTablet() { + List allSchema = new ArrayList<>(idSchemas); + List columnTypes = ColumnType.nCopy(ColumnType.ID, idSchemas.size()); + allSchema.addAll(measurementSchemas); + columnTypes.addAll(ColumnType.nCopy(ColumnType.MEASUREMENT, measurementSchemaCnt)); + return new Tablet(null, allSchema, columnTypes, pointPerSeries); + } + + private void fillTableTablet(Tablet tablet, int tableNum, int deviceNum, int tabletNum) { + IDeviceID deviceID = genTableDeviceId(tableNum, deviceNum); + tablet.setTableName(deviceID.segment(0).toString()); + for (int i = 0; i < idSchemaCnt; i++) { + String[] strings = ((String[]) tablet.values[i]); + for (int rowNum = 0; rowNum < pointPerSeries; rowNum++) { + strings[rowNum] = deviceID.segment(i + 1).toString(); + } + } + for (int i = 0; i < measurementSchemaCnt; i++) { + long[] values = (long[]) tablet.values[i + idSchemaCnt]; + for (int valNum = 0; valNum < pointPerSeries; valNum++) { + values[valNum] = (long) tabletNum * pointPerSeries + valNum; + } + } + for (int valNum = 0; valNum < pointPerSeries; valNum++) { + tablet.timestamps[valNum] = (long) tabletNum * pointPerSeries + valNum; + } + tablet.rowSize = pointPerSeries; + } + + private void registerTree(TsFileWriter writer) throws WriteProcessException { + for (int tableNum = 0; tableNum < tableCnt; tableNum++) { + for (int deviceNum = 0; deviceNum < devicePerTable; deviceNum++) { + writer.registerAlignedTimeseries(genTreeDeviceId(tableNum, deviceNum), measurementSchemas); + } + } + } + + private IMeasurementSchema genMeasurementSchema(int measurementNum) { + return measurementSchemas.get(measurementNum); + } + + private IMeasurementSchema genIdSchema(int idNum) { + return idSchemas.get(idNum); + } + + private String genTableName(int tableNum) { + return "table_" + tableNum; + } + + private IDeviceID genTableDeviceId(int tableNum, int deviceNum) { + String[] idSegments = new String[idSchemaCnt + 1]; + idSegments[0] = genTableName(tableNum); + for (int i = 0; i < idSchemaCnt; 
i++) { + idSegments[i + 1] = "0"; + } + idSegments[idSchemaCnt] = Integer.toString(deviceNum); + return new StringArrayDeviceID(idSegments); + } + + private IDeviceID genTreeDeviceId(int tableNum, int deviceNum) { + return Factory.DEFAULT_FACTORY.create(genTableDeviceId(tableNum, deviceNum).toString()); + } + + private void registerTable(TsFileWriter writer) { + for (int i = 0; i < tableCnt; i++) { + TableSchema tableSchema = genTableSchema(i); + writer.registerTableSchema(tableSchema); + } + } + + private TableSchema genTableSchema(int tableNum) { + List measurementSchemas = new ArrayList<>(); + List columnTypes = new ArrayList<>(); + + for (int i = 0; i < idSchemaCnt; i++) { + measurementSchemas.add(genIdSchema(i)); + columnTypes.add(ColumnType.ID); + } + for (int i = 0; i < measurementSchemaCnt; i++) { + measurementSchemas.add(genMeasurementSchema(i)); + columnTypes.add(ColumnType.MEASUREMENT); + } + return new TableSchema(genTableName(tableNum), measurementSchemas, columnTypes); + } +} diff --git a/java/tsfile/src/test/java/org/apache/tsfile/tableview/TableSchemaTest.java b/java/tsfile/src/test/java/org/apache/tsfile/tableview/TableSchemaTest.java new file mode 100644 index 000000000..84261ed1f --- /dev/null +++ b/java/tsfile/src/test/java/org/apache/tsfile/tableview/TableSchemaTest.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.tableview; + +import org.apache.tsfile.compatibility.DeserializeConfig; +import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.file.metadata.ChunkGroupMetadata; +import org.apache.tsfile.file.metadata.ChunkMetadata; +import org.apache.tsfile.file.metadata.LogicalTableSchema; +import org.apache.tsfile.file.metadata.StringArrayDeviceID; +import org.apache.tsfile.file.metadata.TableSchema; +import org.apache.tsfile.file.metadata.enums.CompressionType; +import org.apache.tsfile.file.metadata.enums.TSEncoding; +import org.apache.tsfile.write.record.Tablet.ColumnType; +import org.apache.tsfile.write.schema.IMeasurementSchema; +import org.apache.tsfile.write.schema.MeasurementSchema; + +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +import static org.apache.tsfile.write.record.Tablet.ColumnType.ID; +import static org.apache.tsfile.write.record.Tablet.ColumnType.MEASUREMENT; +import static org.junit.Assert.assertEquals; + +public class TableSchemaTest { + + private String tableName = "test_table"; + private int idSchemaCnt = 5; + private int measurementSchemaCnt = 5; + + public static List prepareIdSchemas(int schemaNum) { + List schemas = new ArrayList<>(); + for (int i = 0; i < schemaNum; i++) { + final MeasurementSchema measurementSchema = + new MeasurementSchema( + "__level" + i, TSDataType.TEXT, TSEncoding.PLAIN, CompressionType.UNCOMPRESSED); + 
schemas.add(measurementSchema); + } + return schemas; + } + + public static List prepareMeasurementSchemas(int schemaNum) { + List schemas = new ArrayList<>(); + for (int i = 0; i < schemaNum; i++) { + final MeasurementSchema measurementSchema = + new MeasurementSchema( + "s" + i, TSDataType.INT64, TSEncoding.PLAIN, CompressionType.UNCOMPRESSED); + schemas.add(measurementSchema); + } + return schemas; + } + + @Test + public void testTableSchema() throws IOException { + final List measurementSchemas = prepareIdSchemas(idSchemaCnt); + measurementSchemas.addAll(prepareMeasurementSchemas(measurementSchemaCnt)); + final List columnTypes = ColumnType.nCopy(ID, idSchemaCnt); + columnTypes.addAll(ColumnType.nCopy(MEASUREMENT, measurementSchemaCnt)); + TableSchema tableSchema = new TableSchema(tableName, measurementSchemas, columnTypes); + + try (ByteArrayOutputStream stream = new ByteArrayOutputStream()) { + tableSchema.serialize(stream); + final ByteBuffer buffer = ByteBuffer.wrap(stream.toByteArray()); + final TableSchema deserialized = TableSchema.deserialize(buffer, new DeserializeConfig()); + deserialized.setTableName(tableName); + assertEquals(tableSchema, deserialized); + } + } + + @Test + public void testLogicalTableSchema() throws IOException { + TableSchema tableSchema = new LogicalTableSchema(tableName); + for (int i = 0; i < measurementSchemaCnt; i++) { + List chunkMetadataList = new ArrayList<>(); + for (int chunkNum = 0; chunkNum <= i; chunkNum++) { + chunkMetadataList.add( + new ChunkMetadata( + "s" + chunkNum, + TSDataType.INT64, + TSEncoding.PLAIN, + CompressionType.UNCOMPRESSED, + 0, + null)); + } + ChunkGroupMetadata groupMetadata = + new ChunkGroupMetadata( + new StringArrayDeviceID("root.a.b" + ".c.d" + i), chunkMetadataList); + tableSchema.update(groupMetadata); + } + assertEquals(measurementSchemaCnt, tableSchema.getColumnSchemas().size()); + + try (ByteArrayOutputStream stream = new ByteArrayOutputStream()) { + tableSchema.serialize(stream); + final 
ByteBuffer buffer = ByteBuffer.wrap(stream.toByteArray()); + final TableSchema deserialized = TableSchema.deserialize(buffer, new DeserializeConfig()); + deserialized.setTableName(tableName); + assertEquals(tableSchema, deserialized); + assertEquals(measurementSchemaCnt + 2, deserialized.getColumnSchemas().size()); + } + } +} diff --git a/java/tsfile/src/test/java/org/apache/tsfile/tableview/TableViewTest.java b/java/tsfile/src/test/java/org/apache/tsfile/tableview/TableViewTest.java new file mode 100644 index 000000000..95fcc66bb --- /dev/null +++ b/java/tsfile/src/test/java/org/apache/tsfile/tableview/TableViewTest.java @@ -0,0 +1,402 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tsfile.tableview; + +import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.exception.read.ReadProcessException; +import org.apache.tsfile.exception.write.WriteProcessException; +import org.apache.tsfile.file.metadata.IDeviceID; +import org.apache.tsfile.file.metadata.IDeviceID.Factory; +import org.apache.tsfile.file.metadata.TableSchema; +import org.apache.tsfile.file.metadata.enums.CompressionType; +import org.apache.tsfile.file.metadata.enums.TSEncoding; +import org.apache.tsfile.read.TsFileSequenceReader; +import org.apache.tsfile.read.common.Path; +import org.apache.tsfile.read.common.RowRecord; +import org.apache.tsfile.read.common.block.TsBlock; +import org.apache.tsfile.read.controller.CachedChunkLoaderImpl; +import org.apache.tsfile.read.controller.MetadataQuerierByFileImpl; +import org.apache.tsfile.read.expression.QueryExpression; +import org.apache.tsfile.read.query.dataset.QueryDataSet; +import org.apache.tsfile.read.query.executor.QueryExecutor; +import org.apache.tsfile.read.query.executor.TableQueryExecutor; +import org.apache.tsfile.read.query.executor.TableQueryExecutor.TableQueryOrdering; +import org.apache.tsfile.read.query.executor.TsFileExecutor; +import org.apache.tsfile.read.reader.block.TsBlockReader; +import org.apache.tsfile.utils.Binary; +import org.apache.tsfile.utils.TsFileSketchTool; +import org.apache.tsfile.write.TsFileWriter; +import org.apache.tsfile.write.record.TSRecord; +import org.apache.tsfile.write.record.Tablet; +import org.apache.tsfile.write.record.Tablet.ColumnType; +import org.apache.tsfile.write.record.datapoint.LongDataPoint; +import org.apache.tsfile.write.schema.IMeasurementSchema; +import org.apache.tsfile.write.schema.MeasurementSchema; + +import org.apache.commons.io.FileUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Test; + +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import 
java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class TableViewTest { + + private final String testDir = "target" + File.separator + "tableViewTest"; + private final int idSchemaNum = 5; + private final int measurementSchemaNum = 5; + private TableSchema testTableSchema; + + @Before + public void setUp() throws Exception { + new File(testDir).mkdirs(); + testTableSchema = genTableSchema(0); + } + + @After + public void tearDown() throws Exception { + FileUtils.deleteDirectory(new File(testDir)); + } + + @Test + public void tabletSerializationTest() throws IOException { + final Tablet tablet = genTablet(testTableSchema, 0, 100); + ByteBuffer buffer = tablet.serialize(); + Tablet deserialized = Tablet.deserialize(buffer); + assertEquals(tablet, deserialized); + } + + @Test + public void testWriteOneTable() throws IOException, WriteProcessException, ReadProcessException { + final File testFile = new File(testDir, "testFile"); + TsFileWriter writer = new TsFileWriter(testFile); + writer.setGenerateTableSchema(true); + writer.registerTableSchema(testTableSchema); + + writer.writeTable(genTablet(testTableSchema, 0, 100)); + writer.close(); + + TsFileSequenceReader sequenceReader = new TsFileSequenceReader(testFile.getAbsolutePath()); + TableQueryExecutor tableQueryExecutor = + new TableQueryExecutor( + new MetadataQuerierByFileImpl(sequenceReader), + new CachedChunkLoaderImpl(sequenceReader), + TableQueryOrdering.DEVICE); + + final List columns = + testTableSchema.getColumnSchemas().stream() + .map(IMeasurementSchema::getMeasurementId) + .collect(Collectors.toList()); + final TsBlockReader reader = + tableQueryExecutor.query(testTableSchema.getTableName(), columns, null, null, null); + assertTrue(reader.hasNext()); + int cnt = 0; + while (reader.hasNext()) { + final TsBlock result = 
reader.next(); + for (int i = 0; i < result.getPositionCount(); i++) { + String col = result.getColumn(0).getObject(i).toString(); + for (int j = 1; j < testTableSchema.getColumnSchemas().size(); j++) { + assertEquals(col, result.getColumn(j).getObject(i).toString()); + } + } + cnt += result.getPositionCount(); + } + assertEquals(100, cnt); + } + + @Test + public void testWriteMultipleTables() throws Exception { + final File testFile = new File(testDir, "testFile"); + TsFileWriter writer = new TsFileWriter(testFile); + writer.setGenerateTableSchema(true); + List tableSchemas = new ArrayList<>(); + + int tableNum = 10; + for (int i = 0; i < tableNum; i++) { + final TableSchema tableSchema = genTableSchema(i); + tableSchemas.add(tableSchema); + writer.registerTableSchema(tableSchema); + } + + for (int i = 0; i < tableNum; i++) { + writer.writeTable(genTablet(tableSchemas.get(i), 0, 100)); + } + writer.close(); + + TsFileSequenceReader sequenceReader = new TsFileSequenceReader(testFile.getAbsolutePath()); + TableQueryExecutor tableQueryExecutor = + new TableQueryExecutor( + new MetadataQuerierByFileImpl(sequenceReader), + new CachedChunkLoaderImpl(sequenceReader), + TableQueryOrdering.DEVICE); + + final List columns = + testTableSchema.getColumnSchemas().stream() + .map(IMeasurementSchema::getMeasurementId) + .collect(Collectors.toList()); + + for (int i = 0; i < tableNum; i++) { + int cnt; + try (TsBlockReader reader = + tableQueryExecutor.query(tableSchemas.get(i).getTableName(), columns, null, null, null)) { + assertTrue(reader.hasNext()); + cnt = 0; + while (reader.hasNext()) { + final TsBlock result = reader.next(); + cnt += result.getPositionCount(); + } + } + assertEquals(100, cnt); + } + } + + @Ignore + @Test + public void testSketch() throws Exception { + final File testFile = new File(testDir, "testFile"); + TsFileWriter writer = new TsFileWriter(testFile); + writer.setGenerateTableSchema(true); + // table-view registration + 
writer.registerTableSchema(testTableSchema); + // tree-view registration + IDeviceID deviceID = Factory.DEFAULT_FACTORY.create("root.a.b.c.d1"); + List treeSchemas = new ArrayList<>(); + for (int i = 0; i < measurementSchemaNum; i++) { + final MeasurementSchema measurementSchema = + new MeasurementSchema( + "s" + i, TSDataType.INT64, TSEncoding.PLAIN, CompressionType.UNCOMPRESSED); + treeSchemas.add(measurementSchema); + writer.registerTimeseries(deviceID, measurementSchema); + } + IDeviceID deviceIDAligned = Factory.DEFAULT_FACTORY.create("root.a.b.c.d2"); + writer.registerAlignedTimeseries(deviceIDAligned, treeSchemas); + + // table-view write + final Tablet tablet = genTablet(testTableSchema, 0, 5); + writer.writeTable(tablet); + // tree-view write + for (int i = 0; i < 50; i++) { + final TSRecord tsRecord = new TSRecord(i, deviceID); + for (int j = 0; j < measurementSchemaNum; j++) { + tsRecord.addTuple(new LongDataPoint("s" + j, i)); + } + writer.write(tsRecord); + tsRecord.deviceId = deviceIDAligned; + writer.writeAligned(tsRecord); + } + writer.close(); + + File sketchOutputFile = new File(testDir, "testFile.sketch"); + TsFileSketchTool sketchTool = + new TsFileSketchTool(testFile.getPath(), sketchOutputFile.getPath()); + sketchTool.run(); + } + + @Test + public void testHybridWrite() throws Exception { + final File testFile = new File(testDir, "testFile"); + TsFileWriter writer = new TsFileWriter(testFile); + writer.setGenerateTableSchema(true); + // table-view registration + writer.registerTableSchema(testTableSchema); + // tree-view registration + final IDeviceID deviceID = Factory.DEFAULT_FACTORY.create("root.a.b.c.d1"); + List treeSchemas = new ArrayList<>(); + for (int i = 0; i < measurementSchemaNum; i++) { + final MeasurementSchema measurementSchema = + new MeasurementSchema( + "s" + i, TSDataType.INT64, TSEncoding.PLAIN, CompressionType.UNCOMPRESSED); + treeSchemas.add(measurementSchema); + writer.registerTimeseries(deviceID, measurementSchema); + } 
+ + // table-view write + final Tablet tablet = genTablet(testTableSchema, 0, 100); + writer.writeTable(tablet); + // tree-view write + for (int i = 0; i < 50; i++) { + final TSRecord tsRecord = new TSRecord(i, deviceID); + for (int j = 0; j < measurementSchemaNum; j++) { + tsRecord.addTuple(new LongDataPoint("s" + j, i)); + } + writer.write(tsRecord); + } + writer.close(); + + // table-view read table-view + int cnt; + try (TsFileSequenceReader sequenceReader = + new TsFileSequenceReader(testFile.getAbsolutePath())) { + TableQueryExecutor tableQueryExecutor = + new TableQueryExecutor( + new MetadataQuerierByFileImpl(sequenceReader), + new CachedChunkLoaderImpl(sequenceReader), + TableQueryOrdering.DEVICE); + + List columns = + testTableSchema.getColumnSchemas().stream() + .map(IMeasurementSchema::getMeasurementId) + .collect(Collectors.toList()); + TsBlockReader reader = + tableQueryExecutor.query(testTableSchema.getTableName(), columns, null, null, null); + assertTrue(reader.hasNext()); + cnt = 0; + while (reader.hasNext()) { + final TsBlock result = reader.next(); + cnt += result.getPositionCount(); + } + assertEquals(100, cnt); + } + + // tree-view read tree-view + try (TsFileSequenceReader sequenceReader = + new TsFileSequenceReader(testFile.getAbsolutePath())) { + QueryExecutor queryExecutor = + new TsFileExecutor( + new MetadataQuerierByFileImpl(sequenceReader), + new CachedChunkLoaderImpl(sequenceReader)); + + List selectedSeries = new ArrayList<>(); + for (int i = 0; i < measurementSchemaNum; i++) { + selectedSeries.add(new Path(deviceID, "s" + i, false)); + } + final QueryExpression queryExpression = QueryExpression.create(selectedSeries, null); + final QueryDataSet queryDataSet = queryExecutor.execute(queryExpression); + cnt = 0; + while (queryDataSet.hasNext()) { + queryDataSet.next(); + cnt++; + } + assertEquals(50, cnt); + } + + // table-view read tree-view + try (TsFileSequenceReader sequenceReader = + new 
TsFileSequenceReader(testFile.getAbsolutePath())) { + TableQueryExecutor tableQueryExecutor = + new TableQueryExecutor( + new MetadataQuerierByFileImpl(sequenceReader), + new CachedChunkLoaderImpl(sequenceReader), + TableQueryOrdering.DEVICE); + + List columns = + treeSchemas.stream() + .map(IMeasurementSchema::getMeasurementId) + .collect(Collectors.toList()); + TsBlockReader reader = + tableQueryExecutor.query(deviceID.getTableName(), columns, null, null, null); + assertTrue(reader.hasNext()); + cnt = 0; + while (reader.hasNext()) { + final TsBlock result = reader.next(); + cnt += result.getPositionCount(); + } + assertEquals(50, cnt); + } + + // tree-view read table-view + try (TsFileSequenceReader sequenceReader = + new TsFileSequenceReader(testFile.getAbsolutePath())) { + QueryExecutor queryExecutor = + new TsFileExecutor( + new MetadataQuerierByFileImpl(sequenceReader), + new CachedChunkLoaderImpl(sequenceReader)); + + List selectedSeries = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + final IDeviceID tabletDeviceID = tablet.getDeviceID(i); + for (int j = 0; j < measurementSchemaNum; j++) { + selectedSeries.add(new Path(tabletDeviceID, "s" + j, false)); + } + } + + final QueryExpression queryExpression = QueryExpression.create(selectedSeries, null); + final QueryDataSet queryDataSet = queryExecutor.execute(queryExpression); + cnt = 0; + List rowRecords = new ArrayList<>(); + while (queryDataSet.hasNext()) { + rowRecords.add(queryDataSet.next()); + cnt++; + } + assertEquals(100, cnt); + } + } + + private Tablet genTablet(TableSchema tableSchema, int offset, int num) { + Tablet tablet = + new Tablet( + tableSchema.getTableName(), + tableSchema.getColumnSchemas(), + tableSchema.getColumnTypes()); + for (int i = 0; i < num; i++) { + tablet.addTimestamp(i, offset + i); + List columnSchemas = tableSchema.getColumnSchemas(); + for (int j = 0; j < columnSchemas.size(); j++) { + IMeasurementSchema columnSchema = columnSchemas.get(j); + tablet.addValue( + 
columnSchema.getMeasurementId(), + i, + getValue(columnSchema.getType(), i, tableSchema.getColumnTypes().get(j))); + } + } + tablet.rowSize = num; + return tablet; + } + + public Object getValue(TSDataType dataType, int i, ColumnType columnType) { + switch (dataType) { + case INT64: + return (long) i; + case TEXT: + return new Binary(String.valueOf(i), StandardCharsets.UTF_8); + default: + return i; + } + } + + private TableSchema genTableSchema(int tableNum) { + List measurementSchemas = new ArrayList<>(); + List columnTypes = new ArrayList<>(); + + for (int i = 0; i < idSchemaNum; i++) { + measurementSchemas.add( + new MeasurementSchema( + "id" + i, TSDataType.TEXT, TSEncoding.PLAIN, CompressionType.UNCOMPRESSED)); + columnTypes.add(ColumnType.ID); + } + for (int i = 0; i < measurementSchemaNum; i++) { + measurementSchemas.add( + new MeasurementSchema( + "s" + i, TSDataType.INT64, TSEncoding.PLAIN, CompressionType.UNCOMPRESSED)); + columnTypes.add(ColumnType.MEASUREMENT); + } + return new TableSchema("testTable" + tableNum, measurementSchemas, columnTypes); + } +} diff --git a/java/tsfile/src/test/java/org/apache/tsfile/utils/FileGenerator.java b/java/tsfile/src/test/java/org/apache/tsfile/utils/FileGenerator.java index 37a9a37c2..d33866554 100755 --- a/java/tsfile/src/test/java/org/apache/tsfile/utils/FileGenerator.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/utils/FileGenerator.java @@ -22,10 +22,12 @@ import org.apache.tsfile.common.conf.TSFileDescriptor; import org.apache.tsfile.enums.TSDataType; import org.apache.tsfile.exception.write.WriteProcessException; +import org.apache.tsfile.file.metadata.IDeviceID.Factory; import org.apache.tsfile.file.metadata.enums.TSEncoding; import org.apache.tsfile.read.common.Path; import org.apache.tsfile.write.TsFileWriter; import org.apache.tsfile.write.record.TSRecord; +import org.apache.tsfile.write.schema.IMeasurementSchema; import org.apache.tsfile.write.schema.MeasurementSchema; import 
org.apache.tsfile.write.schema.Schema; @@ -265,7 +267,7 @@ public static void write(String filePath) throws IOException { private static void generateTestSchema() { schema = new Schema(); - List schemaList = new ArrayList<>(); + List schemaList = new ArrayList<>(); schemaList.add( new MeasurementSchema( "s1", TSDataType.INT32, TSEncoding.valueOf(config.getValueEncoder()))); @@ -302,7 +304,7 @@ private static void generateTestSchema(int deviceNum, int measurementNum) { for (int i = 0; i < deviceNum; i++) { for (int j = 0; j < measurementNum; j++) { schema.registerTimeseries( - new Path("d" + generateIndexString(i, deviceNum)), + Factory.DEFAULT_FACTORY.create("d" + generateIndexString(i, deviceNum)), new MeasurementSchema( "s" + generateIndexString(j, measurementNum), TSDataType.INT32, diff --git a/java/tsfile/src/test/java/org/apache/tsfile/utils/ReadWriteForEncodingUtilsTest.java b/java/tsfile/src/test/java/org/apache/tsfile/utils/ReadWriteForEncodingUtilsTest.java index 5dbff6556..7026a5c21 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/utils/ReadWriteForEncodingUtilsTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/utils/ReadWriteForEncodingUtilsTest.java @@ -46,10 +46,16 @@ public void readAndWriteVarIntTest() { byteBuffer.flip(); assertEquals(9, ReadWriteForEncodingUtils.readVarInt(byteBuffer)); - byteBuffer.flip(); + byteBuffer.clear(); // negative num assertEquals(1, ReadWriteForEncodingUtils.writeVarInt(-1, byteBuffer)); byteBuffer.flip(); assertEquals(-1, ReadWriteForEncodingUtils.readVarInt(byteBuffer)); + + byteBuffer.clear(); + // negative num + assertEquals(5, ReadWriteForEncodingUtils.writeVarInt(0xbfffffff, byteBuffer)); + byteBuffer.flip(); + assertEquals(0xbfffffff, ReadWriteForEncodingUtils.readVarInt(byteBuffer)); } } diff --git a/java/tsfile/src/test/java/org/apache/tsfile/utils/RecordUtils.java b/java/tsfile/src/test/java/org/apache/tsfile/utils/RecordUtils.java index 41e72110d..887b13791 100644 --- 
a/java/tsfile/src/test/java/org/apache/tsfile/utils/RecordUtils.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/utils/RecordUtils.java @@ -20,7 +20,7 @@ import org.apache.tsfile.common.constant.JsonFormatConstant; import org.apache.tsfile.enums.TSDataType; -import org.apache.tsfile.read.common.Path; +import org.apache.tsfile.file.metadata.IDeviceID.Factory; import org.apache.tsfile.write.record.TSRecord; import org.apache.tsfile.write.record.datapoint.BooleanDataPoint; import org.apache.tsfile.write.record.datapoint.DoubleDataPoint; @@ -68,7 +68,8 @@ public static TSRecord parseSimpleTupleRecord(String str, Schema schema) { for (int i = 2; i < items.length - 1; i += 2) { // get measurementId and value measurementId = items[i].trim(); - MeasurementGroup measurementGroup = schema.getSeriesSchema(new Path(deviceId)); + MeasurementGroup measurementGroup = + schema.getSeriesSchema(Factory.DEFAULT_FACTORY.create(deviceId)); IMeasurementSchema measurementSchema = measurementGroup == null ? 
null diff --git a/java/tsfile/src/test/java/org/apache/tsfile/utils/RecordUtilsTest.java b/java/tsfile/src/test/java/org/apache/tsfile/utils/RecordUtilsTest.java index 1b8ed2d1c..9240cabbc 100755 --- a/java/tsfile/src/test/java/org/apache/tsfile/utils/RecordUtilsTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/utils/RecordUtilsTest.java @@ -21,6 +21,7 @@ import org.apache.tsfile.common.conf.TSFileConfig; import org.apache.tsfile.common.conf.TSFileDescriptor; import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.file.metadata.IDeviceID.Factory; import org.apache.tsfile.file.metadata.enums.TSEncoding; import org.apache.tsfile.read.common.Path; import org.apache.tsfile.write.record.TSRecord; @@ -72,7 +73,7 @@ public void testParseSimpleTupleRecordInt() { String testString = "d1,1471522347000,s1,1"; TSRecord record = RecordUtils.parseSimpleTupleRecord(testString, schema); assertEquals(1471522347000l, record.time); - assertEquals("d1", record.deviceId); + assertEquals(Factory.DEFAULT_FACTORY.create("d1"), record.deviceId); List tuples = record.dataPointList; assertEquals(1, tuples.size()); DataPoint tuple = tuples.get(0); @@ -83,7 +84,7 @@ public void testParseSimpleTupleRecordInt() { testString = "d1,1471522347000,s1,1,"; record = RecordUtils.parseSimpleTupleRecord(testString, schema); assertEquals(1471522347000l, record.time); - assertEquals("d1", record.deviceId); + assertEquals(Factory.DEFAULT_FACTORY.create("d1"), record.deviceId); tuples = record.dataPointList; assertEquals(1, tuples.size()); tuple = tuples.get(0); @@ -94,7 +95,7 @@ record = RecordUtils.parseSimpleTupleRecord(testString, schema); testString = "d1,1471522347000,s1,1,s2"; record = RecordUtils.parseSimpleTupleRecord(testString, schema); assertEquals(1471522347000l, record.time); - assertEquals("d1", record.deviceId); + assertEquals(Factory.DEFAULT_FACTORY.create("d1"), record.deviceId); tuples = record.dataPointList; assertEquals(1, tuples.size()); tuple = tuples.get(0); @@ 
-121,7 +122,7 @@ public void testParseSimpleTupleRecordAll() { String testString = "d1,1471522347000,s1,1,s2,134134287192587,s3,1.4,s4,1.128794817,s5,true"; TSRecord record = RecordUtils.parseSimpleTupleRecord(testString, schema); assertEquals(1471522347000l, record.time); - assertEquals("d1", record.deviceId); + assertEquals(Factory.DEFAULT_FACTORY.create("d1"), record.deviceId); List tuples = record.dataPointList; assertEquals(5, tuples.size()); // enum type is omitted. DataPoint tuple = tuples.get(0); diff --git a/java/tsfile/src/test/java/org/apache/tsfile/utils/TsFileGeneratorForTest.java b/java/tsfile/src/test/java/org/apache/tsfile/utils/TsFileGeneratorForTest.java index cba459909..52f2951ed 100755 --- a/java/tsfile/src/test/java/org/apache/tsfile/utils/TsFileGeneratorForTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/utils/TsFileGeneratorForTest.java @@ -31,6 +31,7 @@ import org.apache.tsfile.read.common.Path; import org.apache.tsfile.write.TsFileWriter; import org.apache.tsfile.write.record.TSRecord; +import org.apache.tsfile.write.schema.IMeasurementSchema; import org.apache.tsfile.write.schema.MeasurementSchema; import org.apache.tsfile.write.schema.Schema; @@ -296,7 +297,7 @@ public static void generateAlignedTsFile(int rowCount, int chunkGroupSize, int p TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(pageSize); try (TsFileWriter tsFileWriter = new TsFileWriter(file)) { // register align timeseries - List alignedMeasurementSchemas = new ArrayList<>(); + List alignedMeasurementSchemas = new ArrayList<>(); alignedMeasurementSchemas.add( new MeasurementSchema("s1", TSDataType.INT64, TSEncoding.PLAIN, CompressionType.LZ4)); alignedMeasurementSchemas.add( @@ -307,7 +308,7 @@ public static void generateAlignedTsFile(int rowCount, int chunkGroupSize, int p tsFileWriter.registerAlignedTimeseries(new Path("d1"), alignedMeasurementSchemas); // register nonAlign timeseries - List measurementSchemas = new ArrayList<>(); + List 
measurementSchemas = new ArrayList<>(); measurementSchemas.add( new MeasurementSchema("s1", TSDataType.INT64, TSEncoding.PLAIN, CompressionType.LZ4)); measurementSchemas.add( diff --git a/java/tsfile/src/test/java/org/apache/tsfile/write/DefaultSchemaTemplateTest.java b/java/tsfile/src/test/java/org/apache/tsfile/write/DefaultSchemaTemplateTest.java index 6fa667cc3..d57cf30a9 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/write/DefaultSchemaTemplateTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/write/DefaultSchemaTemplateTest.java @@ -28,6 +28,7 @@ import org.apache.tsfile.read.query.dataset.QueryDataSet; import org.apache.tsfile.utils.TsFileGeneratorForTest; import org.apache.tsfile.write.record.Tablet; +import org.apache.tsfile.write.schema.IMeasurementSchema; import org.apache.tsfile.write.schema.MeasurementSchema; import org.junit.Assert; @@ -53,11 +54,11 @@ public void testUsingDefaultSchemaTemplate() throws IOException, WriteProcessExc MeasurementSchema s1 = new MeasurementSchema("s1", TSDataType.INT64, TSEncoding.PLAIN); MeasurementSchema s2 = new MeasurementSchema("s2", TSDataType.INT64, TSEncoding.PLAIN); - List schemaList = new ArrayList<>(); + List schemaList = new ArrayList<>(); schemaList.add(s1); schemaList.add(s2); - Map schema = new HashMap<>(); + Map schema = new HashMap<>(); schema.put("s1", s1); schema.put("s2", s2); diff --git a/java/tsfile/src/test/java/org/apache/tsfile/write/MetadataIndexConstructorTest.java b/java/tsfile/src/test/java/org/apache/tsfile/write/MetadataIndexConstructorTest.java index c2f748246..f898996f5 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/write/MetadataIndexConstructorTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/write/MetadataIndexConstructorTest.java @@ -28,7 +28,6 @@ import org.apache.tsfile.file.metadata.IDeviceID; import org.apache.tsfile.file.metadata.MeasurementMetadataIndexEntry; import org.apache.tsfile.file.metadata.MetadataIndexNode; -import 
org.apache.tsfile.file.metadata.PlainDeviceID; import org.apache.tsfile.file.metadata.TimeseriesMetadata; import org.apache.tsfile.file.metadata.TsFileMetadata; import org.apache.tsfile.file.metadata.enums.MetadataIndexNodeType; @@ -42,6 +41,7 @@ import org.apache.tsfile.write.record.Tablet; import org.apache.tsfile.write.record.datapoint.DataPoint; import org.apache.tsfile.write.record.datapoint.LongDataPoint; +import org.apache.tsfile.write.schema.IMeasurementSchema; import org.apache.tsfile.write.schema.MeasurementSchema; import org.apache.tsfile.write.schema.Schema; @@ -101,7 +101,7 @@ public void singleIndexTest1() { int[][] vectorMeasurement = new int[deviceNum][]; String[][] singleMeasurement = new String[deviceNum][]; for (int i = 0; i < deviceNum; i++) { - devices[i] = new PlainDeviceID("d" + i); + devices[i] = IDeviceID.Factory.DEFAULT_FACTORY.create("d" + i); vectorMeasurement[i] = new int[0]; singleMeasurement[i] = new String[measurementNum]; for (int j = 0; j < measurementNum; j++) { @@ -120,7 +120,7 @@ public void singleIndexTest2() { int[][] vectorMeasurement = new int[deviceNum][]; String[][] singleMeasurement = new String[deviceNum][]; for (int i = 0; i < deviceNum; i++) { - devices[i] = new PlainDeviceID("d" + i); + devices[i] = IDeviceID.Factory.DEFAULT_FACTORY.create("d" + i); vectorMeasurement[i] = new int[0]; singleMeasurement[i] = new String[measurementNum]; for (int j = 0; j < measurementNum; j++) { @@ -139,7 +139,8 @@ public void singleIndexTest3() { int[][] vectorMeasurement = new int[deviceNum][]; String[][] singleMeasurement = new String[deviceNum][]; for (int i = 0; i < deviceNum; i++) { - devices[i] = new PlainDeviceID("d" + generateIndexString(i, deviceNum)); + devices[i] = + IDeviceID.Factory.DEFAULT_FACTORY.create("d" + generateIndexString(i, deviceNum)); vectorMeasurement[i] = new int[0]; singleMeasurement[i] = new String[measurementNum]; for (int j = 0; j < measurementNum; j++) { @@ -158,7 +159,8 @@ public void singleIndexTest4() 
{ int[][] vectorMeasurement = new int[deviceNum][]; String[][] singleMeasurement = new String[deviceNum][]; for (int i = 0; i < deviceNum; i++) { - devices[i] = new PlainDeviceID("d" + generateIndexString(i, deviceNum)); + devices[i] = + IDeviceID.Factory.DEFAULT_FACTORY.create("d" + generateIndexString(i, deviceNum)); vectorMeasurement[i] = new int[0]; singleMeasurement[i] = new String[measurementNum]; for (int j = 0; j < measurementNum; j++) { @@ -171,7 +173,7 @@ public void singleIndexTest4() { /** Example 5: 1 entities with 1 vector containing 9 measurements */ @Test public void vectorIndexTest() { - IDeviceID[] devices = {new PlainDeviceID("d0")}; + IDeviceID[] devices = {IDeviceID.Factory.DEFAULT_FACTORY.create("d0")}; int[][] vectorMeasurement = {{9}}; test(devices, vectorMeasurement, null); } @@ -183,7 +185,9 @@ public void vectorIndexTest() { */ @Test public void compositeIndexTest() { - IDeviceID[] devices = {new PlainDeviceID("d0"), new PlainDeviceID("d1")}; + IDeviceID[] devices = { + IDeviceID.Factory.DEFAULT_FACTORY.create("d0"), IDeviceID.Factory.DEFAULT_FACTORY.create("d1") + }; int[][] vectorMeasurement = {{}, {4}}; String[][] singleMeasurement = { {"s0", "s1", "s2", "s3", "s4", "z0", "z1", "z2", "z3"}, @@ -284,8 +288,9 @@ private void test(IDeviceID[] devices, int[][] vectorMeasurement, String[][] sin private void readMetaDataDFS(List devices, List> measurements) { try (TsFileSequenceReader reader = new TsFileSequenceReader(FILE_PATH)) { TsFileMetadata tsFileMetaData = reader.readFileMetadata(); - MetadataIndexNode metadataIndexNode = tsFileMetaData.getMetadataIndex(); - deviceDFS(devices, measurements, reader, metadataIndexNode); + for (MetadataIndexNode node : tsFileMetaData.getTableMetadataIndexNodeMap().values()) { + deviceDFS(devices, measurements, reader, node); + } } catch (IOException e) { e.printStackTrace(); fail(e.getMessage()); @@ -432,7 +437,7 @@ private void generateFile( // the number of record rows int rowNum = 10; for (int row = 
0; row < rowNum; row++) { - TSRecord tsRecord = new TSRecord(row, ((PlainDeviceID) device).toStringID()); + TSRecord tsRecord = new TSRecord(row, device); for (String measurement : singleMeasurement[i]) { DataPoint dPoint = new LongDataPoint(measurement, row); tsRecord.addTuple(dPoint); @@ -455,8 +460,8 @@ private void generateFile( vectorPrefix + generateIndexString(vectorIndex, vectorMeasurement.length); logger.info("generating vector {}...", vectorName); int measurementNum = vectorMeasurement[i][vectorIndex]; - List schemas = new ArrayList<>(); - List tabletSchema = new ArrayList<>(); + List schemas = new ArrayList<>(); + List tabletSchema = new ArrayList<>(); for (int measurementIndex = 0; measurementIndex < measurementNum; measurementIndex++) { String measurementName = measurementPrefix + generateIndexString(measurementIndex, measurementNum); @@ -471,7 +476,7 @@ private void generateFile( schema.registerMeasurementGroup(new Path(device), group); // add measurements into TSFileWriter // construct the tablet - Tablet tablet = new Tablet(((PlainDeviceID) device).toStringID(), tabletSchema); + Tablet tablet = new Tablet(device.toString(), tabletSchema); long[] timestamps = tablet.timestamps; Object[] values = tablet.values; long timestamp = 1; diff --git a/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileIOWriterTest.java b/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileIOWriterTest.java index dbda5319a..331d7aab5 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileIOWriterTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileIOWriterTest.java @@ -26,7 +26,7 @@ import org.apache.tsfile.file.header.ChunkGroupHeader; import org.apache.tsfile.file.header.ChunkHeader; import org.apache.tsfile.file.metadata.IDeviceID; -import org.apache.tsfile.file.metadata.PlainDeviceID; +import org.apache.tsfile.file.metadata.MetadataIndexNode; import org.apache.tsfile.file.metadata.TimeseriesMetadata; import 
org.apache.tsfile.file.metadata.TsFileMetadata; import org.apache.tsfile.file.metadata.enums.TSEncoding; @@ -35,6 +35,7 @@ import org.apache.tsfile.read.TsFileSequenceReader; import org.apache.tsfile.read.common.Path; import org.apache.tsfile.utils.MeasurementGroup; +import org.apache.tsfile.write.schema.IMeasurementSchema; import org.apache.tsfile.write.schema.MeasurementSchema; import org.apache.tsfile.write.schema.Schema; import org.apache.tsfile.write.schema.VectorMeasurementSchema; @@ -57,8 +58,8 @@ public class TsFileIOWriterTest { private static final String FILE_PATH = TestConstant.BASE_OUTPUT_PATH.concat("TsFileIOWriterTest.tsfile"); - private static final IDeviceID DEVICE_1 = new PlainDeviceID("device1"); - private static final IDeviceID DEVICE_2 = new PlainDeviceID("device2"); + private static final IDeviceID DEVICE_1 = IDeviceID.Factory.DEFAULT_FACTORY.create("device1"); + private static final IDeviceID DEVICE_2 = IDeviceID.Factory.DEFAULT_FACTORY.create("device2"); private static final String SENSOR_1 = "sensor1"; private static final int CHUNK_GROUP_NUM = 2; @@ -68,11 +69,11 @@ public void before() throws IOException { TsFileIOWriter writer = new TsFileIOWriter(new File(FILE_PATH)); // file schema - MeasurementSchema measurementSchema = TestHelper.createSimpleMeasurementSchema(SENSOR_1); + IMeasurementSchema measurementSchema = TestHelper.createSimpleMeasurementSchema(SENSOR_1); VectorMeasurementSchema vectorMeasurementSchema = new VectorMeasurementSchema( "", new String[] {"s1", "s2"}, new TSDataType[] {TSDataType.INT64, TSDataType.INT64}); - List schemas = new ArrayList<>(); + List schemas = new ArrayList<>(); schemas.add(new MeasurementSchema("s1", TSDataType.INT64, TSEncoding.RLE)); schemas.add(new MeasurementSchema("s2", TSDataType.INT64, TSEncoding.RLE)); MeasurementGroup group = new MeasurementGroup(true, schemas); @@ -154,10 +155,7 @@ public void endFileTest() throws IOException { for (Map.Entry> entry : deviceTimeseriesMetadataMap.entrySet()) 
{ for (TimeseriesMetadata timeseriesMetadata : entry.getValue()) { - String seriesPath = - ((PlainDeviceID) entry.getKey()).toStringID() - + "." - + timeseriesMetadata.getMeasurementId(); + String seriesPath = entry.getKey() + "." + timeseriesMetadata.getMeasurementId(); Assert.assertFalse(pathSet.contains(seriesPath)); pathSet.add(seriesPath); } @@ -165,10 +163,14 @@ public void endFileTest() throws IOException { // FileMetaData TsFileMetadata metaData = reader.readFileMetadata(); - Assert.assertEquals(2, metaData.getMetadataIndex().getChildren().size()); + int cnt = 0; + for (MetadataIndexNode node : metaData.getTableMetadataIndexNodeMap().values()) { + cnt += node.getChildren().size(); + } + Assert.assertEquals(2, cnt); } - private void writeChunkGroup(TsFileIOWriter writer, MeasurementSchema measurementSchema) + private void writeChunkGroup(TsFileIOWriter writer, IMeasurementSchema measurementSchema) throws IOException { for (int i = 0; i < CHUNK_GROUP_NUM; i++) { // chunk group diff --git a/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileReadWriteTest.java b/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileReadWriteTest.java index 9ddb5d2f9..8e75ac424 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileReadWriteTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileReadWriteTest.java @@ -21,6 +21,8 @@ import org.apache.tsfile.common.conf.TSFileDescriptor; import org.apache.tsfile.enums.TSDataType; import org.apache.tsfile.exception.write.WriteProcessException; +import org.apache.tsfile.file.metadata.IDeviceID; +import org.apache.tsfile.file.metadata.IDeviceID.Factory; import org.apache.tsfile.file.metadata.enums.TSEncoding; import org.apache.tsfile.read.TsFileReader; import org.apache.tsfile.read.TsFileSequenceReader; @@ -56,6 +58,7 @@ public class TsFileReadWriteTest { private final double delta = 0.0000001; private final String path = TsFileGeneratorForTest.getTestTsFilePath("root.sg1", 0, 0, 1); private File 
f; + private final IDeviceID deviceID = Factory.DEFAULT_FACTORY.create("device_1"); @Before public void setUp() { @@ -163,13 +166,12 @@ public void readEmptyMeasurementTest() throws IOException, WriteProcessException try (TsFileWriter tsFileWriter = new TsFileWriter(f)) { // add measurements into file schema tsFileWriter.registerTimeseries( - new Path("device_1"), - new MeasurementSchema("sensor_1", TSDataType.FLOAT, TSEncoding.RLE)); + new Path(deviceID), new MeasurementSchema("sensor_1", TSDataType.FLOAT, TSEncoding.RLE)); tsFileWriter.registerTimeseries( - new Path("device_1"), + new Path(deviceID), new MeasurementSchema("sensor_2", TSDataType.INT32, TSEncoding.TS_2DIFF)); // construct TSRecord - TSRecord tsRecord = new TSRecord(1, "device_1"); + TSRecord tsRecord = new TSRecord(1, deviceID); DataPoint dPoint1 = new FloatDataPoint("sensor_1", 1.2f); tsRecord.addTuple(dPoint1); // write a TSRecord to TsFile @@ -180,7 +182,7 @@ public void readEmptyMeasurementTest() throws IOException, WriteProcessException TsFileSequenceReader reader = new TsFileSequenceReader(path); TsFileReader readTsFile = new TsFileReader(reader); ArrayList paths = new ArrayList<>(); - paths.add(new Path("device_1", "sensor_2", true)); + paths.add(new Path(deviceID, "sensor_2", true)); QueryExpression queryExpression = QueryExpression.create(paths, null); try { QueryDataSet queryDataSet = readTsFile.query(queryExpression); @@ -209,10 +211,10 @@ private void writeDataByTSRecord( // add measurements into file schema try (TsFileWriter tsFileWriter = new TsFileWriter(f)) { tsFileWriter.registerTimeseries( - new Path("device_1"), new MeasurementSchema("sensor_1", dataType, encodingType)); + new Path(deviceID), new MeasurementSchema("sensor_1", dataType, encodingType)); for (long i = 1; i < floatCount; i++) { // construct TSRecord - TSRecord tsRecord = new TSRecord(i, "device_1"); + TSRecord tsRecord = new TSRecord(i, deviceID); DataPoint dPoint1 = proxy.generateOne(i); tsRecord.addTuple(dPoint1); 
// write a TSRecord to TsFile @@ -225,7 +227,7 @@ private void readData(ReadDataPointProxy proxy) throws IOException { TsFileSequenceReader reader = new TsFileSequenceReader(path); TsFileReader readTsFile = new TsFileReader(reader); ArrayList paths = new ArrayList<>(); - paths.add(new Path("device_1", "sensor_1", true)); + paths.add(new Path(deviceID, "sensor_1", true)); QueryExpression queryExpression = QueryExpression.create(paths, null); QueryDataSet queryDataSet = readTsFile.query(queryExpression); diff --git a/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileWriteApiTest.java b/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileWriteApiTest.java index 4e5607a62..a1ec2aa1a 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileWriteApiTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileWriteApiTest.java @@ -26,7 +26,7 @@ import org.apache.tsfile.file.header.ChunkHeader; import org.apache.tsfile.file.header.PageHeader; import org.apache.tsfile.file.metadata.ChunkMetadata; -import org.apache.tsfile.file.metadata.PlainDeviceID; +import org.apache.tsfile.file.metadata.IDeviceID; import org.apache.tsfile.file.metadata.enums.TSEncoding; import org.apache.tsfile.fileSystem.FSFactoryProducer; import org.apache.tsfile.read.TsFileReader; @@ -59,8 +59,8 @@ public class TsFileWriteApiTest { private final File f = FSFactoryProducer.getFSFactory().getFile("TsFileWriteTest.tsfile"); private final String deviceId = "root.sg.d1"; - private final List alignedMeasurementSchemas = new ArrayList<>(); - private final List measurementSchemas = new ArrayList<>(); + private final List alignedMeasurementSchemas = new ArrayList<>(); + private final List measurementSchemas = new ArrayList<>(); private int oldChunkGroupSize = TSFileDescriptor.getInstance().getConfig().getGroupSizeInByte(); private int oldMaxNumOfPointsInPage = TSFileDescriptor.getInstance().getConfig().getMaxNumberOfPointsInPage(); @@ -109,7 +109,7 @@ public void 
writeWithTsRecord() throws IOException, WriteProcessException { try (TsFileWriter tsFileWriter = new TsFileWriter(f)) { registerTimeseries(tsFileWriter); - List writeMeasurementScheams = new ArrayList<>(); + List writeMeasurementScheams = new ArrayList<>(); // example 1 writeMeasurementScheams.add(measurementSchemas.get(0)); writeMeasurementScheams.add(measurementSchemas.get(1)); @@ -137,7 +137,7 @@ public void writeAlignedWithTsRecord() throws IOException, WriteProcessException try (TsFileWriter tsFileWriter = new TsFileWriter(f)) { registerAlignedTimeseries(tsFileWriter); - List writeMeasurementScheams = new ArrayList<>(); + List writeMeasurementScheams = new ArrayList<>(); // example1 writeMeasurementScheams.add(alignedMeasurementSchemas.get(0)); writeMeasurementScheams.add(alignedMeasurementSchemas.get(1)); @@ -165,7 +165,7 @@ public void writeWithTablet() throws IOException, WriteProcessException { try (TsFileWriter tsFileWriter = new TsFileWriter(f)) { registerTimeseries(tsFileWriter); - List writeMeasurementScheams = new ArrayList<>(); + List writeMeasurementScheams = new ArrayList<>(); // example 1 writeMeasurementScheams.add(measurementSchemas.get(0)); writeMeasurementScheams.add(measurementSchemas.get(1)); @@ -193,7 +193,7 @@ public void writeAlignedWithTablet() throws IOException, WriteProcessException { try (TsFileWriter tsFileWriter = new TsFileWriter(f)) { registerAlignedTimeseries(tsFileWriter); - List writeMeasurementScheams = new ArrayList<>(); + List writeMeasurementScheams = new ArrayList<>(); // example 1 writeMeasurementScheams.add(alignedMeasurementSchemas.get(0)); TsFileGeneratorUtils.writeWithTablet( @@ -220,7 +220,7 @@ public void writeNewAlignedMeasurementAfterFlushChunkGroup1() { try (TsFileWriter tsFileWriter = new TsFileWriter(f)) { registerAlignedTimeseries(tsFileWriter); - List writeMeasurementScheams = new ArrayList<>(); + List writeMeasurementScheams = new ArrayList<>(); // example 1 
writeMeasurementScheams.add(alignedMeasurementSchemas.get(0)); writeMeasurementScheams.add(alignedMeasurementSchemas.get(1)); @@ -247,7 +247,7 @@ public void writeNewAlignedMeasurementAfterFlushChunkGroup2() { try (TsFileWriter tsFileWriter = new TsFileWriter(f)) { registerAlignedTimeseries(tsFileWriter); - List writeMeasurementScheams = new ArrayList<>(); + List writeMeasurementScheams = new ArrayList<>(); // example 1 writeMeasurementScheams.add(alignedMeasurementSchemas.get(0)); writeMeasurementScheams.add(alignedMeasurementSchemas.get(1)); @@ -273,7 +273,7 @@ public void writeOutOfOrderAlignedData() throws IOException, WriteProcessExcepti try (TsFileWriter tsFileWriter = new TsFileWriter(f)) { registerAlignedTimeseries(tsFileWriter); - List writeMeasurementScheams = new ArrayList<>(); + List writeMeasurementScheams = new ArrayList<>(); // example 1 writeMeasurementScheams.add(alignedMeasurementSchemas.get(0)); writeMeasurementScheams.add(alignedMeasurementSchemas.get(1)); @@ -314,7 +314,7 @@ public void writeOutOfOrderData() throws IOException, WriteProcessException { try (TsFileWriter tsFileWriter = new TsFileWriter(f)) { registerTimeseries(tsFileWriter); - List writeMeasurementScheams = new ArrayList<>(); + List writeMeasurementScheams = new ArrayList<>(); // example 1 writeMeasurementScheams.add(measurementSchemas.get(0)); writeMeasurementScheams.add(measurementSchemas.get(1)); @@ -448,7 +448,7 @@ public void writeAlignedTimeseriesWithEmptyPage() throws IOException, WriteProce try (TsFileWriter tsFileWriter = new TsFileWriter(f)) { registerAlignedTimeseries(tsFileWriter); - List writeMeasurementScheams = new ArrayList<>(); + List writeMeasurementScheams = new ArrayList<>(); // example1 writeMeasurementScheams.add(alignedMeasurementSchemas.get(0)); writeMeasurementScheams.add(alignedMeasurementSchemas.get(1)); @@ -499,7 +499,7 @@ public void writeAlignedTimeseriesWithEmptyPage2() throws IOException, WriteProc try (TsFileWriter tsFileWriter = new 
TsFileWriter(f)) { registerAlignedTimeseries(tsFileWriter); - List writeMeasurementScheams = new ArrayList<>(); + List writeMeasurementScheams = new ArrayList<>(); // example1 writeMeasurementScheams.add(alignedMeasurementSchemas.get(3)); writeMeasurementScheams.add(alignedMeasurementSchemas.get(2)); @@ -551,7 +551,7 @@ public void writeAlignedTimeseriesWithEmptyPage3() throws IOException, WriteProc writeMeasurementScheams.add(alignedMeasurementSchemas.get(3)); TsFileIOWriter tsFileIOWriter = tsFileWriter.getIOWriter(); - tsFileIOWriter.startChunkGroup(new PlainDeviceID(deviceId)); + tsFileIOWriter.startChunkGroup(IDeviceID.Factory.DEFAULT_FACTORY.create(deviceId)); AlignedChunkWriterImpl alignedChunkWriter = new AlignedChunkWriterImpl(writeMeasurementScheams); @@ -611,7 +611,7 @@ public void writeTsFileByFlushingPageDirectly() throws IOException, WriteProcess try (TsFileWriter tsFileWriter = new TsFileWriter(f)) { registerTimeseries(tsFileWriter); - List writeMeasurementSchemas = new ArrayList<>(); + List writeMeasurementSchemas = new ArrayList<>(); writeMeasurementSchemas.add(measurementSchemas.get(0)); TsFileGeneratorUtils.writeWithTsRecord( @@ -630,9 +630,11 @@ public void writeTsFileByFlushingPageDirectly() throws IOException, WriteProcess File file = FSFactoryProducer.getFSFactory().getFile("test.tsfile"); try (TsFileSequenceReader reader = new TsFileSequenceReader(f.getAbsolutePath()); TsFileIOWriter tsFileIOWriter = new TsFileIOWriter(file)) { - tsFileIOWriter.startChunkGroup(new PlainDeviceID(deviceId)); + tsFileIOWriter.startChunkGroup(IDeviceID.Factory.DEFAULT_FACTORY.create(deviceId)); for (List chunkMetadatas : - reader.readChunkMetadataInDevice(new PlainDeviceID(deviceId)).values()) { + reader + .readChunkMetadataInDevice(IDeviceID.Factory.DEFAULT_FACTORY.create(deviceId)) + .values()) { for (ChunkMetadata chunkMetadata : chunkMetadatas) { Chunk chunk = reader.readMemChunk(chunkMetadata); ByteBuffer chunkDataBuffer = chunk.getData(); diff --git 
a/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileWriterTest.java b/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileWriterTest.java index 49bba8d77..55e0cec0a 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileWriterTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileWriterTest.java @@ -36,6 +36,7 @@ import org.apache.tsfile.write.record.Tablet; import org.apache.tsfile.write.record.datapoint.FloatDataPoint; import org.apache.tsfile.write.record.datapoint.IntDataPoint; +import org.apache.tsfile.write.schema.IMeasurementSchema; import org.apache.tsfile.write.schema.MeasurementSchema; import org.junit.After; @@ -106,7 +107,7 @@ private void registerTimeseries() { Assert.assertEquals("given nonAligned timeseries d1.s1 has been registered.", e.getMessage()); } try { - List schemas = new ArrayList<>(); + List schemas = new ArrayList<>(); schemas.add( new MeasurementSchema("s1", TSDataType.FLOAT, TSEncoding.RLE, CompressionType.SNAPPY)); writer.registerAlignedTimeseries(new Path("d1"), schemas); @@ -114,7 +115,7 @@ private void registerTimeseries() { Assert.assertEquals( "given device d1 has been registered for nonAligned timeseries.", e.getMessage()); } - List schemas = new ArrayList<>(); + List schemas = new ArrayList<>(); schemas.add( new MeasurementSchema("s2", TSDataType.INT32, TSEncoding.RLE, CompressionType.SNAPPY)); schemas.add( @@ -123,7 +124,7 @@ private void registerTimeseries() { // Register aligned timeseries "d2.s1" , "d2.s2", "d2.s3" try { - List measurementSchemas = new ArrayList<>(); + List measurementSchemas = new ArrayList<>(); measurementSchemas.add(new MeasurementSchema("s1", TSDataType.TEXT, TSEncoding.PLAIN)); measurementSchemas.add(new MeasurementSchema("s2", TSDataType.TEXT, TSEncoding.PLAIN)); measurementSchemas.add(new MeasurementSchema("s3", TSDataType.TEXT, TSEncoding.PLAIN)); @@ -133,7 +134,7 @@ private void registerTimeseries() { fail(e.getMessage()); } try { - List measurementSchemas = 
new ArrayList<>(); + List measurementSchemas = new ArrayList<>(); measurementSchemas.add(new MeasurementSchema("s4", TSDataType.TEXT, TSEncoding.PLAIN)); writer.registerAlignedTimeseries(new Path("d2"), measurementSchemas); } catch (WriteProcessException e) { @@ -210,7 +211,7 @@ public void writeIncorrectTSRecord0() throws IOException { try { writer.write(record); } catch (WriteProcessException e) { - Assert.assertEquals("no nonAligned timeseries is registered in the group.", e.getMessage()); + Assert.assertEquals("No measurement for nonAligned", e.getMessage()); } closeFile(); readNothing(); diff --git a/java/tsfile/src/test/java/org/apache/tsfile/write/WriteTest.java b/java/tsfile/src/test/java/org/apache/tsfile/write/WriteTest.java index 38c843098..81a4fba59 100755 --- a/java/tsfile/src/test/java/org/apache/tsfile/write/WriteTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/write/WriteTest.java @@ -31,6 +31,7 @@ import org.apache.tsfile.utils.RecordUtils; import org.apache.tsfile.utils.StringContainer; import org.apache.tsfile.write.record.TSRecord; +import org.apache.tsfile.write.schema.IMeasurementSchema; import org.apache.tsfile.write.schema.MeasurementSchema; import org.apache.tsfile.write.schema.Schema; @@ -60,7 +61,7 @@ public class WriteTest { private String outputDataFile; private String errorOutputDataFile; private Random rm = new Random(); - private ArrayList measurementArray; + private ArrayList measurementArray; private ArrayList pathArray; private Schema schema; private int stageSize = 4; @@ -219,7 +220,7 @@ public void write() throws IOException, WriteProcessException { // add all measurement except the last one at before writing for (int i = 0; i < measurementArray.size() - 1; i++) { tsFileWriter.registerTimeseries( - new Path(pathArray.get(i).getDevice()), measurementArray.get(i)); + new Path(pathArray.get(i).getIDeviceID()), measurementArray.get(i)); } while (true) { if (lineCount % stageSize == 0) { @@ -235,7 +236,7 @@ public void 
write() throws IOException, WriteProcessException { } if (lineCount == ROW_COUNT / 2) { tsFileWriter.registerTimeseries( - new Path(pathArray.get(measurementArray.size() - 1).getDevice()), + new Path(pathArray.get(measurementArray.size() - 1).getIDeviceID()), measurementArray.get(measurementArray.size() - 1)); } strings = getNextRecord(lineCount, stageState); @@ -250,9 +251,9 @@ public void write() throws IOException, WriteProcessException { } // test duplicate measurement adding Path path = pathArray.get(measurementArray.size() - 1); - MeasurementSchema dupTimeseries = measurementArray.get(measurementArray.size() - 1); + IMeasurementSchema dupTimeseries = measurementArray.get(measurementArray.size() - 1); try { - tsFileWriter.registerTimeseries(new Path(path.getDevice()), dupTimeseries); + tsFileWriter.registerTimeseries(new Path(path.getIDeviceID()), dupTimeseries); } catch (WriteProcessException e) { assertEquals("given timeseries has exists! " + path, e.getMessage()); } diff --git a/java/tsfile/src/test/java/org/apache/tsfile/write/record/TabletTest.java b/java/tsfile/src/test/java/org/apache/tsfile/write/record/TabletTest.java index 07412e8dc..dd9f38b12 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/write/record/TabletTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/write/record/TabletTest.java @@ -22,6 +22,7 @@ import org.apache.tsfile.enums.TSDataType; import org.apache.tsfile.file.metadata.enums.TSEncoding; import org.apache.tsfile.utils.BitMap; +import org.apache.tsfile.write.schema.IMeasurementSchema; import org.apache.tsfile.write.schema.MeasurementSchema; import org.junit.Test; @@ -37,7 +38,7 @@ public class TabletTest { @Test public void testSerializationAndDeSerialization() { String deviceId = "root.sg"; - List measurementSchemas = new ArrayList<>(); + List measurementSchemas = new ArrayList<>(); measurementSchemas.add(new MeasurementSchema("s0", TSDataType.INT32, TSEncoding.PLAIN)); measurementSchemas.add(new 
MeasurementSchema("s1", TSDataType.INT64, TSEncoding.PLAIN)); @@ -74,7 +75,7 @@ public void testSerializationAndDeSerialization() { @Test public void testSerializationAndDeSerializationWithMoreData() { String deviceId = "root.sg"; - List measurementSchemas = new ArrayList<>(); + List measurementSchemas = new ArrayList<>(); measurementSchemas.add(new MeasurementSchema("s0", TSDataType.INT32, TSEncoding.PLAIN)); measurementSchemas.add(new MeasurementSchema("s1", TSDataType.INT64, TSEncoding.PLAIN)); measurementSchemas.add(new MeasurementSchema("s2", TSDataType.FLOAT, TSEncoding.PLAIN)); diff --git a/java/tsfile/src/test/java/org/apache/tsfile/write/schema/converter/SchemaBuilderTest.java b/java/tsfile/src/test/java/org/apache/tsfile/write/schema/converter/SchemaBuilderTest.java index cdab89fb3..70fc3fec0 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/write/schema/converter/SchemaBuilderTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/write/schema/converter/SchemaBuilderTest.java @@ -77,7 +77,7 @@ public void testJsonConverter2() { Map props = new HashMap<>(); props.put(JsonFormatConstant.MAX_POINT_NUMBER, "3"); Schema schema = new Schema(); - Map template = new HashMap<>(); + Map template = new HashMap<>(); template.put( "s4", new MeasurementSchema( @@ -113,7 +113,7 @@ public void testJsonConverter3() { Map props = new HashMap<>(); props.put(JsonFormatConstant.MAX_POINT_NUMBER, "3"); Schema schema = new Schema(); - Map template = new HashMap<>(); + Map template = new HashMap<>(); template.put( "s4", new MeasurementSchema( diff --git a/java/tsfile/src/test/java/org/apache/tsfile/write/writer/RestorableTsFileIOWriterTest.java b/java/tsfile/src/test/java/org/apache/tsfile/write/writer/RestorableTsFileIOWriterTest.java index 5752d564b..a6ae78519 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/write/writer/RestorableTsFileIOWriterTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/write/writer/RestorableTsFileIOWriterTest.java @@ 
-24,7 +24,7 @@ import org.apache.tsfile.exception.NotCompatibleTsFileException; import org.apache.tsfile.file.MetaMarker; import org.apache.tsfile.file.metadata.ChunkMetadata; -import org.apache.tsfile.file.metadata.PlainDeviceID; +import org.apache.tsfile.file.metadata.IDeviceID; import org.apache.tsfile.file.metadata.enums.CompressionType; import org.apache.tsfile.file.metadata.enums.TSEncoding; import org.apache.tsfile.file.metadata.statistics.FloatStatistics; @@ -132,7 +132,7 @@ public void testOnlyOneIncompleteChunkHeader() throws Exception { @Test public void testOnlyOneChunkHeader() throws Exception { TsFileWriter writer = new TsFileWriter(file); - writer.getIOWriter().startChunkGroup(new PlainDeviceID("root.sg1.d1")); + writer.getIOWriter().startChunkGroup(IDeviceID.Factory.DEFAULT_FACTORY.create("root.sg1.d1")); writer .getIOWriter() .startFlushChunk( @@ -254,7 +254,7 @@ public void testOnlyOneChunkGroupAndOneMarker() throws Exception { writer = new TsFileWriter(rWriter); writer.close(); assertNotEquals(TsFileIOWriter.MAGIC_STRING_BYTES.length, rWriter.getTruncatedSize()); - assertEquals(89, rWriter.getTruncatedSize()); + assertEquals(90, rWriter.getTruncatedSize()); rWriter.close(); TsFileSequenceReader reader = new TsFileSequenceReader(FILE_NAME); @@ -290,7 +290,7 @@ public void testAChunkGroupEndWithALotOfZeroBytes() throws Exception { writer = new TsFileWriter(rWriter); writer.close(); assertNotEquals(TsFileIOWriter.MAGIC_STRING_BYTES.length, rWriter.getTruncatedSize()); - assertEquals(89, rWriter.getTruncatedSize()); + assertEquals(98, rWriter.getTruncatedSize()); rWriter.close(); TsFileSequenceReader reader = new TsFileSequenceReader(FILE_NAME); diff --git a/java/tsfile/src/test/java/org/apache/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java b/java/tsfile/src/test/java/org/apache/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java index 4b161ff85..bb47294e5 100644 --- 
a/java/tsfile/src/test/java/org/apache/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java @@ -27,7 +27,6 @@ import org.apache.tsfile.file.metadata.ChunkMetadata; import org.apache.tsfile.file.metadata.IChunkMetadata; import org.apache.tsfile.file.metadata.IDeviceID; -import org.apache.tsfile.file.metadata.PlainDeviceID; import org.apache.tsfile.file.metadata.TimeseriesMetadata; import org.apache.tsfile.file.metadata.enums.CompressionType; import org.apache.tsfile.file.metadata.enums.TSEncoding; @@ -72,7 +71,7 @@ public void setUp() throws IOException { init = true; for (int i = 0; i < 2048; ++i) { sortedSeriesId.add("s" + i); - sortedDeviceId.add(new PlainDeviceID("root.sg.d" + i)); + sortedDeviceId.add(IDeviceID.Factory.DEFAULT_FACTORY.create("root.sg.d" + i)); } sortedSeriesId.sort((String::compareTo)); sortedDeviceId.sort((IDeviceID::compareTo)); @@ -164,9 +163,9 @@ public void testSerializeAndDeserializeAlignedChunkMetadata() throws IOException List measurementIds = new ArrayList<>(); for (int i = 0; i < 10; ++i) { - measurementIds.add(((PlainDeviceID) sortedDeviceId.get(i)).toStringID() + "."); + measurementIds.add(sortedDeviceId.get(i) + "."); for (int j = 1; j <= 6; ++j) { - measurementIds.add(((PlainDeviceID) sortedDeviceId.get(i)).toStringID() + ".s" + j); + measurementIds.add(sortedDeviceId.get(i) + ".s" + j); } } TSMIterator iterator = @@ -216,15 +215,15 @@ public void testSerializeAndDeserializeMixedChunkMetadata() throws IOException { break; } chunkWriter.writeToFileWriter(writer); - seriesIds.add(((PlainDeviceID) deviceId).toStringID() + "." + sortedSeriesId.get(j)); + seriesIds.add(deviceId + "." 
+ sortedSeriesId.get(j)); } } else { // write vector AlignedChunkWriterImpl chunkWriter = generateVectorData(0L, new ArrayList<>(), 6); chunkWriter.writeToFileWriter(writer); - seriesIds.add(((PlainDeviceID) deviceId).toStringID() + "."); + seriesIds.add(deviceId + "."); for (int l = 1; l <= 6; ++l) { - seriesIds.add(((PlainDeviceID) deviceId).toStringID() + ".s" + l); + seriesIds.add(deviceId + ".s" + l); } } originChunkMetadataList.addAll(writer.chunkMetadataList); diff --git a/java/tsfile/src/test/resources/v3TsFile b/java/tsfile/src/test/resources/v3TsFile new file mode 100644 index 000000000..5502286b4 Binary files /dev/null and b/java/tsfile/src/test/resources/v3TsFile differ diff --git a/pom.xml b/pom.xml index fbec22c8f..ed75b3c88 100644 --- a/pom.xml +++ b/pom.xml @@ -28,7 +28,7 @@ org.apache.tsfile tsfile-parent - 1.0.1-SNAPSHOT + 1.2.0-SNAPSHOT pom Apache TsFile Project Parent POM diff --git a/python/pom.xml b/python/pom.xml index 0efca65c1..1a0180d8d 100644 --- a/python/pom.xml +++ b/python/pom.xml @@ -22,7 +22,7 @@ org.apache.tsfile tsfile-parent - 1.0.1-SNAPSHOT + 1.2.0-SNAPSHOT tsfile-python pom