From c09500eb96989276d27819a1ecb34490ed6c6d90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Fri, 26 Apr 2024 08:19:40 +0000 Subject: [PATCH] [DOP-13853] Update MongoDB package to 10.2.3 --- .github/workflows/data/clickhouse/matrix.yml | 6 ++--- docs/changelog/next_release/255.feature.rst | 2 +- docs/changelog/next_release/267.breaking.rst | 26 +++++++++++++++++++ .../clickhouse/prerequisites.rst | 2 +- .../db_connection/mongodb/types.rst | 4 +-- .../db_connection/clickhouse/dialect.py | 8 +++--- .../db_connection/mongodb/connection.py | 16 ++++++------ tests/fixtures/processing/clickhouse.py | 2 +- .../test_strategy_increment_clickhouse.py | 25 ++++++++++++++++++ .../test_mongodb_unit.py | 18 ++++++------- 10 files changed, 81 insertions(+), 28 deletions(-) create mode 100644 docs/changelog/next_release/267.breaking.rst diff --git a/.github/workflows/data/clickhouse/matrix.yml b/.github/workflows/data/clickhouse/matrix.yml index 9c8c558ba..15c6bffd5 100644 --- a/.github/workflows/data/clickhouse/matrix.yml +++ b/.github/workflows/data/clickhouse/matrix.yml @@ -25,16 +25,16 @@ matrix: clickhouse-version: 23.6.1-alpine <<: *max full: - # the lowest supported Clickhouse version by JDBC driver + # Clickhouse version with proper DateTime > DateTime64 comparison - clickhouse-image: yandex/clickhouse-server - clickhouse-version: '20.7' + clickhouse-version: '21.1' <<: *min - clickhouse-image: clickhouse/clickhouse-server clickhouse-version: 23.6.1-alpine <<: *max nightly: - clickhouse-image: yandex/clickhouse-server - clickhouse-version: '20.7' + clickhouse-version: '21.1' <<: *min - clickhouse-image: clickhouse/clickhouse-server clickhouse-version: latest-alpine diff --git a/docs/changelog/next_release/255.feature.rst b/docs/changelog/next_release/255.feature.rst index 55d780196..aff64c57c 100644 --- a/docs/changelog/next_release/255.feature.rst +++ b/docs/changelog/next_release/255.feature.rst @@ -1 +1 @@ -:class:`MongoDB` connection now uses MongoDB Spark connector ``10.2.2``, upgraded from ``10.1.1``, and supports passing custom versions: ``MongoDB.get_packages(scala_version=..., package_version=...)``. +:class:`MongoDB` connection now uses MongoDB Spark connector ``10.2.3``, upgraded from ``10.1.1``, and supports passing custom versions: ``MongoDB.get_packages(scala_version=..., package_version=...)``. diff --git a/docs/changelog/next_release/267.breaking.rst b/docs/changelog/next_release/267.breaking.rst new file mode 100644 index 000000000..5ce301393 --- /dev/null +++ b/docs/changelog/next_release/267.breaking.rst @@ -0,0 +1,26 @@ +Serialize DateTimeHWM to Clickhouse's ``DateTime64(6)`` (precision up to microseconds) instead of ``DateTime`` (precision up to seconds). + +For Clickhouse below 21.1 comparing column of type ``DateTime`` with a value of type ``DateTime64`` was not supported, returning an empty dataframe. +To avoid this, replace: + +.. code:: python + + DBReader( + ..., + hwm=DBReader.AutoDetectHWM( + name="my_hwm", + expression="hwm_column", # <-- + ), + ) + +with: + +.. code:: python + + DBReader( + ..., + hwm=DBReader.AutoDetectHWM( + name="my_hwm", + expression="CAST(hwm_column AS DateTime64)", # <-- + ), + ) diff --git a/docs/connection/db_connection/clickhouse/prerequisites.rst b/docs/connection/db_connection/clickhouse/prerequisites.rst index 654add047..f7ade0341 100644 --- a/docs/connection/db_connection/clickhouse/prerequisites.rst +++ b/docs/connection/db_connection/clickhouse/prerequisites.rst @@ -6,7 +6,7 @@ Prerequisites Version Compatibility --------------------- -* Clickhouse server versions: 20.7 or higher +* Clickhouse server versions: 21.1 or higher * Spark versions: 2.3.x - 3.5.x * Java versions: 8 - 20 diff --git a/docs/connection/db_connection/mongodb/types.rst b/docs/connection/db_connection/mongodb/types.rst index bd5978aa9..f9787ff2e 100644 --- a/docs/connection/db_connection/mongodb/types.rst +++ b/docs/connection/db_connection/mongodb/types.rst @@ -73,8 +73,8 @@ References Here you can find source code with type conversions: -* `MongoDB -> Spark `_ -* `Spark -> MongoDB `_ +* `MongoDB -> Spark `_ +* `Spark -> MongoDB `_ Supported types --------------- diff --git a/onetl/connection/db_connection/clickhouse/dialect.py b/onetl/connection/db_connection/clickhouse/dialect.py index 187b2e787..2c03620d3 100644 --- a/onetl/connection/db_connection/clickhouse/dialect.py +++ b/onetl/connection/db_connection/clickhouse/dialect.py @@ -26,9 +26,11 @@ def get_min_value(self, value: Any) -> str: return f"minOrNull({result})" def _serialize_datetime(self, value: datetime) -> str: - result = value.strftime("%Y-%m-%d %H:%M:%S") - return f"CAST('{result}' AS DateTime)" + # this requires at least Clickhouse 21.1, see: + # https://github.com/ClickHouse/ClickHouse/issues/16655 + result = value.strftime("%Y-%m-%d %H:%M:%S.%f") + return f"toDateTime64('{result}', 6)" def _serialize_date(self, value: date) -> str: result = value.strftime("%Y-%m-%d") - return f"CAST('{result}' AS Date)" + return f"toDate('{result}')" diff --git a/onetl/connection/db_connection/mongodb/connection.py b/onetl/connection/db_connection/mongodb/connection.py index 5d4299f39..f2c959295 100644 --- a/onetl/connection/db_connection/mongodb/connection.py +++ b/onetl/connection/db_connection/mongodb/connection.py @@ -50,8 +50,8 @@ class Config: class MongoDB(DBConnection): """MongoDB connection. |support_hooks| - Based on package ``org.mongodb.spark:mongo-spark-connector:10.1.1`` - (`MongoDB connector for Spark `_) + Based on package `org.mongodb.spark:mongo-spark-connector:10.2.3 `_ + (`MongoDB connector for Spark `_) .. warning:: @@ -149,7 +149,7 @@ def get_packages( Spark version in format ``major.minor``. Used only if ``scala_version=None``. package_version : str, optional - Specifies the version of the MongoDB Spark connector to use. Defaults to ``10.2.2``. + Specifies the version of the MongoDB Spark connector to use. Defaults to ``10.2.3``. Examples -------- @@ -160,10 +160,10 @@ def get_packages( MongoDB.get_packages(scala_version="2.12") # specify custom connector version - MongoDB.get_packages(scala_version="2.12", package_version="10.2.2") + MongoDB.get_packages(scala_version="2.12", package_version="10.2.3") """ - default_package_version = "10.2.2" + default_package_version = "10.2.3" if scala_version: scala_ver = Version(scala_version).min_digits(2) @@ -190,7 +190,7 @@ def package_spark_3_2(cls) -> str: "use `MongoDB.get_packages(spark_version='3.2')` instead" ) warnings.warn(msg, UserWarning, stacklevel=3) - return "org.mongodb.spark:mongo-spark-connector_2.12:10.2.2" + return "org.mongodb.spark:mongo-spark-connector_2.12:10.2.3" @classproperty def package_spark_3_3(cls) -> str: @@ -200,7 +200,7 @@ def package_spark_3_3(cls) -> str: "use `MongoDB.get_packages(spark_version='3.3')` instead" ) warnings.warn(msg, UserWarning, stacklevel=3) - return "org.mongodb.spark:mongo-spark-connector_2.12:10.2.2" + return "org.mongodb.spark:mongo-spark-connector_2.12:10.2.3" @classproperty def package_spark_3_4(cls) -> str: @@ -210,7 +210,7 @@ def package_spark_3_4(cls) -> str: "use `MongoDB.get_packages(spark_version='3.4')` instead" ) warnings.warn(msg, UserWarning, stacklevel=3) - return "org.mongodb.spark:mongo-spark-connector_2.12:10.2.2" + return "org.mongodb.spark:mongo-spark-connector_2.12:10.2.3" @slot def pipeline( diff --git a/tests/fixtures/processing/clickhouse.py b/tests/fixtures/processing/clickhouse.py index 2b3e4cec1..bf0b2f3e7 100644 --- a/tests/fixtures/processing/clickhouse.py +++ b/tests/fixtures/processing/clickhouse.py @@ -20,7 +20,7 @@ class ClickhouseProcessing(BaseProcessing): "text_string": "String", "hwm_int": "Int32", "hwm_date": "Date", - "hwm_datetime": "DateTime", + "hwm_datetime": "DateTime64(6)", "float_value": "Float32", } diff --git a/tests/tests_integration/tests_strategy_integration/tests_incremental_strategy_integration/test_strategy_increment_clickhouse.py b/tests/tests_integration/tests_strategy_integration/tests_incremental_strategy_integration/test_strategy_increment_clickhouse.py index 77ec071b3..67e67b065 100644 --- a/tests/tests_integration/tests_strategy_integration/tests_incremental_strategy_integration/test_strategy_increment_clickhouse.py +++ b/tests/tests_integration/tests_strategy_integration/tests_incremental_strategy_integration/test_strategy_increment_clickhouse.py @@ -302,12 +302,37 @@ def test_clickhouse_strategy_incremental_explicit_hwm_type( ColumnDateHWM, lambda x: x.isoformat(), ), + pytest.param( + "hwm_date", + "CAST(text_string AS Date32)", + ColumnDateHWM, + lambda x: x.isoformat(), + marks=pytest.mark.xfail(reason="Date32 type was added in ClickHouse 21.9"), + ), ( "hwm_datetime", "CAST(text_string AS DateTime)", ColumnDateTimeHWM, lambda x: x.isoformat(), ), + ( + "hwm_datetime", + "CAST(text_string AS DateTime64)", + ColumnDateTimeHWM, + lambda x: x.isoformat(), + ), + ( + "hwm_datetime", + "CAST(text_string AS DateTime64(3))", + ColumnDateTimeHWM, + lambda x: x.isoformat(), + ), + ( + "hwm_datetime", + "CAST(text_string AS DateTime64(6))", + ColumnDateTimeHWM, + lambda x: x.isoformat(), + ), ], ) def test_clickhouse_strategy_incremental_with_hwm_expr( diff --git a/tests/tests_unit/tests_db_connection_unit/test_mongodb_unit.py b/tests/tests_unit/tests_db_connection_unit/test_mongodb_unit.py index 5333617b1..3e5f85215 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_mongodb_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_mongodb_unit.py @@ -12,9 +12,9 @@ def test_mongodb_package(): warning_msg = re.escape("will be removed in 1.0.0, use `MongoDB.get_packages(spark_version=") with pytest.warns(UserWarning, match=warning_msg): - assert MongoDB.package_spark_3_2 == "org.mongodb.spark:mongo-spark-connector_2.12:10.2.2" - assert MongoDB.package_spark_3_3 == "org.mongodb.spark:mongo-spark-connector_2.12:10.2.2" - assert MongoDB.package_spark_3_4 == "org.mongodb.spark:mongo-spark-connector_2.12:10.2.2" + assert MongoDB.package_spark_3_2 == "org.mongodb.spark:mongo-spark-connector_2.12:10.2.3" + assert MongoDB.package_spark_3_3 == "org.mongodb.spark:mongo-spark-connector_2.12:10.2.3" + assert MongoDB.package_spark_3_4 == "org.mongodb.spark:mongo-spark-connector_2.12:10.2.3" def test_mongodb_get_packages_no_input(): @@ -50,16 +50,16 @@ def test_mongodb_get_packages_scala_version_not_supported(scala_version): @pytest.mark.parametrize( "spark_version, scala_version, package_version, package", [ - (None, "2.12", "10.2.2", "org.mongodb.spark:mongo-spark-connector_2.12:10.2.2"), - (None, "2.13", "10.2.2", "org.mongodb.spark:mongo-spark-connector_2.13:10.2.2"), - ("3.2", None, "10.2.2", "org.mongodb.spark:mongo-spark-connector_2.12:10.2.2"), - ("3.3", None, "10.2.2", "org.mongodb.spark:mongo-spark-connector_2.12:10.2.2"), - ("3.4", None, "10.2.2", "org.mongodb.spark:mongo-spark-connector_2.12:10.2.2"), + (None, "2.12", "10.2.3", "org.mongodb.spark:mongo-spark-connector_2.12:10.2.3"), + (None, "2.13", "10.2.3", "org.mongodb.spark:mongo-spark-connector_2.13:10.2.3"), + ("3.2", None, "10.2.3", "org.mongodb.spark:mongo-spark-connector_2.12:10.2.3"), + ("3.3", None, "10.2.3", "org.mongodb.spark:mongo-spark-connector_2.12:10.2.3"), + ("3.4", None, "10.2.3", "org.mongodb.spark:mongo-spark-connector_2.12:10.2.3"), ("3.2", "2.12", "10.1.1", "org.mongodb.spark:mongo-spark-connector_2.12:10.1.1"), ("3.4", "2.13", "10.1.1", "org.mongodb.spark:mongo-spark-connector_2.13:10.1.1"), ("3.2", "2.12", "10.2.1", "org.mongodb.spark:mongo-spark-connector_2.12:10.2.1"), ("3.2", "2.12", "10.2.0", "org.mongodb.spark:mongo-spark-connector_2.12:10.2.0"), - ("3.2.4", "2.12.1", "10.2.2", "org.mongodb.spark:mongo-spark-connector_2.12:10.2.2"), + ("3.2.4", "2.12.1", "10.2.3", "org.mongodb.spark:mongo-spark-connector_2.12:10.2.3"), ], ) def test_mongodb_get_packages(spark_version, scala_version, package_version, package):