Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DOP-14063] Drop onetl._internal module #290

Merged
merged 1 commit into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/data/core/tracked.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,5 @@ onetl/plugins/**
onetl/impl/**
onetl/hwm/**
onetl/_util/**
onetl/_internal.py
onetl/log.py
.github/workflows/data/core/**
2 changes: 1 addition & 1 deletion .github/workflows/test-core.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ jobs:
- name: Run tests
run: |
./run_tests.sh -m 'not connection'
./run_tests.sh onetl/_util onetl/_internal.py onetl/hooks onetl/file/filter onetl/file/limit onetl/hwm/store/hwm_class_registry.py
./run_tests.sh onetl/_util onetl/hooks onetl/file/filter onetl/file/limit onetl/hwm/store/hwm_class_registry.py

- name: Upload coverage results
uses: actions/upload-artifact@v4
Expand Down
172 changes: 0 additions & 172 deletions onetl/_internal.py

This file was deleted.

34 changes: 33 additions & 1 deletion onetl/_util/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,15 @@
import hashlib
import io
import os
from pathlib import Path
from datetime import datetime
from pathlib import Path, PurePath

from onetl.exception import NotAFileError
from onetl.impl import path_repr

# e.g. 20230524122150
DATETIME_FORMAT = "%Y%m%d%H%M%S"


def get_file_hash(
path: os.PathLike | str,
Expand Down Expand Up @@ -41,3 +45,31 @@ def is_file_readable(path: str | os.PathLike) -> Path:
raise OSError(f"No read access to file {path_repr(path)}")

return path


def generate_temp_path(root: PurePath) -> PurePath:
"""
Returns prefix which will be used for creating temp directory

Returns
-------
RemotePath
Temp path, containing current host name, process name and datetime

Examples
--------

>>> from etl_entities.process import Process
>>> from pathlib import Path
>>> generate_temp_path(Path("/tmp")) # doctest: +SKIP
Path("/tmp/onetl/currenthost/myprocess/20230524122150")
>>> with Process(dag="mydag", task="mytask"): # doctest: +SKIP
... generate_temp_path(Path("/abc"))
Path("/abc/onetl/currenthost/mydag.mytask.myprocess/20230524122150")
"""

from etl_entities.process import ProcessStackManager

current_process = ProcessStackManager.get_current()
current_dt = datetime.now().strftime(DATETIME_FORMAT)
return root / "onetl" / current_process.host / current_process.full_name / current_dt
63 changes: 63 additions & 0 deletions onetl/_util/spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,81 @@
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

import os
import textwrap
from contextlib import contextmanager
from math import inf
from typing import TYPE_CHECKING, Any

from onetl._util.version import Version

try:
from pydantic.v1 import SecretStr
except (ImportError, AttributeError):
from pydantic import SecretStr # type: ignore[no-redef, assignment]

if TYPE_CHECKING:
from pyspark.sql import SparkSession
from pyspark.sql.conf import RuntimeConfig


def stringify(value: Any, quote: bool = False) -> Any: # noqa: WPS212
"""
Convert values to strings.

Values ``True``, ``False`` and ``None`` become ``"true"``, ``"false"`` and ``"null"``.

If input is dict, return dict with stringified values and keys (recursive).

If input is list, return list with stringified values (recursive).

If ``quote=True``, wrap string values with double quotes.

Examples
--------

>>> stringify(1)
'1'
>>> stringify(True)
'true'
>>> stringify(False)
'false'
>>> stringify(None)
'null'
>>> stringify("string")
'string'
>>> stringify("string", quote=True)
'"string"'
>>> stringify({"abc": 1})
{'abc': '1'}
>>> stringify([1, True, False, None, "string"])
['1', 'true', 'false', 'null', 'string']
"""

if isinstance(value, dict):
return {stringify(k): stringify(v, quote) for k, v in value.items()}

if isinstance(value, list):
return [stringify(v, quote) for v in value]

if value is None:
return "null"

if isinstance(value, bool):
return "true" if value else "false"

if isinstance(value, SecretStr):
value = value.get_secret_value()

if isinstance(value, os.PathLike):
value = os.fspath(value)

if isinstance(value, str):
return f'"{value}"' if quote else value

return str(value)


@contextmanager
def inject_spark_param(conf: RuntimeConfig, name: str, value: Any):
"""
Expand Down
25 changes: 25 additions & 0 deletions onetl/_util/sql.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems)
# SPDX-License-Identifier: Apache-2.0
def clear_statement(statement: str) -> str:
"""
Clear unnecessary spaces and semicolons at the statement end.

Oracle-specific: adds semicolon after END statement.

Examples
--------

>>> clear_statement("SELECT * FROM mytable")
'SELECT * FROM mytable'
>>> clear_statement("SELECT * FROM mytable ; ")
'SELECT * FROM mytable'
>>> clear_statement("CREATE TABLE mytable (id NUMBER)")
'CREATE TABLE mytable (id NUMBER)'
>>> clear_statement("BEGIN ... END")
'BEGIN ... END;'
"""

statement = statement.rstrip().lstrip("\n\r").rstrip(";").rstrip()
if statement.lower().endswith("end"):
statement += ";"
return statement
2 changes: 1 addition & 1 deletion onetl/connection/db_connection/hive/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
except (ImportError, AttributeError):
from pydantic import validator # type: ignore[no-redef, assignment]

from onetl._internal import clear_statement
from onetl._util.spark import inject_spark_param
from onetl._util.sql import clear_statement
from onetl.connection.db_connection.db_connection import DBConnection
from onetl.connection.db_connection.hive.dialect import HiveDialect
from onetl.connection.db_connection.hive.options import (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import warnings
from typing import TYPE_CHECKING, Any

from onetl._internal import clear_statement
from onetl._util.sql import clear_statement
from onetl.connection.db_connection.db_connection import DBConnection
from onetl.connection.db_connection.jdbc_connection.dialect import JDBCDialect
from onetl.connection.db_connection.jdbc_connection.options import (
Expand Down
Loading