Skip to content

Commit

Permalink
[DOP-14063] Drop onetl._internal module
Browse files Browse the repository at this point in the history
  • Loading branch information
dolfinus committed Jun 4, 2024
1 parent f50f008 commit 5cde60a
Show file tree
Hide file tree
Showing 23 changed files with 153 additions and 212 deletions.
1 change: 0 additions & 1 deletion .github/workflows/data/core/tracked.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,5 @@ onetl/plugins/**
onetl/impl/**
onetl/hwm/**
onetl/_util/**
onetl/_internal.py
onetl/log.py
.github/workflows/data/core/**
2 changes: 1 addition & 1 deletion .github/workflows/test-core.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ jobs:
- name: Run tests
run: |
./run_tests.sh -m 'not connection'
./run_tests.sh onetl/_util onetl/_internal.py onetl/hooks onetl/file/filter onetl/file/limit onetl/hwm/store/hwm_class_registry.py
./run_tests.sh onetl/_util onetl/hooks onetl/file/filter onetl/file/limit onetl/hwm/store/hwm_class_registry.py
- name: Upload coverage results
uses: actions/upload-artifact@v4
Expand Down
172 changes: 0 additions & 172 deletions onetl/_internal.py

This file was deleted.

34 changes: 33 additions & 1 deletion onetl/_util/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,15 @@
import hashlib
import io
import os
from pathlib import Path
from datetime import datetime
from pathlib import Path, PurePath

from onetl.exception import NotAFileError
from onetl.impl import path_repr

# e.g. 20230524122150
DATETIME_FORMAT = "%Y%m%d%H%M%S"


def get_file_hash(
path: os.PathLike | str,
Expand Down Expand Up @@ -41,3 +45,31 @@ def is_file_readable(path: str | os.PathLike) -> Path:
raise OSError(f"No read access to file {path_repr(path)}")

return path


def generate_temp_path(root: PurePath) -> PurePath:
"""
Returns prefix which will be used for creating temp directory
Returns
-------
RemotePath
Temp path, containing current host name, process name and datetime
Examples
--------
>>> from etl_entities.process import Process
>>> from pathlib import Path
>>> generate_temp_path(Path("/tmp")) # doctest: +SKIP
Path("/tmp/onetl/currenthost/myprocess/20230524122150")
>>> with Process(dag="mydag", task="mytask"): # doctest: +SKIP
... generate_temp_path(Path("/abc"))
Path("/abc/onetl/currenthost/mydag.mytask.myprocess/20230524122150")
"""

from etl_entities.process import ProcessStackManager

current_process = ProcessStackManager.get_current()
current_dt = datetime.now().strftime(DATETIME_FORMAT)
return root / "onetl" / current_process.host / current_process.full_name / current_dt
63 changes: 63 additions & 0 deletions onetl/_util/spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,19 @@
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

import os
import textwrap
from contextlib import contextmanager
from math import inf
from typing import TYPE_CHECKING, Any

from onetl._util.version import Version

try:
from pydantic.v1 import SecretStr
except (ImportError, AttributeError):
from pydantic import SecretStr # type: ignore[no-redef, assignment]

if TYPE_CHECKING:
from pyspark.sql import SparkSession
from pyspark.sql.conf import RuntimeConfig
Expand Down Expand Up @@ -122,3 +128,60 @@ def get_executor_total_cores(spark_session: SparkSession, include_driver: bool =
expected_cores += 1

return expected_cores, config


def stringify(value: Any, quote: bool = False) -> Any: # noqa: WPS212
"""
Convert values to strings.
Values ``True``, ``False`` and ``None`` become ``"true"``, ``"false"`` and ``"null"``.
If input is dict, return dict with stringified values and keys (recursive).
If input is list, return list with stringified values (recursive).
If ``quote=True``, wrap string values with double quotes.
Examples
--------
>>> stringify(1)
'1'
>>> stringify(True)
'true'
>>> stringify(False)
'false'
>>> stringify(None)
'null'
>>> stringify("string")
'string'
>>> stringify("string", quote=True)
'"string"'
>>> stringify({"abc": 1})
{'abc': '1'}
>>> stringify([1, True, False, None, "string"])
['1', 'true', 'false', 'null', 'string']
"""

if isinstance(value, dict):
return {stringify(k): stringify(v, quote) for k, v in value.items()}

if isinstance(value, list):
return [stringify(v, quote) for v in value]

if value is None:
return "null"

if isinstance(value, bool):
return "true" if value else "false"

if isinstance(value, SecretStr):
value = value.get_secret_value()

if isinstance(value, os.PathLike):
value = os.fspath(value)

if isinstance(value, str):
return f'"{value}"' if quote else value

return str(value)
25 changes: 25 additions & 0 deletions onetl/_util/sql.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems)
# SPDX-License-Identifier: Apache-2.0
def clear_statement(statement: str) -> str:
"""
Clear unnecessary spaces and semicolons at the statement end.
Oracle-specific: adds semicolon after END statement.
Examples
--------
>>> clear_statement("SELECT * FROM mytable")
'SELECT * FROM mytable'
>>> clear_statement("SELECT * FROM mytable ; ")
'SELECT * FROM mytable'
>>> clear_statement("CREATE TABLE mytable (id NUMBER)")
'CREATE TABLE mytable (id NUMBER)'
>>> clear_statement("BEGIN ... END")
'BEGIN ... END;'
"""

statement = statement.rstrip().lstrip("\n\r").rstrip(";").rstrip()
if statement.lower().endswith("end"):
statement += ";"
return statement
2 changes: 1 addition & 1 deletion onetl/connection/db_connection/hive/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
except (ImportError, AttributeError):
from pydantic import validator # type: ignore[no-redef, assignment]

from onetl._internal import clear_statement
from onetl._util.spark import inject_spark_param
from onetl._util.sql import clear_statement
from onetl.connection.db_connection.db_connection import DBConnection
from onetl.connection.db_connection.hive.dialect import HiveDialect
from onetl.connection.db_connection.hive.options import (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import warnings
from typing import TYPE_CHECKING, Any

from onetl._internal import clear_statement
from onetl._util.sql import clear_statement
from onetl.connection.db_connection.db_connection import DBConnection
from onetl.connection.db_connection.jdbc_connection.dialect import JDBCDialect
from onetl.connection.db_connection.jdbc_connection.options import (
Expand Down
Loading

0 comments on commit 5cde60a

Please sign in to comment.