Skip to content

Commit

Permalink
Merge pull request #128 from zypp-io/development
Browse files Browse the repository at this point in the history
Release 0.9.1
  • Loading branch information
erfannariman authored Feb 15, 2024
2 parents 3523ee6 + ed1dd4e commit 715aac3
Show file tree
Hide file tree
Showing 9 changed files with 56 additions and 55 deletions.
12 changes: 3 additions & 9 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.8, 3.9]
python-version: ['3.10']

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand All @@ -28,12 +28,6 @@ jobs:
- name: Run pre-commit
run: |
pre-commit run --all-files
- name: assert equality between setup.cfg and requirements.txt
uses: actions/checkout@v2
- name: setup python
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: execute py script
run: |
python ./scripts/check_setupcfg_and_requirements_equal.py
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,6 @@ settings.yml

# mac
.DS_Store

# VSCode
.vscode/
2 changes: 1 addition & 1 deletion df_to_azure/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from .export import df_to_azure

__version__ = "0.9.0"
__version__ = "0.9.1"

logging.basicConfig(
format="%(asctime)s.%(msecs)03d [%(levelname)-5s] [%(name)s] - %(message)s",
Expand Down
18 changes: 13 additions & 5 deletions df_to_azure/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

from sqlalchemy import create_engine
from sqlalchemy.exc import ProgrammingError
from sqlalchemy.sql import text

from df_to_azure.exceptions import UpsertError
from df_to_azure.exceptions import DriverError, UpsertError


class SqlUpsert:
Expand Down Expand Up @@ -71,7 +72,15 @@ def create_stored_procedure(self):
)


def auth_azure(driver: str = "ODBC Driver 17 for SQL Server"):
def auth_azure(driver: str = None):

if driver is None:
import pyodbc

try:
driver = pyodbc.drivers()[-1]
except IndexError:
raise DriverError("ODBC driver not found")

connection_string = "mssql+pyodbc://{}:{}@{}:1433/{}?driver={}".format(
os.environ.get("SQL_USER"),
Expand All @@ -98,6 +107,5 @@ def execute_stmt(stmt: str):
"""
with auth_azure() as con:
t = con.begin()
con.execute(stmt)
t.commit()
with con.begin():
con.execute(text(stmt))
6 changes: 6 additions & 0 deletions df_to_azure/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,9 @@ class UpsertError(Exception):
"""For the moment upsert gives an error"""

pass


class DriverError(Exception):
    """Raised when no usable ODBC driver can be found for the SQL connection."""
56 changes: 21 additions & 35 deletions df_to_azure/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,8 @@
import azure.core.exceptions
import pandas as pd
from azure.storage.blob import BlobServiceClient
from numpy import dtype
from pandas import (
BooleanDtype,
CategoricalDtype,
DataFrame,
DatetimeTZDtype,
Float64Dtype,
Int8Dtype,
Int16Dtype,
Int32Dtype,
Int64Dtype,
StringDtype,
)
from pandas import CategoricalDtype, DataFrame
from pandas.api.types import is_bool_dtype, is_datetime64_any_dtype, is_float_dtype, is_integer_dtype, is_string_dtype
from sqlalchemy.sql.visitors import VisitableType
from sqlalchemy.types import BigInteger, Boolean, DateTime, Integer, Numeric, String

Expand Down Expand Up @@ -234,29 +223,26 @@ def column_types(self) -> dict:
"""
string = String(length=self.text_length)
numeric = Numeric(precision=18, scale=self.decimal_precision)
type_conversion = {
dtype("O"): string,
StringDtype(): string,
dtype("int64"): Integer(),
dtype("int32"): Integer(),
dtype("int16"): Integer(),
dtype("int8"): Integer(),
Int8Dtype(): Integer(),
Int16Dtype(): Integer(),
Int32Dtype(): Integer(),
Int64Dtype(): Integer(),
Float64Dtype(): numeric,
dtype("float64"): numeric,
dtype("float32"): numeric,
dtype("float16"): numeric,
dtype("<M8[ns]"): DateTime(),
dtype("bool"): Boolean(),
BooleanDtype(): Boolean(),
DatetimeTZDtype(tz="utc"): DateTime(),
CategoricalDtype(): string,
}

col_types = {col_name: type_conversion[col_type] for col_name, col_type in self.df.dtypes.to_dict().items()}
def convert_type(col_name, col_type):
    """Map one pandas column dtype to the SQLAlchemy type used for its SQL column.

    Parameters
    ----------
    col_name
        Name of the DataFrame column; only used in the error message.
    col_type
        The dtype of that column, as found in ``DataFrame.dtypes``.

    Returns
    -------
    The shared ``string`` / ``numeric`` instances from the enclosing scope, or a
    fresh ``Boolean()``, ``Integer()`` or ``DateTime()`` instance.

    Raises
    ------
    ValueError
        If the dtype matches none of the supported families.
    """
    # Categorical must be tested before the generic predicates: is_bool_dtype
    # looks at a categorical's *categories*, so a categorical column holding
    # True/False would otherwise be mapped to Boolean instead of string.
    if isinstance(col_type, CategoricalDtype):
        return string
    if is_string_dtype(col_type):
        return string
    if is_bool_dtype(col_type):
        return Boolean()
    if is_integer_dtype(col_type):
        return Integer()
    if is_float_dtype(col_type):
        return numeric
    if is_datetime64_any_dtype(col_type):
        return DateTime()
    raise ValueError(f"Column {col_name} has unknown dtype: {col_type}")

col_types = {
col_name: convert_type(col_name, col_type) for col_name, col_type in self.df.dtypes.to_dict().items()
}

return col_types

Expand Down
7 changes: 5 additions & 2 deletions df_to_azure/tests/test_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def test_mapping_column_types():
"Date": dr1,
"Timedelta": dr1 - dr2,
"Bool": [True, False, True],
"Categorical": Series(["a", "b", "c"], dtype="category"),
}
)
df_to_azure(
Expand Down Expand Up @@ -68,6 +69,7 @@ def test_mapping_column_types():
"Date",
"Timedelta",
"Bool",
"Categorical",
],
"DATA_TYPE": [
"varchar",
Expand All @@ -83,9 +85,10 @@ def test_mapping_column_types():
"datetime",
"numeric",
"bit",
"varchar",
],
"CHARACTER_MAXIMUM_LENGTH": [255, 255, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
"NUMERIC_PRECISION": [nan, nan, 10, 10, 10, 10, 10, 10, 18, 18, nan, 18, nan],
"CHARACTER_MAXIMUM_LENGTH": [255, 255, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 255],
"NUMERIC_PRECISION": [nan, nan, 10, 10, 10, 10, 10, 10, 18, 18, nan, 18, nan, nan],
}
)

Expand Down
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
keyvault
pre-commit
pytest
6 changes: 3 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[metadata]
name = df_to_azure
version = 0.9.0
author = Melvin Folkers, Erfan Nariman
author_email = melvin@zypp.io, erfan@zypp.io
version = 0.9.1
author = Zypp
author_email = hello@zypp.io
description = Automatically write pandas DataFrames to SQL by creating pipelines in Azure Data Factory with copy activity from blob to SQL
long_description = file: README.md
long_description_content_type = text/markdown
Expand Down

0 comments on commit 715aac3

Please sign in to comment.