Skip to content

Commit

Permalink
Merge pull request #124 from zypp-io/development
Browse files Browse the repository at this point in the history
Release to main
  • Loading branch information
erfannariman authored Mar 24, 2023
2 parents 8bae1b0 + 2e6b6cc commit 3523ee6
Show file tree
Hide file tree
Showing 17 changed files with 81 additions and 56 deletions.
13 changes: 3 additions & 10 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,10 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest black
- name: Lint with flake8
pip install -r requirements-dev.txt
- name: Run pre-commit
run: |
# exit-zero treats all errors as warnings.
flake8 df_to_azure --count --ignore=E722 --max-complexity=10 --max-line-length=120 --statistics --per-file-ignores="__init__.py:F401"
# - name: Test with pytest
# run: |
# pytest df_to_azure
- name: Python Black Check
run: |
black --line-length=120 --check df_to_azure
pre-commit run --all-files
- name: assert equality between setup.cfg and requirements.txt
uses: actions/checkout@v2
- name: setup python
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -143,4 +143,4 @@ notebooks/
settings.yml

# mac
.DS_Store
.DS_Store
21 changes: 18 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,22 +1,37 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.2.0
rev: v4.3.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files
- id: check-merge-conflict
- id: debug-statements
- id: detect-private-key
- id: name-tests-test
args: [--pytest-test-first]
- id: requirements-txt-fixer
- repo: https://github.com/pycqa/flake8
rev: 3.9.2
rev: 5.0.4
hooks:
- id: flake8
args: ["--statistics", "--count", "--max-complexity=10", "--max-line-length=120", "--per-file-ignores=__init__.py: F401"]
- repo: https://github.com/psf/black
rev: 22.3.0
hooks:
- id: black
args: [--line-length=120]
- repo: https://github.com/PyCQA/isort
rev: 5.9.1
rev: 5.12.0
hooks:
- id: isort
args: ["--profile", "black", "--line-length=120"]
- repo: local
hooks:
- id: check-requirements
name: Check requirements
description: Check if requirements in setup.cfg and requirements.txt are equal
language: python
entry: python scripts/check_setupcfg_and_requirements_equal.py
pass_filenames: false
2 changes: 1 addition & 1 deletion data/employee_1.csv
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ employee_id,week_nr,hours
333,30,10
444,15,2
444,16,4
444,20,10
444,20,10
2 changes: 1 addition & 1 deletion data/employee_2.csv
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ employee_id,week_nr,hours
333,30,10
444,15,99
444,16,4
444,20,99
444,20,99
2 changes: 1 addition & 1 deletion data/employee_duplicate_keys_1.csv
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ employee_id,week_nr,hours
333,30,10
444,15,2
444,15,4
444,20,10
444,20,10
2 changes: 1 addition & 1 deletion data/employee_duplicate_keys_2.csv
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ employee_id,week_nr,hours
333,30,10
444,15,99
444,15,4
444,20,99
444,20,99
2 changes: 1 addition & 1 deletion data/sample_1.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
col_a,col_b,col_c
1,test,X
3,test,Z
4,test,A
4,test,A
2 changes: 1 addition & 1 deletion data/sample_2.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ col_a,col_b,col_c
1,updated value,E
3,test,Z
5,new value,F
6,also new,H
6,also new,H
2 changes: 1 addition & 1 deletion df_to_azure/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from .export import df_to_azure

__version__ = "0.8.0"
__version__ = "0.9.0"

logging.basicConfig(
format="%(asctime)s.%(msecs)03d [%(levelname)-5s] [%(name)s] - %(message)s",
Expand Down
4 changes: 2 additions & 2 deletions df_to_azure/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,14 @@ def create_stored_procedure(self):
)


def auth_azure():
def auth_azure(driver: str = "ODBC Driver 17 for SQL Server"):

connection_string = "mssql+pyodbc://{}:{}@{}:1433/{}?driver={}".format(
os.environ.get("SQL_USER"),
quote_plus(os.environ.get("SQL_PW")),
os.environ.get("SQL_SERVER"),
os.environ.get("SQL_DB"),
"ODBC Driver 17 for SQL Server",
driver,
)
con = create_engine(connection_string).connect()

Expand Down
28 changes: 21 additions & 7 deletions df_to_azure/export.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import logging
import os
import sys
from datetime import datetime
from io import BytesIO
from typing import Union
Expand All @@ -9,7 +8,18 @@
import pandas as pd
from azure.storage.blob import BlobServiceClient
from numpy import dtype
from pandas import BooleanDtype, DataFrame, Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, StringDtype
from pandas import (
BooleanDtype,
CategoricalDtype,
DataFrame,
DatetimeTZDtype,
Float64Dtype,
Int8Dtype,
Int16Dtype,
Int32Dtype,
Int64Dtype,
StringDtype,
)
from sqlalchemy.sql.visitors import VisitableType
from sqlalchemy.types import BigInteger, Boolean, DateTime, Integer, Numeric, String

Expand Down Expand Up @@ -97,6 +107,11 @@ def __init__(
self.clean_staging = clean_staging

def run(self):

if self.df.empty:
logging.info("Data empty, no new records to upload.")
return None, None

if self.create:

# azure components
Expand Down Expand Up @@ -134,10 +149,6 @@ def _checks(self):

def upload_dataset(self):

if self.df.empty:
logging.info("Data empty, no new records to upload.")
sys.exit(1)

if self.method == "create":
self.create_schema()
self.push_to_azure()
Expand Down Expand Up @@ -186,7 +197,7 @@ def upload_to_blob(self):
blob=f"{self.table_name}/{self.table_name}.csv",
)

data = self.df.to_csv(index=False, sep="^", quotechar='"', line_terminator="\n")
data = self.df.to_csv(index=False, sep="^", quotechar='"', lineterminator="\n")
blob_client.upload_blob(data, overwrite=True)

def create_schema(self):
Expand Down Expand Up @@ -234,12 +245,15 @@ def column_types(self) -> dict:
Int16Dtype(): Integer(),
Int32Dtype(): Integer(),
Int64Dtype(): Integer(),
Float64Dtype(): numeric,
dtype("float64"): numeric,
dtype("float32"): numeric,
dtype("float16"): numeric,
dtype("<M8[ns]"): DateTime(),
dtype("bool"): Boolean(),
BooleanDtype(): Boolean(),
DatetimeTZDtype(tz="utc"): DateTime(),
CategoricalDtype(): string,
}

col_types = {col_name: type_conversion[col_type] for col_name, col_type in self.df.dtypes.to_dict().items()}
Expand Down
1 change: 1 addition & 0 deletions df_to_azure/settings.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Union

from pandas import DataFrame

from df_to_azure.utils import test_unique_column_names


Expand Down
17 changes: 9 additions & 8 deletions df_to_azure/tests/test_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,14 +161,15 @@ def test_pipeline_name():
def test_empty_dataframe():
df = DataFrame()

with pytest.raises(SystemExit):
df_to_azure(
df=df,
tablename="empty_dataframe",
schema="test",
method="create",
wait_till_finished=True,
)
adf_client, run_response = df_to_azure(
df=df,
tablename="empty_dataframe",
schema="test",
method="create",
wait_till_finished=True,
)
assert adf_client is None
assert run_response is None


def test_convert_bigint():
Expand Down
2 changes: 1 addition & 1 deletion df_to_azure/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
import time

from df_to_azure.exceptions import PipelineRunError, DoubleColumnNamesError
from df_to_azure.exceptions import DoubleColumnNamesError, PipelineRunError


def print_item(group):
Expand Down
16 changes: 8 additions & 8 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
azure-identity~=1.7.1
azure-mgmt-datafactory~=2.2.0
azure-mgmt-resource~=20.1.0
azure-storage-blob~=12.8.1
pandas~=1.4.1
pyarrow~=7.0.0
pyodbc~=4.0.32
sqlalchemy~=1.4.31
azure-identity>=1.7.1
azure-mgmt-datafactory>=2.2.0,<2.7.0
azure-mgmt-resource>=20.1.0
azure-storage-blob>=12.8.1
pandas>=1.5.0
pyarrow>=7.0.0
pyodbc>=4.0.32
sqlalchemy>=1.4.31,<2.0.0
19 changes: 10 additions & 9 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = df_to_azure
version = 0.8.0
version = 0.9.0
author = Melvin Folkers, Erfan Nariman
author_email = melvin@zypp.io, erfan@zypp.io
description = Automatically write pandas DataFrames to SQL by creating pipelines in Azure Data Factory with copy activity from blob to SQL
Expand All @@ -20,14 +20,15 @@ classifiers =
packages = df_to_azure
python_requires = >=3.7
install_requires =
azure-identity~=1.7.1
azure-mgmt-datafactory~=2.2.0
azure-mgmt-resource~=20.1.0
azure-storage-blob~=12.8.1
pandas~=1.4.1
pyarrow~=7.0.0
pyodbc~=4.0.32
sqlalchemy~=1.4.31
azure-identity>=1.7.1
azure-mgmt-datafactory>=2.2.0,<2.7.0
azure-mgmt-resource>=20.1.0
azure-storage-blob>=12.8.1
pandas>=1.5.0
pyarrow>=7.0.0
pyodbc>=4.0.32
sqlalchemy>=1.4.31,<2.0.0



[flake8]
Expand Down

0 comments on commit 3523ee6

Please sign in to comment.