-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Included basic CSV upload in the Airflow pipeline so that users can use the demo setup with their own data if they don't have it stored in a DB (such as riders)
- Loading branch information
1 parent
566d3b2
commit f74d18c
Showing
2 changed files
with
74 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import os | ||
import pandas as pd | ||
import re | ||
from sqlalchemy import create_engine | ||
import argparse | ||
|
||
def read_csv_file(file_path):
    """Read the CSV file at *file_path* into a pandas DataFrame.

    A progress message naming the file is printed before the read.

    Args:
        file_path: Path of the CSV file to load.

    Returns:
        The DataFrame produced by ``pandas.read_csv`` with default options.
    """
    print(f"Reading csv {file_path} ...")
    return pd.read_csv(file_path)
|
||
def get_postgres_engine(connection_string):
    """Create a SQLAlchemy engine for *connection_string*.

    The target URL is printed for traceability, but with the password
    masked: the raw connection string normally embeds the database
    credentials and must not end up in task logs.

    Args:
        connection_string: Database URL passed to ``create_engine``.

    Returns:
        The engine returned by ``sqlalchemy.create_engine``.
    """
    engine = create_engine(connection_string)
    # render_as_string(hide_password=True) replaces the password with "***".
    print(engine.url.render_as_string(hide_password=True))
    return engine
|
||
def get_table_name(file_name):
    """Derive a table name from a file name.

    The file extension is removed, every run of non-word characters
    (anything matched by the regex ``\\W+``) is replaced by a single
    underscore, and the result is lower-cased.

    Args:
        file_name: File name, e.g. ``"My File-1.csv"``.

    Returns:
        The derived table name, e.g. ``"my_file_1"``.
    """
    stem = os.path.splitext(file_name)[0]
    # Non-word runs are replaced (not removed) so word boundaries survive.
    return re.sub(r'\W+', '_', stem).lower()
|
||
# Directory (relative to the working directory) scanned for CSV files.
LOCAL_FILES_DIR = "./local_files"
# ToDo: populate schema from target_conn instead of hard coded!
TARGET_SCHEMA = "public"


def _quote_identifier(identifier):
    """Return *identifier* double-quoted for safe use inside a SQL statement."""
    return '"' + identifier.replace('"', '""') + '"'


def main():
    """Upload every ``.csv`` file in ``./local_files`` to the target database.

    The database URL is taken from the required ``--connection_string``
    command-line argument. For each CSV file, a table named after the file
    (see ``get_table_name``) is dropped with ``CASCADE`` if it exists and
    then recreated from the file contents in the ``public`` schema.
    """
    # Local import: sqlalchemy is already a dependency of this file, and
    # text() is required for executing raw SQL on SQLAlchemy 2.x.
    from sqlalchemy import text

    parser = argparse.ArgumentParser(description="Specify arguments")
    parser.add_argument(
        "--connection_string",
        action="store",
        required=True,
        help="Connection to airflow target_conn",
    )
    connection_string = parser.parse_args().connection_string
    engine = get_postgres_engine(connection_string)

    # sorted() makes the upload order deterministic across runs.
    for file_name in sorted(os.listdir(LOCAL_FILES_DIR)):
        # Only CSV files are uploaded; everything else is skipped.
        if not file_name.endswith(".csv"):
            continue
        print(f"Saving {file_name} to the database ...")

        table_name = get_table_name(file_name)
        # Read the file BEFORE touching the database so that an unreadable
        # CSV does not leave the existing table dropped.
        df = read_csv_file(f"{LOCAL_FILES_DIR}/{file_name}")

        # Quote and schema-qualify the identifier: the derived name may be
        # a reserved word or start with a digit, and the DROP must target
        # the same schema that to_sql writes to.
        qualified_name = (
            f"{_quote_identifier(TARGET_SCHEMA)}.{_quote_identifier(table_name)}"
        )

        # engine.begin() commits on success and rolls back on error, so the
        # drop and the reload happen in a single transaction.
        with engine.begin() as conn:
            # CASCADE also removes dependent objects (e.g. views) that
            # if_exists='replace' alone would refuse to drop.
            conn.execute(text(f"DROP TABLE IF EXISTS {qualified_name} CASCADE;"))
            df.to_sql(
                name=table_name,
                con=conn,
                schema=TARGET_SCHEMA,
                if_exists='replace',
                index=False,
            )


if __name__ == "__main__":
    main()