Skip to content

Commit

Permalink
Merge pull request #1 from Jake-Pullen/feature/better_error_handling
Browse files Browse the repository at this point in the history
Feature/better error handling
  • Loading branch information
Jake-Pullen authored Aug 10, 2024
2 parents fa6332e + 19b633e commit b8681f0
Show file tree
Hide file tree
Showing 11 changed files with 550 additions and 237 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ data/*
.venv/*
__pycache__/*
*/__pycache__/*
*.pbix
*.pbix
/logs/*
Empty file added config/__init__.py
Empty file.
4 changes: 3 additions & 1 deletion config.yaml → config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,6 @@ primary_keys:
raw_data_path: data/raw
processed_data_path: data/processed
base_data_path: data/base
warehouse_data_path: data/warehouse
warehouse_data_path: data/warehouse
REQUESTS_MAX_RETRIES: 3
REQUESTS_RETRY_DELAY: 5
41 changes: 41 additions & 0 deletions config/custom_json_logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import datetime as dt
import json
import logging
from typing import override

class custom_json_logger(logging.Formatter):
def __init__(
self,
*,
format_keys: dict[str,str] | None = None,
):
super().__init__()
self.format_keys = format_keys if format_keys is not None else {}

@override
def format(self, record: logging.LogRecord) -> str:
record_dict = self._prepare_log_dict(record)
return json.dumps(record_dict, default=str)

def _prepare_log_dict(self, record: logging.LogRecord) -> dict:
always_fields = {
"message" : record.getMessage(),
"timestamp" : dt.datetime.fromtimestamp(
record.created, tz=dt.timezone.utc
).isoformat(),
}
if record.exc_info is not None:
always_fields["exc_info"] = self.formatException(record.exc_info)

if record.stack_info is not None:
always_fields["stack_info"] = self.formatStack(record.stack_info)

message = {
key: msg_val
if (msg_val := always_fields.pop(val, None)) is not None
else getattr(record, val)
for key, val in self.format_keys.items()
}
message.update(always_fields)
return message

12 changes: 12 additions & 0 deletions config/exit_codes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Process exit codes passed to sys.exit() so shells and schedulers can
# distinguish the pipeline's failure modes.

SUCCESS = 0  # pipeline finished without error
MISSING_ENV_VARS = 1  # API_TOKEN / BUDGET_ID not set in the .env file
MISSING_CONFIG_FILE = 2  # config/config.yaml not found
CORRUPTED_CONFIG_FILE = 3  # config/config.yaml exists but failed to parse
UNAUTHORIZED_API_TOKEN = 4  # API rejected the token — presumably HTTP 401; used in pipeline code not shown here
REQUESTS_ERROR = 5  # generic requests-layer failure (network/retry exhaustion) — confirm in pipeline.ingest
BAD_REQUEST = 6  # likely maps to HTTP 400 — confirm against caller
FORBIDDEN = 7  # likely maps to HTTP 403 — confirm against caller
NOT_FOUND = 8  # likely maps to HTTP 404 — confirm against caller
CONFLICT = 9  # likely maps to HTTP 409 — confirm against caller
MOVE_FILE_ERROR = 10  # failure while moving data files between stages — confirm in pipeline code
DUPLICATE_RESOLUTION_ERROR = 11  # failure while de-duplicating records — confirm in pipeline code
41 changes: 41 additions & 0 deletions config/logging_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
version: 1
disable_existing_loggers: False
formatters:
simple:
format: "%(asctime)s - %(levelname)s - %(module)s - %(funcName)s - %(message)s"
datefmt: "%Y-%m-%d %H:%M:%S%z"
json:
"()": config.custom_json_logger.custom_json_logger
format_keys:
level: levelname
timestamp: timestamp
logger: name
module: module
function: funcName
line: lineno
message: message
thread_name: threadName
handlers:
stderr:
class: logging.StreamHandler
level: INFO
formatter: simple
stream: ext://sys.stderr
file:
class: logging.handlers.RotatingFileHandler
level: DEBUG
formatter: json
filename: logs/dpfy_log.jsonl
maxBytes: 10485760 # 10MB
backupCount: 10
queue_handler:
class: logging.handlers.QueueHandler
handlers:
- stderr
- file
respect_handler_level: True
loggers:
root:
level: DEBUG
handlers:
- queue_handler
62 changes: 56 additions & 6 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,61 @@
import dotenv
import logging
import yaml
import sys
import atexit
import logging.config
import logging.handlers

import config.exit_codes as ec
from pipeline.ingest import Ingest
from pipeline.raw_to_base import RawToBase
from pipeline.dimensions import DimAccounts, DimCategories, DimPayees, DimDate
from pipeline.facts import FactTransactions, FactScheduledTransactions

def set_up_logging():
    """Configure logging from config/logging_config.yaml.

    Falls back to ``logging.basicConfig(level=logging.INFO)`` if the
    config file is missing, unparseable, or rejected by ``dictConfig``,
    so the pipeline still has usable console logging.

    When the applied config defines a ``queue_handler``
    (``logging.handlers.QueueHandler``), its listener thread is started
    here and registered to stop cleanly at interpreter exit.
    """
    try:
        with open('config/logging_config.yaml', 'r') as f:
            log_config = yaml.safe_load(f)
        logging.config.dictConfig(log_config)
    except FileNotFoundError:
        # Previously uncaught: a missing file crashed before any fallback.
        print("Logging configuration file not found; using basic logging")
        logging.basicConfig(level=logging.INFO)
    except (yaml.YAMLError, ValueError) as e:
        # dictConfig raises ValueError for a structurally invalid config.
        print(f"Error parsing logging configuration file: {e}")
        logging.basicConfig(level=logging.INFO)
    # getHandlerByName requires Python 3.12+ (as does typing.override used
    # elsewhere in this project).
    queue_handler = logging.getHandlerByName('queue_handler')
    if queue_handler is not None:
        queue_handler.listener.start()
        atexit.register(queue_handler.listener.stop)

logger = logging.getLogger("data_pipeline_for_ynab")
os.makedirs('logs', exist_ok=True)
set_up_logging()

# Load environment variables
dotenv.load_dotenv()

API_TOKEN = os.getenv('API_TOKEN')
BUDGET_ID = os.getenv('BUDGET_ID')
logging.basicConfig(level=logging.DEBUG)

with open('config.yaml', 'r') as file:
config = yaml.safe_load(file)
def main():
if not API_TOKEN or not BUDGET_ID:
logging.error('API_TOKEN or BUDGET_ID is not set in .env file')
sys.exit(ec.MISSING_ENV_VARS)

config['API_TOKEN'] = API_TOKEN
config['BUDGET_ID'] = BUDGET_ID
try:
with open('config/config.yaml', 'r') as file:
config = yaml.safe_load(file)
except FileNotFoundError:
logging.error('config.yaml file not found')
sys.exit(ec.MISSING_CONFIG_FILE)
except yaml.YAMLError as e:
logging.error(f'Error loading config.yaml: {e}')
sys.exit(ec.CORRUPTED_CONFIG_FILE)

config['API_TOKEN'] = API_TOKEN
config['BUDGET_ID'] = BUDGET_ID

logging.info('Starting data pipeline')

if __name__ == '__main__':
Ingest(config)
RawToBase(config)
DimAccounts(config)
Expand All @@ -29,3 +65,17 @@
DimDate(config)
FactTransactions(config)
FactScheduledTransactions(config)

logging.info('Data pipeline completed successfully')
sys.exit(ec.SUCCESS)

if __name__ == '__main__':
    try:
        main()
    except SystemExit as e:
        # sys.exit() may carry None (bare call, exits 0) or a message
        # string as well as an int; treat only None/SUCCESS as success
        # instead of mislabeling a bare exit as an error.
        exit_code = e.code
        if exit_code is None or exit_code == ec.SUCCESS:
            logging.info('Program exited successfully')
        else:
            logging.error(f'Program exited with code {exit_code}')
        # Re-raise so the process still terminates with the original code.
        raise
Loading

0 comments on commit b8681f0

Please sign in to comment.