diff --git a/.github/workflows/cicd.yaml b/.github/workflows/cicd.yaml index 1748af7..20cb437 100644 --- a/.github/workflows/cicd.yaml +++ b/.github/workflows/cicd.yaml @@ -11,7 +11,6 @@ on: paths: - 'justfile' - 'website/**' - - '_quarto.yml' - 'dev-requirements.txt' - '.github/workflows/cicd.yaml' diff --git a/justfile b/justfile index 5c6eb44..4110346 100644 --- a/justfile +++ b/justfile @@ -22,8 +22,7 @@ build: # setup setup: - @pip install uv - @uv venv + @uv venv --python 3.12 @. .venv/bin/activate @uv pip install --upgrade --resolution=highest -r dev-requirements.txt diff --git a/src/icarus/cli.py b/src/icarus/cli.py index 8cd5236..001f8c5 100644 --- a/src/icarus/cli.py +++ b/src/icarus/cli.py @@ -24,7 +24,7 @@ } # typer apps -app = typer.Typer(help="Icarus", **TYPER_KWARGS) +app = typer.Typer(help="Icarus: soaring beyond limits.", **TYPER_KWARGS) clean_app = typer.Typer(help="Clean the data lake.", **TYPER_KWARGS) # add subcommands diff --git a/src/icarus/investments/extract.py b/src/icarus/investments/extract.py index beb102e..165603e 100644 --- a/src/icarus/investments/extract.py +++ b/src/icarus/investments/extract.py @@ -11,6 +11,8 @@ ) # set extracted_at timestamp +# note we don't use ibis.now() to ensure it's the same... +# ...for all tables/rows on a given run extracted_at = datetime.utcnow().isoformat() diff --git a/src/icarus/investments/run.py b/src/icarus/investments/run.py index c0c4a55..cf275ac 100644 --- a/src/icarus/investments/run.py +++ b/src/icarus/investments/run.py @@ -20,10 +20,26 @@ def main(): extract_buy_sell_t = extract_buy_sell() extract_social_media_t = extract_social_media() + # data validation + assert ( + extract_buy_sell_t.count().to_pyarrow().as_py() > 0 + ), "No extracted buy/sell data" + assert ( + extract_social_media_t.count().to_pyarrow().as_py() > 0 + ), "No extracted social media data" + # transform transform_buy_sell_t = transform_buy_sell(extract_buy_sell_t) transform_social_media_t = transform_social_media(extract_social_media_t) - # load? + # data validation + assert ( + transform_buy_sell_t.count().to_pyarrow().as_py() > 0 + ), "No transformed buy/sell data" + assert ( + transform_social_media_t.count().to_pyarrow().as_py() > 0 + ), "No transformed social media data" + + # load catalog.write_table(transform_buy_sell_t, BUY_SELL_TABLE) catalog.write_table(transform_social_media_t, SOCIAL_MEDIA_TABLE)