diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml new file mode 100644 index 00000000..c42402be --- /dev/null +++ b/.github/workflows/pytest.yml @@ -0,0 +1,33 @@ +# This workflow will install Python dependencies and run the tests +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Pytest + +on: + push: + branches: [ '**' ] + pull_request: + branches: [ '**' ] + +jobs: + tests: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python 3.9 + uses: actions/setup-python@v1 + with: + python-version: 3.9 + + - name: Install dependencies + run: | + pip install -r requirements.txt + pip install pytest + + - name: Install and test package functions + run: | + pip install --editable . + pytest tests diff --git a/.github/workflows/validation.yml b/.github/workflows/validation.yml index 0d1520cb..c0c6f3d7 100644 --- a/.github/workflows/validation.yml +++ b/.github/workflows/validation.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies and validate the project # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: Validate the project +name: Nomenclature on: push: @@ -10,7 +10,7 @@ on: branches: [ '**' ] jobs: - pytest: + validation: runs-on: ubuntu-latest @@ -22,7 +22,7 @@ jobs: with: python-version: 3.9 - - name: Install requirements + - name: Install dependencies run: pip install -r requirements.txt - name: Run the nomenclature project validation diff --git a/openentrance/tests/test_core.py b/tests/test_core.py similarity index 58% rename from openentrance/tests/test_core.py rename to tests/test_core.py index 233fc702..e1809ed8 100644 --- a/openentrance/tests/test_core.py +++ b/tests/test_core.py @@ -1,12 +1,12 @@ -import openentrance as oe +from openentrance import iso_mapping, nuts_hierarchy def test_iso_mapping(): # check that iso-mapping 
dictionary is not empty and has specific elements for name in ["GR", "GRC", "EL"]: - assert oe.iso_mapping[name] == "Greece" + assert iso_mapping[name] == "Greece" def test_nuts_hierarchy(): # check that nuts-hierarchy is not empty and has specific elements - assert oe.nuts_hierarchy["Belgium"]["BE2"]["BE24"] == ["BE241", "BE242"] + assert nuts_hierarchy["Belgium"]["BE2"]["BE24"] == ["BE241", "BE242"] diff --git a/tests/test_definitions.py b/tests/test_definitions.py new file mode 100644 index 00000000..3139da57 --- /dev/null +++ b/tests/test_definitions.py @@ -0,0 +1,54 @@ +import nomenclature + +definition = nomenclature.DataStructureDefinition("definitions") + + +def test_variables(): + # check that regions dictionary is not empty and has specific element + assert "Emissions|CO2" in definition.variable + + +def test_variables_fuel_types(): + # check that exploding of to fuels works (including CCS subcategory) + obs = definition.variable["Secondary Energy|Electricity|Gas"] + exp = ( + "Net electricity production from natural gas " + "(including methane from biomass or hydrogenation)" + ) + assert obs["description"] == exp + + obs = definition.variable["Secondary Energy|Electricity|Gas|w/ CCS"] + exp = ( + "Net electricity production from natural gas (including methane " + "from biomass or hydrogenation) with a CO2 capture component" + ) + assert obs["description"] == exp + + +def test_variables_industry_types(): + # check that exploding of to industries works + obs = definition.variable["Capital|iAGRI"] + exp = "Total capital costs spend by agriculture" + assert obs["description"] == exp + + +def test_variables_transport_types(): + # check that exploding of to transportation modes works + obs = definition.variable["Energy Service|Transportation|Freight|Rail"] + exp = ( + "Provision of energy services related to freight " + "rail-based transportation technologies" + ) + assert obs["description"] == exp + + +def test_variables_product_types(): + # check that 
exploding of to products works + obs = definition.variable["Consumption|Households|pAGRI|Imported"] + exp = "Consumption of imported agriculture by households" + assert obs["description"] == exp + + +def test_regions(): + # check that regions dictionary is not empty and has specific element + assert "Europe" in definition.region diff --git a/tests/test_validate.py b/tests/test_validate.py new file mode 100644 index 00000000..bf1571c9 --- /dev/null +++ b/tests/test_validate.py @@ -0,0 +1,90 @@ +import pandas as pd +from pyam import IamDataFrame +import pytest + +import sys + +sys.path.append("..") + +from workflow import main as workflow + + +TEST_DF = pd.DataFrame( + [ + ["model_a", "scen_a", "Europe", "Primary Energy", "EJ/yr", 1, 6.0], + ], + columns=["model", "scenario", "region", "variable", "unit", 2005, 2010], +) +df = IamDataFrame(TEST_DF) + + +def validate(df): + try: + workflow(df) + return True + except ValueError as e: + print(e) + return False + + +def test_validate(): + # test simple validation + assert validate(df) + + +def test_validate_fail(): + # test that simple validation fails on variable and region dimension + assert not (validate(df.rename(variable={"Primary Energy": "foo"}))) + assert not (validate(df.rename(region={"Europe": "foo"}))) + + +def _test_validate_directional(): + # test that validation works as expected with directional data + assert validate(df.rename(region={"Europe": "Austria>Germany"})) + assert not validate(df.rename(region={"Europe": "Austria>foo"})) + + # test that directional data with more than one `>` fails + assert not validate(df.rename(region={"Europe": "Austria>Italy>France"})) + + +def test_validate_subannual_months(): + # test that validation works as expected with months + # (and representative timeslices generally) + assert validate(IamDataFrame(TEST_DF, subannual="January")) + assert not validate(IamDataFrame(TEST_DF, subannual="foo")) + + +@pytest.mark.parametrize( + "subannual, status", + [ + ("01-01 
00:00+01:00", True), + ("01-01 00:00", False), + ("01-01 00:00+02:00", False), + ("01-32 00:00+01:00", False), + ], +) +def test_validate_subannual_datetime(subannual, status): + # test that validation works as expected with continuous time as subannual + assert validate(IamDataFrame(TEST_DF, subannual=subannual)) == status + + +@pytest.mark.parametrize( + "rename_mapping, status", + [ + ({2005: "2005-06-17 00:00+01:00", 2010: "2010-06-17 00:00+01:00"}, True), + ({2005: "2005-06-17 00:00+02:00", 2010: "2010-06-17 00:00+02:00"}, False), + ({2005: "2005-06-17 00:00", 2010: "2010-06-17 00:00"}, False), + ], +) +def test_validate_time_entry(rename_mapping, status): + # test that validation works as expected with datetime-domain + _df = IamDataFrame( + IamDataFrame(TEST_DF) + .data.rename(columns={"year": "time"}) + .replace(rename_mapping) + ) + assert validate(_df) == status + + +def test_validate_unit_entry(): + assert not (validate(df.rename(unit={"EJ/yr": "MWh"}))) diff --git a/workflow.py b/workflow.py index 4e0c0027..f975e31c 100755 --- a/workflow.py +++ b/workflow.py @@ -5,22 +5,49 @@ here = Path(__file__).absolute().parent logger = logging.getLogger(__name__) +from datetime import datetime, timedelta + + +# datetime must be in Central European Time (CET) +EXP_TZ = "UTC+01:00" +EXP_TIME_OFFSET = timedelta(seconds=3600) def main(df: pyam.IamDataFrame) -> pyam.IamDataFrame: """Main function for validation and processing""" logger.info("Starting openENTRANCE timeseries-upload processing workflow...") - if "subannual" in df.dimensions: + if "subannual" in df.dimensions or df.time_col == "time": dimensions = ["region", "variable", "subannual"] else: dimensions = ["region", "variable"] definition = DataStructureDefinition(here / "definitions", dimensions=dimensions) - definition.validate(df) + definition.validate(df, dimensions=["region", "variable"]) + + # convert to subannual format if data provided in datetime format if df.time_col == "time": 
logger.info('Re-casting from "time" column to categorical "subannual" format') - df.swap_time_for_year(inplace=True) + df = df.swap_time_for_year(subannual=True) + + # check that any datetime-like items in "subannual" are valid datetime and UTC+01:00 + if "subannual" in df.dimensions: + _datetime = [s for s in df.subannual if s not in definition.subannual] + + for d in _datetime: + try: + _dt = datetime.strptime(f"2020-{d}", "%Y-%m-%d %H:%M%z") + except ValueError: + try: + datetime.strptime(f"2020-{d}", "%Y-%m-%d %H:%M") + except ValueError: + raise ValueError(f"Invalid subannual timeslice: {d}") + + raise ValueError(f"Missing timezone: {d}") + + # casting to datetime with timezone was successful + if not (_dt.tzname() == EXP_TZ or _dt.utcoffset() == EXP_TIME_OFFSET): + raise ValueError(f"Invalid timezone: {d}") return df