Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Data Request #29

Merged
merged 27 commits into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
e88a065
root: start commit for feat/data_request
pgierz Aug 8, 2024
22729f5
added reading tables functionality for a given variable
siligam Aug 8, 2024
859be33
feat(units.py): add CMIP frequencies
pgierz Aug 9, 2024
4676aea
chore: allows max line length of 120 to keep various linters happy
pgierz Aug 9, 2024
9e56b76
feat: add data request classes as in Ruby
pgierz Aug 9, 2024
7620098
test: add Python translation of Ruby data request tests
pgierz Aug 9, 2024
4a4c24f
test: reads correct fixtures, tests still broken
pgierz Aug 9, 2024
0459652
test: most tests pass for old data request, now in Python
pgierz Aug 9, 2024
63a22ba
chore: filter out warnings for matplotlib support, as these are not r…
pgierz Aug 9, 2024
02b7689
wip: working tests on data request up until the point where a variabl…
pgierz Aug 9, 2024
ff755a5
test: translate tests for frequency into pytest
pgierz Aug 9, 2024
59c0aa1
feat: adds Python translation of Seamore Controlled Vocabularies
pgierz Aug 9, 2024
32051ad
fixed rules object
siligam Aug 19, 2024
7f0626a
fixed minor issues in tests
siligam Aug 19, 2024
a1ffc58
linking variable names with tables
siligam Aug 20, 2024
ea00806
wip
pgierz Sep 2, 2024
357e757
removes breakpoints
pgierz Sep 2, 2024
93bdc7d
fix: fixed merge table id test
siligam Sep 2, 2024
d3fc7af
doc: add documentation for frequency module
pgierz Sep 3, 2024
ebddf28
refactor: removes repetition of ignore files, these should only be de…
pgierz Sep 3, 2024
eaa155b
test: rearrange several paths to keep the tests more portable
pgierz Sep 3, 2024
80bd3b6
ci: submodules
pgierz Sep 3, 2024
2ce41fd
more submodule stuff
pgierz Sep 3, 2024
6fa2c53
Update test_cmorizer.py
pgierz Sep 3, 2024
05a3dc1
Update test_cmorizer.py
pgierz Sep 3, 2024
6ff75bf
map rules to drvs
pgierz Sep 4, 2024
abbb819
fixes tests for data request
pgierz Sep 9, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/CI-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ jobs:
python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
Expand Down
5 changes: 5 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,8 @@ style = pep440
versionfile_source = src/pymorize/_version.py
versionfile_build = pymorize/_version.py
tag_prefix = 'v'
[black]
max-line-length = 120
[flake8]
max-line-length = 120
exclude = cmip6-cmor-tables/CMIP6_CVs/src
51 changes: 48 additions & 3 deletions src/pymorize/cmorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from dask.distributed import Client
from rich.progress import track

# from . import logging_helper
from .data_request import DataRequest, DataRequestTable, IgnoreTableFiles
from .logging import logger
from .pipeline import Pipeline
from .rule import Rule
Expand All @@ -27,6 +27,49 @@ def __init__(

self._post_init_create_pipelines()
self._post_init_create_rules()
self._post_init_read_bare_tables()
self._post_init_create_data_request()
self._post_init_populate_rules_with_tables()

def _post_init_read_bare_tables(self):
    """
    Read every ``*.json`` file in the configured table directory into a mapping of tables.

    The mapping key is the file stem with ``CMIP6_`` removed (``CMIP6_Omon.json`` -> ``Omon``);
    the same key format is used in CMIP6_table_id.json. Files whose name matches a value of
    ``IgnoreTableFiles`` are not loaded. The resulting mapping is stored both under
    ``self._general_cfg["tables"]`` and as ``self.tables``.
    """
    tables_root = Path(self._general_cfg["CMIP_Tables_Dir"])

    # Collect the candidate files first, keyed by their shortened table name.
    files_by_key = {}
    for json_path in tables_root.glob("*.json"):
        files_by_key[json_path.stem.replace("CMIP6_", "")] = json_path

    skipped_names = {member.value for member in IgnoreTableFiles}

    loaded = {}
    for key, json_path in files_by_key.items():
        logger.debug(f"{key}, {json_path}")
        if json_path.name in skipped_names:
            continue
        logger.debug(f"Adding Table {key}")
        loaded[key] = DataRequestTable(json_path)

    # Same object is published in both places.
    self._general_cfg["tables"] = loaded
    self.tables = loaded

def _post_init_create_data_request(self):
    """
    Build ``self.data_request`` from the directory configured as ``CMIP_Tables_Dir``.

    The directory value is passed unchanged to ``DataRequest.from_tables_dir``.
    """
    self.data_request = DataRequest.from_tables_dir(self._general_cfg["CMIP_Tables_Dir"])

def _post_init_populate_rules_with_tables(self):
    """
    Attach to each rule every table that lists the rule's CMOR variable.

    Tables are taken from ``self._general_cfg["tables"]``; a table is added to a rule via
    ``rule.add_table`` when ``rule.cmor_variable`` is contained in the table's ``variable_ids``.
    """
    table_map = self._general_cfg["tables"]
    for current_rule in self.rules:
        # Lazy filter: each table is checked and (if matching) added before the next is examined.
        matching_tables = (
            candidate
            for candidate in table_map.values()
            if current_rule.cmor_variable in candidate.variable_ids
        )
        for candidate in matching_tables:
            current_rule.add_table(candidate)

def _post_init_data_request_variables(self):
    """
    Touch ``cmor_variable`` on every rule in ``self.rules``.

    The value read is discarded and nothing is returned.
    NOTE(review): this looks like an unfinished placeholder -- confirm the intended behaviour.
    """
    for current_rule in self.rules:
        # Attribute access only; the result is intentionally unused.
        current_rule.cmor_variable

def _post_init_create_pipelines(self):
pipelines = []
Expand All @@ -50,12 +93,14 @@ def _post_init_checks(self):
@classmethod
def from_dict(cls, data):
instance = cls(
pymorize_cfg=data.get("pymorize_cfg", {}),
general_cfg=data.get("general_cfg", {}),
pymorize_cfg=data.get("pymorize", {}),
general_cfg=data.get("general", {}),
)
for rule in data.get("rules", []):
rule_obj = Rule.from_dict(rule)
instance.add_rule(rule_obj)
instance._post_init_populate_rules_with_tables()
instance._post_init_create_data_request()
for pipeline in data.get("pipelines", []):
pipeline_obj = Pipeline.from_dict(pipeline)
instance.add_pipeline(pipeline_obj)
Expand Down
68 changes: 68 additions & 0 deletions src/pymorize/controlled_vocabularies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""
Controlled vocabularies for CMIP6
"""

import glob
import json
import os


class ControlledVocabularies(dict):
    """Controlled vocabularies for CMIP6.

    A ``dict`` subclass whose content is the merged top-level content of one or more JSON files.
    When several files define the same top-level key, the file loaded last wins.
    """

    def __init__(self, json_files):
        """Create a new ControlledVocabularies object from a list of json files

        Parameters
        ----------
        json_files : list
            List of json files to load. Files are merged in the order given, so a key
            defined in a later file replaces the same key from an earlier one.

        Raises
        ------
        ValueError
            If one of the files does not contain valid JSON.
        """
        super().__init__()
        for f in json_files:
            self.update(self.dict_from_json_file(f))

    @classmethod
    def new_from_dir(cls, cmip6_cvs_dir):
        """Create a new ControlledVocabularies object from a directory of json files

        Parameters
        ----------
        cmip6_cvs_dir : str
            Path to the directory containing the json files

        Returns
        -------
        ControlledVocabularies
            A new ControlledVocabularies object, behaves like a dictionary.
        """
        # glob.glob returns matches in arbitrary order; sort so that the merge result is
        # reproducible when several files share a top-level key.
        json_files = sorted(glob.glob(os.path.join(cmip6_cvs_dir, "*.json")))
        return cls(json_files)

    def print_experiment_ids(self):
        """Print experiment ids with start and end years and parent experiment ids"""
        for k, v in self["experiment_id"].items():
            print(
                f"{k} {v['start_year']}-{v['end_year']} parent:{', '.join(v['parent_experiment_id'])}"
            )

    @staticmethod
    def dict_from_json_file(path):
        """Load a json file into a dictionary object

        Parameters
        ----------
        path : str
            Path to the json file to load

        Returns
        -------
        object
            The decoded JSON content of the file.

        Raises
        ------
        ValueError
            If the file content is not valid JSON. The original decoding error is attached
            as the exception's cause. Errors from opening the file are not converted.
        """
        try:
            # Read as UTF-8 explicitly so the result does not depend on the platform locale.
            with open(path, "r", encoding="utf-8") as file:
                return json.load(file)
        except json.JSONDecodeError as e:
            raise ValueError(f"file {path}: {e.msg}") from e
Loading
Loading