diff --git a/CHANGELOG.md b/CHANGELOG.md index ba425bcfcb..a4108df460 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +### 43.2.8 [#1285](https://github.com/openfisca/openfisca-core/pull/1285) + +#### Documentation + +- Add some types to `holders` (1 of 3) + ### 43.2.7 [#1300](https://github.com/openfisca/openfisca-core/pull/1300) #### Technical changes diff --git a/openfisca_core/data_storage/__init__.py b/openfisca_core/data_storage/__init__.py index 4dbbb89543..2c0bc30df9 100644 --- a/openfisca_core/data_storage/__init__.py +++ b/openfisca_core/data_storage/__init__.py @@ -1,7 +1,6 @@ """Different storage backends for the data of a simulation.""" -from . import types from .in_memory_storage import InMemoryStorage from .on_disk_storage import OnDiskStorage -__all__ = ["InMemoryStorage", "OnDiskStorage", "types"] +__all__ = ["InMemoryStorage", "OnDiskStorage"] diff --git a/openfisca_core/data_storage/in_memory_storage.py b/openfisca_core/data_storage/in_memory_storage.py index d4d5240c92..750794a867 100644 --- a/openfisca_core/data_storage/in_memory_storage.py +++ b/openfisca_core/data_storage/in_memory_storage.py @@ -1,16 +1,17 @@ from __future__ import annotations from collections.abc import KeysView, MutableMapping +from typing import Generic, TypeVar import numpy -from openfisca_core import periods -from openfisca_core.periods import DateUnit +from openfisca_core import periods, types as t -from . import types as t +#: Type var for numpy arrays (invariant). +_N = TypeVar("_N", bound=t.VarDType) -class InMemoryStorage: +class InMemoryStorage(Generic[_N]): """Storing and retrieving calculated vectors in memory. Args: @@ -22,13 +23,13 @@ class InMemoryStorage: is_eternal: bool #: A dictionary containing data that has been stored in memory. - _arrays: MutableMapping[t.Period, t.Array[t.DTypeGeneric]] + _arrays: MutableMapping[t.Period, t.Array[_N]] def __init__(self, is_eternal: bool = False) -> None: self._arrays = {} self.is_eternal = is_eternal - def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]: + def get(self, period: None | t.Period = None) -> None | t.Array[_N]: """Retrieve the data for the specified :obj:`.Period` from memory. Args: @@ -56,7 +57,7 @@ def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]: """ if self.is_eternal: - period = periods.period(DateUnit.ETERNITY) + period = periods.Period.eternity() period = periods.period(period) values = self._arrays.get(period) @@ -64,7 +65,7 @@ def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]: return None return values - def put(self, value: t.Array[t.DTypeGeneric], period: None | t.Period) -> None: + def put(self, value: t.Array[_N], period: None | t.Period) -> None: """Store the specified data in memory for the specified :obj:`.Period`. Args: @@ -88,7 +89,7 @@ def put(self, value: t.Array[t.DTypeGeneric], period: None | t.Period) -> None: """ if self.is_eternal: - period = periods.period(DateUnit.ETERNITY) + period = periods.Period.eternity() period = periods.period(period) self._arrays[period] = value @@ -133,7 +134,7 @@ def delete(self, period: None | t.Period = None) -> None: return if self.is_eternal: - period = periods.period(DateUnit.ETERNITY) + period = periods.Period.eternity() period = periods.period(period) self._arrays = { diff --git a/openfisca_core/data_storage/on_disk_storage.py b/openfisca_core/data_storage/on_disk_storage.py index 22cb54a413..3fb59b4c74 100644 --- a/openfisca_core/data_storage/on_disk_storage.py +++ b/openfisca_core/data_storage/on_disk_storage.py @@ -1,20 +1,20 @@ from __future__ import annotations from collections.abc import KeysView, MutableMapping +from typing import Generic, TypeVar import os import shutil import numpy -from openfisca_core import periods -from openfisca_core.indexed_enums import EnumArray -from openfisca_core.periods import DateUnit +from openfisca_core import indexed_enums as enum, periods, types as t -from . import types as t +#: Type var for numpy arrays (invariant). +_N = TypeVar("_N", bound=t.VarDType) -class OnDiskStorage: +class OnDiskStorage(Generic[_N]): """Storing and retrieving calculated vectors on disk. Args: @@ -44,7 +44,7 @@ def __init__( storage_dir: str, is_eternal: bool = False, preserve_storage_dir: bool = False, - enums: MutableMapping[str, type[t.Enum]] | None = None, + enums: None | MutableMapping[str, type[t.Enum]] = None, ) -> None: self._files = {} self._enums = {} if enums is None else enums @@ -52,55 +52,16 @@ def __init__( self.preserve_storage_dir = preserve_storage_dir self.storage_dir = storage_dir - def _decode_file(self, file: str) -> t.Array[t.DTypeGeneric]: - """Decode a file by loading its contents as a :mod:`numpy` array. - - Args: - file: Path to the file to be decoded. - - Returns: - EnumArray: Representing the data in the file. - ndarray[generic]: Representing the data in the file. - - Note: - If the file is associated with :class:`~indexed_enums.Enum` values, the - array is converted back to an :obj:`~indexed_enums.EnumArray` object. - - Examples: - >>> import tempfile - - >>> import numpy - - >>> from openfisca_core import data_storage, indexed_enums, periods - - >>> class Housing(indexed_enums.Enum): - ... OWNER = "Owner" - ... TENANT = "Tenant" - ... FREE_LODGER = "Free lodger" - ... HOMELESS = "Homeless" - - >>> array = numpy.array([1]) - >>> value = indexed_enums.EnumArray(array, Housing) - >>> instant = periods.Instant((2017, 1, 1)) - >>> period = periods.Period(("year", instant, 1)) - - >>> with tempfile.TemporaryDirectory() as directory: - ... storage = data_storage.OnDiskStorage(directory) - ... storage.put(value, period) - ... storage._decode_file(storage._files[period]) - EnumArray([Housing.TENANT]) - - """ - enum = self._enums.get(self.storage_dir) - - if enum is not None: - return EnumArray(numpy.load(file), enum) - - array: t.Array[t.DTypeGeneric] = numpy.load(file) - - return array + def __del__(self) -> None: + if self.preserve_storage_dir: + return + shutil.rmtree(self.storage_dir) # Remove the holder temporary files + # If the simulation temporary directory is empty, remove it + parent_dir = os.path.abspath(os.path.join(self.storage_dir, os.pardir)) + if not os.listdir(parent_dir): + shutil.rmtree(parent_dir) - def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]: + def get(self, period: None | t.Period = None) -> None | t.Array[_N]: """Retrieve the data for the specified period from disk. Args: @@ -130,7 +91,7 @@ def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]: """ if self.is_eternal: - period = periods.period(DateUnit.ETERNITY) + period = periods.Period.eternity() period = periods.period(period) values = self._files.get(period) @@ -138,7 +99,7 @@ def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]: return None return self._decode_file(values) - def put(self, value: t.Array[t.DTypeGeneric], period: None | t.Period) -> None: + def put(self, value: t.Array[_N], period: None | t.Period) -> None: """Store the specified data on disk for the specified period. Args: @@ -164,12 +125,12 @@ def put(self, value: t.Array[t.DTypeGeneric], period: None | t.Period) -> None: """ if self.is_eternal: - period = periods.period(DateUnit.ETERNITY) + period = periods.Period.eternity() period = periods.period(period) filename = str(period) path = os.path.join(self.storage_dir, filename) + ".npy" - if isinstance(value, EnumArray) and value.possible_values is not None: + if isinstance(value, enum.EnumArray) and value.possible_values is not None: self._enums[self.storage_dir] = value.possible_values value = value.view(numpy.ndarray) numpy.save(path, value) @@ -217,7 +178,7 @@ def delete(self, period: None | t.Period = None) -> None: return if self.is_eternal: - period = periods.period(DateUnit.ETERNITY) + period = periods.Period.eternity() period = periods.period(period) self._files = { @@ -297,14 +258,53 @@ def restore(self) -> None: period = periods.period(filename_core) files[period] = path - def __del__(self) -> None: - if self.preserve_storage_dir: - return - shutil.rmtree(self.storage_dir) # Remove the holder temporary files - # If the simulation temporary directory is empty, remove it - parent_dir = os.path.abspath(os.path.join(self.storage_dir, os.pardir)) - if not os.listdir(parent_dir): - shutil.rmtree(parent_dir) + def _decode_file(self, file: str) -> t.Array[_N]: + """Decode a file by loading its contents as a :mod:`numpy` array. + + Args: + file: Path to the file to be decoded. + + Returns: + EnumArray: Representing the data in the file. + ndarray[generic]: Representing the data in the file. + + Note: + If the file is associated with :class:`~indexed_enums.Enum` values, the + array is converted back to an :obj:`~indexed_enums.EnumArray` object. + + Examples: + >>> import tempfile + + >>> import numpy + + >>> from openfisca_core import data_storage, indexed_enums, periods + + >>> class Housing(indexed_enums.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + ... FREE_LODGER = "Free lodger" + ... HOMELESS = "Homeless" + + >>> array = numpy.array([1]) + >>> value = indexed_enums.EnumArray(array, Housing) + >>> instant = periods.Instant((2017, 1, 1)) + >>> period = periods.Period(("year", instant, 1)) + + >>> with tempfile.TemporaryDirectory() as directory: + ... storage = data_storage.OnDiskStorage(directory) + ... storage.put(value, period) + ... storage._decode_file(storage._files[period]) + EnumArray([Housing.TENANT]) + + """ + enum_class = self._enums.get(self.storage_dir) + + if enum_class is not None: + return enum.EnumArray(numpy.load(file), enum_class) + + array: t.Array[_N] = numpy.load(file) + + return array __all__ = ["OnDiskStorage"] diff --git a/openfisca_core/data_storage/py.typed b/openfisca_core/data_storage/py.typed new file mode 100644 index 0000000000..e69de29bb2 diff --git a/openfisca_core/data_storage/types.py b/openfisca_core/data_storage/types.py deleted file mode 100644 index db71abbf57..0000000000 --- a/openfisca_core/data_storage/types.py +++ /dev/null @@ -1,14 +0,0 @@ -from typing_extensions import TypedDict - -from openfisca_core.types import Array, DTypeGeneric, Enum, Period - - -class MemoryUsage(TypedDict, total=True): - """Memory usage information.""" - - cell_size: float - nb_arrays: int - total_nb_bytes: int - - -__all__ = ["Array", "DTypeGeneric", "Enum", "Period"] diff --git a/openfisca_core/entities/__init__.py b/openfisca_core/entities/__init__.py index 1811e3fe94..2b9aaab391 100644 --- a/openfisca_core/entities/__init__.py +++ b/openfisca_core/entities/__init__.py @@ -1,6 +1,5 @@ """Provide a way of representing the entities of a rule system.""" -from . import types from ._core_entity import CoreEntity from .entity import Entity from .group_entity import GroupEntity @@ -19,5 +18,4 @@ "build_entity", "check_role_validity", "find_role", - "types", ] diff --git a/openfisca_core/entities/_core_entity.py b/openfisca_core/entities/_core_entity.py index 33002e9af5..75b695c623 100644 --- a/openfisca_core/entities/_core_entity.py +++ b/openfisca_core/entities/_core_entity.py @@ -5,7 +5,8 @@ import abc import os -from . import types as t +from openfisca_core import types as t + from .role import Role @@ -17,8 +18,7 @@ class CoreEntity: **__kwargs: Any keyword arguments. Examples: - >>> from openfisca_core import entities - >>> from openfisca_core.entities import types as t + >>> from openfisca_core import entities, types as t >>> class Entity(entities.CoreEntity): ... def __init__(self, key): @@ -61,7 +61,7 @@ def get_variable( self, variable_name: t.VariableName, check_existence: bool = False, - ) -> t.Variable | None: + ) -> None | t.Variable[t.VarDType]: """Get ``variable_name`` from ``variables``. Args: @@ -168,7 +168,7 @@ def check_variable_defined_for_entity(self, variable_name: t.VariableName) -> No """ entity: None | t.CoreEntity = None - variable: None | t.Variable = self.get_variable( + variable: None | t.Variable[t.VarDType] = self.get_variable( variable_name, check_existence=True, ) diff --git a/openfisca_core/entities/entity.py b/openfisca_core/entities/entity.py index 673aae48b7..7101fab630 100644 --- a/openfisca_core/entities/entity.py +++ b/openfisca_core/entities/entity.py @@ -2,7 +2,8 @@ import textwrap -from . import types as t +from openfisca_core import types as t + from ._core_entity import CoreEntity diff --git a/openfisca_core/entities/group_entity.py b/openfisca_core/entities/group_entity.py index 796da105ee..8f6421158b 100644 --- a/openfisca_core/entities/group_entity.py +++ b/openfisca_core/entities/group_entity.py @@ -6,7 +6,8 @@ import textwrap from itertools import chain -from . import types as t +from openfisca_core import types as t + from ._core_entity import CoreEntity from .role import Role @@ -84,7 +85,7 @@ class GroupEntity(CoreEntity): doc: str #: The list of roles of the ``GroupEntity``. - roles: Iterable[Role] + roles: Iterable[t.Role] #: Whether the entity is a person or not. is_person: ClassVar[bool] = False @@ -103,7 +104,7 @@ def __init__( self.label = label self.doc = textwrap.dedent(doc) self.roles_description = roles - self.roles: Iterable[Role] = () + self.roles: Iterable[t.Role] = () for role_description in roles: role = Role(role_description, self) setattr(self, role.key.upper(), role) diff --git a/openfisca_core/entities/helpers.py b/openfisca_core/entities/helpers.py index 1dcdad88a3..79d69f173e 100644 --- a/openfisca_core/entities/helpers.py +++ b/openfisca_core/entities/helpers.py @@ -2,7 +2,8 @@ from collections.abc import Iterable, Sequence -from . import types as t +from openfisca_core import types as t + from .entity import Entity as SingleEntity from .group_entity import GroupEntity @@ -15,7 +16,6 @@ def build_entity( roles: None | Sequence[t.RoleParams] = None, is_person: bool = False, *, - class_override: object = None, containing_entities: Sequence[str] = (), ) -> t.SingleEntity | t.GroupEntity: """Build an ``Entity`` or a ``GroupEntity``. @@ -27,7 +27,6 @@ def build_entity( doc: A full description. roles: A list of roles —if it's a ``GroupEntity``. is_person: If is an individual, or not. - class_override: ? containing_entities: Keys of contained entities. Returns: @@ -104,8 +103,7 @@ def find_role( None: Else ``None``. Examples: - >>> from openfisca_core import entities - >>> from openfisca_core.entities import types as t + >>> from openfisca_core import entities, types as t >>> principal = t.RoleParams( ... key="principal", diff --git a/openfisca_core/entities/role.py b/openfisca_core/entities/role.py index 39bd5090ed..4f706ad9fa 100644 --- a/openfisca_core/entities/role.py +++ b/openfisca_core/entities/role.py @@ -2,7 +2,8 @@ from collections.abc import Iterable -from . import types as t +from openfisca_core import types as t + from ._description import _Description @@ -51,7 +52,7 @@ class Role: max: None | int = None #: A list of subroles. - subroles: None | Iterable[Role] = None + subroles: None | Iterable[t.Role] = None @property def key(self) -> t.RoleKey: diff --git a/openfisca_core/entities/types.py b/openfisca_core/entities/types.py deleted file mode 100644 index ef6af9024f..0000000000 --- a/openfisca_core/entities/types.py +++ /dev/null @@ -1,42 +0,0 @@ -from typing_extensions import Required, TypedDict - -from openfisca_core.types import ( - CoreEntity, - EntityKey, - EntityPlural, - GroupEntity, - Role, - RoleKey, - RolePlural, - SingleEntity, - TaxBenefitSystem, - Variable, - VariableName, -) - -# Entities - - -class RoleParams(TypedDict, total=False): - key: Required[str] - plural: str - label: str - doc: str - max: int - subroles: list[str] - - -__all__ = [ - "CoreEntity", - "EntityKey", - "EntityPlural", - "GroupEntity", - "Role", - "RoleKey", - "RoleParams", - "RolePlural", - "SingleEntity", - "TaxBenefitSystem", - "Variable", - "VariableName", -] diff --git a/openfisca_core/experimental/_memory_config.py b/openfisca_core/experimental/_memory_config.py index 6fba790e90..3fdde41050 100644 --- a/openfisca_core/experimental/_memory_config.py +++ b/openfisca_core/experimental/_memory_config.py @@ -13,6 +13,9 @@ class MemoryConfig: #: Maximum memory occupation allowed. max_memory_occupation: float + #: Maximum memory occupation allowed in percentage. + max_memory_occupation_pc: float + #: Priority variables. priority_variables: frozenset[str] diff --git a/openfisca_core/experimental/py.typed b/openfisca_core/experimental/py.typed new file mode 100644 index 0000000000..e69de29bb2 diff --git a/openfisca_core/holders/__init__.py b/openfisca_core/holders/__init__.py index a120a671b9..a1b69b27c1 100644 --- a/openfisca_core/holders/__init__.py +++ b/openfisca_core/holders/__init__.py @@ -21,7 +21,6 @@ # # See: https://www.python.org/dev/peps/pep-0008/#imports -from . import types from .helpers import set_input_dispatch_by_period, set_input_divide_by_period from .holder import Holder @@ -29,5 +28,4 @@ "Holder", "set_input_dispatch_by_period", "set_input_divide_by_period", - "types", ] diff --git a/openfisca_core/holders/holder.py b/openfisca_core/holders/holder.py index f60d92f70b..be3dc250b7 100644 --- a/openfisca_core/holders/holder.py +++ b/openfisca_core/holders/holder.py @@ -1,7 +1,6 @@ from __future__ import annotations -from collections.abc import Sequence -from typing import Any +from typing import Generic, TypeVar, cast import os import warnings @@ -15,20 +14,48 @@ errors, indexed_enums as enums, periods, - types, + types as t, ) -from . import types as t +#: Type var for numpy arrays (invariant). +_N = TypeVar("_N", bound=t.VarDType) +#: Type var for array-like objects. +_L = TypeVar("_L") -class Holder: - """A holder keeps tracks of a variable values after they have been calculated, or set as an input.""" - def __init__(self, variable, population) -> None: +class Holder(Generic[_N]): + """Track variable values after they have been calculated or set.""" + + #: The population the variable is calculated for. + population: t.CorePopulation + + #: The simulation the variable is calculated in. + simulation: None | t.Simulation = None + + #: The variable the holder is tracking. + variable: t.Variable[_N] + + #: Whether the variable is eternal. + _eternal: bool + + #: The memory storage. + _memory_storage: t.InMemoryStorage[_N] + + #: The disk storage. + _disk_storage: None | t.OnDiskStorage[_N] + + #: Whether the variable is on-disk storable. + _on_disk_storable: bool + + #: Whether the variable should not be stored. + _do_not_store: bool + + def __init__(self, variable: t.Variable[_N], population: t.CorePopulation) -> None: self.population = population self.variable = variable self.simulation = population.simulation - self._eternal = self.variable.definition_period == periods.DateUnit.ETERNITY + self._eternal = self.variable.definition_period == periods.ETERNITY self._memory_storage = storage.InMemoryStorage(is_eternal=self._eternal) # By default, do not activate on-disk storage, or variable dropping @@ -45,22 +72,26 @@ def __init__(self, variable, population) -> None: if self.variable.name in self.simulation.memory_config.variables_to_drop: self._do_not_store = True - def clone(self, population: t.CorePopulation) -> t.Holder: - """Copy the holder just enough to be able to run a new simulation without modifying the original simulation.""" + def clone(self, population: t.CorePopulation) -> t.Holder[_N]: + """Copy the holder just enough to be able to run a new simulation.""" new = commons.empty_clone(self) new_dict = new.__dict__ for key, value in self.__dict__.items(): - if key not in ("population", "formula", "simulation"): + if key not in {"population", "formula", "simulation"}: new_dict[key] = value new_dict["population"] = population new_dict["simulation"] = population.simulation - return new + return cast(t.Holder[_N], new) - def create_disk_storage(self, directory=None, preserve=False): + def create_disk_storage( + self, directory: None | str = None, preserve: bool = False + ) -> t.OnDiskStorage[_N]: if directory is None: + if self.simulation is None: + raise NotImplementedError directory = self.simulation.data_storage_dir storage_dir = os.path.join(directory, self.variable.name) if not os.path.isdir(storage_dir): @@ -71,7 +102,7 @@ def create_disk_storage(self, directory=None, preserve=False): preserve_storage_dir=preserve, ) - def delete_arrays(self, period=None) -> None: + def delete_arrays(self, period: None | t.Period = None) -> None: """If ``period`` is ``None``, remove all known values of the variable. If ``period`` is not ``None``, only remove all values for any period included in period (e.g. if period is "2017", values for "2017-01", "2017-07", etc. would be removed) @@ -80,7 +111,7 @@ def delete_arrays(self, period=None) -> None: if self._disk_storage: self._disk_storage.delete(period) - def get_array(self, period): + def get_array(self, period: t.Period) -> None | t.Array[_N]: """Get the value of the variable for the given period. If the value is not known, return ``None``. @@ -136,28 +167,26 @@ def get_memory_usage(self) -> t.MemoryUsage: """ usage = t.MemoryUsage( - nb_cells_by_array=self.population.count, dtype=self.variable.dtype, + nb_arrays=0, + nb_cells_by_array=self.population.count, + total_nb_bytes=0, ) usage.update(self._memory_storage.get_memory_usage()) - if self.simulation.trace: + if self.simulation is not None and self.simulation.trace: nb_requests = self.simulation.tracer.get_nb_requests(self.variable.name) - usage.update( - { - "nb_requests": nb_requests, - "nb_requests_by_array": ( - nb_requests / float(usage["nb_arrays"]) - if usage["nb_arrays"] > 0 - else numpy.nan - ), - }, + usage["nb_requests"] = nb_requests + usage["nb_requests_by_array"] = ( + nb_requests / float(usage["nb_arrays"]) + if usage["nb_arrays"] > 0 + else numpy.nan ) return usage - def get_known_periods(self): + def get_known_periods(self) -> list[t.Period]: """Get the list of periods the variable value is known for.""" return list(self._memory_storage.get_known_periods()) + list( self._disk_storage.get_known_periods() if self._disk_storage else [], @@ -165,9 +194,9 @@ def get_known_periods(self): def set_input( self, - period: types.Period, - array: numpy.ndarray | Sequence[Any], - ) -> numpy.ndarray | None: + period: t.Period, + array: t.Array[_N] | t.ArrayLike[_L], + ) -> None | t.Array[_N]: """Set a Variable's array of values of a given Period. Args: @@ -239,7 +268,7 @@ def set_input( return self.variable.set_input(self, period, array) return self._set(period, array) - def _to_array(self, value): + def _to_array(self, value: t.Array[_N] | t.ArrayLike[_L], /) -> t.Array[_N]: if not isinstance(value, numpy.ndarray): value = numpy.asarray(value) if value.ndim == 0: @@ -262,7 +291,7 @@ def _to_array(self, value): ) return value - def _set(self, period, value) -> None: + def _set(self, /, period: None | t.Period, value: t.Array[_N]) -> None: value = self._to_array(value) if not self._eternal: if period is None: @@ -298,19 +327,26 @@ def _set(self, period, value) -> None: should_store_on_disk = ( self._on_disk_storable and self._memory_storage.get(period) is None + and self.simulation is not None + and self.simulation.memory_config is not None and psutil.virtual_memory().percent # If there is already a value in memory, replace it and don't put a new value in the disk storage >= self.simulation.memory_config.max_memory_occupation_pc ) if should_store_on_disk: + if self._disk_storage is None: + raise NotImplementedError self._disk_storage.put(value, period) else: self._memory_storage.put(value, period) - def put_in_cache(self, value, period) -> None: + def put_in_cache(self, value: t.Array[_N], period: t.Period) -> None: if self._do_not_store: return + if self.simulation is None: + raise NotImplementedError + if ( self.simulation.opt_out_cache and self.simulation.tax_benefit_system.cache_blacklist @@ -320,6 +356,34 @@ def put_in_cache(self, value, period) -> None: self._set(period, value) - def default_array(self): - """Return a new array of the appropriate length for the entity, filled with the variable default values.""" + def default_array(self) -> t.Array[_N]: + """Return a default array of the appropriate length for the entity. + + Returns: + ndarray[generic]: The default array for the variable. + + Examples: + >>> from openfisca_core import ( + ... entities, + ... periods, + ... populations, + ... variables, + ... ) + + >>> entity = entities.SingleEntity("", "", "", "") + + >>> class TestVariable(variables.Variable): + ... definition_period = periods.WEEKDAY + ... entity = entity + ... value_type = bool + + >>> variable = TestVariable() + >>> population = populations.CorePopulation(entity) + >>> population.count = 2 + >>> holder = Holder(variable, population) + + >>> holder.default_array() + array([False, False]) + + """ return self.variable.default_array(self.population.count) diff --git a/openfisca_core/holders/types.py b/openfisca_core/holders/types.py deleted file mode 100644 index 7137b86483..0000000000 --- a/openfisca_core/holders/types.py +++ /dev/null @@ -1,3 +0,0 @@ -from openfisca_core.types import CorePopulation, Holder, MemoryUsage - -__all__ = ["CorePopulation", "Holder", "MemoryUsage"] diff --git a/openfisca_core/indexed_enums/__init__.py b/openfisca_core/indexed_enums/__init__.py index 494601fc8d..abdfea2848 100644 --- a/openfisca_core/indexed_enums/__init__.py +++ b/openfisca_core/indexed_enums/__init__.py @@ -1,6 +1,5 @@ """Enumerations for variables with a limited set of possible values.""" -from . import types from ._enum_type import EnumType from ._errors import EnumEncodingError, EnumMemberNotFoundError from .config import ENUM_ARRAY_DTYPE @@ -14,5 +13,4 @@ "EnumEncodingError", "EnumMemberNotFoundError", "EnumType", - "types", ] diff --git a/openfisca_core/indexed_enums/_enum_type.py b/openfisca_core/indexed_enums/_enum_type.py index 8083a6d49f..0b831a4a44 100644 --- a/openfisca_core/indexed_enums/_enum_type.py +++ b/openfisca_core/indexed_enums/_enum_type.py @@ -4,7 +4,7 @@ import numpy -from . import types as t +from openfisca_core import types as t @final @@ -36,6 +36,15 @@ class EnumType(t.EnumType): """ + #: The indices of the enum members. + indices: t.IndexArray + + #: The names of the enum members. + names: t.StrArray + + #: The enum members. + enums: t.ObjArray + def __new__( metacls, name: str, diff --git a/openfisca_core/indexed_enums/_errors.py b/openfisca_core/indexed_enums/_errors.py index e9b543fc73..fa93bf8e90 100644 --- a/openfisca_core/indexed_enums/_errors.py +++ b/openfisca_core/indexed_enums/_errors.py @@ -1,6 +1,6 @@ from __future__ import annotations -from . import types as t +from openfisca_core import types as t class EnumEncodingError(TypeError): diff --git a/openfisca_core/indexed_enums/_guards.py b/openfisca_core/indexed_enums/_guards.py index 6c47471b3e..bb7cf820b3 100644 --- a/openfisca_core/indexed_enums/_guards.py +++ b/openfisca_core/indexed_enums/_guards.py @@ -5,7 +5,7 @@ import numpy -from . import types as t +from openfisca_core import types as t #: Types for int arrays. ints: Final = { diff --git a/openfisca_core/indexed_enums/_utils.py b/openfisca_core/indexed_enums/_utils.py index aa676b92f7..8c642f0d7b 100644 --- a/openfisca_core/indexed_enums/_utils.py +++ b/openfisca_core/indexed_enums/_utils.py @@ -2,7 +2,7 @@ import numpy -from . import types as t +from openfisca_core import types as t def _enum_to_index(value: t.ObjArray | t.ArrayLike[t.Enum]) -> t.IndexArray: diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index a733fd5daf..bf0e068950 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -4,7 +4,8 @@ import numpy -from . import types as t +from openfisca_core import types as t + from ._enum_type import EnumType from ._errors import EnumEncodingError, EnumMemberNotFoundError from ._guards import ( @@ -128,7 +129,9 @@ def __repr__(self) -> str: return f"{self.__class__.__name__}.{self.name}" @classmethod - def encode(cls, array: t.VarArray | t.ArrayLike[object]) -> t.EnumArray: + def encode( + cls, array: t.VarArray | t.ArrayLike[object] + ) -> t.EnumArray[t.EnumDType]: """Encode an encodable array into an :class:`.EnumArray`. Args: @@ -194,7 +197,7 @@ def encode(cls, array: t.VarArray | t.ArrayLike[object]) -> t.EnumArray: return cls._encode_array(array) @classmethod - def _encode_array(cls, value: t.VarArray) -> t.EnumArray: + def _encode_array(cls, value: t.VarArray) -> t.EnumArray[t.EnumDType]: if _is_int_array(value): indices = _int_to_index(cls, value) elif _is_str_array(value): # type: ignore[unreachable] @@ -208,7 +211,7 @@ def _encode_array(cls, value: t.VarArray) -> t.EnumArray: return EnumArray(indices, cls) @classmethod - def _encode_array_like(cls, value: t.ArrayLike[object]) -> t.EnumArray: + def _encode_array_like(cls, value: t.ArrayLike[object]) -> t.EnumArray[t.EnumDType]: if _is_int_array_like(value): indices = _int_to_index(cls, value) elif _is_str_array_like(value): # type: ignore[unreachable] diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index 98f9b4c6aa..e779934beb 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -1,14 +1,17 @@ from __future__ import annotations -from typing import NoReturn +from typing import NoReturn, TypeVar from typing_extensions import Self import numpy -from . import types as t +from openfisca_core import types as t +#: Type var for numpy arrays (invariant). +_N = TypeVar("_N", bound=t.VarDType) -class EnumArray(t.EnumArray): + +class EnumArray(t.EnumArray[_N]): """A subclass of :class:`~numpy.ndarray` of :class:`.Enum`. :class:`.Enum` arrays are encoded as :class:`int` to improve performance. @@ -82,7 +85,7 @@ def __new__( obj.possible_values = possible_values return obj - def __array_finalize__(self, obj: None | t.EnumArray | t.VarArray) -> None: + def __array_finalize__(self, obj: None | t.EnumArray[_N] | t.VarArray) -> None: """See comment above.""" if obj is None: return @@ -229,10 +232,10 @@ def __ne__(self, other: object) -> t.BoolArray: # type: ignore[override] return numpy.logical_not(self == other) @staticmethod - def _forbidden_operation(*__args: object, **__kwds: object) -> NoReturn: + def _forbidden_operation(*args: object, **kwds: object) -> NoReturn: msg = ( "Forbidden operation. The only operations allowed on EnumArrays " - "are '==' and '!='." + f"are '==' and '!=' (called with {args!r} and {kwds!r})." ) raise TypeError(msg) @@ -276,7 +279,8 @@ def decode(self) -> t.ObjArray: f"not defined." ) raise TypeError(msg) - array = self.reshape(1).astype(t.EnumDType) if self.ndim == 0 else self + array = self.reshape(1) if self.ndim == 0 else self + array = array.astype(t.EnumDType) result = self.possible_values.enums[array] return result @@ -311,7 +315,8 @@ def decode_to_str(self) -> t.StrArray: f"not defined." ) raise TypeError(msg) - array = self.reshape(1).astype(t.EnumDType) if self.ndim == 0 else self + array = self.reshape(1) if self.ndim == 0 else self + array = array.astype(t.EnumDType) result = self.possible_values.names[array] return result diff --git a/openfisca_core/indexed_enums/types.py b/openfisca_core/indexed_enums/types.py deleted file mode 100644 index e0a71b3221..0000000000 --- a/openfisca_core/indexed_enums/types.py +++ /dev/null @@ -1,41 +0,0 @@ -from typing_extensions import TypeAlias - -from openfisca_core.types import Array, ArrayLike, DTypeLike, Enum, EnumArray, EnumType - -from enum import _EnumDict as EnumDict # noqa: PLC2701 - -from numpy import ( - bool_ as BoolDType, - generic as VarDType, - int32 as IntDType, - object_ as ObjDType, - str_ as StrDType, - uint8 as EnumDType, -) - -#: Type for enum indices arrays. -IndexArray: TypeAlias = Array[EnumDType] - -#: Type for boolean arrays. -BoolArray: TypeAlias = Array[BoolDType] - -#: Type for int arrays. -IntArray: TypeAlias = Array[IntDType] - -#: Type for str arrays. -StrArray: TypeAlias = Array[StrDType] - -#: Type for object arrays. -ObjArray: TypeAlias = Array[ObjDType] - -#: Type for generic arrays. -VarArray: TypeAlias = Array[VarDType] - -__all__ = [ - "ArrayLike", - "DTypeLike", - "Enum", - "EnumArray", - "EnumDict", - "EnumType", -] diff --git a/openfisca_core/populations/__init__.py b/openfisca_core/populations/__init__.py index 36f000e38d..cd83082157 100644 --- a/openfisca_core/populations/__init__.py +++ b/openfisca_core/populations/__init__.py @@ -29,8 +29,8 @@ ) from openfisca_core.projectors.helpers import get_projector_from_shortcut, projectable -from . import types from ._core_population import CorePopulation +from ._enums import Option from ._errors import ( IncompatibleOptionsError, InvalidArraySizeError, @@ -40,7 +40,7 @@ from .group_population import GroupPopulation from .population import Population -ADD, DIVIDE = types.Option +ADD, DIVIDE = Option SinglePopulation = Population __all__ = [ @@ -60,5 +60,4 @@ "UniqueRoleToEntityProjector", "get_projector_from_shortcut", "projectable", - "types", ] diff --git a/openfisca_core/populations/_core_population.py b/openfisca_core/populations/_core_population.py index 0041a6927a..0f0d968ce7 100644 --- a/openfisca_core/populations/_core_population.py +++ b/openfisca_core/populations/_core_population.py @@ -7,9 +7,9 @@ import numpy -from openfisca_core import holders, periods +from openfisca_core import holders, periods, types as t -from . import types as t +from ._enums import Calculate, Option from ._errors import ( IncompatibleOptionsError, InvalidArraySizeError, @@ -17,6 +17,9 @@ PeriodValidityError, ) +#: Options for set inputs. +ADD, DIVIDE = Option + #: Type variable for a covariant data type. _DT_co = TypeVar("_DT_co", covariant=True, bound=t.VarDType) @@ -25,7 +28,7 @@ class CorePopulation: """Base class to build populations from. Args: - entity: The :class:`.CoreEntity` of the population. + entity: The :class:`~entities.CoreEntity` of the population. *__args: Variable length argument list. **__kwds: Arbitrary keyword arguments. @@ -40,18 +43,21 @@ class CorePopulation: #: A pseudo index for the members of the population. ids: Sequence[str] = [] - #: The :class:`.Simulation` for which the population is calculated. + #: The :class:`~simulations.Simulation` for which the population is calculated. simulation: None | t.Simulation = None + #: The holders of the variables. + _holders: t.HolderByVariable[t.VarDType] + def __init__(self, entity: t.CoreEntity, *__args: object, **__kwds: object) -> None: self.entity = entity - self._holders: t.HolderByVariable = {} + self._holders = {} def __call__( self, variable_name: t.VariableName, period: t.PeriodLike, - options: None | Sequence[t.Option] = None, + options: None | Sequence[Option] = None, ) -> None | t.VarArray: """Calculate ``variable_name`` for ``period``, using the formula if it exists. @@ -61,7 +67,7 @@ def __call__( options: The options to use for the calculation. Returns: - None: If there is no :class:`.Simulation`. + None: If there is no :class:`~simulations.Simulation`. ndarray[generic]: The result of the calculation. Raises: @@ -136,7 +142,7 @@ def __call__( if self.simulation is None: return None - calculate = t.Calculate( + calculate = Calculate( variable=variable_name, period=periods.period(period), option=options, @@ -151,16 +157,16 @@ def __call__( calculate.period, ) - if t.Option.ADD in calculate.option and t.Option.DIVIDE in calculate.option: + if ADD in calculate.option and DIVIDE in calculate.option: raise IncompatibleOptionsError(variable_name) - if t.Option.ADD in calculate.option: + if ADD in calculate.option: return self.simulation.calculate_add( calculate.variable, calculate.period, ) - if t.Option.DIVIDE in calculate.option: + if DIVIDE in calculate.option: return self.simulation.calculate_divide( calculate.variable, calculate.period, @@ -334,7 +340,7 @@ def check_period_validity( # Helpers - def get_holder(self, variable_name: t.VariableName) -> t.Holder: + def get_holder(self, variable_name: t.VariableName) -> t.Holder[t.VarDType]: """Return the holder of a variable. Args: @@ -385,8 +391,10 @@ def get_holder(self, variable_name: t.VariableName) -> t.Holder: if holder: return holder variable = self.entity.get_variable(variable_name) - self._holders[variable_name] = holder = holders.Holder(variable, self) - return holder + if variable is None: + raise NotImplementedError + self._holders[variable_name] = holders.Holder(variable, self) + return self._holders[variable_name] def get_memory_usage( self, diff --git a/openfisca_core/populations/_enums.py b/openfisca_core/populations/_enums.py new file mode 100644 index 0000000000..2be65fafe5 --- /dev/null +++ b/openfisca_core/populations/_enums.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +from collections.abc import Sequence +from typing import NamedTuple + +import enum + +import strenum + +from openfisca_core import types as t + + +class Option(strenum.StrEnum): + ADD = enum.auto() + DIVIDE = enum.auto() + + def __contains__(self, option: str) -> bool: + return option.upper() is self + + +class Calculate(NamedTuple): + variable: t.VariableName + period: t.Period + option: None | Sequence[Option] + + +__all__ = ["Calculate", "Option"] diff --git a/openfisca_core/populations/_errors.py b/openfisca_core/populations/_errors.py index 0aad0d11dc..69bda1a5ad 100644 --- a/openfisca_core/populations/_errors.py +++ b/openfisca_core/populations/_errors.py @@ -1,11 +1,13 @@ -from . import types as t +from openfisca_core import types as t + +from ._enums import Option class IncompatibleOptionsError(ValueError): """Raised when two options are incompatible.""" def __init__(self, variable_name: t.VariableName) -> None: - add, divide = t.Option + add, divide = Option msg = ( f"Options {add} and {divide} are incompatible (trying to compute " f"variable {variable_name})." diff --git a/openfisca_core/populations/group_population.py b/openfisca_core/populations/group_population.py index 120dc9c656..8f74b23722 100644 --- a/openfisca_core/populations/group_population.py +++ b/openfisca_core/populations/group_population.py @@ -4,9 +4,8 @@ import numpy -from openfisca_core import entities, indexed_enums, projectors +from openfisca_core import entities, indexed_enums, projectors, types as t -from . import types as t from .population import Population diff --git a/openfisca_core/populations/population.py b/openfisca_core/populations/population.py index 24742ab0a0..227a0b1f0c 100644 --- a/openfisca_core/populations/population.py +++ b/openfisca_core/populations/population.py @@ -2,9 +2,8 @@ import numpy -from openfisca_core import projectors +from openfisca_core import projectors, types as t -from . import types as t from ._core_population import CorePopulation diff --git a/openfisca_core/populations/types.py b/openfisca_core/populations/types.py deleted file mode 100644 index 07f34d2f5f..0000000000 --- a/openfisca_core/populations/types.py +++ /dev/null @@ -1,103 +0,0 @@ -from __future__ import annotations - -from collections.abc import Iterable, MutableMapping, Sequence -from typing import NamedTuple, Union -from typing_extensions import TypeAlias, TypedDict - -from openfisca_core.types import ( - Array, - CoreEntity, - CorePopulation, - DTypeLike, - EntityKey, - GroupEntity, - Holder, - MemoryUsage, - Period, - PeriodInt, - PeriodStr, - Role, - Simulation, - SingleEntity, - SinglePopulation, - VariableName, -) - -import enum - -import strenum -from numpy import ( - bool_ as BoolDType, - float32 as FloatDType, - generic as VarDType, - int32 as IntDType, - str_ as StrDType, -) - -# Commons - -#: Type alias for an array of strings. -IntArray: TypeAlias = Array[IntDType] - -#: Type alias for an array of strings. -StrArray: TypeAlias = Array[StrDType] - -#: Type alias for an array of booleans. -BoolArray: TypeAlias = Array[BoolDType] - -#: Type alias for an array of floats. -FloatArray: TypeAlias = Array[FloatDType] - -#: Type alias for an array of generic objects. -VarArray: TypeAlias = Array[VarDType] - -# Periods - -#: Type alias for a period-like object. -PeriodLike: TypeAlias = Union[Period, PeriodStr, PeriodInt] - -# Populations - -#: Type alias for a population's holders. -HolderByVariable: TypeAlias = MutableMapping[VariableName, Holder] - -# TODO(Mauko Quiroga-Alvarado): I'm not sure if this type alias is correct. -# https://openfisca.org/doc/coding-the-legislation/50_entities.html -Members: TypeAlias = Iterable[SinglePopulation] - - -class Option(strenum.StrEnum): - ADD = enum.auto() - DIVIDE = enum.auto() - - def __contains__(self, option: str) -> bool: - return option.upper() is self - - -class Calculate(NamedTuple): - variable: VariableName - period: Period - option: None | Sequence[Option] - - -class MemoryUsageByVariable(TypedDict, total=False): - by_variable: dict[VariableName, MemoryUsage] - total_nb_bytes: int - - -__all__ = [ - "CoreEntity", - "CorePopulation", - "DTypeLike", - "EntityKey", - "GroupEntity", - "Holder", - "MemoryUsage", - "Period", - "Role", - "Simulation", - "SingleEntity", - "SinglePopulation", - "VarDType", - "VariableName", -] diff --git a/openfisca_core/projectors/__init__.py b/openfisca_core/projectors/__init__.py index 28776e3cf9..fa787d4c05 100644 --- a/openfisca_core/projectors/__init__.py +++ b/openfisca_core/projectors/__init__.py @@ -21,7 +21,6 @@ # # See: https://www.python.org/dev/peps/pep-0008/#imports -from . import typing from .entity_to_person_projector import EntityToPersonProjector from .first_person_to_entity_projector import FirstPersonToEntityProjector from .helpers import get_projector_from_shortcut, projectable @@ -31,9 +30,8 @@ __all__ = [ "EntityToPersonProjector", "FirstPersonToEntityProjector", - "get_projector_from_shortcut", - "projectable", "Projector", "UniqueRoleToEntityProjector", - "typing", + "get_projector_from_shortcut", + "projectable", ] diff --git a/openfisca_core/projectors/helpers.py b/openfisca_core/projectors/helpers.py index 8071eecf94..b47e11f460 100644 --- a/openfisca_core/projectors/helpers.py +++ b/openfisca_core/projectors/helpers.py @@ -2,11 +2,7 @@ from collections.abc import Mapping -from openfisca_core.types import GroupEntity, Role, SingleEntity - -from openfisca_core import entities, projectors - -from .typing import GroupPopulation, Population +from openfisca_core import entities, projectors, types as t def projectable(function): @@ -18,7 +14,7 @@ def projectable(function): def get_projector_from_shortcut( - population: Population | GroupPopulation, + population: t.CorePopulation, shortcut: str, parent: projectors.Projector | None = None, ) -> projectors.Projector | None: @@ -108,12 +104,12 @@ def get_projector_from_shortcut( <...UniqueRoleToEntityProjector object at ...> """ - entity: SingleEntity | GroupEntity = population.entity + entity: t.CorePopulation = population.entity if isinstance(entity, entities.Entity): populations: Mapping[ str, - Population | GroupPopulation, + t.CorePopulation, ] = population.simulation.populations if shortcut not in populations: @@ -125,7 +121,7 @@ def get_projector_from_shortcut( return projectors.FirstPersonToEntityProjector(population, parent) if isinstance(entity, entities.GroupEntity): - role: Role | None = entities.find_role(entity.roles, shortcut, total=1) + role: None | t.Role = entities.find_role(entity.roles, shortcut, total=1) if role is not None: return projectors.UniqueRoleToEntityProjector(population, role, parent) diff --git a/openfisca_core/projectors/typing.py b/openfisca_core/projectors/typing.py deleted file mode 100644 index a49bc96621..0000000000 --- a/openfisca_core/projectors/typing.py +++ /dev/null @@ -1,27 +0,0 @@ -from __future__ import annotations - -from collections.abc import Mapping -from typing import Protocol - -from openfisca_core.types import GroupEntity, SingleEntity - - -class Population(Protocol): - @property - def entity(self) -> SingleEntity: ... - - @property - def simulation(self) -> Simulation: ... - - -class GroupPopulation(Protocol): - @property - def entity(self) -> GroupEntity: ... - - @property - def simulation(self) -> Simulation: ... - - -class Simulation(Protocol): - @property - def populations(self) -> Mapping[str, Population | GroupPopulation]: ... diff --git a/openfisca_core/reforms/reform.py b/openfisca_core/reforms/reform.py index 76e7152334..1f0b0ed38c 100644 --- a/openfisca_core/reforms/reform.py +++ b/openfisca_core/reforms/reform.py @@ -71,7 +71,8 @@ def modify_parameters(self, modifier_function): Call this function in `apply()` if the reform asks for legislation parameter modifications. Args: - modifier_function: A function that takes a :obj:`.ParameterNode` and should return an object of the same type. + modifier_function: A function that takes a :obj:`~parameters.ParameterNode` and should return an object of + the same type. """ baseline_parameters = self.baseline.parameters diff --git a/openfisca_core/simulations/_build_default_simulation.py b/openfisca_core/simulations/_build_default_simulation.py index adc7cf4783..8d3b29a9a3 100644 --- a/openfisca_core/simulations/_build_default_simulation.py +++ b/openfisca_core/simulations/_build_default_simulation.py @@ -1,12 +1,14 @@ """This module contains the _BuildDefaultSimulation class.""" -from typing import Union +from __future__ import annotations + from typing_extensions import Self import numpy +from openfisca_core import types as t + from .simulation import Simulation -from .typing import Entity, Population, TaxBenefitSystem class _BuildDefaultSimulation: @@ -47,12 +49,12 @@ class _BuildDefaultSimulation: count: int #: The built populations. - populations: dict[str, Union[Population[Entity]]] + populations: dict[str, t.CorePopulation] #: The built simulation. simulation: Simulation - def __init__(self, tax_benefit_system: TaxBenefitSystem, count: int) -> None: + def __init__(self, tax_benefit_system: t.TaxBenefitSystem, count: int) -> None: self.count = count self.populations = tax_benefit_system.instantiate_entities() self.simulation = Simulation(tax_benefit_system, self.populations) @@ -157,3 +159,6 @@ def add_members_entity_id(self) -> Self: population.members_entity_id = numpy.array(range(self.count)) return self + + +__all__ = ["_BuildDefaultSimulation"] diff --git a/openfisca_core/simulations/_build_from_variables.py b/openfisca_core/simulations/_build_from_variables.py index 20f49ce113..33dc58f57a 100644 --- a/openfisca_core/simulations/_build_from_variables.py +++ b/openfisca_core/simulations/_build_from_variables.py @@ -4,12 +4,12 @@ from typing_extensions import Self -from openfisca_core import errors +from openfisca_core import errors, types as t from ._build_default_simulation import _BuildDefaultSimulation from ._type_guards import is_variable_dated from .simulation import Simulation -from .typing import Entity, Population, TaxBenefitSystem, Variables +from .typing import Variables class _BuildFromVariables: @@ -67,7 +67,7 @@ class _BuildFromVariables: default_period: str | None #: The built populations. - populations: dict[str, Population[Entity]] + populations: dict[str, t.CorePopulation] #: The built simulation. simulation: Simulation @@ -77,7 +77,7 @@ class _BuildFromVariables: def __init__( self, - tax_benefit_system: TaxBenefitSystem, + tax_benefit_system: t.TaxBenefitSystem, params: Variables, default_period: str | None = None, ) -> None: @@ -228,3 +228,6 @@ def _person_count(params: Variables) -> int: except Exception: return 1 + + +__all__ = ["_BuildFromVariables"] diff --git a/openfisca_core/simulations/simulation_builder.py b/openfisca_core/simulations/simulation_builder.py index 7464b46500..c3d4c47bb8 100644 --- a/openfisca_core/simulations/simulation_builder.py +++ b/openfisca_core/simulations/simulation_builder.py @@ -9,7 +9,7 @@ import dpath import numpy -from openfisca_core import entities, errors, periods, populations, variables +from openfisca_core import entities, errors, periods, populations, types as t, variables from . import helpers from ._build_default_simulation import _BuildDefaultSimulation @@ -23,17 +23,11 @@ from .simulation import Simulation from .typing import ( Axis, - Entity, FullySpecifiedEntities, GroupEntities, - GroupEntity, ImplicitGroupEntities, Params, ParamsWithoutAxes, - Population, - Role, - SingleEntity, - TaxBenefitSystem, Variables, ) @@ -72,7 +66,7 @@ def __init__(self) -> None: def build_from_dict( self, - tax_benefit_system: TaxBenefitSystem, + tax_benefit_system: t.TaxBenefitSystem, input_dict: Params, ) -> Simulation: """Build a simulation from an input dictionary. @@ -156,7 +150,7 @@ def build_from_dict( def build_from_entities( self, - tax_benefit_system: TaxBenefitSystem, + tax_benefit_system: t.TaxBenefitSystem, input_dict: FullySpecifiedEntities, ) -> Simulation: """Build a simulation from a Python dict ``input_dict`` fully specifying @@ -208,7 +202,7 @@ def build_from_entities( # Check for unexpected entities helpers.check_unexpected_entities(params, plural) - person_entity: SingleEntity = tax_benefit_system.person_entity + person_entity: t.SingleEntity = tax_benefit_system.person_entity persons_json = params.get(person_entity.plural, None) @@ -271,7 +265,7 @@ def build_from_entities( def build_from_variables( self, - tax_benefit_system: TaxBenefitSystem, + tax_benefit_system: t.TaxBenefitSystem, input_dict: Variables, ) -> Simulation: """Build a simulation from a Python dict ``input_dict`` describing @@ -311,7 +305,7 @@ def build_from_variables( @staticmethod def build_default_simulation( - tax_benefit_system: TaxBenefitSystem, + tax_benefit_system: t.TaxBenefitSystem, count: int = 1, ) -> Simulation: """Build a default simulation. @@ -381,7 +375,7 @@ def build(self, tax_benefit_system): def explicit_singular_entities( self, - tax_benefit_system: TaxBenefitSystem, + tax_benefit_system: t.TaxBenefitSystem, input_dict: ImplicitGroupEntities, ) -> GroupEntities: """Preprocess ``input_dict`` to explicit entities defined using the @@ -446,7 +440,7 @@ def add_person_entity(self, entity, instances_json): def add_default_group_entity( self, persons_ids: list[str], - entity: GroupEntity, + entity: t.GroupEntity, ) -> None: persons_count = len(persons_ids) roles = list(entity.flattened_roles) @@ -461,7 +455,7 @@ def add_group_entity( self, persons_plural: str, persons_ids: list[str], - entity: GroupEntity, + entity: t.GroupEntity, instances_json, ) -> None: """Add all instances of one of the model's entities as described in ``instances_json``.""" @@ -723,7 +717,7 @@ def get_memberships(self, entity_name): ) # Returns the roles of individuals in this entity, including when there is replication along axes - def get_roles(self, entity_name: str) -> Sequence[Role]: + def get_roles(self, entity_name: str) -> Sequence[t.Role]: # Return empty array for the "persons" entity return self.axes_roles.get(entity_name, self.roles.get(entity_name, [])) @@ -841,17 +835,17 @@ def expand_axes(self) -> None: ) self.input_buffer[axis_name][str(axis_period)] = array - def get_variable_entity(self, variable_name: str) -> Entity: + def get_variable_entity(self, variable_name: str) -> t.CoreEntity: return self.variable_entities[variable_name] - def register_variable(self, variable_name: str, entity: Entity) -> None: + def register_variable(self, variable_name: str, entity: t.CoreEntity) -> None: self.variable_entities[variable_name] = entity def register_variables(self, simulation: Simulation) -> None: - tax_benefit_system: TaxBenefitSystem = simulation.tax_benefit_system + tax_benefit_system: t.TaxBenefitSystem = simulation.tax_benefit_system variables: Iterable[str] = tax_benefit_system.variables.keys() for name in variables: - population: Population = simulation.get_variable_population(name) - entity: Entity = population.entity + population: t.CorePopulation = simulation.get_variable_population(name) + entity: t.CoreEntity = population.entity self.register_variable(name, entity) diff --git a/openfisca_core/simulations/typing.py b/openfisca_core/simulations/typing.py index 8091994e53..0595cebe90 100644 --- a/openfisca_core/simulations/typing.py +++ b/openfisca_core/simulations/typing.py @@ -2,29 +2,15 @@ from __future__ import annotations -from collections.abc import Iterable, Sequence -from numpy.typing import NDArray as Array -from typing import Protocol, TypeVar, TypedDict, Union +from collections.abc import Iterable +from typing import TypeVar, TypedDict, Union from typing_extensions import NotRequired, Required, TypeAlias import datetime -from abc import abstractmethod - -from numpy import ( - bool_ as Bool, - datetime64 as Date, - float32 as Float, - int16 as Enum, - int32 as Int, - str_ as String, -) #: Generic type variables. E = TypeVar("E") -G = TypeVar("G", covariant=True) -T = TypeVar("T", Bool, Date, Enum, Float, Int, String, covariant=True) U = TypeVar("U", bool, datetime.date, float, str) -V = TypeVar("V", covariant=True) #: Type alias for a simulation dictionary defining the roles. @@ -77,127 +63,3 @@ class Axis(TypedDict, total=False): min: Required[float] name: Required[str] period: NotRequired[str | int] - - -class Entity(Protocol): - """Interface representing an entity of a simulation.""" - - key: str - plural: str | None - - def get_variable( - self, - __variable_name: str, - __check_existence: bool = ..., - ) -> Variable[T] | None: - """Get a variable.""" - - -class SingleEntity(Entity, Protocol): - """Interface representing a single entity of a simulation.""" - - -class GroupEntity(Entity, Protocol): - """Interface representing a group entity of a simulation.""" - - @property - @abstractmethod - def flattened_roles(self) -> Iterable[Role[G]]: - """Get the flattened roles of the GroupEntity.""" - - -class Holder(Protocol[V]): - """Interface representing a holder of a simulation's computed values.""" - - @property - @abstractmethod - def variable(self) -> Variable[T]: - """Get the Variable of the Holder.""" - - def get_array(self, __period: str) -> Array[T] | None: - """Get the values of the Variable for a given Period.""" - - def set_input( - self, - __period: Period, - __array: Array[T] | Sequence[U], - ) -> Array[T] | None: - """Set values for a Variable for a given Period.""" - - -class Period(Protocol): - """Interface representing a period of a simulation.""" - - -class Population(Protocol[E]): - """Interface representing a data vector of an Entity.""" - - count: int - entity: E - ids: Array[String] - - def get_holder(self, __variable_name: str) -> Holder[V]: - """Get the holder of a Variable.""" - - -class SinglePopulation(Population[E], Protocol): - """Interface representing a data vector of a SingleEntity.""" - - -class GroupPopulation(Population[E], Protocol): - """Interface representing a data vector of a GroupEntity.""" - - members_entity_id: Array[String] - - def nb_persons(self, __role: Role[G] | None = ...) -> int: - """Get the number of persons for a given Role.""" - - -class Role(Protocol[G]): - """Interface representing a role of the group entities of a simulation.""" - - -class TaxBenefitSystem(Protocol): - """Interface representing a tax-benefit system.""" - - @property - @abstractmethod - def person_entity(self) -> SingleEntity: - """Get the person entity of the tax-benefit system.""" - - @person_entity.setter - @abstractmethod - def person_entity(self, person_entity: SingleEntity) -> None: - """Set the person entity of the tax-benefit system.""" - - @property - @abstractmethod - def variables(self) -> dict[str, V]: - """Get the variables of the tax-benefit system.""" - - def entities_by_singular(self) -> dict[str, E]: - """Get the singular form of the entities' keys.""" - - def entities_plural(self) -> Iterable[str]: - """Get the plural form of the entities' keys.""" - - def get_variable( - self, - __variable_name: str, - __check_existence: bool = ..., - ) -> V | None: - """Get a variable.""" - - def instantiate_entities( - self, - ) -> dict[str, Population[E]]: - """Instantiate the populations of each Entity.""" - - -class Variable(Protocol[T]): - """Interface representing a variable of a tax-benefit system.""" - - end: str - - def default_array(self, __array_size: int) -> Array[T]: - """Fill an array with the default value of the Variable.""" diff --git a/openfisca_core/types.py b/openfisca_core/types.py index e40148544f..01992ec400 100644 --- a/openfisca_core/types.py +++ b/openfisca_core/types.py @@ -1,15 +1,23 @@ from __future__ import annotations -from collections.abc import Iterable, Iterator, Sequence, Sized +from collections.abc import ( + Iterable, + Iterator, + KeysView, + MutableMapping, + Sequence, + Sized, +) from numpy.typing import DTypeLike, NDArray from typing import NewType, TypeVar, Union from typing_extensions import Protocol, Required, Self, TypeAlias, TypedDict import abc +import datetime import enum import re +from enum import _EnumDict as EnumDict # noqa: PLC2701 -import numpy import pendulum from numpy import ( bool_ as BoolDType, @@ -28,8 +36,11 @@ # Arrays -#: Type var for numpy arrays. -_N_co = TypeVar("_N_co", covariant=True, bound="DTypeGeneric") +#: Type var for numpy arrays (invariant). +_N = TypeVar("_N", bound=VarDType) + +#: Type var for numpy arrays (covariant). +_N_co = TypeVar("_N_co", covariant=True, bound=VarDType) #: Type representing an numpy array. Array: TypeAlias = NDArray[_N_co] @@ -69,33 +80,6 @@ #: Type representing an array-like object. ArrayLike: TypeAlias = Sequence[_L] -#: Type for bool arrays. -DTypeBool: TypeAlias = numpy.bool_ - -#: Type for int arrays. -DTypeInt: TypeAlias = numpy.int32 - -#: Type for float arrays. -DTypeFloat: TypeAlias = numpy.float32 - -#: Type for string arrays. -DTypeStr: TypeAlias = numpy.str_ - -#: Type for bytes arrays. -DTypeBytes: TypeAlias = numpy.bytes_ - -#: Type for Enum arrays. -DTypeEnum: TypeAlias = numpy.uint8 - -#: Type for date arrays. -DTypeDate: TypeAlias = numpy.datetime64 - -#: Type for "object" arrays. -DTypeObject: TypeAlias = numpy.object_ - -#: Type for "generic" arrays. -DTypeGeneric: TypeAlias = numpy.generic - # TODO(): Properly resolve metaclass types. # https://github.com/python/mypy/issues/14033 @@ -112,6 +96,24 @@ def __instancecheck__(self, arg: object, /) -> bool: class SeqInt(list[int], metaclass=_SeqIntMeta): ... # type: ignore[misc] +# Data storage + + +class CoreStorage(Protocol[_N]): + def get(self, __period: None | Period, /) -> None | Array[_N]: ... + def put(self, __value: Array[_N], __period: None | Period, /) -> None: ... + def delete(self, __period: None | Period = ..., /) -> None: ... + def get_known_periods(self, /) -> KeysView[Period]: ... + + +class InMemoryStorage(CoreStorage[_N], Protocol): + def get_memory_usage(self, /) -> MemoryUsage: ... + + +class OnDiskStorage(CoreStorage[_N], Protocol): + def restore(self, /) -> None: ... + + # Entities #: For example "person". @@ -128,78 +130,155 @@ class SeqInt(list[int], metaclass=_SeqIntMeta): ... # type: ignore[misc] class CoreEntity(Protocol): - key: EntityKey - plural: EntityPlural - - def check_role_validity(self, role: object, /) -> None: ... + @property + def key(self, /) -> EntityKey: ... + @property + def plural(self, /) -> EntityPlural: ... + @property + def label(self, /) -> str: ... + @property + def doc(self, /) -> str: ... + def set_tax_benefit_system(self, __tbs: TaxBenefitSystem, /) -> None: ... + def get_variable( + self, __name: VariableName, __check: bool = ..., / + ) -> None | Variable[VarDType]: ... def check_variable_defined_for_entity( self, - variable_name: VariableName, + __name: VariableName, /, ) -> None: ... - def get_variable( - self, - variable_name: VariableName, - check_existence: bool = ..., - /, - ) -> None | Variable: ... + @staticmethod + def check_role_validity(__role: object, /) -> None: ... class SingleEntity(CoreEntity, Protocol): ... -class GroupEntity(CoreEntity, Protocol): ... +class GroupEntity(CoreEntity, Protocol): + @property + def roles(self, /) -> Iterable[Role]: ... + @property + def flattened_roles(self, /) -> Iterable[Role]: ... class Role(Protocol): - entity: GroupEntity - max: int | None - subroles: None | Iterable[Role] - + @property + def entity(self, /) -> GroupEntity: ... + @property + def max(self, /) -> None | int: ... + @property + def subroles(self, /) -> None | Iterable[Role]: ... @property def key(self, /) -> RoleKey: ... @property def plural(self, /) -> None | RolePlural: ... + @property + def label(self, /) -> None | str: ... + @property + def doc(self, /) -> None | str: ... + + +class RoleParams(TypedDict, total=False): + key: Required[str] + plural: str + label: str + doc: str + max: int + subroles: list[str] + + +# Experimental + + +class MemoryConfig(Protocol): + @property + def max_memory_occupation(self, /) -> float: ... + @property + def max_memory_occupation_pc(self, /) -> float: ... + @property + def priority_variables(self, /) -> frozenset[str]: ... + @property + def variables_to_drop(self, /) -> frozenset[str]: ... # Indexed enums class EnumType(enum.EnumMeta): - indices: Array[DTypeEnum] - names: Array[DTypeStr] - enums: Array[DTypeObject] + indices: IndexArray + names: StrArray + enums: ObjArray class Enum(enum.Enum, metaclass=EnumType): index: int _member_names_: list[str] + @classmethod + @abc.abstractmethod + def encode( + cls, __array: Array[_N] | ArrayLike[object], / + ) -> EnumArray[EnumDType]: ... + -class EnumArray(Array[DTypeEnum], metaclass=abc.ABCMeta): +class EnumArray(Array[_N]): possible_values: None | type[Enum] @abc.abstractmethod - def __new__( - cls, input_array: Array[DTypeEnum], possible_values: type[Enum] - ) -> Self: ... + def __new__(cls, /, __array: Array[_N], __enum: type[Enum]) -> Self: ... + @abc.abstractmethod + def decode(self, /) -> ObjArray: ... + @abc.abstractmethod + def decode_to_str(self, /) -> StrArray: ... # Holders -class Holder(Protocol): - def clone(self, population: CorePopulation, /) -> Holder: ... +class Holder(Protocol[_N]): + @property + def population(self, /) -> CorePopulation: ... + @property + def simulation(self, /) -> None | Simulation: ... + @property + def variable(self, /) -> Variable[_N]: ... + @property + def _eternal(self, /) -> bool: ... + @property + def _memory_storage(self, /) -> InMemoryStorage[_N]: ... + @property + def _disk_storage(self, /) -> None | OnDiskStorage[_N]: ... + @property + def _on_disk_storable(self, /) -> bool: ... + @property + def _do_not_store(self, /) -> bool: ... + def clone(self, __population: CorePopulation, /) -> Holder[_N]: ... + def create_disk_storage( + self, __dir: None | str = ..., __preserve: bool = ..., / + ) -> OnDiskStorage[_N]: ... + def delete_arrays(self, __period: None | Period = None, /) -> None: ... + def get_array(self, __period: Period, /) -> None | Array[_N]: ... def get_memory_usage(self, /) -> MemoryUsage: ... + def get_known_periods(self) -> list[Period]: ... + def set_input( + self, + __period: Period, + __array: Array[_N] | ArrayLike[_L], + /, + ) -> None | Array[_N]: ... + def put_in_cache(self, __value: Array[_N], period: Period, /) -> None: ... + def default_array(self, /) -> Array[_N]: ... + def _set(self, __period: None | Period, __value: Array[_N], /) -> None: ... + def _to_array(self, __value: Array[_N] | ArrayLike[_L], /) -> Array[_N]: ... class MemoryUsage(TypedDict, total=False): - cell_size: int + cell_size: float dtype: DTypeLike - nb_arrays: int + nb_arrays: Required[int] nb_cells_by_array: int nb_requests: int - nb_requests_by_array: int + nb_requests_by_array: float total_nb_bytes: Required[int] @@ -218,18 +297,14 @@ class ParameterNodeAtInstant(Protocol): _instant_str: InstantStr def __contains__(self, __item: object, /) -> bool: ... - def __getitem__( - self, __index: str | Array[DTypeGeneric], / - ) -> ParameterNodeChild: ... + def __getitem__(self, __index: str | VarArray, /) -> ParameterNodeChild: ... class VectorialParameterNodeAtInstant(Protocol): _instant_str: InstantStr - def __contains__(self, item: object, /) -> bool: ... - def __getitem__( - self, __index: str | Array[DTypeGeneric], / - ) -> ParameterNodeChild: ... + def __contains__(self, __item: object, /) -> bool: ... + def __getitem__(self, __index: str | VarArray, /) -> ParameterNodeChild: ... # Periods @@ -331,49 +406,96 @@ def offset( #: Type alias for a period-like object. PeriodLike: TypeAlias = Union[Period, PeriodStr, PeriodInt] + # Populations +#: Type alias for a population's holders. +HolderByVariable: TypeAlias = MutableMapping["VariableName", Holder[_N]] + + +class MemoryUsageByVariable(TypedDict, total=False): + by_variable: dict[VariableName, MemoryUsage] + total_nb_bytes: int -class CorePopulation(Protocol): ... + +class CorePopulation(Protocol): + @property + def count(self, /) -> int: ... + @property + def entity(self, /) -> CoreEntity: ... + @property + def ids(self, /) -> ArrayLike[str]: ... + @property + def simulation(self, /) -> None | Simulation: ... + @property + def _holders(self, /) -> HolderByVariable[_N]: ... class SinglePopulation(CorePopulation, Protocol): - entity: SingleEntity + @property + def entity(self, /) -> SingleEntity: ... + def get_holder(self, __name: VariableName, /) -> Holder[VarDType]: ... + - def get_holder(self, variable_name: VariableName, /) -> Holder: ... +class GroupPopulation(CorePopulation, Protocol): + @property + def entity(self, /) -> GroupEntity: ... + @property + def members_entity_id(self, /) -> StrArray: ... + def nb_persons(self, /, __role: None | Role = ...) -> int: ... -class GroupPopulation(CorePopulation, Protocol): ... +# TODO(Mauko Quiroga-Alvarado): I'm not sure if this type alias is correct. +# https://openfisca.org/doc/coding-the-legislation/50_entities.html +Members: TypeAlias = Iterable[SinglePopulation] # Simulations class Simulation(Protocol): - def calculate( - self, variable_name: VariableName, period: Period, / - ) -> Array[DTypeGeneric]: ... - def calculate_add( - self, variable_name: VariableName, period: Period, / - ) -> Array[DTypeGeneric]: ... + @property + def data_storage_dir(self, /) -> str: ... + @property + def opt_out_cache(self, /) -> bool: ... + @property + def memory_config(self, /) -> None | MemoryConfig: ... + @property + def populations(self, /) -> MutableMapping[str, CorePopulation]: ... + @property + def tax_benefit_system(self, /) -> TaxBenefitSystem: ... + @property + def trace(self, /) -> bool: ... + @property + def tracer(self, /) -> FullTracer: ... + def calculate(self, __name: VariableName, __period: Period, /) -> VarArray: ... + def calculate_add(self, __name: VariableName, __period: Period, /) -> VarArray: ... def calculate_divide( - self, variable_name: VariableName, period: Period, / - ) -> Array[DTypeGeneric]: ... - def get_population(self, plural: None | str, /) -> CorePopulation: ... + self, __name: VariableName, __period: Period, / + ) -> VarArray: ... + def get_population(self, __plural: None | str, /) -> CorePopulation: ... # Tax-Benefit systems class TaxBenefitSystem(Protocol): - person_entity: SingleEntity - + @property + def cache_blacklist(self, /) -> frozenset[VariableName]: ... + @property + def person_entity(self, /) -> SingleEntity: ... + @person_entity.setter + def person_entity(self, /, __entity: SingleEntity) -> None: ... + def variables(self, /) -> dict[VariableName, Variable[VarDType]]: ... + def entities_by_singular(self, /) -> dict[EntityKey, CoreEntity]: ... + def entities_plural(self, /) -> Iterable[EntityPlural]: ... def get_variable( self, - variable_name: VariableName, - check_existence: bool = ..., + __name: VariableName, + __check: bool = ..., /, - ) -> None | Variable: ... + ) -> None | Variable[VarDType]: ... + def instantiate_entities(self, /) -> dict[str, CorePopulation]: ... # Tracers @@ -473,23 +595,40 @@ def append_child(self, __node: TraceNode, /) -> None: ... VariableName = NewType("VariableName", str) -class Variable(Protocol): - entity: CoreEntity - name: VariableName +class Variable(Protocol[_N]): + @property + def definition_period(self, /) -> DateUnit: ... + @property + def dtype(self, /) -> DTypeLike: ... + @property + def end(self, /) -> PeriodStr: ... + @property + def entity(self, /) -> CoreEntity: ... + @property + def holder(self, /) -> Holder[_N]: ... + @property + def is_neutralized(self, /) -> bool: ... + @property + def name(self, /) -> VariableName: ... + @property + def value_type( + self, / + ) -> type[bool | int | float | str | Enum | datetime.date]: ... + def default_array(self, /, __size: int) -> Array[_N]: ... class Formula(Protocol): def __call__( self, - population: CorePopulation, - instant: Instant, - params: Params, + __population: CorePopulation, + __instant: Instant, + __params: Params, /, - ) -> Array[DTypeGeneric]: ... + ) -> VarArray: ... class Params(Protocol): - def __call__(self, instant: Instant, /) -> ParameterNodeAtInstant: ... + def __call__(self, __instant: Instant, /) -> ParameterNodeAtInstant: ... -__all__ = ["DTypeLike"] +__all__ = ["DTypeLike", "EnumDict"] diff --git a/openfisca_core/variables/variable.py b/openfisca_core/variables/variable.py index 926e4c59c1..1303782a58 100644 --- a/openfisca_core/variables/variable.py +++ b/openfisca_core/variables/variable.py @@ -471,7 +471,7 @@ def check_set_value(self, value): return value - def default_array(self, array_size): + def default_array(self, array_size: int) -> t.VarArray: array = numpy.empty(array_size, dtype=self.dtype) if self.value_type == Enum: array.fill(self.default_value.index) diff --git a/setup.py b/setup.py index c2ced97088..06eacab5d1 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,7 @@ setup( name="OpenFisca-Core", - version="43.2.7", + version="43.2.8", author="OpenFisca Team", author_email="contact@openfisca.org", classifiers=[ diff --git a/tasks/lint.mk b/tasks/lint.mk index 532518dc7e..7886fc1557 100644 --- a/tasks/lint.mk +++ b/tasks/lint.mk @@ -45,8 +45,9 @@ check-types: @python -m mypy \ openfisca_core/commons \ openfisca_core/data_storage \ - openfisca_core/experimental \ openfisca_core/entities \ + openfisca_core/experimental \ + openfisca_core/holders \ openfisca_core/indexed_enums \ openfisca_core/periods \ openfisca_core/types.py