From d0e363c64d4888d5dc8694391ccdc5e880b16735 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Mon, 13 Nov 2023 22:24:22 -0800 Subject: [PATCH] cleanup and documentation --- .../configs/input_checker.yaml | 32 +--- .../production_semcog/data_model/enums.py | 137 ++---------------- .../data_model/input_checks.py | 84 +++++++---- .../configs/input_checker.yaml | 2 +- .../data_model/input_checks.py | 30 ++-- docs/models.rst | 19 ++- 6 files changed, 91 insertions(+), 213 deletions(-) diff --git a/activitysim/examples/production_semcog/configs/input_checker.yaml b/activitysim/examples/production_semcog/configs/input_checker.yaml index d1df38f1d..a9e636ff3 100644 --- a/activitysim/examples/production_semcog/configs/input_checker.yaml +++ b/activitysim/examples/production_semcog/configs/input_checker.yaml @@ -24,38 +24,8 @@ table_list: class: Landuse - name: Network - path: . # must be absolute path or relative path to one of the supplied data directories + # path: . # must be absolute path or relative path. Can also omit if supplied in a data directory is_activitysim_input: False # will read raw table with pandas.read_csv from above path validation: class: NetworkLinks method: pandera - -# --------- pydantic examples ---------- -# WARNING: Still in development! 
- # - name: households - # is_activitysim_input: True # will load via ActivitySim's built-in reader and apply column transformations - # validation: - # method: pydantic - # class: Household - # helper_class: HouseholdValidator - # helper_class_attribute: list_of_households - # children: - # table_name: persons - # child_name: persons # Household class has variable "persons: list[Person]" - # merged_on: household_id - - # - name: persons - # is_activitysim_input: True # will load via ActivitySim's built-in reader and apply column transformations - # validation: - # method: pydantic - # class: Person - # helper_class: PersonValidator - # helper_class_attribute: list_of_persons - - # - name: land_use - # is_activitysim_input: True # will load via ActivitySim's built-in reader and apply column transformations - # validation: - # method: pydantic - # class: Landuse - # helper_class: TazValidator - # helper_class_attribute: list_of_zones \ No newline at end of file diff --git a/activitysim/examples/production_semcog/data_model/enums.py b/activitysim/examples/production_semcog/data_model/enums.py index e7c02de7d..71c436924 100644 --- a/activitysim/examples/production_semcog/data_model/enums.py +++ b/activitysim/examples/production_semcog/data_model/enums.py @@ -22,66 +22,6 @@ class PersonType(IntEnum): PRE_SCHOOL_CHILD = 8 -class Gender(IntEnum): - """ - Provides an integer mapping for gender. - """ - - MALE = 1 - FEMALE = 2 - OTHER = 3 - - -class Occupation(IntEnum): - """ - Provides an integer mapping for a persons occupation. - """ - - WHITE_COLLAR = 1 - SERVICES = 2 - HEALTH = 3 - RETAIL = 4 - BLUE_COLLAR = 5 - NOT_EMPLOYED = 6 - - -class DailyActivityPattern(IntEnum): - """ - Provides an integer mapping to the daily activity pattern variable. - """ - - MANDATORY = 1 - NON_MANDATORY = 2 - HOME = 3 - - -class Purpose(IntEnum): - """ - Provides an integer mapping for trip and tour purpose. 
- """ - - WORK = 1 - ADULT_SCHOOL = 2 - CHILD_SCHOOL = 3 - SHOPPING = 4 - ESCORT = 5 - MAINTENANCE = 6 - DISCRETIONARY = 7 - - -class AreaType(IntEnum): - """ - Provide an integer mapping for area type, which serves as a proxy for urban form. - """ - - REGIONAL_CORE = 0 - CBD = 1 - URBAN_BUSINESS = 2 - URBAN = 3 - SUBURBAN = 4 - RURAL = 5 - - class HHT(IntEnum): """ Provide an integer mapping for household/family type. @@ -98,74 +38,15 @@ class HHT(IntEnum): UNKNOWN = -9 -class Mode(IntEnum): - """ - Provides an integer mapping for travel mode. - """ - - DRIVE_ALONE_FREE = 1 - DRIVE_ALONE_PAY = 2 - SHARED_RIDE_2_FREE = 3 - SHARED_RIDE_2_PAY = 4 - SHARED_RIDE_3_FREE = 5 - SHARED_RIDE_3_PAY = 6 - WALK = 7 - BICYCLE = 8 - WALK_TO_TRANSIT_ALL = 9 - WALK_TO_TRANSIT_PREMIUM_ONLY = 10 - PARK_AND_RIDE_TRANSIT_ALL = 11 - PARK_AND_RIDE_TRANSIT_PREMIUM_ONLY = 12 - KISS_AND_RIDE_TRANSIT_ALL = 13 - KISS_AND_RIDE_TRANSIT_PREMIUM_ONLY = 14 - SCHOOL_BUS = 15 - - -class ModelTime(IntEnum): +class ESR(IntEnum): """ - Provides an integer mapping from military time to model time interval index. - The name represents the starting point of the interval. So the interval from - 3:00 am to 3:30 am is represented by index 1, which is named `ZERO_THREE`. 
+ Employment Status Recode """ - ZERO_THREE = 1 - ZERO_THREE_THIRTY = 2 - ZERO_FOUR = 3 - ZERO_FOUR_THIRTY = 4 - ZERO_FIVE = 5 - ZERO_FIVE_THIRTY = 6 - ZERO_SIX = 7 - ZERO_SIX_THIRTY = 8 - ZERO_SEVEN = 9 - ZERO_SEVEN_THIRTY = 10 - ZERO_EIGHT = 11 - ZERO_EIGHT_THIRTY = 12 - ZERO_NINE = 13 - ZERO_NINE_THIRTY = 14 - TEN = 15 - TEN_THIRTY = 16 - ELEVEN = 17 - ELEVEN_THIRTY = 18 - TWELVE = 19 - TWELVE_THIRTY = 20 - THIRTEEN = 21 - THIRTEEN_THIRTY = 22 - FOURTEEN = 23 - FOURTEEN_THIRTY = 24 - FIFTEEN = 25 - FIFTEEN_THIRTY = 26 - SIXTEEN = 27 - SIXTEEN_THIRTY = 28 - SEVENTEEN = 29 - SEVENTEEN_THIRTY = 30 - EIGHTEEN = 31 - EIGHTEEN_THIRTY = 32 - NINETEEN = 33 - NINETEEN_THIRTY = 34 - TWENTY = 35 - TWENTY_THIRTY = 36 - TWENTY_ONE = 37 - TWENTY_ONE_THIRTY = 38 - TWENTY_TWO = 39 - TWENTY_TWO_THIRTY = 40 - TWENTY_THREE = 41 - TWENTY_THREE_THIRTY = 42 + NA = -9 + EMPLOYED_AT_WORK = 1 + EMPLOYED_NOT_AT_WORK = 2 + UNEMPLOYED = 3 + ARMED_FORCES_AT_WORK = 4 + ARMED_FORCES_NOT_AT_WORK = 5 + NOT_IN_LABOR_FORCE = 6 diff --git a/activitysim/examples/production_semcog/data_model/input_checks.py b/activitysim/examples/production_semcog/data_model/input_checks.py index 106e59b63..b9b1f338b 100644 --- a/activitysim/examples/production_semcog/data_model/input_checks.py +++ b/activitysim/examples/production_semcog/data_model/input_checks.py @@ -22,24 +22,26 @@ from activitysim.abm.models.input_checker import TABLE_STORE, log_info -# logger = logging.getLogger(__name__) - class Household(pa.DataFrameModel): """ Household data from PopulationSim and input to ActivitySim. - Customize as needed for your application. 
Fields: household_id: unique number identifying each household - home_zone_id: zone number where household resides, MAZ in two zone systems, TAZ in one zone + age_of_head: age of the head of household + auto_ownership: Seeding for initial number of autos owned by the household hhsize: number of people in the household + race_id: + children: Number of children in household + home_zone_id: zone number where household resides, MAZ in two zone systems, TAZ in one zone income: Annual income in $ - auto_ownership: Seeding for initial number of autos owned by the household + adjinc: Adjusted income HHT: Household type, see enums.HHT + home_zone_id: MAZ of household + TAZ: TAZ of household """ - # auto_ownership: int = pa.Field(ge=0, le=6) household_id: int = pa.Field(unique=True, gt=0) age_of_head: int = pa.Field(ge=0, coerce=True) auto_ownership: int = pa.Field( @@ -69,7 +71,7 @@ def check_persons_per_household(cls, households: pd.DataFrame): mismatched_cases = households.set_index("household_id").loc[mismatched_indices] if len(mismatched_cases) > 0: log_info( - f"Household size dose not equal the number of persons at \n{mismatched_cases}.\n" + f"Household size does not equal the number of persons at \n{mismatched_cases}.\n" ) else: log_info(f"Household size equals the number of persons.\n") @@ -104,7 +106,7 @@ def check_children_per_household(cls, households: pd.DataFrame): mismatched_cases = households.set_index("household_id").loc[mismatched_indices] if len(mismatched_cases) > 0: log_info( - f"Household children dose not equal the number of children in persons at \n{mismatched_cases}.\n" + f"Household children does not equal the number of children in persons at \n{mismatched_cases}.\n" ) else: log_info(f"Household children equals the number of children in persons.\n") @@ -115,6 +117,22 @@ class Person(pa.DataFrameModel): """ Person data from PopulationSim and input to ActivitySim. Customize as needed for your application. 
+ + person_id: unique person identification number + relate: + age: person age + sex: person sex + race_id: person race + member_id: person number in the household + household_id: household identification number + esr: Employment status recode (from PUMS) + wkhp: Usual hours worked per week past 12 months (from PUMS) + wkw: Weeks worked during past 12 months (from PUMS) + schg: Grade Level Attending (from PUMS) + mil: Military Service (from PUMS) + naicsp: North American Industry Classification System recode (from PUMS) + industry: Employment industry + zone_id: MAZ of the household """ person_id: int = pa.Field(unique=True, gt=0) @@ -124,7 +142,7 @@ class Person(pa.DataFrameModel): race_id: int = pa.Field(gt=0, le=4) member_id: int = pa.Field(gt=0) household_id: int = pa.Field(nullable=False) - esr: float = pa.Field(isin=(set([-9.0] + [float(x) for x in range(1, 7)]))) + esr: float = pa.Field(isin=e.ESR) wkhp: float = pa.Field(isin=(set([-9.0] + [float(x) for x in range(0, 100)]))) wkw: float = pa.Field(isin=(set([-9.0] + [float(x) for x in range(0, 7)]))) schg: float = pa.Field(isin=(set([-9.0] + [float(x) for x in range(0, 17)]))) @@ -165,20 +183,35 @@ def check_households_have_persons(cls, persons: pd.DataFrame): class Landuse(pa.DataFrameModel): """ Land use data. - Customize as needed for your application. 
- zone_id: TAZ of the zone - DISTRICT: District the zone relies in - SD: Super District - COUNTY: County of zone, see enums.County - TOTHH: Total households - TOTEMP: Total Employment - RETEMPN: Retail trade employment - FPSEMPN: Financial and processional services employment - HEREMPN: Health, educational, and recreational service employment - OTHEMPN: Other employment - AGREMPN: Agricultural and natural resources employment - MWTEMPN: Manufacturing, wholesale trade, and transporation employment + zone_id: MAZ ID + tot_acres: Acres of the zone + TAZ: TAZ ID + tot_hhs: Number of households + hhs_pop: Non-Group Quarters population + grppop: Group-Quarters population + tot_pop: Total population + K_8: Preschool through 8th grade enrollment + G9_12: High school enrollment + e01_nrm: + e02_constr: construction employment + e03_manuf: manufacturing employment + e04_whole: wholesale employment + e05_retail: retail employment + e06_trans: transportation employment + e07_utility: Utility employment + e08_infor: information services employment + e09_finan: financial services employment + e10_pstsvc: postal services employment(?)
+ e11_compmgt: management services employment + e12_admsvc: administrative services employment + e13_edusvc: educational services employment + e14_medfac: medical employment + e15_hospit: hospital employment + e16_leisure: leisure employment + e17_othsvc: other services employment + e18_pubadm: public administration employment + tot_emp: total employment """ zone_id: int = pa.Field(gt=0, le=22818, nullable=False) @@ -188,7 +221,6 @@ class Landuse(pa.DataFrameModel): hhs_pop: float = pa.Field(ge=0, coerce=True) grppop: float = pa.Field(ge=0, coerce=True) tot_pop: float = pa.Field(ge=0, coerce=True) - # enrollment_k_8: is_numeric = pa.Field(ge=0) K_8: float = pa.Field(ge=0, coerce=True) G9_12: float = pa.Field(ge=0, coerce=True) e01_nrm: float = pa.Field(ge=0, coerce=True) @@ -355,13 +387,11 @@ def extract_skim_names(file_path): state.filesystem.get_config_file_path(tour_mode_choice_spec) ) - # Adding breaking change for testing! - skim_names.append("break") - missing_skims = [ skim_name for skim_name in skim_names if skim_name not in omx_keys ] if len(missing_skims) > 0: log_info(f"Missing skims {missing_skims} found in {tour_mode_choice_spec}") - result = len(missing_skims) == 0 + else: + log_info(f"Found all skimms in {tour_mode_choice_spec}") return len(missing_skims) == 0 diff --git a/activitysim/examples/prototype_mtc_extended/configs/input_checker.yaml b/activitysim/examples/prototype_mtc_extended/configs/input_checker.yaml index 1433a6767..6a38e9f1b 100644 --- a/activitysim/examples/prototype_mtc_extended/configs/input_checker.yaml +++ b/activitysim/examples/prototype_mtc_extended/configs/input_checker.yaml @@ -24,7 +24,7 @@ table_list: class: Landuse - name: example_hwy_data - path: ../../prototype_mtc_extended/test/additional_data + path: ../../prototype_mtc_extended/test/additional_data # must be absolute path or relative path. 
Can also omit if supplied in a data directory is_activitysim_input: False # will read raw table with pandas.read_csv from above path relative to a data directory validation: class: NetworkLinks diff --git a/activitysim/examples/prototype_mtc_extended/data_model/input_checks.py b/activitysim/examples/prototype_mtc_extended/data_model/input_checks.py index c70d68e98..5c5f4b19c 100644 --- a/activitysim/examples/prototype_mtc_extended/data_model/input_checks.py +++ b/activitysim/examples/prototype_mtc_extended/data_model/input_checks.py @@ -45,8 +45,6 @@ class Household(pa.DataFrameModel): income: int = pa.Field(ge=0, raise_warning=True) auto_ownership: int = pa.Field(ge=0, le=6) HHT: int = pa.Field(isin=e.HHT, raise_warning=True) - # bug1: int - # bug2: int @pa.dataframe_check( name="Do household sizes equal the number of persons in that household?", @@ -102,6 +100,12 @@ class Person(pa.DataFrameModel): """ Person data from PopulationSim and input to ActivitySim. Customize as needed for your application. 
+ + person_id: Unique person ID + household_id: household ID of the person + age: Person age + sex: Person sex (see enums.py::Gender) + ptype: Person type (see enums.py::PersonType) """ person_id: int = pa.Field(unique=True, ge=0) @@ -249,29 +253,21 @@ class NetworkLinks(pa.DataFrameModel): BA_LANES: int = pa.Field(ge=0, le=10) FENAME: str = pa.Field() - @pa.dataframe_check( - name="Are all skims listed in the tour mode choice config found in the taz_skims OMX file?", - raise_warning=True, - ) + @pa.dataframe_check(name="All skims in File?", raise_warning=True) def check_all_skims_exist(cls, land_use: pd.DataFrame): state = TABLE_STORE["state"] # code duplicated from skim_dict_factory.py but need to copy here to not load skim data los_settings = state.filesystem.read_settings_file("network_los.yaml") omx_file_paths = state.filesystem.expand_input_file_list( - los_settings["taz_skims"]["omx"] + los_settings["taz_skims"] ) omx_manifest = dict() - # FIXME getting numpy deprication warning from below omx read - import warnings - - # warnings.filterwarnings("ignore", category=DeprecationWarning) for omx_file_path in omx_file_paths: with omx.open_file(omx_file_path, mode="r") as omx_file: for skim_name in omx_file.listMatrices(): omx_manifest[skim_name] = omx_file_path - omx_keys = [] for skim_name in omx_manifest.keys(): key1, sep, key2 = skim_name.partition("__") @@ -302,15 +298,11 @@ def extract_skim_names(file_path): state.filesystem.get_config_file_path(tour_mode_choice_spec) ) - # Adding breaking change for testing! 
- skim_names.append("break") - missing_skims = [ skim_name for skim_name in skim_names if skim_name not in omx_keys ] if len(missing_skims) > 0: - logger.warning( - f"Missing skims {missing_skims} found in {tour_mode_choice_spec}" - ) - + log_info(f"Missing skims {missing_skims} found in {tour_mode_choice_spec}") + else: + log_info(f"Found all skimms in {tour_mode_choice_spec}") return len(missing_skims) == 0 diff --git a/docs/models.rst b/docs/models.rst index 0c220f9e6..f33a54a1e 100644 --- a/docs/models.rst +++ b/docs/models.rst @@ -31,18 +31,20 @@ If any checks fail, ActivitySim will crash and direct you to the output input_ch will provide details of the checks that did not pass. The user can also setup checks to output warnings instead of fatal errors. Warning details will be output to the input_checker.log file for user review and documentation. Syntax examples for both errors and warnings are demonstrated in the -:ref:`prototype_mtc_extended` example. +:ref:`prototype_mtc_extended` and :ref:`production_semcog` examples. Setup steps for new users: - * Copy the data_model directory in the :ref:`prototype_mtc_extended` example folder to your setup - space. You will need the enums.py and input_checks.py scripts. The additional - input_checks_pydantic_dev.py script is there for future development and can be discarded. - * Modify enums.py to be consistent with your implementation by changing / adding / removing variable - definitions. + * Copy the data_model directory in the :ref:`prototype_mtc_extended` or + :ref:`production_semcog` example folder to your setup space. You will need the enums.py and + input_checks.py scripts. The additional input_checks_pydantic_dev.py script in + :ref:`prototype_mtc_extended` is there for future development and can be discarded. * Modify the input_checker.py to be consistent with your input data. This can include changing variable names and adding or removing checks. 
The amount and types of checks to perform are completely up to you! Syntax is shown for many different checks in the example. - * Copy the input_checker.yaml script from :ref:`prototype_mtc_extended` into your configs + * Modify enums.py to be consistent with your implementation by changing / adding / removing variable + definitions. + * Copy the input_checker.yaml script from :ref:`prototype_mtc_extended` or + :ref:`production_semcog` configs into your configs directory. Update the list of data tables you would like to check in the input_checker.yaml file. The "validator_class" option should correspond to the name of the corresponding class in the input_checker.py file you modified in the above step. @@ -51,6 +53,9 @@ Setup steps for new users: directory from the first step, e.g. activitysim run -c configs -d data -o outout --data_model data_model. +.. note:: + If you are running ActivitySim with the input checker module active, you must supply a --data_model argument that points to where the input_checks.py file exists! + .. automodule:: activitysim.abm.models.input_checker :members: