cleanup and documentation

ActivitySim · Nov 14, 2023 · d0e363c · d0e363c
1 parent 7093b25
commit d0e363c
Show file tree

Hide file tree

Showing 6 changed files with 91 additions and 213 deletions.
diff --git a/activitysim/examples/production_semcog/configs/input_checker.yaml b/activitysim/examples/production_semcog/configs/input_checker.yaml
@@ -24,38 +24,8 @@ table_list:
       class: Landuse
 
   - name: Network
-    path: .         # must be absolute path or relative path to one of the supplied data directories
+    # path: .         # must be absolute path or relative path.  Can also omit if supplied in a data directory
     is_activitysim_input: False # will read raw table with pandas.read_csv from above path
     validation:
       class: NetworkLinks
       method: pandera
-
-# --------- pydantic examples ----------
-# WARNING: Still in development!
-  # - name: households
-  #   is_activitysim_input: True  # will load via ActivitySim's built-in reader and apply column transformations
-  #   validation:
-  #     method: pydantic
-  #     class: Household
-  #     helper_class: HouseholdValidator
-  #     helper_class_attribute: list_of_households
-  #     children:
-  #       table_name: persons
-  #       child_name: persons  # Household class has variable "persons: list[Person]"
-  #       merged_on: household_id
-
-  # - name: persons
-  #   is_activitysim_input: True  # will load via ActivitySim's built-in reader and apply column transformations
-  #   validation:
-  #     method: pydantic
-  #     class: Person
-  #     helper_class: PersonValidator
-  #     helper_class_attribute: list_of_persons
-
-  # - name: land_use
-  #   is_activitysim_input: True # will load via ActivitySim's built-in reader and apply column transformations
-  #   validation:
-  #     method: pydantic
-  #     class: Landuse
-  #     helper_class: TazValidator
-  #     helper_class_attribute: list_of_zones
diff --git a/activitysim/examples/production_semcog/data_model/enums.py b/activitysim/examples/production_semcog/data_model/enums.py
@@ -22,66 +22,6 @@ class PersonType(IntEnum):
     PRE_SCHOOL_CHILD = 8
 
 
-class Gender(IntEnum):
-    """
-    Provides an integer mapping for gender.
-    """
-
-    MALE = 1
-    FEMALE = 2
-    OTHER = 3
-
-
-class Occupation(IntEnum):
-    """
-    Provides an integer mapping for a persons occupation.
-    """
-
-    WHITE_COLLAR = 1
-    SERVICES = 2
-    HEALTH = 3
-    RETAIL = 4
-    BLUE_COLLAR = 5
-    NOT_EMPLOYED = 6
-
-
-class DailyActivityPattern(IntEnum):
-    """
-    Provides an integer mapping to the daily activity pattern variable.
-    """
-
-    MANDATORY = 1
-    NON_MANDATORY = 2
-    HOME = 3
-
-
-class Purpose(IntEnum):
-    """
-    Provides an integer mapping for trip and tour purpose.
-    """
-
-    WORK = 1
-    ADULT_SCHOOL = 2
-    CHILD_SCHOOL = 3
-    SHOPPING = 4
-    ESCORT = 5
-    MAINTENANCE = 6
-    DISCRETIONARY = 7
-
-
-class AreaType(IntEnum):
-    """
-    Provide an integer mapping for area type, which serves as a proxy for urban form.
-    """
-
-    REGIONAL_CORE = 0
-    CBD = 1
-    URBAN_BUSINESS = 2
-    URBAN = 3
-    SUBURBAN = 4
-    RURAL = 5
-
-
 class HHT(IntEnum):
     """
     Provide an integer mapping for household/family type.
@@ -98,74 +38,15 @@ class HHT(IntEnum):
     UNKNOWN = -9
 
 
-class Mode(IntEnum):
-    """
-    Provides an integer mapping for travel mode.
-    """
-
-    DRIVE_ALONE_FREE = 1
-    DRIVE_ALONE_PAY = 2
-    SHARED_RIDE_2_FREE = 3
-    SHARED_RIDE_2_PAY = 4
-    SHARED_RIDE_3_FREE = 5
-    SHARED_RIDE_3_PAY = 6
-    WALK = 7
-    BICYCLE = 8
-    WALK_TO_TRANSIT_ALL = 9
-    WALK_TO_TRANSIT_PREMIUM_ONLY = 10
-    PARK_AND_RIDE_TRANSIT_ALL = 11
-    PARK_AND_RIDE_TRANSIT_PREMIUM_ONLY = 12
-    KISS_AND_RIDE_TRANSIT_ALL = 13
-    KISS_AND_RIDE_TRANSIT_PREMIUM_ONLY = 14
-    SCHOOL_BUS = 15
-
-
-class ModelTime(IntEnum):
+class ESR(IntEnum):
     """
-    Provides an integer mapping from military time to model time interval index.
-    The name represents the starting point of the interval. So the interval from
-    3:00 am to 3:30 am is represented by index 1, which is named `ZERO_THREE`.
+    Employment Status Recode
     """
 
-    ZERO_THREE = 1
-    ZERO_THREE_THIRTY = 2
-    ZERO_FOUR = 3
-    ZERO_FOUR_THIRTY = 4
-    ZERO_FIVE = 5
-    ZERO_FIVE_THIRTY = 6
-    ZERO_SIX = 7
-    ZERO_SIX_THIRTY = 8
-    ZERO_SEVEN = 9
-    ZERO_SEVEN_THIRTY = 10
-    ZERO_EIGHT = 11
-    ZERO_EIGHT_THIRTY = 12
-    ZERO_NINE = 13
-    ZERO_NINE_THIRTY = 14
-    TEN = 15
-    TEN_THIRTY = 16
-    ELEVEN = 17
-    ELEVEN_THIRTY = 18
-    TWELVE = 19
-    TWELVE_THIRTY = 20
-    THIRTEEN = 21
-    THIRTEEN_THIRTY = 22
-    FOURTEEN = 23
-    FOURTEEN_THIRTY = 24
-    FIFTEEN = 25
-    FIFTEEN_THIRTY = 26
-    SIXTEEN = 27
-    SIXTEEN_THIRTY = 28
-    SEVENTEEN = 29
-    SEVENTEEN_THIRTY = 30
-    EIGHTEEN = 31
-    EIGHTEEN_THIRTY = 32
-    NINETEEN = 33
-    NINETEEN_THIRTY = 34
-    TWENTY = 35
-    TWENTY_THIRTY = 36
-    TWENTY_ONE = 37
-    TWENTY_ONE_THIRTY = 38
-    TWENTY_TWO = 39
-    TWENTY_TWO_THIRTY = 40
-    TWENTY_THREE = 41
-    TWENTY_THREE_THIRTY = 42
+    NA = -9
+    EMPLOYED_AT_WORK = 1
+    EMPLOYED_NOT_AT_WORK = 2
+    UNEMPLOYED = 3
+    ARMED_FORCES_AT_WORK = 4
+    ARMED_FORCES_NOT_AT_WORK = 5
+    NOT_IN_LABOR_FORCE = 6
diff --git a/activitysim/examples/production_semcog/data_model/input_checks.py b/activitysim/examples/production_semcog/data_model/input_checks.py
@@ -22,24 +22,26 @@
 
 from activitysim.abm.models.input_checker import TABLE_STORE, log_info
 
-# logger = logging.getLogger(__name__)
-
 
 class Household(pa.DataFrameModel):
     """
     Household data from PopulationSim and input to ActivitySim.
-    Customize as needed for your application.
 
     Fields:
     household_id: unique number identifying each household
-    home_zone_id: zone number where household resides, MAZ in two zone systems, TAZ in one zone
+    age_of_head: age of the head of household
+    auto_ownership: Seeding for initial number of autos owned by the household
     hhsize: number of people in the household
+    race_id:
+    children: Number of children in household
+    home_zone_id: zone number where household resides, MAZ in two zone systems, TAZ in one zone
     income: Annual income in $
-    auto_ownership: Seeding for initial number of autos owned by the household
+    adjinc: Adjusted income
     HHT: Household type, see enums.HHT
+    home_zone_id: MAZ of household
+    TAZ: TAZ of household
     """
 
-    # auto_ownership: int = pa.Field(ge=0, le=6)
     household_id: int = pa.Field(unique=True, gt=0)
     age_of_head: int = pa.Field(ge=0, coerce=True)
     auto_ownership: int = pa.Field(
@@ -69,7 +71,7 @@ def check_persons_per_household(cls, households: pd.DataFrame):
         mismatched_cases = households.set_index("household_id").loc[mismatched_indices]
         if len(mismatched_cases) > 0:
             log_info(
-                f"Household size dose not equal the number of persons at \n{mismatched_cases}.\n"
+                f"Household size does not equal the number of persons at \n{mismatched_cases}.\n"
             )
         else:
             log_info(f"Household size equals the number of persons.\n")
@@ -104,7 +106,7 @@ def check_children_per_household(cls, households: pd.DataFrame):
         mismatched_cases = households.set_index("household_id").loc[mismatched_indices]
         if len(mismatched_cases) > 0:
             log_info(
-                f"Household children dose not equal the number of children in persons at \n{mismatched_cases}.\n"
+                f"Household children does not equal the number of children in persons at \n{mismatched_cases}.\n"
             )
         else:
             log_info(f"Household children equals the number of children in persons.\n")
@@ -115,6 +117,22 @@ class Person(pa.DataFrameModel):
     """
     Person data from PopulationSim and input to ActivitySim.
     Customize as needed for your application.
+
+    person_id: unique person identification number
+    relate:
+    age: person age
+    sex: person sex
+    race_id: person race
+    member_id: person number in the household
+    household_id: household identification number
+    esr: Employment status recode (from PUMS)
+    wkhp: Usual hours worked per week past 12 months (from PUMS)
+    wkw: Weeks worked during past 12 months (from PUMS)
+    schg: Grade Level Attending (from PUMS)
+    mil: Military Service (from PUMS)
+    naicsp: North American Industry Classification System recode (from PUMS)
+    industry: Employment industry
+    zone_id: MAZ of the household
     """
 
     person_id: int = pa.Field(unique=True, gt=0)
@@ -124,7 +142,7 @@ class Person(pa.DataFrameModel):
     race_id: int = pa.Field(gt=0, le=4)
     member_id: int = pa.Field(gt=0)
     household_id: int = pa.Field(nullable=False)
-    esr: float = pa.Field(isin=(set([-9.0] + [float(x) for x in range(1, 7)])))
+    esr: float = pa.Field(isin=e.ESR)
     wkhp: float = pa.Field(isin=(set([-9.0] + [float(x) for x in range(0, 100)])))
     wkw: float = pa.Field(isin=(set([-9.0] + [float(x) for x in range(0, 7)])))
     schg: float = pa.Field(isin=(set([-9.0] + [float(x) for x in range(0, 17)])))
@@ -165,20 +183,35 @@ def check_households_have_persons(cls, persons: pd.DataFrame):
 class Landuse(pa.DataFrameModel):
     """
     Land use data.
-    Customize as needed for your application.
 
-    zone_id: TAZ of the zone
-    DISTRICT: District the zone relies in
-    SD: Super District
-    COUNTY: County of zone, see enums.County
-    TOTHH: Total households
-    TOTEMP: Total Employment
-    RETEMPN: Retail trade employment
-    FPSEMPN: Financial and processional services employment
-    HEREMPN: Health, educational, and recreational service employment
-    OTHEMPN: Other employment
-    AGREMPN: Agricultural and natural resources employment
-    MWTEMPN: Manufacturing, wholesale trade, and transporation employment
+    zone_id: MAZ ID
+    tot_acres: Acres of the zone
+    TAZ: TAZ ID
+    tot_hhs: Number of households
+    hhs_pop: Non-Group Quarters population
+    grppop: Group-Quarters population
+    tot_pop: Total population
+    K_8: Preschool through 8th grade enrollment
+    G9_12: High school enrollment
+    e01_nrm:
+    e02_constr: construction employment
+    e03_manuf: manufacturing employment
+    e04_whole: wholesale employment
+    e05_retail: retail employment
+    e06_trans: transportation employment
+    e07_utility: Utility employment
+    e08_infor: information services employment
+    e09_finan: financial services employment
+    e10_pstsvc: postal services employment(?)
+    e11_compmgt: management services employment
+    e12_admsvc: administrative services employment
+    e13_edusvc: educational services employment
+    e14_medfac: medical employment
+    e15_hospit: hospital employment
+    e16_leisure: leisure employment
+    e17_othsvc: other services employment
+    e18_pubadm: public administration employment
+    tot_emp: total employment
     """
 
     zone_id: int = pa.Field(gt=0, le=22818, nullable=False)
@@ -188,7 +221,6 @@ class Landuse(pa.DataFrameModel):
     hhs_pop: float = pa.Field(ge=0, coerce=True)
     grppop: float = pa.Field(ge=0, coerce=True)
     tot_pop: float = pa.Field(ge=0, coerce=True)
-    # enrollment_k_8: is_numeric = pa.Field(ge=0)
     K_8: float = pa.Field(ge=0, coerce=True)
     G9_12: float = pa.Field(ge=0, coerce=True)
     e01_nrm: float = pa.Field(ge=0, coerce=True)
@@ -355,13 +387,11 @@ def extract_skim_names(file_path):
             state.filesystem.get_config_file_path(tour_mode_choice_spec)
         )
 
-        # Adding breaking change for testing!
-        skim_names.append("break")
-
         missing_skims = [
             skim_name for skim_name in skim_names if skim_name not in omx_keys
         ]
         if len(missing_skims) > 0:
             log_info(f"Missing skims {missing_skims} found in {tour_mode_choice_spec}")
-        result = len(missing_skims) == 0
+        else:
+            log_info(f"Found all skimms in {tour_mode_choice_spec}")
         return len(missing_skims) == 0
diff --git a/activitysim/examples/prototype_mtc_extended/configs/input_checker.yaml b/activitysim/examples/prototype_mtc_extended/configs/input_checker.yaml
@@ -24,7 +24,7 @@ table_list:
       class: Landuse
 
   - name: example_hwy_data
-    path: ../../prototype_mtc_extended/test/additional_data
+    path: ../../prototype_mtc_extended/test/additional_data # must be absolute path or relative path.  Can also omit if supplied in a data directory
     is_activitysim_input: False # will read raw table with pandas.read_csv from above path relative to a data directory
     validation:
       class: NetworkLinks