Skip to content

Commit

Permalink
cleanup and documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
dhensle committed Nov 14, 2023
1 parent 7093b25 commit d0e363c
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 213 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,38 +24,8 @@ table_list:
class: Landuse

- name: Network
path: . # must be absolute path or relative path to one of the supplied data directories
# path: . # must be absolute path or relative path. Can also omit if supplied in a data directory
is_activitysim_input: False # will read raw table with pandas.read_csv from above path
validation:
class: NetworkLinks
method: pandera

# --------- pydantic examples ----------
# WARNING: Still in development!
# - name: households
# is_activitysim_input: True # will load via ActivitySim's built-in reader and apply column transformations
# validation:
# method: pydantic
# class: Household
# helper_class: HouseholdValidator
# helper_class_attribute: list_of_households
# children:
# table_name: persons
# child_name: persons # Household class has variable "persons: list[Person]"
# merged_on: household_id

# - name: persons
# is_activitysim_input: True # will load via ActivitySim's built-in reader and apply column transformations
# validation:
# method: pydantic
# class: Person
# helper_class: PersonValidator
# helper_class_attribute: list_of_persons

# - name: land_use
# is_activitysim_input: True # will load via ActivitySim's built-in reader and apply column transformations
# validation:
# method: pydantic
# class: Landuse
# helper_class: TazValidator
# helper_class_attribute: list_of_zones
137 changes: 9 additions & 128 deletions activitysim/examples/production_semcog/data_model/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,66 +22,6 @@ class PersonType(IntEnum):
PRE_SCHOOL_CHILD = 8


class Gender(IntEnum):
    """Integer codes used to represent gender."""

    MALE = 1
    FEMALE = 2
    OTHER = 3


class Occupation(IntEnum):
    """Integer codes used to represent a person's occupation."""

    WHITE_COLLAR = 1
    SERVICES = 2
    HEALTH = 3
    RETAIL = 4
    BLUE_COLLAR = 5
    NOT_EMPLOYED = 6


class DailyActivityPattern(IntEnum):
    """Integer codes for the daily activity pattern variable."""

    MANDATORY = 1
    NON_MANDATORY = 2
    HOME = 3


class Purpose(IntEnum):
    """Integer codes for the purpose of a trip or tour."""

    WORK = 1
    ADULT_SCHOOL = 2
    CHILD_SCHOOL = 3
    SHOPPING = 4
    ESCORT = 5
    MAINTENANCE = 6
    DISCRETIONARY = 7


class AreaType(IntEnum):
    """Integer codes for area type, used as a proxy for urban form."""

    REGIONAL_CORE = 0
    CBD = 1
    URBAN_BUSINESS = 2
    URBAN = 3
    SUBURBAN = 4
    RURAL = 5


class HHT(IntEnum):
"""
Provide an integer mapping for household/family type.
Expand All @@ -98,74 +38,15 @@ class HHT(IntEnum):
UNKNOWN = -9


class Mode(IntEnum):
    """Integer codes for travel mode."""

    DRIVE_ALONE_FREE = 1
    DRIVE_ALONE_PAY = 2
    SHARED_RIDE_2_FREE = 3
    SHARED_RIDE_2_PAY = 4
    SHARED_RIDE_3_FREE = 5
    SHARED_RIDE_3_PAY = 6
    WALK = 7
    BICYCLE = 8
    WALK_TO_TRANSIT_ALL = 9
    WALK_TO_TRANSIT_PREMIUM_ONLY = 10
    PARK_AND_RIDE_TRANSIT_ALL = 11
    PARK_AND_RIDE_TRANSIT_PREMIUM_ONLY = 12
    KISS_AND_RIDE_TRANSIT_ALL = 13
    KISS_AND_RIDE_TRANSIT_PREMIUM_ONLY = 14
    SCHOOL_BUS = 15


class ModelTime(IntEnum):
class ESR(IntEnum):
"""
Provides an integer mapping from military time to model time interval index.
The name represents the starting point of the interval. So the interval from
3:00 am to 3:30 am is represented by index 1, which is named `ZERO_THREE`.
Employment Status Recode
"""

ZERO_THREE = 1
ZERO_THREE_THIRTY = 2
ZERO_FOUR = 3
ZERO_FOUR_THIRTY = 4
ZERO_FIVE = 5
ZERO_FIVE_THIRTY = 6
ZERO_SIX = 7
ZERO_SIX_THIRTY = 8
ZERO_SEVEN = 9
ZERO_SEVEN_THIRTY = 10
ZERO_EIGHT = 11
ZERO_EIGHT_THIRTY = 12
ZERO_NINE = 13
ZERO_NINE_THIRTY = 14
TEN = 15
TEN_THIRTY = 16
ELEVEN = 17
ELEVEN_THIRTY = 18
TWELVE = 19
TWELVE_THIRTY = 20
THIRTEEN = 21
THIRTEEN_THIRTY = 22
FOURTEEN = 23
FOURTEEN_THIRTY = 24
FIFTEEN = 25
FIFTEEN_THIRTY = 26
SIXTEEN = 27
SIXTEEN_THIRTY = 28
SEVENTEEN = 29
SEVENTEEN_THIRTY = 30
EIGHTEEN = 31
EIGHTEEN_THIRTY = 32
NINETEEN = 33
NINETEEN_THIRTY = 34
TWENTY = 35
TWENTY_THIRTY = 36
TWENTY_ONE = 37
TWENTY_ONE_THIRTY = 38
TWENTY_TWO = 39
TWENTY_TWO_THIRTY = 40
TWENTY_THREE = 41
TWENTY_THREE_THIRTY = 42
NA = -9
EMPLOYED_AT_WORK = 1
EMPLOYED_NOT_AT_WORK = 2
UNEMPLOYED = 3
ARMED_FORCES_AT_WORK = 4
ARMED_FORCES_NOT_AT_WORK = 5
NOT_IN_LABOR_FORCE = 6
84 changes: 57 additions & 27 deletions activitysim/examples/production_semcog/data_model/input_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,26 @@

from activitysim.abm.models.input_checker import TABLE_STORE, log_info

# logger = logging.getLogger(__name__)


class Household(pa.DataFrameModel):
"""
Household data from PopulationSim and input to ActivitySim.
Customize as needed for your application.
Fields:
household_id: unique number identifying each household
home_zone_id: zone number where household resides, MAZ in two zone systems, TAZ in one zone
age_of_head: age of the head of household
auto_ownership: Seeding for initial number of autos owned by the household
hhsize: number of people in the household
race_id:
children: Number of children in household
home_zone_id: zone number where household resides, MAZ in two zone systems, TAZ in one zone
income: Annual income in $
auto_ownership: Seeding for initial number of autos owned by the household
adjinc: Adjusted income
HHT: Household type, see enums.HHT
home_zone_id: MAZ of household
TAZ: TAZ of household
"""

# auto_ownership: int = pa.Field(ge=0, le=6)
household_id: int = pa.Field(unique=True, gt=0)
age_of_head: int = pa.Field(ge=0, coerce=True)
auto_ownership: int = pa.Field(
Expand Down Expand Up @@ -69,7 +71,7 @@ def check_persons_per_household(cls, households: pd.DataFrame):
mismatched_cases = households.set_index("household_id").loc[mismatched_indices]
if len(mismatched_cases) > 0:
log_info(
f"Household size dose not equal the number of persons at \n{mismatched_cases}.\n"
f"Household size does not equal the number of persons at \n{mismatched_cases}.\n"
)
else:
log_info(f"Household size equals the number of persons.\n")
Expand Down Expand Up @@ -104,7 +106,7 @@ def check_children_per_household(cls, households: pd.DataFrame):
mismatched_cases = households.set_index("household_id").loc[mismatched_indices]
if len(mismatched_cases) > 0:
log_info(
f"Household children dose not equal the number of children in persons at \n{mismatched_cases}.\n"
f"Household children does not equal the number of children in persons at \n{mismatched_cases}.\n"
)
else:
log_info(f"Household children equals the number of children in persons.\n")
Expand All @@ -115,6 +117,22 @@ class Person(pa.DataFrameModel):
"""
Person data from PopulationSim and input to ActivitySim.
Customize as needed for your application.
person_id: unique person identification number
relate:
age: person age
sex: person sex
race_id: person race
member_id: person number in the household
household_id: household identification number
esr: Employment status recode (from PUMS)
wkhp: Usual hours worked per week past 12 months (from PUMS)
wkw: Weeks worked during past 12 months (from PUMS)
schg: Grade Level Attending (from PUMS)
mil: Military Service (from PUMS)
naicsp: North American Industry Classification System recode (from PUMS)
industry: Employment industry
zone_id: MAZ of the household
"""

person_id: int = pa.Field(unique=True, gt=0)
Expand All @@ -124,7 +142,7 @@ class Person(pa.DataFrameModel):
race_id: int = pa.Field(gt=0, le=4)
member_id: int = pa.Field(gt=0)
household_id: int = pa.Field(nullable=False)
esr: float = pa.Field(isin=(set([-9.0] + [float(x) for x in range(1, 7)])))
esr: float = pa.Field(isin=e.ESR)
wkhp: float = pa.Field(isin=(set([-9.0] + [float(x) for x in range(0, 100)])))
wkw: float = pa.Field(isin=(set([-9.0] + [float(x) for x in range(0, 7)])))
schg: float = pa.Field(isin=(set([-9.0] + [float(x) for x in range(0, 17)])))
Expand Down Expand Up @@ -165,20 +183,35 @@ def check_households_have_persons(cls, persons: pd.DataFrame):
class Landuse(pa.DataFrameModel):
"""
Land use data.
Customize as needed for your application.
zone_id: TAZ of the zone
DISTRICT: District the zone lies in
SD: Super District
COUNTY: County of zone, see enums.County
TOTHH: Total households
TOTEMP: Total Employment
RETEMPN: Retail trade employment
FPSEMPN: Financial and professional services employment
HEREMPN: Health, educational, and recreational service employment
OTHEMPN: Other employment
AGREMPN: Agricultural and natural resources employment
MWTEMPN: Manufacturing, wholesale trade, and transportation employment
zone_id: MAZ ID
tot_acres: Acres of the zone
TAZ: TAZ ID
tot_hhs: Number of households
hhs_pop: Non-Group Quarters population
grppop: Group-Quarters population
tot_pop: Total population
K_8: Preschool through 8th grade enrollment
G9_12: High school enrollment
e01_nrm:
e02_constr: construction employment
e03_manuf: manufacturing employment
e04_whole: wholesale employment
e05_retail: retail employment
e06_trans: transportation employment
e07_utility: Utility employment
e08_infor: information services employment
e09_finan: financial services employment
e10_pstsvc: postal services employment(?)
e11_compmgt: management services employment
e12_admsvc: administrative services employment
e13_edusvc: educational services employment
e14_medfac: medical employment
e15_hospit: hospital employment
e16_leisure: leisure employment
e17_othsvc: other services employment
e18_pubadm: public administration employment
tot_emp: total employment
"""

zone_id: int = pa.Field(gt=0, le=22818, nullable=False)
Expand All @@ -188,7 +221,6 @@ class Landuse(pa.DataFrameModel):
hhs_pop: float = pa.Field(ge=0, coerce=True)
grppop: float = pa.Field(ge=0, coerce=True)
tot_pop: float = pa.Field(ge=0, coerce=True)
# enrollment_k_8: is_numeric = pa.Field(ge=0)
K_8: float = pa.Field(ge=0, coerce=True)
G9_12: float = pa.Field(ge=0, coerce=True)
e01_nrm: float = pa.Field(ge=0, coerce=True)
Expand Down Expand Up @@ -355,13 +387,11 @@ def extract_skim_names(file_path):
state.filesystem.get_config_file_path(tour_mode_choice_spec)
)

# Adding breaking change for testing!
skim_names.append("break")

missing_skims = [
skim_name for skim_name in skim_names if skim_name not in omx_keys
]
if len(missing_skims) > 0:
log_info(f"Missing skims {missing_skims} found in {tour_mode_choice_spec}")
result = len(missing_skims) == 0
else:
log_info(f"Found all skimms in {tour_mode_choice_spec}")
return len(missing_skims) == 0
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ table_list:
class: Landuse

- name: example_hwy_data
path: ../../prototype_mtc_extended/test/additional_data
path: ../../prototype_mtc_extended/test/additional_data # must be absolute path or relative path. Can also omit if supplied in a data directory
is_activitysim_input: False # will read raw table with pandas.read_csv from above path relative to a data directory
validation:
class: NetworkLinks
Expand Down
Loading

0 comments on commit d0e363c

Please sign in to comment.