#359 (#364)

* updates for CDAP person type mapping * allow duplicate time labels * fix a copy small items for #359 * test updates * pycodestyle * 3 zone test fix Co-authored-by: Clint Daniels <clint.daniels@wsp.com>
ActivitySim · Dec 23, 2020 · 1eb89ea · 1eb89ea
1 parent b93cbe0
commit 1eb89ea
Show file tree

Hide file tree

Showing 14 changed files with 117 additions and 29 deletions.
diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py
@@ -34,7 +34,7 @@ def cdap_simulate(persons_merged, persons, households,
 
     trace_label = 'cdap'
     model_settings = config.read_model_settings('cdap.yaml')
-
+    person_type_map = model_settings.get('PERSON_TYPE_MAP', {})
     cdap_indiv_spec = simulate.read_model_spec(file_name=model_settings['INDIV_AND_HHSIZE1_SPEC'])
 
     # Rules and coefficients for generating interaction specs for different household sizes
@@ -88,6 +88,7 @@ def cdap_simulate(persons_merged, persons, households,
 
     choices = cdap.run_cdap(
         persons=persons_merged,
+        person_type_map=person_type_map,
         cdap_indiv_spec=cdap_indiv_spec,
         cdap_interaction_coefficients=cdap_interaction_coefficients,
         cdap_fixed_relative_proportions=cdap_fixed_relative_proportions,

diff --git a/activitysim/abm/models/util/cdap.py b/activitysim/abm/models/util/cdap.py
@@ -35,9 +35,6 @@
 
 MAX_INTERACTION_CARDINALITY = 3
 
-WORKER_PTYPES = [1, 2]
-CHILD_PTYPES = [6, 7, 8]
-
 
 def set_hh_index(df):
 
@@ -61,7 +58,7 @@ def add_pn(col, pnum):
         raise RuntimeError("add_pn col not list or str")
 
 
-def assign_cdap_rank(persons, trace_hh_id=None, trace_label=None):
+def assign_cdap_rank(persons, person_type_map, trace_hh_id=None, trace_label=None):
     """
     Assign an integer index, cdap_rank, to each household member. (Starting with 1, not 0)
 
@@ -109,7 +106,7 @@ def assign_cdap_rank(persons, trace_hh_id=None, trace_label=None):
 
     # choose up to 2 workers, preferring full over part, older over younger
     workers = \
-        persons.loc[persons[_ptype_].isin(WORKER_PTYPES), [_hh_id_, _ptype_]]\
+        persons.loc[persons[_ptype_].isin(person_type_map['WORKER']), [_hh_id_, _ptype_]]\
         .sort_values(by=[_hh_id_, _ptype_], ascending=[True, True])\
         .groupby(_hh_id_).head(2)
     # tag the selected workers
@@ -118,7 +115,7 @@ def assign_cdap_rank(persons, trace_hh_id=None, trace_label=None):
 
     # choose up to 3, preferring youngest
     children = \
-        persons.loc[persons[_ptype_].isin(CHILD_PTYPES), [_hh_id_, _ptype_, _age_]]\
+        persons.loc[persons[_ptype_].isin(person_type_map['CHILD']), [_hh_id_, _ptype_, _age_]]\
         .sort_values(by=[_hh_id_, _ptype_], ascending=[True, True])\
         .groupby(_hh_id_).head(3)
     # tag the selected children
@@ -795,6 +792,7 @@ def extra_hh_member_choices(persons, cdap_fixed_relative_proportions, locals_d,
 
 def _run_cdap(
         persons,
+        person_type_map,
         cdap_indiv_spec,
         interaction_coefficients,
         cdap_fixed_relative_proportions,
@@ -815,7 +813,7 @@ def _run_cdap(
     # assign integer cdap_rank to each household member
     # persons with cdap_rank 1..MAX_HHSIZE will be have their activities chose by CDAP model
     # extra household members, will have activities assigned by in fixed proportions
-    assign_cdap_rank(persons, trace_hh_id, trace_label)
+    assign_cdap_rank(persons, person_type_map, trace_hh_id, trace_label)
     chunk.log_df(trace_label, 'persons', persons)
 
     # Calculate CDAP utilities for each individual, ignoring interactions
@@ -904,6 +902,7 @@ def cdap_calc_row_size(choosers, cdap_indiv_spec, trace_label):
 
 def run_cdap(
         persons,
+        person_type_map,
         cdap_indiv_spec,
         cdap_interaction_coefficients,
         cdap_fixed_relative_proportions,
@@ -956,6 +955,7 @@ def run_cdap(
 
         cdap_results = \
             _run_cdap(persons_chunk,
+                      person_type_map,
                       cdap_indiv_spec,
                       cdap_interaction_coefficients,
                       cdap_fixed_relative_proportions,

diff --git a/activitysim/abm/models/util/test/configs/cdap.yaml b/activitysim/abm/models/util/test/configs/cdap.yaml
@@ -0,0 +1,8 @@
+PERSON_TYPE_MAP:
+  WORKER:
+    - 1
+    - 2
+  CHILD:
+    - 6
+    - 7
+    - 8
diff --git a/activitysim/abm/models/util/test/test_cdap.py b/activitysim/abm/models/util/test/test_cdap.py
@@ -2,6 +2,7 @@
 # See full license in LICENSE.txt.
 
 import os.path
+import yaml
 
 import pandas as pd
 import pandas.testing as pdt
@@ -31,6 +32,19 @@ def teardown_function(func):
     inject.reinject_decorated_tables()
 
 
+@pytest.fixture(scope='module')
+def model_settings(configs_dir):
+    """Settings parsed from the cdap.yaml file in the test configs directory."""
+    with open(os.path.join(configs_dir, 'cdap.yaml')) as settings_file:
+        return yaml.safe_load(settings_file)
+
+
+@pytest.fixture(scope='module')
+def configs_dir():
+    """Path of the `configs` directory that sits next to this test module."""
+    return os.path.join(os.path.dirname(__file__), 'configs')
+
+
 def setup_function():
     configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
     inject.add_injectable("configs_dir", configs_dir)
@@ -48,9 +62,11 @@ def test_bad_coefficients():
     assert "Expect only M, N, or H" in str(excinfo.value)
 
 
-def test_assign_cdap_rank(people):
+def test_assign_cdap_rank(people, model_settings):
+
+    person_type_map = model_settings.get('PERSON_TYPE_MAP', {})
 
-    cdap.assign_cdap_rank(people)
+    cdap.assign_cdap_rank(people, person_type_map)
 
     expected = pd.Series(
         [1, 1, 1, 2, 2, 1, 3, 1, 2, 1, 3, 2, 1, 3, 2, 4, 1, 3, 4, 2],
@@ -60,11 +76,12 @@ def test_assign_cdap_rank(people):
     pdt.assert_series_equal(people['cdap_rank'], expected, check_dtype=False, check_names=False)
 
 
-def test_individual_utilities(people):
+def test_individual_utilities(people, model_settings):
 
     cdap_indiv_and_hhsize1 = simulate.read_model_spec(file_name='cdap_indiv_and_hhsize1.csv')
 
-    cdap.assign_cdap_rank(people)
+    person_type_map = model_settings.get('PERSON_TYPE_MAP', {})
+    cdap.assign_cdap_rank(people, person_type_map)
     individual_utils = cdap.individual_utilities(people, cdap_indiv_and_hhsize1, locals_d=None)
 
     individual_utils = individual_utils[['M', 'N', 'H']]
@@ -96,15 +113,16 @@ def test_individual_utilities(people):
         individual_utils, expected, check_dtype=False, check_names=False)
 
 
-def test_build_cdap_spec_hhsize2(people):
+def test_build_cdap_spec_hhsize2(people, model_settings):
 
     hhsize = 2
     cdap_indiv_and_hhsize1 = simulate.read_model_spec(file_name='cdap_indiv_and_hhsize1.csv')
 
     interaction_coefficients = pd.read_csv(config.config_file_path('cdap_interaction_coefficients.csv'), comment='#')
     interaction_coefficients = cdap.preprocess_interaction_coefficients(interaction_coefficients)
 
-    cdap.assign_cdap_rank(people)
+    person_type_map = model_settings.get('PERSON_TYPE_MAP', {})
+    cdap.assign_cdap_rank(people, person_type_map)
     indiv_utils = cdap.individual_utilities(people, cdap_indiv_and_hhsize1, locals_d=None)
 
     choosers = cdap.hh_choosers(indiv_utils, hhsize=hhsize)

diff --git a/activitysim/abm/test/test_multi_zone.py b/activitysim/abm/test/test_multi_zone.py
@@ -24,7 +24,7 @@
 
 # 3-zone is currently big and slow - so set this way low
 HOUSEHOLDS_SAMPLE_SIZE_3_ZONE = 10
-EXPECT_3_ZONE_TOUR_COUNT = 26
+EXPECT_3_ZONE_TOUR_COUNT = 30
 
 
 # household with mandatory, non mandatory, atwork_subtours, and joint tours
@@ -85,9 +85,12 @@ def close_handlers():
 
 def inject_settings(**kwargs):
 
-    settings = config.read_settings_file('settings.yaml', mandatory=True)
-
     for k in kwargs:
+        if k == "two_zone":
+            if kwargs[k]:
+                settings = config.read_settings_file('settings.yaml', mandatory=True)
+            else:
+                settings = config.read_settings_file('settings_static.yaml', mandatory=True)
         settings[k] = kwargs[k]
 
     inject.add_injectable("settings", settings)
@@ -98,11 +101,12 @@ def inject_settings(**kwargs):
 def full_run(configs_dir, data_dir,
              resume_after=None, chunk_size=0,
              households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
-             trace_hh_id=None, trace_od=None, check_for_variability=None):
+             trace_hh_id=None, trace_od=None, check_for_variability=None, two_zone=True):
 
     setup_dirs(configs_dir, data_dir)
 
     settings = inject_settings(
+        two_zone=two_zone,
         households_sample_size=households_sample_size,
         chunk_size=chunk_size,
         trace_hh_id=trace_hh_id,
@@ -164,7 +168,7 @@ def test_full_run_2_zone():
     tour_count = full_run(configs_dir=[example_path('configs_2_zone'), mtc_example_path('configs')],
                           data_dir=example_path('data_2'),
                           trace_hh_id=HH_ID, check_for_variability=True,
-                          households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)
+                          households_sample_size=HOUSEHOLDS_SAMPLE_SIZE, two_zone=True)
 
     print("tour_count", tour_count)
 
@@ -181,7 +185,7 @@ def test_full_run_3_zone():
     tour_count = full_run(configs_dir=[example_path('configs_3_zone'), mtc_example_path('configs')],
                           data_dir=example_path('data_3'),
                           trace_hh_id=HH_ID_3_ZONE, check_for_variability=True,
-                          households_sample_size=HOUSEHOLDS_SAMPLE_SIZE_3_ZONE)
+                          households_sample_size=HOUSEHOLDS_SAMPLE_SIZE_3_ZONE, two_zone=False)
 
     print("tour_count", tour_count)
 

diff --git a/activitysim/cli/test/test_cli.py b/activitysim/cli/test/test_cli.py
@@ -45,7 +45,9 @@ def test_create_copy():
     assert 'copying configs ...' in str(cp.stdout)
     assert 'copying configs_mp ...' in str(cp.stdout)
     assert 'copying output ...' in str(cp.stdout)
-    assert str(target) in str(cp.stdout)
+
+    # collapse doubled (escaped) backslashes so windows paths compare equal
+    assert str(target).replace("\\\\", "\\") in str(cp.stdout).replace("\\\\", "\\")
 
     assert os.path.exists(target)
     for folder in ['configs', 'configs_mp', 'data', 'output']:

diff --git a/activitysim/core/expressions.py b/activitysim/core/expressions.py
@@ -129,6 +129,50 @@ def assign_columns(df, model_settings, locals_dict={}, trace_label=None):
 # helpers
 # ##################################################################################################
 
+def skim_time_period_label(time_period):
+    """
+    Convert time period numbers to skim time period labels (e.g. 9 -> 'AM').
+
+    Bin edges and labels are read from the `skim_time_periods` setting.
+
+    Parameters
+    ----------
+    time_period : scalar or pandas Series
+        time period(s) to label
+
+    Returns
+    -------
+    str or pandas Series
+        label for a scalar input, otherwise a Series of string labels
+    """
+
+    skim_time_periods = config.setting('skim_time_periods')
+
+    # default to 60 minute time periods spanning one day (1440 minutes)
+    period_minutes = skim_time_periods.get('period_minutes', 60)
+    model_time_window_min = skim_time_periods.get('time_window', 1440)
+
+    # the time window must divide into a whole number of periods
+    assert 0 == model_time_window_min % period_minutes
+    total_periods = model_time_window_min // period_minutes
+
+    # FIXME - eventually test and use np version always?
+    period_label = 'periods'
+    if 'hours' in skim_time_periods:
+        period_label = 'hours'
+        warnings.warn('`skim_time_periods` key `hours` in settings.yml will be removed in '
+                      'future verions. Use `periods` instead',
+                      FutureWarning)
+
+    if np.isscalar(time_period):
+        # wrap into the model time window before locating the containing bin
+        bin_index = np.digitize([time_period % total_periods],
+                                skim_time_periods[period_label], right=True)[0] - 1
+        return skim_time_periods['labels'][bin_index]
+
+    return pd.cut(time_period, skim_time_periods[period_label],
+                  labels=skim_time_periods['labels'], ordered=False).astype(str)
+
 
 def annotate_preprocessors(
         tours_df, locals_dict, skims,

diff --git a/activitysim/core/los.py b/activitysim/core/los.py
@@ -597,4 +597,4 @@ def skim_time_period_label(self, time_period):
             return self.skim_time_periods['labels'][bin]
 
         return pd.cut(time_period, self.skim_time_periods['periods'],
-                      labels=self.skim_time_periods['labels'], right=True).astype(str)
+                      labels=self.skim_time_periods['labels'], ordered=False).astype(str)
diff --git a/activitysim/core/pathbuilder.py b/activitysim/core/pathbuilder.py
@@ -548,7 +548,8 @@ def build_virtual_path(self, recipe, path_type, orig, dest, tod, demographic_seg
 
         access_mode = self.network_los.setting(f'TVPB_SETTINGS.{recipe}.path_types.{path_type}.access')
         egress_mode = self.network_los.setting(f'TVPB_SETTINGS.{recipe}.path_types.{path_type}.egress')
-        paths_nest_nesting_coefficient = self.network_los.setting(f'TVPB_SETTINGS.{recipe}.path_types.{path_type}').get('paths_nest_nesting_coefficient', 1)
+        path_types_settings = self.network_los.setting(f'TVPB_SETTINGS.{recipe}.path_types.{path_type}')
+        paths_nest_nesting_coefficient = path_types_settings.get('paths_nest_nesting_coefficient', 1)
 
         # maz od pairs requested
         with memo("#TVPB build_virtual_path maz_od_df"):
@@ -640,7 +641,8 @@ def build_virtual_path(self, recipe, path_type, orig, dest, tod, demographic_seg
 
                 chunk.log_df(trace_label, "utilities_df", utilities_df)
 
-                logsums = np.maximum(np.log(np.nansum(np.exp(utilities_df.values/paths_nest_nesting_coefficient), axis=1)), UNAVAILABLE)
+                logsums = np.maximum(np.log(np.nansum(np.exp(utilities_df.values/paths_nest_nesting_coefficient),
+                                                      axis=1)), UNAVAILABLE)
 
             if want_choices:
 

diff --git a/activitysim/core/test/test_timetable.py b/activitysim/core/test/test_timetable.py
@@ -183,4 +183,4 @@ def test_basic(persons, tdd_alts):
     starts = pd.Series([9, 6, 9, 5])
     ends = pd.Series([10, 10, 10, 9])
     periods_available = timetable.remaining_periods_available(person_ids, starts, ends)
-    pdt.assert_series_equal(periods_available, pd.Series([6, 3, 4, 3]))
+    pdt.assert_series_equal(periods_available, pd.Series([6, 3, 4, 3]), check_dtype=False)
diff --git a/activitysim/examples/example_mtc/configs/cdap.yaml b/activitysim/examples/example_mtc/configs/cdap.yaml
@@ -13,6 +13,15 @@ CONSTANTS:
   SCHOOL: 7
   PRESCHOOL: 8
 
+PERSON_TYPE_MAP:
+  WORKER:
+    - 1
+    - 2
+  CHILD:
+    - 6
+    - 7
+    - 8
+
 annotate_persons:
   SPEC: annotate_persons_cdap
   DF: persons

diff --git a/activitysim/examples/example_mtc/configs/network_los.yaml b/activitysim/examples/example_mtc/configs/network_los.yaml
@@ -13,5 +13,5 @@ taz_skims: skims.omx
 skim_time_periods:
     time_window: 1440
     period_minutes: 60
-    periods: [0, 5, 9, 14, 18, 24] # 5=5:00-5:59, 9=9:00-9:59, 14=2:00-2:59, 18=6:00-6:59
-    labels: ['EA', 'AM', 'MD', 'PM', 'EV']
+    periods: [0, 3, 5, 9, 14, 18, 24] # 3=3:00-3:59, 5=5:00-5:59, 9=9:00-9:59, 14=14:00-14:59, 18=18:00-18:59
+    labels: ['EA', 'EA', 'AM', 'MD', 'PM', 'EV']
diff --git a/activitysim/examples/example_multiple_zone/configs_3_zone/network_los.yaml b/activitysim/examples/example_multiple_zone/configs_3_zone/network_los.yaml
@@ -6,7 +6,7 @@ skim_dict_factory: NumpyArraySkimFactory
 #skim_dict_factory: MemMapSkimFactory
 
 # read cached skims (using numpy memmap) from output directory (memmap is faster than omx )
-read_skim_cache: True
+read_skim_cache: False
 # write memmapped cached skims to output directory after reading from omx, for use in subsequent runs
 write_skim_cache: True
 

diff --git a/setup.py b/setup.py
@@ -30,7 +30,7 @@
         'pyarrow >= 2.0',
         'numpy >= 1.16.1',
         'openmatrix >= 0.3.4.1',
-        'pandas >= 1.0.1',
+        'pandas >= 1.1.0',
         'pyyaml >= 5.1',
         'tables >= 3.5.1',
         'toolz >= 0.8.1',