Skip to content

Commit

Permalink
major work on phase 5 (#325)
Browse files Browse the repository at this point in the history
* estimation through atwork_subtour_mode_choice

* TNC updates and notebooks (#18)

* move other resources into folder since examples now part of package as well

* add shapefile

* clean up folder setup

* estimation notebooks for larch (#19)

* multiprocessing related logging and error checking

* trip_destination handle all trips fail

* skim caching with numpy memmap to speed skim loading

* better chunking in vectorize_tour_scheduling

* Cli (#22)

* use activitysim_resources

* Fixed auto sufficiency conditions in tour_mode_choice.csv (Issue #324)

* increment to version 0.9.5

* correct the write-trip-matrices sampling expansion and add vehicle occupancy to the expression file

* updates to documentation for phase 5 work except multiple zone systems
  • Loading branch information
bstabler authored Jul 29, 2020
1 parent 1041bf6 commit efe66b3
Show file tree
Hide file tree
Showing 312 changed files with 440,944 additions and 290,626 deletions.
7 changes: 5 additions & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
include ez_setup.py
include README.rst
graft notebooks
graft activitysim/examples

# required for test system

include activitysim\abm\test\data\mtc_asim.h5
include activitysim\abm\test\data\skims.omx
include activitysim\abm\test\data\households.csv
include activitysim\abm\test\data\persons.csv
include activitysim\abm\test\data\land_use.csv
include activitysim\abm\test\data\override_hh_ids.csv

graft activitysim/examples
2 changes: 1 addition & 1 deletion activitysim/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# ActivitySim
# See full license in LICENSE.txt.

__version__ = '0.10.0'
__version__ = '0.9.5'
__doc__ = 'Activity-Based Travel Modeling'
53 changes: 27 additions & 26 deletions activitysim/abm/models/accessibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,25 +46,29 @@ def __init__(self, skim_dict, orig_zones, dest_zones, transpose=False):
self.skim_dict = skim_dict
self.transpose = transpose

if omx_shape[0] == len(orig_zones):
# no slicing required
self.slice_map = None
if omx_shape[0] == len(orig_zones) and skim_dict.offset_mapper.offset_series is None:
# no slicing required because whatever the offset_int, the skim data aligns with zone list
self.map_data = False
else:
# 2-d boolean slicing in numpy is a bit tricky
# data = data[orig_map, dest_map] # <- WRONG!
# data = data[orig_map, :][:, dest_map] # <- RIGHT
# data = data[np.ix_(orig_map, dest_map)] # <- ALSO RIGHT

if omx_shape[0] == len(orig_zones):
logger.debug("AccessibilitySkims - applying offset_mapper")

skim_index = list(range(omx_shape[0]))
orig_map = np.isin(skim_index, skim_dict.offset_mapper.map(orig_zones))
dest_map = np.isin(skim_index, skim_dict.offset_mapper.map(dest_zones))
orig_map = skim_dict.offset_mapper.map(orig_zones)
dest_map = skim_dict.offset_mapper.map(dest_zones)

if not dest_map.all():
# (we might be sliced multiprocessing)
# assert np.isin(skim_index, orig_map).all()

if np.isin(skim_index, dest_map).all():
# not using the whole skim matrix
logger.info("%s skim zones not in dest_map: %s" %
((~dest_map).sum(), np.ix_(~dest_map)))

self.slice_map = np.ix_(orig_map, dest_map)
self.map_data = True
self.orig_map = orig_map
self.dest_map = dest_map

def __getitem__(self, key):
"""
Expand All @@ -80,10 +84,15 @@ def __getitem__(self, key):
if self.transpose:
data = data.transpose()

if self.slice_map is not None:
if self.map_data:

# slice skim to include only orig rows and dest columns
# 2-d boolean slicing in numpy is a bit tricky - see explanation in __init__
data = data[self.slice_map]
# 2-d boolean slicing in numpy is a bit tricky
# data = data[orig_map, dest_map] # <- WRONG!
# data = data[orig_map, :][:, dest_map] # <- RIGHT
# data = data[np.ix_(orig_map, dest_map)] # <- ALSO RIGHT

data = data[self.orig_map, :][:, self.dest_map]

return data.flatten()

Expand Down Expand Up @@ -116,19 +125,12 @@ def compute_accessibility(accessibility, skim_dict, land_use, trace_od):
logger.info("Running %s with %d dest zones" % (trace_label, len(accessibility_df)))

constants = config.get_model_constants(model_settings)
land_use_columns = model_settings.get('land_use_columns', [])

land_use_columns = model_settings.get('land_use_columns', [])
land_use_df = land_use.to_frame()
land_use_df = land_use_df[land_use_columns]

# #bug
#
# land_use_df = land_use_df[land_use_df.index % 2 == 1]
# accessibility_df = accessibility_df[accessibility_df.index.isin(land_use_df.index)].head(5)
#
# print "land_use_df", land_use_df.index
# print "accessibility_df", accessibility_df.index
# #bug

# don't assume they are the same: accessibility may be sliced if we are multiprocessing
orig_zones = accessibility_df.index.values
dest_zones = land_use_df.index.values

Expand All @@ -153,7 +155,6 @@ def compute_accessibility(accessibility, skim_dict, land_use, trace_od):
trace_od_rows = None

# merge land_use_columns into od_df
land_use_df = land_use_df[land_use_columns]
od_df = pd.merge(od_df, land_use_df, left_on='dest', right_index=True).sort_index()

locals_d = {
Expand All @@ -170,7 +171,7 @@ def compute_accessibility(accessibility, skim_dict, land_use, trace_od):

for column in results.columns:
data = np.asanyarray(results[column])
data.shape = (orig_zone_count, dest_zone_count)
data.shape = (orig_zone_count, dest_zone_count) # (o,d)
accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

# - write table to pipeline
Expand Down
65 changes: 49 additions & 16 deletions activitysim/abm/models/atwork_subtour_destination.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
from activitysim.core.interaction_sample import interaction_sample
from activitysim.core.util import assign_in_place

from .util import estimation

from .util import logsums as logsum
from activitysim.abm.tables.size_terms import tour_destination_size_terms

Expand All @@ -27,9 +29,12 @@ def atwork_subtour_destination_sample(
model_settings,
skim_dict,
destination_size_terms,
estimator,
chunk_size, trace_label):

model_spec = simulate.read_model_spec(file_name='atwork_subtour_destination_sample.csv')
model_spec = simulate.read_model_spec(file_name=model_settings['SAMPLE_SPEC'])
coefficients_df = simulate.read_model_coefficients(model_settings)
model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

# merge persons into tours
choosers = pd.merge(tours, persons_merged, left_on='person_id', right_index=True)
Expand All @@ -40,6 +45,11 @@ def atwork_subtour_destination_sample(
constants = config.get_model_constants(model_settings)

sample_size = model_settings['SAMPLE_SIZE']
if estimator:
# FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
logger.info("Estimation mode for %s using unsampled alternatives short_circuit_choices" % (trace_label,))
sample_size = 0

alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']

logger.info("Running atwork_subtour_location_sample with %d tours", len(choosers))
Expand Down Expand Up @@ -77,7 +87,7 @@ def atwork_subtour_destination_logsums(
destination_sample,
model_settings,
skim_dict, skim_stack,
chunk_size, trace_hh_id, trace_label):
chunk_size, trace_label):
"""
add logsum column to existing atwork_subtour_destination_sample table
Expand Down Expand Up @@ -139,13 +149,16 @@ def atwork_subtour_destination_simulate(
model_settings,
skim_dict,
destination_size_terms,
estimator,
chunk_size, trace_label):
"""
atwork_subtour_destination model on atwork_subtour_destination_sample
annotated with mode_choice logsum to select a destination from sample alternatives
"""

model_spec = simulate.read_model_spec(file_name='atwork_subtour_destination.csv')
model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
coefficients_df = simulate.read_model_coefficients(model_settings)
model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

# interaction_sample_simulate insists choosers appear in same order as alts
subtours = subtours.sort_index()
Expand All @@ -158,6 +171,9 @@ def atwork_subtour_destination_simulate(
chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
choosers = choosers[chooser_columns]

if estimator:
estimator.write_choosers(choosers)

alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']
chooser_col_name = 'workplace_taz'

Expand Down Expand Up @@ -196,7 +212,8 @@ def atwork_subtour_destination_simulate(
locals_d=locals_d,
chunk_size=chunk_size,
trace_label=trace_label,
trace_choice_name='workplace_location')
trace_choice_name='workplace_location',
estimator=estimator)

if not want_logsums:
# for consistency, always return a dataframe with canonical column name
Expand All @@ -216,27 +233,37 @@ def atwork_subtour_destination(
chunk_size, trace_hh_id):

trace_label = 'atwork_subtour_destination'
model_settings = config.read_model_settings('atwork_subtour_destination.yaml')
model_settings_file_name = 'atwork_subtour_destination.yaml'
model_settings = config.read_model_settings(model_settings_file_name)

destination_column_name = 'destination'
logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
want_logsums = logsum_column_name is not None

sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
want_sample_table = sample_table_name is not None
want_sample_table = config.setting('want_dest_choice_sample_tables') and sample_table_name is not None

persons_merged = persons_merged.to_frame()

tours = tours.to_frame()
subtours = tours[tours.tour_category == 'atwork']
# interaction_sample_simulate insists choosers appear in same order as alts
subtours = subtours.sort_index()

# - if no atwork subtours
if subtours.shape[0] == 0:
tracing.no_results('atwork_subtour_destination')
return

# interaction_sample_simulate insists choosers appear in same order as alts
subtours = subtours.sort_index()
estimator = estimation.manager.begin_estimation('atwork_subtour_destination')
if estimator:
estimator.write_coefficients(simulate.read_model_coefficients(model_settings))
# estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
estimator.write_spec(model_settings, tag='SPEC')
estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
estimator.write_table(inject.get_injectable('size_terms'), 'size_terms', append=False)
estimator.write_table(inject.get_table('land_use').to_frame(), 'landuse', append=False)
estimator.write_model_settings(model_settings, model_settings_file_name)

destination_size_terms = tour_destination_size_terms(land_use, size_terms, 'atwork')

Expand All @@ -246,16 +273,17 @@ def atwork_subtour_destination(
model_settings,
skim_dict,
destination_size_terms,
chunk_size,
tracing.extend_trace_label(trace_label, 'sample'))
estimator=estimator,
chunk_size=chunk_size,
trace_label=tracing.extend_trace_label(trace_label, 'sample'))

destination_sample_df = atwork_subtour_destination_logsums(
persons_merged,
destination_sample_df,
model_settings,
skim_dict, skim_stack,
chunk_size, trace_hh_id,
tracing.extend_trace_label(trace_label, 'logsums'))
chunk_size=chunk_size,
trace_label=tracing.extend_trace_label(trace_label, 'logsums'))

choices_df = atwork_subtour_destination_simulate(
subtours,
Expand All @@ -265,8 +293,15 @@ def atwork_subtour_destination(
model_settings,
skim_dict,
destination_size_terms,
chunk_size,
tracing.extend_trace_label(trace_label, 'simulate'))
estimator=estimator,
chunk_size=chunk_size,
trace_label=tracing.extend_trace_label(trace_label, 'simulate'))

if estimator:
estimator.write_choices(choices_df['choice'])
choices_df['choice'] = estimator.get_survey_values(choices_df['choice'], 'tours', 'destination')
estimator.write_override_choices(choices_df['choice'])
estimator.end_estimation()

subtours[destination_column_name] = choices_df['choice']
assign_in_place(tours, subtours[[destination_column_name]])
Expand All @@ -282,8 +317,6 @@ def atwork_subtour_destination(
assert len(destination_sample_df.index.unique()) == len(choices_df)
destination_sample_df.set_index(model_settings['ALT_DEST_COL_NAME'],
append=True, inplace=True)

print(destination_sample_df)
pipeline.extend_table(sample_table_name, destination_sample_df)

tracing.print_summary(destination_column_name,
Expand Down
40 changes: 29 additions & 11 deletions activitysim/abm/models/atwork_subtour_frequency.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from activitysim.core import config
from activitysim.core import inject

from .util import estimation

from .util.tour_frequency import process_atwork_subtours
from .util.expressions import assign_columns

Expand All @@ -35,25 +37,27 @@ def atwork_subtour_frequency(tours,
"""

trace_label = 'atwork_subtour_frequency'

model_settings = config.read_model_settings('atwork_subtour_frequency.yaml')
model_spec = simulate.read_model_spec(file_name='atwork_subtour_frequency.csv')

alternatives = simulate.read_model_alts(
config.config_file_path('atwork_subtour_frequency_alternatives.csv'), set_index='alt')
model_settings_file_name = 'atwork_subtour_frequency.yaml'

tours = tours.to_frame()

persons_merged = persons_merged.to_frame()

work_tours = tours[tours.tour_type == 'work']

# - if no work_tours
if len(work_tours) == 0:
add_null_results(trace_label, tours)
return

model_settings = config.read_model_settings(model_settings_file_name)
estimator = estimation.manager.begin_estimation('atwork_subtour_frequency')

model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
coefficients_df = simulate.read_model_coefficients(model_settings)
model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

alternatives = simulate.read_model_alts('atwork_subtour_frequency_alternatives.csv', set_index='alt')

# merge persons into work_tours
persons_merged = persons_merged.to_frame()
work_tours = pd.merge(work_tours, persons_merged, left_on='person_id', right_index=True)

logger.info("Running atwork_subtour_frequency with %d work tours", len(work_tours))
Expand All @@ -70,19 +74,30 @@ def atwork_subtour_frequency(tours,
model_settings=preprocessor_settings,
trace_label=trace_label)

if estimator:
estimator.write_spec(model_settings)
estimator.write_model_settings(model_settings, model_settings_file_name)
estimator.write_coefficients(coefficients_df)
estimator.write_choosers(work_tours)

choices = simulate.simple_simulate(
choosers=work_tours,
spec=model_spec,
nest_spec=nest_spec,
locals_d=constants,
chunk_size=chunk_size,
trace_label=trace_label,
trace_choice_name='atwork_subtour_frequency')
trace_choice_name='atwork_subtour_frequency',
estimator=estimator)

# convert indexes to alternative names
choices = pd.Series(model_spec.columns[choices.values], index=choices.index)

tracing.print_summary('atwork_subtour_frequency', choices, value_counts=True)
if estimator:
estimator.write_choices(choices)
choices = estimator.get_survey_values(choices, 'tours', 'atwork_subtour_frequency')
estimator.write_override_choices(choices)
estimator.end_estimation()

# add atwork_subtour_frequency column to tours
# reindex since we are working with a subset of tours
Expand All @@ -100,6 +115,9 @@ def atwork_subtour_frequency(tours,
tracing.register_traceable_table('tours', subtours)
pipeline.get_rn_generator().add_channel('tours', subtours)

tracing.print_summary('atwork_subtour_frequency', tours.atwork_subtour_frequency,
value_counts=True)

if trace_hh_id:
tracing.trace_df(tours,
label='atwork_subtour_frequency.tours')
Loading

0 comments on commit efe66b3

Please sign in to comment.