From 79980b86397e7283598e1b040abd1e30e75693f7 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Fri, 15 Dec 2023 17:16:51 -0800 Subject: [PATCH 1/8] NMTF person available periods --- .../models/non_mandatory_tour_frequency.py | 25 +++-- activitysim/abm/models/util/overlap.py | 93 +++++++++++++++++++ 2 files changed, 109 insertions(+), 9 deletions(-) diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py b/activitysim/abm/models/non_mandatory_tour_frequency.py index 521f49c47..a76a3650b 100644 --- a/activitysim/abm/models/non_mandatory_tour_frequency.py +++ b/activitysim/abm/models/non_mandatory_tour_frequency.py @@ -20,7 +20,7 @@ from .util import annotate from .util.school_escort_tours_trips import recompute_tour_count_statistics -from .util.overlap import person_max_window +from .util.overlap import person_max_window, person_available_periods from .util.tour_frequency import process_non_mandatory_tours logger = logging.getLogger(__name__) @@ -166,7 +166,10 @@ def non_mandatory_tour_frequency(persons, persons_merged, chunk_size, trace_hh_i preprocessor_settings = model_settings.get("preprocessor", None) if preprocessor_settings: - locals_dict = {"person_max_window": person_max_window} + locals_dict = { + "person_max_window": person_max_window, + "person_available_periods": person_available_periods, + } expressions.assign_columns( df=choosers, @@ -259,6 +262,9 @@ def non_mandatory_tour_frequency(persons, persons_merged, chunk_size, trace_hh_i choices_list.append(choices) + # FIXME only want to keep actual purposes, adding cols in alts will mess this up + # this is complicated by canonical_ids calculated based on alts if not specified explicitly + # thus, adding column to input alts will change IDs and break estimation mode.... 
del alternatives["tot_tours"] # del tot_tours column we added above # The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate @@ -345,13 +351,14 @@ def non_mandatory_tour_frequency(persons, persons_merged, chunk_size, trace_hh_i # make sure they created the right tours survey_tours = estimation.manager.get_survey_table("tours").sort_index() - non_mandatory_survey_tours = survey_tours[ - survey_tours.tour_category == "non_mandatory" - ] - assert len(non_mandatory_survey_tours) == len(non_mandatory_tours) - assert non_mandatory_survey_tours.index.equals( - non_mandatory_tours.sort_index().index - ) + # FIXME below check needs to remove the pure-escort tours from the survey tours table + # non_mandatory_survey_tours = survey_tours[ + # survey_tours.tour_category == "non_mandatory" + # ] + # assert len(non_mandatory_survey_tours) == len(non_mandatory_tours) + # assert non_mandatory_survey_tours.index.equals( + # non_mandatory_tours.sort_index().index + # ) # make sure they created tours with the expected tour_ids columns = ["person_id", "household_id", "tour_type", "tour_category"] diff --git a/activitysim/abm/models/util/overlap.py b/activitysim/abm/models/util/overlap.py index 70fadfbd4..f7773a14a 100644 --- a/activitysim/abm/models/util/overlap.py +++ b/activitysim/abm/models/util/overlap.py @@ -250,3 +250,96 @@ def person_max_window(persons): max_window.index = persons.index return max_window + + +def calculate_consecutive(array): + # Append zeros columns at either sides of counts + append1 = np.zeros((array.shape[0], 1), dtype=int) + array_ext = np.column_stack((append1, array, append1)) + + # Get start and stop indices with 1s as triggers + diffs = np.diff((array_ext == 1).astype(int), axis=1) + starts = np.argwhere(diffs == 1) + stops = np.argwhere(diffs == -1) + + # Get intervals using differences between start and stop indices + intvs = stops[:, 1] - starts[:, 1] + + # Store intervals as a 2D array for further vectorized ops to make. 
+ c = np.bincount(starts[:, 0]) + mask = np.arange(c.max()) < c[:, None] + intvs2D = mask.astype(float) + intvs2D[mask] = intvs + + # Get max along each row as final output + out = intvs2D.max(1).astype(int) + return out + + +def person_available_periods(persons, start_bin=None, end_bin=None, continuous=False): + """ + Returns the number of available time period bins for each person in persons. + Can limit the calculation to include starting and/or ending bins. + Can return either the total number of available time bins with continuous = False, + or only the maximum continuous run with continuous = True + + This is equivalent to person_max_window if no start/end bins provided and continuous=True + + time bins are inclusive, i.e. [start_bin, end_bin] + + e.g. + available out of timetable has dummy first and last bins + available = [ + [1,1,1,1,1,1,1,1,1,1,1,1], + [1,1,0,1,1,0,0,1,0,1,0,1], + #-,0,1,2,3,4,5,6,7,8,9,- time bins + ] + returns: + for start_bin=None, end_bin=None, continuous=False: (10, 5) + for start_bin=None, end_bin=None, continuous=True: (10, 2) + for start_bin=5, end_bin=9, continuous=False: (5, 2) + for start_bin=5, end_bin=9, continuous=True: (5, 1) + + + Parameters + ---------- + start_bin : (int) starting time bin to include starting from 0 + end_bin : (int) ending time bin to include + continuous : (bool) count all available bins if false or just largest continuous run if True + + Returns + ------- + pd.Series of the number of available time bins indexed by person ID + """ + timetable = inject.get_injectable("timetable") + + # ndarray with one row per person and one column per time period + # array value of 1 where free periods and 0 elsewhere + s = pd.Series(persons.index.values, index=persons.index) + + # first and last bins are dummies in the time table + # so if you have 48 half hour time periods, shape is (len(persons), 50) + available = timetable.individually_available(s) + + # Create a mask to exclude bins before the starting bin and after the ending bin + mask = 
np.ones(available.shape[1], dtype=bool) + mask[0] = False + mask[len(mask) - 1] = False + if start_bin is not None: + # +1 needed due to dummy first bin + mask[: start_bin + 1] = False + if end_bin is not None: + # +2 for dummy first bin and inclusive end_bin + mask[end_bin + 2 :] = False + + # Apply the mask to the array + masked_array = available[:, mask] + + # Calculate the number of available time periods for each person + availability = np.sum(masked_array, axis=1) + + if continuous: + availability = calculate_consecutive(masked_array) + + availability = pd.Series(availability, index=persons.index) + return availability From 3aa3f46e914aa51163646f88cf9058cb4f40696a Mon Sep 17 00:00:00 2001 From: David Hensle Date: Fri, 15 Dec 2023 17:16:51 -0800 Subject: [PATCH 2/8] NMTF person available periods --- .../models/non_mandatory_tour_frequency.py | 27 ++++-- activitysim/abm/models/util/overlap.py | 93 +++++++++++++++++++ 2 files changed, 111 insertions(+), 9 deletions(-) diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py b/activitysim/abm/models/non_mandatory_tour_frequency.py index af3871bd5..d158f9e64 100644 --- a/activitysim/abm/models/non_mandatory_tour_frequency.py +++ b/activitysim/abm/models/non_mandatory_tour_frequency.py @@ -11,7 +11,7 @@ import pandas as pd from activitysim.abm.models.util import annotate -from activitysim.abm.models.util.overlap import person_max_window +from activitysim.abm.models.util.overlap import person_max_window, person_available_periods from activitysim.abm.models.util.school_escort_tours_trips import ( recompute_tour_count_statistics, ) @@ -24,6 +24,7 @@ simulate, tracing, workflow, + annotate, ) from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.configuration.logit import LogitComponentSettings @@ -230,7 +231,11 @@ def non_mandatory_tour_frequency( # - preprocessor preprocessor_settings = model_settings.preprocessor if preprocessor_settings: - 
locals_dict = {"person_max_window": lambda x: person_max_window(state, x)} + + locals_dict = { + "person_max_window": lambda x: person_max_window(state, x), + "person_available_periods": lambda x: person_available_periods(state, x), + } expressions.assign_columns( state, @@ -324,6 +329,9 @@ def non_mandatory_tour_frequency( choices_list.append(choices) + # FIXME only want to keep actual purposes, adding cols in alts will mess this up + # this is complicated by canonical_ids calculated based on alts if not specified explicitly + # thus, adding column to input alts will change IDs and break estimation mode.... del alternatives["tot_tours"] # del tot_tours column we added above # The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate @@ -418,13 +426,14 @@ def non_mandatory_tour_frequency( if estimator: # make sure they created the right tours survey_tours = estimation.manager.get_survey_table("tours").sort_index() - non_mandatory_survey_tours = survey_tours[ - survey_tours.tour_category == "non_mandatory" - ] - assert len(non_mandatory_survey_tours) == len(non_mandatory_tours) - assert non_mandatory_survey_tours.index.equals( - non_mandatory_tours.sort_index().index - ) + # FIXME below check needs to remove the pure-escort tours from the survey tours table + # non_mandatory_survey_tours = survey_tours[ + # survey_tours.tour_category == "non_mandatory" + # ] + # assert len(non_mandatory_survey_tours) == len(non_mandatory_tours) + # assert non_mandatory_survey_tours.index.equals( + # non_mandatory_tours.sort_index().index + # ) # make sure they created tours with the expected tour_ids columns = ["person_id", "household_id", "tour_type", "tour_category"] diff --git a/activitysim/abm/models/util/overlap.py b/activitysim/abm/models/util/overlap.py index 914a25dbd..8faf3af34 100644 --- a/activitysim/abm/models/util/overlap.py +++ b/activitysim/abm/models/util/overlap.py @@ -252,3 +252,96 @@ def person_max_window(state: workflow.State, persons): 
max_window.index = persons.index return max_window + + +def calculate_consecutive(array): + # Append zeros columns at either sides of counts + append1 = np.zeros((array.shape[0], 1), dtype=int) + array_ext = np.column_stack((append1, array, append1)) + + # Get start and stop indices with 1s as triggers + diffs = np.diff((array_ext == 1).astype(int), axis=1) + starts = np.argwhere(diffs == 1) + stops = np.argwhere(diffs == -1) + + # Get intervals using differences between start and stop indices + intvs = stops[:, 1] - starts[:, 1] + + # Store intervals as a 2D array for further vectorized ops to make. + c = np.bincount(starts[:, 0]) + mask = np.arange(c.max()) < c[:, None] + intvs2D = mask.astype(float) + intvs2D[mask] = intvs + + # Get max along each row as final output + out = intvs2D.max(1).astype(int) + return out + + +def person_available_periods(state: workflow.State, persons, start_bin=None, end_bin=None, continuous=False): + """ + Returns the number of available time period bins foreach person in persons. + Can limit the calculation to include starting and/or ending bins. + Can return either the total number of available time bins with continuous = False, + or only the maximum continuous run with continuous = True + + This is equivalent to person_max_window if no start/end bins provided and continuous=True + + time bins are inclusive, i.e. [start_bin, end_bin] + + e.g. 
+ available out of timetable has dummy first and last bins + available = [ + [1,1,1,1,1,1,1,1,1,1,1,1], + [1,1,0,1,1,0,0,1,0,1,0,1], + #-,0,1,2,3,4,5,6,7,8,9,- time bins + ] + returns: + for start_bin=None, end_bin=None, continuous=False: (10, 5) + for start_bin=None, end_bin=None, continuous=True: (10, 2) + for start_bin=5, end_bin=9, continuous=False: (5, 2) + for start_bin=5, end_bin=9, continuous=True: (5, 1) + + + Parameters + ---------- + start_bin : (int) starting time bin to include starting from 0 + end_bin : (int) ending time bin to include + continuous : (bool) count all available bins if false or just largest continuous run if True + + Returns + ------- + pd.Series of the number of available time bins indexed by person ID + """ + timetable = state.get_injectable("timetable") + + # ndarray with one row per person and one column per time period + # array value of 1 where free periods and 0 elsewhere + s = pd.Series(persons.index.values, index=persons.index) + + # first and last bins are dummys in the time table + # so if you have 48 half hour time periods, shape is (len(persons), 50) + available = timetable.individually_available(s) + + # Create a mask to exclude bins before the starting bin and after the ending bin + mask = np.ones(available.shape[1], dtype=bool) + mask[0] = False + mask[len(mask) - 1] = False + if start_bin is not None: + # +1 needed due to dummy first bin + mask[: start_bin + 1] = False + if end_bin is not None: + # +2 for dummy first bin and inclusive end_bin + mask[end_bin + 2 :] = False + + # Apply the mask to the array + masked_array = available[:, mask] + + # Calculate the number of available time periods for each person + availability = np.sum(masked_array, axis=1) + + if continuous: + availability = calculate_consecutive(masked_array) + + availability = pd.Series(availability, index=persons.index) + return availability From 481238b30e176c5be3778885602788aaf413eb43 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Fri, 8 Mar 2024 
13:37:37 -0800 Subject: [PATCH 3/8] blacken --- activitysim/abm/models/non_mandatory_tour_frequency.py | 5 ++++- activitysim/abm/models/util/overlap.py | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py b/activitysim/abm/models/non_mandatory_tour_frequency.py index d158f9e64..ec790ce7f 100644 --- a/activitysim/abm/models/non_mandatory_tour_frequency.py +++ b/activitysim/abm/models/non_mandatory_tour_frequency.py @@ -11,7 +11,10 @@ import pandas as pd from activitysim.abm.models.util import annotate -from activitysim.abm.models.util.overlap import person_max_window, person_available_periods +from activitysim.abm.models.util.overlap import ( + person_max_window, + person_available_periods, +) from activitysim.abm.models.util.school_escort_tours_trips import ( recompute_tour_count_statistics, ) diff --git a/activitysim/abm/models/util/overlap.py b/activitysim/abm/models/util/overlap.py index 8faf3af34..a7c102a56 100644 --- a/activitysim/abm/models/util/overlap.py +++ b/activitysim/abm/models/util/overlap.py @@ -278,7 +278,9 @@ def calculate_consecutive(array): return out -def person_available_periods(state: workflow.State, persons, start_bin=None, end_bin=None, continuous=False): +def person_available_periods( + state: workflow.State, persons, start_bin=None, end_bin=None, continuous=False +): """ Returns the number of available time period bins foreach person in persons. Can limit the calculation to include starting and/or ending bins. 
From a40fa16a15aac8240d4b69ef2dcd834fcecdc29c Mon Sep 17 00:00:00 2001 From: David Hensle Date: Fri, 8 Mar 2024 14:15:31 -0800 Subject: [PATCH 4/8] remove bad path to annotate.py --- activitysim/abm/models/non_mandatory_tour_frequency.py | 1 - 1 file changed, 1 deletion(-) diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py b/activitysim/abm/models/non_mandatory_tour_frequency.py index ec790ce7f..97a27ece2 100644 --- a/activitysim/abm/models/non_mandatory_tour_frequency.py +++ b/activitysim/abm/models/non_mandatory_tour_frequency.py @@ -27,7 +27,6 @@ simulate, tracing, workflow, - annotate, ) from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.configuration.logit import LogitComponentSettings From ac69af88348b4dd790745930c3fe109f91d589fe Mon Sep 17 00:00:00 2001 From: David Hensle Date: Fri, 8 Mar 2024 14:15:31 -0800 Subject: [PATCH 5/8] remove bad path to annotate.py --- activitysim/abm/models/non_mandatory_tour_frequency.py | 1 - 1 file changed, 1 deletion(-) diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py b/activitysim/abm/models/non_mandatory_tour_frequency.py index ec790ce7f..97a27ece2 100644 --- a/activitysim/abm/models/non_mandatory_tour_frequency.py +++ b/activitysim/abm/models/non_mandatory_tour_frequency.py @@ -27,7 +27,6 @@ simulate, tracing, workflow, - annotate, ) from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.configuration.logit import LogitComponentSettings From f76d08408ed6d5ce9f8583dde31861744ef937c3 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Thu, 28 Mar 2024 14:37:04 -0700 Subject: [PATCH 6/8] time_periods_available unit test --- .../test/test_person_available_periods.py | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 activitysim/abm/models/util/test/test_person_available_periods.py diff --git a/activitysim/abm/models/util/test/test_person_available_periods.py 
b/activitysim/abm/models/util/test/test_person_available_periods.py new file mode 100644 index 000000000..f5cb97eed --- /dev/null +++ b/activitysim/abm/models/util/test/test_person_available_periods.py @@ -0,0 +1,90 @@ +# ActivitySim +# See full license in LICENSE.txt. + +import pandas as pd +import pandas.testing as pdt + +from activitysim.abm.models.util.overlap import person_available_periods +from activitysim.core import workflow + + +def test_person_available_periods(): + state = workflow.State.make_default(__file__) + + # state.add_injectable("timetable", timetable) + + persons = pd.DataFrame(index=[1, 2, 3, 4]) + + state.add_table("persons", persons) + + timetable = state.get_injectable("timetable") + + # first testing scenario with no tours assigned + all_open = person_available_periods( + state, persons, start_bin=None, end_bin=None, continuous=False + ) + + all_open_expected = pd.Series([19, 19, 19, 19], index=[1, 2, 3, 4]) + pdt.assert_series_equal(all_open, all_open_expected, check_dtype=False) + + # adding tours to the timetable + + tours = pd.DataFrame( + { + "person_id": [1, 1, 2, 2, 3, 4], + "tour_num": [1, 2, 1, 2, 1, 1], + "start": [5, 10, 5, 20, 10, 20], + "end": [6, 14, 18, 21, 23, 23], + "tdds": [1, 89, 13, 181, 98, 183], + }, + index=[1, 2, 3, 4, 5, 6], + ) + # timetable.assign requires only 1 tour per person, so need to loop through tour nums + for tour_num, nth_tours in tours.groupby("tour_num", sort=True): + timetable.assign( + window_row_ids=nth_tours["person_id"], + tdds=nth_tours.tdds, + ) + + # testing time bins now available + tours_all_bins = person_available_periods( + state, persons, start_bin=None, end_bin=None, continuous=False + ) + tours_all_bins_expected = pd.Series([16, 7, 7, 17], index=[1, 2, 3, 4]) + pdt.assert_series_equal(tours_all_bins, tours_all_bins_expected, check_dtype=False) + + # continuous time bins available + continuous_test = person_available_periods( + state, persons, start_bin=None, end_bin=None, 
continuous=True + ) + continuous_test_expected = pd.Series([10, 6, 6, 16], index=[1, 2, 3, 4]) + pdt.assert_series_equal( + continuous_test, continuous_test_expected, check_dtype=False + ) + + # start bin test + start_test = person_available_periods( + state, persons, start_bin=11, end_bin=None, continuous=True + ) + start_test_expected = pd.Series([8, 6, 1, 5], index=[1, 2, 3, 4]) + pdt.assert_series_equal(start_test, start_test_expected, check_dtype=False) + + # end bin test + end_test = person_available_periods( + state, persons, start_bin=None, end_bin=11, continuous=False + ) + end_test_expected = pd.Series([9, 1, 6, 12], index=[1, 2, 3, 4]) + pdt.assert_series_equal(end_test, end_test_expected, check_dtype=False) + + # assortment settings test + assortment_test = person_available_periods( + state, persons, start_bin=8, end_bin=15, continuous=True + ) + assortment_test_expected = pd.Series([7, 3, 0, 8], index=[1, 2, 3, 4]) + pdt.assert_series_equal( + assortment_test, assortment_test_expected, check_dtype=False + ) + + +if "__main__" == __name__: + test_person_available_periods() From 6fd03185a12fb78e666afc5b2d4f15bcd411c96e Mon Sep 17 00:00:00 2001 From: David Hensle Date: Thu, 28 Mar 2024 15:02:32 -0700 Subject: [PATCH 7/8] removing outdated comment --- activitysim/abm/models/non_mandatory_tour_frequency.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py b/activitysim/abm/models/non_mandatory_tour_frequency.py index 97a27ece2..d032e3aae 100644 --- a/activitysim/abm/models/non_mandatory_tour_frequency.py +++ b/activitysim/abm/models/non_mandatory_tour_frequency.py @@ -331,9 +331,6 @@ def non_mandatory_tour_frequency( choices_list.append(choices) - # FIXME only want to keep actual purposes, adding cols in alts will mess this up - # this is complicated by canonical_ids calculated based on alts if not specified explicitly - # thus, adding column to input alts will change IDs and break estimation mode.... 
del alternatives["tot_tours"] # del tot_tours column we added above # The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate From 8a9f772cc5d863bd91b71bc14131378ef414af29 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Thu, 28 Mar 2024 15:18:16 -0700 Subject: [PATCH 8/8] estimation mode tour checking --- .../models/non_mandatory_tour_frequency.py | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py b/activitysim/abm/models/non_mandatory_tour_frequency.py index d032e3aae..972c4b3dc 100644 --- a/activitysim/abm/models/non_mandatory_tour_frequency.py +++ b/activitysim/abm/models/non_mandatory_tour_frequency.py @@ -236,7 +236,9 @@ def non_mandatory_tour_frequency( locals_dict = { "person_max_window": lambda x: person_max_window(state, x), - "person_available_periods": lambda x: person_available_periods(state, x), + "person_available_periods": lambda persons, start_bin, end_bin, continuous: person_available_periods( + state, persons, start_bin, end_bin, continuous + ), } expressions.assign_columns( @@ -425,14 +427,21 @@ def non_mandatory_tour_frequency( if estimator: # make sure they created the right tours survey_tours = estimation.manager.get_survey_table("tours").sort_index() - # FIXME below check needs to remove the pure-escort tours from the survey tours table - # non_mandatory_survey_tours = survey_tours[ - # survey_tours.tour_category == "non_mandatory" - # ] - # assert len(non_mandatory_survey_tours) == len(non_mandatory_tours) - # assert non_mandatory_survey_tours.index.equals( - # non_mandatory_tours.sort_index().index - # ) + non_mandatory_survey_tours = survey_tours[ + survey_tours.tour_category == "non_mandatory" + ] + # need to remove the pure-escort tours from the survey tours table for comparison below + if state.is_table("school_escort_tours"): + non_mandatory_survey_tours = non_mandatory_survey_tours[ + 
~non_mandatory_survey_tours.index.isin( + state.get_table("school_escort_tours").index + ) + ] + + assert len(non_mandatory_survey_tours) == len(non_mandatory_tours) + assert non_mandatory_survey_tours.index.equals( + non_mandatory_tours.sort_index().index + ) # make sure they created tours with the expected tour_ids columns = ["person_id", "household_id", "tour_type", "tour_category"]