From 46062679825b55d44fec7a5dd4dde9f5fb0fea97 Mon Sep 17 00:00:00 2001 From: Jeffrey Doyle Date: Fri, 17 Apr 2020 12:49:57 -0400 Subject: [PATCH 1/5] improved validation diagnostics in trip_purpose and various windows-related int32/int64 conversions --- .../abm/models/atwork_subtour_scheduling.py | 3 +- activitysim/abm/models/trip_purpose.py | 30 +++++++++++++++---- activitysim/abm/models/util/tour_frequency.py | 6 ++++ activitysim/abm/tables/shadow_pricing.py | 2 +- activitysim/core/chunk.py | 2 +- 5 files changed, 35 insertions(+), 8 deletions(-) diff --git a/activitysim/abm/models/atwork_subtour_scheduling.py b/activitysim/abm/models/atwork_subtour_scheduling.py index eb674cea3..538463436 100644 --- a/activitysim/abm/models/atwork_subtour_scheduling.py +++ b/activitysim/abm/models/atwork_subtour_scheduling.py @@ -3,6 +3,7 @@ import logging import pandas as pd +import numpy as np from activitysim.core import simulate from activitysim.core import tracing @@ -61,7 +62,7 @@ def atwork_subtour_scheduling( model_settings, trace_label) # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id - parent_tour_ids = subtours.parent_tour_id.astype(int).unique() + parent_tour_ids = subtours.parent_tour_id.astype(np.int64).unique() parent_tours = pd.DataFrame({'tour_id': parent_tour_ids}, index=parent_tour_ids) parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True) diff --git a/activitysim/abm/models/trip_purpose.py b/activitysim/abm/models/trip_purpose.py index ddf4778e5..714d7fa39 100644 --- a/activitysim/abm/models/trip_purpose.py +++ b/activitysim/abm/models/trip_purpose.py @@ -73,15 +73,35 @@ def choose_intermediate_trip_purpose(trips, probs_spec, trace_hh_id, trace_label choosers = pd.merge(trips.reset_index(), probs_spec, on=probs_join_cols, how='left').set_index('trip_id') - chunk.log_df(trace_label, 'choosers', choosers) + # select the matching depart range (this should result on in exactly one chooser row per trip) + chooser_probs = \ + (choosers.start >= choosers['depart_range_start']) & (choosers.start <= choosers['depart_range_end']) + + # if we failed to match a row in probs_spec + if chooser_probs.sum() < num_trips: + # this can happen if the spec doesn't have probs for the trips matching a trip's probs_join_cols + missing_trip_ids = trips.index[~trips.index.isin(choosers.index[chooser_probs])].values + unmatched_choosers = choosers[choosers.index.isin(missing_trip_ids)] + unmatched_choosers = unmatched_choosers[['person_id', 'start'] + non_purpose_cols] + + # join to persons for better diagnostics + persons = inject.get_table('persons').to_frame() + persons_cols = ['age', 'is_worker', 'is_student', 'is_gradeschool', 'is_highschool', 'is_university'] + unmatched_choosers = pd.merge(unmatched_choosers, persons[persons_cols], + left_on='person_id', right_index=True, how='left') + + file_name = '%s.UNMATCHED_PROBS' % trace_label + logger.error("%s %s of %s intermediate trips could not be matched to probs based on join columns %s" % + (trace_label, len(unmatched_choosers), len(choosers), probs_join_cols)) + logger.info("Writing %s unmatched choosers to %s" % (len(unmatched_choosers), file_name,)) + tracing.write_csv(unmatched_choosers, file_name=file_name, transpose=False) + raise RuntimeError("Some trips could not be matched to probs based on join columns %s." % probs_join_cols) # select the matching depart range (this should result on in exactly one chooser row per trip) - choosers = choosers[(choosers.start >= choosers['depart_range_start']) & ( - choosers.start <= choosers['depart_range_end'])] + choosers = choosers[chooser_probs] # choosers should now match trips row for row - assert choosers.index.is_unique - assert len(choosers.index) == num_trips + assert choosers.index.identical(trips.index) choices, rands = logit.make_choices( choosers[purpose_cols], diff --git a/activitysim/abm/models/util/tour_frequency.py b/activitysim/abm/models/util/tour_frequency.py index f29f9d53a..186ca24a7 100644 --- a/activitysim/abm/models/util/tour_frequency.py +++ b/activitysim/abm/models/util/tour_frequency.py @@ -213,6 +213,9 @@ def create_tours(tour_counts, tour_category, parent_col='person_id'): # for joint tours, the correct number will be filled in after participation step tours['number_of_participants'] = 1 + # index is arbitrary but don't want any duplicates + tours.reset_index(drop=True, inplace=True) + return tours @@ -525,6 +528,9 @@ def process_joint_tours(joint_tour_frequency, joint_tour_frequency_alts, point_p tour_category='joint', parent_col='household_id') + assert not tours.index.duplicated().any() + assert point_persons.index.name == 'household_id' + # - assign a temp point person to tour so we can create stable index tours['person_id'] = reindex(point_persons.person_id, tours.household_id) tours['origin'] = reindex(point_persons.home_taz, tours.household_id) diff --git a/activitysim/abm/tables/shadow_pricing.py b/activitysim/abm/tables/shadow_pricing.py index c33f4e842..684d87307 100644 --- a/activitysim/abm/tables/shadow_pricing.py +++ b/activitysim/abm/tables/shadow_pricing.py @@ -634,7 +634,7 @@ def buffers_for_shadow_pricing(shadow_pricing_info): for block_key, block_shape in block_shapes.items(): # buffer_size must be int (or p2.7 long), not np.int64 - buffer_size = int(np.prod(block_shape)) + buffer_size = int(np.prod(block_shape, dtype=np.int64)) csz = buffer_size * np.dtype(dtype).itemsize logger.info("allocating shared buffer %s %s buffer_size %s bytes %s (%s)" % diff --git a/activitysim/core/chunk.py b/activitysim/core/chunk.py index ce8fac119..054688eef 100644 --- a/activitysim/core/chunk.py +++ b/activitysim/core/chunk.py @@ -97,7 +97,7 @@ def log_df(trace_label, table_name, df): else: shape = df.shape - elements = np.prod(shape) + elements = np.prod(shape, dtype=np.int64) op = 'add' if isinstance(df, pd.Series): From 773f79680c90314919cd9993ba09276a464129c1 Mon Sep 17 00:00:00 2001 From: Jeffrey Doyle Date: Fri, 17 Apr 2020 15:05:48 -0400 Subject: [PATCH 2/5] pycodestyle --- activitysim/abm/models/trip_purpose.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/activitysim/abm/models/trip_purpose.py b/activitysim/abm/models/trip_purpose.py index 714d7fa39..f9d0fd81e 100644 --- a/activitysim/abm/models/trip_purpose.py +++ b/activitysim/abm/models/trip_purpose.py @@ -75,27 +75,29 @@ def choose_intermediate_trip_purpose(trips, probs_spec, trace_hh_id, trace_label # select the matching depart range (this should result on in exactly one chooser row per trip) chooser_probs = \ - (choosers.start >= choosers['depart_range_start']) & (choosers.start <= choosers['depart_range_end']) + (choosers.start >= choosers['depart_range_start']) & \ + (choosers.start <= choosers['depart_range_end']) # if we failed to match a row in probs_spec if chooser_probs.sum() < num_trips: - # this can happen if the spec doesn't have probs for the trips matching a trip's probs_join_cols + # this can happen if the spec doesn't have probs matching a trip's probs_join_cols missing_trip_ids = trips.index[~trips.index.isin(choosers.index[chooser_probs])].values unmatched_choosers = choosers[choosers.index.isin(missing_trip_ids)] unmatched_choosers = unmatched_choosers[['person_id', 'start'] + non_purpose_cols] # join to persons for better diagnostics persons = inject.get_table('persons').to_frame() - persons_cols = ['age', 'is_worker', 'is_student', 'is_gradeschool', 'is_highschool', 'is_university'] + persons_cols = \ + ['age', 'is_worker', 'is_student', 'is_gradeschool', 'is_highschool', 'is_university'] unmatched_choosers = pd.merge(unmatched_choosers, persons[persons_cols], left_on='person_id', right_index=True, how='left') file_name = '%s.UNMATCHED_PROBS' % trace_label - logger.error("%s %s of %s intermediate trips could not be matched to probs based on join columns %s" % + logger.error("%s %s of %s trips did not match probs based on join columns %s" % (trace_label, len(unmatched_choosers), len(choosers), probs_join_cols)) logger.info("Writing %s unmatched choosers to %s" % (len(unmatched_choosers), file_name,)) tracing.write_csv(unmatched_choosers, file_name=file_name, transpose=False) - raise RuntimeError("Some trips could not be matched to probs based on join columns %s." % probs_join_cols) + raise RuntimeError("Some trips did not match probs on join columns %s." % probs_join_cols) # select the matching depart range (this should result on in exactly one chooser row per trip) choosers = choosers[chooser_probs] From 23190276b8153864b95619a38ce6c79e2227a2fe Mon Sep 17 00:00:00 2001 From: Jeffrey Doyle Date: Fri, 17 Apr 2020 16:07:26 -0400 Subject: [PATCH 3/5] deprecations in test code --- activitysim/abm/models/util/cdap.py | 4 ++-- activitysim/abm/models/util/test/test_cdap.py | 2 +- .../abm/models/util/test/test_mandatory_tour_frequency.py | 2 +- .../abm/models/util/test/test_non_mandatory_tour_frequency.py | 2 +- .../abm/models/util/test/test_vectorize_tour_scheduling.py | 2 +- activitysim/abm/test/run_mp.py | 2 +- activitysim/core/test/test_logit.py | 2 +- activitysim/core/test/test_orca.py | 4 ++-- activitysim/core/test/test_simulate.py | 2 +- activitysim/core/test/test_skim.py | 2 +- activitysim/core/test/test_timetable.py | 2 +- activitysim/core/test/test_util.py | 2 +- 12 files changed, 14 insertions(+), 14 deletions(-) diff --git a/activitysim/abm/models/util/cdap.py b/activitysim/abm/models/util/cdap.py index 67c5ea231..391f5ac6c 100644 --- a/activitysim/abm/models/util/cdap.py +++ b/activitysim/abm/models/util/cdap.py @@ -638,7 +638,7 @@ def household_activity_choices(indiv_utils, interaction_coefficients, hhsize, utils = simulate.eval_utilities(spec, choosers, trace_label=trace_label) if len(utils.index) == 0: - return pd.Series() + return pd.Series(dtype='float64') probs = logit.utils_to_probs(utils, trace_label=trace_label) @@ -752,7 +752,7 @@ def extra_hh_member_choices(persons, cdap_fixed_relative_proportions, locals_d, choosers = persons[persons['cdap_rank'] > MAX_HHSIZE] if len(choosers.index) == 0: - return pd.Series() + return pd.Series(dtype='float64') # eval the expression file values = simulate.eval_variables(cdap_fixed_relative_proportions.index, choosers, locals_d) diff --git a/activitysim/abm/models/util/test/test_cdap.py b/activitysim/abm/models/util/test/test_cdap.py index 2dbfcea26..12bda06ff 100644 --- a/activitysim/abm/models/util/test/test_cdap.py +++ b/activitysim/abm/models/util/test/test_cdap.py @@ -4,7 +4,7 @@ import os.path import pandas as pd -import pandas.util.testing as pdt +import pandas.testing as pdt import pytest from .. import cdap diff --git a/activitysim/abm/models/util/test/test_mandatory_tour_frequency.py b/activitysim/abm/models/util/test/test_mandatory_tour_frequency.py index 29a1cda34..8fbe70470 100644 --- a/activitysim/abm/models/util/test/test_mandatory_tour_frequency.py +++ b/activitysim/abm/models/util/test/test_mandatory_tour_frequency.py @@ -5,7 +5,7 @@ import pytest import os import pandas as pd -import pandas.util.testing as pdt +import pandas.testing as pdt from ..tour_frequency import process_mandatory_tours diff --git a/activitysim/abm/models/util/test/test_non_mandatory_tour_frequency.py b/activitysim/abm/models/util/test/test_non_mandatory_tour_frequency.py index b9ca04bf8..7f5702b77 100644 --- a/activitysim/abm/models/util/test/test_non_mandatory_tour_frequency.py +++ b/activitysim/abm/models/util/test/test_non_mandatory_tour_frequency.py @@ -5,7 +5,7 @@ import pytest import os import pandas as pd -import pandas.util.testing as pdt +import pandas.testing as pdt from ..tour_frequency import process_non_mandatory_tours diff --git a/activitysim/abm/models/util/test/test_vectorize_tour_scheduling.py b/activitysim/abm/models/util/test/test_vectorize_tour_scheduling.py index f507e3f59..42575a364 100644 --- a/activitysim/abm/models/util/test/test_vectorize_tour_scheduling.py +++ b/activitysim/abm/models/util/test/test_vectorize_tour_scheduling.py @@ -6,7 +6,7 @@ import pandas as pd import numpy as np -import pandas.util.testing as pdt +import pandas.testing as pdt from activitysim.core import inject diff --git a/activitysim/abm/test/run_mp.py b/activitysim/abm/test/run_mp.py index 78d7e589f..64effbbe2 100644 --- a/activitysim/abm/test/run_mp.py +++ b/activitysim/abm/test/run_mp.py @@ -3,7 +3,7 @@ import os import pandas as pd -import pandas.util.testing as pdt +import pandas.testing as pdt from activitysim.core import pipeline from activitysim.core import inject diff --git a/activitysim/core/test/test_logit.py b/activitysim/core/test/test_logit.py index b90ce9d6e..d97bf579e 100644 --- a/activitysim/core/test/test_logit.py +++ b/activitysim/core/test/test_logit.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd -import pandas.util.testing as pdt +import pandas.testing as pdt import pytest from ..simulate import eval_variables diff --git a/activitysim/core/test/test_orca.py b/activitysim/core/test/test_orca.py index f040a8c28..84ed8c92e 100644 --- a/activitysim/core/test/test_orca.py +++ b/activitysim/core/test/test_orca.py @@ -369,12 +369,12 @@ def test_update_col(df): pdt.assert_series_equal(wrapped['a'], df['a']) # test 2 - let the update method do the cast - wrapped.update_col_from_series('a', pd.Series(), True) + wrapped.update_col_from_series('a', pd.Series(dtype='float64'), True) pdt.assert_series_equal(wrapped['a'], df['a']) # test 3 - don't cast, should raise an error with pytest.raises(ValueError): - wrapped.update_col_from_series('a', pd.Series()) + wrapped.update_col_from_series('a', pd.Series(dtype='float64')) wrapped.update_col_from_series('a', pd.Series([99], index=['y'])) pdt.assert_series_equal( diff --git a/activitysim/core/test/test_simulate.py b/activitysim/core/test/test_simulate.py index 5943396a4..506cfca9a 100644 --- a/activitysim/core/test/test_simulate.py +++ b/activitysim/core/test/test_simulate.py @@ -6,7 +6,7 @@ import numpy.testing as npt import numpy as np import pandas as pd -import pandas.util.testing as pdt +import pandas.testing as pdt import pytest from .. import inject diff --git a/activitysim/core/test/test_skim.py b/activitysim/core/test/test_skim.py index f37564e6f..7d594f908 100644 --- a/activitysim/core/test/test_skim.py +++ b/activitysim/core/test/test_skim.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd import numpy.testing as npt -import pandas.util.testing as pdt +import pandas.testing as pdt import pytest from .. import skim diff --git a/activitysim/core/test/test_timetable.py b/activitysim/core/test/test_timetable.py index 5d3262ed4..5069cfdf3 100644 --- a/activitysim/core/test/test_timetable.py +++ b/activitysim/core/test/test_timetable.py @@ -4,7 +4,7 @@ from builtins import range import numpy as np import pandas as pd -import pandas.util.testing as pdt +import pandas.testing as pdt import pytest from .. import timetable as tt diff --git a/activitysim/core/test/test_util.py b/activitysim/core/test/test_util.py index 188d85e9f..3e7017dd6 100644 --- a/activitysim/core/test/test_util.py +++ b/activitysim/core/test/test_util.py @@ -3,7 +3,7 @@ import numpy as np import pandas as pd -import pandas.util.testing as pdt +import pandas.testing as pdt import pytest from ..util import reindex From 001c5b2456d2299234014c10b4b209be762b9a57 Mon Sep 17 00:00:00 2001 From: Jeffrey Doyle Date: Mon, 20 Apr 2020 08:42:02 -0400 Subject: [PATCH 4/5] fix bug in handling of no viable trips case in choose_trip_destination --- activitysim/abm/models/trip_destination.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activitysim/abm/models/trip_destination.py b/activitysim/abm/models/trip_destination.py index 26f30703a..c3d4607e4 100644 --- a/activitysim/abm/models/trip_destination.py +++ b/activitysim/abm/models/trip_destination.py @@ -322,7 +322,7 @@ def choose_trip_destination( t0 = print_elapsed_time("%s.trip_destination_sample" % trace_label, t0) if trips.empty: - return pd.Series(index=trips.index), None + return pd.Series(index=trips.index).to_frame('choice'), None # - compute logsums destination_sample = compute_logsums( From 2c595b269d67fda54ace199596f42a6f46d1fd0c Mon Sep 17 00:00:00 2001 From: Jeff Doyle Date: Tue, 21 Apr 2020 09:31:07 -0400 Subject: [PATCH 5/5] add trace folder to example_mtc output folder --- activitysim/examples/example_mtc/output/trace/.gitignore | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 activitysim/examples/example_mtc/output/trace/.gitignore diff --git a/activitysim/examples/example_mtc/output/trace/.gitignore b/activitysim/examples/example_mtc/output/trace/.gitignore new file mode 100644 index 000000000..8edb80678 --- /dev/null +++ b/activitysim/examples/example_mtc/output/trace/.gitignore @@ -0,0 +1,3 @@ +*.csv +*.log +*.txt