diff --git a/activitysim/abm/models/atwork_subtour_scheduling.py b/activitysim/abm/models/atwork_subtour_scheduling.py index eb674cea3..538463436 100644 --- a/activitysim/abm/models/atwork_subtour_scheduling.py +++ b/activitysim/abm/models/atwork_subtour_scheduling.py @@ -3,6 +3,7 @@ import logging import pandas as pd +import numpy as np from activitysim.core import simulate from activitysim.core import tracing @@ -61,7 +62,7 @@ def atwork_subtour_scheduling( model_settings, trace_label) # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id - parent_tour_ids = subtours.parent_tour_id.astype(int).unique() + parent_tour_ids = subtours.parent_tour_id.astype(np.int64).unique() parent_tours = pd.DataFrame({'tour_id': parent_tour_ids}, index=parent_tour_ids) parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True) diff --git a/activitysim/abm/models/trip_destination.py b/activitysim/abm/models/trip_destination.py index 26f30703a..c3d4607e4 100644 --- a/activitysim/abm/models/trip_destination.py +++ b/activitysim/abm/models/trip_destination.py @@ -322,7 +322,7 @@ def choose_trip_destination( t0 = print_elapsed_time("%s.trip_destination_sample" % trace_label, t0) if trips.empty: - return pd.Series(index=trips.index), None + return pd.Series(index=trips.index).to_frame('choice'), None # - compute logsums destination_sample = compute_logsums( diff --git a/activitysim/abm/models/trip_purpose.py b/activitysim/abm/models/trip_purpose.py index ddf4778e5..f9d0fd81e 100644 --- a/activitysim/abm/models/trip_purpose.py +++ b/activitysim/abm/models/trip_purpose.py @@ -73,15 +73,37 @@ def choose_intermediate_trip_purpose(trips, probs_spec, trace_hh_id, trace_label choosers = pd.merge(trips.reset_index(), probs_spec, on=probs_join_cols, how='left').set_index('trip_id') - chunk.log_df(trace_label, 'choosers', choosers) + # select the matching depart range (this should result on in exactly one chooser row per trip) + chooser_probs = \ + (choosers.start >= choosers['depart_range_start']) & \ + (choosers.start <= choosers['depart_range_end']) + + # if we failed to match a row in probs_spec + if chooser_probs.sum() < num_trips: + # this can happen if the spec doesn't have probs matching a trip's probs_join_cols + missing_trip_ids = trips.index[~trips.index.isin(choosers.index[chooser_probs])].values + unmatched_choosers = choosers[choosers.index.isin(missing_trip_ids)] + unmatched_choosers = unmatched_choosers[['person_id', 'start'] + non_purpose_cols] + + # join to persons for better diagnostics + persons = inject.get_table('persons').to_frame() + persons_cols = \ + ['age', 'is_worker', 'is_student', 'is_gradeschool', 'is_highschool', 'is_university'] + unmatched_choosers = pd.merge(unmatched_choosers, persons[persons_cols], + left_on='person_id', right_index=True, how='left') + + file_name = '%s.UNMATCHED_PROBS' % trace_label + logger.error("%s %s of %s trips did not match probs based on join columns %s" % + (trace_label, len(unmatched_choosers), len(choosers), probs_join_cols)) + logger.info("Writing %s unmatched choosers to %s" % (len(unmatched_choosers), file_name,)) + tracing.write_csv(unmatched_choosers, file_name=file_name, transpose=False) + raise RuntimeError("Some trips did not match probs on join columns %s." % probs_join_cols) # select the matching depart range (this should result on in exactly one chooser row per trip) - choosers = choosers[(choosers.start >= choosers['depart_range_start']) & ( - choosers.start <= choosers['depart_range_end'])] + choosers = choosers[chooser_probs] # choosers should now match trips row for row - assert choosers.index.is_unique - assert len(choosers.index) == num_trips + assert choosers.index.identical(trips.index) choices, rands = logit.make_choices( choosers[purpose_cols], diff --git a/activitysim/abm/models/util/cdap.py b/activitysim/abm/models/util/cdap.py index 67c5ea231..391f5ac6c 100644 --- a/activitysim/abm/models/util/cdap.py +++ b/activitysim/abm/models/util/cdap.py @@ -638,7 +638,7 @@ def household_activity_choices(indiv_utils, interaction_coefficients, hhsize, utils = simulate.eval_utilities(spec, choosers, trace_label=trace_label) if len(utils.index) == 0: - return pd.Series() + return pd.Series(dtype='float64') probs = logit.utils_to_probs(utils, trace_label=trace_label) @@ -752,7 +752,7 @@ def extra_hh_member_choices(persons, cdap_fixed_relative_proportions, locals_d, choosers = persons[persons['cdap_rank'] > MAX_HHSIZE] if len(choosers.index) == 0: - return pd.Series() + return pd.Series(dtype='float64') # eval the expression file values = simulate.eval_variables(cdap_fixed_relative_proportions.index, choosers, locals_d) diff --git a/activitysim/abm/models/util/test/test_cdap.py b/activitysim/abm/models/util/test/test_cdap.py index 2dbfcea26..12bda06ff 100644 --- a/activitysim/abm/models/util/test/test_cdap.py +++ b/activitysim/abm/models/util/test/test_cdap.py @@ -4,7 +4,7 @@ import os.path import pandas as pd -import pandas.util.testing as pdt +import pandas.testing as pdt import pytest from .. import cdap diff --git a/activitysim/abm/models/util/test/test_mandatory_tour_frequency.py b/activitysim/abm/models/util/test/test_mandatory_tour_frequency.py index 29a1cda34..8fbe70470 100644 --- a/activitysim/abm/models/util/test/test_mandatory_tour_frequency.py +++ b/activitysim/abm/models/util/test/test_mandatory_tour_frequency.py @@ -5,7 +5,7 @@ import pytest import os import pandas as pd -import pandas.util.testing as pdt +import pandas.testing as pdt from ..tour_frequency import process_mandatory_tours diff --git a/activitysim/abm/models/util/test/test_non_mandatory_tour_frequency.py b/activitysim/abm/models/util/test/test_non_mandatory_tour_frequency.py index b9ca04bf8..7f5702b77 100644 --- a/activitysim/abm/models/util/test/test_non_mandatory_tour_frequency.py +++ b/activitysim/abm/models/util/test/test_non_mandatory_tour_frequency.py @@ -5,7 +5,7 @@ import pytest import os import pandas as pd -import pandas.util.testing as pdt +import pandas.testing as pdt from ..tour_frequency import process_non_mandatory_tours diff --git a/activitysim/abm/models/util/test/test_vectorize_tour_scheduling.py b/activitysim/abm/models/util/test/test_vectorize_tour_scheduling.py index f507e3f59..42575a364 100644 --- a/activitysim/abm/models/util/test/test_vectorize_tour_scheduling.py +++ b/activitysim/abm/models/util/test/test_vectorize_tour_scheduling.py @@ -6,7 +6,7 @@ import pandas as pd import numpy as np -import pandas.util.testing as pdt +import pandas.testing as pdt from activitysim.core import inject diff --git a/activitysim/abm/models/util/tour_frequency.py b/activitysim/abm/models/util/tour_frequency.py index f29f9d53a..186ca24a7 100644 --- a/activitysim/abm/models/util/tour_frequency.py +++ b/activitysim/abm/models/util/tour_frequency.py @@ -213,6 +213,9 @@ def create_tours(tour_counts, tour_category, parent_col='person_id'): # for joint tours, the correct number will be filled in after participation step tours['number_of_participants'] = 1 + # index is arbitrary but don't want any duplicates + tours.reset_index(drop=True, inplace=True) + return tours @@ -525,6 +528,9 @@ def process_joint_tours(joint_tour_frequency, joint_tour_frequency_alts, point_p tour_category='joint', parent_col='household_id') + assert not tours.index.duplicated().any() + assert point_persons.index.name == 'household_id' + # - assign a temp point person to tour so we can create stable index tours['person_id'] = reindex(point_persons.person_id, tours.household_id) tours['origin'] = reindex(point_persons.home_taz, tours.household_id) diff --git a/activitysim/abm/tables/shadow_pricing.py b/activitysim/abm/tables/shadow_pricing.py index c33f4e842..684d87307 100644 --- a/activitysim/abm/tables/shadow_pricing.py +++ b/activitysim/abm/tables/shadow_pricing.py @@ -634,7 +634,7 @@ def buffers_for_shadow_pricing(shadow_pricing_info): for block_key, block_shape in block_shapes.items(): # buffer_size must be int (or p2.7 long), not np.int64 - buffer_size = int(np.prod(block_shape)) + buffer_size = int(np.prod(block_shape, dtype=np.int64)) csz = buffer_size * np.dtype(dtype).itemsize logger.info("allocating shared buffer %s %s buffer_size %s bytes %s (%s)" % diff --git a/activitysim/abm/test/run_mp.py b/activitysim/abm/test/run_mp.py index 78d7e589f..64effbbe2 100644 --- a/activitysim/abm/test/run_mp.py +++ b/activitysim/abm/test/run_mp.py @@ -3,7 +3,7 @@ import os import pandas as pd -import pandas.util.testing as pdt +import pandas.testing as pdt from activitysim.core import pipeline from activitysim.core import inject diff --git a/activitysim/core/chunk.py b/activitysim/core/chunk.py index ce8fac119..054688eef 100644 --- a/activitysim/core/chunk.py +++ b/activitysim/core/chunk.py @@ -97,7 +97,7 @@ def log_df(trace_label, table_name, df): else: shape = df.shape - elements = np.prod(shape) + elements = np.prod(shape, dtype=np.int64) op = 'add' if isinstance(df, pd.Series): diff --git a/activitysim/core/test/test_logit.py b/activitysim/core/test/test_logit.py index b90ce9d6e..d97bf579e 100644 --- a/activitysim/core/test/test_logit.py +++ b/activitysim/core/test/test_logit.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd -import pandas.util.testing as pdt +import pandas.testing as pdt import pytest from ..simulate import eval_variables diff --git a/activitysim/core/test/test_orca.py b/activitysim/core/test/test_orca.py index f040a8c28..84ed8c92e 100644 --- a/activitysim/core/test/test_orca.py +++ b/activitysim/core/test/test_orca.py @@ -369,12 +369,12 @@ def test_update_col(df): pdt.assert_series_equal(wrapped['a'], df['a']) # test 2 - let the update method do the cast - wrapped.update_col_from_series('a', pd.Series(), True) + wrapped.update_col_from_series('a', pd.Series(dtype='float64'), True) pdt.assert_series_equal(wrapped['a'], df['a']) # test 3 - don't cast, should raise an error with pytest.raises(ValueError): - wrapped.update_col_from_series('a', pd.Series()) + wrapped.update_col_from_series('a', pd.Series(dtype='float64')) wrapped.update_col_from_series('a', pd.Series([99], index=['y'])) pdt.assert_series_equal( diff --git a/activitysim/core/test/test_simulate.py b/activitysim/core/test/test_simulate.py index 5943396a4..506cfca9a 100644 --- a/activitysim/core/test/test_simulate.py +++ b/activitysim/core/test/test_simulate.py @@ -6,7 +6,7 @@ import numpy.testing as npt import numpy as np import pandas as pd -import pandas.util.testing as pdt +import pandas.testing as pdt import pytest from .. import inject diff --git a/activitysim/core/test/test_skim.py b/activitysim/core/test/test_skim.py index f37564e6f..7d594f908 100644 --- a/activitysim/core/test/test_skim.py +++ b/activitysim/core/test/test_skim.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd import numpy.testing as npt -import pandas.util.testing as pdt +import pandas.testing as pdt import pytest from .. import skim diff --git a/activitysim/core/test/test_timetable.py b/activitysim/core/test/test_timetable.py index 5d3262ed4..5069cfdf3 100644 --- a/activitysim/core/test/test_timetable.py +++ b/activitysim/core/test/test_timetable.py @@ -4,7 +4,7 @@ from builtins import range import numpy as np import pandas as pd -import pandas.util.testing as pdt +import pandas.testing as pdt import pytest from .. import timetable as tt diff --git a/activitysim/core/test/test_util.py b/activitysim/core/test/test_util.py index 188d85e9f..3e7017dd6 100644 --- a/activitysim/core/test/test_util.py +++ b/activitysim/core/test/test_util.py @@ -3,7 +3,7 @@ import numpy as np import pandas as pd -import pandas.util.testing as pdt +import pandas.testing as pdt import pytest from ..util import reindex diff --git a/activitysim/examples/example_mtc/output/trace/.gitignore b/activitysim/examples/example_mtc/output/trace/.gitignore new file mode 100644 index 000000000..8edb80678 --- /dev/null +++ b/activitysim/examples/example_mtc/output/trace/.gitignore @@ -0,0 +1,3 @@ +*.csv +*.log +*.txt