Skip to content

Commit

Permalink
small fixes for semcog deployment (#319)
Browse files Browse the repository at this point in the history
* improved validation diagnostics in trip_purpose and various windows-related int32/int64 conversions

* pycodestyle

* deprecations in test code

* fix bug in handling of no viable trips case in choose_trip_destination

* add trace folder to example_mtc output folder

Co-authored-by: Jeffrey Doyle <jeff.doyle@rsginc.com>
Co-authored-by: Jeff Doyle <toliwaga@gmail.com>
  • Loading branch information
3 people authored May 6, 2020
1 parent 4ff7510 commit 1041bf6
Show file tree
Hide file tree
Showing 19 changed files with 55 additions and 23 deletions.
3 changes: 2 additions & 1 deletion activitysim/abm/models/atwork_subtour_scheduling.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import logging

import pandas as pd
import numpy as np

from activitysim.core import simulate
from activitysim.core import tracing
Expand Down Expand Up @@ -61,7 +62,7 @@ def atwork_subtour_scheduling(
model_settings, trace_label)

# parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
parent_tour_ids = subtours.parent_tour_id.astype(int).unique()
parent_tour_ids = subtours.parent_tour_id.astype(np.int64).unique()
parent_tours = pd.DataFrame({'tour_id': parent_tour_ids}, index=parent_tour_ids)
parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True)

Expand Down
2 changes: 1 addition & 1 deletion activitysim/abm/models/trip_destination.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ def choose_trip_destination(
t0 = print_elapsed_time("%s.trip_destination_sample" % trace_label, t0)

if trips.empty:
return pd.Series(index=trips.index), None
return pd.Series(index=trips.index).to_frame('choice'), None

# - compute logsums
destination_sample = compute_logsums(
Expand Down
32 changes: 27 additions & 5 deletions activitysim/abm/models/trip_purpose.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,15 +73,37 @@ def choose_intermediate_trip_purpose(trips, probs_spec, trace_hh_id, trace_label
choosers = pd.merge(trips.reset_index(), probs_spec, on=probs_join_cols,
how='left').set_index('trip_id')

chunk.log_df(trace_label, 'choosers', choosers)
# select the matching depart range (this should result in exactly one chooser row per trip)
chooser_probs = \
(choosers.start >= choosers['depart_range_start']) & \
(choosers.start <= choosers['depart_range_end'])

# if we failed to match a row in probs_spec
if chooser_probs.sum() < num_trips:
# this can happen if the spec doesn't have probs matching a trip's probs_join_cols
missing_trip_ids = trips.index[~trips.index.isin(choosers.index[chooser_probs])].values
unmatched_choosers = choosers[choosers.index.isin(missing_trip_ids)]
unmatched_choosers = unmatched_choosers[['person_id', 'start'] + non_purpose_cols]

# join to persons for better diagnostics
persons = inject.get_table('persons').to_frame()
persons_cols = \
['age', 'is_worker', 'is_student', 'is_gradeschool', 'is_highschool', 'is_university']
unmatched_choosers = pd.merge(unmatched_choosers, persons[persons_cols],
left_on='person_id', right_index=True, how='left')

file_name = '%s.UNMATCHED_PROBS' % trace_label
logger.error("%s %s of %s trips did not match probs based on join columns %s" %
(trace_label, len(unmatched_choosers), len(choosers), probs_join_cols))
logger.info("Writing %s unmatched choosers to %s" % (len(unmatched_choosers), file_name,))
tracing.write_csv(unmatched_choosers, file_name=file_name, transpose=False)
raise RuntimeError("Some trips did not match probs on join columns %s." % probs_join_cols)

# select the matching depart range (this should result in exactly one chooser row per trip)
choosers = choosers[(choosers.start >= choosers['depart_range_start']) & (
choosers.start <= choosers['depart_range_end'])]
choosers = choosers[chooser_probs]

# choosers should now match trips row for row
assert choosers.index.is_unique
assert len(choosers.index) == num_trips
assert choosers.index.identical(trips.index)

choices, rands = logit.make_choices(
choosers[purpose_cols],
Expand Down
4 changes: 2 additions & 2 deletions activitysim/abm/models/util/cdap.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,7 +638,7 @@ def household_activity_choices(indiv_utils, interaction_coefficients, hhsize,
utils = simulate.eval_utilities(spec, choosers, trace_label=trace_label)

if len(utils.index) == 0:
return pd.Series()
return pd.Series(dtype='float64')

probs = logit.utils_to_probs(utils, trace_label=trace_label)

Expand Down Expand Up @@ -752,7 +752,7 @@ def extra_hh_member_choices(persons, cdap_fixed_relative_proportions, locals_d,
choosers = persons[persons['cdap_rank'] > MAX_HHSIZE]

if len(choosers.index) == 0:
return pd.Series()
return pd.Series(dtype='float64')

# eval the expression file
values = simulate.eval_variables(cdap_fixed_relative_proportions.index, choosers, locals_d)
Expand Down
2 changes: 1 addition & 1 deletion activitysim/abm/models/util/test/test_cdap.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os.path

import pandas as pd
import pandas.util.testing as pdt
import pandas.testing as pdt
import pytest

from .. import cdap
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest
import os
import pandas as pd
import pandas.util.testing as pdt
import pandas.testing as pdt
from ..tour_frequency import process_mandatory_tours


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest
import os
import pandas as pd
import pandas.util.testing as pdt
import pandas.testing as pdt
from ..tour_frequency import process_non_mandatory_tours


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pandas as pd
import numpy as np

import pandas.util.testing as pdt
import pandas.testing as pdt

from activitysim.core import inject

Expand Down
6 changes: 6 additions & 0 deletions activitysim/abm/models/util/tour_frequency.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,9 @@ def create_tours(tour_counts, tour_category, parent_col='person_id'):
# for joint tours, the correct number will be filled in after participation step
tours['number_of_participants'] = 1

# index is arbitrary but don't want any duplicates
tours.reset_index(drop=True, inplace=True)

return tours


Expand Down Expand Up @@ -525,6 +528,9 @@ def process_joint_tours(joint_tour_frequency, joint_tour_frequency_alts, point_p
tour_category='joint',
parent_col='household_id')

assert not tours.index.duplicated().any()
assert point_persons.index.name == 'household_id'

# - assign a temp point person to tour so we can create stable index
tours['person_id'] = reindex(point_persons.person_id, tours.household_id)
tours['origin'] = reindex(point_persons.home_taz, tours.household_id)
Expand Down
2 changes: 1 addition & 1 deletion activitysim/abm/tables/shadow_pricing.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,7 +634,7 @@ def buffers_for_shadow_pricing(shadow_pricing_info):
for block_key, block_shape in block_shapes.items():

# buffer_size must be a built-in int (or a Python 2.7 long), not np.int64
buffer_size = int(np.prod(block_shape))
buffer_size = int(np.prod(block_shape, dtype=np.int64))

csz = buffer_size * np.dtype(dtype).itemsize
logger.info("allocating shared buffer %s %s buffer_size %s bytes %s (%s)" %
Expand Down
2 changes: 1 addition & 1 deletion activitysim/abm/test/run_mp.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os

import pandas as pd
import pandas.util.testing as pdt
import pandas.testing as pdt

from activitysim.core import pipeline
from activitysim.core import inject
Expand Down
2 changes: 1 addition & 1 deletion activitysim/core/chunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def log_df(trace_label, table_name, df):
else:

shape = df.shape
elements = np.prod(shape)
elements = np.prod(shape, dtype=np.int64)
op = 'add'

if isinstance(df, pd.Series):
Expand Down
2 changes: 1 addition & 1 deletion activitysim/core/test/test_logit.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np
import pandas as pd

import pandas.util.testing as pdt
import pandas.testing as pdt
import pytest

from ..simulate import eval_variables
Expand Down
4 changes: 2 additions & 2 deletions activitysim/core/test/test_orca.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,12 +369,12 @@ def test_update_col(df):
pdt.assert_series_equal(wrapped['a'], df['a'])

# test 2 - let the update method do the cast
wrapped.update_col_from_series('a', pd.Series(), True)
wrapped.update_col_from_series('a', pd.Series(dtype='float64'), True)
pdt.assert_series_equal(wrapped['a'], df['a'])

# test 3 - don't cast, should raise an error
with pytest.raises(ValueError):
wrapped.update_col_from_series('a', pd.Series())
wrapped.update_col_from_series('a', pd.Series(dtype='float64'))

wrapped.update_col_from_series('a', pd.Series([99], index=['y']))
pdt.assert_series_equal(
Expand Down
2 changes: 1 addition & 1 deletion activitysim/core/test/test_simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy.testing as npt
import numpy as np
import pandas as pd
import pandas.util.testing as pdt
import pandas.testing as pdt
import pytest

from .. import inject
Expand Down
2 changes: 1 addition & 1 deletion activitysim/core/test/test_skim.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np
import pandas as pd
import numpy.testing as npt
import pandas.util.testing as pdt
import pandas.testing as pdt
import pytest

from .. import skim
Expand Down
2 changes: 1 addition & 1 deletion activitysim/core/test/test_timetable.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from builtins import range
import numpy as np
import pandas as pd
import pandas.util.testing as pdt
import pandas.testing as pdt
import pytest

from .. import timetable as tt
Expand Down
2 changes: 1 addition & 1 deletion activitysim/core/test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import numpy as np
import pandas as pd
import pandas.util.testing as pdt
import pandas.testing as pdt
import pytest

from ..util import reindex
Expand Down
3 changes: 3 additions & 0 deletions activitysim/examples/example_mtc/output/trace/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
*.csv
*.log
*.txt

0 comments on commit 1041bf6

Please sign in to comment.