Skip to content

Commit

Permalink
Use numeric indexing and .take instead of .loc
Browse files Browse the repository at this point in the history
.loc is really slow with large indexes.
We can do things much faster using location-based indexing
instead of label-based indexing.
Here I'm replacing a .loc with a .take.
  • Loading branch information
jiffyclub committed Feb 24, 2015
1 parent 1aa4683 commit 6f7a6e7
Showing 1 changed file with 8 additions and 6 deletions.
14 changes: 8 additions & 6 deletions urbansim/urbanchoice/interaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,16 +52,18 @@ def mnl_interaction_dataset(choosers, alternatives, SAMPLE_SIZE,
# SAMPLE_SIZE >= numalts. That may not happen often in
# practical situations but it should be supported
# because a) why not? and b) testing.
alts_idx = np.arange(len(alternatives))
if SAMPLE_SIZE < numalts:
sample = np.random.choice(
alternatives.index.values, SAMPLE_SIZE * numchoosers)
sample = np.random.choice(alts_idx, SAMPLE_SIZE * numchoosers)
if chosenalts is not None:
# replace the first row for each chooser with
# the currently chosen alternative.
sample[::SAMPLE_SIZE] = chosenalts
# chosenalts -> integer position
sample[::SAMPLE_SIZE] = pd.Series(
alts_idx, index=alternatives.index).loc[chosenalts].values
else:
assert chosenalts is None # if not sampling, must be simulating
sample = np.tile(alternatives.index.values, numchoosers)
sample = np.tile(alts_idx, numchoosers)

if not choosers.index.is_unique:
raise Exception(
Expand All @@ -72,7 +74,7 @@ def mnl_interaction_dataset(choosers, alternatives, SAMPLE_SIZE,
"ERROR: alternatives index is not unique, "
"sample will not work correctly")

alts_sample = alternatives.loc[sample]
alts_sample = alternatives.take(sample)
assert len(alts_sample.index) == SAMPLE_SIZE * len(choosers.index)
alts_sample['join_index'] = np.repeat(choosers.index.values, SAMPLE_SIZE)

Expand All @@ -84,4 +86,4 @@ def mnl_interaction_dataset(choosers, alternatives, SAMPLE_SIZE,
chosen[:, 0] = 1

logger.debug('finish: compute MNL interaction dataset')
return sample, alts_sample, chosen
return alternatives.index.values[sample], alts_sample, chosen

0 comments on commit 6f7a6e7

Please sign in to comment.