Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BayDAG Contribution #3: CDAP estimation with joint component #769

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions activitysim/abm/models/cdap.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,11 @@ def cdap_simulate(
for hhsize in range(2, cdap.MAX_HHSIZE + 1):
spec = cdap.get_cached_spec(state, hhsize)
estimator.write_table(spec, "spec_%s" % hhsize, append=False)
if add_joint_tour_utility:
joint_spec = cdap.get_cached_joint_spec(hhsize)
estimator.write_table(
joint_spec, "joint_spec_%s" % hhsize, append=False
)

logger.info("Running cdap_simulate with %d persons", len(persons_merged.index))

Expand Down Expand Up @@ -215,6 +220,11 @@ def cdap_simulate(
if estimator:
estimator.write_choices(choices)
choices = estimator.get_survey_values(choices, "persons", "cdap_activity")
if add_joint_tour_utility:
hh_joint.index.name = "household_id"
hh_joint = estimator.get_survey_values(
hh_joint, "households", "has_joint_tour"
)
estimator.write_override_choices(choices)
estimator.end_estimation()

Expand Down
144 changes: 127 additions & 17 deletions activitysim/estimation/larch/cdap.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@

_logger = logging.getLogger(logger_name)

MAX_HHSIZE = 5

def generate_alternatives(n_persons):

def generate_alternatives(n_persons, add_joint=False):
"""
Generate a dictionary of CDAP alternatives.

Expand All @@ -41,8 +43,14 @@ def generate_alternatives(n_persons):
alt_names = list(
"".join(i) for i in itertools.product(basic_patterns, repeat=n_persons)
)
if add_joint:
pattern = r"[MN]"
joint_alts = [
alt + "J" for alt in alt_names if len(re.findall(pattern, alt)) >= 2
]
alt_names = alt_names + joint_alts
alt_codes = np.arange(1, len(alt_names) + 1)
return Dict(zip(alt_names, alt_codes))
return dict(zip(alt_names, alt_codes))


def apply_replacements(expression, prefix, tokens):
Expand All @@ -69,7 +77,9 @@ def apply_replacements(expression, prefix, tokens):
return expression


def cdap_base_utility_by_person(model, n_persons, spec, alts=None, value_tokens=()):
def cdap_base_utility_by_person(
model, n_persons, spec, alts=None, value_tokens=(), add_joint=False
):
"""
Build the base utility by person for each pattern.

Expand Down Expand Up @@ -102,7 +112,7 @@ def cdap_base_utility_by_person(model, n_persons, spec, alts=None, value_tokens=
model.utility_co[3] += X(spec.Expression[i]) * P(spec.loc[i, "H"])
else:
if alts is None:
alts = generate_alternatives(n_persons)
alts = generate_alternatives(n_persons, add_joint)
person_numbers = range(1, n_persons + 1)
for pnum in person_numbers:
for i in spec.index:
Expand Down Expand Up @@ -222,13 +232,71 @@ def cdap_interaction_utility(model, n_persons, alts, interaction_coef, coefficie
model.utility_co[anum] += linear_component


def cdap_split_data(households, values):
def cdap_joint_tour_utility(model, n_persons, alts, joint_coef, values):
"""
FIXME: Not fully implemented!!!!

Code is adapted from the cdap model in ActivitySim with the joint tour component
Structure is pretty much in place, but dependencies need to be filtered out.
"""

for row in joint_coef.itertuples():
for aname, anum in alts.items():
# only adding joint tour utility to alternatives with joint tours
if "J" not in aname:
continue
expression = row.Expression
dependency_name = row.dependency
coefficient = row.coefficient

# dealing with dependencies
if dependency_name in ["M_px", "N_px", "H_px"]:
if "_pxprod" in expression:
prod_conds = row.Expression.split("|")
expanded_expressions = [
tup
for tup in itertools.product(
range(len(prod_conds)), repeat=n_persons
)
]
for expression_tup in expanded_expressions:
expression_list = []
dependency_list = []
for counter in range(len(expression_tup)):
expression_list.append(
prod_conds[expression_tup[counter]].replace(
"xprod", str(counter + 1)
)
)
if expression_tup[counter] == 0:
dependency_list.append(
dependency_name.replace("x", str(counter + 1))
)

expression_value = "&".join(expression_list)
# FIXME only apply to alternative if dependency satisfied
bug
model.utility_co[anum] += X(expression_value) * P(coefficient)

elif "_px" in expression:
for pnum in range(1, n_persons + 1):
dependency_name = row.dependency.replace("x", str(pnum))
expression = row.Expression.replace("x", str(pnum))
# FIXME only apply to alternative if dependency satisfied
bug
model.utility_co[anum] += X(expression) * P(coefficient)

else:
model.utility_co[anum] += X(expression) * P(coefficient)


def cdap_split_data(households, values, add_joint):
if "cdap_rank" not in values:
raise ValueError("assign cdap_rank to values first")
# only process the first 5 household members
values = values[values.cdap_rank <= 5]
values = values[values.cdap_rank <= MAX_HHSIZE]
cdap_data = {}
for hhsize, hhs_part in households.groupby(households.hhsize.clip(1, 5)):
for hhsize, hhs_part in households.groupby(households.hhsize.clip(1, MAX_HHSIZE)):
if hhsize == 1:
v = pd.merge(values, hhs_part.household_id, on="household_id").set_index(
"household_id"
Expand All @@ -241,16 +309,31 @@ def cdap_split_data(households, values):
)
v.columns = [f"p{i[1]}_{i[0]}" for i in v.columns]
for agglom in ["override_choice", "model_choice"]:
v[agglom] = v[[f"p{p}_{agglom}" for p in range(1, hhsize + 1)]].sum(1)
v[agglom] = (
v[[f"p{p}_{agglom}" for p in range(1, hhsize + 1)]]
.fillna("H")
.sum(1)
)
if add_joint:
joint_tour_indicator = (
hhs_part.set_index("household_id")
.reindex(v.index)
.has_joint_tour
)
pd.testing.assert_index_equal(v.index, joint_tour_indicator.index)
v[agglom] = np.where(
joint_tour_indicator == 1, v[agglom] + "J", v[agglom]
)
cdap_data[hhsize] = v

return cdap_data


def cdap_dataframes(households, values):
data = cdap_split_data(households, values)
def cdap_dataframes(households, values, add_joint):
data = cdap_split_data(households, values, add_joint)
dfs = {}
for hhsize in data.keys():
alts = generate_alternatives(hhsize)
alts = generate_alternatives(hhsize, add_joint)
dfs[hhsize] = DataFrames(
co=data[hhsize],
alt_names=alts.keys(),
Expand Down Expand Up @@ -298,6 +381,7 @@ def cdap_data(
spec1_file="{name}_INDIV_AND_HHSIZE1_SPEC.csv",
settings_file="{name}_model_settings.yaml",
chooser_data_file="{name}_values_combined.csv",
joint_coeffs_file="{name}_joint_tour_coefficients.csv",
):
edb_directory = edb_directory.format(name=name)
if not os.path.exists(edb_directory):
Expand Down Expand Up @@ -328,8 +412,6 @@ def read_yaml(filename, **kwargs):
if person_type_map is None:
raise KeyError("PERSON_TYPE_MAP missing from cdap_settings.yaml")

person_rank = cdap.assign_cdap_rank(None, persons, person_type_map)

coefficients = read_csv(
coefficients_file,
index_col="coefficient_name",
Expand All @@ -343,9 +425,29 @@ def read_yaml(filename, **kwargs):
comment="#",
)

try:
joint_coef = read_csv(
joint_coeffs_file,
# dtype={"interaction_ptypes": str},
# keep_default_na=False,
comment="#",
)
add_joint = True
except FileNotFoundError:
joint_coef = None
add_joint = False
print("Including joint tour utiltiy?:", add_joint)

spec1 = read_csv(spec1_file, comment="#")
values = read_csv(chooser_data_file, comment="#")
values["cdap_rank"] = person_rank
person_rank = cdap.assign_cdap_rank(
None,
persons[persons.household_id.isin(values.household_id)]
.set_index("person_id")
.reindex(values.person_id),
person_type_map,
)
values["cdap_rank"] = person_rank.values

return Dict(
edb_directory=Path(edb_directory),
Expand All @@ -355,6 +457,8 @@ def read_yaml(filename, **kwargs):
coefficients=coefficients,
households=hhs,
settings=settings,
joint_coef=joint_coef,
add_joint=add_joint,
)


Expand All @@ -367,6 +471,7 @@ def cdap_model(
spec1_file="{name}_INDIV_AND_HHSIZE1_SPEC.csv",
settings_file="{name}_model_settings.yaml",
chooser_data_file="{name}_values_combined.csv",
joint_coeffs_file="{name}_joint_tour_coefficients.csv",
return_data=False,
):
d = cdap_data(
Expand All @@ -379,15 +484,17 @@ def cdap_model(
spec1_file=spec1_file,
settings_file=settings_file,
chooser_data_file=chooser_data_file,
joint_coeffs_file=joint_coeffs_file,
)

households = d.households
values = d.person_data
spec1 = d.spec1
interaction_coef = d.interaction_coef
coefficients = d.coefficients
add_joint = d.add_joint

cdap_dfs = cdap_dataframes(households, values)
cdap_dfs = cdap_dataframes(households, values, add_joint)
m = {}
_logger.info(f"building for model 1")
m[1] = Model(dataservice=cdap_dfs[1])
Expand All @@ -400,12 +507,15 @@ def cdap_model(
interaction_coef["cardinality"] = interaction_coef[
"interaction_ptypes"
].str.len()
for s in [2, 3, 4, 5]:
for s in range(2, MAX_HHSIZE + 1):
# for s in [2, 3, 4, 5]:
_logger.info(f"building for model {s}")
m[s] = Model(dataservice=cdap_dfs[s])
alts = generate_alternatives(s)
alts = generate_alternatives(s, add_joint)
cdap_base_utility_by_person(m[s], s, spec1, alts, values.columns)
cdap_interaction_utility(m[s], s, alts, interaction_coef, coefficients)
# if add_joint:
# cdap_joint_tour_utility(m[s], s, alts, d.joint_coef, values)
m[s].choice_any = True
m[s].availability_any = True

Expand Down
Loading