ActivitySim · fscottfoti · Jan 5, 2015 · Dec 8, 2014 · Dec 9, 2014 · Dec 9, 2014
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,7 @@
+example/data/*
+
+.ipynb_checkpoints
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]

diff --git a/activitysim/activitysim.py b/activitysim/activitysim.py
@@ -0,0 +1,71 @@
+import urbansim.sim.simulation as sim
+from urbansim.urbanchoice import interaction, mnl
+import pandas as pd
+import numpy as np
+import os
+
+
+def random_rows(df, n):
+    """
+    Return a random sample of rows from a DataFrame, drawn without
+    replacement.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Table to sample from.
+    n : int
+        Number of rows requested.  When `n` is larger than `len(df)`
+        every row is returned (in random order) rather than raising.
+
+    Returns
+    -------
+    pandas.DataFrame
+        The sampled rows, picked by position.
+    """
+    # np.random.choice(..., replace=False) raises ValueError when asked
+    # for more items than exist, so cap the request at the table size
+    n = min(n, len(df))
+    return df.take(np.random.choice(len(df), size=n, replace=False))
+
+
+def read_model_spec(fname,
+                    description_name="Description",
+                    expression_name="Expression"):
+    """
+    Load a model specification from a CSV file and reshape it into a
+    stacked Series.
+
+    Parameters
+    ----------
+    fname : str
+        Passed straight to pandas.read_csv.
+    description_name : str, optional
+        Name of the human-readable description column; it is dropped.
+    expression_name : str, optional
+        Name of the column holding the expressions; it becomes the
+        first level of the result's index.
+
+    Returns
+    -------
+    pandas.Series
+        The remaining columns stacked into rows, so each value is
+        indexed by (expression, column name).
+    """
+    spec = pd.read_csv(fname)
+    # the description column is only there for people reading the file
+    spec = spec.drop(description_name, axis=1)
+    # index by expression, then pivot the remaining columns into rows
+    spec = spec.set_index(expression_name)
+    return spec.stack()
+
+
+def identity_matrix(alt_names):
+    """
+    Build a square DataFrame with ones on the diagonal and zeros
+    elsewhere, labeled on both axes by `alt_names`.
+    """
+    size = len(alt_names)
+    eye = np.identity(size)
+    return pd.DataFrame(eye, index=alt_names, columns=alt_names)
+
+
+def simple_simulate(choosers, alternatives, spec):
+    """
+    Simulate one choice per chooser with a multinomial logit model
+    described by `spec`.
+
+    Parameters
+    ----------
+    choosers : pandas.DataFrame
+        One row per decision maker.
+    alternatives : pandas.DataFrame
+        One row per alternative.
+    spec : pandas.Series
+        Coefficients.  Each index entry is a pair: item 0 is an
+        expression and item 1 is the name of a column of the merged
+        chooser/alternative table that the expression is multiplied
+        by.  Expressions starting with "@" are run through Python's
+        eval (with the merged table available as `df`); all others go
+        through DataFrame.eval.
+
+    Returns
+    -------
+    choices : pandas.Series
+        Output of mnl.mnl_simulate, indexed like `choosers`.
+    model_design : pandas.DataFrame
+        One column per spec entry, named by the evaluated expression.
+    """
+    coeffs = spec.values
+
+    # merge choosers and alternatives
+    _, df, _ = interaction.mnl_interaction_dataset(
+        choosers, alternatives, len(alternatives))
+
+    # evaluate the expressions to build the final matrix
+    columns, names = [], []
+    for expr in spec.index:
+        if expr[0].startswith("@"):
+            expr = "({}) * df.{}".format(expr[0][1:], expr[1])
+            try:
+                # NOTE: eval runs arbitrary Python taken from the spec
+                # file - only use spec files from a trusted source
+                s = eval(expr)
+            except Exception:
+                print("Failed with Python eval:\n%s" % expr)
+                # bare raise keeps the original traceback
+                raise
+        else:
+            expr = "({}) * {}".format(*expr)
+            try:
+                s = df.eval(expr)
+            except Exception:
+                print("Failed with DataFrame eval:\n%s" % expr)
+                raise
+        names.append(expr)
+        columns.append(s)
+    model_design = pd.concat(columns, axis=1)
+    model_design.columns = names
+
+    # look for columns that never vary, using a random sample of at most
+    # 100000 rows; sampling without replacement fails when asked for
+    # more rows than exist, so cap the sample at the table size
+    sample_size = min(len(model_design), 100000)
+    stats = random_rows(model_design, sample_size).describe().transpose()
+    stats = stats[stats["std"] == 0]
+    if len(stats):
+        print("WARNING: Describe of columns with no variability:\n%s"
+              % (stats,))
+
+    choices = mnl.mnl_simulate(
+        model_design.values,
+        coeffs,
+        numalts=len(alternatives),
+        returnprobs=False)
+
+    return pd.Series(choices, index=choosers.index), model_design
diff --git a/activitysim/defaults/__init__.py b/activitysim/defaults/__init__.py
diff --git a/activitysim/defaults/datasources.py b/activitysim/defaults/datasources.py
@@ -0,0 +1,74 @@
+import numpy as np
+import pandas as pd
+import os
+import uuid
+import yaml
+from urbansim.utils import misc
+import urbansim.sim.simulation as sim
+from .. import activitysim as asim
+
+import warnings
+
+# ignore warnings of the PerformanceWarning category from pandas' pytables
+# module
+warnings.filterwarnings('ignore', category=pd.io.pytables.PerformanceWarning)
+# None turns off pandas' chained-assignment check for the whole process
+pd.options.mode.chained_assignment = None
+
+
+@sim.injectable('settings', cache=True)
+def settings():
+    """
+    Parse settings.yaml from the configs directory.
+
+    The parsed settings are returned (this function is registered as
+    the `settings` injectable with cache=True) and are also attached
+    to the `sim` module as `sim.settings`.
+    """
+    with open(os.path.join(misc.configs_dir(), "settings.yaml")) as f:
+        # safe_load only builds plain Python objects; yaml.load without
+        # an explicit Loader can construct arbitrary objects and is
+        # rejected by newer PyYAML releases
+        settings = yaml.safe_load(f)
+        # monkey patch on the settings object since it's pretty global
+        # but will also be available as injectable
+        sim.settings = settings
+        return settings
+
+
+@sim.injectable('run_number')
+def run_number():
+    """
+    Expose the value of misc.get_run_number() as an injectable.
+    """
+    number = misc.get_run_number()
+    return number
+
+
+@sim.injectable('uuid', cache=True)
+def uuid_hex():
+    """
+    Generate a random UUID and return its hexadecimal string form.
+    """
+    identifier = uuid.uuid4()
+    return identifier.hex
+
+
+@sim.injectable('store', cache=True)
+def hdfstore(settings):
+    """
+    Open the HDF5 file named by settings["store"], located in the data
+    directory, in read-only mode.
+    """
+    store_path = os.path.join(misc.data_dir(), settings["store"])
+    return pd.HDFStore(store_path, mode='r')
+
+
+@sim.injectable("scenario")
+def scenario(settings):
+    """
+    Return the "scenario" entry of the settings.
+    """
+    scenario_name = settings["scenario"]
+    return scenario_name
+
+
+@sim.table(cache=True)
+def land_use(store):
+    """
+    Read the "land_use/taz_data" table from the store.
+    """
+    taz_data = store["land_use/taz_data"]
+    return taz_data
+
+
+@sim.table(cache=True)
+def accessibility(store):
+    """
+    Read the "skims/accessibility" table from the store and upper-case
+    its column names.
+    """
+    table = store["skims/accessibility"]
+    upper_names = [name.upper() for name in table.columns]
+    table.columns = upper_names
+    return table
+
+
+@sim.table(cache=True)
+def households(store, settings):
+    """
+    Read the "households" table from the store.
+
+    When the settings contain "households_sample_size", a random sample
+    of that many rows is returned instead of the whole table.
+    """
+    if "households_sample_size" not in settings:
+        return store["households"]
+    all_households = store["households"]
+    sample_size = settings["households_sample_size"]
+    return asim.random_rows(all_households, sample_size)
+
+
+@sim.table(cache=True)
+def persons(store):
+    """
+    Read the "persons" table from the store.
+    """
+    table = store["persons"]
+    return table
+
+
+# register broadcasts from land_use and accessibility onto households:
+# the index of each cast table is paired with the households TAZ column
+# (presumably what lets sim.merge_tables join these tables - confirm
+# against the urbansim documentation)
+sim.broadcast('land_use', 'households', cast_index=True, onto_on='TAZ')
+sim.broadcast('accessibility', 'households', cast_index=True, onto_on='TAZ')
diff --git a/activitysim/defaults/variables.py b/activitysim/defaults/variables.py
@@ -0,0 +1,73 @@
+import urbansim.sim.simulation as sim
+from activitysim.defaults import datasources
+
+
+@sim.column("households")
+def income_in_thousands(households):
+    """
+    Household income divided by 1000.
+    """
+    # NOTE(review): under Python 2 an integer income column may be
+    # floor-divided here - confirm the dtype of households.income
+    income = households.income
+    return income / 1000
+
+
+@sim.column("households")
+def drivers(households, persons):
+    """
+    Number of persons aged 16 or over in each household.
+    """
+    # we assume that everyone 16 and older is a potential driver
+    eligible = persons.local.query("16 <= age")
+    per_household = eligible.groupby("household_id").size()
+    # align to the households index; households without any matching
+    # person get a count of zero
+    return per_household.reindex(households.index).fillna(0)
+
+
+@sim.column("households")
+def num_young_children(households, persons):
+    """
+    Number of persons aged 4 or under in each household.
+    """
+    matching = persons.local.query("age <= 4")
+    per_household = matching.groupby("household_id").size()
+    # align to the households index; households without any matching
+    # person get a count of zero
+    return per_household.reindex(households.index).fillna(0)
+
+
+@sim.column("households")
+def num_children(households, persons):
+    """
+    Number of persons aged 5 through 15 in each household.
+    """
+    matching = persons.local.query("5 <= age <= 15")
+    per_household = matching.groupby("household_id").size()
+    # align to the households index; households without any matching
+    # person get a count of zero
+    return per_household.reindex(households.index).fillna(0)
+
+
+@sim.column("households")
+def num_adolescents(households, persons):
+    """
+    Number of persons aged 16 or 17 in each household.
+    """
+    matching = persons.local.query("16 <= age <= 17")
+    per_household = matching.groupby("household_id").size()
+    # align to the households index; households without any matching
+    # person get a count of zero
+    return per_household.reindex(households.index).fillna(0)
+
+
+@sim.column("households")
+def num_college_age(households, persons):
+    """
+    Number of persons aged 18 through 24 in each household.
+    """
+    matching = persons.local.query("18 <= age <= 24")
+    per_household = matching.groupby("household_id").size()
+    # align to the households index; households without any matching
+    # person get a count of zero
+    return per_household.reindex(households.index).fillna(0)
+
+
+@sim.column("households")
+def num_young_adults(households, persons):
+    """
+    Number of persons aged 25 through 34 in each household.
+    """
+    matching = persons.local.query("25 <= age <= 34")
+    per_household = matching.groupby("household_id").size()
+    # align to the households index; households without any matching
+    # person get a count of zero
+    return per_household.reindex(households.index).fillna(0)
+
+
+@sim.column("land_use")
+def household_density(land_use):
+    """
+    Total households divided by total acres.
+    """
+    households_total = land_use.total_households
+    acres = land_use.total_acres
+    return households_total / acres
+
+
+@sim.column("land_use")
+def employment_density(land_use):
+    """
+    Total employment divided by total acres.
+    """
+    employment_total = land_use.total_employment
+    acres = land_use.total_acres
+    return employment_total / acres
+
+
+@sim.column("land_use")
+def density_index(land_use):
+    """
+    Product of household and employment density divided by their sum.
+    """
+    hh_density = land_use.household_density
+    emp_density = land_use.employment_density
+    # NOTE(review): where both densities are zero this is 0 / 0 -
+    # confirm how downstream expressions should treat that case
+    return (hh_density * emp_density) / (hh_density + emp_density)
+
+
+@sim.column("land_use")
+def county_name(land_use, settings):
+    """
+    Translate county_id into a county name using the "county_map"
+    setting, which maps names to ids.
+    """
+    assert "county_map" in settings
+    # the setting is keyed by name, so flip it to look names up by id
+    name_by_id = dict(
+        (county_id, name)
+        for name, county_id in settings["county_map"].items())
+    return land_use.county_id.map(name_by_id)
diff --git a/example/configs/auto_ownership_coeffs.csv b/example/configs/auto_ownership_coeffs.csv
@@ -0,0 +1 @@
+Description,Expression,cars0,cars1,cars2,cars3,cars42 Adults (age 16+),drivers==2,,0,3.0773,3.1962,2.66163 Adults (age 16+),drivers==3,,0,3.5401,5.5131,5.2084+ Adults (age 16+),drivers>3,,2.0107,6.3662,8.5148,9.5807Persons age 16-17,num_adolescents,,0,-0.881,-1.7313,-1.7313Persons age 18-24,num_college_age,,-0.4087,-1.0095,-1.0107,-1.0107Persons age 35-34,num_young_adults,,0,-0.4849,-0.8596,-0.8596Presence of children age 0-4,num_young_children>0,,0.3669,0.7627,0.7627,0.7627Presence of children age 5-17,(num_children+num_adolescents)>0,,0.0158,0.2936,0.4769,0.4769"Number of workers, capped at 3",@df.workers.clip(upper=3),,0,0.2936,0.6389,0.8797"Piecewise Linear household income, $0-30k","@df.income_in_thousands.clip(0, 30)",,0.0383,0.054,0.0559,0.0619"Piecewise Linear household income, $30-75k","@(df.income_in_thousands-30).clip(0, 45)",,0,0.0083,0.011,0.0147"Piecewise Linear household income, $75k+, capped at $125k","@(df.income_in_thousands-75).clip(0, 50)",,0,0.0083,0.011,0.0147"Density index up to 10, if 0 workers","@(df.workers==0)*df.density_index.clip(0, 10)",,0,-0.2028,-0.3654,-0.3654"Density index in excess of 10, if 0 workers",@(df.workers==0)*(df.density_index-10).clip(0),,-0.0152,-0.1106,-0.1766,-0.1766"Density index up to 10, if 1+ workers","@(df.workers>0)*df.density_index.clip(0, 10)",,0,-0.2028,-0.3654,-0.3654"Density index in excess of 10, if 1+ workers",@(df.workers>0)*(df.density_index-10).clip(0),,-0.0152,-0.1106,-0.1766,-0.1766Constants,@1,,1.1865,-1.0846,-3.2502,-5.313San Francisco county,county_name == 'San Francisco',,0.4259,0.4683,0.1458,0.1458Solano county,county_name == 'Solano',,-0.566,-0.4429,-0.2372,-0.2372Napa county,county_name == 'Napa',,-0.566,-0.4429,-0.2372,-0.2372Sonoma county,county_name == 'Sonoma',,-0.566,-0.4429,-0.2372,-0.2372Marin county,county_name == 'Marin',,-0.2434,0,0,0"Retail accessibility (0.66*PK + 0.34*OP) by auto, if 0 
workers",(workers==0)*(0.66*AUTOPEAKRETAIL+0.34*AUTOOFFPEAKRETAIL),,0.0626,0.0626,0.0626,0.0626"Retail accessibility (0.66*PK + 0.34*OP) by auto, if 1+ workers",(workers>0)*(0.66*AUTOPEAKRETAIL+0.34*AUTOOFFPEAKRETAIL),,0.1646,0.1646,0.1646,0.1646"Retail accessibility (0.66*PK + 0.34*OP) by transit, if 0 workers",(workers==0)*(0.66*TRANSITPEAKRETAIL+0.34*TRANSITOFFPEAKRETAIL),,-0.3053,-0.3053,-0.3053,-0.3053"Retail accessibility (0.66*PK + 0.34*OP) by transit, if 1+ workers",(workers>0)*(0.66*TRANSITPEAKRETAIL+0.34*TRANSITOFFPEAKRETAIL),,-0.5117,-0.5117,-0.5117,-0.5117"Retail accessibility by non-motorized, if 0 workers",(workers==0)*NONMOTORIZEDRETAIL,,-0.03,-0.03,-0.03,-0.03"Retail accessibility by non-motorized, if 1+ workers",(workers>0)*NONMOTORIZEDRETAIL,,-0.03,-0.03,-0.03,-0.03"Auto time savings per worker (over walk or transit, max 120) to work",workTourAutoTimeSavings/workers,,0.4707,0.6142,0.5705,0.7693

diff --git a/example/configs/settings.yaml b/example/configs/settings.yaml
@@ -0,0 +1,14 @@
+store: mtc_asim.h5
+
+households_sample_size: 100000
+
+county_map:
+    San Francisco: 1
+    San Mateo: 2
+    Santa Clara: 3
+    Alameda: 4
+    Contra Costa: 5
+    Solano: 6
+    Napa: 7
+    Sonoma: 8
+    Marin: 9
diff --git a/example/data/README.md b/example/data/README.md
@@ -0,0 +1 @@
+Keep data here
diff --git a/example/models.py b/example/models.py
@@ -0,0 +1,35 @@
+import urbansim.sim.simulation as sim
+import os
+from activitysim import activitysim as asim
+
+
+@sim.table()
+def auto_alts():
+    """
+    Identity-matrix table of the five alternatives cars0 ... cars4.
+    """
+    alt_names = []
+    for count in range(5):
+        alt_names.append("cars%d" % count)
+    return asim.identity_matrix(alt_names)
+
+
+@sim.injectable()
+def auto_ownership_spec():
+    """
+    Read the auto ownership coefficients file from the configs
+    directory and keep the first 4*26 stacked entries.
+    """
+    spec_path = os.path.join('configs', "auto_ownership_coeffs.csv")
+    full_spec = asim.read_model_spec(spec_path)
+    # NOTE(review): presumably 4 coefficients for each of the first 26
+    # expressions, leaving out the trailing rows - confirm
+    return full_spec.head(4*26)
+
+
+@sim.model()
+def auto_ownership_simulate(households,
+                            auto_alts,
+                            auto_ownership_spec,
+                            land_use,
+                            accessibility):
+    """
+    Run the auto ownership model.
+
+    Merges the households, land_use and accessibility tables into the
+    choosers, simulates a choice for each one against the auto_alts
+    alternatives, prints the tally of choices and stores them as the
+    "auto_ownership" column of the households table.
+
+    Returns the model design matrix produced by simple_simulate.
+    """
+    merge_inputs = [households, land_use, accessibility]
+    choosers = sim.merge_tables(households.name, tables=merge_inputs)
+    alternatives = auto_alts.to_frame()
+
+    result = asim.simple_simulate(
+        choosers, alternatives, auto_ownership_spec)
+    choices, model_design = result
+
+    tally = choices.value_counts()
+    print "Choices:\n", tally
+    sim.add_column("households", "auto_ownership", choices)
+
+    return model_design
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Description,Expression,cars0,cars1,cars2,cars3,cars42 Adults (age 16+),drivers==2,,0,3.0773,3.1962,2.66163 Adults (age 16+),drivers==3,,0,3.5401,5.5131,5.2084+ Adults (age 16+),drivers>3,,2.0107,6.3662,8.5148,9.5807Persons age 16-17,num_adolescents,,0,-0.881,-1.7313,-1.7313Persons age 18-24,num_college_age,,-0.4087,-1.0095,-1.0107,-1.0107Persons age 35-34,num_young_adults,,0,-0.4849,-0.8596,-0.8596Presence of children age 0-4,num_young_children>0,,0.3669,0.7627,0.7627,0.7627Presence of children age 5-17,(num_children+num_adolescents)>0,,0.0158,0.2936,0.4769,0.4769"Number of workers, capped at 3",@df.workers.clip(upper=3),,0,0.2936,0.6389,0.8797"Piecewise Linear household income, $0-30k","@df.income_in_thousands.clip(0, 30)",,0.0383,0.054,0.0559,0.0619"Piecewise Linear household income, $30-75k","@(df.income_in_thousands-30).clip(0, 45)",,0,0.0083,0.011,0.0147"Piecewise Linear household income, $75k+, capped at $125k","@(df.income_in_thousands-75).clip(0, 50)",,0,0.0083,0.011,0.0147"Density index up to 10, if 0 workers","@(df.workers==0)df.density_index.clip(0, 10)",,0,-0.2028,-0.3654,-0.3654"Density index in excess of 10, if 0 workers",@(df.workers==0)(df.density_index-10).clip(0),,-0.0152,-0.1106,-0.1766,-0.1766"Density index up to 10, if 1+ workers","@(df.workers>0)df.density_index.clip(0, 10)",,0,-0.2028,-0.3654,-0.3654"Density index in excess of 10, if 1+ workers",@(df.workers>0)(df.density_index-10).clip(0),,-0.0152,-0.1106,-0.1766,-0.1766Constants,@1,,1.1865,-1.0846,-3.2502,-5.313San Francisco county,county_name == 'San Francisco',,0.4259,0.4683,0.1458,0.1458Solano county,county_name == 'Solano',,-0.566,-0.4429,-0.2372,-0.2372Napa county,county_name == 'Napa',,-0.566,-0.4429,-0.2372,-0.2372Sonoma county,county_name == 'Sonoma',,-0.566,-0.4429,-0.2372,-0.2372Marin county,county_name == 'Marin',,-0.2434,0,0,0"Retail accessibility (0.66PK + 0.34OP) by auto, if 0 workers",(workers==0)(0.66AUTOPEAKRETAIL+0.34AUTOOFFPEAKRETAIL),,0.0626,0.0626,0.0626,0.0626"Retail 
accessibility (0.66PK + 0.34OP) by auto, if 1+ workers",(workers>0)(0.66AUTOPEAKRETAIL+0.34AUTOOFFPEAKRETAIL),,0.1646,0.1646,0.1646,0.1646"Retail accessibility (0.66PK + 0.34OP) by transit, if 0 workers",(workers==0)(0.66TRANSITPEAKRETAIL+0.34TRANSITOFFPEAKRETAIL),,-0.3053,-0.3053,-0.3053,-0.3053"Retail accessibility (0.66PK + 0.34OP) by transit, if 1+ workers",(workers>0)(0.66TRANSITPEAKRETAIL+0.34TRANSITOFFPEAKRETAIL),,-0.5117,-0.5117,-0.5117,-0.5117"Retail accessibility by non-motorized, if 0 workers",(workers==0)NONMOTORIZEDRETAIL,,-0.03,-0.03,-0.03,-0.03"Retail accessibility by non-motorized, if 1+ workers",(workers>0)NONMOTORIZEDRETAIL,,-0.03,-0.03,-0.03,-0.03"Auto time savings per worker (over walk or transit, max 120) to work",workTourAutoTimeSavings/workers,,0.4707,0.6142,0.5705,0.7693
Expand Down