Merge pull request #18 from synthicity/workplace-size-coeffs

Workplace size coeffs
ActivitySim · Feb 20, 2015 · 411ee42 · 411ee42
2 parents b3272dd + 24f770e
commit 411ee42
Show file tree

Hide file tree

Showing 6 changed files with 254 additions and 204 deletions.
diff --git a/activitysim/activitysim.py b/activitysim/activitysim.py
@@ -110,6 +110,7 @@ def simple_simulate(choosers, alternatives, spec,
                 print "Failed with DataFrame eval:\n%s" % expr
                 raise e
         vars[expr] = s
+        vars[expr] = vars[expr].astype('float')  # explicit cast
     model_design = pd.DataFrame(vars, index=df.index)
 
     df = random_rows(model_design, min(100000, len(model_design)))\

diff --git a/example/configs/workplace_location.csv b/example/configs/workplace_location.csv
@@ -1 +1 @@
-Description,Expression,Alt"Distance, piecewise linear from 0 to 1 miles",@df.distance.clip(1),-0.8428"Distance, piecewise linear from 1 to 2 miles","@(df.distance-1).clip(0,1)",-0.3104"Distance, piecewise linear from 2 to 5 miles","@(df.distance-2).clip(0,3)",-0.3783"Distance, piecewise linear from 5 to 15 miles","@(df.distance-5).clip(0,10)",-0.1285"Distance, piecewise linear for 15+ miles",@(df.distance-15.0).clip(0),-0.0917"Distance 0 to 5 mi, high and very high income",@(df.income_segment>=3)*df.distance.clip(upper=5),0.15"Distance 5+ mi, high and very high income",@(df.income_segment>=3)*(df.distance-5).clip(0),0.02"Size variable full-time worker, low income",(income_segment==1)*lnWorkLowDcSizeAlt,1"Size variable full-time worker, medium income",(income_segment==2)*lnWorkMedDcSizeAlt,1"Size variable full-time worker, high income",(income_segment==3)*lnWorkHighDcSizeAlt,1"Size variable full-time worker, very high income",(income_segment==4)*lnWorkVeryHighDcSizeAlt,1"No attractions full-time worker, low income",(income_segment==1)*lnWorkLowDcSizeAlt==0,-999"No attractions full-time worker, medium income",(income_segment==2)*lnWorkMedDcSizeAlt==0,-999"No attractions full-time worker, high income",(income_segment==3)*lnWorkHighDcSizeAlt==0,-999"No attractions full-time worker, very high income",(income_segment==4)*lnWorkVeryHighDcSizeAlt==0,-999Mode choice logsum,mcLogsum,0.3
+Description,Expression,Alt"Distance, piecewise linear from 0 to 1 miles",@df.distance.clip(1),-0.8428"Distance, piecewise linear from 1 to 2 miles","@(df.distance-1).clip(0,1)",-0.3104"Distance, piecewise linear from 2 to 5 miles","@(df.distance-2).clip(0,3)",-0.3783"Distance, piecewise linear from 5 to 15 miles","@(df.distance-5).clip(0,10)",-0.1285"Distance, piecewise linear for 15+ miles",@(df.distance-15.0).clip(0),-0.0917"Distance 0 to 5 mi, high and very high income",@(df.income_segment>=3)*df.distance.clip(upper=5),0.15"Distance 5+ mi, high and very high income",@(df.income_segment>=3)*(df.distance-5).clip(0),0.02"Size variable full-time worker, low income",@(df.income_segment==1)*df.size_low,1"Size variable full-time worker, medium income",@(df.income_segment==2)*df.size_med,1"Size variable full-time worker, high income",@(df.income_segment==3)*df.size_high,1"Size variable full-time worker, very high income",@(df.income_segment==4)*df.size_veryhigh,1"No attractions full-time worker, low income",@(df.income_segment==1)&(df.size_low==0),-999"No attractions full-time worker, medium income",@(df.income_segment==2)&(df.size_med==0),-999"No attractions full-time worker, high income",@(df.income_segment==3)&(df.size_high==0),-999"No attractions full-time worker, very high income",@(df.income_segment==4)&(df.size_veryhigh==0),-999Mode choice logsum,mcLogsum,0.3

diff --git a/example/configs/workplace_location_size_terms.csv b/example/configs/workplace_location_size_terms.csv
@@ -0,0 +1 @@
+purpose,segment,TOTHH,RETEMPN,FPSEMPN,HEREMPN,OTHEMPN,AGREMPN,MWTEMPN,AGE0519,HSENROLL,COLLFTE,COLLPTEwork,low,0,0.129,0.193,0.383,0.12,0.01,0.164,0,0,0,0work,med,0,0.12,0.197,0.325,0.139,0.008,0.21,0,0,0,0work,high,0,0.11,0.207,0.284,0.154,0.006,0.239,0,0,0,0work,veryhigh,0,0.093,0.27,0.241,0.146,0.004,0.246,0,0,0,0university,university,0,0,0,0,0,0,0,0,0,0.592,0.408school,grade,0,0,0,0,0,0,0,1,0,0,0school,high,0,0,0,0,0,0,0,0,1,0,0escort,kids,0,0.225,0,0.144,0,0,0,0.465,0.166,0,0escort,no kids,0,0.225,0,0.144,0,0,0,0.465,0.166,0,0shopping,shopping,0,1,0,0,0,0,0,0,0,0,0eatOut,eatOut,0,0.742,0,0.258,0,0,0,0,0,0,0othMaint,othMaint,0,0.482,0,0.518,0,0,0,0,0,0,0social,social,0,0.522,0,0.478,0,0,0,0,0,0,0othDiscr,othDiscr,0.252,0.212,0,0.272,0.165,0,0,0,0.098,0,0atwork,atwork,0,0.742,0,0.258,0,0,0,0,0,0,0

diff --git a/example/models.py b/example/models.py
@@ -37,7 +37,37 @@ def auto_ownership_spec():
 @sim.injectable()
 def workplace_location_spec():
     f = os.path.join('configs', "workplace_location.csv")
-    return asim.read_model_spec(f).head(7)
+    return asim.read_model_spec(f).head(15)  # presumably 7 distance + 8 size rows; mcLogsum row still cut
+
+
+@sim.table()
+def workplace_size_spec():
+    f = os.path.join('configs', 'workplace_location_size_terms.csv')
+    return pd.read_csv(f)  # raw coefficient table, unfiltered and unindexed
+
+
+@sim.table()
+def workplace_size_terms(land_use, workplace_size_spec):
+    """
+    Build one size-term column per work segment ("size_" + segment name) as a
+    linear combination of land use columns, weighted by the coefficients in
+    the purpose == 'work' rows of workplace_size_spec.  Coefficient columns
+    absent from land use are reported with a warning and left out of the sum.
+    Returns a DataFrame that shares the land use table's index.
+    """
+    land_use = land_use.to_frame()
+    df = workplace_size_spec.to_frame().query("purpose == 'work'")
+    df = df.drop("purpose", axis=1).set_index("segment")  # one row per segment
+    new_df = {}  # output column name -> computed size term values
+    for index, row in df.iterrows():
+        missing = row[~row.index.isin(land_use.columns)]
+        if len(missing) > 0:
+            print "WARNING: missing columns in land use\n", missing.index
+        row = row[row.index.isin(land_use.columns)]  # usable coefficients only
+        sparse = land_use[list(row.index)]  # columns in the same order as row
+        new_df["size_"+index] = np.dot(sparse.as_matrix(), row.values)
+    new_df = pd.DataFrame(new_df, index=land_use.index)
+    return new_df
 
 
 @sim.model()
@@ -67,10 +97,11 @@ def workplace_location_simulate(persons,
                                 households,
                                 zones,
                                 workplace_location_spec,
-                                distance_matrix):
+                                distance_matrix,
+                                workplace_size_terms):
 
     choosers = sim.merge_tables(persons.name, tables=[persons, households])
-    alternatives = zones.to_frame()
+    alternatives = zones.to_frame().join(workplace_size_terms.to_frame())
 
     skims = {
         "distance": distance_matrix
@@ -88,4 +119,24 @@ def workplace_location_simulate(persons,
     print "Describe of hoices:\n", choices.describe()
     sim.add_column("persons", "workplace_taz", choices)
 
-    return model_design
+    return model_design
+
+
+@sim.column("land_use")
+def total_households(land_use):
+    return land_use.local.TOTHH  # formerly a rename in data_mover.ipynb col_map
+
+
+@sim.column("land_use")
+def total_employment(land_use):
+    return land_use.local.TOTEMP  # formerly a rename in data_mover.ipynb col_map
+
+
+@sim.column("land_use")
+def total_acres(land_use):
+    return land_use.local.TOTACRE  # formerly a rename in data_mover.ipynb col_map
+
+
+@sim.column("land_use")
+def county_id(land_use):
+    return land_use.local.COUNTY  # formerly a rename in data_mover.ipynb col_map
diff --git a/notebooks/data_mover.ipynb b/notebooks/data_mover.ipynb
@@ -1,7 +1,7 @@
 {
  "metadata": {
   "name": "",
-  "signature": "sha256:d62442075d195d4893cdd65305dac0932aeb18464bb30a1d8aac6a89ad987ef1"
+  "signature": "sha256:afbc3e7040dd9e4a5b21433063f13a6a8abfcc04bcfc6574e7e43376c257cd33"
  },
  "nbformat": 3,
  "nbformat_minor": 0,
@@ -43,10 +43,6 @@
       "col_map = {\n",
       "    \"HHID\": \"household_id\",\n",
       "    \"AGE\": \"age\",\n",
-      "    \"TOTHH\": \"total_households\",\n",
-      "    \"TOTEMP\": \"total_employment\",\n",
-      "    \"TOTACRE\": \"total_acres\",\n",
-      "    \"COUNTY\": \"county_id\",\n",
       "    \"hworkers\": \"workers\",\n",
       "    \"HINC\": \"income\"\n",
       "}"
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		Description,Expression,Alt"Distance, piecewise linear from 0 to 1 miles",@df.distance.clip(1),-0.8428"Distance, piecewise linear from 1 to 2 miles","@(df.distance-1).clip(0,1)",-0.3104"Distance, piecewise linear from 2 to 5 miles","@(df.distance-2).clip(0,3)",-0.3783"Distance, piecewise linear from 5 to 15 miles","@(df.distance-5).clip(0,10)",-0.1285"Distance, piecewise linear for 15+ miles",@(df.distance-15.0).clip(0),-0.0917"Distance 0 to 5 mi, high and very high income",@(df.income_segment>=3)*df.distance.clip(upper=5),0.15"Distance 5+ mi, high and very high income",@(df.income_segment>=3)*(df.distance-5).clip(0),0.02"Size variable full-time worker, low income",(income_segment==1)*lnWorkLowDcSizeAlt,1"Size variable full-time worker, medium income",(income_segment==2)*lnWorkMedDcSizeAlt,1"Size variable full-time worker, high income",(income_segment==3)*lnWorkHighDcSizeAlt,1"Size variable full-time worker, very high income",(income_segment==4)*lnWorkVeryHighDcSizeAlt,1"No attractions full-time worker, low income",(income_segment==1)*lnWorkLowDcSizeAlt==0,-999"No attractions full-time worker, medium income",(income_segment==2)*lnWorkMedDcSizeAlt==0,-999"No attractions full-time worker, high income",(income_segment==3)*lnWorkHighDcSizeAlt==0,-999"No attractions full-time worker, very high income",(income_segment==4)*lnWorkVeryHighDcSizeAlt==0,-999Mode choice logsum,mcLogsum,0.3
		Description,Expression,Alt"Distance, piecewise linear from 0 to 1 miles",@df.distance.clip(1),-0.8428"Distance, piecewise linear from 1 to 2 miles","@(df.distance-1).clip(0,1)",-0.3104"Distance, piecewise linear from 2 to 5 miles","@(df.distance-2).clip(0,3)",-0.3783"Distance, piecewise linear from 5 to 15 miles","@(df.distance-5).clip(0,10)",-0.1285"Distance, piecewise linear for 15+ miles",@(df.distance-15.0).clip(0),-0.0917"Distance 0 to 5 mi, high and very high income",@(df.income_segment>=3)*df.distance.clip(upper=5),0.15"Distance 5+ mi, high and very high income",@(df.income_segment>=3)*(df.distance-5).clip(0),0.02"Size variable full-time worker, low income",@(df.income_segment==1)*df.size_low,1"Size variable full-time worker, medium income",@(df.income_segment==2)*df.size_med,1"Size variable full-time worker, high income",@(df.income_segment==3)*df.size_high,1"Size variable full-time worker, very high income",@(df.income_segment==4)*df.size_veryhigh,1"No attractions full-time worker, low income",@(df.income_segment==1)&(df.size_low==0),-999"No attractions full-time worker, medium income",@(df.income_segment==2)&(df.size_med==0),-999"No attractions full-time worker, high income",@(df.income_segment==3)&(df.size_high==0),-999"No attractions full-time worker, very high income",@(df.income_segment==4)&(df.size_veryhigh==0),-999Mode choice logsum,mcLogsum,0.3
Expand Down
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		purpose,segment,TOTHH,RETEMPN,FPSEMPN,HEREMPN,OTHEMPN,AGREMPN,MWTEMPN,AGE0519,HSENROLL,COLLFTE,COLLPTEwork,low,0,0.129,0.193,0.383,0.12,0.01,0.164,0,0,0,0work,med,0,0.12,0.197,0.325,0.139,0.008,0.21,0,0,0,0work,high,0,0.11,0.207,0.284,0.154,0.006,0.239,0,0,0,0work,veryhigh,0,0.093,0.27,0.241,0.146,0.004,0.246,0,0,0,0university,university,0,0,0,0,0,0,0,0,0,0.592,0.408school,grade,0,0,0,0,0,0,0,1,0,0,0school,high,0,0,0,0,0,0,0,0,1,0,0escort,kids,0,0.225,0,0.144,0,0,0,0.465,0.166,0,0escort,no kids,0,0.225,0,0.144,0,0,0,0.465,0.166,0,0shopping,shopping,0,1,0,0,0,0,0,0,0,0,0eatOut,eatOut,0,0.742,0,0.258,0,0,0,0,0,0,0othMaint,othMaint,0,0.482,0,0.518,0,0,0,0,0,0,0social,social,0,0.522,0,0.478,0,0,0,0,0,0,0othDiscr,othDiscr,0.252,0.212,0,0.272,0.165,0,0,0,0.098,0,0atwork,atwork,0,0.742,0,0.258,0,0,0,0,0,0,0
Expand Down