forked from toddwschneider/nyc-taxi-data
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_nyc_taxi_schema.sql
144 lines (132 loc) · 3.57 KB
/
create_nyc_taxi_schema.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
CREATE EXTENSION postgis;
CREATE TABLE green_tripdata_staging (
id serial primary key,
vendor_id varchar,
lpep_pickup_datetime varchar,
lpep_dropoff_datetime varchar,
store_and_fwd_flag varchar,
rate_code_id varchar,
pickup_longitude numeric,
pickup_latitude numeric,
dropoff_longitude numeric,
dropoff_latitude numeric,
passenger_count varchar,
trip_distance varchar,
fare_amount varchar,
extra varchar,
mta_tax varchar,
tip_amount varchar,
tolls_amount varchar,
ehail_fee varchar,
improvement_surcharge varchar,
total_amount varchar,
payment_type varchar,
trip_type varchar,
pickup_location_id varchar,
dropoff_location_id varchar,
junk1 varchar,
junk2 varchar
);
/*
N.B. junk columns are there because some tripdata file headers are
inconsistent with the actual data, e.g. header says 20 or 21 columns per row,
but data actually has 22 or 23 columns per row, which COPY doesn't like.
junk1 and junk2 should always be null
*/
CREATE TABLE yellow_tripdata_staging (
id serial primary key,
vendor_id varchar,
tpep_pickup_datetime varchar,
tpep_dropoff_datetime varchar,
passenger_count varchar,
trip_distance varchar,
pickup_longitude numeric,
pickup_latitude numeric,
rate_code_id varchar,
store_and_fwd_flag varchar,
dropoff_longitude numeric,
dropoff_latitude numeric,
payment_type varchar,
fare_amount varchar,
extra varchar,
mta_tax varchar,
tip_amount varchar,
tolls_amount varchar,
improvement_surcharge varchar,
total_amount varchar,
pickup_location_id varchar,
dropoff_location_id varchar,
junk1 varchar,
junk2 varchar
);
CREATE TABLE uber_trips_2014 (
id serial primary key,
pickup_datetime timestamp without time zone,
pickup_latitude numeric,
pickup_longitude numeric,
base_code varchar
);
CREATE TABLE fhv_trips (
id serial primary key,
dispatching_base_num varchar,
pickup_datetime timestamp without time zone,
dropoff_datetime timestamp without time zone,
pickup_location_id integer,
dropoff_location_id integer
);
CREATE TABLE fhv_bases (
base_number varchar primary key,
base_name varchar,
dba varchar,
dba_category varchar
);
CREATE INDEX index_fhv_bases_on_dba_category ON fhv_bases (dba_category);
CREATE TABLE cab_types (
id serial primary key,
type varchar
);
INSERT INTO cab_types (type) SELECT 'yellow';
INSERT INTO cab_types (type) SELECT 'green';
CREATE TABLE trips (
id serial primary key,
cab_type_id integer,
vendor_id varchar,
pickup_datetime timestamp without time zone,
dropoff_datetime timestamp without time zone,
store_and_fwd_flag char(1),
rate_code_id integer,
pickup_longitude numeric,
pickup_latitude numeric,
dropoff_longitude numeric,
dropoff_latitude numeric,
passenger_count integer,
trip_distance numeric,
fare_amount numeric,
extra numeric,
mta_tax numeric,
tip_amount numeric,
tolls_amount numeric,
ehail_fee numeric,
improvement_surcharge numeric,
total_amount numeric,
payment_type varchar,
trip_type integer,
pickup_nyct2010_gid integer,
dropoff_nyct2010_gid integer,
pickup_location_id integer,
dropoff_location_id integer
);
SELECT AddGeometryColumn('trips', 'pickup', 4326, 'POINT', 2);
SELECT AddGeometryColumn('trips', 'dropoff', 4326, 'POINT', 2);
CREATE TABLE central_park_weather_observations (
station_id varchar,
station_name varchar,
date date,
precipitation numeric,
snow_depth numeric,
snowfall numeric,
max_temperature numeric,
min_temperature numeric,
average_wind_speed numeric
);
CREATE UNIQUE INDEX index_weather_observations ON central_park_weather_observations (date);