-
Notifications
You must be signed in to change notification settings - Fork 0
/
create_load_data.sql
192 lines (169 loc) · 7.78 KB
/
create_load_data.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
-- CREATE DATABASE
CREATE DATABASE IF NOT EXISTS BD_RAW;
-- CREATE SCHEMA
CREATE SCHEMA IF NOT EXISTS BD_RAW.BROOKLYNDATA;
-- CREATE INTERNAL STAGE
CREATE STAGE IF NOT EXISTS BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES;
-- PUT FILES
PUT 'file://brooklyndata/olist_orders_dataset__batch_0.csv' @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES AUTO_COMPRESS=FALSE;
PUT 'file://brooklyndata/olist_orders_dataset__batch_1.csv' @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES AUTO_COMPRESS=FALSE;
PUT 'file://brooklyndata/olist_order_reviews_dataset__batch_0.csv' @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES AUTO_COMPRESS=FALSE;
PUT 'file://brooklyndata/olist_order_reviews_dataset__batch_1.csv' @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES AUTO_COMPRESS=FALSE;
PUT 'file://brooklyndata/olist_order_items_dataset__batch_0.csv' @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES AUTO_COMPRESS=FALSE;
PUT 'file://brooklyndata/olist_order_items_dataset__batch_1.csv' @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES AUTO_COMPRESS=FALSE;
PUT 'file://brooklyndata/olist_order_payments_dataset.csv' @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES AUTO_COMPRESS=FALSE;
PUT 'file://brooklyndata/olist_order_customer_dataset.csv' @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES AUTO_COMPRESS=FALSE;
PUT 'file://brooklyndata/olist_products_dataset.csv' @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES AUTO_COMPRESS=FALSE;
PUT 'file://brooklyndata/olist_sellers_dataset.csv' @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES AUTO_COMPRESS=FALSE;
PUT 'file://brooklyndata/olist_geolocation_dataset__batch_0.csv' @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES AUTO_COMPRESS=FALSE;
PUT 'file://brooklyndata/olist_geolocation_dataset__batch_1.csv' @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES AUTO_COMPRESS=FALSE;
PUT 'file://brooklyndata/olist_geolocation_dataset__batch_2.csv' @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES AUTO_COMPRESS=FALSE;
PUT 'file://brooklyndata/olist_geolocation_dataset__batch_3.csv' @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES AUTO_COMPRESS=FALSE;
-- CREATE FILE FORMAT
CREATE OR REPLACE FILE FORMAT BD_RAW.BROOKLYNDATA.MY_CSV_FORMAT TYPE = 'CSV' SKIP_HEADER = 1 FIELD_OPTIONALLY_ENCLOSED_BY = '"';
-- CREATE OLIST_ORDERS
CREATE OR REPLACE TABLE BD_RAW.BROOKLYNDATA.OLIST_ORDERS (
order_id STRING,
customer_id STRING,
order_status STRING,
order_purchase_timestamp TIMESTAMP,
order_approved_at TIMESTAMP,
order_delivered_carrier_date TIMESTAMP,
order_delivered_customer_date TIMESTAMP,
order_estimated_delivery_date TIMESTAMP
);
-- Load data from the first batch file into the table
COPY INTO BD_RAW.BROOKLYNDATA.OLIST_ORDERS
FROM @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES
FILE_FORMAT = (FORMAT_NAME = BD_RAW.BROOKLYNDATA.MY_CSV_FORMAT)
PATTERN = 'olist_orders_dataset__batch_0\.csv';
-- Load data from the second batch file into the table
COPY INTO BD_RAW.BROOKLYNDATA.OLIST_ORDERS
FROM @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES
FILE_FORMAT = (FORMAT_NAME = BD_RAW.BROOKLYNDATA.MY_CSV_FORMAT)
PATTERN = 'olist_orders_dataset__batch_1\.csv';
-- CREATE OLIST_ORDER_REVIEWS
CREATE OR REPLACE TABLE BD_RAW.BROOKLYNDATA.OLIST_ORDER_REVIEWS (
review_id STRING,
order_id STRING,
review_score INTEGER,
review_comment_title STRING,
review_comment_message STRING,
review_creation_date TIMESTAMP,
review_answer_timestamp TIMESTAMP
);
-- Load data from the first batch file into the table
COPY INTO BD_RAW.BROOKLYNDATA.OLIST_ORDER_REVIEWS
FROM @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES
FILE_FORMAT = (FORMAT_NAME = BD_RAW.BROOKLYNDATA.MY_CSV_FORMAT)
PATTERN = 'olist_order_reviews_dataset__batch_0\.csv';
-- Load data from the second batch file into the table
COPY INTO BD_RAW.BROOKLYNDATA.OLIST_ORDER_REVIEWS
FROM @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES
FILE_FORMAT = (FORMAT_NAME = BD_RAW.BROOKLYNDATA.MY_CSV_FORMAT)
PATTERN = 'olist_order_reviews_dataset__batch_1\.csv';
-- CREATE OLIST_ORDER_ITEMS
CREATE OR REPLACE TABLE BD_RAW.BROOKLYNDATA.OLIST_ORDER_ITEMS (
order_id STRING,
order_item_id INTEGER,
product_id STRING,
seller_id STRING,
shipping_limit_date TIMESTAMP,
price FLOAT,
freight_value FLOAT
);
-- Load data from the first batch file into the table
COPY INTO BD_RAW.BROOKLYNDATA.OLIST_ORDER_ITEMS
FROM @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES
FILE_FORMAT = (FORMAT_NAME = BD_RAW.BROOKLYNDATA.MY_CSV_FORMAT)
PATTERN = 'olist_order_items_dataset__batch_0\.csv';
-- Load data from the second batch file into the table
COPY INTO BD_RAW.BROOKLYNDATA.OLIST_ORDER_ITEMS
FROM @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES
FILE_FORMAT = (FORMAT_NAME = BD_RAW.BROOKLYNDATA.MY_CSV_FORMAT)
PATTERN = 'olist_order_items_dataset__batch_1\.csv';
-- CREATE OLIST_ORDER_PAYMENTS
CREATE OR REPLACE TABLE BD_RAW.BROOKLYNDATA.OLIST_ORDER_PAYMENTS (
order_id STRING,
payment_sequential INTEGER,
payment_type STRING,
payment_installments INTEGER,
payment_value FLOAT
);
-- load data
COPY INTO BD_RAW.BROOKLYNDATA.OLIST_ORDER_PAYMENTS
FROM @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES
FILE_FORMAT = (FORMAT_NAME = BD_RAW.BROOKLYNDATA.MY_CSV_FORMAT)
PATTERN = 'olist_order_payments_dataset.csv';
-- CREATE OLIST_ORDER_CUSTOMER
CREATE OR REPLACE TABLE BD_RAW.BROOKLYNDATA.OLIST_ORDER_CUSTOMER (
customer_id STRING,
customer_unique_id STRING,
customer_zip_code_prefix STRING,
customer_city STRING,
customer_state STRING
);
-- load data
COPY INTO BD_RAW.BROOKLYNDATA.OLIST_ORDER_CUSTOMER
FROM @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES
FILE_FORMAT = (FORMAT_NAME = BD_RAW.BROOKLYNDATA.MY_CSV_FORMAT)
PATTERN = 'olist_order_customer_dataset.csv';
-- CREATE OLIST_PRODUCTS
CREATE OR REPLACE TABLE BD_RAW.BROOKLYNDATA.OLIST_PRODUCTS (
product_id STRING,
product_category_name STRING,
product_name_length INTEGER,
product_description_length INTEGER,
product_photos_qty INTEGER,
product_weight_g INTEGER,
product_length_cm FLOAT,
product_height_cm FLOAT,
product_width_cm FLOAT
);
-- load data
COPY INTO BD_RAW.BROOKLYNDATA.OLIST_PRODUCTS
FROM @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES
FILE_FORMAT = (FORMAT_NAME = BD_RAW.BROOKLYNDATA.MY_CSV_FORMAT)
PATTERN = 'olist_products_dataset.csv';
-- CREATE OLIST_SELLERS
CREATE OR REPLACE TABLE BD_RAW.BROOKLYNDATA.OLIST_SELLERS (
seller_id STRING,
seller_zip_code_prefix STRING,
seller_city STRING,
seller_state STRING
);
-- load data
COPY INTO BD_RAW.BROOKLYNDATA.OLIST_SELLERS
FROM @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES
FILE_FORMAT = (FORMAT_NAME = BD_RAW.BROOKLYNDATA.MY_CSV_FORMAT)
PATTERN = 'olist_sellers_dataset.csv';
-- CREATE OLIST_GEOLOCATION
CREATE OR REPLACE TABLE BD_RAW.BROOKLYNDATA.OLIST_GEOLOCATION (
geolocation_zip_code_prefix STRING,
geolocation_lat FLOAT,
geolocation_lng FLOAT,
geolocation_city STRING,
geolocation_state STRING
);
-- Load data from the first batch file into the table
COPY INTO BD_RAW.BROOKLYNDATA.OLIST_GEOLOCATION
FROM @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES
FILE_FORMAT = (FORMAT_NAME = BD_RAW.BROOKLYNDATA.MY_CSV_FORMAT)
PATTERN = 'olist_geolocation_dataset__batch_0\.csv';
-- Load data from the second batch file into the table
COPY INTO BD_RAW.BROOKLYNDATA.OLIST_GEOLOCATION
FROM @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES
FILE_FORMAT = (FORMAT_NAME = BD_RAW.BROOKLYNDATA.MY_CSV_FORMAT)
PATTERN = 'olist_geolocation_dataset__batch_1\.csv';
-- Load data from the third batch file into the table
COPY INTO BD_RAW.BROOKLYNDATA.OLIST_GEOLOCATION
FROM @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES
FILE_FORMAT = (FORMAT_NAME = BD_RAW.BROOKLYNDATA.MY_CSV_FORMAT)
PATTERN = 'olist_geolocation_dataset__batch_2\.csv';
-- Load data from the fourth batch file into the table
COPY INTO BD_RAW.BROOKLYNDATA.OLIST_GEOLOCATION
FROM @BD_RAW.BROOKLYNDATA.BROOKLYNDATA_OLIST_FILES
FILE_FORMAT = (FORMAT_NAME = BD_RAW.BROOKLYNDATA.MY_CSV_FORMAT)
PATTERN = 'olist_geolocation_dataset__batch_3\.csv';
-- CREATE GOLD DATABASE
CREATE DATABASE IF NOT EXISTS GOLD;