-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchap1.qmd
304 lines (264 loc) · 12.2 KB
/
chap1.qmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
---
title: "必知必会第1课:了解SQL"
author: "黄天元"
format: html
editor: visual
---
在本课的实践中,我们将会构建并了解贯穿我们课程始终的数据库。首先,我们先使用脚本来构建数据库。
我们将会在R中来运行SQL脚本,从而自动化地构建我们的原始数据库。其中,创建数据库的脚本如下:
```{sql}
#| eval: false
-----------------------------------------------------
-- Sams Teach Yourself SQL in 10 Minutes, 5th Edition
-- http://forta.com/books/0135182794/
-- Example table creation scripts for SQLite
-----------------------------------------------------
-- ----------------------
-- Create Customers table
-- ----------------------
CREATE TABLE Customers
(
cust_id char(10) NOT NULL ,
cust_name char(50) NOT NULL ,
cust_address char(50) NULL ,
cust_city char(50) NULL ,
cust_state char(5) NULL ,
cust_zip char(10) NULL ,
cust_country char(50) NULL ,
cust_contact char(50) NULL ,
cust_email char(255) NULL ,
PRIMARY KEY (cust_id)
);
-- -----------------------
-- Create OrderItems table
-- -----------------------
CREATE TABLE OrderItems
(
order_num int NOT NULL ,
order_item int NOT NULL ,
prod_id char(10) NOT NULL ,
quantity int NOT NULL ,
item_price decimal(8,2) NOT NULL ,
PRIMARY KEY (order_num, order_item) ,
FOREIGN KEY (order_num) REFERENCES Orders (order_num) ,
FOREIGN KEY (prod_id) REFERENCES Products (prod_id)
);
-- -------------------
-- Create Orders table
-- -------------------
CREATE TABLE Orders
(
order_num int NOT NULL ,
order_date datetime NOT NULL ,
cust_id char(10) NOT NULL ,
PRIMARY KEY (order_num) ,
FOREIGN KEY (cust_id) REFERENCES Customers (cust_id)
);
-- ---------------------
-- Create Products table
-- ---------------------
CREATE TABLE Products
(
prod_id char(10) NOT NULL ,
vend_id char(10) NOT NULL ,
prod_name char(255) NOT NULL ,
prod_price decimal(8,2) NOT NULL ,
prod_desc text NULL ,
PRIMARY KEY (prod_id) ,
FOREIGN KEY (vend_id) REFERENCES Vendors (vend_id)
);
-- --------------------
-- Create Vendors table
-- --------------------
CREATE TABLE Vendors
(
vend_id char(10) NOT NULL ,
vend_name char(50) NOT NULL ,
vend_address char(50) NULL ,
vend_city char(50) NULL ,
vend_state char(5) NULL ,
vend_zip char(10) NULL ,
vend_country char(50) NULL ,
PRIMARY KEY (vend_id)
);
-- -------------------
-- Define primary keys
-- SQLite does not allow PK to be defined using ALTER TABLE, so defined in CREATE TABLE
-- -------------------
-- -------------------
-- Define foreign keys
-- SQLite does not allow FK to be defined using ALTER TABLE, so defined in CREATE TABLE
-- -------------------
```
然后,我们往创建的数据库中添加数据:
```{sql}
#| eval: false
-----------------------------------------------------
-- Sams Teach Yourself SQL in 10 Minutes, 5th Edition
-- http://forta.com/books/0135182794/
-- Example table population scripts for SQLite
-----------------------------------------------------
-- ------------------------
-- Populate Customers table
-- ------------------------
INSERT INTO Customers(cust_id, cust_name, cust_address, cust_city, cust_state, cust_zip, cust_country, cust_contact, cust_email)
VALUES('1000000001', 'Village Toys', '200 Maple Lane', 'Detroit', 'MI', '44444', 'USA', 'John Smith', 'sales@villagetoys.com');
INSERT INTO Customers(cust_id, cust_name, cust_address, cust_city, cust_state, cust_zip, cust_country, cust_contact)
VALUES('1000000002', 'Kids Place', '333 South Lake Drive', 'Columbus', 'OH', '43333', 'USA', 'Michelle Green');
INSERT INTO Customers(cust_id, cust_name, cust_address, cust_city, cust_state, cust_zip, cust_country, cust_contact, cust_email)
VALUES('1000000003', 'Fun4All', '1 Sunny Place', 'Muncie', 'IN', '42222', 'USA', 'Jim Jones', 'jjones@fun4all.com');
INSERT INTO Customers(cust_id, cust_name, cust_address, cust_city, cust_state, cust_zip, cust_country, cust_contact, cust_email)
VALUES('1000000004', 'Fun4All', '829 Riverside Drive', 'Phoenix', 'AZ', '88888', 'USA', 'Denise L. Stephens', 'dstephens@fun4all.com');
INSERT INTO Customers(cust_id, cust_name, cust_address, cust_city, cust_state, cust_zip, cust_country, cust_contact)
VALUES('1000000005', 'The Toy Store', '4545 53rd Street', 'Chicago', 'IL', '54545', 'USA', 'Kim Howard');
-- ----------------------
-- Populate Vendors table
-- ----------------------
INSERT INTO Vendors(vend_id, vend_name, vend_address, vend_city, vend_state, vend_zip, vend_country)
VALUES('BRS01','Bears R Us','123 Main Street','Bear Town','MI','44444', 'USA');
INSERT INTO Vendors(vend_id, vend_name, vend_address, vend_city, vend_state, vend_zip, vend_country)
VALUES('BRE02','Bear Emporium','500 Park Street','Anytown','OH','44333', 'USA');
INSERT INTO Vendors(vend_id, vend_name, vend_address, vend_city, vend_state, vend_zip, vend_country)
VALUES('DLL01','Doll House Inc.','555 High Street','Dollsville','CA','99999', 'USA');
INSERT INTO Vendors(vend_id, vend_name, vend_address, vend_city, vend_state, vend_zip, vend_country)
VALUES('FRB01','Furball Inc.','1000 5th Avenue','New York','NY','11111', 'USA');
INSERT INTO Vendors(vend_id, vend_name, vend_address, vend_city, vend_state, vend_zip, vend_country)
VALUES('FNG01','Fun and Games','42 Galaxy Road','London', NULL,'N16 6PS', 'England');
INSERT INTO Vendors(vend_id, vend_name, vend_address, vend_city, vend_state, vend_zip, vend_country)
VALUES('JTS01','Jouets et ours','1 Rue Amusement','Paris', NULL,'45678', 'France');
-- -----------------------
-- Populate Products table
-- -----------------------
INSERT INTO Products(prod_id, vend_id, prod_name, prod_price, prod_desc)
VALUES('BR01', 'BRS01', '8 inch teddy bear', 5.99, '8 inch teddy bear, comes with cap and jacket');
INSERT INTO Products(prod_id, vend_id, prod_name, prod_price, prod_desc)
VALUES('BR02', 'BRS01', '12 inch teddy bear', 8.99, '12 inch teddy bear, comes with cap and jacket');
INSERT INTO Products(prod_id, vend_id, prod_name, prod_price, prod_desc)
VALUES('BR03', 'BRS01', '18 inch teddy bear', 11.99, '18 inch teddy bear, comes with cap and jacket');
INSERT INTO Products(prod_id, vend_id, prod_name, prod_price, prod_desc)
VALUES('BNBG01', 'DLL01', 'Fish bean bag toy', 3.49, 'Fish bean bag toy, complete with bean bag worms with which to feed it');
INSERT INTO Products(prod_id, vend_id, prod_name, prod_price, prod_desc)
VALUES('BNBG02', 'DLL01', 'Bird bean bag toy', 3.49, 'Bird bean bag toy, eggs are not included');
INSERT INTO Products(prod_id, vend_id, prod_name, prod_price, prod_desc)
VALUES('BNBG03', 'DLL01', 'Rabbit bean bag toy', 3.49, 'Rabbit bean bag toy, comes with bean bag carrots');
INSERT INTO Products(prod_id, vend_id, prod_name, prod_price, prod_desc)
VALUES('RGAN01', 'DLL01', 'Raggedy Ann', 4.99, '18 inch Raggedy Ann doll');
INSERT INTO Products(prod_id, vend_id, prod_name, prod_price, prod_desc)
VALUES('RYL01', 'FNG01', 'King doll', 9.49, '12 inch king doll with royal garments and crown');
INSERT INTO Products(prod_id, vend_id, prod_name, prod_price, prod_desc)
VALUES('RYL02', 'FNG01', 'Queen doll', 9.49, '12 inch queen doll with royal garments and crown');
-- ---------------------
-- Populate Orders table
-- ---------------------
INSERT INTO Orders(order_num, order_date, cust_id)
VALUES(20005, '2020-05-01', '1000000001');
INSERT INTO Orders(order_num, order_date, cust_id)
VALUES(20006, '2020-01-12', '1000000003');
INSERT INTO Orders(order_num, order_date, cust_id)
VALUES(20007, '2020-01-30', '1000000004');
INSERT INTO Orders(order_num, order_date, cust_id)
VALUES(20008, '2020-02-03', '1000000005');
INSERT INTO Orders(order_num, order_date, cust_id)
VALUES(20009, '2020-02-08', '1000000001');
-- -------------------------
-- Populate OrderItems table
-- -------------------------
INSERT INTO OrderItems(order_num, order_item, prod_id, quantity, item_price)
VALUES(20005, 1, 'BR01', 100, 5.49);
INSERT INTO OrderItems(order_num, order_item, prod_id, quantity, item_price)
VALUES(20005, 2, 'BR03', 100, 10.99);
INSERT INTO OrderItems(order_num, order_item, prod_id, quantity, item_price)
VALUES(20006, 1, 'BR01', 20, 5.99);
INSERT INTO OrderItems(order_num, order_item, prod_id, quantity, item_price)
VALUES(20006, 2, 'BR02', 10, 8.99);
INSERT INTO OrderItems(order_num, order_item, prod_id, quantity, item_price)
VALUES(20006, 3, 'BR03', 10, 11.99);
INSERT INTO OrderItems(order_num, order_item, prod_id, quantity, item_price)
VALUES(20007, 1, 'BR03', 50, 11.49);
INSERT INTO OrderItems(order_num, order_item, prod_id, quantity, item_price)
VALUES(20007, 2, 'BNBG01', 100, 2.99);
INSERT INTO OrderItems(order_num, order_item, prod_id, quantity, item_price)
VALUES(20007, 3, 'BNBG02', 100, 2.99);
INSERT INTO OrderItems(order_num, order_item, prod_id, quantity, item_price)
VALUES(20007, 4, 'BNBG03', 100, 2.99);
INSERT INTO OrderItems(order_num, order_item, prod_id, quantity, item_price)
VALUES(20007, 5, 'RGAN01', 50, 4.49);
INSERT INTO OrderItems(order_num, order_item, prod_id, quantity, item_price)
VALUES(20008, 1, 'RGAN01', 5, 4.99);
INSERT INTO OrderItems(order_num, order_item, prod_id, quantity, item_price)
VALUES(20008, 2, 'BR03', 5, 11.99);
INSERT INTO OrderItems(order_num, order_item, prod_id, quantity, item_price)
VALUES(20008, 3, 'BNBG01', 10, 3.49);
INSERT INTO OrderItems(order_num, order_item, prod_id, quantity, item_price)
VALUES(20008, 4, 'BNBG02', 10, 3.49);
INSERT INTO OrderItems(order_num, order_item, prod_id, quantity, item_price)
VALUES(20008, 5, 'BNBG03', 10, 3.49);
INSERT INTO OrderItems(order_num, order_item, prod_id, quantity, item_price)
VALUES(20009, 1, 'BNBG01', 250, 2.49);
INSERT INTO OrderItems(order_num, order_item, prod_id, quantity, item_price)
VALUES(20009, 2, 'BNBG02', 250, 2.49);
INSERT INTO OrderItems(order_num, order_item, prod_id, quantity, item_price)
VALUES(20009, 3, 'BNBG03', 250, 2.49);
```
然后,我们观察《SQL必知必会》的附录A来观察数据框中各个表的结构和相互关系。
```{r}
#| eval: false
#| echo: false
# 如果想要使用R脚本来对数据库进行创建,可以参考以下代码:
# 注意:其中,“my_create.txt”和“my_populate.txt”分别是上面我们在SQL中创建并插入数据的文本。
library(pacman)
p_load(tidyverse,DBI,RSQLite,fs)
if(file_exists("data/TYSQL.sqlite")) {
file_delete("data/TYSQL.sqlite")
cat("EXECUTE DELETE.")
}
# 连接数据库
conn <- dbConnect(RSQLite::SQLite(), "data/TYSQL.sqlite")
# 创建数据库
read_lines("data/my_create.txt") -> sql_code
str_subset(sql_code,pattern = "^--",negate = T) %>%
map_chr(\(x) str_replace(x,"\\);$",");[FLAG]")) %>%
str_c(collapse = "\n") %>%
str_squish() %>%
str_remove("\\[FLAG\\]$") %>%
str_split("\\[FLAG\\]") %>%
unlist() -> sql_statements
# 执行 SQL 语句
for (sql in sql_statements) {
if (nchar(sql) > 0) {
tryCatch({
dbExecute(conn, sql) # 执行每条 SQL 语句
}, error = function(e) {
message("Error executing SQL: ", e$message, " in SQL: ", sql)
})
}
}
# 观察数据库中的所有表格
dbListTables(conn) -> all_tables
all_tables
lapply(all_tables, function(x) tbl(conn,x))
# 插入数据
read_lines("data/my_populate.txt") -> sql_code
str_subset(sql_code,pattern = "^--",negate = T) %>%
map_chr(\(x) str_replace(x,"\\);$",");[FLAG]")) %>%
str_c(collapse = "\n") %>%
str_squish() %>%
str_remove("\\[FLAG\\]$") %>%
str_split("\\[FLAG\\]") %>%
unlist() -> sql_statements
# 执行 SQL 语句
for (sql in sql_statements) {
if (nchar(sql) > 0) {
tryCatch({
dbExecute(conn, sql) # 执行每条 SQL 语句
}, error = function(e) {
message("Error executing SQL: ", e$message, " in SQL: ", sql)
})
}
}
# 观察数据库中的所有表格
dbListTables(conn) -> all_tables
lapply(all_tables, function(x) tbl(conn,x))
# 关闭数据库连接
dbDisconnect(conn)
```