-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSQL_Subquery.sql
359 lines (293 loc) · 9.4 KB
/
SQL_Subquery.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
/* Subquery */
/* q1. Average number of events per day, reported for each channel.
The inner query counts events per (day, channel); the outer query averages
those daily counts per channel and lists the busiest channel first. */
SELECT
    daily.channel,
    AVG(daily.num_event)
FROM (
    SELECT
        DATE_TRUNC('day', occurred_at) AS event_day,
        channel,
        COUNT(*) AS num_event
    FROM web_events
    GROUP BY
        DATE_TRUNC('day', occurred_at),
        channel
) AS daily
GROUP BY daily.channel
ORDER BY AVG(daily.num_event) DESC;
/* q2. For the first calendar month in which any order was placed:
the average standard/gloss/poster quantity over the rows of orders, and
the total amount (total_amt_usd) spent on all orders of that month.
The scalar subquery yields the month of the earliest occurred_at. */
SELECT
    AVG(o.standard_qty) AS standard,
    AVG(o.gloss_qty) AS gloss,
    AVG(o.poster_qty) AS poster,
    SUM(o.total_amt_usd) AS total_sales_usd
FROM orders AS o
WHERE DATE_TRUNC('month', o.occurred_at) = (
    SELECT DATE_TRUNC('month', MIN(first_o.occurred_at))
    FROM orders AS first_o
);
/* q3. Provide the name of the sales_rep in each region with
the largest amount of total_amt_usd sales. */
/* Solution
per_rep / rep_totals: total_amt_usd summed per sales rep, together with the
                      region the rep belongs to (the same aggregation, written
                      twice because a derived table cannot be referenced twice).
region_max:           the largest per-rep total within each region.
Final query:          join rep_totals to region_max on region and amount, so only
                      the top rep of each region remains; reps tied for the top
                      of a region are all returned.
The ORDER BY is on the outermost query: an ORDER BY inside a derived table does
not guarantee the order of the final result. */
SELECT
    rep_totals.rep_name,
    rep_totals.region,
    rep_totals.total_sales_usd
FROM (
    SELECT
        per_rep.region,
        MAX(per_rep.total_sales_usd) AS total_sales_usd
    FROM (
        SELECT
            s.id AS rep_id,
            s.name AS rep_name,
            r.name AS region,
            SUM(o.total_amt_usd) AS total_sales_usd
        FROM region AS r
        INNER JOIN sales_reps AS s
            ON r.id = s.region_id
        INNER JOIN accounts AS a
            ON a.sales_rep_id = s.id
        INNER JOIN orders AS o
            ON o.account_id = a.id
        GROUP BY s.id, s.name, r.name
    ) AS per_rep
    GROUP BY per_rep.region
) AS region_max
INNER JOIN (
    SELECT
        s.id AS rep_id,
        s.name AS rep_name,
        r.name AS region,
        SUM(o.total_amt_usd) AS total_sales_usd
    FROM region AS r
    INNER JOIN sales_reps AS s
        ON r.id = s.region_id
    INNER JOIN accounts AS a
        ON a.sales_rep_id = s.id
    INNER JOIN orders AS o
        ON o.account_id = a.id
    GROUP BY s.id, s.name, r.name
) AS rep_totals
    ON region_max.region = rep_totals.region
    AND region_max.total_sales_usd = rep_totals.total_sales_usd
ORDER BY rep_totals.total_sales_usd;
/* q4. For the region with the largest (sum) of sales total_amt_usd,
how many total (count) orders were placed? */
/* Solution
1. region_totals (innermost query): total_amt_usd summed per region.
2. The scalar subquery takes the maximum of those regional totals.
3. The outer query groups orders by region and keeps only the region whose
   total equals that maximum, reporting how many orders it has.
COUNT(*) is used so that every order row is counted; COUNT(o.total) would
skip any order whose total is NULL. */
SELECT
    r.name AS region,
    COUNT(*) AS num_sales
FROM region AS r
INNER JOIN sales_reps AS s
    ON r.id = s.region_id
INNER JOIN accounts AS a
    ON a.sales_rep_id = s.id
INNER JOIN orders AS o
    ON o.account_id = a.id
GROUP BY r.name
HAVING SUM(o.total_amt_usd) = (
    SELECT MAX(region_totals.total_sales_usd)
    FROM (
        SELECT
            r.name AS region,
            SUM(o.total_amt_usd) AS total_sales_usd
        FROM region AS r
        INNER JOIN sales_reps AS s
            ON r.id = s.region_id
        INNER JOIN accounts AS a
            ON a.sales_rep_id = s.id
        INNER JOIN orders AS o
            ON o.account_id = a.id
        GROUP BY r.name
    ) AS region_totals
);
/* Without subqueries: order count for the region with the largest
total_amt_usd. Regions are ranked by their summed sales and only the
first row is kept. */
SELECT
    r.name AS region,
    SUM(o.total_amt_usd) AS total_sales_usd,
    COUNT(*) AS Num_orders
FROM region AS r
INNER JOIN sales_reps AS s
    ON s.region_id = r.id
INNER JOIN accounts AS a
    ON s.id = a.sales_rep_id
INNER JOIN orders AS o
    ON a.id = o.account_id
GROUP BY r.name
ORDER BY SUM(o.total_amt_usd) DESC
LIMIT 1;
/* q5. How many accounts had more total purchases than the account name which
has bought the most standard_qty paper throughout their lifetime as a customer? */
/* Solution
1. top_standard: the one account with the largest SUM(standard_qty),
   carrying its SUM(total) along as total_qty.
2. bigger_accounts: account names whose SUM(total) exceeds that total_qty.
3. The outer query counts the rows of bigger_accounts. */
SELECT COUNT(*)
FROM (
    SELECT acct.name
    FROM accounts AS acct
    INNER JOIN orders AS ord
        ON acct.id = ord.account_id
    GROUP BY acct.name
    HAVING SUM(ord.total) > (
        SELECT top_standard.total_qty
        FROM (
            SELECT
                a.name AS account,
                SUM(o.standard_qty) AS total_standard_qty,
                SUM(o.total) AS total_qty
            FROM accounts AS a
            INNER JOIN orders AS o
                ON a.id = o.account_id
            GROUP BY a.name
            ORDER BY SUM(o.standard_qty) DESC
            LIMIT 1
        ) AS top_standard
    )
) AS bigger_accounts;
/* q6. For the customer that spent the most (in total over their lifetime as a customer)
total_amt_usd, how many web_events did they have for each channel? */
/* Solution
top_spender: the single account name with the largest SUM(total_amt_usd).
The outer query counts that account's web events per channel.
The account filter is a row-level condition, so it is placed in WHERE
(applied before grouping) instead of HAVING (applied only after every
account/channel group has been aggregated). */
SELECT
    a.name,
    w.channel,
    COUNT(*)
FROM accounts AS a
INNER JOIN web_events AS w
    ON a.id = w.account_id
WHERE a.name = (
    SELECT top_spender.cust_name
    FROM (
        SELECT
            acct.name AS cust_name,
            SUM(ord.total_amt_usd) AS total_spent
        FROM accounts AS acct
        INNER JOIN orders AS ord
            ON acct.id = ord.account_id
        GROUP BY acct.name
        ORDER BY total_spent DESC
        LIMIT 1
    ) AS top_spender
)
GROUP BY a.name, w.channel
ORDER BY COUNT(*) DESC;
/* Another version: web events per channel for the top-spending customer,
this time identifying the customer by account id.
top_account is the one account with the largest SUM(total_amt_usd). */
SELECT
    a.name,
    w.channel,
    COUNT(*)
FROM accounts AS a
INNER JOIN web_events AS w
    ON w.account_id = a.id
WHERE a.id = (
    SELECT top_account.id
    FROM (
        SELECT
            acct.id,
            acct.name,
            SUM(ord.total_amt_usd) AS tot_spent
        FROM orders AS ord
        INNER JOIN accounts AS acct
            ON acct.id = ord.account_id
        GROUP BY acct.id, acct.name
        ORDER BY SUM(ord.total_amt_usd) DESC
        LIMIT 1
    ) AS top_account
)
GROUP BY a.name, w.channel
ORDER BY COUNT(*) DESC;
/* q7. What is the lifetime average amount spent in terms of total_amt_usd for
the top 10 total spending accounts? */
/* Solution
top_accounts: the 10 accounts with the largest SUM(total_amt_usd).
The outer query averages those 10 totals into a single value. It must not
group by account: each account has exactly one total, so a per-account
average would just echo the totals back instead of answering the question. */
SELECT AVG(top_accounts.total_spent)
FROM (
    SELECT
        a.id AS account_id,
        a.name AS account_name,
        SUM(o.total_amt_usd) AS total_spent
    FROM accounts AS a
    INNER JOIN orders AS o
        ON a.id = o.account_id
    GROUP BY a.id, a.name
    ORDER BY SUM(o.total_amt_usd) DESC
    LIMIT 10
) AS top_accounts;
/* q8. What is the lifetime average amount spent in terms of total_amt_usd, including
only the companies that spent more per order, on average, than the average of all orders.
above_avg holds one row per account whose own order average exceeds the
average over the whole orders table; the outer query averages those
per-account averages. */
SELECT AVG(above_avg.avg_sales)
FROM (
    SELECT
        ord.account_id,
        AVG(ord.total_amt_usd) AS avg_sales
    FROM orders AS ord
    GROUP BY ord.account_id
    HAVING AVG(ord.total_amt_usd) > (
        SELECT AVG(all_ord.total_amt_usd) AS avg_all
        FROM orders AS all_ord
    )
) AS above_avg;
/* WITH */
/* Template
WITH table1 AS (
SELECT *
FROM web_events),
table2 AS (
SELECT *
FROM accounts)
SELECT *
FROM table1
JOIN table2
ON table1.account_id = table2.id;
*/
/* q1. Find the average number of events per day for each channel. */
/* Regular subquery: daily_events counts events per (channel, day);
the outer query averages those daily counts per channel. */
SELECT
    daily_events.channel,
    AVG(daily_events.num_event) AS avg_events
FROM (
    SELECT
        channel,
        DATE_TRUNC('day', occurred_at) AS event_day,
        COUNT(*) AS num_event
    FROM web_events
    GROUP BY
        channel,
        DATE_TRUNC('day', occurred_at)
) AS daily_events
GROUP BY daily_events.channel
ORDER BY AVG(daily_events.num_event) DESC;
/* WITH: average number of events per day for each channel.
The daily_events CTE counts events per (channel, day); the main query
averages those daily counts per channel, highest average first. */
WITH daily_events AS (
    SELECT
        channel,
        DATE_TRUNC('day', occurred_at) AS event_day,
        COUNT(*) AS num_event
    FROM web_events
    GROUP BY
        channel,
        DATE_TRUNC('day', occurred_at)
)
SELECT
    daily_events.channel,
    AVG(daily_events.num_event) AS avg_events
FROM daily_events
GROUP BY daily_events.channel
ORDER BY AVG(daily_events.num_event) DESC;
/* q2. Provide the name of the sales_rep in each region with the largest amount of
total_amt_usd sales.
rep_totals:  total_amt_usd summed per sales rep, with the rep's region.
region_best: the largest per-rep total in each region.
The main query keeps the reps whose total equals their region's best. */
WITH rep_totals AS (
    SELECT
        s.id AS rep_id,
        s.name AS rep_name,
        r.name AS region,
        SUM(o.total_amt_usd) AS total_amt
    FROM region AS r
    INNER JOIN sales_reps AS s
        ON s.region_id = r.id
    INNER JOIN accounts AS a
        ON s.id = a.sales_rep_id
    INNER JOIN orders AS o
        ON a.id = o.account_id
    GROUP BY s.id, s.name, r.name
),
region_best AS (
    SELECT
        rep_totals.region,
        MAX(rep_totals.total_amt) AS top_amt
    FROM rep_totals
    GROUP BY rep_totals.region
)
SELECT
    rep_totals.rep_name,
    rep_totals.region,
    rep_totals.total_amt
FROM rep_totals
INNER JOIN region_best
    ON rep_totals.region = region_best.region
    AND rep_totals.total_amt = region_best.top_amt;
/* q3. For the region with the largest sales total_amt_usd, how many total orders were placed?
region_totals: summed total_amt_usd and order count per region.
top_region:    the region(s) whose total equals the maximum regional total.
The main query reports the order count of the top region. */
WITH region_totals AS (
    SELECT
        r.name AS region,
        SUM(o.total_amt_usd) AS total_sales,
        COUNT(*) AS num_orders
    FROM region AS r
    INNER JOIN sales_reps AS s
        ON s.region_id = r.id
    INNER JOIN accounts AS a
        ON s.id = a.sales_rep_id
    INNER JOIN orders AS o
        ON a.id = o.account_id
    GROUP BY r.name
),
top_region AS (
    SELECT region_totals.region
    FROM region_totals
    WHERE region_totals.total_sales = (
        SELECT MAX(all_regions.total_sales)
        FROM region_totals AS all_regions
    )
)
SELECT
    region_totals.region,
    region_totals.num_orders
FROM region_totals
INNER JOIN top_region
    ON region_totals.region = top_region.region;
/* q4. How many accounts had more total purchases than the account name which has bought
the most standard_qty paper throughout their lifetime as a customer?
top_standard:    the one account with the largest SUM(standard_qty), carrying
                 its SUM(total) as total_qty.
bigger_accounts: account names whose SUM(total) exceeds that total_qty.
The main query counts the rows of bigger_accounts. */
WITH top_standard AS (
    SELECT
        a.name,
        SUM(o.standard_qty) AS standard_sales,
        SUM(o.total) AS total_qty
    FROM accounts AS a
    INNER JOIN orders AS o
        ON o.account_id = a.id
    GROUP BY a.name
    ORDER BY SUM(o.standard_qty) DESC
    LIMIT 1
),
bigger_accounts AS (
    SELECT a.name
    FROM accounts AS a
    INNER JOIN orders AS o
        ON o.account_id = a.id
    GROUP BY a.name
    HAVING SUM(o.total) > (
        SELECT top_standard.total_qty
        FROM top_standard
    )
)
SELECT COUNT(*) AS num_account
FROM bigger_accounts;
/* q5. For the customer that spent the most (in total over their lifetime as a customer)
total_amt_usd, how many web_events did they have for each channel?
top_spender: the one account with the largest SUM(total_amt_usd).
The main query counts that account's web events per channel, busiest first. */
WITH top_spender AS (
    SELECT
        a.id,
        a.name,
        SUM(o.total_amt_usd) AS total_spent
    FROM accounts AS a
    INNER JOIN orders AS o
        ON o.account_id = a.id
    GROUP BY a.id, a.name
    ORDER BY SUM(o.total_amt_usd) DESC
    LIMIT 1
)
SELECT
    a.name,
    w.channel,
    COUNT(*) AS num_event
FROM accounts AS a
INNER JOIN web_events AS w
    ON w.account_id = a.id
WHERE a.id = (
    SELECT top_spender.id
    FROM top_spender
)
GROUP BY a.name, w.channel
ORDER BY COUNT(*) DESC;
/* q6. What is the lifetime average amount spent in terms of total_amt_usd for
the top 10 total spending accounts?
top_accounts: the 10 account names with the largest SUM(total_amt_usd).
The main query averages those 10 totals into one value. */
WITH top_accounts AS (
    SELECT
        a.name,
        SUM(o.total_amt_usd) AS total_spent
    FROM accounts AS a
    INNER JOIN orders AS o
        ON o.account_id = a.id
    GROUP BY a.name
    ORDER BY SUM(o.total_amt_usd) DESC
    LIMIT 10
)
SELECT AVG(top_accounts.total_spent)
FROM top_accounts;
/* q7. What is the lifetime average amount spent in terms of total_amt_usd, including
only the companies that spent more per order, on average, than the average of all orders.
overall:            the average total_amt_usd over every row of orders. It reads
                    orders alone, the same source the per-account averages use,
                    so both sides of the comparison cover the same rows.
above_avg_accounts: one row per account whose own order average exceeds
                    the overall average.
The main query averages those per-account averages. */
WITH overall AS (
    SELECT AVG(o.total_amt_usd) AS avg_all
    FROM orders AS o
),
above_avg_accounts AS (
    SELECT
        o.account_id,
        AVG(o.total_amt_usd) AS avg_total
    FROM orders AS o
    GROUP BY o.account_id
    HAVING AVG(o.total_amt_usd) > (
        SELECT overall.avg_all
        FROM overall
    )
)
SELECT AVG(above_avg_accounts.avg_total)
FROM above_avg_accounts;