Skip to content

Files

Latest commit

 

History

History

Seller_analysis

Folders and files

NameName
Last commit message
Last commit date

parent directory

..
 
 
 
 

Seller Analysis

%load_ext sql
import os
host = "localhost"
database = "olist"
user = "postgres"
password = "sql123"
connection_string = f"postgresql://{user}:{password}@{host}/{database}"
%sql $connection_string
'Connected: postgres@olist'

Sellers by state

%%sql
SELECT seller_state,
       number_of_sellers,
       RANK() OVER(ORDER BY number_of_sellers DESC)
FROM  (SELECT seller_state,
              COUNT(seller_id) AS number_of_sellers
       FROM sellers
       GROUP BY seller_state) AS nos
LIMIT 10
 * postgresql://postgres:***@localhost/olist
10 rows affected.
seller_state number_of_sellers rank
SP 1849 1
PR 349 2
MG 244 3
SC 190 4
RJ 171 5
RS 129 6
GO 40 7
DF 30 8
ES 23 9
BA 19 10

Top 5 sellers by average shipping time

%%sql
WITH ship_interval AS(SELECT seller_id,
                          order_id,
                          EXTRACT(EPOCH FROM (order_delivered_carrier - order_purchase)::interval)/3600 AS delivery_interval,
                          EXTRACT(EPOCH FROM (shipping_limit_date - order_purchase)::interval)/3600 AS shipping_limit_interval
                      FROM(SELECT order_id,
                                  seller_id,
                                  order_purchase,
                                  order_delivered_carrier,
                                  shipping_limit_date
                           FROM order_items
                           JOIN orders USING(order_id)
                           JOIN sellers USING(seller_id)
                           WHERE order_status IN('delivered')) AS ship_time)
SELECT seller_id,
       AVG(delivery_interval)::real AS avg_del_time,
       AVG(shipping_limit_interval)::real AS ship_limit_time
FROM ship_interval
GROUP BY seller_id
ORDER BY avg_del_time 
LIMIT 5
 * postgresql://postgres:***@localhost/olist
5 rows affected.
seller_id avg_del_time ship_limit_time
89de2d6f23e9746ff309705b23581faa -413.78876 148.53955
1fa2d3def6adfa70e58c276bb64fe5bb -1.0680555 144.33945
1927cdb9fa74f5270097f6250e8e8f34 -0.6963889 96.12444
b19c48688808720822399ffa9f2dbe2f -0.48055556 144.17473
0f94588695d71662beec8d883ffacf09 1.2702777 144.14223

Categories per seller

%%sql
WITH seller_category AS(SELECT seller_id,
                               COUNT(product_category) AS num_of_cat
                        FROM sellers
                        JOIN order_items USING(seller_id)
                        JOIN products USING(product_id)
                        GROUP BY seller_id)
SELECT AVG(num_of_cat) AS avg_categories,
       percentile_cont(.5)
       WITHIN GROUP (ORDER BY num_of_cat) AS median_categories
FROM seller_category
LIMIT 10
 * postgresql://postgres:***@localhost/olist
1 rows affected.
avg_categories median_categories
35.8794830371567044 7.0

Average sales per order by seller

%%sql
SELECT seller_id,
       avg_order_rev,
       RANK() OVER(ORDER BY avg_order_rev DESC) AS rank
FROM(WITH order_unit AS(SELECT seller_id,
                               price,
                               COUNT(order_id) AS total_orders,
                               SUM(order_item_id) AS total_units
                        FROM sellers
                        JOIN order_items USING(seller_id)
                        GROUP BY seller_id, price),
    seller_unique AS(SELECT seller_id,
                            (price*total_units)::real AS revenue,
                            total_orders
                     FROM order_unit)

    SELECT seller_id,
           (revenue/total_orders)::real AS avg_order_rev
    FROM seller_unique) AS seller_avg
LIMIT 10
 * postgresql://postgres:***@localhost/olist
10 rows affected.
seller_id avg_order_rev rank
b37c4c02bda3161a7546a4e6d222d5b2 7560.0 1
e3b4998c7a498169dc7bce44e6bb6277 6735.0 2
80ceebb4ee9b31afb6c6a916a574a1e2 6729.0 3
ee27a8f15b1dded4d213a468ba4eb391 6499.0 4
59417c56835dd8e2e72f91f809cd4092 4799.0 5
59417c56835dd8e2e72f91f809cd4092 4690.0 6
c72de06d72748d1a0dfb2125be43ba63 4590.0 7
b37c4c02bda3161a7546a4e6d222d5b2 4475.0 8
512d298ac2a96d1931b6bd30aa21f61d 4399.87 9
0873d9f8f36123f8d910f4760e788cfb 4170.0 10

Top 5 sellers top 3 markets

%%sql
SELECT *
FROM(WITH seller_market AS(SELECT seller_id,
                             customer_state,
                             SUM(revenue) AS state_revenue
                      FROM(SELECT seller_id, 
                                  (price * order_item_id)::real AS revenue,
                                  customer_state
                           FROM sellers
                           JOIN order_items USING(seller_id)
                           JOIN orders USING(order_id)
                           JOIN customers USING(customer_id)) AS revenue_per_state
                           GROUP BY seller_id, customer_state)
SELECT seller_id, 
       customer_state,
       state_revenue,
       RANK() OVER(PARTITION BY seller_id ORDER BY state_revenue DESC)
FROM seller_market
WHERE seller_id IN(SELECT seller_id
                   FROM(SELECT seller_id,
                               SUM(revenue) AS total_rev
                        FROM(SELECT seller_id, 
                                    (order_item_id * price)::real AS revenue
                              FROM sellers
                              JOIN order_items USING(seller_id)) AS rev
                              GROUP BY seller_id
                              ORDER BY total_rev DESC) AS seller_rev
                              LIMIT 5)) AS rank_market_revenue
WHERE rank < 4
 * postgresql://postgres:***@localhost/olist
15 rows affected.
seller_id customer_state state_revenue rank
4869f7a5dfa277a7dca6462dcf3b52b2 SP 91269.52 1
4869f7a5dfa277a7dca6462dcf3b52b2 RJ 39960.938 2
4869f7a5dfa277a7dca6462dcf3b52b2 MG 24398.865 3
4a3ca9315b744ce9f8e9374361493884 SP 97923.86 1
4a3ca9315b744ce9f8e9374361493884 RJ 34929.03 2
4a3ca9315b744ce9f8e9374361493884 MG 28982.508 3
53243585a1d6dc2643021fd1853d8905 SP 58100.15 1
53243585a1d6dc2643021fd1853d8905 RJ 30554.172 2
53243585a1d6dc2643021fd1853d8905 BA 26564.33 3
7c67e1448b00f6e969d365cea6b010ab SP 115870.59 1
7c67e1448b00f6e969d365cea6b010ab RJ 61318.992 2
7c67e1448b00f6e969d365cea6b010ab BA 20978.963 3
da8622b14eb17ae2831f4ac5b9dab84a SP 93375.36 1
da8622b14eb17ae2831f4ac5b9dab84a MG 28836.824 2
da8622b14eb17ae2831f4ac5b9dab84a RJ 28352.11 3