Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First 25 queries of TPC-DS #632

Merged
merged 1 commit into from
Dec 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions ydb/library/benchmarks/queries/tpcds/yql/q1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{% include 'header.sql.jinja' %}

-- NB: Subqueries
-- Sum of sr_fee per (customer, store) over store returns whose return date
-- row has d_year = 2000.
-- NB: A YQL named expression is used in place of the ANSI SQL "with" clause.
-- NB: Group-by keys must be written with their correlation name (table
--     alias), so the select list uses the alias as well.
$customer_total_return = (
    SELECT
        sr.sr_customer_sk AS ctr_customer_sk,
        sr.sr_store_sk AS ctr_store_sk,
        -- NB: "SR_FEE" was renamed to "sr_fee"
        sum(sr.sr_fee) AS ctr_total_return
    FROM {{store_returns}} AS sr
    CROSS JOIN {{date_dim}} AS d
    WHERE sr_returned_date_sk = d_date_sk
        AND d_year = 2000
    GROUP BY
        sr.sr_customer_sk,
        sr.sr_store_sk
);

-- Per-store threshold: the average of ctr_total_return over that store's rows
-- in $customer_total_return, multiplied by 1.2.
$avg_total_returns = (
    SELECT
        ctr_store_sk,
        avg(ctr_total_return) * 1.2 AS ctr_avg
    FROM $customer_total_return
    GROUP BY ctr_store_sk
);

-- -- start query 1 in stream 0 using template query1.tpl and seed 2031708268
-- Customer ids whose ctr_total_return is above the ctr_avg threshold of the
-- same store, for stores with s_state = 'NM'; first 100 by c_customer_id.
SELECT
    c_customer_id
FROM $customer_total_return AS ctr1
CROSS JOIN {{store}}
CROSS JOIN {{customer}}
-- NB: The inequality condition was rewritten with an explicit join on the
--     store key; the comparison itself stays in WHERE.
JOIN $avg_total_returns AS ctr2
    ON ctr1.ctr_store_sk = ctr2.ctr_store_sk
WHERE ctr1.ctr_total_return > ctr2.ctr_avg
    AND s_store_sk = ctr1.ctr_store_sk
    AND s_state = 'NM'
    AND ctr1.ctr_customer_sk = c_customer_sk
ORDER BY c_customer_id
LIMIT 100;

-- -- end query 1 in stream 0 using template query1.tpl
69 changes: 69 additions & 0 deletions ydb/library/benchmarks/queries/tpcds/yql/q10.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
{% include 'header.sql.jinja' %}

-- NB: Subqueries
-- Distinct ss_customer_sk values of store sales whose sold-date row has
-- d_year = 2000 and d_moy between 3 and 3+3.
$bla1 = (
    SELECT DISTINCT
        ss_customer_sk
    FROM {{store_sales}} AS store_sales
    CROSS JOIN {{date_dim}} AS date_dim
    WHERE ss_sold_date_sk = d_date_sk
        AND d_year = 2000
        AND d_moy BETWEEN 3 AND 3 + 3
);
-- Distinct customer_sk values taken from web sales (ws_bill_customer_sk) and
-- catalog sales (cs_ship_customer_sk) whose sold-date row has d_year = 2000
-- and d_moy between 3 and 3+3. The two sources are concatenated with
-- UNION ALL and de-duplicated by the outer DISTINCT.
$bla2 = (
    SELECT DISTINCT
        customer_sk
    FROM (
        (
            SELECT
                ws_bill_customer_sk AS customer_sk
            FROM {{web_sales}} AS web_sales
            CROSS JOIN {{date_dim}} AS date_dim
            WHERE ws_sold_date_sk = d_date_sk
                AND d_year = 2000
                AND d_moy BETWEEN 3 AND 3 + 3
        )
        UNION ALL
        (
            SELECT
                cs_ship_customer_sk AS customer_sk
            FROM {{catalog_sales}} AS catalog_sales
            CROSS JOIN {{date_dim}} AS date_dim
            WHERE cs_sold_date_sk = d_date_sk
                AND d_year = 2000
                AND d_moy BETWEEN 3 AND 3 + 3
        )
    )
);

-- start query 1 in stream 0 using template query10.tpl and seed 797269820
-- Row counts per combination of customer-demographics attributes, for
-- customers whose current address is in one of the listed counties and whose
-- c_customer_sk has a match in both $bla1 and $bla2 (left semi joins).
-- cnt1..cnt6 are all count(*) over the same groups.
SELECT
    customer_demographics.cd_gender,
    customer_demographics.cd_marital_status,
    customer_demographics.cd_education_status,
    count(*) AS cnt1,
    customer_demographics.cd_purchase_estimate,
    count(*) AS cnt2,
    customer_demographics.cd_credit_rating,
    count(*) AS cnt3,
    customer_demographics.cd_dep_count,
    count(*) AS cnt4,
    customer_demographics.cd_dep_employed_count,
    count(*) AS cnt5,
    customer_demographics.cd_dep_college_count,
    count(*) AS cnt6
FROM {{customer}} AS c
CROSS JOIN {{customer_address}} AS ca
CROSS JOIN {{customer_demographics}} AS customer_demographics
LEFT SEMI JOIN $bla1 AS bla1
    ON (c.c_customer_sk = bla1.ss_customer_sk)
LEFT SEMI JOIN $bla2 AS bla2
    ON (c.c_customer_sk = bla2.customer_sk)
WHERE c.c_current_addr_sk = ca.ca_address_sk
    AND ca_county IN ('Fillmore County','McPherson County','Bonneville County','Boone County','Brown County')
    AND cd_demo_sk = c.c_current_cdemo_sk
GROUP BY
    customer_demographics.cd_gender,
    customer_demographics.cd_marital_status,
    customer_demographics.cd_education_status,
    customer_demographics.cd_purchase_estimate,
    customer_demographics.cd_credit_rating,
    customer_demographics.cd_dep_count,
    customer_demographics.cd_dep_employed_count,
    customer_demographics.cd_dep_college_count
ORDER BY
    customer_demographics.cd_gender,
    customer_demographics.cd_marital_status,
    customer_demographics.cd_education_status,
    customer_demographics.cd_purchase_estimate,
    customer_demographics.cd_credit_rating,
    customer_demographics.cd_dep_count,
    customer_demographics.cd_dep_employed_count,
    customer_demographics.cd_dep_college_count
LIMIT 100;

-- end query 1 in stream 0 using template query10.tpl
86 changes: 86 additions & 0 deletions ydb/library/benchmarks/queries/tpcds/yql/q11.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
{% include 'header.sql.jinja' %}

-- NB: Subqueries

-- start query 1 in stream 0 using template query11.tpl and seed 1819994127
-- Per customer and d_year: sum of (ext_list_price - ext_discount_amt).
-- The first branch aggregates store sales and tags rows with sale_type 's';
-- the second aggregates web sales (joined on ws_bill_customer_sk) and tags
-- rows with sale_type 'w'. Both branches are concatenated with UNION ALL.
$year_total = (
    SELECT
        customer.c_customer_id AS customer_id,
        customer.c_first_name AS customer_first_name,
        customer.c_last_name AS customer_last_name,
        customer.c_preferred_cust_flag AS customer_preferred_cust_flag,
        customer.c_birth_country AS customer_birth_country,
        customer.c_login AS customer_login,
        customer.c_email_address AS customer_email_address,
        date_dim.d_year AS dyear,
        sum(ss_ext_list_price - ss_ext_discount_amt) AS year_total,
        's' AS sale_type
    FROM {{customer}} AS customer
    CROSS JOIN {{store_sales}} AS store_sales
    CROSS JOIN {{date_dim}} AS date_dim
    WHERE c_customer_sk = ss_customer_sk
        AND ss_sold_date_sk = d_date_sk
    GROUP BY
        customer.c_customer_id,
        customer.c_first_name,
        customer.c_last_name,
        customer.c_preferred_cust_flag,
        customer.c_birth_country,
        customer.c_login,
        customer.c_email_address,
        date_dim.d_year
    UNION ALL
    SELECT
        customer.c_customer_id AS customer_id,
        customer.c_first_name AS customer_first_name,
        customer.c_last_name AS customer_last_name,
        customer.c_preferred_cust_flag AS customer_preferred_cust_flag,
        customer.c_birth_country AS customer_birth_country,
        customer.c_login AS customer_login,
        customer.c_email_address AS customer_email_address,
        date_dim.d_year AS dyear,
        sum(ws_ext_list_price - ws_ext_discount_amt) AS year_total,
        'w' AS sale_type
    FROM {{customer}} AS customer
    CROSS JOIN {{web_sales}} AS web_sales
    CROSS JOIN {{date_dim}} AS date_dim
    WHERE c_customer_sk = ws_bill_customer_sk
        AND ws_sold_date_sk = d_date_sk
    GROUP BY
        customer.c_customer_id,
        customer.c_first_name,
        customer.c_last_name,
        customer.c_preferred_cust_flag,
        customer.c_birth_country,
        customer.c_login,
        customer.c_email_address,
        date_dim.d_year
);

-- Customers whose web year_total ratio (1999+1 over 1999) is greater than
-- their store year_total ratio for the same two years. $year_total is joined
-- to itself four times: store/web crossed with first/second year.
SELECT
    t_s_secyear.customer_id,
    t_s_secyear.customer_first_name,
    t_s_secyear.customer_last_name,
    t_s_secyear.customer_birth_country
FROM $year_total AS t_s_firstyear
CROSS JOIN $year_total AS t_s_secyear
CROSS JOIN $year_total AS t_w_firstyear
CROSS JOIN $year_total AS t_w_secyear
WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id
    AND t_s_firstyear.customer_id = t_w_secyear.customer_id
    AND t_s_firstyear.customer_id = t_w_firstyear.customer_id
    AND t_s_firstyear.sale_type = 's'
    AND t_w_firstyear.sale_type = 'w'
    AND t_s_secyear.sale_type = 's'
    AND t_w_secyear.sale_type = 'w'
    AND t_s_firstyear.dyear = 1999
    AND t_s_secyear.dyear = 1999 + 1
    AND t_w_firstyear.dyear = 1999
    AND t_w_secyear.dyear = 1999 + 1
    AND t_s_firstyear.year_total > 0
    AND t_w_firstyear.year_total > 0
    -- Each ratio falls back to 0.0 when its first-year total is not positive.
    AND CASE
            WHEN t_w_firstyear.year_total > 0
                THEN t_w_secyear.year_total / t_w_firstyear.year_total
            ELSE 0.0
        END
        > CASE
            WHEN t_s_firstyear.year_total > 0
                THEN t_s_secyear.year_total / t_s_firstyear.year_total
            ELSE 0.0
        END
ORDER BY
    t_s_secyear.customer_id,
    t_s_secyear.customer_first_name,
    t_s_secyear.customer_last_name,
    t_s_secyear.customer_birth_country
LIMIT 100;

-- end query 1 in stream 0 using template query11.tpl
36 changes: 36 additions & 0 deletions ydb/library/benchmarks/queries/tpcds/yql/q12.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{% include 'header.sql.jinja' %}

-- NB: Subqueries
-- start query 1 in stream 0 using template query12.tpl and seed 345591136
-- Web-sales revenue per item for the three listed categories, for sales dated
-- from 1998-01-06 through 30 days later (BETWEEN, so both ends are included).
-- revenueratio is the item's revenue times 100 divided by the revenue summed
-- over all result rows sharing the same i_class.
SELECT
    item.i_item_id,
    item.i_item_desc,
    item.i_category,
    item.i_class,
    item.i_current_price,
    sum(web_sales.ws_ext_sales_price) AS itemrevenue,
    sum(web_sales.ws_ext_sales_price) * 100
        / sum(sum(web_sales.ws_ext_sales_price)) OVER (PARTITION BY item.i_class)
        AS revenueratio
FROM {{web_sales}} AS web_sales
CROSS JOIN {{item}} AS item
CROSS JOIN {{date_dim}} AS date_dim
WHERE web_sales.ws_item_sk = item.i_item_sk
    AND item.i_category IN ('Electronics', 'Books', 'Women')
    AND web_sales.ws_sold_date_sk = date_dim.d_date_sk
    AND CAST(date_dim.d_date AS date)
        BETWEEN CAST('1998-01-06' AS date)
        AND (CAST('1998-01-06' AS date) + CAST('P30D' AS interval))
GROUP BY
    item.i_item_id,
    item.i_item_desc,
    item.i_category,
    item.i_class,
    item.i_current_price
ORDER BY
    item.i_category,
    item.i_class,
    item.i_item_id,
    item.i_item_desc,
    revenueratio
LIMIT 100;
-- end query 1 in stream 0 using template query12.tpl
56 changes: 56 additions & 0 deletions ydb/library/benchmarks/queries/tpcds/yql/q13.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{% include 'header.sql.jinja' %}

-- NB: Subqueries

-- start query 1 in stream 0 using template query13.tpl and seed 622697896
-- Averages of ss_quantity, ss_ext_sales_price and ss_ext_wholesale_cost, plus
-- the sum of ss_ext_wholesale_cost, over store sales with d_year = 2001 that
-- satisfy one of three demographic/price conditions and one of three
-- address/net-profit conditions.
-- The join predicates are repeated inside every OR branch and are kept that
-- way here rather than factored out.
SELECT
    avg(ss_quantity),
    avg(ss_ext_sales_price),
    avg(ss_ext_wholesale_cost),
    sum(ss_ext_wholesale_cost)
FROM {{store_sales}} AS store_sales
CROSS JOIN {{store}} AS store
CROSS JOIN {{customer_demographics}} AS customer_demographics
CROSS JOIN {{household_demographics}} AS household_demographics
CROSS JOIN {{customer_address}} AS customer_address
CROSS JOIN {{date_dim}} AS date_dim
WHERE s_store_sk = ss_store_sk
    AND ss_sold_date_sk = d_date_sk
    AND d_year = 2001
    -- Demographic / sales-price alternatives.
    AND (
        (
            ss_hdemo_sk = hd_demo_sk
            AND cd_demo_sk = ss_cdemo_sk
            AND cd_marital_status = 'U'
            AND cd_education_status = 'Secondary'
            AND ss_sales_price BETWEEN 100.00 AND 150.00
            AND hd_dep_count = 3
        )
        OR (
            ss_hdemo_sk = hd_demo_sk
            AND cd_demo_sk = ss_cdemo_sk
            AND cd_marital_status = 'W'
            AND cd_education_status = 'College'
            AND ss_sales_price BETWEEN 50.00 AND 100.00
            AND hd_dep_count = 1
        )
        OR (
            ss_hdemo_sk = hd_demo_sk
            AND cd_demo_sk = ss_cdemo_sk
            AND cd_marital_status = 'D'
            AND cd_education_status = 'Primary'
            AND ss_sales_price BETWEEN 150.00 AND 200.00
            AND hd_dep_count = 1
        )
    )
    -- Address / net-profit alternatives.
    AND (
        (
            ss_addr_sk = ca_address_sk
            AND ca_country = 'United States'
            AND ca_state IN ('TX', 'OK', 'MI')
            AND ss_net_profit BETWEEN 100 AND 200
        )
        OR (
            ss_addr_sk = ca_address_sk
            AND ca_country = 'United States'
            AND ca_state IN ('WA', 'NC', 'OH')
            AND ss_net_profit BETWEEN 150 AND 300
        )
        OR (
            ss_addr_sk = ca_address_sk
            AND ca_country = 'United States'
            AND ca_state IN ('MT', 'FL', 'GA')
            AND ss_net_profit BETWEEN 50 AND 250
        )
    )
;

-- end query 1 in stream 0 using template query13.tpl
Loading
Loading