Skip to content

Commit

Permalink
feat(script): generate tpch data set
Browse files Browse the repository at this point in the history
  • Loading branch information
xudong963 committed Jun 16, 2022
1 parent 7de80ae commit f98f909
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,6 @@ venv/
__pycache__/

*.zip

# tpch data set
tpch/data
1 change: 1 addition & 0 deletions .licenserc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ header:
- "website"
- "tests"
- "tools"
- "tpch"
# Ignore hidden files
- ".cargo"
- ".databend"
Expand Down
5 changes: 5 additions & 0 deletions tpch/run-tpch-dbgen.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
#
# Container entrypoint: run dbgen inside /tpch-dbgen and move the generated
# *.tbl files into /data.
#
# Usage: run-tpch-dbgen.sh [SCALE_FACTOR]
#   SCALE_FACTOR  value passed to `dbgen -s`; defaults to 1.

# Stop at the first failing command so a failed `cd` or `dbgen` run never
# reaches the `mv` below with missing or partial output.
set -euo pipefail

scale_factor="${1:-1}"

cd /tpch-dbgen
./dbgen -vf -s "${scale_factor}"

# `./` and `--` keep any file name from being parsed as an option to mv.
mv -- ./*.tbl /data/
14 changes: 14 additions & 0 deletions tpch/tpch-data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
#
# Build the data-generator image from tpchdata.dockerfile and, unless
# ./data/customer.tbl is already present, run it with ./data bind-mounted
# at /data inside the container.

# Abort on the first error so a failed `docker build` is not followed by a
# `docker run` against a stale or missing image.
set -euo pipefail

# The dockerfile, the build context and the data directory are all addressed
# relative to this script, so run from the script's own directory.
cd "$(dirname "${BASH_SOURCE[0]}")"

# Construct a docker image to generate tpch-data
docker build -f tpchdata.dockerfile -t databend:latest .

# Generate data into the ./data directory if it does not already exist
FILE=./data/customer.tbl
if test -f "$FILE"; then
    echo "$FILE exists."
else
    # -p: succeed if the directory already exists, but still report real errors.
    mkdir -p data
    # Quote the bind-mount spec so a working directory containing spaces works.
    docker run -v "$(pwd)/data:/data" --rm databend:latest
    ls -l data
fi
14 changes: 14 additions & 0 deletions tpch/tpchdata.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Image that builds the tpch-dbgen tool and, when run, executes
# run-tpch-dbgen.sh from /tpch-dbgen.
FROM ubuntu:22.04

# Install only what is required to clone and compile dbgen, and drop the apt
# package lists in the same layer so they do not persist in the image.
# ca-certificates is listed explicitly: the HTTPS clone below needs it and it
# is not pulled in automatically once recommended packages are disabled.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        git && \
    rm -rf /var/lib/apt/lists/*

# Use https://github.com/databricks/tpch-dbgen to generate data
# Only the current sources are needed to build, so a shallow clone is enough.
RUN git clone --depth 1 https://github.com/databricks/tpch-dbgen.git /tpch-dbgen && \
    make -C /tpch-dbgen

WORKDIR /tpch-dbgen
# COPY rather than ADD: this is a plain local file, no extraction or URL fetch.
COPY run-tpch-dbgen.sh /tpch-dbgen/

# Declared after all build-time writes; nothing is written to /data during the build.
VOLUME /data

ENTRYPOINT [ "bash", "./run-tpch-dbgen.sh" ]

0 comments on commit f98f909

Please sign in to comment.