Skip to content

Commit

Permalink
feat(script): generate tpch data set
Browse files Browse the repository at this point in the history
  • Loading branch information
xudong963 committed Jun 16, 2022
1 parent 7de80ae commit 5329fc3
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,6 @@ venv/
__pycache__/

*.zip

# tpch data set
tpch/data
7 changes: 7 additions & 0 deletions tpch/run-tpch-dbgen.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# Copyright 2020-2021 The Databend Authors.
# SPDX-License-Identifier: Apache-2.0.

# Container entrypoint: generate the TPC-H tables with dbgen and move them
# into the /data volume.

# Abort on the first failing command so that a failed `cd` or a failed dbgen
# run never results in partial or stale *.tbl files being moved into /data.
set -euo pipefail

# dbgen is built in /tpch-dbgen by the image; run it from there.
cd /tpch-dbgen || exit 1

# -v: verbose, -f: overwrite existing output files, -s 1: scale factor 1.
./dbgen -vf -s 1

# Hand the generated tables over to the mounted volume.
mv -- *.tbl /data
16 changes: 16 additions & 0 deletions tpch/tpch-data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
# Copyright 2020-2021 The Databend Authors.
# SPDX-License-Identifier: Apache-2.0.

# Stop at the first failing step (for example a broken image build) instead of
# carrying on to `docker run` with a stale or missing image.
set -euo pipefail

# Build the Docker image used to generate the TPC-H data.
# Run this script from the directory that contains tpchdata.dockerfile.
docker build -f tpchdata.dockerfile -t databend:latest .

# Generate data into the ./data directory if it does not already exist.
# customer.tbl is used as the marker of a previous run.
FILE=./data/customer.tbl
if test -f "$FILE"; then
    echo "$FILE exists."
else
    # -p: succeed if the directory is already there, but still report real
    # failures (e.g. permissions) instead of hiding them.
    mkdir -p data
    # Quote the bind-mount path so working directories containing spaces work.
    docker run -v "$(pwd)/data:/data" --rm databend:latest
    ls -l data
fi
17 changes: 17 additions & 0 deletions tpch/tpchdata.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright 2020-2021 The Databend Authors.
# SPDX-License-Identifier: Apache-2.0.

FROM ubuntu:22.04

# Toolchain needed to fetch and compile dbgen.
# ca-certificates is listed explicitly: with --no-install-recommends it is no
# longer pulled in alongside git, and the HTTPS clone below needs it.
# The apt lists are removed in the same layer so they do not bloat the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        git && \
    rm -rf /var/lib/apt/lists/*

# Use https://github.com/databricks/tpch-dbgen to generate data.
# Only the current sources are needed to build, so a shallow clone is enough.
RUN git clone --depth 1 https://github.com/databricks/tpch-dbgen.git /tpch-dbgen

# Build dbgen inside the checkout; WORKDIR also sets the directory the
# entrypoint script is resolved against at runtime.
WORKDIR /tpch-dbgen
RUN make

# Plain local file: COPY is sufficient, ADD's extra behaviors are not needed.
COPY run-tpch-dbgen.sh /tpch-dbgen/

# Generated *.tbl files are moved here by the entrypoint script; callers
# bind-mount a host directory onto it.
VOLUME /data

ENTRYPOINT [ "bash", "./run-tpch-dbgen.sh" ]

0 comments on commit 5329fc3

Please sign in to comment.