diff --git a/Dockerfile b/Dockerfile index c0dfb3e..a0c315a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,31 +1,23 @@ -FROM openjdk:16-slim - -# Lifted from: https://github.com/joshuarobinson/presto-on-k8s/blob/1c91f0b97c3b7b58bdcdec5ad6697b42e50d74c7/hive_metastore/Dockerfile +# Inspired from: https://github.com/joshuarobinson/presto-on-k8s/blob/1c91f0b97c3b7b58bdcdec5ad6697b42e50d74c7/hive_metastore/Dockerfile # see https://hadoop.apache.org/releases.html -ARG HADOOP_VERSION=3.3.0 +ARG HADOOP_VERSION=3.4.0 # see https://downloads.apache.org/hive/ ARG HIVE_METASTORE_VERSION=3.0.0 -# see https://jdbc.postgresql.org/download.html#current -ARG POSTGRES_CONNECTOR_VERSION=42.2.18 +# see https://jdbc.postgresql.org/download +ARG POSTGRES_CONNECTOR_VERSION=42.7.3 -# Set necessary environment variables. -ENV HADOOP_HOME="/opt/hadoop" -ENV PATH="/opt/spark/bin:/opt/hadoop/bin:${PATH}" -ENV DATABASE_DRIVER=org.postgresql.Driver -ENV DATABASE_TYPE=postgres -ENV DATABASE_TYPE_JDBC=postgresql -ENV DATABASE_PORT=5432 +FROM debian:12-slim AS fetcher -WORKDIR /app -SHELL ["/bin/bash", "-o", "pipefail", "-c"] +ARG HADOOP_VERSION +ARG HIVE_METASTORE_VERSION +ARG POSTGRES_CONNECTOR_VERSION -# hadolint ignore=DL3008,SC2086 +# hadolint ignore=DL3008 RUN \ echo "Install OS dependencies" && \ - build_deps="curl" && \ apt-get update -y && \ - apt-get install -y $build_deps net-tools --no-install-recommends && \ + apt-get install -y ca-certificates curl net-tools --no-install-recommends && \ echo "Download and extract the Hadoop binary package" && \ curl https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz \ | tar xvz -C /opt/ && \ @@ -45,11 +37,28 @@ RUN \ echo "Download and install the database connector" && \ curl -L https://jdbc.postgresql.org/download/postgresql-$POSTGRES_CONNECTOR_VERSION.jar --output /opt/postgresql-$POSTGRES_CONNECTOR_VERSION.jar && \ ln -s /opt/postgresql-$POSTGRES_CONNECTOR_VERSION.jar /opt/hadoop/share/hadoop/common/lib/ && \ - ln -s /opt/postgresql-$POSTGRES_CONNECTOR_VERSION.jar /opt/hive-metastore/lib/ && \ - echo "Purge build artifacts" && \ - apt-get purge -y --auto-remove $build_deps && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* + ln -s /opt/postgresql-$POSTGRES_CONNECTOR_VERSION.jar /opt/hive-metastore/lib/ + +FROM gcr.io/distroless/java21-debian12:latest + +ARG HADOOP_VERSION +ARG HIVE_METASTORE_VERSION + +COPY --from=fetcher /bin/netstat /bin/netstat +COPY --from=fetcher /opt/hadoop-$HADOOP_VERSION /opt/hadoop-$HADOOP_VERSION +COPY --from=fetcher /opt/hadoop /opt/hadoop +COPY --from=fetcher /opt/hive-metastore /opt/hive-metastore +COPY --from=fetcher /opt/apache-hive-metastore-$HIVE_METASTORE_VERSION-bin/lib/ /opt/apache-hive-metastore-$HIVE_METASTORE_VERSION-bin/lib/ + +WORKDIR /app + +# Set necessary environment variables. +ENV HADOOP_HOME="/opt/hadoop" +ENV PATH="/opt/spark/bin:/opt/hadoop/bin:${PATH}" +ENV DATABASE_DRIVER=org.postgresql.Driver +ENV DATABASE_TYPE=postgres +ENV DATABASE_TYPE_JDBC=postgresql +ENV DATABASE_PORT=5432 COPY run.sh run.sh