Use base Docker image with Kafka and build other images on top of it #20

Merged: 11 commits, Oct 9, 2017
2 changes: 1 addition & 1 deletion Makefile
@@ -1,4 +1,4 @@
SUBDIRS=kafka-inmemory kafka-statefulsets kafka-connect
SUBDIRS=kafka-base zookeeper kafka-inmemory kafka-statefulsets kafka-connect

all: $(SUBDIRS)

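
Note: kafka-connect, kafka-inmemory and kafka-statefulsets are now built FROM enmasseproject/kafka-base, so the base image has to exist before the derived images are built; listing kafka-base (and zookeeper) first in SUBDIRS covers a sequential make. A hypothetical manual equivalent (the build target name is an assumption, taken from the .PHONY list in kafka-base/Makefile):

```sh
# Hypothetical: build the shared base image first, then the images derived from it.
for dir in kafka-base zookeeper kafka-inmemory kafka-statefulsets kafka-connect; do
  make -C "$dir" build
done
```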
29 changes: 29 additions & 0 deletions kafka-base/Dockerfile
@@ -0,0 +1,29 @@
FROM fedora:26

RUN dnf -y install java-1.8.0-openjdk gettext hostname && dnf clean all -y

# set Kafka home folder
ENV KAFKA_HOME=/opt/kafka

# Add kafka group / user
RUN groupadd -r -g 1001 kafka && useradd -r -m -u 1001 -g kafka kafka

# Set Scala and Kafka version
ENV SCALA_VERSION=2.11
ENV KAFKA_VERSION=0.11.0.1

# Set Kafka MD5 checksum
ENV CHECKSUM="C8FD6521EC8D414687C7471524009A8A kafka_2.11-0.11.0.1.tgz"

# Set from build args
ARG version=latest
ENV VERSION ${version}

WORKDIR $KAFKA_HOME

# downloading/extracting Apache Kafka
RUN curl -O http://www.eu.apache.org/dist/kafka/$KAFKA_VERSION/kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz \
&& echo $CHECKSUM > kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz.md5 \
&& md5sum --check kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz.md5 \
&& tar xvfz kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz -C $KAFKA_HOME --strip-components=1 \
&& rm -f kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz*
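
A minimal local build of the new base image might look like this (hypothetical invocation; the tag matches the FROM lines in the derived Dockerfiles, and the version build arg only feeds the VERSION env variable):

```sh
# Build the shared base image so the FROM enmasseproject/kafka-base:latest lines
# in kafka-connect, kafka-inmemory and kafka-statefulsets resolve locally.
docker build --build-arg version=latest -t enmasseproject/kafka-base:latest kafka-base/
```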
5 changes: 5 additions & 0 deletions kafka-base/Makefile
@@ -0,0 +1,5 @@
PROJECT_NAME=kafka-base

include ../Makefile.common

.PHONY: build clean
26 changes: 4 additions & 22 deletions kafka-connect/Dockerfile
@@ -1,28 +1,10 @@
FROM fedora:25

RUN dnf -y install which java-1.8.0-openjdk libaio python gettext hostname iputils wget && dnf clean all -y

# set Scala and Kafka version
ENV SCALA_VERSION=2.11
ENV KAFKA_VERSION=0.11.0.1
# set Kafka home folder
ENV KAFKA_HOME=/opt/kafka

# Set from build args
ARG version=latest
ENV VERSION ${version}

# downloading/extracting Apache Kafka
RUN wget http://www.eu.apache.org/dist/kafka/$KAFKA_VERSION/kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz

RUN mkdir $KAFKA_HOME \
&& tar xvfz kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz -C $KAFKA_HOME --strip-components=1 \
&& mkdir $KAFKA_HOME/plugins

WORKDIR $KAFKA_HOME
FROM enmasseproject/kafka-base:latest

EXPOSE 8083

# copy scripts for starting Kafka Connect
COPY ./scripts/ $KAFKA_HOME

USER kafka:kafka

ENTRYPOINT ["/opt/kafka/kafka_connect_run.sh"]
8 changes: 1 addition & 7 deletions kafka-connect/resources/kafka-connect.yaml
@@ -18,13 +18,7 @@ spec:
protocol: TCP
env:
- name: KAFKA_CONNECT_BOOTSTRAP_SERVERS
value: "${KAFKA_CONNECT_BOOTSTRAP_SERVERS}"
- name: KAFKA_CONNECT_GROUP_ID
value: "${KAFKA_CONNECT_GROUP_ID}"
- name: KAFKA_CONNECT_KEY_CONVERTER
value: "${KAFKA_CONNECT_KEY_CONVERTER}"
- name: KAFKA_CONNECT_VALUE_CONVERTER
value: "${KAFKA_CONNECT_VALUE_CONVERTER}"
value: "kafka:9092"
livenessProbe:
httpGet:
path: /
19 changes: 11 additions & 8 deletions kafka-connect/scripts/kafka_connect_run.sh
@@ -6,35 +6,35 @@ if [ "$1" = 'bash' ]; then
exec "$@"
else
if [ -z "$KAFKA_CONNECT_BOOTSTRAP_SERVERS" ]; then
KAFKA_CONNECT_BOOTSTRAP_SERVERS="kafka:9092"
export KAFKA_CONNECT_BOOTSTRAP_SERVERS="kafka:9092"
fi

if [ -z "$KAFKA_CONNECT_GROUP_ID" ]; then
KAFKA_CONNECT_GROUP_ID="connect-cluster"
export KAFKA_CONNECT_GROUP_ID="connect-cluster"
fi

if [ -z "$KAFKA_CONNECT_OFFSET_STORAGE_TOPIC" ]; then
KAFKA_CONNECT_OFFSET_STORAGE_TOPIC="${KAFKA_CONNECT_GROUP_ID}-offsets"
export KAFKA_CONNECT_OFFSET_STORAGE_TOPIC="${KAFKA_CONNECT_GROUP_ID}-offsets"
fi

if [ -z "$KAFKA_CONNECT_CONFIG_STORAGE_TOPIC" ]; then
KAFKA_CONNECT_CONFIG_STORAGE_TOPIC="${KAFKA_CONNECT_GROUP_ID}-configs"
export KAFKA_CONNECT_CONFIG_STORAGE_TOPIC="${KAFKA_CONNECT_GROUP_ID}-configs"
fi

if [ -z "$KAFKA_CONNECT_STATUS_STORAGE_TOPIC" ]; then
KAFKA_CONNECT_STATUS_STORAGE_TOPIC="${KAFKA_CONNECT_GROUP_ID}-status"
export KAFKA_CONNECT_STATUS_STORAGE_TOPIC="${KAFKA_CONNECT_GROUP_ID}-status"
fi

if [ -z "$KAFKA_CONNECT_KEY_CONVERTER" ]; then
KAFKA_CONNECT_KEY_CONVERTER="org.apache.kafka.connect.json.JsonConverter"
export KAFKA_CONNECT_KEY_CONVERTER="org.apache.kafka.connect.json.JsonConverter"
fi

if [ -z "$KAFKA_CONNECT_VALUE_CONVERTER" ]; then
KAFKA_CONNECT_VALUE_CONVERTER="org.apache.kafka.connect.json.JsonConverter"
export KAFKA_CONNECT_VALUE_CONVERTER="org.apache.kafka.connect.json.JsonConverter"
fi

if [ -z "$KAFKA_CONNECT_PLUGIN_PATH" ]; then
KAFKA_CONNECT_PLUGIN_PATH="${KAFKA_HOME}/plugins"
export KAFKA_CONNECT_PLUGIN_PATH="${KAFKA_HOME}/plugins"
fi

# Write the config file
@@ -59,6 +59,9 @@ EOF
cat /tmp/barnabas-connect.properties
echo ""

# dir for saving application logs
export LOG_DIR=/tmp/logs
Member

Does anything actually read LOG_DIR? AFAICS, it's only KAFKA_LOG_DIRS which is actually used.

Member Author

This affects the logs (as in Log4j logs). Before I moved this to the /tmp directory, Kafka Connect was complaining that it could not create the logs directory in /opt/kafka (where the kafka user has no write access). This should stay to avoid that error and should be removed with #21 once we stop logging to files.

Member

OK, thanks. I noticed only after I commented that you'd mentioned this problem in the PR description.
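
For background on the LOG_DIR discussion above (paraphrased from Kafka's own launcher, not part of this PR): bin/kafka-run-class.sh, which connect-distributed.sh delegates to, defaults LOG_DIR to a logs/ directory inside the Kafka installation and hands it to Log4j, roughly:

```sh
# Simplified sketch of the logic in kafka-run-class.sh (paraphrased, not verbatim):
if [ "x$LOG_DIR" = "x" ]; then
  LOG_DIR="$base_dir/logs"     # would resolve to /opt/kafka/logs in this image
fi
KAFKA_LOG4J_OPTS="-Dkafka.logs.dir=$LOG_DIR $KAFKA_LOG4J_OPTS"
```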


# starting Kafka server with final configuration
exec $KAFKA_HOME/bin/connect-distributed.sh /tmp/barnabas-connect.properties
fi
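
One aside on the export changes above: only exported variables end up in the environment of child processes, so plain assignments would not be visible to anything the script execs or spawns, which is presumably why these assignments now use export. A quick illustration, unrelated to this repo:

```sh
FOO=bar                              # shell-local: not inherited by children
export BAZ=qux                       # exported: visible in the child's environment
sh -c 'echo "FOO=$FOO BAZ=$BAZ"'     # prints: FOO= BAZ=qux
```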
28 changes: 6 additions & 22 deletions kafka-inmemory/Dockerfile
@@ -1,26 +1,10 @@
FROM fedora:25
FROM enmasseproject/kafka-base:latest

RUN dnf -y install which java-1.8.0-openjdk libaio python gettext hostname iputils wget && dnf clean all -y
EXPOSE 9092

# set Scala and Kafka version
ENV SCALA_VERSION=2.11
ENV KAFKA_VERSION=0.11.0.0
# set Kafka home folder
ENV KAFKA_HOME=/opt/kafka

# Set from build args
ARG version=latest
ENV VERSION ${version}

# downloading/extracting Apache Kafka
RUN wget http://www.eu.apache.org/dist/kafka/$KAFKA_VERSION/kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz

RUN mkdir $KAFKA_HOME \
&& tar xvfz kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz -C $KAFKA_HOME --strip-components=1

WORKDIR $KAFKA_HOME

# copy template configuration files
COPY ./config/zookeeper.properties.template $KAFKA_HOME/config/
# copy scripts for starting Kafka and Zookeeper
COPY ./scripts/ $KAFKA_HOME

USER kafka:kafka

ENTRYPOINT ["/opt/kafka/kafka_run.sh"]
Member

Genuine question: Why is it better to use a run script like this rather than to put the command line in the .yml for the replicaset/statefulset?

Member Author

My two cents ...

  • Leaving Kubernetes / OpenShift aside ... setting the ENTRYPOINT / CMD properly in the Docker image makes it a lot easier for users to run the image. They can just type docker run ... without searching for the right script to execute (see the sketch after this list).
  • In the OpenShift / Kubernetes YAML files we already use the Zookeeper image in two places. So instead of specifying the zookeeper_run.sh command once in the Docker image, we would have the command duplicated twice in the repo.
  • I think it leads to a slightly cleaner design of the Docker images. If you have single-purpose Docker images, it should be easy to have a single entrypoint, and the command is not needed.
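
To illustrate the first point (hypothetical commands; the image name and script path are taken from this PR's kafka-inmemory Dockerfile):

```sh
# With the ENTRYPOINT baked into the image, running it needs no internal knowledge:
docker run enmasseproject/kafka-inmemory:latest

# Without an ENTRYPOINT, every caller has to know and repeat the script path:
docker run enmasseproject/kafka-inmemory:latest /opt/kafka/kafka_run.sh
```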

4 changes: 2 additions & 2 deletions kafka-inmemory/resources/kafka.yaml
@@ -9,15 +9,15 @@ spec:
labels:
name: kafka
spec:
securityContext:
fsGroup: 1001
containers:
- name: kafka
image: "enmasseproject/kafka-inmemory:latest"
ports:
- name: kafka
containerPort: 9092
protocol: TCP
command:
- ./kafka_run.sh
volumeMounts:
- name: kafka-storage
mountPath: /tmp/kafka/
6 changes: 3 additions & 3 deletions kafka-inmemory/resources/zookeeper.yaml
@@ -9,15 +9,15 @@ spec:
labels:
name: zookeeper
spec:
securityContext:
fsGroup: 1001
containers:
- name: zookeeper
image: "enmasseproject/kafka-inmemory:latest"
image: "enmasseproject/zookeeper:latest"
ports:
- name: clientport
containerPort: 2181
protocol: TCP
command:
- ./zookeeper_run.sh
volumeMounts:
- name: zookeeper-storage
mountPath: /tmp/zookeeper
21 changes: 0 additions & 21 deletions kafka-inmemory/scripts/zookeeper_run.sh

This file was deleted.

28 changes: 6 additions & 22 deletions kafka-statefulsets/Dockerfile
@@ -1,26 +1,10 @@
FROM fedora:25
FROM enmasseproject/kafka-base:latest

RUN dnf -y install which java-1.8.0-openjdk libaio python gettext hostname iputils wget && dnf clean all -y
EXPOSE 9092

# set Scala and Kafka version
ENV SCALA_VERSION=2.11
ENV KAFKA_VERSION=0.11.0.0
# set Kafka home folder
ENV KAFKA_HOME=/opt/kafka

# Set from build args
ARG version=latest
ENV VERSION ${version}

# downloading/extracting Apache Kafka
RUN wget http://www.eu.apache.org/dist/kafka/$KAFKA_VERSION/kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz

RUN mkdir $KAFKA_HOME \
&& tar xvfz kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz -C $KAFKA_HOME --strip-components=1
# copy scripts for starting Kafka and Zookeeper
COPY ./scripts/ $KAFKA_HOME

WORKDIR $KAFKA_HOME
USER kafka:kafka

# copy template configuration files
COPY ./config/zookeeper.properties.template $KAFKA_HOME/config/
# copy scripts for starting Kafka
COPY ./scripts/ $KAFKA_HOME
ENTRYPOINT ["/opt/kafka/kafka_run.sh"]
25 changes: 0 additions & 25 deletions kafka-statefulsets/config/zookeeper.properties.template

This file was deleted.

2 changes: 0 additions & 2 deletions kafka-statefulsets/resources/hostpath-provisioner/kafka.yaml
@@ -18,8 +18,6 @@ spec:
- name: kafka
containerPort: 9092
protocol: TCP
command:
- /opt/kafka/kafka_run.sh
volumeMounts:
- name: kafka-storage
mountPath: /tmp/kafka/
@@ -17,8 +17,6 @@ spec:
- name: clientport
containerPort: 2181
protocol: TCP
command:
- ./zookeeper_run.sh
volumeMounts:
- name: zookeeper-storage
mountPath: /tmp/zookeeper
11 changes: 9 additions & 2 deletions kafka-statefulsets/resources/kafka.yaml
@@ -11,15 +11,22 @@ spec:
labels:
name: kafka
spec:
securityContext:
fsGroup: 1001
initContainers:
- name: volume-mount-hack
image: busybox
command: ["sh", "-c", "chown -R 1001:1001 /tmp/kafka/"]
Member

Why are we putting the kafka data in /tmp? Would somewhere under /var be more conventional?

Member Author

Both Zookeeper and Kafka data dirs are not in /var/zookeeper and /var/kafka.

volumeMounts:
- name: kafka-storage
mountPath: /tmp/kafka/
containers:
- name: kafka
image: "enmasseproject/kafka-statefulsets:latest"
ports:
- name: kafka
containerPort: 9092
protocol: TCP
command:
- /opt/kafka/kafka_run.sh
volumeMounts:
- name: kafka-storage
mountPath: /tmp/kafka/
18 changes: 9 additions & 9 deletions kafka-statefulsets/resources/openshift-template.yaml
@@ -17,10 +17,14 @@ parameters:
displayName: Repository Name
name: IMAGE_REPO_NAME
value: enmasseproject
- description: Image name
displayName: Image Name
name: IMAGE_NAME
- description: Kafka image name
displayName: Kafka image Name
name: KAFKA_IMAGE_NAME
value: kafka-statefulsets
- description: Zookeeper image name
displayName: Zookeeper image Name
name: ZOOKEEPER_IMAGE_NAME
value: zookeeper
- description: Kafka Version
displayName: Kafka Version
name: KAFKA_VERSION
@@ -103,10 +107,8 @@
spec:
containers:
- name: kafka
image: ${IMAGE_REPO_NAME}/${IMAGE_NAME}:latest
image: ${IMAGE_REPO_NAME}/${KAFKA_IMAGE_NAME}:latest
imagePullPolicy: Always
command:
- /opt/kafka/kafka_run.sh
ports:
- containerPort: 9092
name: kafka
@@ -147,10 +149,8 @@
spec:
containers:
- name: zookeeper
image: ${IMAGE_REPO_NAME}/${IMAGE_NAME}:latest
image: ${IMAGE_REPO_NAME}/${ZOOKEEPER_IMAGE_NAME}:latest
imagePullPolicy: Always
command:
- ./zookeeper_run.sh
ports:
- containerPort: 2181
name: clientport
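
With the single IMAGE_NAME parameter split into KAFKA_IMAGE_NAME and ZOOKEEPER_IMAGE_NAME, the two images can be overridden independently when the template is instantiated, for example (parameter names and file path from this diff; the values shown are just the defaults):

```sh
oc process -f kafka-statefulsets/resources/openshift-template.yaml \
  -p KAFKA_IMAGE_NAME=kafka-statefulsets \
  -p ZOOKEEPER_IMAGE_NAME=zookeeper \
  | oc create -f -
```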