diff --git a/conf/bkenv.sh b/conf/bkenv.sh index d3ef33e07af4a..e6b7af687fdf2 100644 --- a/conf/bkenv.sh +++ b/conf/bkenv.sh @@ -18,6 +18,8 @@ # under the License. # +# NOTE: this script is intentionally not executable. It is only meant to be sourced for environment variables. + # Set JAVA_HOME here to override the environment setting # JAVA_HOME= diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 0000000000000..5318d331ce9f4 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,109 @@ + + +# Apache Pulsar Docker Images + +The Apache Pulsar community produces 2 docker images with each official release. + +* `apachepulsar/pulsar` - contains the necessary components for a working Pulsar cluster +* `apachepulsar/pulsar-all` - extends the `apachepulsar/pulsar` image by adding many Pulsar connectors and offloaders + +Since the 2.10.0 release, these docker images run as an unnamed, non-root user that is also part of the root group, by +default. This was done to increase container security. The user is part of the root group to ensure that the container +image can easily run on OpenShift and to ensure that the Pulsar process can write to configuration files. + +## Development + +You can build and test these docker images on your own machine by running the `./build.sh` script in this directory. +Note that you first must build the project in order to have the right dependencies in your local environment. + +## Building Derivative Custom Images + +If you find the `apachepulsar/pulsar-all` docker image too large, but you want to use a connector or an offloader, +you can easily build an image with a curated list of connectors or offloaders based on the official Apache Pulsar +images. 
You can use the following sample docker image as a guide: + +```Dockerfile +ARG VERSION + +# Load the pulsar-all image as a builder image +FROM apachepulsar/pulsar-all:${VERSION} as pulsar-all + +FROM apachepulsar/pulsar:${VERSION} + +# Add the cassandra connector +COPY --from=pulsar-all /pulsar/connectors/pulsar-io-cassandra-*.nar /pulsar/connectors + +# Add the jcloud offloader +COPY --from=pulsar-all /pulsar/offloaders/tiered-storage-jcloud-*.nar /pulsar/offloaders +``` + +NOTE: the above example uses a wildcard in the `COPY` commands because argument expansion does not work for `COPY`. + +Assuming that you have the above `Dockerfile` in your local directory and are running docker on your local host, you can +run the following command to build a custom image with the cassandra connector and the jcloud offloader. + +```shell +docker build --build-arg VERSION=2.9.1 -t pulsar-custom:2.9.1 . +``` + +For reference, here are the sizes of the official 2.9.1 docker images and the custom image built from the above +`Dockerfile`: + +| REPOSITORY | TAG | SIZE | +| :---------------------- | :---- | :----- | +| apachepulsar/pulsar | 2.9.1 | 1.59GB | +| apachepulsar/pulsar-all | 2.9.1 | 3.44GB | +| pulsar-custom | 2.9.1 | 1.6GB | + + +## Troubleshooting non-root containers + +Troubleshooting is harder because the docker image runs as a non-root user. For example, a non-root user won't be able +to download arbitrary utilities. There are several ways to troubleshoot. + +One option is to build a custom docker image that includes your preferred debugging tools. Here is an example of adding +some tools to an existing docker image. 
+ +```Dockerfile +FROM apachepulsar/pulsar:2.10.0 + +# Switch to root user to download tools +USER 0 + +# Install your preferred utilities +RUN apt-get update \ + && apt-get install -y vim net-tools unzip \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Assuming you still want to run as a non-root user by default +USER 10000 +``` + +The remaining debug options depend on your environment. For example, if you have access to the host running your +container, you might be able to use the `docker exec` command to shell into the container. By using the `--user` +argument, you can run as the root user. + +If you're running your container on Kubernetes, you can override the container's default user by setting the pod's +`securityContext`. + +Bitnami provides a helpful guide here: https://engineering.bitnami.com/articles/running-non-root-containers-on-openshift.html. \ No newline at end of file diff --git a/docker/pulsar/Dockerfile b/docker/pulsar/Dockerfile index ae0c399d26a0e..1a19220d45575 100644 --- a/docker/pulsar/Dockerfile +++ b/docker/pulsar/Dockerfile @@ -33,7 +33,13 @@ COPY scripts/pulsar-zookeeper-ruok.sh /pulsar/bin COPY scripts/watch-znode.py /pulsar/bin COPY scripts/install-pulsar-client.sh /pulsar/bin -RUN mkdir /pulsar/data +# In order to support running this docker image as a container on OpenShift +# the final image needs to give the root group sufficient permission. +# The file permissions are preserved when copying files from this builder image to the target image. +RUN chmod -R g+w /pulsar/conf +# Presto writes logs to this directory (at least during tests), so we need to give the process permission +# to create those log directories. This should be removed when Presto is removed. 
+RUN chmod g+w /pulsar/lib/presto ### Create 2nd stage from Ubuntu image ### and add OpenJDK and Python dependencies (for Pulsar functions) @@ -57,6 +63,14 @@ RUN sed -i "s|http://archive\.ubuntu\.com/ubuntu/|${UBUNTU_MIRROR:-mirror://mirr RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 10 +# Pulsar currently writes to the below directories, assuming the default configuration. +# Note that number 4 is the reason that pulsar components need write access to the /pulsar directory. +# 1. /pulsar/data - both bookkeepers and zookeepers use this directory +# 2. /pulsar/logs - function workers write to this directory and pulsar-admin initializes this directory +# 3. /pulsar/download - functions write to this directory +# 4. /pulsar - hadoop writes to this directory +RUN mkdir /pulsar && chmod g+w /pulsar + ENV JAVA_HOME /usr/lib/jvm/java-11-openjdk-amd64 RUN echo networkaddress.cache.ttl=1 >> /usr/lib/jvm/java-11-openjdk-amd64/conf/security/java.security ADD target/python-client/ /pulsar/pulsar-client @@ -67,4 +81,8 @@ ENV PULSAR_ROOT_LOGGER=INFO,CONSOLE COPY --from=pulsar /pulsar /pulsar WORKDIR /pulsar +# This script is intentionally run as the root user to make the dependencies available for all UIDs. RUN /pulsar/bin/install-pulsar-client.sh + +# The UID must be non-zero. Otherwise, it is arbitrary. No logic should rely on its specific value. +USER 10000 diff --git a/site2/docs/getting-started-docker.md b/site2/docs/getting-started-docker.md index 148995140ecf3..0d8fb11b169bd 100644 --- a/site2/docs/getting-started-docker.md +++ b/site2/docs/getting-started-docker.md @@ -20,6 +20,7 @@ A few things to note about this command: * The data, metadata, and configuration are persisted on Docker volumes in order to not start "fresh" every time the container is restarted. 
For details on the volumes you can use `docker volume inspect <sourcename>` * For Docker on Windows make sure to configure it to use Linux containers + * The docker container will run as UID 10000 and GID 0, by default. You'll need to ensure the mounted volumes give write permission to either UID 10000 or GID 0. Note that UID 10000 is arbitrary, so it is recommended to make these mounts writable for the root group (GID 0). If you start Pulsar successfully, you will see `INFO`-level log messages like this: diff --git a/tests/docker-images/latest-version-image/Dockerfile b/tests/docker-images/latest-version-image/Dockerfile index 7300daad721b5..06cc18462b82d 100644 --- a/tests/docker-images/latest-version-image/Dockerfile +++ b/tests/docker-images/latest-version-image/Dockerfile @@ -21,6 +21,7 @@ FROM apachepulsar/pulsar:latest as pulsar-function-go +# Use root for builder USER root RUN rm -rf /var/lib/apt/lists/* && apt-get update @@ -55,6 +56,16 @@ FROM apachepulsar/pulsar-all:latest as pulsar-all ######################################## FROM apachepulsar/pulsar:latest +# Switch to run as the root user to simplify building the container and then running +# supervisord. Each of the pulsar components is spawned by supervisord and their +# process configuration files specify that the process will be run with UID 10000. +# However, any processes exec'ing into the containers will run as root, by default. +USER root + +# We need to define the user in order for supervisord to work correctly +# We don't need a user defined in the public docker image, though. 
+RUN adduser -u 10000 --gid 0 --disabled-login --disabled-password --gecos '' pulsar + RUN rm -rf /var/lib/apt/lists/* && apt update RUN apt-get clean && apt-get update && apt-get install -y supervisor vim procps curl diff --git a/tests/docker-images/latest-version-image/conf/bookie.conf b/tests/docker-images/latest-version-image/conf/bookie.conf index f65a37c823795..97d2c72074831 100644 --- a/tests/docker-images/latest-version-image/conf/bookie.conf +++ b/tests/docker-images/latest-version-image/conf/bookie.conf @@ -24,3 +24,4 @@ stdout_logfile=/var/log/pulsar/bookie.log directory=/pulsar environment=PULSAR_MEM="-Xmx128M -XX:MaxDirectMemorySize=512M",PULSAR_GC="-XX:+UseG1GC" command=/pulsar/bin/pulsar bookie +user=pulsar diff --git a/tests/docker-images/latest-version-image/conf/broker.conf b/tests/docker-images/latest-version-image/conf/broker.conf index f8bf8e761398f..966c2545677b6 100644 --- a/tests/docker-images/latest-version-image/conf/broker.conf +++ b/tests/docker-images/latest-version-image/conf/broker.conf @@ -24,4 +24,4 @@ stdout_logfile=/var/log/pulsar/broker.log directory=/pulsar environment=PULSAR_MEM="-Xmx128M",PULSAR_GC="-XX:+UseG1GC" command=/pulsar/bin/pulsar broker - +user=pulsar diff --git a/tests/docker-images/latest-version-image/conf/functions_worker.conf b/tests/docker-images/latest-version-image/conf/functions_worker.conf index 3610b03251884..9b2890c8a2df0 100644 --- a/tests/docker-images/latest-version-image/conf/functions_worker.conf +++ b/tests/docker-images/latest-version-image/conf/functions_worker.conf @@ -24,4 +24,4 @@ stdout_logfile=/var/log/pulsar/functions_worker.log directory=/pulsar environment=PULSAR_MEM="-Xmx128M",PULSAR_GC="-XX:+UseG1GC" command=/pulsar/bin/pulsar functions-worker - +user=pulsar diff --git a/tests/docker-images/latest-version-image/conf/global-zk.conf b/tests/docker-images/latest-version-image/conf/global-zk.conf index bf56c5b62882a..dcc7701ce8c43 100644 --- 
a/tests/docker-images/latest-version-image/conf/global-zk.conf +++ b/tests/docker-images/latest-version-image/conf/global-zk.conf @@ -24,4 +24,4 @@ stdout_logfile=/var/log/pulsar/global-zk.log directory=/pulsar environment=PULSAR_MEM="-Xmx128M",PULSAR_GC="-XX:+UseG1GC" command=/pulsar/bin/pulsar configuration-store - +user=pulsar diff --git a/tests/docker-images/latest-version-image/conf/local-zk.conf b/tests/docker-images/latest-version-image/conf/local-zk.conf index 57681937791be..50ba294074d97 100644 --- a/tests/docker-images/latest-version-image/conf/local-zk.conf +++ b/tests/docker-images/latest-version-image/conf/local-zk.conf @@ -24,4 +24,4 @@ stdout_logfile=/var/log/pulsar/local-zk.log directory=/pulsar environment=PULSAR_MEM="-Xmx128M",PULSAR_GC="-XX:+UseG1GC" command=/pulsar/bin/pulsar zookeeper - +user=pulsar diff --git a/tests/docker-images/latest-version-image/conf/presto_worker.conf b/tests/docker-images/latest-version-image/conf/presto_worker.conf index 28e3c36661e09..3ddb11278b6b8 100644 --- a/tests/docker-images/latest-version-image/conf/presto_worker.conf +++ b/tests/docker-images/latest-version-image/conf/presto_worker.conf @@ -23,4 +23,5 @@ redirect_stderr=true stdout_logfile=/var/log/pulsar/presto_worker.log directory=/pulsar environment=PULSAR_MEM="-Xmx128M",PULSAR_GC="-XX:+UseG1GC" -command=/pulsar/bin/pulsar sql-worker start \ No newline at end of file +command=/pulsar/bin/pulsar sql-worker start +user=pulsar diff --git a/tests/docker-images/latest-version-image/conf/proxy.conf b/tests/docker-images/latest-version-image/conf/proxy.conf index 8bc1a53fe4fa8..9e14a0e934bed 100644 --- a/tests/docker-images/latest-version-image/conf/proxy.conf +++ b/tests/docker-images/latest-version-image/conf/proxy.conf @@ -24,4 +24,4 @@ stdout_logfile=/var/log/pulsar/proxy.log directory=/pulsar environment=PULSAR_MEM="-Xmx128M",PULSAR_GC="-XX:+UseG1GC" command=/pulsar/bin/pulsar proxy - +user=pulsar