
Feature: nvBLAS and OpenBLAS plugin #539

Merged · 10 commits · May 16, 2018
1 change: 1 addition & 0 deletions aztk/models/plugins/internal/plugin_manager.py
@@ -23,6 +23,7 @@ class PluginManager:
hdfs=plugins.HDFSPlugin,
simple=plugins.SimplePlugin,
spark_ui_proxy=plugins.SparkUIProxyPlugin,
openblas=plugins.OpenBLASPlugin,
)

def __init__(self):
1 change: 1 addition & 0 deletions aztk/spark/models/plugins/__init__.py
@@ -5,3 +5,4 @@
from .rstudio_server import RStudioServerPlugin
from .simple import SimplePlugin
from .spark_ui_proxy import SparkUIProxyPlugin
from .openblas import OpenBLASPlugin
1 change: 1 addition & 0 deletions aztk/spark/models/plugins/openblas/__init__.py
@@ -0,0 +1 @@
from .configuration import *
18 changes: 18 additions & 0 deletions aztk/spark/models/plugins/openblas/configuration.py
@@ -0,0 +1,18 @@
import os
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginPort, PluginTargetRole
from aztk.models.plugins.plugin_file import PluginFile
from aztk.utils import constants

dir_path = os.path.dirname(os.path.realpath(__file__))

class OpenBLASPlugin(PluginConfiguration):
Member:

Same

def __init__(self):
super().__init__(
name="openblas",
ports=[],
target_role=PluginTargetRole.All,
execute="openblas.sh",
files=[
PluginFile("openblas.sh", os.path.join(dir_path, "openblas.sh")),
Contributor:

Would it make more sense to have a generic apt-get install plugin?

The way I did this for the conda package installer was to let users specify an array of packages as a parameter. The main issue in this case, though, is the update-alternatives command, which is non-standard; not sure how to get around that.

Member Author:

update-alternatives is borderline unnecessary here, since we are running in a container with a known environment and I already know there aren't any alternatives present; it's mostly a precaution. I think a general apt-get install plugin would be great, but I don't know whether it should be in place of this one.

],
)
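
Following up on the discussion above, here is a minimal sketch of what a generic apt-get install plugin could look like, mirroring the structure of OpenBLASPlugin. The class name AptGetInstallPlugin, the script name apt_get_install.sh, and the assumption that PluginConfiguration accepts an args parameter that is forwarded to the execute script are hypothetical, not part of this PR.

import os
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginTargetRole
from aztk.models.plugins.plugin_file import PluginFile

dir_path = os.path.dirname(os.path.realpath(__file__))

class AptGetInstallPlugin(PluginConfiguration):
    """Hypothetical generic plugin: installs a user-supplied list of apt packages on every node."""

    def __init__(self, packages=None):
        packages = packages or []
        super().__init__(
            name="apt_get_install",
            ports=[],
            target_role=PluginTargetRole.All,
            execute="apt_get_install.sh",
            # Assumption: args are forwarded to the execute script as positional
            # shell arguments, so the script can simply run `apt-get install -y "$@"`.
            # If PluginConfiguration has no such parameter, the package list would
            # have to be rendered into the script or an environment variable instead.
            args=packages,
            files=[
                PluginFile("apt_get_install.sh", os.path.join(dir_path, "apt_get_install.sh")),
            ],
        )

A cluster configuration could then request, for example, AptGetInstallPlugin(packages=["libopenblas-base"]), though the update-alternatives step discussed above would still need separate handling.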
4 changes: 4 additions & 0 deletions aztk/spark/models/plugins/openblas/openblas.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
apt-get update
apt-get install -y libopenblas-base
update-alternatives --config libblas.so.3
Contributor:

Does it make sense to just bake this into the Dockerfile?

157 changes: 156 additions & 1 deletion docker-image/gpu/spark1.6.3/Dockerfile
@@ -1,4 +1,159 @@
FROM aztk/spark:v0.1.0-spark1.6.3-base
Contributor:

Why not build on top of this base image any more?

Member Author:

There is a difference between the base and GPU images in the Spark build process now (the added -Pnetlib-lgpl profile). We could build all images with that profile; that might be the best option here.

# Ubuntu 16.04 (Xenial)
FROM ubuntu:16.04

# set AZTK version compatibility
ENV AZTK_DOCKER_IMAGE_VERSION 0.1.0

# set version of python required for aztk
ENV AZTK_PYTHON_VERSION=3.5.2

# modify these ENV values at build time to specify your desired versions of Spark/Hadoop
ENV SPARK_VERSION_KEY 1.6.3
ENV SPARK_FULL_VERSION spark-${SPARK_VERSION_KEY}-bin-without-hadoop
ENV HADOOP_VERSION 2.8.3
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8

# set env vars
ENV JAVA_HOME /usr/lib/jvm/java-1.8.0-openjdk-amd64
ENV SPARK_HOME /home/spark-current
ENV PATH $SPARK_HOME/bin:$PATH

RUN apt-get clean \
&& apt-get update -y \
# install dependency packages
&& apt-get install -y --no-install-recommends \
make \
build-essential \
zlib1g-dev \
libssl-dev \
libbz2-dev \
libreadline-dev \
libsqlite3-dev \
maven \
wget \
curl \
llvm \
git \
libncurses5-dev \
libncursesw5-dev \
python3-pip \
python3-venv \
xz-utils \
tk-dev \
&& apt-get update -y \
# install [software-properties-common]
# so we can use [apt-add-repository] to add the repository [ppa:webupd8team/java]
# from which we install Java8
&& apt-get install -y --no-install-recommends software-properties-common \
&& apt-add-repository ppa:webupd8team/java -y \
&& apt-get update -y \
# install java
&& apt-get install -y --no-install-recommends default-jdk \
# set up user python and aztk python
&& ln -s /usr/bin/python3.5 /usr/bin/python \
&& /usr/bin/python -m pip install --upgrade pip setuptools wheel \
&& apt-get remove -y python3-pip \
# build and install spark
&& git clone https://github.com/apache/spark.git \
&& cd spark \
&& git checkout tags/v${SPARK_VERSION_KEY} \
&& export MAVEN_OPTS="-Xmx3g -XX:ReservedCodeCacheSize=1024m" \
&& ./make-distribution.sh --name custom-spark --tgz -Pnetlib-lgpl -Phive -Phive-thriftserver -Dhadoop.version=${HADOOP_VERSION} -Phadoop-2.6 -DskipTests \
&& tar -xvzf /spark/spark-${SPARK_VERSION_KEY}-bin-custom-spark.tgz --directory=/home \
&& ln -s "/home/spark-${SPARK_VERSION_KEY}-bin-custom-spark" /home/spark-current \
&& rm -rf /spark \
# copy azure storage jars and dependencies to $SPARK_HOME/jars
&& echo "<project>" \
"<modelVersion>4.0.0</modelVersion>" \
"<groupId>groupId</groupId>" \
"<artifactId>artifactId</artifactId>" \
"<version>1.0</version>" \
"<dependencies>" \
"<dependency>" \
"<groupId>org.apache.hadoop</groupId>" \
"<artifactId>hadoop-azure-datalake</artifactId>" \
"<version>${HADOOP_VERSION}</version>" \
"<exclusions>" \
"<exclusion>" \
"<groupId>org.apache.hadoop</groupId>" \
"<artifactId>hadoop-common</artifactId>" \
"</exclusion>" \
"</exclusions> " \
"</dependency>" \
"<dependency>" \
"<groupId>org.apache.hadoop</groupId>" \
"<artifactId>hadoop-azure</artifactId>" \
"<version>${HADOOP_VERSION}</version>" \
"<exclusions>" \
"<exclusion>" \
"<groupId>org.apache.hadoop</groupId>" \
"<artifactId>hadoop-common</artifactId>" \
"</exclusion>" \
"<exclusion>" \
"<groupId>com.fasterxml.jackson.core</groupId>" \
"<artifactId>jackson-core</artifactId>" \
"</exclusion>" \
"</exclusions> " \
"</dependency>" \
"<dependency>" \
"<groupId>com.microsoft.sqlserver</groupId>" \
"<artifactId>mssql-jdbc</artifactId>" \
"<version>6.4.0.jre8</version>" \
"</dependency>" \
"<dependency>" \
"<groupId>com.microsoft.azure</groupId>" \
"<artifactId>azure-storage</artifactId>" \
"<version>2.2.0</version>" \
"<exclusions>" \
"<exclusion>" \
"<groupId>com.fasterxml.jackson.core</groupId>" \
"<artifactId>jackson-core</artifactId>" \
"</exclusion>" \
"<exclusion>" \
"<groupId>org.apache.commons</groupId>" \
"<artifactId>commons-lang3</artifactId>" \
"</exclusion>" \
"<exclusion>" \
"<groupId>org.slf4j</groupId>" \
"<artifactId>slf4j-api</artifactId>" \
"</exclusion>" \
"</exclusions>" \
"</dependency>" \
"<dependency>" \
"<groupId>com.microsoft.azure</groupId>" \
"<artifactId>azure-cosmosdb-spark_2.1.0_2.11</artifactId>" \
"<version>1.1.1</version>" \
"<exclusions>" \
"<exclusion>" \
"<groupId>org.apache.tinkerpop</groupId>" \
"<artifactId>tinkergraph-gremlin</artifactId>" \
"</exclusion>" \
"<exclusion>" \
"<groupId>org.apache.tinkerpop</groupId>" \
"<artifactId>spark-gremlin</artifactId>" \
"</exclusion>" \
"<exclusion>" \
"<groupId>io.netty</groupId>" \
"<artifactId>*</artifactId>" \
"</exclusion>" \
"<exclusion>" \
"<groupId>com.fasterxml.jackson.core</groupId>" \
"<artifactId>jackson-annotations</artifactId>" \
"</exclusion>" \
"</exclusions> " \
"</dependency>" \
"</dependencies>" \
"</project>" > /tmp/pom.xml \
&& cd /tmp \
&& mvn dependency:copy-dependencies -DoutputDirectory="${SPARK_HOME}/jars/" \
# cleanup
&& apt-get --purge autoremove -y maven python3-pip \
&& apt-get autoremove -y \
&& apt-get autoclean -y \
&& rm -rf /tmp/* \
&& rm -rf /root/.cache \
&& rm -rf /root/.m2 \
&& rm -rf /var/lib/apt/lists/*

LABEL com.nvidia.volumes.needed="nvidia_driver"

157 changes: 156 additions & 1 deletion docker-image/gpu/spark2.1.0/Dockerfile
@@ -1,4 +1,159 @@
FROM aztk/spark:v0.1.0-spark2.1.0-base
# Ubuntu 16.04 (Xenial)
FROM ubuntu:16.04

# set AZTK version compatibility
ENV AZTK_DOCKER_IMAGE_VERSION 0.1.0

# set version of python required for aztk
ENV AZTK_PYTHON_VERSION=3.5.2

# modify these ENV values at build time to specify your desired versions of Spark/Hadoop
ENV SPARK_VERSION_KEY 2.1.0
ENV SPARK_FULL_VERSION spark-${SPARK_VERSION_KEY}-bin-without-hadoop
ENV HADOOP_VERSION 2.8.3
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8

# set env vars
ENV JAVA_HOME /usr/lib/jvm/java-1.8.0-openjdk-amd64
ENV SPARK_HOME /home/spark-current
ENV PATH $SPARK_HOME/bin:$PATH

RUN apt-get clean \
&& apt-get update -y \
# install dependency packages
&& apt-get install -y --no-install-recommends \
make \
build-essential \
zlib1g-dev \
libssl-dev \
libbz2-dev \
libreadline-dev \
libsqlite3-dev \
maven \
wget \
curl \
llvm \
git \
libncurses5-dev \
libncursesw5-dev \
python3-pip \
python3-venv \
xz-utils \
tk-dev \
&& apt-get update -y \
# install [software-properties-common]
# so we can use [apt-add-repository] to add the repository [ppa:webupd8team/java]
# from which we install Java8
&& apt-get install -y --no-install-recommends software-properties-common \
&& apt-add-repository ppa:webupd8team/java -y \
&& apt-get update -y \
# install java
&& apt-get install -y --no-install-recommends default-jdk \
# set up user python and aztk python
&& ln -s /usr/bin/python3.5 /usr/bin/python \
&& /usr/bin/python -m pip install --upgrade pip setuptools wheel \
&& apt-get remove -y python3-pip \
# build and install spark
&& git clone https://github.com/apache/spark.git \
&& cd spark \
&& git checkout tags/v${SPARK_VERSION_KEY} \
&& export MAVEN_OPTS="-Xmx3g -XX:ReservedCodeCacheSize=1024m" \
&& ./dev/make-distribution.sh --name custom-spark --pip --tgz -Pnetlib-lgpl -Phive -Phive-thriftserver -Dhadoop.version=${HADOOP_VERSION} -DskipTests \
&& tar -xvzf /spark/spark-${SPARK_VERSION_KEY}-bin-custom-spark.tgz --directory=/home \
&& ln -s "/home/spark-${SPARK_VERSION_KEY}-bin-custom-spark" /home/spark-current \
&& rm -rf /spark \
# copy azure storage jars and dependencies to $SPARK_HOME/jars
&& echo "<project>" \
"<modelVersion>4.0.0</modelVersion>" \
"<groupId>groupId</groupId>" \
"<artifactId>artifactId</artifactId>" \
"<version>1.0</version>" \
"<dependencies>" \
"<dependency>" \
"<groupId>org.apache.hadoop</groupId>" \
"<artifactId>hadoop-azure-datalake</artifactId>" \
"<version>${HADOOP_VERSION}</version>" \
"<exclusions>" \
"<exclusion>" \
"<groupId>org.apache.hadoop</groupId>" \
"<artifactId>hadoop-common</artifactId>" \
"</exclusion>" \
"</exclusions> " \
"</dependency>" \
"<dependency>" \
"<groupId>org.apache.hadoop</groupId>" \
"<artifactId>hadoop-azure</artifactId>" \
"<version>${HADOOP_VERSION}</version>" \
"<exclusions>" \
"<exclusion>" \
"<groupId>org.apache.hadoop</groupId>" \
"<artifactId>hadoop-common</artifactId>" \
"</exclusion>" \
"<exclusion>" \
"<groupId>com.fasterxml.jackson.core</groupId>" \
"<artifactId>jackson-core</artifactId>" \
"</exclusion>" \
"</exclusions> " \
"</dependency>" \
"<dependency>" \
"<groupId>com.microsoft.sqlserver</groupId>" \
"<artifactId>mssql-jdbc</artifactId>" \
"<version>6.4.0.jre8</version>" \
"</dependency>" \
"<dependency>" \
"<groupId>com.microsoft.azure</groupId>" \
"<artifactId>azure-storage</artifactId>" \
"<version>2.2.0</version>" \
"<exclusions>" \
"<exclusion>" \
"<groupId>com.fasterxml.jackson.core</groupId>" \
"<artifactId>jackson-core</artifactId>" \
"</exclusion>" \
"<exclusion>" \
"<groupId>org.apache.commons</groupId>" \
"<artifactId>commons-lang3</artifactId>" \
"</exclusion>" \
"<exclusion>" \
"<groupId>org.slf4j</groupId>" \
"<artifactId>slf4j-api</artifactId>" \
"</exclusion>" \
"</exclusions>" \
"</dependency>" \
"<dependency>" \
"<groupId>com.microsoft.azure</groupId>" \
"<artifactId>azure-cosmosdb-spark_${SPARK_VERSION_KEY}_2.11</artifactId>" \
"<version>1.1.1</version>" \
"<exclusions>" \
"<exclusion>" \
"<groupId>org.apache.tinkerpop</groupId>" \
"<artifactId>tinkergraph-gremlin</artifactId>" \
"</exclusion>" \
"<exclusion>" \
"<groupId>org.apache.tinkerpop</groupId>" \
"<artifactId>spark-gremlin</artifactId>" \
"</exclusion>" \
"<exclusion>" \
"<groupId>io.netty</groupId>" \
"<artifactId>*</artifactId>" \
"</exclusion>" \
"<exclusion>" \
"<groupId>com.fasterxml.jackson.core</groupId>" \
"<artifactId>jackson-annotations</artifactId>" \
"</exclusion>" \
"</exclusions> " \
"</dependency>" \
"</dependencies>" \
"</project>" > /tmp/pom.xml \
&& cd /tmp \
&& mvn dependency:copy-dependencies -DoutputDirectory="${SPARK_HOME}/jars/" \
# cleanup
&& apt-get --purge autoremove -y maven python3-pip \
&& apt-get autoremove -y \
&& apt-get autoclean -y \
&& rm -rf /tmp/* \
&& rm -rf /root/.cache \
&& rm -rf /root/.m2 \
&& rm -rf /var/lib/apt/lists/*

LABEL com.nvidia.volumes.needed="nvidia_driver"
