-
Notifications
You must be signed in to change notification settings - Fork 66
Feature: nvBLAS and OpenBLAS plugin #539
Changes from 1 commit
0a242ee
1990c55
40ce7ec
cf97a04
e94c125
8938190
85b2a69
1678c06
934e84e
e35e443
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .configuration import * |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import os
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginPort, PluginTargetRole
from aztk.models.plugins.plugin_file import PluginFile
from aztk.utils import constants

# Directory containing this module; used to resolve the bundled shell script
# regardless of the process's working directory.
dir_path = os.path.dirname(os.path.realpath(__file__))


class OpenBLASPlugin(PluginConfiguration):
    """Cluster plugin that installs OpenBLAS on every node.

    Registers the bundled ``openblas.sh`` script to run on all node roles
    (master and workers); the script performs the actual apt installation
    and BLAS alternative selection.
    """

    def __init__(self):
        super().__init__(
            name="openblas",
            ports=[],  # no network ports need to be exposed for a BLAS library
            target_role=PluginTargetRole.All,  # install on master and worker nodes alike
            execute="openblas.sh",
            files=[
                # Ship the install script alongside this plugin definition.
                PluginFile("openblas.sh", os.path.join(dir_path, "openblas.sh")),
                # NOTE(review): a generic apt-get-install plugin taking a list of
                # packages was discussed as an alternative; the update-alternatives
                # step in openblas.sh is the non-standard part that keeps this
                # plugin package-specific.
            ],
        )
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/bash
# Install OpenBLAS and register it as the system BLAS implementation.
# Runs unattended during node provisioning: fail fast on any error and
# suppress interactive apt prompts.
set -e
export DEBIAN_FRONTEND=noninteractive

apt-get update
apt-get install -y libopenblas-base
# Select the OpenBLAS library as the provider of libblas.so.3.
# NOTE(review): inside the known container image there are no competing
# alternatives, so this is mostly a precaution — but `--config` is
# interactive in the general case; switch to
# `update-alternatives --auto libblas.so.3` if a prompt ever appears.
update-alternatives --config libblas.so.3
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,159 @@ | ||
# Ubuntu 16.04 (Xenial)
# NOTE(review): previously built FROM aztk/spark:v0.1.0-spark1.6.3-base; Spark
# is now compiled here so the -Pnetlib-lgpl profile can be enabled (required
# for the nvBLAS/OpenBLAS plugins). Building all base images with that profile
# was discussed as an alternative.
FROM ubuntu:16.04

# set AZTK version compatibility
ENV AZTK_DOCKER_IMAGE_VERSION 0.1.0

# set version of python required for aztk
ENV AZTK_PYTHON_VERSION=3.5.2

# modify these ARGs on build time to specify your desired versions of Spark/Hadoop
ENV SPARK_VERSION_KEY 1.6.3
ENV SPARK_FULL_VERSION spark-${SPARK_VERSION_KEY}-bin-without-hadoop
ENV HADOOP_VERSION 2.8.3
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8

# set env vars
ENV JAVA_HOME /usr/lib/jvm/java-1.8.0-openjdk-amd64
ENV SPARK_HOME /home/spark-current
ENV PATH $SPARK_HOME/bin:$PATH

RUN apt-get clean \
    && apt-get update -y \
    # install dependency packages
    && apt-get install -y --no-install-recommends \
        make \
        build-essential \
        zlib1g-dev \
        libssl-dev \
        libbz2-dev \
        libreadline-dev \
        libsqlite3-dev \
        maven \
        wget \
        curl \
        llvm \
        git \
        libncurses5-dev \
        libncursesw5-dev \
        python3-pip \
        python3-venv \
        xz-utils \
        tk-dev \
    # install java (OpenJDK 8 on xenial; matches JAVA_HOME above).
    # NOTE(review): the webupd8team/java PPA was previously added here, but it
    # only provides the Oracle installer and default-jdk does not use it, so
    # the extra repository (and the software-properties-common install plus
    # two redundant apt-get update runs it required) has been dropped.
    && apt-get install -y --no-install-recommends default-jdk \
    # set up user python and aztk python
    && ln -s /usr/bin/python3.5 /usr/bin/python \
    && /usr/bin/python -m pip install --upgrade pip setuptools wheel \
    && apt-get remove -y python3-pip \
    # build and install spark from source at the pinned release tag, enabling
    # netlib-lgpl so native BLAS backends can be loaded at runtime
    && git clone https://github.com/apache/spark.git \
    && cd spark \
    && git checkout tags/v${SPARK_VERSION_KEY} \
    && export MAVEN_OPTS="-Xmx3g -XX:ReservedCodeCacheSize=1024m" \
    && ./make-distribution.sh --name custom-spark --tgz -Pnetlib-lgpl -Phive -Phive-thriftserver -Dhadoop.version=${HADOOP_VERSION} -Phadoop-2.6 -DskipTests \
    && tar -xvzf /spark/spark-${SPARK_VERSION_KEY}-bin-custom-spark.tgz --directory=/home \
    && ln -s "/home/spark-${SPARK_VERSION_KEY}-bin-custom-spark" /home/spark-current \
    && rm -rf /spark \
    # generate a throwaway pom listing the Azure storage connectors (with
    # exclusions for jars Spark already ships), then let maven copy the
    # resolved dependency jars into $SPARK_HOME/jars
    && echo "<project>" \
        "<modelVersion>4.0.0</modelVersion>" \
        "<groupId>groupId</groupId>" \
        "<artifactId>artifactId</artifactId>" \
        "<version>1.0</version>" \
        "<dependencies>" \
        "<dependency>" \
        "<groupId>org.apache.hadoop</groupId>" \
        "<artifactId>hadoop-azure-datalake</artifactId>" \
        "<version>${HADOOP_VERSION}</version>" \
        "<exclusions>" \
        "<exclusion>" \
        "<groupId>org.apache.hadoop</groupId>" \
        "<artifactId>hadoop-common</artifactId>" \
        "</exclusion>" \
        "</exclusions> " \
        "</dependency>" \
        "<dependency>" \
        "<groupId>org.apache.hadoop</groupId>" \
        "<artifactId>hadoop-azure</artifactId>" \
        "<version>${HADOOP_VERSION}</version>" \
        "<exclusions>" \
        "<exclusion>" \
        "<groupId>org.apache.hadoop</groupId>" \
        "<artifactId>hadoop-common</artifactId>" \
        "</exclusion>" \
        "<exclusion>" \
        "<groupId>com.fasterxml.jackson.core</groupId>" \
        "<artifactId>jackson-core</artifactId>" \
        "</exclusion>" \
        "</exclusions> " \
        "</dependency>" \
        "<dependency>" \
        "<groupId>com.microsoft.sqlserver</groupId>" \
        "<artifactId>mssql-jdbc</artifactId>" \
        "<version>6.4.0.jre8</version>" \
        "</dependency>" \
        "<dependency>" \
        "<groupId>com.microsoft.azure</groupId>" \
        "<artifactId>azure-storage</artifactId>" \
        "<version>2.2.0</version>" \
        "<exclusions>" \
        "<exclusion>" \
        "<groupId>com.fasterxml.jackson.core</groupId>" \
        "<artifactId>jackson-core</artifactId>" \
        "</exclusion>" \
        "<exclusion>" \
        "<groupId>org.apache.commons</groupId>" \
        "<artifactId>commons-lang3</artifactId>" \
        "</exclusion>" \
        "<exclusion>" \
        "<groupId>org.slf4j</groupId>" \
        "<artifactId>slf4j-api</artifactId>" \
        "</exclusion>" \
        "</exclusions>" \
        "</dependency>" \
        "<dependency>" \
        "<groupId>com.microsoft.azure</groupId>" \
        "<artifactId>azure-cosmosdb-spark_2.1.0_2.11</artifactId>" \
        "<version>1.1.1</version>" \
        "<exclusions>" \
        "<exclusion>" \
        "<groupId>org.apache.tinkerpop</groupId>" \
        "<artifactId>tinkergraph-gremlin</artifactId>" \
        "</exclusion>" \
        "<exclusion>" \
        "<groupId>org.apache.tinkerpop</groupId>" \
        "<artifactId>spark-gremlin</artifactId>" \
        "</exclusion>" \
        "<exclusion>" \
        "<groupId>io.netty</groupId>" \
        "<artifactId>*</artifactId>" \
        "</exclusion>" \
        "<exclusion>" \
        "<groupId>com.fasterxml.jackson.core</groupId>" \
        "<artifactId>jackson-annotations</artifactId>" \
        "</exclusion>" \
        "</exclusions> " \
        "</dependency>" \
        "</dependencies>" \
        "</project>" > /tmp/pom.xml \
    && cd /tmp \
    && mvn dependency:copy-dependencies -DoutputDirectory="${SPARK_HOME}/jars/" \
    # cleanup build-only tooling and caches to keep the image small
    && apt-get --purge autoremove -y maven python3-pip \
    && apt-get autoremove -y \
    && apt-get autoclean -y \
    && rm -rf /tmp/* \
    && rm -rf /root/.cache \
    && rm -rf /root/.m2 \
    && rm -rf /var/lib/apt/lists/*

# Tell nvidia-docker (v1) to mount the host NVIDIA driver volume; needed for
# the nvBLAS plugin to reach the GPU libraries at runtime.
LABEL com.nvidia.volumes.needed="nvidia_driver"
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same