Support RayOnSpark for k8s and add docs (intel-analytics#2836)
* support ray on k8s

* add to init orca context

* style

* minor

* minor

* ut
hkvision committed Sep 11, 2020
1 parent b8d51ea commit 9639a1c
Showing 1 changed file with 45 additions and 9 deletions.
54 changes: 45 additions & 9 deletions pyspark/bigdl/common/zooUtils/nncontext.py
@@ -36,7 +36,8 @@ def init_spark_on_local(cores=2, conf=None, python_location=None, spark_log_level
:param conf: You can append extra conf for Spark in key-value format.
e.g. conf={"spark.executor.extraJavaOptions": "-XX:+PrintGCDetails"}.
Default to be None.
:param python_location: The path to your running python executable.
:param python_location: The path to your running Python executable. If not specified, the
default Python interpreter in effect would be used.
:param spark_log_level: The log level for Spark. Default to be 'WARN'.
:param redirect_spark_log: Whether to redirect the Spark log to local file. Default to be True.
:return: An instance of SparkContext.
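
For readers unfamiliar with the API, a minimal usage sketch follows (not part of this commit); the import path is assumed from the file shown above and may differ in the actual package layout:

from zoo.common.nncontext import init_spark_on_local  # assumed import path

# Start a local SparkContext with 4 cores and one extra Spark conf entry.
sc = init_spark_on_local(cores=4,
                         conf={"spark.executor.extraJavaOptions": "-XX:+PrintGCDetails"})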
@@ -132,6 +133,7 @@ def init_spark_standalone(num_executors,
redirect_spark_log=True,
conf=None,
jars=None,
python_location=None,
enable_numa_binding=False):
"""
Create a SparkContext with Analytics Zoo configurations on a Spark standalone cluster of
@@ -159,6 +161,8 @@ def init_spark_standalone(num_executors,
:param conf: You can append extra conf for Spark in key-value format.
e.g. conf={"spark.executor.extraJavaOptions": "-XX:+PrintGCDetails"}.
Default to be None.
:param python_location: The path to your running Python executable. If not specified, the
default Python interpreter in effect would be used.
:param enable_numa_binding: Whether to use numactl to start Spark workers in order to bind
different worker processes to different CPUs and memory areas. This may lead to
better performance on a multi-socket machine. Defaults to False.
@@ -179,6 +183,7 @@
extra_python_lib=extra_python_lib,
conf=conf,
jars=jars,
python_location=python_location,
enable_numa_binding=enable_numa_binding)
return sc
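
A hedged sketch of how the newly added python_location argument might be used (illustrative only; the interpreter path and executor sizing are made-up values, and the import path is assumed):

from zoo.common.nncontext import init_spark_standalone  # assumed import path

# Point Spark standalone workers at a specific Python interpreter,
# e.g. one inside a dedicated conda environment (hypothetical path).
sc = init_spark_standalone(num_executors=2,
                           executor_cores=4,
                           python_location="/opt/conda/envs/zoo/bin/python")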

@@ -197,7 +202,35 @@ def init_spark_on_k8s(master,
jars=None,
conf=None,
python_location=None):
"""
Create a SparkContext with Analytics Zoo configurations on a Kubernetes cluster in k8s client
mode. It is recommended to use the Docker image intelanalytics/hyperzoo:latest.
You can refer to https://github.com/intel-analytics/analytics-zoo/tree/master/docker/hyperzoo
to build your own Docker image.
:param master: The master address of your k8s cluster.
:param container_image: The name of the Docker container image for Spark executors.
For example, intelanalytics/hyperzoo:latest.
:param num_executors: The number of Spark executors.
:param executor_cores: The number of cores for each executor.
:param executor_memory: The memory for each executor. Default to be '2g'.
:param driver_cores: The number of cores for the Spark driver. Default to be 4.
:param driver_memory: The memory for the Spark driver. Default to be '1g'.
:param extra_executor_memory_for_ray: The extra memory for Ray services. Default to be None.
:param extra_python_lib: Extra Python files or packages needed for distribution.
Default to be None.
:param spark_log_level: The log level for Spark. Default to be 'WARN'.
:param redirect_spark_log: Whether to redirect the Spark log to local file. Default to be True.
:param jars: Comma-separated list of jars to be included on driver and executor's classpath.
Default to be None.
:param conf: You can append extra conf for Spark in key-value format.
e.g. conf={"spark.executor.extraJavaOptions": "-XX:+PrintGCDetails"}.
Default to be None.
:param python_location: The path to your running Python executable. If not specified, the
default Python interpreter in effect would be used.
:return: An instance of SparkContext.
"""
from zoo.util.spark import SparkRunner
runner = SparkRunner(spark_log_level=spark_log_level,
redirect_spark_log=redirect_spark_log)
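
Since the commit is about RayOnSpark on k8s, a usage sketch may help; the master URL, resource sizes, and the RayContext import/arguments below are illustrative assumptions rather than values taken from this diff:

from zoo.common.nncontext import init_spark_on_k8s  # assumed import path
from zoo.ray import RayContext  # assumed RayOnSpark entry point

# Create a SparkContext in k8s client mode using the hyperzoo image.
sc = init_spark_on_k8s(master="k8s://https://<k8s-apiserver-host>:<port>",
                       container_image="intelanalytics/hyperzoo:latest",
                       num_executors=2,
                       executor_cores=4,
                       executor_memory="4g",
                       extra_executor_memory_for_ray="2g")

# Launch Ray on top of the Spark executors (RayOnSpark), run work, then stop.
ray_ctx = RayContext(sc=sc, object_store_memory="2g")
ray_ctx.init()
# ... submit Ray tasks/actors here ...
ray_ctx.stop()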
@@ -285,13 +318,14 @@ def init_nncontext(conf=None, spark_log_level="WARN", redirect_spark_log=True):
or the properties file before calling this method. In this case, it is recommended
to use the launching scripts under `analytics-zoo/scripts`.
:param conf: You can append extra conf for Spark in key-value format.
e.g. conf={"spark.executor.extraJavaOptions": "-XX:+PrintGCDetails"}.
Default to be None.
:param conf: An instance of SparkConf. If not specified, a new SparkConf with
Analytics Zoo and BigDL configurations would be created and used.
You can also input a string here to indicate the name of the application.
:param spark_log_level: The log level for Spark. Default to be 'WARN'.
:param redirect_spark_log: Whether to redirect the Spark log to local file. Default to be True.
"""
:return: An instance of SparkContext.
"""
# The following code copied and modified from
# https://github.com/Valassis-Digital-Media/spylon-kernel/blob/master/
# spylon_kernel/scala_interpreter.py
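
As a quick illustration (assumed usage, not shown in the diff), init_nncontext accepts either a SparkConf instance or an application-name string, per the updated docstring above:

from zoo.common.nncontext import init_nncontext  # assumed import path

# Passing a string is treated as the application name.
sc = init_nncontext("RayOnSpark-K8s-Demo")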
@@ -349,11 +383,13 @@ def Popen(*args, **kwargs):

def getOrCreateSparkContext(conf=None, appName=None):
"""
Get the current active spark context and create one if no active instance
:param conf: combining bigdl configs into spark conf
:return: SparkContext
"""
Get the current active SparkContext or create a new SparkContext.
:param conf: An instance of SparkConf. If not specified, a new SparkConf with
Analytics Zoo and BigDL configurations would be created and used.
:param appName: The name of the application if any.
:return: An instance of SparkContext.
"""
with SparkContext._lock:
if SparkContext._active_spark_context is None:
spark_conf = init_spark_conf() if conf is None else conf
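
A minimal sketch of this helper, assuming it is importable from the same module:

from zoo.common.nncontext import getOrCreateSparkContext  # assumed import path

# Reuses the active SparkContext if one exists; otherwise creates one
# with the Analytics Zoo / BigDL defaults from init_spark_conf().
sc = getOrCreateSparkContext(appName="zoo-app")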
