Add dynamic Spark configuration for Databricks (NVIDIA#2116)
* Add dynamic Spark confs for Databricks

We need a way to set Spark configurations dynamically for the Databricks tests.
For example, when we test cuDF Sonatype release jars, we need to disable the cudf/rapids version-match check by adding
"--conf spark.rapids.cudfVersionOverride=true", enable or disable AQE, or set anything else.

By adding the parameter spark_conf="--conf spark.xxx.xxx=xxx --conf ......" to the script 'run-tests.py',
we can dynamically pass whatever configurations the Databricks cluster needs.

Signed-off-by: Tim Liu <timl@nvidia.com>

* Comma-separated list of Spark configurations

Signed-off-by: Tim Liu <timl@nvidia.com>

* Add a comment to make the '-f' format clear

* Add a comment to make the '-f' format clear

* Fix typo

* Add '--conf' if the SPARK_CONF is not empty
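
As an illustration of the new flag, a hypothetical invocation might look like the one below; the workspace, token, cluster id, key path, jar path, and configuration values are placeholders, not values taken from this commit:

    # Hypothetical example: forward two extra Spark configurations to the Databricks test run.
    # All argument values are illustrative placeholders.
    python jenkins/databricks/run-tests.py \
        -w $WORKSPACE -t $TOKEN -c $CLUSTER_ID \
        -p ~/.ssh/databricks.pem \
        -l jenkins/databricks/test.sh -d /home/ubuntu/test.sh \
        -j /home/ubuntu/rapids-4-spark.jar \
        -f 'spark.rapids.cudfVersionOverride=true,spark.sql.adaptive.enabled=false'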
NvTimLiu authored Apr 15, 2021
1 parent 306cb7c commit 27d7af5
Showing 3 changed files with 28 additions and 8 deletions.
13 changes: 9 additions & 4 deletions jenkins/databricks/params.py
@@ -26,19 +26,21 @@
clusterid = ''
build_profiles = 'databricks,!snapshot-shims'
jar_path = ''
# `spark_conf` can take multiple comma-separated Spark configurations, e.g., 'spark.foo=1,spark.bar=2,...'
spark_conf = ''

try:
opts, args = getopt.getopt(sys.argv[1:], 'hw:t:c:p:l:d:z:m:v:b:j:',
['workspace=', 'token=', 'clusterid=', 'private=', 'localscript=', 'dest=', 'sparktgz=', 'basesparkpomversion=', 'buildprofiles=', 'jarpath'])
opts, args = getopt.getopt(sys.argv[1:], 'hw:t:c:p:l:d:z:m:v:b:j:f:',
['workspace=', 'token=', 'clusterid=', 'private=', 'localscript=', 'dest=', 'sparktgz=', 'basesparkpomversion=', 'buildprofiles=', 'jarpath', 'sparkconf'])
except getopt.GetoptError:
print(
'run-tests.py -s <workspace> -t <token> -c <clusterid> -p <privatekeyfile> -l <localscript> -d <scriptdestination> -z <sparktgz> -v <basesparkpomversion> -b <buildprofiles> -j <jarpath>')
'run-tests.py -s <workspace> -t <token> -c <clusterid> -p <privatekeyfile> -l <localscript> -d <scriptdestination> -z <sparktgz> -v <basesparkpomversion> -b <buildprofiles> -j <jarpath> -f <sparkconf>')
sys.exit(2)

for opt, arg in opts:
if opt == '-h':
print(
'run-tests.py -s <workspace> -t <token> -c <clusterid> -p <privatekeyfile> -n <skipstartingcluster> -l <localscript> -d <scriptdestination> -z <sparktgz> -v <basesparkpomversion> -b <buildprofiles>')
'run-tests.py -s <workspace> -t <token> -c <clusterid> -p <privatekeyfile> -n <skipstartingcluster> -l <localscript> -d <scriptdestination> -z <sparktgz> -v <basesparkpomversion> -b <buildprofiles> -f <sparkconf>')
sys.exit()
elif opt in ('-w', '--workspace'):
workspace = arg
@@ -60,6 +62,8 @@
build_profiles = arg
elif opt in ('-j', '--jarpath'):
jar_path = arg
elif opt in ('-f', '--sparkconf'):
spark_conf = arg

print('-w is ' + workspace)
print('-c is ' + clusterid)
@@ -69,3 +73,4 @@
print('-z is ' + source_tgz)
print('-v is ' + base_spark_pom_version)
print('-j is ' + jar_path)
print('-f is ' + spark_conf)
2 changes: 1 addition & 1 deletion jenkins/databricks/run-tests.py
@@ -35,7 +35,7 @@ def main():
print("rsync command: %s" % rsync_command)
subprocess.check_call(rsync_command, shell = True)

ssh_command = "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@%s -p 2200 -i %s %s %s 2>&1 | tee testout; if [ `echo ${PIPESTATUS[0]}` -ne 0 ]; then false; else true; fi" % (master_addr, params.private_key_file, params.script_dest, params.jar_path)
ssh_command = "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@%s -p 2200 -i %s %s %s %s 2>&1 | tee testout; if [ `echo ${PIPESTATUS[0]}` -ne 0 ]; then false; else true; fi" % (master_addr, params.private_key_file, params.script_dest, params.jar_path, params.spark_conf)
print("ssh command: %s" % ssh_command)
subprocess.check_call(ssh_command, shell = True)
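
With this change the comma-separated configuration string is simply forwarded to the remote test script as one more positional argument after the jar path. A minimal sketch of the resulting command shape, using a placeholder host, key, paths, and configurations:

    # Hypothetical rendering of ssh_command (all values below are illustrative placeholders)
    ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@10.0.0.2 -p 2200 -i ~/.ssh/db.pem \
        /home/ubuntu/test.sh /home/ubuntu/rapids-4-spark.jar spark.foo=1,spark.bar=2 2>&1 | tee testout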

21 changes: 18 additions & 3 deletions jenkins/databricks/test.sh
@@ -15,9 +15,10 @@
# limitations under the License.
#

set -e
set -ex

LOCAL_JAR_PATH=$1
SPARK_CONF=$2

# tests
export PATH=/databricks/conda/envs/databricks-ml-gpu/bin:/databricks/conda/condabin:$PATH
@@ -38,21 +39,35 @@ CUDF_UDF_TEST_ARGS="--conf spark.python.daemon.module=rapids.daemon_databricks \
--conf spark.rapids.python.memory.gpu.allocFraction=0.1 \
--conf spark.rapids.python.concurrentPythonWorkers=2"

## 'spark.foo=1,spark.bar=2,...' to 'export PYSP_TEST_spark_foo=1 export PYSP_TEST_spark_bar=2'
if [ -n "$SPARK_CONF" ]; then
CONF_LIST=${SPARK_CONF//','/' '}
for CONF in ${CONF_LIST}; do
KEY=${CONF%%=*}
VALUE=${CONF#*=}
## run_pyspark_from_build.sh expects the Spark configs as environment variables like 'export PYSP_TEST_spark_foo=1'
export PYSP_TEST_${KEY//'.'/'_'}=$VALUE
done

## 'spark.foo=1,spark.bar=2,...' to '--conf spark.foo=1 --conf spark.bar=2 --conf ...'
SPARK_CONF="--conf ${SPARK_CONF/','/' --conf '}"
fi

TEST_TYPE="nightly"
if [ -d "$LOCAL_JAR_PATH" ]; then
## Run tests with the jars in the LOCAL_JAR_PATH dir, downloaded from the dependency repo
LOCAL_JAR_PATH=$LOCAL_JAR_PATH bash $LOCAL_JAR_PATH/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" --test_type=$TEST_TYPE

## Run cudf-udf tests
CUDF_UDF_TEST_ARGS="$CUDF_UDF_TEST_ARGS --conf spark.executorEnv.PYTHONPATH=`ls $LOCAL_JAR_PATH/rapids-4-spark_*.jar | grep -v 'tests.jar'`"
LOCAL_JAR_PATH=$LOCAL_JAR_PATH SPARK_SUBMIT_FLAGS=$CUDF_UDF_TEST_ARGS TEST_PARALLEL=1 \
LOCAL_JAR_PATH=$LOCAL_JAR_PATH SPARK_SUBMIT_FLAGS="$SPARK_CONF $CUDF_UDF_TEST_ARGS" TEST_PARALLEL=1 \
bash $LOCAL_JAR_PATH/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" -m "cudf_udf" --cudf_udf --test_type=$TEST_TYPE
else
## Run tests with jars building from the spark-rapids source code
bash /home/ubuntu/spark-rapids/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" --test_type=$TEST_TYPE

## Run cudf-udf tests
CUDF_UDF_TEST_ARGS="$CUDF_UDF_TEST_ARGS --conf spark.executorEnv.PYTHONPATH=`ls /home/ubuntu/spark-rapids/dist/target/rapids-4-spark_*.jar | grep -v 'tests.jar'`"
SPARK_SUBMIT_FLAGS=$CUDF_UDF_TEST_ARGS TEST_PARALLEL=1 \
SPARK_SUBMIT_FLAGS="$SPARK_CONF $CUDF_UDF_TEST_ARGS" TEST_PARALLEL=1 \
bash /home/ubuntu/spark-rapids/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" -m "cudf_udf" --cudf_udf --test_type=$TEST_TYPE
fi
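
The conversion block above can be checked in isolation; a minimal sketch with an illustrative input (the configuration names and values are placeholders):

    # Placeholder input: two comma-separated configurations
    SPARK_CONF='spark.rapids.cudfVersionOverride=true,spark.sql.shuffle.partitions=12'
    # The loop exports each entry for run_pyspark_from_build.sh:
    #   PYSP_TEST_spark_rapids_cudfVersionOverride=true
    #   PYSP_TEST_spark_sql_shuffle_partitions=12
    # and SPARK_CONF is rewritten into spark-submit style flags:
    #   --conf spark.rapids.cudfVersionOverride=true --conf spark.sql.shuffle.partitions=12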
