From 449948769479a558d5ab05a6b2a11de830815005 Mon Sep 17 00:00:00 2001 From: Allen Xu Date: Wed, 5 Aug 2020 18:05:26 +0800 Subject: [PATCH] Move codes from spark to rapids repo --- ...d-with-rapids-accelerator-on-databricks.md | 176 +- .../apache/spark/examples/ShortCircuit.scala | 47 - .../org/apache/spark/examples/UDFToExpr.scala | 40 - .../spark/examples/sql/UDFMathToExpr.scala | 40 - .../launcher/AbstractCommandBuilder.java | 318 --- scripts/parseTestOutput.py | 116 -- sql/catalyst/pom.xml | 185 -- .../sql/catalyst/expressions/ScalaUDF.scala | 1764 ----------------- sql/core/pom.xml | 230 --- .../sql/expressions/UserDefinedFunction.scala | 142 -- .../org/apache/spark/sql/OpcodeSuite.scala | 1073 ---------- .../scala/org/apache/spark/sql/UDFSuite.scala | 584 ------ sql/hive/pom.xml | 273 --- 13 files changed, 88 insertions(+), 4900 deletions(-) delete mode 100644 examples/src/main/scala/org/apache/spark/examples/ShortCircuit.scala delete mode 100644 examples/src/main/scala/org/apache/spark/examples/UDFToExpr.scala delete mode 100644 examples/src/main/scala/org/apache/spark/examples/sql/UDFMathToExpr.scala delete mode 100644 launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java delete mode 100644 scripts/parseTestOutput.py delete mode 100644 sql/catalyst/pom.xml delete mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala delete mode 100644 sql/core/pom.xml delete mode 100644 sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/OpcodeSuite.scala delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala delete mode 100644 sql/hive/pom.xml diff --git a/docs/get-started/getting-started-with-rapids-accelerator-on-databricks.md b/docs/get-started/getting-started-with-rapids-accelerator-on-databricks.md index b3b17477f9db..be895e6881d1 100644 --- a/docs/get-started/getting-started-with-rapids-accelerator-on-databricks.md +++ b/docs/get-started/getting-started-with-rapids-accelerator-on-databricks.md @@ -1,88 +1,88 @@ ---- -layout: page -title: Databricks -nav_order: 3 -parent: Getting-Started ---- - -# Getting started with RAPIDS Accelerator on Databricks -This guide will run through how to set up the RAPIDS Accelerator for Apache Spark 3.0 on Databricks. At the end of this guide, the reader will be able to run a sample Apache Spark application that runs on NVIDIA GPUs on Databricks. - -## Prerequisites - * Apache Spark 3.0 running in DataBricks Runtime 7.0 ML with GPU - * AWS: 7.0 ML (includes Apache Spark 3.0.0, GPU, Scala 2.12) - * Azure: 7.0 ML (GPU, Scala 2.12, Spark 3.0.0) - -The number of GPUs per node dictates the number of Spark executors that can run in that node. - -## Start a Databricks Cluster -Create a Databricks cluster by going to Clusters, then clicking “+ Create Cluster”. Ensure the cluster meets the prerequisites above by configuring it as follows: -1. On AWS, make sure to use 7.0 ML (GPU, Scala 2.12, Spark 3.0.0), or for Azure, choose 7.0 ML (GPU, Scala 2.12, Spark 3.0.0). -2. Under Autopilot Options, disable auto scaling. -3. Choose the number of workers that matches the number of GPUs you want to use. -4. Select a worker type. On AWS, use nodes with 1 GPU each such as `p3.xlarge` or `g4dn.xlarge`. p2 nodes do not meet the architecture requirements for the Spark worker (although they can be used for the driver node). For Azure, choose GPU nodes such as Standard_NC6s_v3. -5. 
Select the driver type. Generally this can be set to be the same as the worker. -6. Start the cluster - -## Advanced Cluster Configuration - -We will need to create an initialization script for the cluster that installs the RAPIDS jars to the cluster. - -1. To create the initialization script, import the initialization script notebook from the repo [generate-init-script.ipynb](../demo/Databricks/generate-init-script.ipynb) to your workspace. See [Managing Notebooks](https://docs.databricks.com/notebooks/notebooks-manage.html#id2) on how to import a notebook, then open the notebook. -2. Once you are in the notebook, click the “Run All” button. -3. Ensure that the newly created init.sh script is present in the output from cell 2 and that the contents of the script are correct. -4. Go back and edit your cluster to configure it to use the init script. To do this, click the “Clusters” button on the left panel, then select your cluster. -5. Click the “Edit” button, then navigate down to the “Advanced Options” section. Select the “Init Scripts” tab in the advanced options section, and paste the initialization script: `dbfs:/databricks/init_scripts/init.sh`, then click “Add”. - - ![Init Script](../img/initscript.png) - -6. Now select the “Spark” tab, and paste the following config options into the Spark Config section. Change the config values based on the workers you choose. See Apache Spark [configuration](https://spark.apache.org/docs/latest/configuration.html) and RAPIDS Accelerator for Apache Spark [descriptions](../configs) for each config. - - The [`spark.task.resource.gpu.amount`](https://spark.apache.org/docs/latest/configuration.html#scheduling) configuration is defaulted to 1 by Databricks. That means that only 1 task can run on an executor with 1 GPU, which is limiting, especially on the reads and writes from Parquet. Set this to 1/(number of cores per executor) which will allow multiple tasks to run in parallel just like the CPU side. Having the value smaller is fine as well. - - ```bash - spark.plugins com.nvidia.spark.SQLPlugin - spark.sql.parquet.filterPushdown false - spark.rapids.sql.incompatibleOps.enabled true - spark.rapids.memory.pinnedPool.size 2G - spark.task.resource.gpu.amount 0.1 - spark.rapids.sql.concurrentGpuTasks 2 - spark.locality.wait 0s - spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version 2 - spark.executor.extraJavaOptions "-Dai.rapids.cudf.prefer-pinned=true" - ``` - - ![Spark Config](../img/sparkconfig.png) - -7. Once you’ve added the Spark config, click “Confirm and Restart”. -8. Once the cluster comes back up, it is now enabled for GPU-accelerated Spark with RAPIDS and cuDF. - -## Import the GPU Mortgage Example Notebook -Import the example [notebook](../demo/gpu-mortgage_accelerated.ipynb) from the repo into your workspace, then open the notebook. -Modify the first cell to point to your workspace, and download a larger dataset if needed. You can find the links to the datasets at [docs.rapids.ai](https://docs.rapids.ai/datasets/mortgage-data). - -```bash -%sh - -wget http://rapidsai-data.s3-website.us-east-2.amazonaws.com/notebook-mortgage-data/mortgage_2000.tgz -P /Users// - -mkdir -p /dbfs/FileStore/tables/mortgage -mkdir -p /dbfs/FileStore/tables/mortgage_parquet_gpu/perf -mkdir /dbfs/FileStore/tables/mortgage_parquet_gpu/acq -mkdir /dbfs/FileStore/tables/mortgage_parquet_gpu/output - -tar xfvz /Users//mortgage_2000.tgz --directory /dbfs/FileStore/tables/mortgage -``` - -In Cell 3, update the data paths if necessary. 
The example notebook merges the columns and prepares the data for XGoost training. The temp and final output results are written back to the dbfs.
-```bash
-orig_perf_path='dbfs:///FileStore/tables/mortgage/perf/*'
-orig_acq_path='dbfs:///FileStore/tables/mortgage/acq/*'
-tmp_perf_path='dbfs:///FileStore/tables/mortgage_parquet_gpu/perf/'
-tmp_acq_path='dbfs:///FileStore/tables/mortgage_parquet_gpu/acq/'
-output_path='dbfs:///FileStore/tables/mortgage_parquet_gpu/output/'
-```
-Run the notebook by clicking “Run All”.
-
-## Hints
-Spark logs in Databricks are removed upon cluster shutdown. It is possible to save logs in a cloud storage location using Databricks [cluster log delivery](https://docs.databricks.com/clusters/configure.html#cluster-log-delivery-1). Enable this option before starting the cluster to capture the logs.
+---
+layout: page
+title: Databricks
+nav_order: 3
+parent: Getting-Started
+---
+
+# Getting started with RAPIDS Accelerator on Databricks
+This guide will run through how to set up the RAPIDS Accelerator for Apache Spark 3.0 on Databricks. At the end of this guide, the reader will be able to run a sample Apache Spark application that runs on NVIDIA GPUs on Databricks.
+
+## Prerequisites
+ * Apache Spark 3.0 running in Databricks Runtime 7.0 ML with GPU
+ * AWS: 7.0 ML (includes Apache Spark 3.0.0, GPU, Scala 2.12)
+ * Azure: 7.0 ML (GPU, Scala 2.12, Spark 3.0.0)
+
+The number of GPUs per node dictates the number of Spark executors that can run in that node.
+
+## Start a Databricks Cluster
+Create a Databricks cluster by going to Clusters, then clicking “+ Create Cluster”. Ensure the cluster meets the prerequisites above by configuring it as follows:
+1. On AWS, make sure to use 7.0 ML (GPU, Scala 2.12, Spark 3.0.0), or for Azure, choose 7.0 ML (GPU, Scala 2.12, Spark 3.0.0).
+2. Under Autopilot Options, disable auto scaling.
+3. Choose the number of workers that matches the number of GPUs you want to use.
+4. Select a worker type. On AWS, use nodes with 1 GPU each such as `p3.xlarge` or `g4dn.xlarge`. p2 nodes do not meet the architecture requirements for the Spark worker (although they can be used for the driver node). For Azure, choose GPU nodes such as Standard_NC6s_v3.
+5. Select the driver type. Generally this can be set to be the same as the worker.
+6. Start the cluster.
+
+## Advanced Cluster Configuration
+
+We will need to create an initialization script for the cluster that installs the RAPIDS jars to the cluster.
+
+1. To create the initialization script, import the initialization script notebook from the repo [generate-init-script.ipynb](../demo/Databricks/generate-init-script.ipynb) to your workspace. See [Managing Notebooks](https://docs.databricks.com/notebooks/notebooks-manage.html#id2) on how to import a notebook, then open the notebook.
+2. Once you are in the notebook, click the “Run All” button.
+3. Ensure that the newly created init.sh script is present in the output from cell 2 and that the contents of the script are correct.
+4. Go back and edit your cluster to configure it to use the init script. To do this, click the “Clusters” button on the left panel, then select your cluster.
+5. Click the “Edit” button, then navigate down to the “Advanced Options” section. Select the “Init Scripts” tab in the advanced options section, and paste the initialization script: `dbfs:/databricks/init_scripts/init.sh`, then click “Add”.
+
+   ![Init Script](../img/initscript.png)
+
+6. Now select the “Spark” tab, and paste the following config options into the Spark Config section. Change the config values based on the workers you choose. See Apache Spark [configuration](https://spark.apache.org/docs/latest/configuration.html) and RAPIDS Accelerator for Apache Spark [descriptions](../configs) for each config.
+
+   The [`spark.task.resource.gpu.amount`](https://spark.apache.org/docs/latest/configuration.html#scheduling) configuration is defaulted to 1 by Databricks. That means that only 1 task can run on an executor with 1 GPU, which is limiting, especially on the reads and writes from Parquet. Set this to 1/(number of cores per executor) which will allow multiple tasks to run in parallel just like the CPU side. Having the value smaller is fine as well.
+
+   ```bash
+   spark.plugins com.nvidia.spark.SQLPlugin
+   spark.sql.parquet.filterPushdown false
+   spark.rapids.sql.incompatibleOps.enabled true
+   spark.rapids.memory.pinnedPool.size 2G
+   spark.task.resource.gpu.amount 0.1
+   spark.rapids.sql.concurrentGpuTasks 2
+   spark.locality.wait 0s
+   spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version 2
+   spark.executor.extraJavaOptions "-Dai.rapids.cudf.prefer-pinned=true"
+   ```
+
+   ![Spark Config](../img/sparkconfig.png)
+
+7. Once you’ve added the Spark config, click “Confirm and Restart”.
+8. Once the cluster comes back up, it is now enabled for GPU-accelerated Spark with RAPIDS and cuDF.
+
+## Import the GPU Mortgage Example Notebook
+Import the example [notebook](../demo/gpu-mortgage_accelerated.ipynb) from the repo into your workspace, then open the notebook.
+Modify the first cell to point to your workspace, and download a larger dataset if needed. You can find the links to the datasets at [docs.rapids.ai](https://docs.rapids.ai/datasets/mortgage-data).
+
+```bash
+%sh
+
+wget http://rapidsai-data.s3-website.us-east-2.amazonaws.com/notebook-mortgage-data/mortgage_2000.tgz -P /Users//
+
+mkdir -p /dbfs/FileStore/tables/mortgage
+mkdir -p /dbfs/FileStore/tables/mortgage_parquet_gpu/perf
+mkdir /dbfs/FileStore/tables/mortgage_parquet_gpu/acq
+mkdir /dbfs/FileStore/tables/mortgage_parquet_gpu/output
+
+tar xfvz /Users//mortgage_2000.tgz --directory /dbfs/FileStore/tables/mortgage
+```
+
+In Cell 3, update the data paths if necessary. The example notebook merges the columns and prepares the data for XGBoost training. The temp and final output results are written back to DBFS.
+```bash
+orig_perf_path='dbfs:///FileStore/tables/mortgage/perf/*'
+orig_acq_path='dbfs:///FileStore/tables/mortgage/acq/*'
+tmp_perf_path='dbfs:///FileStore/tables/mortgage_parquet_gpu/perf/'
+tmp_acq_path='dbfs:///FileStore/tables/mortgage_parquet_gpu/acq/'
+output_path='dbfs:///FileStore/tables/mortgage_parquet_gpu/output/'
+```
+Run the notebook by clicking “Run All”.
+
+## Hints
+Spark logs in Databricks are removed upon cluster shutdown. It is possible to save logs in a cloud storage location using Databricks [cluster log delivery](https://docs.databricks.com/clusters/configure.html#cluster-log-delivery-1). Enable this option before starting the cluster to capture the logs.
diff --git a/examples/src/main/scala/org/apache/spark/examples/ShortCircuit.scala b/examples/src/main/scala/org/apache/spark/examples/ShortCircuit.scala
deleted file mode 100644
index eadc41d8e96d..000000000000
--- a/examples/src/main/scala/org/apache/spark/examples/ShortCircuit.scala
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.examples - -import org.apache.spark._ -import org.apache.spark.sql._ -import org.apache.spark.sql.functions._; - -// scalastyle:off println - -object ShortCircuit { - val spark: SparkSession = SparkSession.builder.master("local").getOrCreate() - import spark.implicits._ - def main(args: Array[String]) : Unit = { - val f: Double => Double = { x => - val t = - if (x > 1.0 && x < 3.7) { - (if (x > 1.1 && x < 2.0) 1.0 else 1.1) + 24.0 - } else { - if (x < 0.1) 2.3 else 4.1 - } - - t + 2.2 - } - val u = udf(f) - val dataset = List(1.0, 2, 3, 4).toDS() - val result = dataset.withColumn("new", u('value)) - result.show; println(result.queryExecution) - } -} - -// scalastyle:on println diff --git a/examples/src/main/scala/org/apache/spark/examples/UDFToExpr.scala b/examples/src/main/scala/org/apache/spark/examples/UDFToExpr.scala deleted file mode 100644 index 186d9c77ff73..000000000000 --- a/examples/src/main/scala/org/apache/spark/examples/UDFToExpr.scala +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.examples - -import org.apache.spark._ -import org.apache.spark.sql._ -import org.apache.spark.sql.functions._; - -// scalastyle:off println - -object UDFToExpr { - val spark: SparkSession = SparkSession.builder.master("local").getOrCreate() - import spark.implicits._ - def main(args: Array[String]) : Unit = { - val inc = udf{x : Int => x + 1} - val dec = udf{x : Int => x - 1} - val dataset = List(1, 2, 3).toDS() - val result = dataset.withColumn("new", inc('value) + dec('value)) - result.show; println(result.queryExecution.analyzed) - val ref = dataset.withColumn("new", (col("value") + 1) + (col("value") - 1)) - ref.show; println(ref.queryExecution.analyzed) - } -} - -// scalastyle:on println diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/UDFMathToExpr.scala b/examples/src/main/scala/org/apache/spark/examples/sql/UDFMathToExpr.scala deleted file mode 100644 index 749513285a8f..000000000000 --- a/examples/src/main/scala/org/apache/spark/examples/sql/UDFMathToExpr.scala +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.examples.sql - -import org.apache.spark._ -import org.apache.spark.sql._ -import org.apache.spark.sql.functions._; - -// scalastyle:off println - -object UDFMathToExpr { - val spark: SparkSession = SparkSession.builder.master("local").getOrCreate() - import spark.implicits._ - def main(args: Array[String]) : Unit = { - val acosUDF = udf{x : Double => math.acos(x)} - val asinUDF = udf{x : Double => math.asin(x)} - val dataset = List(1.0, 2, 3).toDS() - val result = dataset.withColumn("new", acosUDF('value) + asinUDF('value)) - result.show; println(result.queryExecution.analyzed) - val ref = dataset.withColumn("new", acos(col("value")) + (asin(col("value")))) - ref.show; println(ref.queryExecution.analyzed) - } -} - -// scalastyle:on println diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java deleted file mode 100644 index 3ae4633c79b0..000000000000 --- a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java +++ /dev/null @@ -1,318 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.launcher; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.InputStreamReader; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Set; -import java.util.regex.Pattern; - -import static org.apache.spark.launcher.CommandBuilderUtils.*; - -/** - * Abstract Spark command builder that defines common functionality. - */ -abstract class AbstractCommandBuilder { - - boolean verbose; - String appName; - String appResource; - String deployMode; - String javaHome; - String mainClass; - String master; - protected String propertiesFile; - final List appArgs; - final List jars; - final List files; - final List pyFiles; - final Map childEnv; - final Map conf; - - // The merged configuration for the application. Cached to avoid having to read / parse - // properties files multiple times. - private Map effectiveConfig; - - AbstractCommandBuilder() { - this.appArgs = new ArrayList<>(); - this.childEnv = new HashMap<>(); - this.conf = new HashMap<>(); - this.files = new ArrayList<>(); - this.jars = new ArrayList<>(); - this.pyFiles = new ArrayList<>(); - } - - /** - * Builds the command to execute. - * - * @param env A map containing environment variables for the child process. It may already contain - * entries defined by the user (such as SPARK_HOME, or those defined by the - * SparkLauncher constructor that takes an environment), and may be modified to - * include other variables needed by the process to be executed. - */ - abstract List buildCommand(Map env) - throws IOException, IllegalArgumentException; - - /** - * Builds a list of arguments to run java. - * - * This method finds the java executable to use and appends JVM-specific options for running a - * class with Spark in the classpath. It also loads options from the "java-opts" file in the - * configuration directory being used. - * - * Callers should still add at least the class to run, as well as any arguments to pass to the - * class. - */ - List buildJavaCommand(String extraClassPath) throws IOException { - List cmd = new ArrayList<>(); - - String[] candidateJavaHomes = new String[] { - javaHome, - childEnv.get("JAVA_HOME"), - System.getenv("JAVA_HOME"), - System.getProperty("java.home") - }; - for (String javaHome : candidateJavaHomes) { - if (javaHome != null) { - cmd.add(join(File.separator, javaHome, "bin", "java")); - break; - } - } - - // Load extra JAVA_OPTS from conf/java-opts, if it exists. 
- File javaOpts = new File(join(File.separator, getConfDir(), "java-opts")); - if (javaOpts.isFile()) { - try (BufferedReader br = new BufferedReader(new InputStreamReader( - new FileInputStream(javaOpts), StandardCharsets.UTF_8))) { - String line; - while ((line = br.readLine()) != null) { - addOptionString(cmd, line); - } - } - } - - cmd.add("-cp"); - cmd.add(join(File.pathSeparator, buildClassPath(extraClassPath))); - return cmd; - } - - void addOptionString(List cmd, String options) { - if (!isEmpty(options)) { - for (String opt : parseOptionString(options)) { - cmd.add(opt); - } - } - } - - /** - * Builds the classpath for the application. Returns a list with one classpath entry per element; - * each entry is formatted in the way expected by java.net.URLClassLoader (more - * specifically, with trailing slashes for directories). - */ - List buildClassPath(String appClassPath) throws IOException { - String sparkHome = getSparkHome(); - - Set cp = new LinkedHashSet<>(); - addToClassPath(cp, appClassPath); - - addToClassPath(cp, getConfDir()); - - boolean prependClasses = !isEmpty(getenv("SPARK_PREPEND_CLASSES")); - boolean isTesting = "1".equals(getenv("SPARK_TESTING")); - if (prependClasses || isTesting) { - String scala = getScalaVersion(); - List projects = Arrays.asList( - "common/kvstore", - "common/network-common", - "common/network-shuffle", - "common/network-yarn", - "common/sketch", - "common/tags", - "common/unsafe", - "core", - "examples", - "graphx", - "launcher", - "mllib", - "repl", - "resource-managers/mesos", - "resource-managers/yarn", - "sql/catalyst", - "sql/core", - "sql/hive", - "sql/hive-thriftserver", - "streaming" - ); - if (prependClasses) { - if (!isTesting) { - System.err.println( - "NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark classes ahead of " + - "assembly."); - } - for (String project : projects) { - addToClassPath(cp, String.format("%s/%s/target/scala-%s/classes", sparkHome, project, - scala)); - } - } - if (isTesting) { - for (String project : projects) { - addToClassPath(cp, String.format("%s/%s/target/scala-%s/test-classes", sparkHome, - project, scala)); - } - } - - // Add this path to include jars that are shaded in the final deliverable created during - // the maven build. These jars are copied to this directory during the build. - addToClassPath(cp, String.format("%s/core/target/jars/*", sparkHome)); - addToClassPath(cp, String.format("%s/mllib/target/jars/*", sparkHome)); - } - - // Add Spark jars to the classpath. For the testing case, we rely on the test code to set and - // propagate the test classpath appropriately. For normal invocation, look for the jars - // directory under SPARK_HOME. - boolean isTestingSql = "1".equals(getenv("SPARK_SQL_TESTING")); - String jarsDir = findJarsDir(getSparkHome(), getScalaVersion(), !isTesting && !isTestingSql); - if (jarsDir != null) { - addToClassPath(cp, join(File.separator, jarsDir, "*")); - } - - addToClassPath(cp, getenv("HADOOP_CONF_DIR")); - addToClassPath(cp, getenv("YARN_CONF_DIR")); - addToClassPath(cp, getenv("SPARK_DIST_CLASSPATH")); - return new ArrayList<>(cp); - } - - /** - * Adds entries to the classpath. - * - * @param cp List to which the new entries are appended. - * @param entries New classpath entries (separated by File.pathSeparator). 
- */ - private void addToClassPath(Set cp, String entries) { - if (isEmpty(entries)) { - return; - } - String[] split = entries.split(Pattern.quote(File.pathSeparator)); - for (String entry : split) { - if (!isEmpty(entry)) { - if (new File(entry).isDirectory() && !entry.endsWith(File.separator)) { - entry += File.separator; - } - cp.add(entry); - } - } - } - - String getScalaVersion() { - String scala = getenv("SPARK_SCALA_VERSION"); - if (scala != null) { - return scala; - } - String sparkHome = getSparkHome(); - // TODO: revisit for Scala 2.13 support - File scala212 = new File(sparkHome, "launcher/target/scala-2.12"); - // File scala211 = new File(sparkHome, "launcher/target/scala-2.11"); - // checkState(!scala212.isDirectory() || !scala211.isDirectory(), - // "Presence of build for multiple Scala versions detected.\n" + - // "Either clean one of them or set SPARK_SCALA_VERSION in your environment."); - // if (scala212.isDirectory()) { - // return "2.12"; - // } else { - // checkState(scala211.isDirectory(), "Cannot find any build directories."); - // return "2.11"; - // } - checkState(scala212.isDirectory(), "Cannot find any build directories."); - return "2.12"; - } - - String getSparkHome() { - String path = getenv(ENV_SPARK_HOME); - if (path == null && "1".equals(getenv("SPARK_TESTING"))) { - path = System.getProperty("spark.test.home"); - } - checkState(path != null, - "Spark home not found; set it explicitly or use the SPARK_HOME environment variable."); - return path; - } - - String getenv(String key) { - return firstNonEmpty(childEnv.get(key), System.getenv(key)); - } - - void setPropertiesFile(String path) { - effectiveConfig = null; - this.propertiesFile = path; - } - - Map getEffectiveConfig() throws IOException { - if (effectiveConfig == null) { - effectiveConfig = new HashMap<>(conf); - Properties p = loadPropertiesFile(); - for (String key : p.stringPropertyNames()) { - if (!effectiveConfig.containsKey(key)) { - effectiveConfig.put(key, p.getProperty(key)); - } - } - } - return effectiveConfig; - } - - /** - * Loads the configuration file for the application, if it exists. This is either the - * user-specified properties file, or the spark-defaults.conf file under the Spark configuration - * directory. - */ - private Properties loadPropertiesFile() throws IOException { - Properties props = new Properties(); - File propsFile; - if (propertiesFile != null) { - propsFile = new File(propertiesFile); - checkArgument(propsFile.isFile(), "Invalid properties file '%s'.", propertiesFile); - } else { - propsFile = new File(getConfDir(), DEFAULT_PROPERTIES_FILE); - } - - if (propsFile.isFile()) { - try (InputStreamReader isr = new InputStreamReader( - new FileInputStream(propsFile), StandardCharsets.UTF_8)) { - props.load(isr); - for (Map.Entry e : props.entrySet()) { - e.setValue(e.getValue().toString().trim()); - } - } - } - return props; - } - - private String getConfDir() { - String confDir = getenv("SPARK_CONF_DIR"); - return confDir != null ? 
confDir : join(File.separator, getSparkHome(), "conf"); - } - -} diff --git a/scripts/parseTestOutput.py b/scripts/parseTestOutput.py deleted file mode 100644 index e0d55e9a11b0..000000000000 --- a/scripts/parseTestOutput.py +++ /dev/null @@ -1,116 +0,0 @@ -### Script for generating test reports for OpcodeSuite tests (sql/core/src/test/scala/org/.../sql/OpcodeSuite.scala) -### "build/mvn -q -Dtest=none -DwildcardSuites=org.apache.spark.sql.OpcodeSuite test > test_results.txt" to execute tests -### "python scripts/parseTestOutput.py test_results.txt" to generate test report - -import os,sys - - -class Test(): - def __init__(self,testName,passing=True,fallback=False): - self.testName = testName - self.passing = passing - self.fallback = fallback - def set_passing(self,result): - self.passing = result - def set_fallback(self,result): - self.fallback = result - - -def getStart(thefile): - starttoken = "OpcodeSuite:" - for line in thefile: - if line.find(starttoken)==-1: - continue - else: - return - sys.exit("Couldn't find the start of OpcodeSuite results; tests were likely not executed due to a fatal error\n") - - -def getNextTest(thefile): - endtoken = "Run completed in" - testex_token = "EXECUTING TEST:" - for line in thefile: - if line.find(endtoken) != -1: - return -1 - if line.find(testex_token)==-1: - continue - else: - newTest = Test(line.split(':')[1].strip()) - return newTest - return -1 - - -def getTestResults(thefile,curr_test): - testend_token="TEST: *** END ***" - fallback_token = "UDF compilation failure:" - hardfail_token = "*** FAILED ***" - for line in thefile: - if line.find(testend_token)==-1: - pass - else: - return curr_test - if line.find(fallback_token)==-1: - pass - else: - curr_test.set_fallback(True) - if line.find(hardfail_token)==-1: - pass - else: - curr_test.set_passing(False) - return curr_test - - -def generateAndSaveReport(testList): - test_count=len(testList) - test_fail=0; test_pass=0; test_fallback=0 - test_fallback_pass=0; test_fallback_fail=0 - for mytest in testList: - if mytest.fallback==True: - test_fallback+=1 - if mytest.passing==True: - test_pass+=1 - test_fallback_pass+=1 - else: - test_fail+=1 - test_fallback_pass+=1 - else: - if mytest.passing==True: - test_pass+=1 - else: - test_fail+=1 - - with open("test_report.txt",'w+') as myout: - myout.write("*** Total number of tests: {}\n".format(test_count)) - myout.write("*** Total number of tests passing: {}\n".format(test_pass)) - myout.write("*** Total number of tests failing: {}\n".format(test_fail)) - myout.write("*** Total number of tests falling back to JVM execution: {}\n".format(test_fallback)) - myout.write("*** Total number of tests falling back AND passing: {}\n".format(test_fallback_pass)) - myout.write("*** Total number of tests falling back AND failing: {}\n".format(test_fallback_fail)) - myout.write("\n\n\n\n\n\n") - myout.write("INDIVIDUAL TEST INFORMATION BELOW:\n\n") - for mytest in testList: - myout.write("TEST NAME: {}\n".format(mytest.testName)) - myout.write("TEST PASSING? {}\n".format(mytest.passing)) - myout.write("TEST FALLING BACK? 
{}\n".format(mytest.fallback)) - myout.write("\n\n") - -def main(): - try: - open(sys.argv[1],'r') - except IOError: - print("Cannot read file: {}\n".format(sys.argv[1])) - - with open(sys.argv[1],'r') as myfile: - testList=[] - getStart(myfile) - while(1): - curr_test=getNextTest(myfile) - if curr_test==-1: - break - testList.append(getTestResults(myfile,curr_test)) - generateAndSaveReport(testList) - - -if __name__=="__main__": - main() - diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml deleted file mode 100644 index 33a22d190fa7..000000000000 --- a/sql/catalyst/pom.xml +++ /dev/null @@ -1,185 +0,0 @@ - - - - - 4.0.0 - - org.apache.spark - spark-parent_2.12 - 3.0.0-SNAPSHOT - ../../pom.xml - - - spark-catalyst_2.12 - jar - Spark Project Catalyst - http://spark.apache.org/ - - catalyst - - - - - org.scala-lang - scala-reflect - - - org.scala-lang.modules - scala-parser-combinators_${scala.binary.version} - - - - org.apache.spark - spark-core_${scala.binary.version} - ${project.version} - - - org.apache.spark - spark-core_${scala.binary.version} - ${project.version} - test-jar - test - - - org.apache.spark - spark-tags_${scala.binary.version} - - - - - org.apache.spark - spark-tags_${scala.binary.version} - test-jar - test - - - org.mockito - mockito-core - test - - - - org.apache.spark - spark-unsafe_${scala.binary.version} - ${project.version} - - - org.apache.spark - spark-sketch_${scala.binary.version} - ${project.version} - - - org.scalacheck - scalacheck_${scala.binary.version} - test - - - org.codehaus.janino - janino - - - org.codehaus.janino - commons-compiler - - - org.antlr - antlr4-runtime - - - commons-codec - commons-codec - - - com.univocity - univocity-parsers - 2.7.3 - jar - - - org.apache.arrow - arrow-vector - - - - target/scala-${scala.binary.version}/classes - target/scala-${scala.binary.version}/test-classes - - - - org.apache.maven.plugins - maven-jar-plugin - - - prepare-test-jar - test-compile - - test-jar - - - - - - org.scalatest - scalatest-maven-plugin - - -ea -Xmx4g -Xss4m -XX:ReservedCodeCacheSize=${CodeCacheSize} - - - - org.antlr - antlr4-maven-plugin - - - - antlr4 - - - - - true - ../catalyst/src/main/antlr4 - true - - - - - - - - scala-2.13 - - - org.scala-lang.modules - scala-parallel-collections_${scala.binary.version} - - - - - diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala deleted file mode 100644 index 7513153e7366..000000000000 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala +++ /dev/null @@ -1,1764 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.catalyst.expressions - -import org.apache.spark.SparkException -import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow, ScalaReflection} -import org.apache.spark.sql.catalyst.expressions.codegen._ -import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.types.{AbstractDataType, DataType} - -/** - * User-defined function. - * @param function The user defined scala function to run. - * Note that if you use primitive parameters, you are not able to check if it is - * null or not, and the UDF will return null for you if the primitive input is - * null. Use boxed type or [[Option]] if you wanna do the null-handling yourself. - * @param dataType Return type of function. - * @param children The input expressions of this UDF. - * @param inputPrimitives The analyzer should be aware of Scala primitive types so as to make the - * UDF return null if there is any null input value of these types. On the - * other hand, Java UDFs can only have boxed types, thus this parameter will - * always be all false. - * @param inputTypes The expected input types of this UDF, used to perform type coercion. If we do - * not want to perform coercion, simply use "Nil". Note that it would've been - * better to use Option of Seq[DataType] so we can use "None" as the case for no - * type coercion. However, that would require more refactoring of the codebase. - * @param udfName The user-specified name of this UDF. - * @param nullable True if the UDF can return null value. - * @param udfDeterministic True if the UDF is deterministic. Deterministic UDF returns same result - * each time it is invoked with a particular input. - */ -case class ScalaUDF( - function: AnyRef, - dataType: DataType, - children: Seq[Expression], - inputPrimitives: Seq[Boolean], - inputTypes: Seq[AbstractDataType] = Nil, - udfName: Option[String] = None, - nullable: Boolean = true, - udfDeterministic: Boolean = true) - extends Expression with NonSQLExpression with UserDefinedExpression { - - override lazy val deterministic: Boolean = udfDeterministic && children.forall(_.deterministic) - - override def toString: String = s"${udfName.getOrElse("UDF")}(${children.mkString(", ")})" - - // scalastyle:off line.size.limit - - /** This method has been generated by this script - - (1 to 22).map { x => - val anys = (1 to x).map(x => "Any").reduce(_ + ", " + _) - val childs = (0 to x - 1).map(x => s"val child$x = children($x)").reduce(_ + "\n " + _) - val converters = (0 to x - 1).map(x => s"lazy val converter$x = CatalystTypeConverters.createToScalaConverter(child$x.dataType)").reduce(_ + "\n " + _) - val evals = (0 to x - 1).map(x => s"converter$x(child$x.eval(input))").reduce(_ + ",\n " + _) - - s"""case $x => - val func = function.asInstanceOf[($anys) => Any] - $childs - $converters - (input: InternalRow) => { - func( - $evals) - } - """ - }.foreach(println) - - */ - private[this] val f = children.size match { - case 0 => - val func = function.asInstanceOf[() => Any] - (input: InternalRow) => { - func() - } - - case 1 => - val func = function.asInstanceOf[(Any) => Any] - val child0 = children(0) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input))) - } - - case 2 => - val func = function.asInstanceOf[(Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy 
val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input))) - } - - case 3 => - val func = function.asInstanceOf[(Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input))) - } - - case 4 => - val func = function.asInstanceOf[(Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input))) - } - - case 5 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input))) - } - - case 6 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input))) - } - - case 7 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = 
children(4) - val child5 = children(5) - val child6 = children(6) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input))) - } - - case 8 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input))) - } - - case 9 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - 
converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input))) - } - - case 10 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input))) - } - - case 11 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - 
converter9(child9.eval(input)), - converter10(child10.eval(input))) - } - - case 12 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input))) - } - - case 13 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - val child12 = children(12) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - lazy 
val converter12 = CatalystTypeConverters.createToScalaConverter(child12.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input)), - converter12(child12.eval(input))) - } - - case 14 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - val child12 = children(12) - val child13 = children(13) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - lazy val converter12 = CatalystTypeConverters.createToScalaConverter(child12.dataType) - lazy val converter13 = CatalystTypeConverters.createToScalaConverter(child13.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input)), - converter12(child12.eval(input)), - converter13(child13.eval(input))) - } - - case 15 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - val child12 = children(12) - val child13 = children(13) - val child14 = children(14) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = 
CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - lazy val converter12 = CatalystTypeConverters.createToScalaConverter(child12.dataType) - lazy val converter13 = CatalystTypeConverters.createToScalaConverter(child13.dataType) - lazy val converter14 = CatalystTypeConverters.createToScalaConverter(child14.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input)), - converter12(child12.eval(input)), - converter13(child13.eval(input)), - converter14(child14.eval(input))) - } - - case 16 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - val child12 = children(12) - val child13 = children(13) - val child14 = children(14) - val child15 = children(15) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - lazy val converter12 = CatalystTypeConverters.createToScalaConverter(child12.dataType) - lazy val converter13 = 
CatalystTypeConverters.createToScalaConverter(child13.dataType) - lazy val converter14 = CatalystTypeConverters.createToScalaConverter(child14.dataType) - lazy val converter15 = CatalystTypeConverters.createToScalaConverter(child15.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input)), - converter12(child12.eval(input)), - converter13(child13.eval(input)), - converter14(child14.eval(input)), - converter15(child15.eval(input))) - } - - case 17 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - val child12 = children(12) - val child13 = children(13) - val child14 = children(14) - val child15 = children(15) - val child16 = children(16) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - lazy val converter12 = CatalystTypeConverters.createToScalaConverter(child12.dataType) - lazy val converter13 = CatalystTypeConverters.createToScalaConverter(child13.dataType) - lazy val converter14 = CatalystTypeConverters.createToScalaConverter(child14.dataType) - lazy val converter15 = CatalystTypeConverters.createToScalaConverter(child15.dataType) - lazy val converter16 = CatalystTypeConverters.createToScalaConverter(child16.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input)), - converter12(child12.eval(input)), - converter13(child13.eval(input)), - converter14(child14.eval(input)), - converter15(child15.eval(input)), - 
converter16(child16.eval(input))) - } - - case 18 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - val child12 = children(12) - val child13 = children(13) - val child14 = children(14) - val child15 = children(15) - val child16 = children(16) - val child17 = children(17) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - lazy val converter12 = CatalystTypeConverters.createToScalaConverter(child12.dataType) - lazy val converter13 = CatalystTypeConverters.createToScalaConverter(child13.dataType) - lazy val converter14 = CatalystTypeConverters.createToScalaConverter(child14.dataType) - lazy val converter15 = CatalystTypeConverters.createToScalaConverter(child15.dataType) - lazy val converter16 = CatalystTypeConverters.createToScalaConverter(child16.dataType) - lazy val converter17 = CatalystTypeConverters.createToScalaConverter(child17.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input)), - converter12(child12.eval(input)), - converter13(child13.eval(input)), - converter14(child14.eval(input)), - converter15(child15.eval(input)), - converter16(child16.eval(input)), - converter17(child17.eval(input))) - } - - case 19 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - val child12 = children(12) - val child13 = children(13) - val child14 = children(14) - val child15 = children(15) - val child16 = 
children(16) - val child17 = children(17) - val child18 = children(18) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - lazy val converter12 = CatalystTypeConverters.createToScalaConverter(child12.dataType) - lazy val converter13 = CatalystTypeConverters.createToScalaConverter(child13.dataType) - lazy val converter14 = CatalystTypeConverters.createToScalaConverter(child14.dataType) - lazy val converter15 = CatalystTypeConverters.createToScalaConverter(child15.dataType) - lazy val converter16 = CatalystTypeConverters.createToScalaConverter(child16.dataType) - lazy val converter17 = CatalystTypeConverters.createToScalaConverter(child17.dataType) - lazy val converter18 = CatalystTypeConverters.createToScalaConverter(child18.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input)), - converter12(child12.eval(input)), - converter13(child13.eval(input)), - converter14(child14.eval(input)), - converter15(child15.eval(input)), - converter16(child16.eval(input)), - converter17(child17.eval(input)), - converter18(child18.eval(input))) - } - - case 20 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - val child12 = children(12) - val child13 = children(13) - val child14 = children(14) - val child15 = children(15) - val child16 = children(16) - val child17 = children(17) - val child18 = children(18) - val child19 = children(19) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = 
CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - lazy val converter12 = CatalystTypeConverters.createToScalaConverter(child12.dataType) - lazy val converter13 = CatalystTypeConverters.createToScalaConverter(child13.dataType) - lazy val converter14 = CatalystTypeConverters.createToScalaConverter(child14.dataType) - lazy val converter15 = CatalystTypeConverters.createToScalaConverter(child15.dataType) - lazy val converter16 = CatalystTypeConverters.createToScalaConverter(child16.dataType) - lazy val converter17 = CatalystTypeConverters.createToScalaConverter(child17.dataType) - lazy val converter18 = CatalystTypeConverters.createToScalaConverter(child18.dataType) - lazy val converter19 = CatalystTypeConverters.createToScalaConverter(child19.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input)), - converter12(child12.eval(input)), - converter13(child13.eval(input)), - converter14(child14.eval(input)), - converter15(child15.eval(input)), - converter16(child16.eval(input)), - converter17(child17.eval(input)), - converter18(child18.eval(input)), - converter19(child19.eval(input))) - } - - case 21 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - val child12 = children(12) - val child13 = children(13) - val child14 = children(14) - val child15 = children(15) - val child16 = children(16) - val child17 = children(17) - val child18 = children(18) - val child19 = children(19) - val child20 = children(20) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = 
CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - lazy val converter12 = CatalystTypeConverters.createToScalaConverter(child12.dataType) - lazy val converter13 = CatalystTypeConverters.createToScalaConverter(child13.dataType) - lazy val converter14 = CatalystTypeConverters.createToScalaConverter(child14.dataType) - lazy val converter15 = CatalystTypeConverters.createToScalaConverter(child15.dataType) - lazy val converter16 = CatalystTypeConverters.createToScalaConverter(child16.dataType) - lazy val converter17 = CatalystTypeConverters.createToScalaConverter(child17.dataType) - lazy val converter18 = CatalystTypeConverters.createToScalaConverter(child18.dataType) - lazy val converter19 = CatalystTypeConverters.createToScalaConverter(child19.dataType) - lazy val converter20 = CatalystTypeConverters.createToScalaConverter(child20.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input)), - converter12(child12.eval(input)), - converter13(child13.eval(input)), - converter14(child14.eval(input)), - converter15(child15.eval(input)), - converter16(child16.eval(input)), - converter17(child17.eval(input)), - converter18(child18.eval(input)), - converter19(child19.eval(input)), - converter20(child20.eval(input))) - } - - case 22 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - val child12 = children(12) - val child13 = children(13) - val child14 = children(14) - val child15 = children(15) - val child16 = children(16) - val child17 = children(17) - val child18 = children(18) - val child19 = children(19) - val child20 = children(20) - val child21 = children(21) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = 
CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - lazy val converter12 = CatalystTypeConverters.createToScalaConverter(child12.dataType) - lazy val converter13 = CatalystTypeConverters.createToScalaConverter(child13.dataType) - lazy val converter14 = CatalystTypeConverters.createToScalaConverter(child14.dataType) - lazy val converter15 = CatalystTypeConverters.createToScalaConverter(child15.dataType) - lazy val converter16 = CatalystTypeConverters.createToScalaConverter(child16.dataType) - lazy val converter17 = CatalystTypeConverters.createToScalaConverter(child17.dataType) - lazy val converter18 = CatalystTypeConverters.createToScalaConverter(child18.dataType) - lazy val converter19 = CatalystTypeConverters.createToScalaConverter(child19.dataType) - lazy val converter20 = CatalystTypeConverters.createToScalaConverter(child20.dataType) - lazy val converter21 = CatalystTypeConverters.createToScalaConverter(child21.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input)), - converter12(child12.eval(input)), - converter13(child13.eval(input)), - converter14(child14.eval(input)), - converter15(child15.eval(input)), - converter16(child16.eval(input)), - converter17(child17.eval(input)), - converter18(child18.eval(input)), - converter19(child19.eval(input)), - converter20(child20.eval(input)), - converter21(child21.eval(input))) - } - } - - // scalastyle:on line.size.limit - override def doGenCode( - ctx: CodegenContext, - ev: ExprCode): ExprCode = { - val converterClassName = classOf[Any => Any].getName - - // The type converters for inputs and the result. - val converters: Array[Any => Any] = children.map { c => - CatalystTypeConverters.createToScalaConverter(c.dataType) - }.toArray :+ CatalystTypeConverters.createToCatalystConverter(dataType) - val convertersTerm = ctx.addReferenceObj("converters", converters, s"$converterClassName[]") - val errorMsgTerm = ctx.addReferenceObj("errMsg", udfErrorMessage) - val resultTerm = ctx.freshName("result") - - // codegen for children expressions - val evals = children.map(_.genCode(ctx)) - - // Generate the codes for expressions and calling user-defined function - // We need to get the boxedType of dataType's javaType here. Because for the dataType - // such as IntegerType, its javaType is `int` and the returned type of user-defined - // function is Object. Trying to convert an Object to `int` will cause casting exception. - val evalCode = evals.map(_.code).mkString("\n") - val (funcArgs, initArgs) = evals.zipWithIndex.zip(children.map(_.dataType)).map { - case ((eval, i), dt) => - val argTerm = ctx.freshName("arg") - val initArg = if (CatalystTypeConverters.isPrimitive(dt)) { - val convertedTerm = ctx.freshName("conv") - s""" - |${CodeGenerator.boxedType(dt)} $convertedTerm = ${eval.value}; - |Object $argTerm = ${eval.isNull} ? 
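// ---------------------------------------------------------------------------------
// Illustrative sketch (editor's addition, not part of the deleted file): every arity
// case above repeats the same pattern, unrolled only because scala.Function2 through
// scala.Function22 each have a fixed arity. Written out once for two arguments, and
// assuming this file's existing imports (InternalRow, Expression,
// CatalystTypeConverters), the pattern is:
private def evaluatorForArity2(
    function: AnyRef,
    children: Seq[Expression]): InternalRow => Any = {
  val func = function.asInstanceOf[(Any, Any) => Any]
  val Seq(child0, child1) = children
  // One Catalyst-to-Scala converter per child, built lazily and reused for every row.
  lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType)
  lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType)
  (input: InternalRow) =>
    func(converter0(child0.eval(input)), converter1(child1.eval(input)))
}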
null : $convertedTerm; - """.stripMargin - } else { - s"Object $argTerm = ${eval.isNull} ? null : $convertersTerm[$i].apply(${eval.value});" - } - (argTerm, initArg) - }.unzip - - val udf = ctx.addReferenceObj("udf", function, s"scala.Function${children.length}") - val getFuncResult = s"$udf.apply(${funcArgs.mkString(", ")})" - val resultConverter = s"$convertersTerm[${children.length}]" - val boxedType = CodeGenerator.boxedType(dataType) - - val funcInvokation = if (CatalystTypeConverters.isPrimitive(dataType) - // If the output is nullable, the returned value must be unwrapped from the Option - && !nullable) { - s"$resultTerm = ($boxedType)$getFuncResult" - } else { - s"$resultTerm = ($boxedType)$resultConverter.apply($getFuncResult)" - } - val callFunc = - s""" - |$boxedType $resultTerm = null; - |try { - | $funcInvokation; - |} catch (Exception e) { - | throw new org.apache.spark.SparkException($errorMsgTerm, e); - |} - """.stripMargin - - ev.copy(code = - code""" - |$evalCode - |${initArgs.mkString("\n")} - |$callFunc - | - |boolean ${ev.isNull} = $resultTerm == null; - |${CodeGenerator.javaType(dataType)} ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; - |if (!${ev.isNull}) { - | ${ev.value} = $resultTerm; - |} - """.stripMargin) - } - - private[this] val resultConverter = CatalystTypeConverters.createToCatalystConverter(dataType) - - lazy val udfErrorMessage = { - val funcCls = function.getClass.getSimpleName - val inputTypes = children.map(_.dataType.catalogString).mkString(", ") - val outputType = dataType.catalogString - s"Failed to execute user defined function($funcCls: ($inputTypes) => $outputType)" - } - - override def eval(input: InternalRow): Any = { - val result = try { - f(input) - } catch { - case e: Exception => - throw new SparkException(udfErrorMessage, e) - } - - resultConverter(result) - } - - val expr = { - try { - CatalystExpressionBuilder(function)(children) - } catch { - case e: SparkException => - // scalastyle:off println - System.err.println("UDF compilation failure: " + e) - None - // scalastyle:on println - } - } -} - -case class CatalystExpressionBuilder(private val function: AnyRef) { - - import java.lang.invoke.SerializedLambda - import javassist.ClassClassPath - import javassist.ClassPool - import javassist.CtClass - import javassist.CtField - import javassist.CtMethod - import javassist.bytecode.CodeIterator - import javassist.bytecode.ConstPool - import javassist.bytecode.Descriptor - import javassist.bytecode.Opcode - import org.apache.spark.sql.types._ - import org.apache.spark.util.Utils.classForName - import scala.annotation.tailrec - import scala.collection.immutable.IntMap - import scala.collection.immutable.SortedMap - import scala.collection.immutable.SortedSet - - final private val lambdaReflection = LambdaReflection(function) - final private val cfg = CFG(lambdaReflection) - - def apply(children: Seq[Expression]): Option[Expression] = { - val entryState = State(lambdaReflection, children) - val entryBlock = cfg.basicBlocks.head - apply(List(entryBlock), Map(entryBlock -> entryState)) - } - - @tailrec - private def apply( - worklist: List[BB], - states: Map[BB, State], - pending: Map[BB, Int] = cfg.pred.mapValues { v => v.size }, - visited: Set[BB] = Set()): Option[Expression] = { - val basicBlock::rest = worklist - val state = states(basicBlock) - val newState = basicBlock.instructionTable.foldLeft(state) { (st, i) => - i._2(basicBlock, st) - } - val newStates = basicBlock.propagateCond(states + (basicBlock -> newState)) - if 
(basicBlock.lastInstruction.isReturn) { - newStates(basicBlock).expr - } else { - val newVisited = visited + basicBlock - val (readySucc, newPending) = - cfg.succ(basicBlock).foldLeft((List[BB](), pending)) { case (x@(r, np), s) => - if (newVisited(s)) { - x - } else { - val count = np(s) - 1 - if (count > 0) (r, np + (s -> count)) else (s::r, np - s) - } - } - apply( - readySucc:::rest, - newStates, - newPending, - newVisited) - } - } - - @tailrec - private def simplify(expr: Expression): Expression = { - def simplifyExpr(expr: Expression): Expression = { - expr match { - case And(Literal.TrueLiteral, c) => simplifyExpr(c) - case And(c, Literal.TrueLiteral) => simplifyExpr(c) - case And(Literal.FalseLiteral, c) => Literal.FalseLiteral - case And(c1@LessThan(s1, Literal(v1, t1)), - c2@LessThan(s2, Literal(v2, t2))) if s1 == s2 && t1 == t2 => { - t1 match { - case DoubleType => - if (v1.asInstanceOf[Double] < v2.asInstanceOf[Double]) { - c1 - } else { - c2 - } - case _ => expr - } - } - case And(c1@LessThanOrEqual(s1, Literal(v1, t1)), - c2@LessThanOrEqual(s2, Literal(v2, t2))) if s1 == s2 && t1 == t2 => { - t1 match { - case DoubleType => - if (v1.asInstanceOf[Double] < v2.asInstanceOf[Double]) { - c1 - } else { - c2 - } - case _ => expr - } - } - case And(c1@LessThanOrEqual(s1, Literal(v1, t1)), - c2@LessThan(s2, Literal(v2, t2))) if s1 == s2 && t1 == t2 => { - t1 match { - case DoubleType => - if (v1.asInstanceOf[Double] < v2.asInstanceOf[Double]) { - c1 - } else { - c2 - } - case _ => expr - } - } - case And(c1@GreaterThan(s1, Literal(v1, t1)), - c2@GreaterThan(s2, Literal(v2, t2))) if s1 == s2 && t1 == t2 => { - t1 match { - case DoubleType => - if (v1.asInstanceOf[Double] > v2.asInstanceOf[Double]) { - c1 - } else { - c2 - } - case _ => expr - } - } - case And(c1@GreaterThan(s1, Literal(v1, t1)), - c2@GreaterThanOrEqual(s2, Literal(v2, t2))) if s1 == s2 && t1 == t2 => { - t1 match { - case DoubleType => - if (v1.asInstanceOf[Double] >= v2.asInstanceOf[Double]) { - c1 - } else { - c2 - } - case _ => expr - } - } - case And(c1, c2) => And(simplifyExpr(c1), simplifyExpr(c2)) - case Or(Literal.TrueLiteral, c) => Literal.TrueLiteral - case Or(Literal.FalseLiteral, c) => simplifyExpr(c) - case Or(c, Literal.FalseLiteral) => simplifyExpr(c) - case Or(c1@GreaterThan(s1, Literal(v1, t1)), - c2@GreaterThanOrEqual(s2, Literal(v2, t2))) if s1 == s2 && t1 == t2 => { - t1 match { - case DoubleType => - if (v1.asInstanceOf[Double] < v2.asInstanceOf[Double]) { - c1 - } else { - c2 - } - case _ => - expr - } - } - case Or(c1, c2) => Or(simplifyExpr(c1), simplifyExpr(c2)) - case Not(Literal.TrueLiteral) => Literal.FalseLiteral - case Not(Literal.FalseLiteral) => Literal.TrueLiteral - case Not(LessThan(c1, c2)) => GreaterThanOrEqual(c1, c2) - case Not(LessThanOrEqual(c1, c2)) => GreaterThan(c1, c2) - case Not(GreaterThan(c1, c2)) => LessThanOrEqual(c1, c2) - case Not(GreaterThanOrEqual(c1, c2)) => LessThan(c1, c2) - case EqualTo(Literal(v1, _), Literal(v2, _)) => - if (v1 == v2) Literal.TrueLiteral else Literal.FalseLiteral - case LessThan(If(c1, - Literal(1, _), - If(c2, - Literal(-1, _), - Literal(0, _))), - Literal(0, _)) => simplifyExpr(And(Not(c1), c2)) - case LessThanOrEqual(If(c1, - Literal(1, _), - If(c2, - Literal(-1, _), - Literal(0, _))), - Literal(0, _)) => simplifyExpr(Not(c1)) - case GreaterThan(If(c1, - Literal(1, _), - If(c2, - Literal(-1, _), - Literal(0, _))), - Literal(0, _)) => c1 - case GreaterThanOrEqual(If(c1, - Literal(1, _), - If(c2, - Literal(-1, _), - Literal(0, _))), - 
Literal(0, _)) => simplifyExpr(Or(c1, Not(c2))) - case EqualTo(If(c1, - Literal(1, _), - If(c2, - Literal(-1, _), - Literal(0, _))), - Literal(0, _)) => simplifyExpr(And(Not(c1), Not(c2))) - case If(c, t, f) if t == f => t - case _ => expr - } - } - val simplifiedExpr = simplifyExpr(expr) - if (simplifiedExpr == expr) simplifiedExpr else simplify(simplifiedExpr) - } - - // - // State - // - case class State( - val locals: Array[Expression], - val stack: List[Expression] = List(), - val cond: Expression = Literal.TrueLiteral, - val expr: Option[Expression] = None) { - def +(that: Option[State]): State = { - that match { - case Some(s) => addConditional(s) - case None => this - } - } - - private def addConditional(that: State): State = { - val combine: ((Expression, Expression)) => - Expression = { case (l1, l2) => simplify(If(cond, l1, l2)) } - that.copy(locals = locals.zip(that.locals).map(combine), - stack = stack.zip(that.stack).map(combine), - cond = simplify(Or(that.cond, cond))) - } - } - object State { - def apply( - lambdaReflection: LambdaReflection, - children: Seq[Expression]): State = { - val max = lambdaReflection.maxLocals - val params = lambdaReflection.parameters.view.zip(children) - val (locals, _) = params.foldLeft((new Array[Expression](max), 0)) { (l, p) => - val (locals, index) = l - val (param, arg) = p - val newIndex = { - if (param == CtClass.doubleType || param == CtClass.longType) { - index + 2 - } else { - index + 1 - } - } - (locals.updated(index, arg), newIndex) - } - State(locals) - } - } - - // - // CFG - // - case class Instruction(val opcode: Int, operand: Int) { - def apply(basicBlock: BB, state: State): State = { - opcode match { - case Opcode.ALOAD_0 | Opcode.DLOAD_0 | Opcode.FLOAD_0 | - Opcode.ILOAD_0 | Opcode.LLOAD_0 => load(state, 0) - case Opcode.ALOAD_1 | Opcode.DLOAD_1 | Opcode.FLOAD_1 | - Opcode.ILOAD_1 | Opcode.LLOAD_1 => load(state, 1) - case Opcode.ALOAD_2 | Opcode.DLOAD_2 | Opcode.FLOAD_2 | - Opcode.ILOAD_2 | Opcode.LLOAD_2 => load(state, 2) - case Opcode.ALOAD_3 | Opcode.DLOAD_3 | Opcode.FLOAD_3 | - Opcode.ILOAD_3 | Opcode.LLOAD_3 => load(state, 3) - case Opcode.ALOAD | Opcode.DLOAD | Opcode.FLOAD | - Opcode.ILOAD | Opcode.LLOAD => load(state, operand) - case Opcode.ASTORE_0 | Opcode.DSTORE_0 | Opcode.FSTORE_0 | - Opcode.ISTORE_0 | Opcode.LSTORE_0 => store(state, 0) - case Opcode.ASTORE_1 | Opcode.DSTORE_1 | Opcode.FSTORE_1 | - Opcode.ISTORE_1 | Opcode.LSTORE_1 => store(state, 1) - case Opcode.ASTORE_2 | Opcode.DSTORE_2 | Opcode.FSTORE_2 | - Opcode.ISTORE_2 | Opcode.LSTORE_2 => store(state, 2) - case Opcode.ASTORE_3 | Opcode.DSTORE_3 | Opcode.FSTORE_3 | - Opcode.ISTORE_3 | Opcode.LSTORE_3 => store(state, 3) - case Opcode.DCONST_0 | Opcode.DCONST_1 => - const(state, (opcode - Opcode.DCONST_0).asInstanceOf[Double]) - case Opcode.FCONST_0 | Opcode.FCONST_1 | Opcode.FCONST_2 => - const(state, (opcode - Opcode.FCONST_0).asInstanceOf[Float]) - case Opcode.ICONST_0 | Opcode.ICONST_1 | Opcode.ICONST_2 | - Opcode.ICONST_3 | Opcode.ICONST_4 | Opcode.ICONST_5 => - const(state, (opcode - Opcode.ICONST_0).asInstanceOf[Int]) - case Opcode.LCONST_0 | Opcode.LCONST_1 => - const(state, (opcode - Opcode.LCONST_0).asInstanceOf[Long]) - case Opcode.DADD | Opcode.FADD | Opcode.IADD | Opcode.LADD => add(state) - case Opcode.DSUB | Opcode.FSUB | Opcode.ISUB | Opcode.LSUB => sub(state) - case Opcode.DMUL | Opcode.FMUL | Opcode.IMUL | Opcode.LMUL => mul(state) - case Opcode.DCMPL | Opcode.DCMPG => dcmp(state) - case Opcode.LDC | Opcode.LDC_W | Opcode.LDC2_W => 
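// Illustrative sketch (editor's addition, not part of the deleted file): the If-chains
// matched by simplify above are exactly what the dcmp handler below leaves on the stack
// for a Double comparison, so a source-level `x < y` round-trips back to a plain
// predicate. Using only constructors already in scope in this package:
private def doubleLessThanLowering(x: Expression, y: Expression): (Expression, Expression) = {
  // Shape produced by DCMPL/DCMPG followed by an IF* branch on the -1/0/1 result:
  val before = LessThan(
    If(GreaterThan(x, y), Literal(1), If(LessThan(x, y), Literal(-1), Literal(0))),
    Literal(0))
  // Shape the LessThan(If(...), Literal(0)) case of simplify reduces it to:
  val after = And(Not(GreaterThan(x, y)), LessThan(x, y))
  (before, after)
}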
ldc(state) - case Opcode.DUP => dup(state) - case Opcode.GETSTATIC => getstatic(state) - // Cast instructions - case Opcode.I2B => cast(state, ByteType) - case Opcode.I2C => - throw new SparkException("Opcode.I2C unsupported: no corresponding Catalyst expression") - case Opcode.F2D | Opcode.I2D | Opcode.L2D => cast(state, DoubleType) - case Opcode.D2F | Opcode.I2F | Opcode.L2F => cast(state, FloatType) - case Opcode.D2I | Opcode.F2I | Opcode.L2I => cast(state, IntegerType) - case Opcode.D2L | Opcode.F2L | Opcode.I2L => cast(state, LongType) - case Opcode.I2S => cast(state, ShortType) - // Branching instructions - case Opcode.IFLT => ifOp(state, x => simplify(LessThan(x, Literal(0)))) - case Opcode.IFLE => ifOp(state, x => simplify(LessThanOrEqual(x, Literal(0)))) - case Opcode.IFGT => ifOp(state, x => simplify(GreaterThan(x, Literal(0)))) - case Opcode.IFGE => ifOp(state, x => simplify(GreaterThanOrEqual(x, Literal(0)))) - case Opcode.IFEQ => ifOp(state, x => simplify(EqualTo(x, Literal(0)))) - case Opcode.IFNE => ifOp(state, x => simplify(Not(EqualTo(x, Literal(0))))) - case Opcode.IFNULL => ifOp(state, x => simplify(IsNull(x))) - case Opcode.IFNONNULL => ifOp(state, x => simplify(IsNotNull(x))) - case Opcode.GOTO => state - case Opcode.IRETURN | Opcode.LRETURN | Opcode.FRETURN | Opcode.DRETURN | - Opcode.ARETURN | Opcode.RETURN => - state.copy(expr = Some(state.stack.head)) - // Call instructions - case Opcode.INVOKEVIRTUAL => invokevirtual(state) - case _ => throw new SparkException("Unsupported instruction: " + opcode) - } - } - - def isReturn: Boolean = opcode match { - case Opcode.IRETURN | Opcode.LRETURN | Opcode.FRETURN | Opcode.DRETURN | - Opcode.ARETURN | Opcode.RETURN => true - case _ => false - } - - // - // Handle instructions - // - private def load(state: State, localsIndex: Int): State = { - val State(locals, stack, cond, expr) = state - State(locals, locals(localsIndex)::stack, cond, expr) - } - - private def store(state: State, localsIndex: Int): State = { - val State(locals, top::rest, cond, expr) = state - State(locals.updated(localsIndex, top), rest, cond, expr) - } - - private def const(state: State, value: Any): State = { - val State(locals, stack, cond, expr) = state - State(locals, Literal(value)::stack, cond, expr) - } - - private def add(state: State): State = { - val State(locals, op2::op1::rest, cond, expr) = state - State(locals, Add(op1, op2)::rest, cond, expr) - } - - private def sub(state: State): State = { - val State(locals, op2::op1::rest, cond, expr) = state - State(locals, Subtract(op1, op2)::rest, cond, expr) - } - - private def mul(state: State): State = { - val State(locals, op2::op1::rest, cond, expr) = state - State(locals, Multiply(op1, op2)::rest, cond, expr) - } - - private def ldc(state: State): State = { - val State(locals, stack, cond, expr) = state - val constant = Literal(lambdaReflection.lookupConstant(operand)) - State(locals, constant::stack, cond, expr) - } - - private def dup(state: State): State = { - val State(locals, top::rest, cond, expr) = state - State(locals, top::top::rest, cond, expr) - } - - private def getstatic(state: State): State = { - val State(locals, stack, cond, expr) = state - State(locals, Literal(operand)::stack, cond, expr) - } - - private def dcmp(state: State): State = { - val State(locals, op2::op1::rest, cond, expr) = state - val conditional = - If(GreaterThan(op1, op2), - Literal(1), - If(LessThan(op1, op2), - Literal(-1), - Literal(0))) - State(locals, conditional::rest, cond, expr) - } - - private def 
cast( - state: State, - dataType: DataType): State = { - val State(locals, top::rest, cond, expr) = state - State(locals, Cast(top, dataType)::rest, cond, expr) - } - - private def ifOp( - state: State, - predicate: Expression => Expression): State = { - val State(locals, top::rest, cond, expr) = state - State(locals, rest, And(cond, predicate(top)), expr) - } - - private def gotoOp(state: State): State = state - - private def invokevirtual(state: State): State = { - val State(locals, stack, cond, expr) = state - val method = lambdaReflection.lookupMethod(operand) - val paramTypes = method.getParameterTypes - val (args, objrefIndex::rest) = stack.splitAt(paramTypes.length) - val objref = objrefIndex match { - case Literal(index, IntegerType) => - lambdaReflection.lookupField(index.asInstanceOf[Int]) - case _ => - throw new SparkException( - "Unsupported instruction: " + Opcode.INVOKEVIRTUAL) - } - if (objref.getType.getName.equals("scala.math.package$")) { - // Math functions - val ret = method.getName match { - case "abs" => Abs(args(0)) - case "acos" => Acos(args(0)) - case "asin" => Asin(args(0)) - case "atan" => Atan(args(0)) - case "cos" => Cos(args(0)) - case "cosh" => Cosh(args(0)) - case "sin" => Sin(args(0)) - case "tan" => Tan(args(0)) - case "tanh" => Tanh(args(0)) - case "ceil" => Ceil(args(0)) - case "floor" => Floor(args(0)) - case "exp" => Exp(args(0)) - case "log" => Log(args(0)) - case "log10" => Log10(args(0)) - case "sqrt" => Sqrt(args(0)) - case _ => throw new SparkException( - "Unsupported math function: " + method.getName) - } - State(locals, ret::rest, cond, expr) - } else { - // Other functions - throw new SparkException( - "Unsupported instruction: " + Opcode.INVOKEVIRTUAL) - } - } - } - object Instruction { - def apply(codeIterator: CodeIterator, offset: Int): Instruction = { - val opcode = codeIterator.byteAt(offset) - val operand = opcode match { - case Opcode.ALOAD | Opcode.DLOAD | Opcode.FLOAD | - Opcode.ILOAD | Opcode.LLOAD | Opcode.LDC => - codeIterator.byteAt(offset + 1) - case Opcode.LDC_W | Opcode.LDC2_W | - Opcode.INVOKESTATIC | Opcode.INVOKEVIRTUAL | Opcode.INVOKEINTERFACE | - Opcode.GETSTATIC => - codeIterator.u16bitAt(offset + 1) - case Opcode.GOTO | - Opcode.IFEQ | Opcode.IFNE | Opcode.IFLT | - Opcode.IFGE | Opcode.IFGT | Opcode.IFLE | - Opcode.IFNULL | Opcode.IFNONNULL => - codeIterator.s16bitAt(offset + 1) - case _ => 0 - } - Instruction(opcode, operand) - } - } - - case class BB(instructionTable: SortedMap[Int, Instruction]) { - def offset: Int = instructionTable.head._1 - - def last: (Int, Instruction) = instructionTable.last - - def lastInstruction: Instruction = last._2 - - def propagateCond(states: Map[BB, State]): Map[BB, State] = { - val state@State(_, _, cond, _) = states(this) - last._2.opcode match { - case Opcode.IFLT | Opcode.IFLE | Opcode.IFGT | Opcode.IFGE | - Opcode.IFEQ | Opcode.IFNE | Opcode.IFNULL | Opcode.IFNONNULL => { - val falseSucc::trueSucc::Nil = cfg.succ(this) - val falseState = state.copy(cond = simplify(cond match { - case And(cond1, cond2) => And(cond1, Not(cond2)) - case _ => Not(cond) - })) - val trueState = state.copy(cond = simplify(cond)) - (states - + (falseSucc -> (falseState + states.get(falseSucc))) - + (trueSucc -> (trueState + states.get(trueSucc)))) - } - case Opcode.IRETURN | Opcode.LRETURN | Opcode.FRETURN | Opcode.DRETURN | - Opcode.ARETURN | Opcode.RETURN => states - case _ => - val succ::Nil = cfg.succ(this) - (states + (succ -> (state + states.get(succ)))) - } - } - } - - case class CFG( - basicBlocks: 
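// Illustrative sketch (editor's addition, not part of the deleted file): the
// invokevirtual handler above only accepts calls on the scala.math package object and
// maps each supported method name onto the corresponding Catalyst expression, so a UDF
// body such as
//   (x: Double) => scala.math.sqrt(scala.math.abs(x))
// becomes, when compilation succeeds, nested expressions over the UDF's child:
private def sqrtOfAbs(child: Expression): Expression = Sqrt(Abs(child))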
List[BB], - pred: Map[BB, List[BB]], - succ: Map[BB, List[BB]]) - object CFG { - def apply(lambdaReflection: LambdaReflection): CFG = { - val codeIterator = lambdaReflection.codeIterator - codeIterator.begin() - val (labels, edges) = collectLabelsAndEdges(codeIterator) - codeIterator.begin() - val instructionTable = createInstructionTable(codeIterator) - val (basicBlocks, offsetToBB) = createBasicBlocks(labels, - instructionTable) - val (pred, succ) = connectBasicBlocks(basicBlocks, offsetToBB, edges) - CFG(basicBlocks, pred, succ) - } - - @tailrec - private def collectLabelsAndEdges( - codeIterator: CodeIterator, - labels: SortedSet[Int] = SortedSet(), - edges: SortedMap[Int, List[Int]] = SortedMap()) - : (SortedSet[Int], SortedMap[Int, List[Int]]) = { - if (codeIterator.hasNext) { - val offset = codeIterator.next - val nextOffset = codeIterator.lookAhead - val opcode = codeIterator.byteAt(offset) - opcode match { - case Opcode.IFEQ | Opcode.IFNE | Opcode.IFLT | Opcode.IFGE | - Opcode.IFGT | Opcode.IFLE | Opcode.IFNULL | Opcode.IFNONNULL => { - val falseOffset = nextOffset - val trueOffset = offset + codeIterator.s16bitAt(offset + 1) - collectLabelsAndEdges( - codeIterator, - labels + falseOffset + trueOffset, - edges + (offset -> List(falseOffset, trueOffset))) - } - case Opcode.GOTO => - val labelOffset = offset + codeIterator.s16bitAt(offset + 1) - collectLabelsAndEdges( - codeIterator, - labels + labelOffset, - edges + (offset -> List(labelOffset))) - case _ => collectLabelsAndEdges(codeIterator, labels, edges) - } - } else { - (labels, edges) - } - } - - @tailrec - private def createInstructionTable( - codeIterator: CodeIterator, - instructionTable: SortedMap[Int, Instruction] = SortedMap()) - : SortedMap[Int, Instruction] = { - if (codeIterator.hasNext) { - val offset = codeIterator.next - val nextOffset = codeIterator.lookAhead - val instruction = Instruction(codeIterator, offset) - createInstructionTable(codeIterator, - instructionTable + (offset -> instruction)) - } else { - instructionTable - } - } - - @tailrec - private def createBasicBlocks( - labels: SortedSet[Int], - instructionTable: SortedMap[Int, Instruction], - basicBlocks: List[BB] = List(), - offsetToBB: IntMap[BB] = IntMap()): (List[BB], IntMap[BB]) = { - if (labels.isEmpty) { - val instructions = instructionTable - val bb = BB(instructions) - ((bb+:basicBlocks).reverse, - instructions.foldLeft(offsetToBB) { case (offsetToBB, (offset, _)) => - offsetToBB + (offset -> bb) - }) - } else { - val (instructions, rest) = instructionTable.span(_._1 < labels.head) - val bb = BB(instructions) - createBasicBlocks( - labels.tail, rest, bb+:basicBlocks, - instructions.foldLeft(offsetToBB) { case (offsetToBB, (offset, _)) => - offsetToBB + (offset -> bb) - }) - } - } - - @tailrec - private def connectBasicBlocks( - basicBlocks: List[BB], - offsetToBB: IntMap[BB], - edges: SortedMap[Int, List[Int]], - pred: Map[BB, List[BB]] = Map().withDefaultValue(Nil), - succ: Map[BB, List[BB]] = Map().withDefaultValue(Nil)) - : (Map[BB, List[BB]], Map[BB, List[BB]]) = { - if (basicBlocks.isEmpty) { - (pred, succ) - } else { - val src::rest = basicBlocks - val dst = edges.getOrElse(src.last._1, - if (rest.isEmpty) { - List() - } else { - List(rest.head.offset) - }).map(offsetToBB) - connectBasicBlocks( - rest, - offsetToBB, - edges, - dst.foldLeft(pred) { (p, l) => (p + (l -> (src::p(l)))) }, - succ + (src -> dst)) - } - } - } - - // - // Reflection using SerializedLambda and javassist - // - case class LambdaReflection(private val classPool: 
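// Illustrative sketch (editor's addition, not part of the deleted file): the companion
// object of LambdaReflection below locates the lambda's body through
// java.lang.invoke.SerializedLambda, the same technique ClosureCleaner uses. Reduced to
// its essentials:
private def serializedLambdaOf(function: AnyRef): java.lang.invoke.SerializedLambda = {
  // Serializable Scala 2.12 lambdas provide writeReplace(), which returns a
  // SerializedLambda naming the capturing class and the synthetic method that holds
  // the lambda body's bytecode.
  val writeReplace = function.getClass.getDeclaredMethod("writeReplace")
  writeReplace.setAccessible(true)
  writeReplace.invoke(function).asInstanceOf[java.lang.invoke.SerializedLambda]
}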
ClassPool, - private val serializedLambda: SerializedLambda) { - def lookupConstant(constPoolIndex: Int): Any = { - constPool.getTag(constPoolIndex) match { - case ConstPool.CONST_Integer => constPool.getIntegerInfo(constPoolIndex) - case ConstPool.CONST_Long => constPool.getLongInfo(constPoolIndex) - case ConstPool.CONST_Float => constPool.getFloatInfo(constPoolIndex) - case ConstPool.CONST_Double => constPool.getDoubleInfo(constPoolIndex) - case ConstPool.CONST_String => constPool.getStringInfo(constPoolIndex) - case _ => throw new SparkException("Unsupported constant") - } - } - - def lookupField(constPoolIndex: Int): CtField = { - if (constPool.getTag(constPoolIndex) != ConstPool.CONST_Fieldref) { - throw new SparkException("Unexpected index for field reference") - } - val fieldName = constPool.getFieldrefName(constPoolIndex) - val descriptor = constPool.getFieldrefType(constPoolIndex) - val className = constPool.getFieldrefClassName(constPoolIndex) - classPool.getCtClass(className).getField(fieldName, descriptor) - } - - def lookupMethod(constPoolIndex: Int): CtMethod = { - if (constPool.getTag(constPoolIndex) != ConstPool.CONST_Methodref) { - throw new SparkException("Unexpected index for method reference") - } - val methodName = constPool.getMethodrefName(constPoolIndex) - val descriptor = constPool.getMethodrefType(constPoolIndex) - val className = constPool.getMethodrefClassName(constPoolIndex) - classPool.getCtClass(className) - .getDeclaredMethod(methodName, - Descriptor.getParameterTypes(descriptor, - classPool)) - } - - private val ctClass = { - val name = serializedLambda.getCapturingClass.replace('/', '.') - classPool.insertClassPath(new ClassClassPath(classForName(name))) - classPool.getCtClass(name) - } - - private val ctMethod = { - val lambdaImplName = serializedLambda.getImplMethodName - ctClass.getDeclaredMethod(lambdaImplName.stripSuffix("$adapted")) - } - - private val methodInfo = ctMethod.getMethodInfo - - private val constPool = methodInfo.getConstPool - - private val codeAttribute = methodInfo.getCodeAttribute - - lazy val codeIterator = codeAttribute.iterator - - lazy val parameters = ctMethod.getParameterTypes - - lazy val maxLocals = codeAttribute.getMaxLocals - } - object LambdaReflection { - def apply(function: AnyRef): LambdaReflection = { - // Reference for the use of SerialziedLambda to detect the lambda body: - // getSerializedLambda in - // spark/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala - val functionClass = function.getClass - val writeReplace = functionClass.getDeclaredMethod("writeReplace") - writeReplace.setAccessible(true) - val serializedLambda = writeReplace.invoke(function) - .asInstanceOf[SerializedLambda] - val classPool = ClassPool.getDefault - LambdaReflection(classPool, serializedLambda) - } - } -} diff --git a/sql/core/pom.xml b/sql/core/pom.xml deleted file mode 100644 index 461d6f9ed8a4..000000000000 --- a/sql/core/pom.xml +++ /dev/null @@ -1,230 +0,0 @@ - - - - - 4.0.0 - - org.apache.spark - spark-parent_2.12 - 3.0.0-SNAPSHOT - ../../pom.xml - - - spark-sql_2.12 - jar - Spark Project SQL - http://spark.apache.org/ - - sql - - - - - com.univocity - univocity-parsers - 2.7.3 - jar - - - org.apache.spark - spark-sketch_${scala.binary.version} - ${project.version} - - - org.apache.spark - spark-core_${scala.binary.version} - ${project.version} - - - org.apache.spark - spark-core_${scala.binary.version} - ${project.version} - test-jar - test - - - org.apache.spark - spark-catalyst_${scala.binary.version} - 
${project.version} - - - org.apache.spark - spark-catalyst_${scala.binary.version} - ${project.version} - test-jar - test - - - org.apache.spark - spark-tags_${scala.binary.version} - - - - - org.apache.spark - spark-tags_${scala.binary.version} - test-jar - test - - - - org.apache.orc - orc-core - ${orc.classifier} - - - org.apache.orc - orc-mapreduce - ${orc.classifier} - - - org.apache.parquet - parquet-column - - - org.apache.parquet - parquet-hadoop - - - org.eclipse.jetty - jetty-servlet - - - com.fasterxml.jackson.core - jackson-databind - - - org.apache.xbean - xbean-asm7-shaded - - - org.scalacheck - scalacheck_${scala.binary.version} - test - - - com.h2database - h2 - 1.4.195 - test - - - mysql - mysql-connector-java - test - - - org.postgresql - postgresql - test - - - org.apache.parquet - parquet-avro - test - - - org.mockito - mockito-core - test - - - - target/scala-${scala.binary.version}/classes - target/scala-${scala.binary.version}/test-classes - - - - org.apache.maven.plugins - maven-jar-plugin - - - prepare-test-jar - test-compile - - test-jar - - - - - - org.scalatest - scalatest-maven-plugin - - -ea -Xmx4g -Xss4m -XX:ReservedCodeCacheSize=${CodeCacheSize} - - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-sources - generate-sources - - add-source - - - - v${hive.version.short}/src/main/scala - v${hive.version.short}/src/main/java - - - - - add-scala-test-sources - generate-test-sources - - add-test-source - - - - v${hive.version.short}/src/test/scala - src/test/gen-java - - - - - - - - - - - scala-2.13 - - - org.scala-lang.modules - scala-parallel-collections_${scala.binary.version} - - - - - diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala deleted file mode 100644 index 231d30fc95bd..000000000000 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.expressions - -import org.apache.spark.annotation.Stable -import org.apache.spark.sql.Column -import org.apache.spark.sql.catalyst.ScalaReflection -import org.apache.spark.sql.catalyst.expressions.{Expression, ScalaUDF} -import org.apache.spark.sql.types.{AnyDataType, DataType} - -/** - * A user-defined function. To create one, use the `udf` functions in `functions`. - * - * As an example: - * {{{ - * // Define a UDF that returns true or false based on some numeric score. - * val predict = udf((score: Double) => score > 0.5) - * - * // Projects a column that adds a prediction column based on the score column. 
- * df.select( predict(df("score")) ) - * }}} - * - * @since 1.3.0 - */ -@Stable -sealed abstract class UserDefinedFunction { - - /** - * Returns true when the UDF can return a nullable value. - * - * @since 2.3.0 - */ - def nullable: Boolean - - /** - * Returns true iff the UDF is deterministic, i.e. the UDF produces the same output given the same - * input. - * - * @since 2.3.0 - */ - def deterministic: Boolean - - /** - * Returns an expression that invokes the UDF, using the given arguments. - * - * @since 1.3.0 - */ - @scala.annotation.varargs - def apply(exprs: Column*): Column - - /** - * Updates UserDefinedFunction with a given name. - * - * @since 2.3.0 - */ - def withName(name: String): UserDefinedFunction - - /** - * Updates UserDefinedFunction to non-nullable. - * - * @since 2.3.0 - */ - def asNonNullable(): UserDefinedFunction - - /** - * Updates UserDefinedFunction to nondeterministic. - * - * @since 2.3.0 - */ - def asNondeterministic(): UserDefinedFunction -} - -private[sql] case class SparkUserDefinedFunction( - f: AnyRef, - dataType: DataType, - inputSchemas: Seq[Option[ScalaReflection.Schema]], - name: Option[String] = None, - nullable: Boolean = true, - deterministic: Boolean = true) extends UserDefinedFunction { - - @scala.annotation.varargs - override def apply(exprs: Column*): Column = { - val scalaUDF = createScalaUDF(exprs.map(_.expr)) - scalaUDF.expr match { - case Some(e) => Column(e) - case None => Column(scalaUDF) - } - } - - private[sql] def createScalaUDF(exprs: Seq[Expression]): ScalaUDF = { - // It's possible that some of the inputs don't have a specific type(e.g. `Any`), skip type - // check. - val inputTypes = inputSchemas.map(_.map(_.dataType).getOrElse(AnyDataType)) - // `ScalaReflection.Schema.nullable` is false iff the type is primitive. Also `Any` is not - // primitive. - val inputsPrimitive = inputSchemas.map(_.map(!_.nullable).getOrElse(false)) - ScalaUDF( - f, - dataType, - exprs, - inputsPrimitive, - inputTypes, - udfName = name, - nullable = nullable, - udfDeterministic = deterministic) - } - - override def withName(name: String): SparkUserDefinedFunction = { - copy(name = Option(name)) - } - - override def asNonNullable(): SparkUserDefinedFunction = { - if (!nullable) { - this - } else { - copy(nullable = false) - } - } - - override def asNondeterministic(): SparkUserDefinedFunction = { - if (!deterministic) { - this - } else { - copy(deterministic = false) - } - } -} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/OpcodeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/OpcodeSuite.scala deleted file mode 100644 index 76f5c309363a..000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/OpcodeSuite.scala +++ /dev/null @@ -1,1073 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql -import org.apache.spark._ -import org.apache.spark.sql._ -import org.apache.spark.sql.functions._ -import org.scalatest.Assertions._ - -import org.apache.spark.sql.functions.{log => nickslog} - -import org.apache.spark.sql.api.java._ -import org.apache.spark.sql.catalyst.plans.logical.Project -import org.apache.spark.sql.execution.QueryExecution -import org.apache.spark.sql.execution.columnar.InMemoryRelation -import org.apache.spark.sql.execution.command.{CreateDataSourceTableAsSelectCommand, ExplainCommand} -import org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.test.SQLTestData._ -import org.apache.spark.sql.types._ -import org.apache.spark.sql.util.QueryExecutionListener -import java.io._ - -class OpcodeSuite extends QueryTest with SharedSparkSession { - - import testImplicits._ - import org.scalatest.Tag - - -// Utility Function for checking equivalency of Dataset type - def checkEquiv[T](ds1: Dataset[T], ds2: Dataset[T]) : Unit = { - val resultdf = ds1.toDF() - val refdf = ds2.toDF() - ds1.show - ds2.show - val columns = refdf.schema.fields.map(_.name) - val selectiveDifferences = columns.map(col => refdf.select(col).except(resultdf.select(col))) - selectiveDifferences.map(diff => { assert(diff.count==0) } ) - println("TEST: ***PASSED***") - } - - - object test0 extends Tag("test0") - object test1 extends Tag("test1") - object test2 extends Tag("test2") - object test3 extends Tag("test3") - object test4 extends Tag("test4") - object test5 extends Tag("test5") - object test6 extends Tag("test6") - object test7 extends Tag("test7") - object test8 extends Tag("test8") - object test9 extends Tag("test9") - object test10 extends Tag("test10") - object test11 extends Tag("test11") - object test12 extends Tag("test12") - object test13 extends Tag("test13") - object test14 extends Tag("test14") - object test15 extends Tag("test15") - object test16 extends Tag("test16") - object test17 extends Tag("test17") - object test18 extends Tag("test18") - object test19 extends Tag("test19") - object test20 extends Tag("test20") - object test21 extends Tag("test21") - object test22 extends Tag("test22") - object test23 extends Tag("test23") - object test24 extends Tag("test24") - object test25 extends Tag("test25") - object test26 extends Tag("test26") - object test27 extends Tag("test27") - object test28 extends Tag("test28") - object test29 extends Tag("test29") - object test30 extends Tag("test30") - object test31 extends Tag("test31") - object test32 extends Tag("test32") - object test33 extends Tag("test33") - object test34 extends Tag("test34") - object test35 extends Tag("test35") - object test36 extends Tag("test36") - object test37 extends Tag("test37") - object test38 extends Tag("test38") - object test39 extends Tag("test39") - object test40 extends Tag("test40") - object test41 extends Tag("test41") - object test42 extends Tag("test42") - object test43 extends Tag("test43") - object test44 extends Tag("test44") - object test45 extends Tag("test45") - object test46 extends Tag("test46") - object test47 extends Tag("test47") - -// START OF TESTS - - - - -// conditional tests, all but test0 fall back to JVM execution - test("conditional floats", test0) { - println("\n\n") - Thread.sleep(1000) - 
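// Editor's note (descriptive comment, not part of the deleted file): checkEquiv, defined
// above, drives every test in this suite. It materialises both Datasets with show(),
// then for each column asserts that refdf.select(col).except(resultdf.select(col)) is
// empty, i.e. the UDF's result column must match a reference column built from ordinary
// column expressions, for example:
//   checkEquiv(ds.withColumn("new", u('value)), ds.withColumn("new", lit(27.300001f)))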
println("EXECUTING TEST: conditional floats\n\n") - - val myudf: Float => Float = { x => - val t = - if (x > 1.0f && x < 3.7f) { - (if (x > 1.1f && x < 2.0f) 1.0f else 1.1f) + 24.0f - } else { - if (x < 0.1f) 2.3f else 4.1f - } - t + 2.2f - } - val u = udf(myudf) - val dataset = List(2.0f).toDS() - val result = dataset.withColumn("new", u('value)) - val ref = dataset.withColumn("new", lit(27.300001f)) - checkEquiv(result, ref) - val dataset2 = List(4.0f).toDS() - val result2 = dataset2.withColumn("new", u('value)) - val ref2 = dataset2.withColumn("new", lit(6.3f)) - checkEquiv(result2, ref2) - println("TEST: *** END ***\n") - } - - - - test("conditional doubles",test1) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: conditional doubles\n\n") - val myudf: Double => Double = { x => - val t = - if (x > 1.0 && x <= 3.7) { - (if (x >= 1.1 && x < 2.1) 1.0 else 1.1) + 24.0 - } else { - if (x < 1.1) 2.3 else 4.1 - } - t + 2.2 - } - val u = udf(myudf) - val dataset = List(1.0).toDS() - val result = dataset.withColumn("new", u('value)) - val ref = dataset.withColumn("new", lit(4.5)) - checkEquiv(result, ref) - val dataset2 = List(2.0).toDS() - val result2 = dataset2.withColumn("new", u('value)) - val ref2 = dataset2.withColumn("new", lit(27.2)) - checkEquiv(result2, ref2) - println("TEST: *** END ***\n") - } - - test("conditional ints",test2) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: conditional ints\n\n") - val myudf: Int => Int = { x => - val t = - if (x > 1 && x < 5) { - 10 - } else { - if (x > 7) 20 else 30 - } - t + 5 - } - val u = udf(myudf) - val dataset = List(2).toDS() - val result = dataset.withColumn("new",u('value)) - val ref = dataset.withColumn("new", lit(15)) - checkEquiv(result, ref) - val dataset2 = List(8).toDS() - val result2 = dataset2.withColumn("new", u('value)) - val ref2 = dataset2.withColumn("new", lit(25)) - checkEquiv(result2, ref2) - println("TEST: *** END ***\n") - } - - test("conditional longs", test3) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: conditional longs\n\n") - val myudf: Long => Long = { x => - val t = - if (x > 1l && x < 5l) { - 10l - } else { - if (x > 7l) 20l else 30l - } - t + 5l - } - val u = udf(myudf) - val dataset = List(2l).toDS() - val result = dataset.withColumn("new", u('value)) - val ref = dataset.withColumn("new", lit(15l)) - checkEquiv(result, ref) - val dataset2 = List(8l).toDS() - val result2 = dataset2.withColumn("new", u('value)) - val ref2 = dataset2.withColumn("new", lit(25l)) - checkEquiv(result2, ref2) - println("TEST: *** END ***\n") - } - - - -// tests for load and store operations, also cover +/-/* operators for int,long,double,float - test("LLOAD_ odd", test4) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: LLOAD_ odd") - println("\n\n") - val dataset = List(1).toDS() - val dataset2 = dataset.withColumn("value2",lit(1l)) - val dataset3 = dataset2.withColumn("value3",lit(1l)) - val myudf: (Int, Long, Long) => Long = (a,b,c) => { - (b+c)*c-b - } - val u = udf(myudf) - val result = dataset3.withColumn("new",u(col("value"),col("value2"),col("value3"))) - val ref = dataset3.withColumn("new",(col("value2")+col("value3"))*col("value3") - col("value2")) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - - test("DLOAD_ odd", test5) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: DLOAD_ odd") - println("\n\n") - val dataset = List(1).toDS() - val dataset2 = dataset.withColumn("value2",lit(1.0)) - val dataset3 = 
dataset2.withColumn("value3",lit(1.0)) - val myudf: (Int, Double, Double) => Double = (a,b,c) => { - (b+c)*b-c - } - val u = udf(myudf) - val result = dataset3.withColumn("new", u(col("value"),col("value2"),col("value3"))) - val ref = dataset3.withColumn("new",(col("value2")+col("value3"))*col("value2")-col("value3")) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("DLOAD_ even", test6) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: DLOAD_ even") - println("\n\n") - val dataset = List(1.0).toDS() - val dataset2 = dataset.withColumn("value2",col("value")) - val myudf: (Double, Double) => Double = (a,b) => { - (a+b)*a-b - } - val u = udf(myudf) - val result = dataset2.withColumn("new",u(col("value"),col("value2"))) - val ref = dataset2.withColumn("new",(col("value")+col("value2"))*col("value")-col("value2")) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("LLOAD_ even", test7) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: LLOAD_ even") - println("\n\n") - val dataset = List(1l).toDS() - val dataset2 = dataset.withColumn("value2",col("value")) - val myudf: (Long, Long) => Long = (a,b) => { - (a+b)*a-b - } - val u = udf(myudf) - val result = dataset2.withColumn("new",u(col("value"),col("value2"))) - val ref = dataset2.withColumn("new",(col("value")+col("value2"))*col("value")-col("value2")) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("ILOAD_ all",test8) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: ILOAD_ all") - println("\n\n") - val dataset = List(1).toDS() - val dataset2 = dataset.withColumn("value2",col("value")) - val dataset3 = dataset2.withColumn("value3",col("value")) - val dataset4 = dataset3.withColumn("value4",col("value")) - val myudf: (Int, Int, Int, Int) => Int = (a,b,c,d) => { - (a+b-c)*d - } - val u = udf(myudf) - val result = dataset4.withColumn("new",u(col("value"),col("value2"),col("value3"),col("value4"))) - val ref = dataset4.withColumn("new",(col("value")+col("value2")-col("value3"))*col("value4")) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("FLOAD_ all", test9) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: FLOAD_ all") - println("\n\n") - val dataset = List(1.0f).toDS() - val dataset2 = dataset.withColumn("value2",col("value")) - val dataset3 = dataset2.withColumn("value3",col("value")) - val dataset4 = dataset3.withColumn("value4",col("value")) - val myudf: (Float, Float, Float, Float) => Float = (a,b,c,d) => { - (a+b-c)*d - } - val u = udf(myudf) - val result = dataset4.withColumn("new",u(col("value"),col("value2"),col("value3"),col("value4"))) - val ref = dataset4.withColumn("new",(col("value")+col("value2")-col("value3"))*col("value4")) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("ISTORE_ all", test10) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: ISTORE_ all") - println("\n\n") - val myudf: () => Int = () => { - var myInt : Int = 1 - var myInt2 : Int = 1 - var myInt3 : Int = myInt - var myInt4 : Int = myInt * myInt3 - myInt4 - } - val dataset = List(1).toDS() - val u = udf(myudf) - val result = dataset.withColumn("new", u()) - val ref = dataset.withColumn("new",lit(1)) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("DSTORE_ even", test11) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: DSTORE_ even") - println("\n\n") - val myudf: () => Double = () => { - var myDoub : Double = 0.0 - 
var myDoub2 : Double = 1.0 - myDoub - myDoub2 - } - val dataset = List(1).toDS() - val u = udf(myudf) - val result = dataset.withColumn("new", u()) - val ref = dataset.withColumn("new",lit(1.0)) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("DSTORE_ odd", test12) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: DSTORE_ odd") - println("\n\n") - val myudf: (Int) => Double = (a) => { - var myDoub : Double = 1.0 - var myDoub2 : Double = 1.0 * myDoub - myDoub2 - } - val dataset = List(1).toDS() - val u = udf(myudf) - val result = dataset.withColumn("new", u(col("value"))) - val ref = dataset.withColumn("new",lit(1.0)) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("ALOAD_0", test13) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: ALOAD_0") - println("\n\n") - val myudf: (String,String,String,String) => String = (a,b,c,d) => { - a - } - val dataset = List("a").toDS() - val dataset2 = dataset.withColumn("value2",lit("b")) - val dataset3 = dataset2.withColumn("value3",lit("c")) - val dataset4 = dataset3.withColumn("value4",lit("d")) - val u = udf(myudf) - val result = dataset4.withColumn("new",u(col("value"),col("value2"),col("value3"),col("value4"))) - val ref = dataset4.withColumn("new",col("value")) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("ALOAD_1", test14) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: ALOAD_1") - println("\n\n") - val myudf: (String,String,String,String) => String = (a,b,c,d) => { - b - } - val dataset = List("a").toDS() - val dataset2 = dataset.withColumn("value2",lit("b")) - val dataset3 = dataset2.withColumn("value3",lit("c")) - val dataset4 = dataset3.withColumn("value4",lit("d")) - val u = udf(myudf) - val result = dataset4.withColumn("new",u(col("value"),col("value2"),col("value3"),col("value4"))) - val ref = dataset4.withColumn("new",col("value2")) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("ALOAD_2", test15) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: ALOAD_2") - println("\n\n") - val myudf: (String,String,String,String) => String = (a,b,c,d) => { - c - } - val dataset = List("a").toDS() - val dataset2 = dataset.withColumn("value2",lit("b")) - val dataset3 = dataset2.withColumn("value3",lit("c")) - val dataset4 = dataset3.withColumn("value4",lit("d")) - val u = udf(myudf) - val result = dataset4.withColumn("new",u(col("value"),col("value2"),col("value3"),col("value4"))) - val ref = dataset4.withColumn("new",col("value3")) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("ALOAD_3", test16) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: ALOAD_3") - println("\n\n") - val myudf: (String,String,String,String) => String = (a,b,c,d) => { - d - } - val dataset = List("a").toDS() - val dataset2 = dataset.withColumn("value2",lit("b")) - val dataset3 = dataset2.withColumn("value3",lit("c")) - val dataset4 = dataset3.withColumn("value4",lit("d")) - val u = udf(myudf) - val result = dataset4.withColumn("new",u(col("value"),col("value2"),col("value3"),col("value4"))) - val ref = dataset4.withColumn("new",col("value4")) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("ASTORE_1,2,3", test17) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: ASTORE_1,2,3") - println("\n\n") - val myudf: (String) => String = (a) => { - val myString : String = a - val myString2 : String = myString - val myString3 : String = 
myString2 - myString3 - } - val dataset = List("a").toDS() - val u = udf(myudf) - val result = dataset.withColumn("new",u(col("value"))) - val ref = dataset.withColumn("new",col("value")) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("FSTORE_1,2,3", test18) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: FSTORE_1,2,3") - println("\n\n") - val myudf: (Float) => Float = (a) => { - var myFloat : Float = a - var myFloat2 : Float = myFloat + a - var myFloat3 : Float = myFloat2 + a - myFloat3 - } - val dataset = List(5.0f).toDS() - val u = udf(myudf) - val result = dataset.withColumn("new", u(col("value"))) - val ref = dataset.withColumn("new",col("value")*3) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - - test("LSTORE_2", test19) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: LSTORE_2") - println("\n\n") - val myudf: (Long) => Long = (a) => { - var myLong : Long = a - myLong - } - val dataset = List(5l).toDS() - val u = udf(myudf) - val result = dataset.withColumn("new", u(col("value"))) - val ref = dataset.withColumn("new",col("value")) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("LSTORE_3", test20) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: LSTORE_3") - println("\n\n") - val myudf: (Int, Long) => Long = (a,b) => { - var myLong : Long = b - myLong - } - val dataset = List(1).toDS() - val dataset2 = dataset.withColumn("value2", lit(5l)) - val u = udf(myudf) - val result = dataset2.withColumn("new", u(col("value"),col("value2"))) - val ref = dataset2.withColumn("new",col("value2")) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - // misc. tests. Boolean check currently failing, can't handle true/false - - test("Boolean check", test21) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: Boolean check\n\n") - val myudf: () => Boolean = () => { - var myBool : Boolean = true - myBool - } - val dataset = List(true).toDS() - val u = udf(myudf) - val result = dataset.withColumn("new",u()) - val ref = dataset.withColumn("new",lit(true)) - // val resultdf = result.toDF() - // val refdf = ref.toDF() - // val columns = refdf.schema.fields.map(_.name) - // val selectiveDifferences = columns.map(col => refdf.select(col).except(resultdf.select(col))) - // selectiveDifferences.map(diff => {assert(diff.count==0)}) - result.show - ref.show - println("This test is *** FAILED *** as of 5/5/2019. If the two tables directly above are not identical, test is still failing.\n") - println("TEST: *** END ***\n") - } - - - // the test immediately below is meant to cover IFEQ, but is failing due to absense of IFNE - - test("IFEQ opcode", test22) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: IFEQ\n\n") - val myudf: (Double) => Double = (a) => { - var myDoub : Double = a; - if (a==a) { - myDoub = a*a - } - myDoub - } - val dataset = List(2.0).toDS() - val u = udf(myudf) - val result = dataset.withColumn("new", u(col("value"))) - val ref = dataset.withColumn("new", lit(4.0)) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - - // the test below is a one-off test used to test the functionality of LDC, also covers ASTORE_0. 
currently having trouble verifying output - - - test("LDC tests", test23) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: LDC tests\n\n") - class placeholder { - val myudf: () => (String) = () => { - val myString : String = "a" - myString - } - val u = udf(myudf) - val dataset = List("a").toDS() - val result = dataset.withColumn("new", u()) - val ref = dataset.withColumn("new",lit("a")) - // val resultdf = result.toDF() - // val refdf = ref.toDF() - // val columns = refdf.schema.fields.map(_.name) - // val selectiveDifferences = columns.map(col => refdf.select(col).except(resultdf.select(col))) - // selectiveDifferences.map(diff => {assert(diff.count==0)}) - // result.show - // ref.show - //println("LDC test: ***PASSED***") - checkEquiv(result, ref) - } - println("TEST: *** END ***\n") - } - - - - // this test makes sure we can handle udfs with more than 2 args - - test("UDF 4 args",test24) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: UDF 4 args\n\n") - val myudf: (Int, Int, Int, Int) => Int = (w,x,y,z) => { w+x+y+z } - val u = udf(myudf) - val dataset = List(1,2,3,4).toDS() - val dataset2 = dataset.withColumn("value2",col("value") + 1) - val dataset3 = dataset2.withColumn("value3",col("value2") + 1) - val dataset4 = dataset3.withColumn("value4",col("value3") + 1) - // val result = u(data) - // dataset3.show - // val dataset = List((1,2,3),(2,3,4),(3,4,5)).toDS() - val result = dataset4.withColumn("new", u(col("value"), col("value2"), col("value3"), col("value4"))) - val ref = dataset4.withColumn("new", col("value")+col("value2")+col("value3")+col("value4")) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - // this test covers getstatic and invokevirtual, shows we can handle math ops (only acos/asin) - - test("math functions - trig - (a)sin and (a)cos", test25) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: math functions - trig - (a)sin and (a)cos\n\n") - val myudf1: Double => Double = x => { math.cos(x) } - val u1 = udf(myudf1) - val myudf2: Double => Double = x => { math.sin(x) } - val u2 = udf(myudf2) - val myudf3: Double => Double = x => { math.acos(x) } - val u3 = udf(myudf3) - val myudf4: Double => Double = x => { math.asin(x) } - val u4 = udf(myudf4) - val dataset = List(1.0,2.0,3.0).toDS() - val result = dataset.withColumn("new", u1('value)+u2('value)+u3('value)+u4('value)) - val ref = dataset.withColumn("new", cos(col("value"))+sin(col("value"))+acos(col("value"))+asin(col("value"))) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - - test("math functions - trig - (a)tan(h) and cosh", test26) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: math functions - trig - (a)tan(h) and cosh\n\n") - val myudf1: Double => Double = x => { math.tan(x) } - val u1 = udf(myudf1) - val myudf2: Double => Double = x => { math.atan(x) } - val u2 = udf(myudf2) - val myudf3: Double => Double = x => { math.cosh(x) } - val u3 = udf(myudf3) - val myudf4: Double => Double = x => { math.tanh(x) } - val u4 = udf(myudf4) - val dataset = List(1.0,2.0,3.0).toDS() - val result = dataset.withColumn("new", u1('value)+u2('value)+u3('value)+u4('value)) - val ref = dataset.withColumn("new", tan(col("value")) + atan(col("value")) + cosh(col("value")) + tanh(col("value"))) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("math functions - abs, ceil, floor", test27) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: math functions - abs, ceil, floor\n\n") - val 
myudf1: Double => Double = x => { math.abs(x) } - val u1 = udf(myudf1) - val myudf2: Double => Double = x => { math.ceil(x) } - val u2 = udf(myudf2) - val myudf3: Double => Double = x => { math.floor(x) } - val u3 = udf(myudf3) - val dataset = List(-0.5,0.5).toDS() - val result = dataset.withColumn("new", u2(u1('value))+u3(u1('value))) - val ref = dataset.withColumn("new", ceil(abs(col("value"))) + floor(abs(col("value")))) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - - test("math functions - exp, log, log10, sqrt", test28) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: math functions - exp, log, log10, sqrt\n\n") - val myudf1: Double => Double = x => { math.exp(x) } - val u1 = udf(myudf1) - val myudf2: Double => Double = x => { math.log(x) } - val u2 = udf(myudf2) - val myudf3: Double => Double = x => { math.log10(x) } - val u3 = udf(myudf3) - val myudf4: Double => Double = x => { math.sqrt(x) } - val u4 = udf(myudf4) - val dataset = List(2.0,5.0).toDS() - val result = dataset.withColumn("new", u1('value)+u2('value)+u3('value)+u4('value)) - val ref = dataset.withColumn("new", exp(col("value"))+nickslog(col("value"))+log10(col("value"))+sqrt(col("value"))) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("FSTORE_0, LSTORE_1", test29) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: FSTORE_0, LSTORE_1\n\n") - val myudf: () => Float = () => { - var myFloat : Float = 1.0f - var myLong : Long = 1l - myFloat - } - val dataset = List(5.0f).toDS() - val u = udf(myudf) - val result = dataset.withColumn("new", u()) - val ref = dataset.withColumn("new",lit(1.0f)) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("LSTORE_0", test30) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: LSTORE_0\n\n") - val myudf: () => Long = () => { - var myLong : Long = 1l - myLong - } - val dataset = List(1l).toDS() - val u = udf(myudf) - val result = dataset.withColumn("new", u()) - val ref = dataset.withColumn("new",lit(1l)) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("ILOAD",test31) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: ILOAD\n\n") - val myudf: (Int, Int, Int, Int, Int, Long, Float, Double) => Int = (a,b,c,d,e,f,g,h) => { - e - } - val u = udf(myudf) - val dataset = List(1).toDS() - val dataset2 = dataset.withColumn("value2",col("value") + 1) - val dataset3 = dataset2.withColumn("value3",col("value2") + 1) - val dataset4 = dataset3.withColumn("value4",col("value3") + 1) - val dataset5 = dataset4.withColumn("value5",col("value4") + 1) - val dataset6 = dataset5.withColumn("value6",lit(1l)) - val dataset7 = dataset6.withColumn("value7",lit(1.0f)) - val dataset8 = dataset7.withColumn("value8",lit(1.0)) - val result = dataset8.withColumn("new", u(col("value"),col("value2"),col("value3"),col("value4"),col("value5"),col("value6"),col("value7"),col("value8"))) - val ref = dataset8.withColumn("new", lit(5)) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("LLOAD",test32) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: LLOAD\n\n") - val myudf: (Int, Int, Int, Int, Int, Long, Float, Double) => Long = (a,b,c,d,e,f,g,h) => { - f - } - val u = udf(myudf) - val dataset = List(1).toDS() - val dataset2 = dataset.withColumn("value2",col("value") + 1) - val dataset3 = dataset2.withColumn("value3",col("value2") + 1) - val dataset4 = dataset3.withColumn("value4",col("value3") + 1) - val dataset5 = 
dataset4.withColumn("value5",col("value4") + 1) - val dataset6 = dataset5.withColumn("value6",lit(1l)) - val dataset7 = dataset6.withColumn("value7",lit(1.0f)) - val dataset8 = dataset7.withColumn("value8",lit(1.0)) - val result = dataset8.withColumn("new", u(col("value"),col("value2"),col("value3"),col("value4"),col("value5"),col("value6"),col("value7"),col("value8"))) - val ref = dataset8.withColumn("new", lit(1l)) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("FLOAD",test33) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: FLOAD\n\n") - val myudf: (Int, Int, Int, Int, Int, Long, Float, Double) => Float = (a,b,c,d,e,f,g,h) => { - g - } - val u = udf(myudf) - val dataset = List(1).toDS() - val dataset2 = dataset.withColumn("value2",col("value") + 1) - val dataset3 = dataset2.withColumn("value3",col("value2") + 1) - val dataset4 = dataset3.withColumn("value4",col("value3") + 1) - val dataset5 = dataset4.withColumn("value5",col("value4") + 1) - val dataset6 = dataset5.withColumn("value6",lit(1l)) - val dataset7 = dataset6.withColumn("value7",lit(1.0f)) - val dataset8 = dataset7.withColumn("value8",lit(1.0)) - val result = dataset8.withColumn("new", u(col("value"),col("value2"),col("value3"),col("value4"),col("value5"),col("value6"),col("value7"),col("value8"))) - val ref = dataset8.withColumn("new", lit(1.0f)) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("DLOAD",test34) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: DLOAD\n\n") - val myudf: (Int, Int, Int, Int, Int, Long, Float, Double) => Double = (a,b,c,d,e,f,g,h) => { - h - } - val u = udf(myudf) - val dataset = List(1).toDS() - val dataset2 = dataset.withColumn("value2",col("value") + 1) - val dataset3 = dataset2.withColumn("value3",col("value2") + 1) - val dataset4 = dataset3.withColumn("value4",col("value3") + 1) - val dataset5 = dataset4.withColumn("value5",col("value4") + 1) - val dataset6 = dataset5.withColumn("value6",lit(1l)) - val dataset7 = dataset6.withColumn("value7",lit(1.0f)) - val dataset8 = dataset7.withColumn("value8",lit(1.0)) - val result = dataset8.withColumn("new", u(col("value"),col("value2"),col("value3"),col("value4"),col("value5"),col("value6"),col("value7"),col("value8"))) - val ref = dataset8.withColumn("new", lit(1.0)) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("Cast Double to Int",test35) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: Cast Double to Int\n\n") - val myudf: () => Int = () => { - var myVar : Double = 0.0 - myVar = myVar + 1.0 - val myVar2 : Int = myVar.toInt - myVar2 - } - val u = udf(myudf) - val dataset = List(1).toDS() - val result = dataset.withColumn("new", u()) - val ref = dataset.withColumn("new", lit(1)) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("Cast Float to Int",test36) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: Cast Float to Int\n\n") - val myudf: () => Int = () => { - var myVar : Float = 0.0f - myVar = myVar + 1.0f + 2.0f - val myVar2 : Int = myVar.toInt - myVar2 - } - val u = udf(myudf) - val dataset = List(1).toDS() - val result = dataset.withColumn("new", u()) - val ref = dataset.withColumn("new", lit(3)) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("Cast Long to Int",test37) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: Cast Long to Int\n\n") - val myudf: () => Int = () => { - var myVar : Long = 0l - myVar = myVar + 1l - val myVar2 : 
Int = myVar.toInt - myVar2 - } - val u = udf(myudf) - val dataset = List(1).toDS() - val result = dataset.withColumn("new", u()) - val ref = dataset.withColumn("new", lit(1l)) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("Cast Int to Long",test38) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: Cast Int to Long\n\n") - val myudf: () => Long = () => { - var myVar : Int = 0 - myVar = myVar + 1 + 2 + 3 + 4 + 5 - val myVar2 : Long = myVar.toLong - myVar2 - } - val u = udf(myudf) - val dataset = List(1).toDS() - val result = dataset.withColumn("new", u()) - val ref = dataset.withColumn("new", lit(15l)) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("Cast Float to Long",test39) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: Cast Float to Long\n\n") - val myudf: () => Long = () => { - var myVar : Float = 0.0f - myVar = myVar + 1.0f + 2.0f - val myVar2 : Long = myVar.toLong - myVar2 - } - val u = udf(myudf) - val dataset = List(1).toDS() - val result = dataset.withColumn("new", u()) - val ref = dataset.withColumn("new", lit(3l)) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("Cast Double to Long",test40) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: Cast Double to Long\n\n") - val myudf: () => Long = () => { - var myVar : Double = 0.0 - myVar = myVar + 1.0 - val myVar2 : Long = myVar.toLong - myVar2 - } - val u = udf(myudf) - val dataset = List(1).toDS() - val result = dataset.withColumn("new", u()) - val ref = dataset.withColumn("new", lit(1l)) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - - test("Cast Int to Float",test41) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: Cast Int to Float\n\n") - val myudf: () => Float = () => { - var myVar : Int = 0 - myVar = myVar + 1 + 2 + 3 + 4 + 5 - val myVar2 : Float = myVar.toFloat - myVar2 - } - val u = udf(myudf) - val dataset = List(1).toDS() - val result = dataset.withColumn("new", u()) - val ref = dataset.withColumn("new", lit(15.0f)) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("Cast Long to Float",test42) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: Cast Long to Float\n\n") - val myudf: () => Float = () => { - var myVar : Long = 0l - myVar = myVar + 1l - val myVar2 : Float = myVar.toFloat - myVar2 - } - val u = udf(myudf) - val dataset = List(1).toDS() - val result = dataset.withColumn("new", u()) - val ref = dataset.withColumn("new", lit(1.0f)) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("Cast Double to Float",test43) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: Cast Double to Float\n\n") - val myudf: () => Float = () => { - var myVar : Double = 0.0 - myVar = myVar + 1.0 - val myVar2 : Float = myVar.toFloat - myVar2 - } - val u = udf(myudf) - val dataset = List(1).toDS() - val result = dataset.withColumn("new", u()) - val ref = dataset.withColumn("new", lit(1.0f)) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("Cast Int to Double",test44) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: Cast Int to Double\n\n") - val myudf: () => Double = () => { - var myVar : Int = 0 - myVar = myVar + 1 + 2 + 3 + 4 + 5 - val myVar2 : Double = myVar.toDouble - myVar2 - } - val u = udf(myudf) - val dataset = List(1).toDS() - val result = dataset.withColumn("new", u()) - val ref = dataset.withColumn("new", lit(15.0)) - checkEquiv(result, ref) - 
println("TEST: *** END ***\n") - } - - test("Cast Long to Double",test45) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: Cast Long to Double\n\n") - val myudf: () => Double = () => { - var myVar : Long = 0l - myVar = myVar + 1l - val myVar2 : Double = myVar.toDouble - myVar2 - } - val u = udf(myudf) - val dataset = List(1).toDS() - val result = dataset.withColumn("new", u()) - val ref = dataset.withColumn("new", lit(1.0)) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - test("Cast Float to Double",test46) { - println("\n\n") - Thread.sleep(1000) - println("EXECUTING TEST: Cast Float to Double\n\n") - val myudf: () => Double = () => { - var myVar : Float = 0.0f - myVar = myVar + 1.0f + 2.0f - val myVar2 : Double = myVar.toDouble - myVar2 - } - val u = udf(myudf) - val dataset = List(1).toDS() - val result = dataset.withColumn("new", u()) - val ref = dataset.withColumn("new", lit(3.0)) - checkEquiv(result, ref) - println("TEST: *** END ***\n") - } - - - - - // test("UDF 4 args",test26) { - // println("\n\n") - // Thread.sleep(1000) - // println("EXECUTING TEST: UDF 4 args\n\n") - // val myudf: (Int, Int, Int, Int) => Int = (w,x,y,z) => { w+x+y+z } - // val u = udf(myudf) - // val dataset = List(1,2,3,4).toDS() - // val dataset2 = dataset.withColumn("value2",col("value") + 1) - // val dataset3 = dataset2.withColumn("value3",col("value2") + 1) - // val dataset4 = dataset3.withColumn("value4",col("value3") + 1) - // // val result = u(data) - // // dataset3.show - // // val dataset = List((1,2,3),(2,3,4),(3,4,5)).toDS() - // val result = dataset4.withColumn("new", u(col("value"), col("value2"), col("value3"), col("value4"))) - // val ref = dataset4.withColumn("new", col("value")+col("value2")+col("value3")+col("value4")) - // checkEquiv(result, ref) - // println("TEST: *** END ***\n") - // } - -} - diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala deleted file mode 100644 index 1d3439ded36a..000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala +++ /dev/null @@ -1,584 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql - -import java.math.BigDecimal - -import org.apache.spark.sql.api.java._ -import org.apache.spark.sql.catalyst.FunctionIdentifier -import org.apache.spark.sql.catalyst.plans.logical.Project -import org.apache.spark.sql.execution.QueryExecution -import org.apache.spark.sql.execution.columnar.InMemoryRelation -import org.apache.spark.sql.execution.command.{CreateDataSourceTableAsSelectCommand, ExplainCommand} -import org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand -import org.apache.spark.sql.functions.{lit, udf} -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.test.SQLTestData._ -import org.apache.spark.sql.types._ -import org.apache.spark.sql.util.QueryExecutionListener - - -private case class FunctionResult(f1: String, f2: String) - -class UDFSuite extends QueryTest with SharedSparkSession { - import testImplicits._ - import org.scalatest.Tag - - object udfsuite1 extends Tag("udfsuite1") - object udfsuite2 extends Tag("udfsuite2") - object udfsuite3 extends Tag("udfsuite3") - object udfsuite4 extends Tag("udfsuite4") - object udfsuite5 extends Tag("udfsuite5") - object udfsuite6 extends Tag("udfsuite6") - object udfsuite7 extends Tag("udfsuite7") - object udfsuite8 extends Tag("udfsuite8") - object udfsuite9 extends Tag("udfsuite9") - object udfsuite10 extends Tag("udfsuite10") - object udfsuite11 extends Tag("udfsuite11") - object udfsuite12 extends Tag("udfsuite12") - object udfsuite13 extends Tag("udfsuite13") - object udfsuite14 extends Tag("udfsuite14") - object udfsuite15 extends Tag("udfsuite15") - object udfsuite16 extends Tag("udfsuite16") - object udfsuite17 extends Tag("udfsuite17") - object udfsuite18 extends Tag("udfsuite18") - object udfsuite19 extends Tag("udfsuite19") - object udfsuite20 extends Tag("udfsuite20") - object udfsuite21 extends Tag("udfsuite21") - object udfsuite22 extends Tag("udfsuite22") - object udfsuite23 extends Tag("udfsuite23") - object udfsuite24 extends Tag("udfsuite24") - object udfsuite25 extends Tag("udfsuite25") - object udfsuite26 extends Tag("udfsuite26") - object udfsuite27 extends Tag("udfsuite27") - object udfsuite28 extends Tag("udfsuite28") - object udfsuite29 extends Tag("udfsuite29") - object udfsuite30 extends Tag("udfsuite30") - object udfsuite31 extends Tag("udfsuite31") - object udfsuite32 extends Tag("udfsuite32") - object udfsuite33 extends Tag("udfsuite33") - object udfsuite34 extends Tag("udfsuite34") - object udfsuite35 extends Tag("udfsuite35") - object udfsuite36 extends Tag("udfsuite36") - object udfsuite37 extends Tag("udfsuite37") - - test("built-in fixed arity expressions",udfsuite1) { - val df = spark.emptyDataFrame - df.selectExpr("rand()", "randn()", "rand(5)", "randn(50)") - } - - test("built-in vararg expressions",udfsuite2) { - val df = Seq((1, 2)).toDF("a", "b") - df.selectExpr("array(a, b)") - df.selectExpr("struct(a, b)") - } - - test("built-in expressions with multiple constructors",udfsuite3) { - val df = Seq(("abcd", 2)).toDF("a", "b") - df.selectExpr("substr(a, 2)", "substr(a, 2, 3)").collect() - } - - test("count",udfsuite4) { - val df = Seq(("abcd", 2)).toDF("a", "b") - df.selectExpr("count(a)") - } - - test("count distinct",udfsuite5) { - val df = Seq(("abcd", 2)).toDF("a", "b") - df.selectExpr("count(distinct a)") - } - - test("SPARK-8003 spark_partition_id",udfsuite6) { - val df = Seq((1, "Tearing down the walls that divide us")).toDF("id", "saying") - 
df.createOrReplaceTempView("tmp_table") - checkAnswer(sql("select spark_partition_id() from tmp_table").toDF(), Row(0)) - spark.catalog.dropTempView("tmp_table") - } - - test("SPARK-8005 input_file_name",udfsuite7) { - withTempPath { dir => - val data = sparkContext.parallelize(0 to 10, 2).toDF("id") - data.write.parquet(dir.getCanonicalPath) - spark.read.parquet(dir.getCanonicalPath).createOrReplaceTempView("test_table") - val answer = sql("select input_file_name() from test_table").head().getString(0) - assert(answer.contains(dir.toURI.getPath)) - assert(sql("select input_file_name() from test_table").distinct().collect().length >= 2) - spark.catalog.dropTempView("test_table") - } - } - - test("error reporting for incorrect number of arguments - builtin function",udfsuite8) { - val df = spark.emptyDataFrame - val e = intercept[AnalysisException] { - df.selectExpr("substr('abcd', 2, 3, 4)") - } - assert(e.getMessage.contains("Invalid number of arguments for function substr. Expected:")) - } - - test("error reporting for incorrect number of arguments - udf",udfsuite9) { - val df = spark.emptyDataFrame - val e = intercept[AnalysisException] { - spark.udf.register("foo", (_: String).length) - df.selectExpr("foo(2, 3, 4)") - } - assert(e.getMessage.contains("Invalid number of arguments for function foo. Expected:")) - } - - test("error reporting for undefined functions",udfsuite10) { - val df = spark.emptyDataFrame - val e = intercept[AnalysisException] { - df.selectExpr("a_function_that_does_not_exist()") - } - assert(e.getMessage.contains("Undefined function")) - assert(e.getMessage.contains("a_function_that_does_not_exist")) - } - - test("Simple UDF",udfsuite11) { - spark.udf.register("strLenScala", (_: String).length) - assert(sql("SELECT strLenScala('test')").head().getInt(0) === 4) - } - - test("UDF defined using UserDefinedFunction",udfsuite12) { - import functions.udf - val foo = udf((x: Int) => x + 1) - spark.udf.register("foo", foo) - assert(sql("select foo(5)").head().getInt(0) == 6) - } - - test("ZeroArgument non-deterministic UDF",udfsuite13) { - val foo = udf(() => Math.random()) - spark.udf.register("random0", foo.asNondeterministic()) - val df = sql("SELECT random0()") - assert(df.logicalPlan.asInstanceOf[Project].projectList.forall(!_.deterministic)) - assert(df.head().getDouble(0) >= 0.0) - - val foo1 = foo.asNondeterministic() - val df1 = testData.select(foo1()) - assert(df1.logicalPlan.asInstanceOf[Project].projectList.forall(!_.deterministic)) - assert(df1.head().getDouble(0) >= 0.0) - - val bar = udf(() => Math.random(), DataTypes.DoubleType).asNondeterministic() - val df2 = testData.select(bar()) - assert(df2.logicalPlan.asInstanceOf[Project].projectList.forall(!_.deterministic)) - assert(df2.head().getDouble(0) >= 0.0) - - val javaUdf = udf(new UDF0[Double] { - override def call(): Double = Math.random() - }, DoubleType).asNondeterministic() - val df3 = testData.select(javaUdf()) - assert(df3.logicalPlan.asInstanceOf[Project].projectList.forall(!_.deterministic)) - assert(df3.head().getDouble(0) >= 0.0) - } - - test("TwoArgument UDF",udfsuite14) { - spark.udf.register("strLenScala", (_: String).length + (_: Int)) - assert(sql("SELECT strLenScala('test', 1)").head().getInt(0) === 5) - } - - test("UDF in a WHERE",udfsuite15) { - withTempView("integerData") { - spark.udf.register("oneArgFilter", (n: Int) => { n > 80 }) - - val df = sparkContext.parallelize( - (1 to 100).map(i => TestData(i, i.toString))).toDF() - df.createOrReplaceTempView("integerData") - - val result = 
- sql("SELECT * FROM integerData WHERE oneArgFilter(key)") - assert(result.count() === 20) - } - } - - test("UDF in a HAVING",udfsuite16) { - withTempView("groupData") { - spark.udf.register("havingFilter", (n: Long) => { n > 5 }) - - val df = Seq(("red", 1), ("red", 2), ("blue", 10), - ("green", 100), ("green", 200)).toDF("g", "v") - df.createOrReplaceTempView("groupData") - - val result = - sql( - """ - | SELECT g, SUM(v) as s - | FROM groupData - | GROUP BY g - | HAVING havingFilter(s) - """.stripMargin) - - assert(result.count() === 2) - } - } - - test("UDF in a GROUP BY",udfsuite17) { - withTempView("groupData") { - spark.udf.register("groupFunction", (n: Int) => { n > 10 }) - - val df = Seq(("red", 1), ("red", 2), ("blue", 10), - ("green", 100), ("green", 200)).toDF("g", "v") - df.createOrReplaceTempView("groupData") - - val result = - sql( - """ - | SELECT SUM(v) - | FROM groupData - | GROUP BY groupFunction(v) - """.stripMargin) - assert(result.count() === 2) - } - } - - test("UDFs everywhere",udfsuite18) { - withTempView("groupData") { - spark.udf.register("groupFunction", (n: Int) => { n > 10 }) - spark.udf.register("havingFilter", (n: Long) => { n > 2000 }) - spark.udf.register("whereFilter", (n: Int) => { n < 150 }) - spark.udf.register("timesHundred", (n: Long) => { n * 100 }) - - val df = Seq(("red", 1), ("red", 2), ("blue", 10), - ("green", 100), ("green", 200)).toDF("g", "v") - df.createOrReplaceTempView("groupData") - - val result = - sql( - """ - | SELECT timesHundred(SUM(v)) as v100 - | FROM groupData - | WHERE whereFilter(v) - | GROUP BY groupFunction(v) - | HAVING havingFilter(v100) - """.stripMargin) - assert(result.count() === 1) - } - } - - test("struct UDF",udfsuite19) { - spark.udf.register("returnStruct", (f1: String, f2: String) => FunctionResult(f1, f2)) - - val result = - sql("SELECT returnStruct('test', 'test2') as ret") - .select($"ret.f1").head().getString(0) - assert(result === "test") - } - - test("udf that is transformed",udfsuite20) { - spark.udf.register("makeStruct", (x: Int, y: Int) => (x, y)) - // 1 + 1 is constant folded causing a transformation. - assert(sql("SELECT makeStruct(1 + 1, 2)").first().getAs[Row](0) === Row(2, 2)) - } - - test("type coercion for udf inputs",udfsuite21) { - spark.udf.register("intExpected", (x: Int) => x) - // pass a decimal to intExpected. 
- assert(sql("SELECT intExpected(1.0)").head().getInt(0) === 1) - } - - test("udf in different types",udfsuite22) { - spark.udf.register("testDataFunc", (n: Int, s: String) => { (n, s) }) - spark.udf.register("decimalDataFunc", - (a: java.math.BigDecimal, b: java.math.BigDecimal) => { (a, b) }) - spark.udf.register("binaryDataFunc", (a: Array[Byte], b: Int) => { (a, b) }) - spark.udf.register("arrayDataFunc", - (data: Seq[Int], nestedData: Seq[Seq[Int]]) => { (data, nestedData) }) - spark.udf.register("mapDataFunc", - (data: scala.collection.Map[Int, String]) => { data }) - spark.udf.register("complexDataFunc", - (m: Map[String, Int], a: Seq[Int], b: Boolean) => { (m, a, b) } ) - - checkAnswer( - sql("SELECT tmp.t.* FROM (SELECT testDataFunc(key, value) AS t from testData) tmp").toDF(), - testData) - checkAnswer( - sql(""" - | SELECT tmp.t.* FROM - | (SELECT decimalDataFunc(a, b) AS t FROM decimalData) tmp - """.stripMargin).toDF(), decimalData) - checkAnswer( - sql(""" - | SELECT tmp.t.* FROM - | (SELECT binaryDataFunc(a, b) AS t FROM binaryData) tmp - """.stripMargin).toDF(), binaryData) - checkAnswer( - sql(""" - | SELECT tmp.t.* FROM - | (SELECT arrayDataFunc(data, nestedData) AS t FROM arrayData) tmp - """.stripMargin).toDF(), arrayData.toDF()) - checkAnswer( - sql(""" - | SELECT mapDataFunc(data) AS t FROM mapData - """.stripMargin).toDF(), mapData.toDF()) - checkAnswer( - sql(""" - | SELECT tmp.t.* FROM - | (SELECT complexDataFunc(m, a, b) AS t FROM complexData) tmp - """.stripMargin).toDF(), complexData.select("m", "a", "b")) - } - - test("SPARK-11716 UDFRegistration does not include the input data type in returned UDF",udfsuite23) { - val myUDF = spark.udf.register("testDataFunc", (n: Int, s: String) => { (n, s.toInt) }) - - // Without the fix, this will fail because we fail to cast data type of b to string - // because myUDF does not know its input data type. With the fix, this query should not - // fail. 
- checkAnswer( - testData2.select(myUDF($"a", $"b").as("t")), - testData2.selectExpr("struct(a, b)")) - - checkAnswer( - sql("SELECT tmp.t.* FROM (SELECT testDataFunc(a, b) AS t from testData2) tmp").toDF(), - testData2) - } - - test("SPARK-19338 Provide identical names for UDFs in the EXPLAIN output",udfsuite24) { - def explainStr(df: DataFrame): String = { - val explain = ExplainCommand(df.queryExecution.logical, extended = false) - val sparkPlan = spark.sessionState.executePlan(explain).executedPlan - sparkPlan.executeCollect().map(_.getString(0).trim).headOption.getOrElse("") - } - val udf1Name = "myUdf1" - val udf2Name = "myUdf2" - val udf1 = spark.udf.register(udf1Name, (n: Int) => n + 1) - val udf2 = spark.udf.register(udf2Name, (n: Int) => n * 1) - assert(explainStr(sql("SELECT myUdf1(myUdf2(1))")).contains(s"$udf1Name($udf2Name(1))")) - assert(explainStr(spark.range(1).select(udf1(udf2(functions.lit(1))))) - .contains(s"$udf1Name($udf2Name(1))")) - } - - test("SPARK-23666 Do not display exprId in argument names",udfsuite25) { - withTempView("x") { - Seq(((1, 2), 3)).toDF("a", "b").createOrReplaceTempView("x") - spark.udf.register("f", (a: Int) => a) - val outputStream = new java.io.ByteArrayOutputStream() - Console.withOut(outputStream) { - spark.sql("SELECT f(a._1) FROM x").show - } - assert(outputStream.toString.contains("f(a._1 AS `_1`)")) - } - } - - test("cached Data should be used in the write path",udfsuite26) { - withTable("t") { - withTempPath { path => - var numTotalCachedHit = 0 - val listener = new QueryExecutionListener { - override def onFailure(f: String, qe: QueryExecution, e: Throwable): Unit = {} - - override def onSuccess(funcName: String, qe: QueryExecution, duration: Long): Unit = { - qe.withCachedData match { - case c: CreateDataSourceTableAsSelectCommand - if c.query.isInstanceOf[InMemoryRelation] => - numTotalCachedHit += 1 - case i: InsertIntoHadoopFsRelationCommand - if i.query.isInstanceOf[InMemoryRelation] => - numTotalCachedHit += 1 - case _ => - } - } - } - spark.listenerManager.register(listener) - - val udf1 = udf({ (x: Int, y: Int) => x + y }) - val df = spark.range(0, 3).toDF("a") - .withColumn("b", udf1($"a", lit(10))) - df.cache() - df.write.saveAsTable("t") - sparkContext.listenerBus.waitUntilEmpty() - assert(numTotalCachedHit == 1, "expected to be cached in saveAsTable") - df.write.insertInto("t") - sparkContext.listenerBus.waitUntilEmpty() - assert(numTotalCachedHit == 2, "expected to be cached in insertInto") - df.write.save(path.getCanonicalPath) - sparkContext.listenerBus.waitUntilEmpty() - assert(numTotalCachedHit == 3, "expected to be cached in save for native") - } - } - } - - test("SPARK-24891 Fix HandleNullInputsForUDF rule",udfsuite27) { - val udf1 = udf({(x: Int, y: Int) => x + y}) - val df = spark.range(0, 3).toDF("a") - .withColumn("b", udf1($"a", udf1($"a", lit(10)))) - .withColumn("c", udf1($"a", lit(null))) - val plan = spark.sessionState.executePlan(df.logicalPlan).analyzed - - comparePlans(df.logicalPlan, plan) - checkAnswer( - df, - Seq( - Row(0, 10, null), - Row(1, 12, null), - Row(2, 14, null))) - } - - test("SPARK-24891 Fix HandleNullInputsForUDF rule - with table",udfsuite28) { - withTable("x") { - Seq((1, "2"), (2, "4")).toDF("a", "b").write.format("json").saveAsTable("x") - sql("insert into table x values(3, null)") - sql("insert into table x values(null, '4')") - spark.udf.register("f", (a: Int, b: String) => a + b) - val df = spark.sql("SELECT f(a, b) FROM x") - val plan = 
spark.sessionState.executePlan(df.logicalPlan).analyzed - comparePlans(df.logicalPlan, plan) - checkAnswer(df, Seq(Row("12"), Row("24"), Row("3null"), Row(null))) - } - } - - test("SPARK-25044 Verify null input handling for primitive types - with udf()",udfsuite29) { - val input = Seq( - (null, Integer.valueOf(1), "x"), - ("M", null, "y"), - ("N", Integer.valueOf(3), null)).toDF("a", "b", "c") - - val udf1 = udf((a: String, b: Int, c: Any) => a + b + c) - val df = input.select(udf1('a, 'b, 'c)) - checkAnswer(df, Seq(Row("null1x"), Row(null), Row("N3null"))) - - // test Java UDF. Java UDF can't have primitive inputs, as it's generic typed. - val udf2 = udf(new UDF3[String, Integer, Object, String] { - override def call(t1: String, t2: Integer, t3: Object): String = { - t1 + t2 + t3 - } - }, StringType) - val df2 = input.select(udf2('a, 'b, 'c)) - checkAnswer(df2, Seq(Row("null1x"), Row("Mnully"), Row("N3null"))) - } - - test("SPARK-25044 Verify null input handling for primitive types - with udf.register",udfsuite30) { - withTable("t") { - Seq((null, Integer.valueOf(1), "x"), ("M", null, "y"), ("N", Integer.valueOf(3), null)) - .toDF("a", "b", "c").write.format("json").saveAsTable("t") - spark.udf.register("f", (a: String, b: Int, c: Any) => a + b + c) - val df = spark.sql("SELECT f(a, b, c) FROM t") - checkAnswer(df, Seq(Row("null1x"), Row(null), Row("N3null"))) - - // test Java UDF. Java UDF can't have primitive inputs, as it's generic typed. - spark.udf.register("f2", new UDF3[String, Integer, Object, String] { - override def call(t1: String, t2: Integer, t3: Object): String = { - t1 + t2 + t3 - } - }, StringType) - val df2 = spark.sql("SELECT f2(a, b, c) FROM t") - checkAnswer(df2, Seq(Row("null1x"), Row("Mnully"), Row("N3null"))) - } - } - - test("SPARK-25044 Verify null input handling for primitive types - with udf(Any, DataType)",udfsuite31) { - val f = udf((x: Int) => x, IntegerType) - checkAnswer( - Seq(new Integer(1), null).toDF("x").select(f($"x")), - Row(1) :: Row(0) :: Nil) - - val f2 = udf((x: Double) => x, DoubleType) - checkAnswer( - Seq(new java.lang.Double(1.1), null).toDF("x").select(f2($"x")), - Row(1.1) :: Row(0.0) :: Nil) - - } - - test("SPARK-26308: udf with decimal",udfsuite32) { - val df1 = spark.createDataFrame( - sparkContext.parallelize(Seq(Row(new BigDecimal("2011000000000002456556")))), - StructType(Seq(StructField("col1", DecimalType(30, 0))))) - val udf1 = org.apache.spark.sql.functions.udf((value: BigDecimal) => { - if (value == null) null else value.toBigInteger.toString - }) - checkAnswer(df1.select(udf1(df1.col("col1"))), Seq(Row("2011000000000002456556"))) - } - - test("SPARK-26308: udf with complex types of decimal",udfsuite33) { - val df1 = spark.createDataFrame( - sparkContext.parallelize(Seq(Row(Array(new BigDecimal("2011000000000002456556"))))), - StructType(Seq(StructField("col1", ArrayType(DecimalType(30, 0)))))) - val udf1 = org.apache.spark.sql.functions.udf((arr: Seq[BigDecimal]) => { - arr.map(value => if (value == null) null else value.toBigInteger.toString) - }) - checkAnswer(df1.select(udf1($"col1")), Seq(Row(Array("2011000000000002456556")))) - - val df2 = spark.createDataFrame( - sparkContext.parallelize(Seq(Row(Map("a" -> new BigDecimal("2011000000000002456556"))))), - StructType(Seq(StructField("col1", MapType(StringType, DecimalType(30, 0)))))) - val udf2 = org.apache.spark.sql.functions.udf((map: Map[String, BigDecimal]) => { - map.mapValues(value => if (value == null) null else value.toBigInteger.toString) - }) - 
checkAnswer(df2.select(udf2($"col1")), Seq(Row(Map("a" -> "2011000000000002456556")))) - } - - test("SPARK-26323 Verify input type check - with udf()",udfsuite34) { - val f = udf((x: Long, y: Any) => x) - val df = Seq(1 -> "a", 2 -> "b").toDF("i", "j").select(f($"i", $"j")) - checkAnswer(df, Seq(Row(1L), Row(2L))) - } - - test("SPARK-26323 Verify input type check - with udf.register",udfsuite35) { - withTable("t") { - Seq(1 -> "a", 2 -> "b").toDF("i", "j").write.format("json").saveAsTable("t") - spark.udf.register("f", (x: Long, y: Any) => x) - val df = spark.sql("SELECT f(i, j) FROM t") - checkAnswer(df, Seq(Row(1L), Row(2L))) - } - } - - test("Using java.time.Instant in UDF",udfsuite36) { - withSQLConf(SQLConf.DATETIME_JAVA8API_ENABLED.key -> "true") { - val expected = java.time.Instant.parse("2019-02-27T00:00:00Z") - val plusSec = udf((i: java.time.Instant) => i.plusSeconds(1)) - val df = spark.sql("SELECT TIMESTAMP '2019-02-26 23:59:59Z' as t") - .select(plusSec('t)) - assert(df.collect().toSeq === Seq(Row(expected))) - } - } - - test("Using java.time.LocalDate in UDF",udfsuite37) { - withSQLConf(SQLConf.DATETIME_JAVA8API_ENABLED.key -> "true") { - val expected = java.time.LocalDate.parse("2019-02-27") - val plusDay = udf((i: java.time.LocalDate) => i.plusDays(1)) - val df = spark.sql("SELECT DATE '2019-02-26' as d") - .select(plusDay('d)) - assert(df.collect().toSeq === Seq(Row(expected))) - } - } - - test("SPARK-28321 0-args Java UDF should not be called only once") { - val nonDeterministicJavaUDF = udf( - new UDF0[Int] { - override def call(): Int = scala.util.Random.nextInt() - }, IntegerType).asNondeterministic() - - assert(spark.range(2).select(nonDeterministicJavaUDF()).distinct().count() == 2) - } - - test("Replace _FUNC_ in UDF ExpressionInfo") { - val info = spark.sessionState.catalog.lookupFunctionInfo(FunctionIdentifier("upper")) - assert(info.getName === "upper") - assert(info.getClassName === "org.apache.spark.sql.catalyst.expressions.Upper") - assert(info.getUsage === "upper(str) - Returns `str` with all characters changed to uppercase.") - assert(info.getExamples.contains("> SELECT upper('SparkSql');")) - assert(info.getSince === "1.0.1") - assert(info.getNote === "") - assert(info.getExtended.contains("> SELECT upper('SparkSql');")) - } - - test("SPARK-28521 error message for CAST(parameter types contains DataType)") { - val e = intercept[AnalysisException] { - spark.sql("SELECT CAST(1)") - } - assert(e.getMessage.contains("Invalid arguments for function cast")) - } -} diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml deleted file mode 100644 index f627227aa038..000000000000 --- a/sql/hive/pom.xml +++ /dev/null @@ -1,273 +0,0 @@ - - - - - 4.0.0 - - org.apache.spark - spark-parent_2.12 - 3.0.0-SNAPSHOT - ../../pom.xml - - - spark-hive_2.12 - jar - Spark Project Hive - http://spark.apache.org/ - - hive - - - - - - ${hive.parquet.group} - parquet-hadoop-bundle - - - org.apache.spark - spark-core_${scala.binary.version} - ${project.version} - - - org.apache.spark - spark-core_${scala.binary.version} - ${project.version} - test-jar - test - - - org.apache.spark - spark-sql_${scala.binary.version} - ${project.version} - - - org.apache.spark - spark-sql_${scala.binary.version} - ${project.version} - test-jar - test - - - org.apache.spark - spark-catalyst_${scala.binary.version} - test-jar - ${project.version} - test - - - org.apache.spark - spark-tags_${scala.binary.version} - test-jar - test - - - - - ${hive.group} - hive-exec - ${hive.classifier} - - - ${hive.group} - 
hive-metastore - - - - - org.apache.avro - avro - - - - org.apache.avro - avro-mapred - ${avro.mapred.classifier} - - - commons-httpclient - commons-httpclient - - - org.apache.httpcomponents - httpclient - - - org.codehaus.jackson - jackson-mapper-asl - - - - commons-codec - commons-codec - - - joda-time - joda-time - - - org.jodd - jodd-core - - - com.google.code.findbugs - jsr305 - - - org.datanucleus - datanucleus-core - - - org.apache.thrift - libthrift - - - org.apache.thrift - libfb303 - - - org.apache.derby - derby - - - org.scala-lang - scala-compiler - test - - - org.scalacheck - scalacheck_${scala.binary.version} - test - - - - - hive - - - - org.codehaus.mojo - build-helper-maven-plugin - 3.0.0 - - - add-scala-test-sources - generate-test-sources - - add-test-source - - - - compatibility/src/test/scala - - - - - - - - - - hadoop-3.2 - - - ${hive.group} - hive-common - - - ${hive.group} - hive-serde - - - ${hive.group} - hive-shims - - - org.apache.hive - hive-llap-common - - - org.apache.hive - hive-llap-client - - - - - - - target/scala-${scala.binary.version}/classes - target/scala-${scala.binary.version}/test-classes - - - org.scalatest - scalatest-maven-plugin - - - -da -Xmx4g -XX:ReservedCodeCacheSize=${CodeCacheSize} - - - - org.apache.maven.plugins - maven-enforcer-plugin - - - enforce-versions - - enforce - - - - - - *:hive-cli - - - - - - - - - -