diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 5033ab00601ab..3bfd1abb48d9c 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -692,11 +692,7 @@ jobs: - name: Install Python linter dependencies if: inputs.branch != 'branch-3.3' && inputs.branch != 'branch-3.4' && inputs.branch != 'branch-3.5' run: | - # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. - # See also https://github.com/sphinx-doc/sphinx/issues/7551. - # Jinja2 3.0.0+ causes error when building with Sphinx. - # See also https://issues.apache.org/jira/browse/SPARK-35375. - python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==23.9.1' + python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc jinja2 'black==23.9.1' python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.59.3' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' - name: Python linter run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python @@ -745,13 +741,9 @@ jobs: Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" - name: Install dependencies for documentation generation run: | - # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. - # See also https://github.com/sphinx-doc/sphinx/issues/7551. - # Jinja2 3.0.0+ causes error when building with Sphinx. - # See also https://issues.apache.org/jira/browse/SPARK-35375. # Pin the MarkupSafe to 2.0.1 to resolve the CI error. # See also https://issues.apache.org/jira/browse/SPARK-38279. - python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme sphinx-copybutton nbsphinx numpydoc 'jinja2<3.0.0' 'markupsafe==2.0.1' 'pyzmq<24.0.0' + python3.9 -m pip install 'sphinx==4.2.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 'markupsafe==2.0.1' 'pyzmq<24.0.0' python3.9 -m pip install ipython_genutils # See SPARK-38517 python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421 diff --git a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala index ab488e18ba3f4..75c56451592e4 100644 --- a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala +++ b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala @@ -80,7 +80,9 @@ class UTF8StringPropertyCheckSuite extends AnyFunSuite with ScalaCheckDrivenProp test("compare") { forAll { (s1: String, s2: String) => - assert(Math.signum(toUTF8(s1).compareTo(toUTF8(s2))) === Math.signum(s1.compareTo(s2))) + assert(Math.signum { + toUTF8(s1).compareTo(toUTF8(s2)).toFloat + } === Math.signum(s1.compareTo(s2).toFloat)) } } diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index 19b70307a1cdd..5b70edf249d14 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -1067,7 +1067,7 @@ }, "FAILED_EXECUTE_UDF" : { "message" : [ - "Failed to execute user defined function (: () => )." 
+ "User defined function (: () => ) failed due to: ." ], "sqlState" : "39000" }, diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala index a1e57226e530f..d760c9d97693b 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -1532,6 +1532,41 @@ class Dataset[T] private[sql] ( proto.Aggregate.GroupType.GROUP_TYPE_CUBE) } + /** + * Create multi-dimensional aggregation for the current Dataset using the specified grouping + * sets, so we can run aggregation on them. See [[RelationalGroupedDataset]] for all the + * available aggregate functions. + * + * {{{ + * // Compute the average for all numeric columns group by specific grouping sets. + * ds.groupingSets(Seq(Seq($"department", $"group"), Seq()), $"department", $"group").avg() + * + * // Compute the max age and average salary, group by specific grouping sets. + * ds.groupingSets(Seq($"department", $"gender"), Seq()), $"department", $"group").agg(Map( + * "salary" -> "avg", + * "age" -> "max" + * )) + * }}} + * + * @group untypedrel + * @since 4.0.0 + */ + @scala.annotation.varargs + def groupingSets(groupingSets: Seq[Seq[Column]], cols: Column*): RelationalGroupedDataset = { + val groupingSetMsgs = groupingSets.map { groupingSet => + val groupingSetMsg = proto.Aggregate.GroupingSets.newBuilder() + for (groupCol <- groupingSet) { + groupingSetMsg.addGroupingSet(groupCol.expr) + } + groupingSetMsg.build() + } + new RelationalGroupedDataset( + toDF(), + cols, + proto.Aggregate.GroupType.GROUP_TYPE_GROUPING_SETS, + groupingSets = Some(groupingSetMsgs)) + } + /** * (Scala-specific) Aggregates on the entire Dataset without groups. 
* {{{ diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala index 5ed97e45c7701..776a6231eaecd 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala @@ -39,7 +39,8 @@ class RelationalGroupedDataset private[sql] ( private[sql] val df: DataFrame, private[sql] val groupingExprs: Seq[Column], groupType: proto.Aggregate.GroupType, - pivot: Option[proto.Aggregate.Pivot] = None) { + pivot: Option[proto.Aggregate.Pivot] = None, + groupingSets: Option[Seq[proto.Aggregate.GroupingSets]] = None) { private[this] def toDF(aggExprs: Seq[Column]): DataFrame = { df.sparkSession.newDataFrame { builder => @@ -60,6 +61,11 @@ class RelationalGroupedDataset private[sql] ( builder.getAggregateBuilder .setGroupType(proto.Aggregate.GroupType.GROUP_TYPE_PIVOT) .setPivot(pivot.get) + case proto.Aggregate.GroupType.GROUP_TYPE_GROUPING_SETS => + assert(groupingSets.isDefined) + val aggBuilder = builder.getAggregateBuilder + .setGroupType(proto.Aggregate.GroupType.GROUP_TYPE_GROUPING_SETS) + groupingSets.get.foreach(aggBuilder.addGroupingSets) case g => throw new UnsupportedOperationException(g.toString) } } diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala index 5cc63bc45a04a..c5c917ebfa955 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala @@ -3017,6 +3017,12 @@ class PlanGenerationTestSuite simple.groupBy(Column("id")).pivot("a").agg(functions.count(Column("b"))) } + test("groupingSets") { + simple + .groupingSets(Seq(Seq(fn.col("a")), Seq.empty[Column]), fn.col("a")) + .agg("a" -> "max", "a" -> "count") + } + test("width_bucket") { simple.select(fn.width_bucket(fn.col("b"), fn.col("b"), fn.col("b"), fn.col("a"))) } diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/RetryPolicy.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/RetryPolicy.scala index cb5b97f2e4aff..8c8472d780dbc 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/RetryPolicy.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/RetryPolicy.scala @@ -55,7 +55,7 @@ object RetryPolicy { def defaultPolicy(): RetryPolicy = RetryPolicy( name = "DefaultPolicy", // Please synchronize changes here with Python side: - // pyspark/sql/connect/client/core.py + // pyspark/sql/connect/client/retries.py // // Note: these constants are selected so that the maximum tolerated wait is guaranteed // to be at least 10 minutes diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReader.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReader.scala index 488208574809b..53d8d46e62689 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReader.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReader.scala @@ -134,7 
+134,7 @@ private[arrow] class SmallIntVectorReader(v: SmallIntVector) private[arrow] class IntVectorReader(v: IntVector) extends TypedArrowVectorReader[IntVector](v) { override def getInt(i: Int): Int = vector.get(i) override def getLong(i: Int): Long = getInt(i) - override def getFloat(i: Int): Float = getInt(i) + override def getFloat(i: Int): Float = getInt(i).toFloat override def getDouble(i: Int): Double = getInt(i) override def getString(i: Int): String = String.valueOf(getInt(i)) override def getJavaDecimal(i: Int): JBigDecimal = JBigDecimal.valueOf(getInt(i)) @@ -143,8 +143,8 @@ private[arrow] class IntVectorReader(v: IntVector) extends TypedArrowVectorReade private[arrow] class BigIntVectorReader(v: BigIntVector) extends TypedArrowVectorReader[BigIntVector](v) { override def getLong(i: Int): Long = vector.get(i) - override def getFloat(i: Int): Float = getLong(i) - override def getDouble(i: Int): Double = getLong(i) + override def getFloat(i: Int): Float = getLong(i).toFloat + override def getDouble(i: Int): Double = getLong(i).toDouble override def getString(i: Int): String = String.valueOf(getLong(i)) override def getJavaDecimal(i: Int): JBigDecimal = JBigDecimal.valueOf(getLong(i)) override def getTimestamp(i: Int): Timestamp = toJavaTimestamp(getLong(i) * MICROS_PER_SECOND) diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_encode.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_encode.explain index 56da919abf4c5..2f65436059230 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_encode.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_encode.explain @@ -1,2 +1,2 @@ -Project [encode(g#0, UTF-8) AS encode(g, UTF-8)#0] +Project [encode(g#0, UTF-8, false) AS encode(g, UTF-8)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_to_binary_with_format.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_to_binary_with_format.explain index e9513f0103c81..b62ccccc0c15e 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_to_binary_with_format.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_to_binary_with_format.explain @@ -1,2 +1,2 @@ -Project [encode(g#0, UTF-8) AS to_binary(g, utf-8)#0] +Project [encode(g#0, UTF-8, false) AS to_binary(g, utf-8)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/groupingSets.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/groupingSets.explain new file mode 100644 index 0000000000000..1e3fe1a987ef5 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/groupingSets.explain @@ -0,0 +1,4 @@ +Aggregate [a#0, spark_grouping_id#0L], [a#0, max(a#0) AS max(a)#0, count(a#0) AS count(a)#0L] ++- Expand [[id#0L, a#0, b#0, a#0, 0], [id#0L, a#0, b#0, null, 1]], [id#0L, a#0, b#0, a#0, spark_grouping_id#0L] + +- Project [id#0L, a#0, b#0, a#0 AS a#0] + +- LocalRelation , [id#0L, a#0, b#0] diff --git a/connector/connect/common/src/test/resources/query-tests/queries/groupingSets.json b/connector/connect/common/src/test/resources/query-tests/queries/groupingSets.json new file mode 100644 index 
0000000000000..6e84824ec7a3a --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/groupingSets.json @@ -0,0 +1,50 @@ +{ + "common": { + "planId": "1" + }, + "aggregate": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double\u003e" + } + }, + "groupType": "GROUP_TYPE_GROUPING_SETS", + "groupingExpressions": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }], + "aggregateExpressions": [{ + "unresolvedFunction": { + "functionName": "max", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a", + "planId": "0" + } + }] + } + }, { + "unresolvedFunction": { + "functionName": "count", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a", + "planId": "0" + } + }] + } + }], + "groupingSets": [{ + "groupingSet": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }] + }, { + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/groupingSets.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/groupingSets.proto.bin new file mode 100644 index 0000000000000..ce0294096706e Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/groupingSets.proto.bin differ diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index 95c5acc803d49..abfc063139056 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -2235,7 +2235,7 @@ class SparkConnectPlanner( JoinWith.typedJoinWith( joined, - session.sqlContext.conf.dataFrameSelfJoinAutoResolveAmbiguity, + session.sessionState.conf.dataFrameSelfJoinAutoResolveAmbiguity, session.sessionState.analyzer.resolver, rel.getJoinDataType.getIsLeftStruct, rel.getJoinDataType.getIsRightStruct) @@ -2563,6 +2563,8 @@ class SparkConnectPlanner( // To avoid explicit handling of the result on the client, we build the expected input // of the relation on the server. The client has to simply forward the result. val result = SqlCommandResult.newBuilder() + // Only filled when isCommand + val metrics = ExecutePlanResponse.Metrics.newBuilder() if (isCommand) { // Convert the results to Arrow. val schema = df.schema @@ -2596,10 +2598,10 @@ class SparkConnectPlanner( proto.LocalRelation .newBuilder() .setData(ByteString.copyFrom(bytes)))) + metrics.addAllMetrics(MetricGenerator.transformPlan(df).asJava) } else { - // Trigger assertExecutedPlanPrepared to ensure post ReadyForExecution before finished - // executedPlan is currently called by createMetricsResponse below - df.queryExecution.assertExecutedPlanPrepared() + // No execution triggered for relations. Manually set ready + tracker.setReadyForExecution() result.setRelation( proto.Relation .newBuilder() @@ -2622,8 +2624,17 @@ class SparkConnectPlanner( .setSqlCommandResult(result) .build()) - // Send Metrics - responseObserver.onNext(MetricGenerator.createMetricsResponse(sessionHolder, df)) + // Send Metrics when isCommand (i.e. show tables) which is eagerly executed & has metrics + // Skip metrics when !isCommand (i.e. 
select 1) which is not executed & doesn't have metrics + if (isCommand) { + responseObserver.onNext( + ExecutePlanResponse + .newBuilder() + .setSessionId(sessionHolder.sessionId) + .setServerSideSessionId(sessionHolder.serverSessionId) + .setMetrics(metrics.build) + .build) + } } private def handleRegisterUserDefinedFunction( diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/MetricGenerator.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/MetricGenerator.scala index c9bba653e8a8f..e2e4128311871 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/MetricGenerator.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/MetricGenerator.scala @@ -51,6 +51,12 @@ private[connect] object MetricGenerator extends AdaptiveSparkPlanHelper { allChildren(p).flatMap(c => transformPlan(c, p.id)) } + private[connect] def transformPlan( + rows: DataFrame): Seq[ExecutePlanResponse.Metrics.MetricObject] = { + val executedPlan = rows.queryExecution.executedPlan + transformPlan(executedPlan, executedPlan.id) + } + private def transformPlan( p: SparkPlan, parentId: Int): Seq[ExecutePlanResponse.Metrics.MetricObject] = { diff --git a/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/DirectKafkaInputDStream.scala b/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/DirectKafkaInputDStream.scala index f5967a74ad339..c412486ce197e 100644 --- a/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/DirectKafkaInputDStream.scala +++ b/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/DirectKafkaInputDStream.scala @@ -146,7 +146,7 @@ private[spark] class DirectKafkaInputDStream[K, V]( val maxRateLimitPerPartition = ppc.maxRatePerPartition(tp) val backpressureRate = lag / totalLag.toDouble * rate tp -> (if (maxRateLimitPerPartition > 0) { - Math.min(backpressureRate, maxRateLimitPerPartition)} else backpressureRate) + Math.min(backpressureRate, maxRateLimitPerPartition.toDouble)} else backpressureRate) } case None => offsets.map { case (tp, offset) => tp -> ppc.maxRatePerPartition(tp).toDouble } } diff --git a/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala b/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala index 286b073125ff0..6c57091bc3c46 100644 --- a/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala +++ b/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala @@ -98,7 +98,7 @@ private[spark] class KafkaRDD[K, V]( if (compacted) { super.countApprox(timeout, confidence) } else { - val c = count() + val c = count().toDouble new PartialResult(new BoundedDouble(c, 1.0, c, c), true) } diff --git a/connector/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala b/connector/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala index faf114108fac5..28f0906258303 100644 --- a/connector/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala +++ b/connector/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala @@ -805,7 +805,7 @@ private[streaming] class ConstantEstimator(@volatile private var rate: Long) time: Long, elements: Long, processingDelay: Long, - schedulingDelay: Long): Option[Double] = Some(rate) + 
schedulingDelay: Long): Option[Double] = Some(rate.toDouble) } private[streaming] class ConstantRateController(id: Int, estimator: RateEstimator, rate: Long) diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala index d6363182606d9..e6d5a750ea325 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala @@ -378,7 +378,7 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( resources.foreach { case (k, v) => PythonRDD.writeUTF(k, dataOut) PythonRDD.writeUTF(v.name, dataOut) - dataOut.writeInt(v.addresses.size) + dataOut.writeInt(v.addresses.length) v.addresses.foreach { case addr => PythonRDD.writeUTF(addr, dataOut) } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala index cb325b37958ec..b2f35984d37f8 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala @@ -83,13 +83,13 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") { .flatMap(_.iterator) .groupBy(_._1) // group by resource name .map { case (rName, rInfoArr) => - rName -> rInfoArr.map(_._2.addresses.size).sum + rName -> rInfoArr.map(_._2.addresses.length).sum } val usedInfo = aliveWorkers.map(_.resourcesInfoUsed) .flatMap(_.iterator) .groupBy(_._1) // group by resource name .map { case (rName, rInfoArr) => - rName -> rInfoArr.map(_._2.addresses.size).sum + rName -> rInfoArr.map(_._2.addresses.length).sum } formatResourcesUsed(totalInfo, usedInfo) } diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala index 486e59652218b..8c474e9b76c6a 100644 --- a/core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala @@ -46,7 +46,7 @@ class ExecutorMetrics private[spark] extends Serializable { private[spark] def this(metrics: Array[Long]) = { this() - Array.copy(metrics, 0, this.metrics, 0, Math.min(metrics.size, this.metrics.size)) + Array.copy(metrics, 0, this.metrics, 0, Math.min(metrics.length, this.metrics.length)) } private[spark] def this(metrics: AtomicLongArray) = { diff --git a/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryInputFormat.scala b/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryInputFormat.scala index 978afaffab30b..4897cf694ae8e 100644 --- a/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryInputFormat.scala +++ b/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryInputFormat.scala @@ -74,7 +74,7 @@ private[spark] class FixedLengthBinaryInputFormat if (defaultSize < recordLength) { recordLength.toLong } else { - (Math.floor(defaultSize / recordLength) * recordLength).toLong + defaultSize / recordLength * recordLength } } diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/StatsdReporter.scala b/core/src/main/scala/org/apache/spark/metrics/sink/StatsdReporter.scala index 877f04b1adc01..189d390d37999 100644 --- a/core/src/main/scala/org/apache/spark/metrics/sink/StatsdReporter.scala +++ b/core/src/main/scala/org/apache/spark/metrics/sink/StatsdReporter.scala @@ -124,9 +124,9 @@ private[spark] class StatsdReporter( private def reportTimer(name: String, timer: 
Timer)(implicit socket: DatagramSocket): Unit = { val snapshot = timer.getSnapshot - send(fullName(name, "max"), format(convertDuration(snapshot.getMax)), TIMER) + send(fullName(name, "max"), format(convertDuration(snapshot.getMax.toDouble)), TIMER) send(fullName(name, "mean"), format(convertDuration(snapshot.getMean)), TIMER) - send(fullName(name, "min"), format(convertDuration(snapshot.getMin)), TIMER) + send(fullName(name, "min"), format(convertDuration(snapshot.getMin.toDouble)), TIMER) send(fullName(name, "stddev"), format(convertDuration(snapshot.getStdDev)), TIMER) send(fullName(name, "p50"), format(convertDuration(snapshot.getMedian)), TIMER) send(fullName(name, "p75"), format(convertDuration(snapshot.get75thPercentile)), TIMER) diff --git a/core/src/main/scala/org/apache/spark/partial/CountEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/CountEvaluator.scala index cbee136871012..a974ca2f1a05b 100644 --- a/core/src/main/scala/org/apache/spark/partial/CountEvaluator.scala +++ b/core/src/main/scala/org/apache/spark/partial/CountEvaluator.scala @@ -35,7 +35,7 @@ private[spark] class CountEvaluator(totalOutputs: Int, confidence: Double) override def currentResult(): BoundedDouble = { if (outputsMerged == totalOutputs) { - new BoundedDouble(sum, 1.0, sum, sum) + new BoundedDouble(sum.toDouble, 1.0, sum.toDouble, sum.toDouble) } else if (outputsMerged == 0 || sum == 0) { new BoundedDouble(0, 0.0, 0.0, Double.PositiveInfinity) } else { @@ -57,7 +57,8 @@ private[partial] object CountEvaluator { val low = dist.inverseCumulativeProbability((1 - confidence) / 2) val high = dist.inverseCumulativeProbability((1 + confidence) / 2) // Add 'sum' to each because distribution is just of remaining count, not observed - new BoundedDouble(sum + dist.getNumericalMean, confidence, sum + low, sum + high) + new BoundedDouble( + sum + dist.getNumericalMean, confidence, (sum + low).toDouble, (sum + high).toDouble) } diff --git a/core/src/main/scala/org/apache/spark/partial/GroupedCountEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/GroupedCountEvaluator.scala index d2b4187df5d50..7cd60815fadbe 100644 --- a/core/src/main/scala/org/apache/spark/partial/GroupedCountEvaluator.scala +++ b/core/src/main/scala/org/apache/spark/partial/GroupedCountEvaluator.scala @@ -41,7 +41,9 @@ private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, conf override def currentResult(): Map[T, BoundedDouble] = { if (outputsMerged == totalOutputs) { - sums.map { case (key, sum) => (key, new BoundedDouble(sum, 1.0, sum, sum)) }.toMap + sums.map { case (key, sum) => + (key, new BoundedDouble(sum.toDouble, 1.0, sum.toDouble, sum.toDouble)) + }.toMap } else if (outputsMerged == 0) { new HashMap[T, BoundedDouble] } else { diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala index 9080be01a9e66..fe08e8337f76f 100644 --- a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala +++ b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala @@ -303,7 +303,7 @@ private[spark] object ResourceUtils extends Logging { allocations: Map[String, ResourceInformation], execReqs: Map[String, ExecutorResourceRequest]): Unit = { execReqs.foreach { case (rName, req) => - require(allocations.contains(rName) && allocations(rName).addresses.size >= req.amount, + require(allocations.contains(rName) && allocations(rName).addresses.length >= req.amount, s"Resource: ${rName}, with addresses: " + 
s"${allocations(rName).addresses.mkString(",")} " + s"is less than what the user requested: ${req.amount})") @@ -476,7 +476,7 @@ private[spark] object ResourceUtils extends Logging { if (maxTaskPerExec < (execAmount * numParts / taskAmount)) { val origTaskAmount = treq.amount val taskReqStr = s"${origTaskAmount}/${numParts}" - val resourceNumSlots = Math.floor(execAmount * numParts / taskAmount).toInt + val resourceNumSlots = (execAmount * numParts / taskAmount).toInt val message = s"The configuration of resource: ${treq.resourceName} " + s"(exec = ${execAmount}, task = ${taskReqStr}, " + s"runnable tasks = ${resourceNumSlots}) will " + diff --git a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala index d10cf55ed0d10..113521453ad7b 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala @@ -95,7 +95,7 @@ private[spark] object MapStatus { } else if (size <= 1L) { 1 } else { - math.min(255, math.ceil(math.log(size) / math.log(LOG_BASE)).toInt).toByte + math.min(255, math.ceil(math.log(size.toDouble) / math.log(LOG_BASE)).toInt).toByte } } @@ -276,12 +276,12 @@ private[spark] object HighlyCompressedMapStatus { val skewSizeThreshold = Math.max( medianSize * accurateBlockSkewedFactor, - sortedSizes(totalNumBlocks - maxAccurateSkewedBlockNumber) + sortedSizes(totalNumBlocks - maxAccurateSkewedBlockNumber).toDouble ) - Math.min(shuffleAccurateBlockThreshold, skewSizeThreshold) + Math.min(shuffleAccurateBlockThreshold.toDouble, skewSizeThreshold) } else { // Disable skew detection if accurateBlockSkewedFactor <= 0 - shuffleAccurateBlockThreshold + shuffleAccurateBlockThreshold.toDouble } val hugeBlockSizes = mutable.Map.empty[Int, Byte] diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskDescription.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskDescription.scala index 6e6507782a49e..75032086ead72 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskDescription.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskDescription.scala @@ -80,7 +80,7 @@ private[spark] object TaskDescription { map.foreach { case (key, value) => dataOut.writeUTF(key) dataOut.writeUTF(value.name) - dataOut.writeInt(value.addresses.size) + dataOut.writeInt(value.addresses.length) value.addresses.foreach(dataOut.writeUTF(_)) } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index 41f6b3ad64bf5..15ae2fef221d1 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -434,7 +434,7 @@ private[spark] class TaskSchedulerImpl( // addresses are the same as that we allocated in taskResourceAssignments since it's // synchronized. We don't remove the exact addresses allocated because the current // approach produces the identical result with less time complexity. 
- availableResources(i)(rName).remove(0, rInfo.addresses.size) + availableResources(i)(rName).remove(0, rInfo.addresses.length) } } } catch { @@ -752,7 +752,7 @@ private[spark] class TaskSchedulerImpl( .mkString(",") addressesWithDescs.foreach(_._2.properties.setProperty("addresses", addressesStr)) - logInfo(s"Successfully scheduled all the ${addressesWithDescs.size} tasks for " + + logInfo(s"Successfully scheduled all the ${addressesWithDescs.length} tasks for " + s"barrier stage ${taskSet.stageId}.") } taskSet.barrierPendingLaunchTasks.clear() diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 6157a3e46c875..d17e6735c4ecf 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -809,7 +809,7 @@ private[spark] class TaskSetManager( info.markFinished(TaskState.FINISHED, clock.getTimeMillis()) if (speculationEnabled) { - successfulTaskDurations.insert(info.duration) + successfulTaskDurations.insert(info.duration.toDouble) taskProcessRateCalculator.foreach(_.updateAvgTaskProcessRate(tid, result)) } removeRunningTask(tid) @@ -1196,7 +1196,7 @@ private[spark] class TaskSetManager( val timeMs = clock.getTimeMillis() if (numSuccessfulTasks >= minFinishedForSpeculation) { val medianDuration = successfulTaskDurations.percentile() - val threshold = max(speculationMultiplier * medianDuration, minTimeToSpeculation) + val threshold = max(speculationMultiplier * medianDuration, minTimeToSpeculation.toDouble) // TODO: Threshold should also look at standard deviation of task durations and have a lower // bound based on that. logDebug("Task length threshold for speculation: " + threshold) @@ -1204,7 +1204,8 @@ private[spark] class TaskSetManager( } else if (isSpeculationThresholdSpecified && speculationTasksLessEqToSlots) { val threshold = speculationTaskDurationThresOpt.get logDebug(s"Tasks taking longer time than provided speculation threshold: $threshold") - foundTasks = checkAndSubmitSpeculatableTasks(timeMs, threshold, customizedThreshold = true) + foundTasks = checkAndSubmitSpeculatableTasks( + timeMs, threshold.toDouble, customizedThreshold = true) } // avoid more warning logs. if (foundTasks) { diff --git a/core/src/main/scala/org/apache/spark/ui/ConsoleProgressBar.scala b/core/src/main/scala/org/apache/spark/ui/ConsoleProgressBar.scala index dff94b4e875de..b5473e076946b 100644 --- a/core/src/main/scala/org/apache/spark/ui/ConsoleProgressBar.scala +++ b/core/src/main/scala/org/apache/spark/ui/ConsoleProgressBar.scala @@ -74,7 +74,7 @@ private[spark] class ConsoleProgressBar(sc: SparkContext) extends Logging { * the progress bar, then progress bar will be showed in next line without overwrite logs. 
*/ private def show(now: Long, stages: Seq[StageData]): Unit = { - val width = TerminalWidth / stages.size + val width = TerminalWidth / stages.length val bar = stages.map { s => val total = s.numTasks val header = s"[Stage ${s.stageId}:" diff --git a/core/src/main/scala/org/apache/spark/util/Clock.scala b/core/src/main/scala/org/apache/spark/util/Clock.scala index 226f15d3d38c2..e0cb3f4188e6d 100644 --- a/core/src/main/scala/org/apache/spark/util/Clock.scala +++ b/core/src/main/scala/org/apache/spark/util/Clock.scala @@ -85,7 +85,7 @@ private[spark] class SystemClock extends Clock { return currentTime } - val pollTime = math.max(waitTime / 10.0, minPollTime).toLong + val pollTime = math.max(waitTime / 10.0, minPollTime.toDouble).toLong while (true) { currentTime = System.currentTimeMillis() diff --git a/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala b/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala index 3245a528b74cf..4c7b12f60cc8d 100644 --- a/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala @@ -245,7 +245,7 @@ private[spark] object HadoopFSUtils extends Logging { val allLeafStatuses = { val (dirs, topLevelFiles) = filteredStatuses.partition(_.isDirectory) val filteredNestedFiles: Seq[FileStatus] = contextOpt match { - case Some(context) if dirs.size > parallelismThreshold => + case Some(context) if dirs.length > parallelismThreshold => parallelListLeafFilesInternal( context, dirs.map(_.getPath).toImmutableArraySeq, diff --git a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBufferFileRegion.scala b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBufferFileRegion.scala index 23fc0f88f0b93..ec74ce0473efd 100644 --- a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBufferFileRegion.scala +++ b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBufferFileRegion.scala @@ -69,7 +69,7 @@ private[io] class ChunkedByteBufferFileRegion( if (keepGoing) { // advance to the next chunk (if there are any more) currentChunkIdx += 1 - if (currentChunkIdx == chunks.size) { + if (currentChunkIdx == chunks.length) { keepGoing = false } else { currentChunk = chunks(currentChunkIdx) diff --git a/core/src/main/scala/org/apache/spark/util/random/StratifiedSamplingUtils.scala b/core/src/main/scala/org/apache/spark/util/random/StratifiedSamplingUtils.scala index f08cf44e4e12b..08e2ea01f623e 100644 --- a/core/src/main/scala/org/apache/spark/util/random/StratifiedSamplingUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/random/StratifiedSamplingUtils.scala @@ -98,8 +98,8 @@ private[spark] object StratifiedSamplingUtils extends Logging { if (acceptResult.areBoundsEmpty) { val n = counts.get(key) val sampleSize = math.ceil(n * fraction).toLong - val lmbd1 = PoissonBounds.getLowerBound(sampleSize) - val lmbd2 = PoissonBounds.getUpperBound(sampleSize) + val lmbd1 = PoissonBounds.getLowerBound(sampleSize.toDouble) + val lmbd2 = PoissonBounds.getUpperBound(sampleSize.toDouble) acceptResult.acceptBound = lmbd1 / n acceptResult.waitListBound = (lmbd2 - lmbd1) / n } diff --git a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala index c425596eb0433..874f4896bb01e 100644 --- a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala +++ b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala @@ -170,10 +170,10 @@ trait RDDCheckpointTester { self: SparkFunSuite => * upon checkpointing. 
Ignores the checkpointData field, which may grow when we checkpoint. */ private def getSerializedSizes(rdd: RDD[_]): (Int, Int) = { - val rddSize = Utils.serialize(rdd).size - val rddCpDataSize = Utils.serialize(rdd.checkpointData).size - val rddPartitionSize = Utils.serialize(rdd.partitions).size - val rddDependenciesSize = Utils.serialize(rdd.dependencies).size + val rddSize = Utils.serialize(rdd).length + val rddCpDataSize = Utils.serialize(rdd.checkpointData).length + val rddPartitionSize = Utils.serialize(rdd.partitions).length + val rddDependenciesSize = Utils.serialize(rdd.dependencies).length // Print detailed size, helps in debugging logInfo("Serialized sizes of " + rdd + @@ -339,7 +339,7 @@ class CheckpointSuite extends SparkFunSuite with RDDCheckpointTester with LocalS runTest("ParallelCollectionRDD") { reliableCheckpoint: Boolean => val parCollection = sc.makeRDD(1 to 4, 2) - val numPartitions = parCollection.partitions.size + val numPartitions = parCollection.partitions.length checkpoint(parCollection, reliableCheckpoint) assert(parCollection.dependencies === Nil) val result = parCollection.collect() @@ -358,7 +358,7 @@ class CheckpointSuite extends SparkFunSuite with RDDCheckpointTester with LocalS val blockManager = SparkEnv.get.blockManager blockManager.putSingle(blockId, "test", StorageLevel.MEMORY_ONLY) val blockRDD = new BlockRDD[String](sc, Array(blockId)) - val numPartitions = blockRDD.partitions.size + val numPartitions = blockRDD.partitions.length checkpoint(blockRDD, reliableCheckpoint) val result = blockRDD.collect() if (reliableCheckpoint) { @@ -507,7 +507,7 @@ class CheckpointSuite extends SparkFunSuite with RDDCheckpointTester with LocalS runTest("CheckpointRDD with zero partitions") { reliableCheckpoint: Boolean => val rdd = new BlockRDD[Int](sc, Array.empty[BlockId]) - assert(rdd.partitions.size === 0) + assert(rdd.partitions.length === 0) assert(rdd.isCheckpointed === false) assert(rdd.isCheckpointedAndMaterialized === false) checkpoint(rdd, reliableCheckpoint) @@ -516,7 +516,7 @@ class CheckpointSuite extends SparkFunSuite with RDDCheckpointTester with LocalS assert(rdd.count() === 0) assert(rdd.isCheckpointed) assert(rdd.isCheckpointedAndMaterialized) - assert(rdd.partitions.size === 0) + assert(rdd.partitions.length === 0) } runTest("checkpointAllMarkedAncestors") { reliableCheckpoint: Boolean => diff --git a/core/src/test/scala/org/apache/spark/DistributedSuite.scala b/core/src/test/scala/org/apache/spark/DistributedSuite.scala index e156533be15ca..a2b09f0ef3c3a 100644 --- a/core/src/test/scala/org/apache/spark/DistributedSuite.scala +++ b/core/src/test/scala/org/apache/spark/DistributedSuite.scala @@ -80,7 +80,7 @@ class DistributedSuite extends SparkFunSuite with Matchers with LocalSparkContex sc = new SparkContext(clusterUrl, "test") val pairs = sc.parallelize(Seq((1, 1), (1, 2), (1, 3), (2, 1)), 5) val groups = pairs.groupByKey(5).collect() - assert(groups.size === 2) + assert(groups.length === 2) val valuesFor1 = groups.find(_._1 == 1).get._2 assert(valuesFor1.toList.sorted === List(1, 2, 3)) val valuesFor2 = groups.find(_._1 == 2).get._2 @@ -264,8 +264,8 @@ class DistributedSuite extends SparkFunSuite with Matchers with LocalSparkContex sc = new SparkContext(clusterUrl, "test") val data = sc.parallelize(Seq(true, true), 2) assert(data.count() === 2) // force executors to start - assert(data.map(markNodeIfIdentity).collect().size === 2) - assert(data.map(failOnMarkedIdentity).collect().size === 2) + 
assert(data.map(markNodeIfIdentity).collect().length === 2) + assert(data.map(failOnMarkedIdentity).collect().length === 2) } test("recover from repeated node failures during shuffle-map") { @@ -275,7 +275,7 @@ class DistributedSuite extends SparkFunSuite with Matchers with LocalSparkContex for (i <- 1 to 3) { val data = sc.parallelize(Seq(true, false), 2) assert(data.count() === 2) - assert(data.map(markNodeIfIdentity).collect().size === 2) + assert(data.map(markNodeIfIdentity).collect().length === 2) assert(data.map(failOnMarkedIdentity).map(x => x -> x).groupByKey().count() === 2) } } @@ -287,7 +287,7 @@ class DistributedSuite extends SparkFunSuite with Matchers with LocalSparkContex for (i <- 1 to 3) { val data = sc.parallelize(Seq(true, true), 2) assert(data.count() === 2) - assert(data.map(markNodeIfIdentity).collect().size === 2) + assert(data.map(markNodeIfIdentity).collect().length === 2) // This relies on mergeCombiners being used to perform the actual reduce for this // test to actually be testing what it claims. val grouped = data.map(x => x -> x).combineByKey( @@ -295,7 +295,7 @@ class DistributedSuite extends SparkFunSuite with Matchers with LocalSparkContex (x: Boolean, y: Boolean) => x, (x: Boolean, y: Boolean) => failOnMarkedIdentity(x) ) - assert(grouped.collect().size === 1) + assert(grouped.collect().length === 1) } } @@ -310,8 +310,8 @@ class DistributedSuite extends SparkFunSuite with Matchers with LocalSparkContex data.persist(StorageLevel.MEMORY_ONLY_2) assert(data.count() === 4) - assert(data.map(markNodeIfIdentity).collect().size === 4) - assert(data.map(failOnMarkedIdentity).collect().size === 4) + assert(data.map(markNodeIfIdentity).collect().length === 4) + assert(data.map(failOnMarkedIdentity).collect().length === 4) // Create a new replicated RDD to make sure that cached peer information doesn't cause // problems. diff --git a/core/src/test/scala/org/apache/spark/FileSuite.scala b/core/src/test/scala/org/apache/spark/FileSuite.scala index 4a2b2339159cb..7750db6020887 100644 --- a/core/src/test/scala/org/apache/spark/FileSuite.scala +++ b/core/src/test/scala/org/apache/spark/FileSuite.scala @@ -236,7 +236,7 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { // Try reading the output back as an object file val ct = reflect.ClassTag[Any](Utils.classForName(className, noSparkClassLoader = true)) val output = sc.objectFile[Any](outputDir) - assert(output.collect().size === 3) + assert(output.collect().length === 3) assert(output.collect().head.getClass.getName === className) } } diff --git a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala index dde30aee82878..5d635011d2ec6 100644 --- a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala +++ b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala @@ -237,13 +237,13 @@ class MapOutputTrackerSuite extends SparkFunSuite with LocalSparkContext { // as it has 4 out of 7 bytes of output. val topLocs50 = tracker.getLocationsWithLargestOutputs(10, 0, 1, 0.5) assert(topLocs50.nonEmpty) - assert(topLocs50.get.size === 1) + assert(topLocs50.get.length === 1) assert(topLocs50.get.head === BlockManagerId("a", "hostA", 1000)) // When the threshold is 20%, both hosts should be returned as preferred locations. 
val topLocs20 = tracker.getLocationsWithLargestOutputs(10, 0, 1, 0.2) assert(topLocs20.nonEmpty) - assert(topLocs20.get.size === 2) + assert(topLocs20.get.length === 2) assert(topLocs20.get.toSet === Seq(BlockManagerId("a", "hostA", 1000), BlockManagerId("b", "hostB", 1000)).toSet) diff --git a/core/src/test/scala/org/apache/spark/PartitioningSuite.scala b/core/src/test/scala/org/apache/spark/PartitioningSuite.scala index 28fa9f5e23e79..3447ba8c1765e 100644 --- a/core/src/test/scala/org/apache/spark/PartitioningSuite.scala +++ b/core/src/test/scala/org/apache/spark/PartitioningSuite.scala @@ -77,7 +77,7 @@ class PartitioningSuite extends SparkFunSuite with SharedSparkContext with Priva for (element <- 1 to 1000) { val partition = partitioner.getPartition(element) if (numPartitions > 1) { - if (partition < rangeBounds.size) { + if (partition < rangeBounds.length) { assert(element <= rangeBounds(partition)) } if (partition > 0) { @@ -111,7 +111,7 @@ class PartitioningSuite extends SparkFunSuite with SharedSparkContext with Priva assert(count === rdd.count()) sketched.foreach { case (idx, n, sample) => assert(n === idx) - assert(sample.size === math.min(n, sampleSizePerPartition)) + assert(sample.length === math.min(n, sampleSizePerPartition)) } } diff --git a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala index a92d532907adf..ac10a00d98e04 100644 --- a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala +++ b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala @@ -51,7 +51,7 @@ abstract class ShuffleSuite extends SparkFunSuite with Matchers with LocalRootDi sc = new SparkContext("local", "test", myConf) val pairs = sc.parallelize(Seq((1, 1), (1, 2), (1, 3), (2, 1)), 4) val groups = pairs.groupByKey(4).collect() - assert(groups.size === 2) + assert(groups.length === 2) val valuesFor1 = groups.find(_._1 == 1).get._2 assert(valuesFor1.toList.sorted === List(1, 2, 3)) val valuesFor2 = groups.find(_._1 == 2).get._2 diff --git a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala index 0b33e2a9426ce..e7315d6119be0 100644 --- a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala +++ b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala @@ -163,7 +163,7 @@ private[spark] class Benchmark( // scalastyle:on assert(runTimes.nonEmpty) val best = runTimes.min - val avg = runTimes.sum / runTimes.size + val avg = runTimes.sum.toDouble / runTimes.size val stdev = if (runTimes.size > 1) { math.sqrt(runTimes.map(time => (time - avg) * (time - avg)).sum / (runTimes.size - 1)) } else 0 diff --git a/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala index 3b3bcff0c5a3f..20993df718a3b 100644 --- a/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala @@ -439,7 +439,7 @@ class DecommissionWorkerSuite val appId = sc.applicationId eventually(timeout(1.minute), interval(1.seconds)) { val apps = getApplications() - assert(apps.size === 1) + assert(apps.length === 1) assert(apps.head.id === appId) assert(apps.head.getExecutorLimit === Int.MaxValue) } diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index a032e9aa16be9..553d001285b2d 100644 --- 
a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -1736,7 +1736,7 @@ object SimpleApplicationTest { .map(x => SparkEnv.get.conf.get(config)) .collect() .distinct - if (executorValues.size != 1) { + if (executorValues.length != 1) { throw new SparkException(s"Inconsistent values for $config: " + s"${executorValues.mkString("values(", ", ", ")")}") } @@ -1795,7 +1795,7 @@ class TestFileSystem extends org.apache.hadoop.fs.LocalFileSystem { class TestSparkApplication extends SparkApplication with Matchers { override def start(args: Array[String], conf: SparkConf): Unit = { - assert(args.size === 1) + assert(args.length === 1) assert(args(0) === "hello") assert(conf.get("spark.test.hello") === "world") assert(sys.props.get("spark.test.hello") === None) diff --git a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala index 01995ca3632d2..5ecc551c16b8c 100644 --- a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala @@ -69,7 +69,7 @@ class StandaloneDynamicAllocationSuite workers = makeWorkers(10, 2048) // Wait until all workers register with master successfully eventually(timeout(1.minute), interval(10.milliseconds)) { - assert(getMasterState.workers.size === numWorkers) + assert(getMasterState.workers.length === numWorkers) } } @@ -93,7 +93,7 @@ class StandaloneDynamicAllocationSuite val appId = sc.applicationId eventually(timeout(10.seconds), interval(10.millis)) { val apps = getApplications() - assert(apps.size === 1) + assert(apps.length === 1) assert(apps.head.id === appId) assert(apps.head.executors.size === 2) assert(apps.head.getExecutorLimit === Int.MaxValue) @@ -140,7 +140,7 @@ class StandaloneDynamicAllocationSuite val appId = sc.applicationId eventually(timeout(10.seconds), interval(10.millis)) { val apps = getApplications() - assert(apps.size === 1) + assert(apps.length === 1) assert(apps.head.id === appId) assert(apps.head.executors.size === 2) assert(apps.head.executors.values.map(_.cores).toArray === Array(4, 4)) @@ -195,7 +195,7 @@ class StandaloneDynamicAllocationSuite val appId = sc.applicationId eventually(timeout(10.seconds), interval(10.millis)) { val apps = getApplications() - assert(apps.size === 1) + assert(apps.length === 1) assert(apps.head.id === appId) assert(apps.head.executors.size === 2) assert(apps.head.executors.values.map(_.cores).toArray === Array(8, 8)) @@ -248,7 +248,7 @@ class StandaloneDynamicAllocationSuite val appId = sc.applicationId eventually(timeout(10.seconds), interval(10.millis)) { val apps = getApplications() - assert(apps.size === 1) + assert(apps.length === 1) assert(apps.head.id === appId) assert(apps.head.executors.size === 10) // 20 cores total assert(apps.head.getExecutorLimit === Int.MaxValue) @@ -302,7 +302,7 @@ class StandaloneDynamicAllocationSuite val appId = sc.applicationId eventually(timeout(10.seconds), interval(10.millis)) { val apps = getApplications() - assert(apps.size === 1) + assert(apps.length === 1) assert(apps.head.id === appId) assert(apps.head.executors.size === 4) // 8 cores total assert(apps.head.getExecutorLimit === Int.MaxValue) @@ -360,7 +360,7 @@ class StandaloneDynamicAllocationSuite sc.requestExecutors(2) eventually(timeout(10.seconds), interval(10.millis)) { val apps = getApplications() 
- assert(apps.size === 1) + assert(apps.length === 1) assert(apps.head.id === appId) assert(apps.head.executors.size === 2) assert(apps.head.getExecutorLimit === 2) @@ -385,7 +385,7 @@ class StandaloneDynamicAllocationSuite sc.requestExecutors(2) eventually(timeout(10.seconds), interval(10.millis)) { val apps = getApplications() - assert(apps.size === 1) + assert(apps.length === 1) assert(apps.head.id === appId) assert(apps.head.executors.size === 2) assert(apps.head.getExecutorLimit === 2) @@ -425,7 +425,7 @@ class StandaloneDynamicAllocationSuite val appId = sc.applicationId eventually(timeout(10.seconds), interval(10.millis)) { val apps = getApplications() - assert(apps.size === 1) + assert(apps.length === 1) assert(apps.head.id === appId) assert(apps.head.executors.size === 2) assert(apps.head.getExecutorLimit === Int.MaxValue) @@ -465,7 +465,7 @@ class StandaloneDynamicAllocationSuite val appId = sc.applicationId eventually(timeout(10.seconds), interval(10.millis)) { val apps = getApplications() - assert(apps.size === 1) + assert(apps.length === 1) assert(apps.head.id === appId) assert(apps.head.executors.size === initialExecutorLimit) assert(apps.head.getExecutorLimit === initialExecutorLimit) @@ -477,7 +477,7 @@ class StandaloneDynamicAllocationSuite val appId = sc.applicationId eventually(timeout(10.seconds), interval(10.millis)) { val apps = getApplications() - assert(apps.size === 1) + assert(apps.length === 1) assert(apps.head.id === appId) assert(apps.head.executors.size === 2) assert(apps.head.getExecutorLimit === Int.MaxValue) diff --git a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala index d109ed8442d44..3555faf5c2cb9 100644 --- a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala @@ -71,7 +71,7 @@ class AppClientSuite workers = makeWorkers(10, 2048) // Wait until all workers register with master successfully eventually(timeout(1.minute), interval(10.milliseconds)) { - assert(getMasterState.workers.size === numWorkers) + assert(getMasterState.workers.length === numWorkers) } } @@ -99,7 +99,7 @@ class AppClientSuite eventually(timeout(10.seconds), interval(10.millis)) { val apps = getApplications() assert(ci.listener.connectedIdList.size === 1, "client listener should have one connection") - assert(apps.size === 1, "master should have 1 registered app") + assert(apps.length === 1, "master should have 1 registered app") } // Send message to Master to request Executors, verify request by change in executor limit @@ -176,7 +176,7 @@ class AppClientSuite eventually(timeout(10.seconds), interval(10.millis)) { val apps = getApplications() assert(ci.listener.connectedIdList.size === 1, "client listener should have one connection") - assert(apps.size === 1, "master should have 1 registered app") + assert(apps.length === 1, "master should have 1 registered app") } // Send message to Master to request Executors with multiple resource profiles. 
diff --git a/core/src/test/scala/org/apache/spark/deploy/history/EventLogTestHelper.scala b/core/src/test/scala/org/apache/spark/deploy/history/EventLogTestHelper.scala index ac89f60955eed..0161917f8853d 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/EventLogTestHelper.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/EventLogTestHelper.scala @@ -56,7 +56,7 @@ object EventLogTestHelper { eventStr: String, desiredSize: Long): Seq[String] = { val stringLen = eventStr.getBytes(StandardCharsets.UTF_8).length - val repeatCount = Math.floor(desiredSize / stringLen).toInt + val repeatCount = (desiredSize / stringLen).toInt (0 until repeatCount).map { _ => writer.writeEvent(eventStr, flushLogger = true) eventStr diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala index d16e904bdcf13..3013a5bf4a294 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala @@ -1113,13 +1113,13 @@ abstract class FsHistoryProviderSuite extends SparkFunSuite with Matchers with P provider.checkForLogs() provider.cleanLogs() - assert(new File(testDir.toURI).listFiles().size === logCount) + assert(new File(testDir.toURI).listFiles().length === logCount) // Move the clock forward 1 day and scan the files again. They should still be there. clock.advance(TimeUnit.DAYS.toMillis(1)) provider.checkForLogs() provider.cleanLogs() - assert(new File(testDir.toURI).listFiles().size === logCount) + assert(new File(testDir.toURI).listFiles().length === logCount) // Update the slow app to contain valid info. Code should detect the change and not clean // it up. @@ -1133,7 +1133,7 @@ abstract class FsHistoryProviderSuite extends SparkFunSuite with Matchers with P clock.advance(TimeUnit.DAYS.toMillis(2)) provider.checkForLogs() provider.cleanLogs() - assert(new File(testDir.toURI).listFiles().size === validLogCount) + assert(new File(testDir.toURI).listFiles().length === validLogCount) } test("always find end event for finished apps") { @@ -1414,12 +1414,12 @@ abstract class FsHistoryProviderSuite extends SparkFunSuite with Matchers with P provider.checkForLogs() // The invalid application log file would be cleaned by checkAndCleanLog(). - assert(new File(testDir.toURI).listFiles().size === 1) + assert(new File(testDir.toURI).listFiles().length === 1) clock.advance(1) // cleanLogs() would clean the valid application log file. 
provider.cleanLogs() - assert(new File(testDir.toURI).listFiles().size === 0) + assert(new File(testDir.toURI).listFiles().length === 0) } private def assertOptionAfterSerde(opt: Option[Long], expected: Option[Long]): Unit = { @@ -1556,7 +1556,7 @@ abstract class FsHistoryProviderSuite extends SparkFunSuite with Matchers with P SparkListenerJobStart(1, 0, Seq.empty)), rollFile = false) provider.checkForLogs() provider.cleanLogs() - assert(dir.listFiles().size === 1) + assert(dir.listFiles().length === 1) assert(provider.getListing().length === 1) // Manually delete the appstatus file to make an invalid rolling event log @@ -1578,7 +1578,7 @@ abstract class FsHistoryProviderSuite extends SparkFunSuite with Matchers with P provider.checkForLogs() provider.cleanLogs() assert(provider.getListing().length === 1) - assert(dir.listFiles().size === 2) + assert(dir.listFiles().length === 2) // Make sure a new provider sees the valid application provider.stop() @@ -1615,7 +1615,7 @@ abstract class FsHistoryProviderSuite extends SparkFunSuite with Matchers with P // The 1st checkForLogs should scan/update app2 only since it is newer than app1 provider.checkForLogs() assert(provider.getListing().length === 1) - assert(dir.listFiles().size === 2) + assert(dir.listFiles().length === 2) assert(provider.getListing().map(e => e.id).contains("app2")) assert(!provider.getListing().map(e => e.id).contains("app1")) @@ -1630,7 +1630,7 @@ abstract class FsHistoryProviderSuite extends SparkFunSuite with Matchers with P // The 2nd checkForLogs should scan/update app3 only since it is newer than app1 provider.checkForLogs() assert(provider.getListing().length === 2) - assert(dir.listFiles().size === 3) + assert(dir.listFiles().length === 3) assert(provider.getListing().map(e => e.id).contains("app3")) assert(!provider.getListing().map(e => e.id).contains("app1")) @@ -1655,7 +1655,7 @@ abstract class FsHistoryProviderSuite extends SparkFunSuite with Matchers with P SparkListenerJobStart(1, 0, Seq.empty)), rollFile = false) provider.checkForLogs() provider.cleanLogs() - assert(dir.listFiles().size === 1) + assert(dir.listFiles().length === 1) assert(provider.getListing().length === 1) // Manually delete event log files and create event log file reader diff --git a/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala index 2f645e69079a2..abe05a8055843 100644 --- a/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala @@ -289,7 +289,7 @@ class StandaloneRestSubmitSuite extends SparkFunSuite { val statusRequestPath = s"$httpUrl/$v/submissions/status" val goodJson = constructSubmitRequest(masterUrl).toJson val badJson1 = goodJson.replaceAll("action", "fraction") // invalid JSON - val badJson2 = goodJson.substring(goodJson.size / 2) // malformed JSON + val badJson2 = goodJson.substring(goodJson.length / 2) // malformed JSON val notJson = "\"hello, world\"" val (response1, code1) = sendHttpRequestWithResponse(submitRequestPath, "POST") // missing JSON val (response2, code2) = sendHttpRequestWithResponse(submitRequestPath, "POST", badJson1) diff --git a/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala b/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala index e64ebe2a55142..0fc0b7536067e 100644 --- 
a/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala +++ b/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala @@ -97,7 +97,7 @@ class WholeTextFileRecordReaderSuite extends SparkFunSuite { val res = sc.wholeTextFiles(dir.toString, 3).collect() - assert(res.size === WholeTextFileRecordReaderSuite.fileNames.size, + assert(res.length === WholeTextFileRecordReaderSuite.fileNames.length, "Number of files read out does not fit with the actual value.") for ((filename, contents) <- res) { @@ -120,7 +120,7 @@ class WholeTextFileRecordReaderSuite extends SparkFunSuite { val res = sc.wholeTextFiles(dir.toString, 3).collect() - assert(res.size === WholeTextFileRecordReaderSuite.fileNames.size, + assert(res.length === WholeTextFileRecordReaderSuite.fileNames.length, "Number of files read out does not fit with the actual value.") for ((filename, contents) <- res) { diff --git a/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala b/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala index ef214bd50d928..95b484d7176a5 100644 --- a/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala +++ b/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala @@ -214,11 +214,11 @@ class PluginContainerSuite extends SparkFunSuite with LocalSparkContext { } val execFiles = children.filter(_.getName.startsWith(NonLocalModeSparkPlugin.executorFileStr)) - assert(execFiles.size === 1) + assert(execFiles.length === 1) val allLines = Files.readLines(execFiles(0), StandardCharsets.UTF_8) assert(allLines.size === 1) val addrs = NonLocalModeSparkPlugin.extractGpuAddrs(allLines.get(0)) - assert(addrs.size === 2) + assert(addrs.length === 2) assert(addrs.sorted === Array("3", "4")) assert(NonLocalModeSparkPlugin.driverContext != null) diff --git a/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala index 56783de1c13b4..4239180ba6c37 100644 --- a/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala @@ -91,7 +91,7 @@ class AsyncRDDActionsSuite extends SparkFunSuite with TimeLimits { val expected = input.take(num) val saw = rdd.takeAsync(num).get() assert(saw == expected, "incorrect result for rdd with %d partitions (expected %s, saw %s)" - .format(rdd.partitions.size, expected, saw)) + .format(rdd.partitions.length, expected, saw)) } val input = Range(1, 1000) diff --git a/core/src/test/scala/org/apache/spark/rdd/LocalCheckpointSuite.scala b/core/src/test/scala/org/apache/spark/rdd/LocalCheckpointSuite.scala index f644fee74a18b..591b8b4c0df7e 100644 --- a/core/src/test/scala/org/apache/spark/rdd/LocalCheckpointSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/LocalCheckpointSuite.scala @@ -159,7 +159,7 @@ class LocalCheckpointSuite extends SparkFunSuite with LocalSparkContext { test("missing checkpoint block fails with informative message") { val rdd = newRdd.localCheckpoint() - val numPartitions = rdd.partitions.size + val numPartitions = rdd.partitions.length val partitionIndices = rdd.partitions.map(_.index) val bmm = sc.env.blockManager.master diff --git a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala index 9b60d2eeeed1b..e436d98843411 100644 --- 
a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala @@ -41,7 +41,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { val pairs = sc.parallelize(Seq((1, 1), (1, 1), (3, 2), (5, 1), (5, 3)), 2) val sets = pairs.aggregateByKey(new HashSet[Int]())(_ += _, _ ++= _).collect() - assert(sets.size === 3) + assert(sets.length === 3) val valuesFor1 = sets.find(_._1 == 1).get._2 assert(valuesFor1.toList.sorted === List(1)) val valuesFor3 = sets.find(_._1 == 3).get._2 @@ -53,7 +53,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { test("groupByKey") { val pairs = sc.parallelize(Seq((1, 1), (1, 2), (1, 3), (2, 1))) val groups = pairs.groupByKey().collect() - assert(groups.size === 2) + assert(groups.length === 2) val valuesFor1 = groups.find(_._1 == 1).get._2 assert(valuesFor1.toList.sorted === List(1, 2, 3)) val valuesFor2 = groups.find(_._1 == 2).get._2 @@ -63,7 +63,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { test("groupByKey with duplicates") { val pairs = sc.parallelize(Seq((1, 1), (1, 2), (1, 3), (1, 1), (2, 1))) val groups = pairs.groupByKey().collect() - assert(groups.size === 2) + assert(groups.length === 2) val valuesFor1 = groups.find(_._1 == 1).get._2 assert(valuesFor1.toList.sorted === List(1, 1, 2, 3)) val valuesFor2 = groups.find(_._1 == 2).get._2 @@ -73,7 +73,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { test("groupByKey with negative key hash codes") { val pairs = sc.parallelize(Seq((-1, 1), (-1, 2), (-1, 3), (2, 1))) val groups = pairs.groupByKey().collect() - assert(groups.size === 2) + assert(groups.length === 2) val valuesForMinus1 = groups.find(_._1 == -1).get._2 assert(valuesForMinus1.toList.sorted === List(1, 2, 3)) val valuesFor2 = groups.find(_._1 == 2).get._2 @@ -83,7 +83,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { test("groupByKey with many output partitions") { val pairs = sc.parallelize(Seq((1, 1), (1, 2), (1, 3), (2, 1))) val groups = pairs.groupByKey(10).collect() - assert(groups.size === 2) + assert(groups.length === 2) val valuesFor1 = groups.find(_._1 == 1).get._2 assert(valuesFor1.toList.sorted === List(1, 2, 3)) val valuesFor2 = groups.find(_._1 == 2).get._2 @@ -249,7 +249,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { val rdd1 = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) val rdd2 = sc.parallelize(Seq((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) val joined = rdd1.join(rdd2).collect() - assert(joined.size === 4) + assert(joined.length === 4) assert(joined.toSet === Set( (1, (1, 'x')), (1, (2, 'x')), @@ -262,7 +262,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { val rdd1 = sc.parallelize(Seq((1, 1), (1, 2), (1, 3))) val rdd2 = sc.parallelize(Seq((1, 'x'), (1, 'y'))) val joined = rdd1.join(rdd2).collect() - assert(joined.size === 6) + assert(joined.length === 6) assert(joined.toSet === Set( (1, (1, 'x')), (1, (1, 'y')), @@ -277,7 +277,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { val rdd1 = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) val rdd2 = sc.parallelize(Seq((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) val joined = rdd1.leftOuterJoin(rdd2).collect() - assert(joined.size === 5) + assert(joined.length === 5) assert(joined.toSet === Set( (1, (1, Some('x'))), (1, (2, Some('x'))), @@ -296,7 +296,7 @@ 
class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { val rdd2 = sc.emptyRDD[(Int, Int)](intPairCT) val joined = rdd1.cogroup(rdd2).collect() - assert(joined.size > 0) + assert(joined.length > 0) } // See SPARK-9326 @@ -307,7 +307,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { val rdd1 = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) val rdd2 = sc.emptyRDD[Int](intCT).groupBy((x) => 5) val joined = rdd1.cogroup(rdd2).collect() - assert(joined.size > 0) + assert(joined.length > 0) } // See SPARK-22465 @@ -377,7 +377,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { val rdd1 = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) val rdd2 = sc.parallelize(Seq((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) val joined = rdd1.rightOuterJoin(rdd2).collect() - assert(joined.size === 5) + assert(joined.length === 5) assert(joined.toSet === Set( (1, (Some(1), 'x')), (1, (Some(2), 'x')), @@ -391,7 +391,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { val rdd1 = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) val rdd2 = sc.parallelize(Seq((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) val joined = rdd1.fullOuterJoin(rdd2).collect() - assert(joined.size === 6) + assert(joined.length === 6) assert(joined.toSet === Set( (1, (Some(1), Some('x'))), (1, (Some(2), Some('x'))), @@ -406,14 +406,14 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { val rdd1 = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) val rdd2 = sc.parallelize(Seq((4, 'x'), (5, 'y'), (5, 'z'), (6, 'w'))) val joined = rdd1.join(rdd2).collect() - assert(joined.size === 0) + assert(joined.length === 0) } test("join with many output partitions") { val rdd1 = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) val rdd2 = sc.parallelize(Seq((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) val joined = rdd1.join(rdd2, 10).collect() - assert(joined.size === 4) + assert(joined.length === 4) assert(joined.toSet === Set( (1, (1, 'x')), (1, (2, 'x')), @@ -426,7 +426,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { val rdd1 = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) val rdd2 = sc.parallelize(Seq((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) val joined = rdd1.groupWith(rdd2).collect() - assert(joined.size === 4) + assert(joined.length === 4) val joinedSet = joined.map(x => (x._1, (x._2._1.toList, x._2._2.toList))).toSet assert(joinedSet === Set( (1, (List(1, 2), List('x'))), @@ -441,7 +441,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { val rdd2 = sc.parallelize(Seq((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) val rdd3 = sc.parallelize(Seq((1, 'a'), (3, 'b'), (4, 'c'), (4, 'd'))) val joined = rdd1.groupWith(rdd2, rdd3).collect() - assert(joined.size === 4) + assert(joined.length === 4) val joinedSet = joined.map(x => (x._1, (x._2._1.toList, x._2._2.toList, x._2._3.toList))).toSet assert(joinedSet === Set( @@ -458,7 +458,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { val rdd3 = sc.parallelize(Seq((1, 'a'), (3, 'b'), (4, 'c'), (4, 'd'))) val rdd4 = sc.parallelize(Seq((2, '@'))) val joined = rdd1.groupWith(rdd2, rdd3, rdd4).collect() - assert(joined.size === 4) + assert(joined.length === 4) val joinedSet = joined.map(x => (x._1, (x._2._1.toList, x._2._2.toList, x._2._3.toList, x._2._4.toList))).toSet assert(joinedSet === Set( @@ -492,14 +492,14 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext 
{ val b = a.map(a => (a, (a * 2).toString)) // then a group by, and see we didn't revert to 2 partitions val c = b.groupByKey() - assert(c.partitions.size === 2000) + assert(c.partitions.length === 2000) } test("default partitioner uses largest partitioner") { val a = sc.makeRDD(Seq((1, "a"), (2, "b")), 2) val b = sc.makeRDD(Seq((1, "a"), (2, "b")), 2000) val c = a.join(b) - assert(c.partitions.size === 2000) + assert(c.partitions.length === 2000) } test("subtract") { @@ -507,7 +507,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { val b = sc.parallelize(Array(2, 3, 4).toImmutableArraySeq, 4) val c = a.subtract(b) assert(c.collect().toSet === Set(1)) - assert(c.partitions.size === a.partitions.size) + assert(c.partitions.length === a.partitions.length) } test("subtract with narrow dependency") { @@ -531,7 +531,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { val b = sc.parallelize(Seq((2, 20), (3, 30), (4, 40)), 4) val c = a.subtractByKey(b) assert(c.collect().toSet === Set((1, "a"), (1, "a"))) - assert(c.partitions.size === a.partitions.size) + assert(c.partitions.length === a.partitions.length) } test("subtractByKey with narrow dependency") { @@ -795,7 +795,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { assertBinomialSample(exact = exact, actual = v.toInt, trials = trials(k).toInt, p = samplingRate) } - assert(takeSample.size === takeSample.toSet.size) + assert(takeSample.length === takeSample.toSet.size) takeSample.foreach { x => assert(1 <= x._2 && x._2 <= n, s"elements not in [1, $n]") } } diff --git a/core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala index 3a097e5335a2a..7f12d8b624c84 100644 --- a/core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala @@ -47,7 +47,7 @@ class PipedRDDSuite extends SparkFunSuite with SharedSparkContext with Eventuall val piped = nums.pipe(Seq("cat")) val c = piped.collect() - assert(c.size === 4) + assert(c.length === 4) assert(c(0) === "1") assert(c(1) === "2") assert(c(2) === "3") @@ -61,7 +61,7 @@ class PipedRDDSuite extends SparkFunSuite with SharedSparkContext with Eventuall // verify that both RDD.pipe(command: String) and RDD.pipe(command: String, env) work good for (piped <- Seq(nums.pipe("wc -l"), nums.pipe("wc -l", Map[String, String]()))) { val c = piped.collect() - assert(c.size === 2) + assert(c.length === 2) assert(c(0).trim === "2") assert(c(1).trim === "2") } @@ -129,7 +129,7 @@ class PipedRDDSuite extends SparkFunSuite with SharedSparkContext with Eventuall val c = piped.collect() - assert(c.size === 8) + assert(c.length === 8) assert(c(0) === "0") assert(c(1) === "\u0001") assert(c(2) === "1_") @@ -151,7 +151,7 @@ class PipedRDDSuite extends SparkFunSuite with SharedSparkContext with Eventuall f(e + "_") } }).collect() - assert(d.size === 8) + assert(d.length === 8) assert(d(0) === "0") assert(d(1) === "\u0001") assert(d(2) === "b\t2_") @@ -216,7 +216,7 @@ class PipedRDDSuite extends SparkFunSuite with SharedSparkContext with Eventuall val nums = sc.makeRDD(Array(1, 2, 3, 4).toImmutableArraySeq, 2) val piped = nums.pipe(Seq("cat"), separateWorkingDir = true) val c = piped.collect() - assert(c.size === 4) + assert(c.length === 4) assert(c(0) === "1") assert(c(1) === "2") assert(c(2) === "3") diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala 
b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala index 32ba2053258eb..706ebfa936470 100644 --- a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala @@ -322,7 +322,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { test("empty RDD") { val empty = new EmptyRDD[Int](sc) assert(empty.count() === 0) - assert(empty.collect().size === 0) + assert(empty.collect().length === 0) val thrown = intercept[UnsupportedOperationException]{ empty.reduce(_ + _) @@ -331,12 +331,12 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { val emptyKv = new EmptyRDD[(Int, Int)](sc) val rdd = sc.parallelize(1 to 2, 2).map(x => (x, x)) - assert(rdd.join(emptyKv).collect().size === 0) - assert(rdd.rightOuterJoin(emptyKv).collect().size === 0) - assert(rdd.leftOuterJoin(emptyKv).collect().size === 2) - assert(rdd.fullOuterJoin(emptyKv).collect().size === 2) - assert(rdd.cogroup(emptyKv).collect().size === 2) - assert(rdd.union(emptyKv).collect().size === 2) + assert(rdd.join(emptyKv).collect().length === 0) + assert(rdd.rightOuterJoin(emptyKv).collect().length === 0) + assert(rdd.leftOuterJoin(emptyKv).collect().length === 2) + assert(rdd.fullOuterJoin(emptyKv).collect().length === 2) + assert(rdd.cogroup(emptyKv).collect().length === 2) + assert(rdd.union(emptyKv).collect().length === 2) } test("repartitioned RDDs") { @@ -348,7 +348,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { // Coalesce partitions val repartitioned1 = data.repartition(2) - assert(repartitioned1.partitions.size == 2) + assert(repartitioned1.partitions.length == 2) val partitions1 = repartitioned1.glom().collect() assert(partitions1(0).length > 0) assert(partitions1(1).length > 0) @@ -356,7 +356,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { // Split partitions val repartitioned2 = data.repartition(20) - assert(repartitioned2.partitions.size == 20) + assert(repartitioned2.partitions.length == 20) val partitions2 = repartitioned2.glom().collect() assert(partitions2(0).length > 0) assert(partitions2(19).length > 0) @@ -370,7 +370,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { val data = sc.parallelize(input.toImmutableArraySeq, initialPartitions) val repartitioned1 = data.repartition(2) - assert(repartitioned1.partitions.size == 2) + assert(repartitioned1.partitions.length == 2) val partitions1 = repartitioned1.glom().collect() // some noise in balancing is allowed due to randomization assert(math.abs(partitions1(0).length - 500) < initialPartitions) @@ -380,7 +380,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { def testSplitPartitions(input: Seq[Int], initialPartitions: Int, finalPartitions: Int): Unit = { val data = sc.parallelize(input, initialPartitions) val repartitioned = data.repartition(finalPartitions) - assert(repartitioned.partitions.size === finalPartitions) + assert(repartitioned.partitions.length === finalPartitions) val partitions = repartitioned.glom().collect() // assert all elements are present assert(repartitioned.collect().sortWith(_ > _).toSeq === input.toSeq.sortWith(_ > _).toSeq) @@ -441,7 +441,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { // when shuffling, we can increase the number of partitions val coalesced6 = data.coalesce(20, shuffle = true) - assert(coalesced6.partitions.size === 20) + 
assert(coalesced6.partitions.length === 20) assert(coalesced6.collect().toSet === (1 to 10).toSet) } @@ -564,13 +564,13 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { val coalesced2 = data2.coalesce(partitions) // test that we have 10000 partitions - assert(coalesced2.partitions.size == 10000, "Expected 10000 partitions, but got " + - coalesced2.partitions.size) + assert(coalesced2.partitions.length == 10000, "Expected 10000 partitions, but got " + + coalesced2.partitions.length) // test that we have 100 partitions val coalesced3 = data2.coalesce(numMachines * 2) - assert(coalesced3.partitions.size == 100, "Expected 100 partitions, but got " + - coalesced3.partitions.size) + assert(coalesced3.partitions.length == 100, "Expected 100 partitions, but got " + + coalesced3.partitions.length) // test that the groups are load balanced with 100 +/- 20 elements in each val maxImbalance3 = coalesced3.partitions @@ -613,9 +613,9 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { val data = sc.parallelize(1 to 10, 10) // Note that split number starts from 0, so > 8 means only 10th partition left. val prunedRdd = new PartitionPruningRDD(data, splitNum => splitNum > 8) - assert(prunedRdd.partitions.size === 1) + assert(prunedRdd.partitions.length === 1) val prunedData = prunedRdd.collect() - assert(prunedData.size === 1) + assert(prunedData.length === 1) assert(prunedData(0) === 10) } @@ -626,7 +626,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { test("take") { var nums = sc.makeRDD(Range(1, 1000), 1) - assert(nums.take(0).size === 0) + assert(nums.take(0).length === 0) assert(nums.take(1) === Array(1)) assert(nums.take(3) === Array(1, 2, 3)) assert(nums.take(500) === (1 to 500).toArray) @@ -635,7 +635,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { assert(nums.take(1000) === (1 to 999).toArray) nums = sc.makeRDD(Range(1, 1000), 2) - assert(nums.take(0).size === 0) + assert(nums.take(0).length === 0) assert(nums.take(1) === Array(1)) assert(nums.take(3) === Array(1, 2, 3)) assert(nums.take(500) === (1 to 500).toArray) @@ -644,7 +644,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { assert(nums.take(1000) === (1 to 999).toArray) nums = sc.makeRDD(Range(1, 1000), 100) - assert(nums.take(0).size === 0) + assert(nums.take(0).length === 0) assert(nums.take(1) === Array(1)) assert(nums.take(3) === Array(1, 2, 3)) assert(nums.take(500) === (1 to 500).toArray) @@ -653,7 +653,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { assert(nums.take(1000) === (1 to 999).toArray) nums = sc.makeRDD(Range(1, 1000), 1000) - assert(nums.take(0).size === 0) + assert(nums.take(0).length === 0) assert(nums.take(1) === Array(1)) assert(nums.take(3) === Array(1, 2, 3)) assert(nums.take(500) === (1 to 500).toArray) @@ -662,7 +662,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { assert(nums.take(1000) === (1 to 999).toArray) nums = sc.parallelize(1 to 2, 2) - assert(nums.take(2147483638).size === 2) + assert(nums.take(2147483638).length === 2) assert(nums.takeAsync(2147483638).get().size === 2) } @@ -670,7 +670,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { val nums = Seq.range(1, 100000) val ints = sc.makeRDD(scala.util.Random.shuffle(nums), 2) val topK = ints.top(5) - assert(topK.size === 5) + assert(topK.length === 5) assert(topK === nums.reverse.take(5)) 
} @@ -679,7 +679,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { implicit val ord = implicitly[Ordering[String]].reverse val rdd = sc.makeRDD(words, 2) val topK = rdd.top(2) - assert(topK.size === 2) + assert(topK.length === 2) assert(topK.sorted === Array("b", "a")) } @@ -687,7 +687,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { val nums = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) val rdd = sc.makeRDD(nums.toImmutableArraySeq, 2) val sortedLowerK = rdd.takeOrdered(5) - assert(sortedLowerK.size === 5) + assert(sortedLowerK.length === 5) assert(sortedLowerK === Array(1, 2, 3, 4, 5)) } @@ -695,7 +695,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { val nums = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) val rdd = sc.makeRDD(nums.toImmutableArraySeq, 2) val sortedLowerK = rdd.takeOrdered(0) - assert(sortedLowerK.size === 0) + assert(sortedLowerK.length === 0) } test("SPARK-40276: takeOrdered with empty RDDs") { @@ -708,7 +708,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { implicit val ord = implicitly[Ordering[Int]].reverse val rdd = sc.makeRDD(nums.toImmutableArraySeq, 2) val sortedTopK = rdd.takeOrdered(5) - assert(sortedTopK.size === 5) + assert(sortedTopK.length === 5) assert(sortedTopK === Array(10, 9, 8, 7, 6)) assert(sortedTopK === nums.sorted(ord).take(5)) } @@ -736,48 +736,48 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { for (num <- List(5, 20, 100)) { val sample = data.takeSample(withReplacement = false, num = num) - assert(sample.size === num) // Got exactly num elements + assert(sample.length === num) // Got exactly num elements assert(sample.toSet.size === num) // Elements are distinct assert(sample.forall(x => 1 <= x && x <= n), s"elements not in [1, $n]") } for (seed <- 1 to 5) { val sample = data.takeSample(withReplacement = false, 20, seed) - assert(sample.size === 20) // Got exactly 20 elements + assert(sample.length === 20) // Got exactly 20 elements assert(sample.toSet.size === 20) // Elements are distinct assert(sample.forall(x => 1 <= x && x <= n), s"elements not in [1, $n]") } for (seed <- 1 to 5) { val sample = data.takeSample(withReplacement = false, 100, seed) - assert(sample.size === 100) // Got only 100 elements + assert(sample.length === 100) // Got only 100 elements assert(sample.toSet.size === 100) // Elements are distinct assert(sample.forall(x => 1 <= x && x <= n), s"elements not in [1, $n]") } for (seed <- 1 to 5) { val sample = data.takeSample(withReplacement = true, 20, seed) - assert(sample.size === 20) // Got exactly 20 elements + assert(sample.length === 20) // Got exactly 20 elements assert(sample.forall(x => 1 <= x && x <= n), s"elements not in [1, $n]") } { val sample = data.takeSample(withReplacement = true, num = 20) - assert(sample.size === 20) // Got exactly 20 elements + assert(sample.length === 20) // Got exactly 20 elements assert(sample.forall(x => 1 <= x && x <= n), s"elements not in [1, $n]") } { val sample = data.takeSample(withReplacement = true, num = n) - assert(sample.size === n) // Got exactly n elements + assert(sample.length === n) // Got exactly n elements // Chance of getting all distinct elements is astronomically low, so test we got < n assert(sample.toSet.size < n, "sampling with replacement returned all distinct elements") assert(sample.forall(x => 1 <= x && x <= n), s"elements not in [1, $n]") } for (seed <- 1 to 5) { val sample = data.takeSample(withReplacement 
= true, n, seed) - assert(sample.size === n) // Got exactly n elements + assert(sample.length === n) // Got exactly n elements // Chance of getting all distinct elements is astronomically low, so test we got < n assert(sample.toSet.size < n, "sampling with replacement returned all distinct elements") } for (seed <- 1 to 5) { val sample = data.takeSample(withReplacement = true, 2 * n, seed) - assert(sample.size === 2 * n) // Got exactly 2 * n elements + assert(sample.length === 2 * n) // Got exactly 2 * n elements // Chance of getting all distinct elements is still quite low, so test we got < n assert(sample.toSet.size < n, "sampling with replacement returned all distinct elements") } @@ -794,7 +794,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { val data = sc.parallelize(1 to n, 2) for(seed <- 1 to 5) { val splits = data.randomSplit(Array(1.0, 2.0, 3.0), seed) - assert(splits.size == 3, "wrong number of splits") + assert(splits.length == 3, "wrong number of splits") assert(splits.flatMap(_.collect()).sorted.toList == data.collect().toList, "incomplete or wrong split") val s = splits.map(_.count()) @@ -1179,7 +1179,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { sc.hadoopFile(outDir, classOf[TextInputFormat], classOf[LongWritable], classOf[Text]) val coalescedHadoopRDD = hadoopRDD.coalesce(2, partitionCoalescer = Option(new SizeBasedCoalescer(maxSplitSize))) - assert(coalescedHadoopRDD.partitions.size <= 10) + assert(coalescedHadoopRDD.partitions.length <= 10) var totalPartitionCount = 0L coalescedHadoopRDD.partitions.foreach(partition => { var splitSizeSum = 0L @@ -1256,7 +1256,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { .map(coalescedRDD.getPreferredLocations(_).head) .groupBy(identity) .view - .mapValues(_.size) + .mapValues(_.length) // Make sure the coalesced partitions are distributed fairly evenly between the two locations. // This should not become flaky since the DefaultPartitionsCoalescer uses a fixed seed. 
@@ -1357,7 +1357,7 @@ class SizeBasedCoalescer(val maxSize: Int) extends PartitionCoalescer with Seria totalSum += splitSize } - while (index < partitions.size) { + while (index < partitions.length) { val partition = partitions(index) val fileSplit = partition.asInstanceOf[HadoopPartition].inputSplit.value.asInstanceOf[FileSplit] diff --git a/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala b/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala index 802889b047796..5771e99b64c69 100644 --- a/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala @@ -35,7 +35,7 @@ class SortingSuite extends SparkFunSuite with SharedSparkContext with Matchers { val pairArr = Array.fill(1000) { (rand.nextInt(), rand.nextInt()) } val pairs = sc.parallelize(pairArr.toImmutableArraySeq, 2) val sorted = pairs.sortByKey() - assert(sorted.partitions.size === 2) + assert(sorted.partitions.length === 2) assert(sorted.collect() === pairArr.sortBy(_._1)) } @@ -44,7 +44,7 @@ class SortingSuite extends SparkFunSuite with SharedSparkContext with Matchers { val pairArr = Array.fill(1000) { (rand.nextInt(), rand.nextInt()) } val pairs = sc.parallelize(pairArr.toImmutableArraySeq, 2) val sorted = pairs.sortByKey(true, 1) - assert(sorted.partitions.size === 1) + assert(sorted.partitions.length === 1) assert(sorted.collect() === pairArr.sortBy(_._1)) } @@ -53,7 +53,7 @@ class SortingSuite extends SparkFunSuite with SharedSparkContext with Matchers { val pairArr = Array.fill(1000) { (rand.nextInt(), rand.nextInt()) } val pairs = sc.parallelize(pairArr.toImmutableArraySeq, 2) val sorted = pairs.sortByKey(true, 20) - assert(sorted.partitions.size === 20) + assert(sorted.partitions.length === 20) assert(sorted.collect() === pairArr.sortBy(_._1)) } diff --git a/core/src/test/scala/org/apache/spark/rdd/ZippedPartitionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/ZippedPartitionsSuite.scala index 7079b9ea8eadc..c04719eb9ea6f 100644 --- a/core/src/test/scala/org/apache/spark/rdd/ZippedPartitionsSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/ZippedPartitionsSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.{SharedSparkContext, SparkFunSuite} object ZippedPartitionsSuite { def procZippedData(i: Iterator[Int], s: Iterator[String], d: Iterator[Double]) : Iterator[Int] = { - Iterator(i.toArray.size, s.toArray.size, d.toArray.size) + Iterator(i.toArray.length, s.toArray.length, d.toArray.length) } } @@ -35,7 +35,7 @@ class ZippedPartitionsSuite extends SparkFunSuite with SharedSparkContext { val obtainedSizes = zippedRDD.collect() val expectedSizes = Array(2, 3, 1, 2, 3, 1) - assert(obtainedSizes.size == 6) + assert(obtainedSizes.length == 6) assert(obtainedSizes.zip(expectedSizes).forall(x => x._1 == x._2)) } } diff --git a/core/src/test/scala/org/apache/spark/resource/ResourceProfileSuite.scala b/core/src/test/scala/org/apache/spark/resource/ResourceProfileSuite.scala index fd7018f189e26..be38315cd75fe 100644 --- a/core/src/test/scala/org/apache/spark/resource/ResourceProfileSuite.scala +++ b/core/src/test/scala/org/apache/spark/resource/ResourceProfileSuite.scala @@ -374,7 +374,7 @@ class ResourceProfileSuite extends SparkFunSuite with MockitoSugar { rprof.require(eReq) // Update this if new resource type added - assert(ResourceProfile.allSupportedExecutorResources.size === 5, + assert(ResourceProfile.allSupportedExecutorResources.length === 5, "Executor resources should have 5 supported resources") 
assert(rprof.build().getCustomExecutorResources().size === 1, "Executor resources should have 1 custom resource") diff --git a/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala b/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala index 1ab9f7c5d2b0c..20d6cc7671582 100644 --- a/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala @@ -101,13 +101,13 @@ class ResourceUtilsSuite extends SparkFunSuite val gpuValue = resources.get(GPU) assert(gpuValue.nonEmpty, "Should have a gpu entry") assert(gpuValue.get.name == "gpu", "name should be gpu") - assert(gpuValue.get.addresses.size == 2, "Should have 2 indexes") + assert(gpuValue.get.addresses.length == 2, "Should have 2 indexes") assert(gpuValue.get.addresses.sameElements(Array("0", "1")), "should have 0,1 entries") val fpgaValue = resources.get(FPGA) assert(fpgaValue.nonEmpty, "Should have a gpu entry") assert(fpgaValue.get.name == "fpga", "name should be fpga") - assert(fpgaValue.get.addresses.size == 3, "Should have 3 indexes") + assert(fpgaValue.get.addresses.length == 3, "Should have 3 indexes") assert(fpgaValue.get.addresses.sameElements(Array("f1", "f2", "f3")), "should have f1,f2,f3 entries") } @@ -231,7 +231,7 @@ class ResourceUtilsSuite extends SparkFunSuite val gpuValue = resources.get(GPU) assert(gpuValue.nonEmpty, "Should have a gpu entry") assert(gpuValue.get.name == "gpu", "name should be gpu") - assert(gpuValue.get.addresses.size == 2, "Should have 2 indexes") + assert(gpuValue.get.addresses.length == 2, "Should have 2 indexes") assert(gpuValue.get.addresses.sameElements(Array("0", "1")), "should have 0,1 entries") } } diff --git a/core/src/test/scala/org/apache/spark/scheduler/AQEShuffledRDD.scala b/core/src/test/scala/org/apache/spark/scheduler/AQEShuffledRDD.scala index 3f8eaede6e799..84f9ef0d557e6 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/AQEShuffledRDD.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/AQEShuffledRDD.scala @@ -48,7 +48,7 @@ class CoalescedPartitioner(val parent: Partitioner, val partitionStartIndices: A result } - override def numPartitions: Int = partitionStartIndices.size + override def numPartitions: Int = partitionStartIndices.length override def getPartition(key: Any): Int = { parentPartitionMapping(parent.getPartition(key)) diff --git a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala index bf5e9d96cd80e..e9b8ae4bffe6d 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala @@ -62,7 +62,7 @@ class CoarseGrainedSchedulerBackendSuite extends SparkFunSuite with LocalSparkCo } assert(thrown.getMessage.contains("using broadcast variables for large values")) val smaller = sc.parallelize(1 to 4).collect() - assert(smaller.size === 4) + assert(smaller.length === 4) } test("compute max number of concurrent tasks can be launched") { diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 0f7146bc7c150..c55f627075e8f 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -462,9 
+462,9 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti /** Send the given CompletionEvent messages for the tasks in the TaskSet. */ private def complete(taskSet: TaskSet, taskEndInfos: Seq[(TaskEndReason, Any)]): Unit = { - assert(taskSet.tasks.size >= taskEndInfos.size) + assert(taskSet.tasks.length >= taskEndInfos.size) for ((result, i) <- taskEndInfos.zipWithIndex) { - if (i < taskSet.tasks.size) { + if (i < taskSet.tasks.length) { runEvent(makeCompletionEvent(taskSet.tasks(i), result._1, result._2)) } } @@ -474,9 +474,9 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti accumId: Long, taskSet: TaskSet, results: Seq[(TaskEndReason, Any)]): Unit = { - assert(taskSet.tasks.size >= results.size) + assert(taskSet.tasks.length >= results.size) for ((result, i) <- results.zipWithIndex) { - if (i < taskSet.tasks.size) { + if (i < taskSet.tasks.length) { runEvent(makeCompletionEvent( taskSet.tasks(i), result._1, @@ -1671,21 +1671,21 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti runEvent(makeCompletionEvent( taskSet.tasks(0), Success, - makeMapStatus("hostA", reduceRdd.partitions.size))) + makeMapStatus("hostA", reduceRdd.partitions.length))) assert(shuffleStage.numAvailableOutputs === 0) // should work because it's a non-failed host (so the available map outputs will increase) runEvent(makeCompletionEvent( taskSet.tasks(0), Success, - makeMapStatus("hostB", reduceRdd.partitions.size))) + makeMapStatus("hostB", reduceRdd.partitions.length))) assert(shuffleStage.numAvailableOutputs === 1) // should be ignored for being too old runEvent(makeCompletionEvent( taskSet.tasks(0), Success, - makeMapStatus("hostA", reduceRdd.partitions.size))) + makeMapStatus("hostA", reduceRdd.partitions.length))) assert(shuffleStage.numAvailableOutputs === 1) // should work because it's a new epoch, which will increase the number of available map @@ -1694,7 +1694,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti runEvent(makeCompletionEvent( taskSet.tasks(1), Success, - makeMapStatus("hostA", reduceRdd.partitions.size))) + makeMapStatus("hostA", reduceRdd.partitions.length))) assert(shuffleStage.numAvailableOutputs === 2) assert(mapOutputTracker.getMapSizesByExecutorId(shuffleId, 0).map(_._1).toSet === HashSet(makeBlockManagerId("hostB"), makeBlockManagerId("hostA"))) @@ -2081,7 +2081,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti // stage complete), but the tasks that ran on HostA need to be re-run, so the DAGScheduler // should re-submit the stage with one task (the task that originally ran on HostA). assert(taskSets.size === 2) - assert(taskSets(1).tasks.size === 1) + assert(taskSets(1).tasks.length === 1) // Make sure that the stage that was re-submitted was the ShuffleMapStage (not the reduce // stage, which shouldn't be run until all of the tasks in the ShuffleMapStage complete on @@ -2735,7 +2735,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti // Now complete tasks in the second task set val newTaskSet = taskSets(1) // 2 tasks should have been re-submitted, for tasks 0 and 1 (which ran on hostA). - assert(newTaskSet.tasks.size === 2) + assert(newTaskSet.tasks.length === 2) // Complete task 0 from the original task set (i.e., not the one that's currently active). // This should still be counted towards the job being complete (but there's still one // outstanding task). 
@@ -2878,7 +2878,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti // failed hostA, so both should be resubmitted. Complete them on hostB successfully. scheduler.resubmitFailedStages() assert(taskSets(2).stageId === 0 && taskSets(2).stageAttemptId === 1 - && taskSets(2).tasks.size === 2) + && taskSets(2).tasks.length === 2) complete(taskSets(2), Seq( (Success, makeMapStatus("hostB", 2)), (Success, makeMapStatus("hostB", 2)))) @@ -2898,7 +2898,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti // Task(stageId=1, stageAttemptId=1, partitionId=1) of this new active stage attempt // is still running. assert(taskSets(3).stageId === 1 && taskSets(3).stageAttemptId === 1 - && taskSets(3).tasks.size === 2) + && taskSets(3).tasks.length === 2) runEvent(makeCompletionEvent( taskSets(3).tasks(0), Success, makeMapStatus("hostB", 2))) @@ -2907,7 +2907,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti // was ignored due to executor failure assert(taskSets.size === 5) assert(taskSets(4).stageId === 1 && taskSets(4).stageAttemptId === 2 - && taskSets(4).tasks.size === 1) + && taskSets(4).tasks.length === 1) // Complete task(stageId=1, stageAttempt=2, partitionId=1) successfully. runEvent(makeCompletionEvent( @@ -4445,7 +4445,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti // a scenario where stage 0 needs to be resubmitted upon finishing all tasks. // Merge finalization should be scheduled in this case. for ((result, i) <- taskResults.zipWithIndex) { - if (i == taskSets(0).tasks.size - 1) { + if (i == taskSets(0).tasks.length - 1) { mapOutputTracker.removeOutputsOnHost("host0") } runEvent(makeCompletionEvent(taskSets(0).tasks(i), result._1, result._2)) @@ -4522,7 +4522,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti // a scenario where stage 0 needs to be resubmitted upon finishing all tasks. // Merge finalization should be scheduled in this case. for ((result, i) <- taskResults.zipWithIndex) { - if (i == taskSets(0).tasks.size - 1) { + if (i == taskSets(0).tasks.length - 1) { mapOutputTracker.removeOutputsOnHost("host0") } runEvent(makeCompletionEvent(taskSets(0).tasks(i), result._1, result._2)) @@ -4986,7 +4986,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti * Note that this checks only the host and not the executor ID. 
*/ private def assertLocations(taskSet: TaskSet, hosts: Seq[Seq[String]]): Unit = { - assert(hosts.size === taskSet.tasks.size) + assert(hosts.size === taskSet.tasks.length) for ((taskLocs, expectedLocs) <- taskSet.tasks.map(_.preferredLocations).zip(hosts)) { assert(taskLocs.map(_.host).toSet === expectedLocs.toSet) } diff --git a/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala index cf2240a0511d7..13e7ff758ebaf 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala @@ -268,7 +268,7 @@ class MapStatusSuite extends SparkFunSuite { "number of skewed block sizes") val smallAndUntrackedBlocks = - nonEmptyBlocks.slice(0, nonEmptyBlocks.size - trackedSkewedBlocksLength) + nonEmptyBlocks.slice(0, nonEmptyBlocks.length - trackedSkewedBlocksLength) val avg = smallAndUntrackedBlocks.sum / smallAndUntrackedBlocks.length val loc = BlockManagerId("a", "b", 10) diff --git a/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala index 0533f9d7d8a49..f1a4b97c2981d 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala @@ -143,14 +143,14 @@ class OutputCommitCoordinatorSuite extends SparkFunSuite with BeforeAndAfter { val rdd = sc.parallelize(Seq(1), 1) sc.runJob(rdd, OutputCommitFunctions(tempDir.getAbsolutePath).commitSuccessfully _, rdd.partitions.indices) - assert(tempDir.list().size === 1) + assert(tempDir.list().length === 1) } ignore("If commit fails, if task is retried it should not be locked, and will succeed.") { val rdd = sc.parallelize(Seq(1), 1) sc.runJob(rdd, OutputCommitFunctions(tempDir.getAbsolutePath).failFirstCommitAttempt _, rdd.partitions.indices) - assert(tempDir.list().size === 1) + assert(tempDir.list().length === 1) } test("Job should not complete if all commits are denied") { @@ -161,13 +161,13 @@ class OutputCommitCoordinatorSuite extends SparkFunSuite with BeforeAndAfter { def resultHandler(x: Int, y: Unit): Unit = {} val futureAction: SimpleFutureAction[Unit] = sc.submitJob[Int, Unit, Unit](rdd, OutputCommitFunctions(tempDir.getAbsolutePath).commitSuccessfully, - 0 until rdd.partitions.size, resultHandler, ()) + 0 until rdd.partitions.length, resultHandler, ()) // It's an error if the job completes successfully even though no committer was authorized, // so throw an exception if the job was allowed to complete. 
intercept[TimeoutException] { ThreadUtils.awaitResult(futureAction, 5.seconds) } - assert(tempDir.list().size === 0) + assert(tempDir.list().length === 0) } test("Only authorized committer failures can clear the authorized committer lock (SPARK-6614)") { diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala index f0ae7fc74112b..2ab7df0d9cfd3 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala @@ -1815,10 +1815,10 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext var has1Gpu = 0 for (tDesc <- taskDescriptions) { assert(tDesc.resources.contains(GPU)) - if (tDesc.resources(GPU).addresses.size == 2) { + if (tDesc.resources(GPU).addresses.length == 2) { has2Gpus += 1 } - if (tDesc.resources(GPU).addresses.size == 1) { + if (tDesc.resources(GPU).addresses.length == 1) { has1Gpu += 1 } } @@ -1836,7 +1836,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext taskDescriptions = taskScheduler.resourceOffers(workerOffers3).flatten assert(2 === taskDescriptions.length) assert(taskDescriptions.head.resources.contains(GPU)) - assert(2 == taskDescriptions.head.resources(GPU).addresses.size) + assert(2 == taskDescriptions.head.resources(GPU).addresses.length) } test("Scheduler works with task resource profiles") { @@ -1875,10 +1875,10 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext var has1Gpu = 0 for (tDesc <- taskDescriptions) { assert(tDesc.resources.contains(GPU)) - if (tDesc.resources(GPU).addresses.size == 2) { + if (tDesc.resources(GPU).addresses.length == 2) { has2Gpus += 1 } - if (tDesc.resources(GPU).addresses.size == 1) { + if (tDesc.resources(GPU).addresses.length == 1) { has1Gpu += 1 } } @@ -1896,7 +1896,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext taskDescriptions = taskScheduler.resourceOffers(workerOffers3).flatten assert(2 === taskDescriptions.length) assert(taskDescriptions.head.resources.contains(GPU)) - assert(2 == taskDescriptions.head.resources(GPU).addresses.size) + assert(2 == taskDescriptions.head.resources(GPU).addresses.length) } test("Calculate available tasks slots for task resource profiles") { diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala index 2fe50a486dbd6..2f8b6df8beac5 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala @@ -845,7 +845,7 @@ class TaskSetManagerSuite // multiple 1k result val r = sc.makeRDD(0 until 10, 10).map(genBytes(1024)).collect() - assert(10 === r.size) + assert(10 === r.length) // single 10M result val thrown = intercept[SparkException] {sc.makeRDD(genBytes(10 << 20)(0), 1).collect()} @@ -863,7 +863,7 @@ class TaskSetManagerSuite sc = new SparkContext("local", "test", conf) // final result is below limit. 
val r = sc.makeRDD(0 until 2000, 2000).distinct(10).filter(_ == 0).collect() - assert(1 === r.size) + assert(1 === r.length) } test("[SPARK-13931] taskSetManager should not send Resubmitted tasks after being a zombie") { diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerDistributedSuite.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerDistributedSuite.scala index 4acb4bbc779c3..25db9a5c68612 100644 --- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerDistributedSuite.scala +++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerDistributedSuite.scala @@ -48,7 +48,7 @@ class KryoSerializerDistributedSuite extends SparkFunSuite with LocalSparkContex val shuffledRDD = cachedRDD.map { case (i, o) => (i * i * i - 10 * i * i, o)} // Join the two RDDs, and force evaluation - assert(shuffledRDD.join(cachedRDD).collect().size == 1) + assert(shuffledRDD.join(cachedRDD).collect().length == 1) } } diff --git a/core/src/test/scala/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala index 8a9537b4f18d7..a9ca9135f38a9 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala @@ -236,7 +236,7 @@ class IndexShuffleBlockResolverSuite extends SparkFunSuite { ShuffleMergedBlockId(shuffleId, shuffleMergeId, reduceId), dirs) assert(mergedBlockMeta.getNumChunks === 3) - assert(mergedBlockMeta.readChunkBitmaps().size === 3) + assert(mergedBlockMeta.readChunkBitmaps().length === 3) assert(mergedBlockMeta.readChunkBitmaps()(0).contains(1)) assert(mergedBlockMeta.readChunkBitmaps()(0).contains(2)) assert(!mergedBlockMeta.readChunkBitmaps()(0).contains(3)) diff --git a/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala b/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala index ccf6c9184cc96..f2b795764b7e8 100644 --- a/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala +++ b/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala @@ -170,40 +170,44 @@ class AppStatusStoreSuite extends SparkFunSuite { assert(actualQuantiles === expectedQuantiles) } - assertQuantiles(_.executorDeserializeTime, summary.executorDeserializeTime) - assertQuantiles(_.executorDeserializeCpuTime, summary.executorDeserializeCpuTime) - assertQuantiles(_.executorRunTime, summary.executorRunTime) - assertQuantiles(_.executorRunTime, summary.executorRunTime) - assertQuantiles(_.executorCpuTime, summary.executorCpuTime) - assertQuantiles(_.resultSize, summary.resultSize) - assertQuantiles(_.jvmGCTime, summary.jvmGcTime) - assertQuantiles(_.resultSerializationTime, summary.resultSerializationTime) - assertQuantiles(_.memoryBytesSpilled, summary.memoryBytesSpilled) - assertQuantiles(_.diskBytesSpilled, summary.diskBytesSpilled) - assertQuantiles(_.peakExecutionMemory, summary.peakExecutionMemory) - assertQuantiles(_.inputMetrics.bytesRead, summary.inputMetrics.bytesRead) - assertQuantiles(_.inputMetrics.recordsRead, summary.inputMetrics.recordsRead) - assertQuantiles(_.outputMetrics.bytesWritten, summary.outputMetrics.bytesWritten) - assertQuantiles(_.outputMetrics.recordsWritten, summary.outputMetrics.recordsWritten) - assertQuantiles(_.shuffleReadMetrics.remoteBlocksFetched, + assertQuantiles(_.executorDeserializeTime.toDouble, summary.executorDeserializeTime) + 
assertQuantiles(_.executorDeserializeCpuTime.toDouble, summary.executorDeserializeCpuTime) + assertQuantiles(_.executorRunTime.toDouble, summary.executorRunTime) + assertQuantiles(_.executorRunTime.toDouble, summary.executorRunTime) + assertQuantiles(_.executorCpuTime.toDouble, summary.executorCpuTime) + assertQuantiles(_.resultSize.toDouble, summary.resultSize) + assertQuantiles(_.jvmGCTime.toDouble, summary.jvmGcTime) + assertQuantiles(_.resultSerializationTime.toDouble, summary.resultSerializationTime) + assertQuantiles(_.memoryBytesSpilled.toDouble, summary.memoryBytesSpilled) + assertQuantiles(_.diskBytesSpilled.toDouble, summary.diskBytesSpilled) + assertQuantiles(_.peakExecutionMemory.toDouble, summary.peakExecutionMemory) + assertQuantiles(_.inputMetrics.bytesRead.toDouble, summary.inputMetrics.bytesRead) + assertQuantiles(_.inputMetrics.recordsRead.toDouble, summary.inputMetrics.recordsRead) + assertQuantiles(_.outputMetrics.bytesWritten.toDouble, summary.outputMetrics.bytesWritten) + assertQuantiles(_.outputMetrics.recordsWritten.toDouble, + summary.outputMetrics.recordsWritten) + assertQuantiles(_.shuffleReadMetrics.remoteBlocksFetched.toDouble, summary.shuffleReadMetrics.remoteBlocksFetched) - assertQuantiles(_.shuffleReadMetrics.localBlocksFetched, + assertQuantiles(_.shuffleReadMetrics.localBlocksFetched.toDouble, summary.shuffleReadMetrics.localBlocksFetched) - assertQuantiles(_.shuffleReadMetrics.fetchWaitTime, + assertQuantiles(_.shuffleReadMetrics.fetchWaitTime.toDouble, summary.shuffleReadMetrics.fetchWaitTime) - assertQuantiles(_.shuffleReadMetrics.remoteBytesRead, + assertQuantiles(_.shuffleReadMetrics.remoteBytesRead.toDouble, summary.shuffleReadMetrics.remoteBytesRead) - assertQuantiles(_.shuffleReadMetrics.remoteBytesReadToDisk, + assertQuantiles(_.shuffleReadMetrics.remoteBytesReadToDisk.toDouble, summary.shuffleReadMetrics.remoteBytesReadToDisk) assertQuantiles( - t => t.shuffleReadMetrics.localBytesRead + t.shuffleReadMetrics.remoteBytesRead, + t => t.shuffleReadMetrics.localBytesRead + t.shuffleReadMetrics.remoteBytesRead.toDouble, summary.shuffleReadMetrics.readBytes) assertQuantiles( - t => t.shuffleReadMetrics.localBlocksFetched + t.shuffleReadMetrics.remoteBlocksFetched, + t => t.shuffleReadMetrics.localBlocksFetched + + t.shuffleReadMetrics.remoteBlocksFetched.toDouble, summary.shuffleReadMetrics.totalBlocksFetched) - assertQuantiles(_.shuffleWriteMetrics.bytesWritten, summary.shuffleWriteMetrics.writeBytes) - assertQuantiles(_.shuffleWriteMetrics.writeTime, summary.shuffleWriteMetrics.writeTime) - assertQuantiles(_.shuffleWriteMetrics.recordsWritten, + assertQuantiles(_.shuffleWriteMetrics.bytesWritten.toDouble, + summary.shuffleWriteMetrics.writeBytes) + assertQuantiles(_.shuffleWriteMetrics.writeTime.toDouble, + summary.shuffleWriteMetrics.writeTime) + assertQuantiles(_.shuffleWriteMetrics.recordsWritten.toDouble, summary.shuffleWriteMetrics.writeRecords) } finally { appStore.close() diff --git a/core/src/test/scala/org/apache/spark/storage/DiskStoreSuite.scala b/core/src/test/scala/org/apache/spark/storage/DiskStoreSuite.scala index be1b9be2d85d9..b644224652266 100644 --- a/core/src/test/scala/org/apache/spark/storage/DiskStoreSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/DiskStoreSuite.scala @@ -117,7 +117,7 @@ class DiskStoreSuite extends SparkFunSuite { val chunkedByteBuffer = blockData.toChunkedByteBuffer(ByteBuffer.allocate) val chunks = chunkedByteBuffer.chunks - assert(chunks.size === 2) + assert(chunks.length === 2) for (chunk 
<- chunks) { assert(chunk.limit() === 10 * 1024) } diff --git a/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala b/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala index c377f2495d05d..35ef0587b9b4c 100644 --- a/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala @@ -192,9 +192,9 @@ class FileAppenderSuite extends SparkFunSuite with BeforeAndAfter { // verify whether the earliest file has been deleted val rolledOverFiles = allGeneratedFiles.filter { _ != testFile.toString }.toArray.sorted - logInfo(s"All rolled over files generated:${rolledOverFiles.size}\n" + + logInfo(s"All rolled over files generated:${rolledOverFiles.length}\n" + rolledOverFiles.mkString("\n")) - assert(rolledOverFiles.size > 2) + assert(rolledOverFiles.length > 2) val earliestRolledOverFile = rolledOverFiles.head val existingRolledOverFiles = RollingFileAppender.getSortedRolledOverFiles( testFile.getParentFile.toString, testFile.getName).map(_.toString) diff --git a/core/src/test/scala/org/apache/spark/util/collection/SizeTrackerSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/SizeTrackerSuite.scala index 8aa4be6c2ff8d..82a4c85b02fa0 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/SizeTrackerSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/SizeTrackerSuite.scala @@ -104,7 +104,7 @@ private object SizeTrackerSuite { * Run speed tests for size tracking collections. */ def main(args: Array[String]): Unit = { - if (args.size < 1) { + if (args.length < 1) { // scalastyle:off println println("Usage: SizeTrackerSuite [num elements]") // scalastyle:on println diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index dbb851d74a565..9cfe78570421e 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -37,12 +37,7 @@ ENV DEBCONF_NONINTERACTIVE_SEEN true # These arguments are just for reuse and not really meant to be customized. ARG APT_INSTALL="apt-get install --no-install-recommends -y" -# TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. -# See also https://github.com/sphinx-doc/sphinx/issues/7551. -# We should use the latest Sphinx version once this is fixed. -# TODO(SPARK-35375): Jinja2 3.0.0+ causes error when building with Sphinx. -# See also https://issues.apache.org/jira/browse/SPARK-35375. -ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.1.2 numpy==1.20.3 pydata_sphinx_theme==0.8.0 ipython==7.19.0 nbsphinx==0.8.0 numpydoc==1.1.0 jinja2==2.11.3 twine==3.4.1 sphinx-plotly-directive==0.1.3 sphinx-copybutton==0.5.2 pandas==1.5.3 pyarrow==3.0.0 plotly==5.4.0 markupsafe==2.0.1 docutils<0.17 grpcio==1.59.3 protobuf==4.21.6 grpcio-status==1.59.3 googleapis-common-protos==1.56.4" +ARG PIP_PKGS="sphinx==4.2.0 mkdocs==1.1.2 numpy==1.20.3 pydata_sphinx_theme==0.13.3 ipython==7.19.0 nbsphinx==0.8.0 numpydoc==1.1.0 jinja2==3.1.2 twine==3.4.1 sphinx-plotly-directive==0.1.3 sphinx-copybutton==0.5.2 pandas==1.5.3 pyarrow==3.0.0 plotly==5.4.0 markupsafe==2.0.1 docutils<0.17 grpcio==1.59.3 protobuf==4.21.6 grpcio-status==1.59.3 googleapis-common-protos==1.56.4" ARG GEM_PKGS="bundler:2.3.8" # Install extra needed repos and refresh. 
diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 5a96f3fe9b982..3a8c3dc707aa1 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -45,7 +45,7 @@ commons-crypto/1.1.0//commons-crypto-1.1.0.jar commons-dbcp/1.4//commons-dbcp-1.4.jar commons-io/2.15.0//commons-io-2.15.0.jar commons-lang/2.6//commons-lang-2.6.jar -commons-lang3/3.13.0//commons-lang3-3.13.0.jar +commons-lang3/3.14.0//commons-lang3-3.14.0.jar commons-logging/1.1.3//commons-logging-1.1.3.jar commons-math3/3.6.1//commons-math3-3.6.1.jar commons-pool/1.5.4//commons-pool-1.5.4.jar diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile index 10ae49b71665f..7348c6af1e059 100644 --- a/dev/infra/Dockerfile +++ b/dev/infra/Dockerfile @@ -138,4 +138,5 @@ RUN python3.12 -m pip install numpy 'pyarrow>=14.0.0' 'six==1.16.0' 'pandas<=2.1 RUN python3.12 -m pip install 'grpcio==1.59.3' 'grpcio-status==1.59.3' 'protobuf==4.25.1' 'googleapis-common-protos==1.56.4' # TODO(SPARK-46078) Use official one instead of nightly build when it's ready RUN python3.12 -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu +RUN python3.12 -m pip install torchvision --index-url https://download.pytorch.org/whl/cpu RUN python3.12 -m pip install torcheval diff --git a/dev/requirements.txt b/dev/requirements.txt index 66a74471377dd..2d139911bacb6 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -1,11 +1,11 @@ # PySpark dependencies (required) -py4j +py4j>=0.10.9.7 # PySpark dependencies (optional) -numpy -pyarrow +numpy>=1.21 +pyarrow>=4.0.0 six==1.16.0 -pandas +pandas>=1.4.4 scipy plotly mlflow>=2.3.1 @@ -31,12 +31,12 @@ pandas-stubs<1.2.0.54 mkdocs # Documentation (Python) -pydata_sphinx_theme +pydata_sphinx_theme>=0.13 ipython nbsphinx numpydoc -jinja2<3.0.0 -sphinx<3.1.0 +jinja2 +sphinx==4.2.0 sphinx-plotly-directive sphinx-copybutton docutils<0.18.0 @@ -52,8 +52,8 @@ black==23.9.1 py # Spark Connect (required) -grpcio==1.59.3 -grpcio-status==1.59.3 +grpcio>=1.59.3 +grpcio-status>=1.59.3 protobuf==4.25.1 googleapis-common-protos>=1.56.4 diff --git a/docs/README.md b/docs/README.md index 87d68c2f86499..99ccf69dbaee5 100644 --- a/docs/README.md +++ b/docs/README.md @@ -52,13 +52,6 @@ Note: If you are on a system with both Ruby 1.9 and Ruby 2.0 you may need to rep To generate SQL and Python API docs, you'll need to install these libraries: - Run the following command from $SPARK_HOME: ```sh $ pip install --upgrade -r dev/requirements.txt diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md index ce739cb90b531..2ab68d2a8049f 100644 --- a/docs/spark-standalone.md +++ b/docs/spark-standalone.md @@ -518,6 +518,8 @@ Spark applications supports the following configuration properties specific to s # Launching Spark Applications +## Spark Protocol + The [`spark-submit` script](submitting-applications.html) provides the most straightforward way to submit a compiled Spark application to the cluster. For standalone clusters, Spark currently supports two deploy modes. In `client` mode, the driver is launched in the same process as the @@ -540,6 +542,84 @@ failing repeatedly, you may do so through: You can find the driver ID through the standalone Master web UI at `http://:8080`. 
+## REST API
+
+If `spark.master.rest.enabled` is enabled, the Spark master provides an additional REST API via
+`http://[host:port]/[version]/submissions/[action]`, where
+`host` is the master host,
+`port` is the port number specified by `spark.master.rest.port` (default: 6066),
+`version` is the protocol version (`v1` as of today), and
+`action` is one of the following supported actions.
+
+<table class="table">
+  <tr><th>Command</th><th>Description</th><th>HTTP METHOD</th><th>Since Version</th></tr>
+  <tr><td>create</td><td>Create a Spark driver via cluster mode.</td><td>POST</td><td>1.3.0</td></tr>
+  <tr><td>kill</td><td>Kill a single Spark driver.</td><td>POST</td><td>1.3.0</td></tr>
+  <tr><td>killall</td><td>Kill all running Spark drivers.</td><td>POST</td><td>4.0.0</td></tr>
+  <tr><td>status</td><td>Check the status of a Spark job.</td><td>GET</td><td>1.3.0</td></tr>
+  <tr><td>clear</td><td>Clear the completed drivers and applications.</td><td>POST</td><td>4.0.0</td></tr>
+</table>
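Besides the `create` action, which the full `curl` example below demonstrates, the `kill` and `status` actions take a submission ID in the URL path (for instance `/v1/submissions/status/<submissionId>`). The following sketch issues such a status check from Scala; the host, port, and driver ID are placeholders, and any HTTP client (including plain `curl`) works equally well.

```scala
// Illustrative sketch only: query the status of a previously submitted driver.
// The host, port, and submission ID below are placeholders.
import java.net.{HttpURLConnection, URL}
import scala.io.Source

object CheckDriverStatus {
  def main(args: Array[String]): Unit = {
    val url = new URL("http://master-host:6066/v1/submissions/status/driver-20231124153531-0000")
    val conn = url.openConnection().asInstanceOf[HttpURLConnection]
    conn.setRequestMethod("GET")
    // The master answers with a JSON document describing the driver's state.
    val body = Source.fromInputStream(conn.getInputStream).mkString
    println(body)
    conn.disconnect()
  }
}
```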
+ +The following is a curl CLI command example with the `pi.py` and REST API. + +```bash +$ curl -XPOST http://IP:PORT/v1/submissions/create \ +--header "Content-Type:application/json;charset=UTF-8" \ +--data '{ + "appResource": "", + "sparkProperties": { + "spark.master": "spark://master:7077", + "spark.app.name": "Spark Pi", + "spark.driver.memory": "1g", + "spark.driver.cores": "1", + "spark.jars": "" + }, + "clientSparkVersion": "", + "mainClass": "org.apache.spark.deploy.SparkSubmit", + "environmentVariables": { }, + "action": "CreateSubmissionRequest", + "appArgs": [ "/opt/spark/examples/src/main/python/pi.py", "10" ] +}' +``` + +The following is the response from the REST API for the above create request. + +```bash +{ + "action" : "CreateSubmissionResponse", + "message" : "Driver successfully submitted as driver-20231124153531-0000", + "serverSparkVersion" : "4.0.0", + "submissionId" : "driver-20231124153531-0000", + "success" : true +} +``` + + # Resource Scheduling The standalone cluster mode currently only supports a simple FIFO scheduler across applications. diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md index c0f88bffa6e5b..71abf10da328b 100644 --- a/docs/sql-error-conditions.md +++ b/docs/sql-error-conditions.md @@ -643,7 +643,7 @@ Column expression `` cannot be sorted because its type `` is not [SQLSTATE: 39000](sql-error-conditions-sqlstates.html#class-39-external-routine-invocation-exception) -Failed to execute user defined function (``: (``) => ``). +User defined function (``: (``) => ``) failed due to: ``. ### FAILED_FUNCTION_CALL diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 5c00ce6558513..664bccf26651b 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -29,6 +29,7 @@ license: | - Since Spark 4.0, `spark.sql.hive.metastore` drops the support of Hive prior to 2.0.0 as they require JDK 8 that Spark does not support anymore. Users should migrate to higher versions. - Since Spark 4.0, `spark.sql.parquet.compression.codec` drops the support of codec name `lz4raw`, please use `lz4_raw` instead. - Since Spark 4.0, when overflowing during casting timestamp to byte/short/int under non-ansi mode, Spark will return null instead a wrapping value. +- Since Spark 4.0, the `encode()` function supports only the following charsets 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16'. To restore the previous behavior when the function accepts charsets of the current JDK used by Spark, set `spark.sql.legacy.javaCharsets` to `true`. ## Upgrading from Spark SQL 3.4 to 3.5 diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index 547834c7f9e3a..33b9453a18c37 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -2452,6 +2452,14 @@ Specifically for built-in HDFS state store provider, users can check the state s it is best if cache missing count is minimized that means Spark won't waste too much time on loading checkpointed state. User can increase Spark locality waiting configurations to avoid loading state store providers in different executors across batches. +#### State Data Source (Experimental) + +Apache Spark provides a streaming state related data source that provides the ability to manipulate state stores in the checkpoint. Users can run the batch query with State Data Source to get the visibility of the states for existing streaming query. 
+ +As of Spark 4.0, the data source only supports read feature. See [State Data Source Integration Guide](structured-streaming-state-data-source.html) for more details. + +NOTE: this data source is currently marked as experimental - source options and the behavior (output) might be subject to change. + ## Starting Streaming Queries Once you have defined the final result DataFrame/Dataset, all that is left is for you to start the streaming computation. To do that, you have to use the `DataStreamWriter` ([Python](api/python/reference/pyspark.ss/api/pyspark.sql.streaming.DataStreamWriter.html#pyspark.sql.streaming.DataStreamWriter)/[Scala](api/scala/org/apache/spark/sql/streaming/DataStreamWriter.html)/[Java](api/java/org/apache/spark/sql/streaming/DataStreamWriter.html) docs) diff --git a/docs/structured-streaming-state-data-source.md b/docs/structured-streaming-state-data-source.md new file mode 100644 index 0000000000000..a9353861c532c --- /dev/null +++ b/docs/structured-streaming-state-data-source.md @@ -0,0 +1,248 @@ +--- +layout: global +displayTitle: State Data Source Integration Guide +title: State Data Source Integration Guide +license: | + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--- + +State data source Guide in Structured Streaming (Experimental) + +## Overview + +State data source provides functionality to manipulate the state from the checkpoint. + +As of Spark 4.0, state data source provides the read functionality with a batch query. Additional functionalities including write is on the future roadmap. + +NOTE: this data source is currently marked as experimental - source options and the behavior (output) might be subject to change. + +## Reading state key-values from the checkpoint + +State data source enables reading key-value pairs from the state store in the checkpoint, via running a separate batch query. +Users can leverage the functionality to cover two major use cases described below: + +* Construct a test checking both output and the state. It is non-trivial to deduce the key-value of the state from the output, and having visibility of the state would be a huge win on testing. +* Investigate an incident against stateful streaming query. If users observe the incorrect output and want to track how it came up, having visibility of the state would be required. + +Users can read an instance of state store, which is matched to a single stateful operator in most cases. This means, users can expect that they can read the entire key-value pairs in the state for a single stateful operator. + +Note that there could be an exception, e.g. stream-stream join, which leverages multiple state store instances internally. The data source abstracts the internal representation away from users and +provides a user-friendly approach to read the state. 
See the section for stream-stream join for more details.
+
+### Creating a State store for Batch Queries (all defaults)
+
+<div class="codetabs">
+
+<div data-lang="python" markdown="1">
+{% highlight python %}
+
+df = spark \
+.read \
+.format("statestore") \
+.load("<checkpointLocation>")
+
+{% endhighlight %}
+</div>
+
+<div data-lang="scala" markdown="1">
+{% highlight scala %}
+
+val df = spark
+.read
+.format("statestore")
+.load("<checkpointLocation>")
+
+{% endhighlight %}
+</div>
+
+<div data-lang="java" markdown="1">
+{% highlight java %}
+
+Dataset<Row> df = spark
+.read()
+.format("statestore")
+.load("<checkpointLocation>");
+
+{% endhighlight %}
+</div>
+
+</div>
+
+Each row in the source has the following schema:
+
+<table class="table">
+  <tr><th>Column</th><th>Type</th><th>Note</th></tr>
+  <tr><td>key</td><td>struct (depends on the type for state key)</td><td></td></tr>
+  <tr><td>value</td><td>struct (depends on the type for state value)</td><td></td></tr>
+  <tr><td>_partition_id</td><td>int</td><td>metadata column (hidden unless specified with SELECT)</td></tr>
+</table>
+
+The nested columns for key and value depend heavily on the input schema of the stateful operator as well as the type of operator.
+Users are encouraged to query the schema via df.schema() / df.printSchema() first to understand the type of output.
+
+The following options must be set for the source.
+
+<table class="table">
+  <tr><th>Option</th><th>value</th><th>meaning</th></tr>
+  <tr><td>path</td><td>string</td><td>Specify the root directory of the checkpoint location. You can either specify the path via option("path", `path`) or load(`path`).</td></tr>
+</table>
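After loading the state for a stateful operator, it is worth calling `printSchema()` before projecting anything, since the key/value structs mirror the operator's grouping and aggregation schema. A minimal sketch, assuming a hypothetical checkpoint at `/tmp/checkpoint` whose aggregation is keyed by a `user` column and stores a `count` value (the path and field names are illustrative, not part of the API):

```scala
// Illustrative sketch: the checkpoint path and the nested field names ("user", "count")
// are assumptions that depend on the actual streaming query; `spark` is the active SparkSession.
val stateDf = spark.read
  .format("statestore")
  .load("/tmp/checkpoint")

// Inspect the key/value structs produced for this particular operator.
stateDf.printSchema()

// Project nested fields plus the hidden partition metadata column.
stateDf.selectExpr("key.user", "value.count", "_partition_id").show()
```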
+
+The following configurations are optional:
+
+<table class="table">
+  <tr><th>Option</th><th>value</th><th>default</th><th>meaning</th></tr>
+  <tr><td>batchId</td><td>numeric value</td><td>latest committed batch</td><td>Represents the target batch to read from. This option is used when users want to perform time-travel. The batch should be committed but not yet cleaned up.</td></tr>
+  <tr><td>operatorId</td><td>numeric value</td><td>0</td><td>Represents the target operator to read from. This option is used when the query is using multiple stateful operators.</td></tr>
+  <tr><td>storeName</td><td>string</td><td>DEFAULT</td><td>Represents the target state store name to read from. This option is used when the stateful operator uses multiple state store instances. It is not required except for stream-stream join.</td></tr>
+  <tr><td>joinSide</td><td>string ("left" or "right")</td><td>(none)</td><td>Represents the target side to read from. This option is used when users want to read the state from stream-stream join.</td></tr>
+</table>
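To make the option semantics concrete, the sketch below combines a time-travel read with a stream-stream join read. The checkpoint path, batch ID, and operator ID are placeholders rather than values from a real query.

```scala
// Illustrative sketch: `spark` is the active SparkSession and the paths/IDs are placeholders.

// Time-travel: read the state as of a specific committed (not yet cleaned up) batch.
val stateAtBatch5 = spark.read
  .format("statestore")
  .option("batchId", 5L)
  .option("operatorId", 0L)
  .load("/tmp/checkpoint")

// Stream-stream join: read only the buffered input rows of the left side.
// Note that joinSide and storeName cannot be specified together.
val leftSideState = spark.read
  .format("statestore")
  .option("joinSide", "left")
  .load("/tmp/checkpoint")
```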
+
+### Reading state for Stream-stream join
+
+Structured Streaming implements the stream-stream join feature by leveraging multiple state store instances internally.
+These instances logically compose the buffers that store the input rows for the left and right sides.
+
+Since it is easier for users to reason about the two sides of the join than about the internal buffers, the data source provides the option 'joinSide' to read the buffered input for a specific side of the join.
+To still allow reading an internal state store instance directly, the data source also accepts the option 'storeName', with the restriction that 'storeName' and 'joinSide' cannot be specified together.
+
+## State metadata source
+
+Before querying the state from an existing checkpoint via the state data source, users typically want to understand the checkpoint itself, especially its stateful operators: which operators and state store instances are available in the checkpoint, the available range of batch IDs, and so on.
+
+Structured Streaming provides a data source named "State metadata source" that exposes the state-related metadata information from the checkpoint.
+
+Note: The metadata is constructed while the streaming query runs with Spark 4.0+. An existing checkpoint produced with a lower Spark version does not have the metadata and cannot be queried with this metadata source. It is required to run the streaming query against the existing checkpoint with Spark 4.0+ to construct the metadata before querying it.
+
+### Creating a State metadata store for Batch Queries
+
+<div class="codetabs">
+
+<div data-lang="python" markdown="1">
+{% highlight python %}
+
+df = spark \
+.read \
+.format("state-metadata") \
+.load("<checkpointLocation>")
+
+{% endhighlight %}
+</div>
+
+<div data-lang="scala" markdown="1">
+{% highlight scala %}
+
+val df = spark
+.read
+.format("state-metadata")
+.load("<checkpointLocation>")
+
+{% endhighlight %}
+</div>
+
+<div data-lang="java" markdown="1">
+{% highlight java %}
+
+Dataset<Row> df = spark
+.read()
+.format("state-metadata")
+.load("<checkpointLocation>");
+
+{% endhighlight %}
+</div>
+
+</div>
+
+Each row in the source has the following schema:
+
+<table class="table">
+  <tr><th>Column</th><th>Type</th><th>Note</th></tr>
+  <tr><td>operatorId</td><td>int</td><td></td></tr>
+  <tr><td>operatorName</td><td>string</td><td></td></tr>
+  <tr><td>stateStoreName</td><td>string</td><td></td></tr>
+  <tr><td>numPartitions</td><td>int</td><td></td></tr>
+  <tr><td>minBatchId</td><td>int</td><td>The minimum batch ID available for querying state. The value could be invalid if the streaming query taking the checkpoint is running, as cleanup would run.</td></tr>
+  <tr><td>maxBatchId</td><td>int</td><td>The maximum batch ID available for querying state. The value could be invalid if the streaming query taking the checkpoint is running, as the query will commit further batches.</td></tr>
+  <tr><td>_numColsPrefixKey</td><td>int</td><td>metadata column (hidden unless specified with SELECT)</td></tr>
+</table>
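As the discussion below explains, a typical workflow is to query the metadata source first to locate the right operator (and, for stream-stream join, the right store name), then read its state with the state data source. A minimal sketch, assuming a hypothetical checkpoint path and operator ID:

```scala
// Illustrative sketch: `spark` is the active SparkSession; the checkpoint path
// and the chosen operatorId are placeholders.
val meta = spark.read
  .format("state-metadata")
  .load("/tmp/checkpoint")

// List the stateful operators and the batch range recorded in the checkpoint metadata.
meta.select("operatorId", "operatorName", "stateStoreName", "minBatchId", "maxBatchId").show()

// Read the state of the operator identified above, e.g. the second stateful operator.
val state = spark.read
  .format("statestore")
  .option("operatorId", 1L)
  .load("/tmp/checkpoint")
```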
+ +One of the major use cases of this data source is to identify the operatorId to query if the query has multiple stateful operators, e.g. stream-stream join followed by deduplication. +The column 'operatorName' helps users to identify the operatorId for given operator. + +Additionally, if users want to query about an internal state store instance for a stateful operator (e.g. stream-stream join), the column 'stateStoreName' would be useful to determine the target. diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala index d7099c5c953c1..bc6fab45810eb 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala @@ -87,7 +87,7 @@ object SVDPlusPlus { val gJoinT0 = g.outerJoinVertices(t0) { (vid: VertexId, vd: (Array[Double], Array[Double], Double, Double), msg: Option[(Long, Double)]) => - (vd._1, vd._2, msg.get._2 / msg.get._1 - u, 1.0 / scala.math.sqrt(msg.get._1)) + (vd._1, vd._2, msg.get._2 / msg.get._1 - u, 1.0 / scala.math.sqrt(msg.get._1.toDouble)) }.cache() materialize(gJoinT0) g.unpersist() diff --git a/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala index caa2fdcdf5d2b..666790958c353 100644 --- a/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala +++ b/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala @@ -321,7 +321,7 @@ class PageRankSuite extends SparkFunSuite with LocalSparkContext { val rank = if (vid < source) { 0.0 } else { - a * Math.pow(1 - resetProb, vid - source) + a * Math.pow(1 - resetProb, vid.toDouble - source) } vid -> rank } diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala index 6e26a78e9c7e6..aa39a3e177eeb 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala @@ -1418,7 +1418,7 @@ class GeneralizedLinearRegressionSummary private[regression] ( case Row(label: Double, pred: Double, weight: Double) => (label, pred, weight) } - family.aic(t, deviance, numInstances, weightSum) + 2 * rank + family.aic(t, deviance, numInstances.toDouble, weightSum) + 2 * rank } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/stat/ANOVATest.scala b/mllib/src/main/scala/org/apache/spark/ml/stat/ANOVATest.scala index d7b13f1bf25f3..482bb7fdc2105 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/stat/ANOVATest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/stat/ANOVATest.scala @@ -224,7 +224,7 @@ private[ml] object ANOVATest { // mean square within val msw = sswn / dfwn val fValue = msb / msw - val pValue = 1 - new FDistribution(dfbn, dfwn).cumulativeProbability(fValue) + val pValue = 1 - new FDistribution(dfbn.toDouble, dfwn.toDouble).cumulativeProbability(fValue) val degreesOfFreedom = dfbn + dfwn (pValue, degreesOfFreedom, fValue) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/stat/FValueTest.scala b/mllib/src/main/scala/org/apache/spark/ml/stat/FValueTest.scala index 89579dfcbb0c3..e2ce6cf7214f7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/stat/FValueTest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/stat/FValueTest.scala @@ -135,7 +135,7 @@ private[ml] 
object FValueTest { } else Iterator.empty }.reduceByKey(_ + _ ).mapPartitions { iter => - val fd = new FDistribution(1, degreesOfFreedom) + val fd = new FDistribution(1.0, degreesOfFreedom.toDouble) iter.map { case (col, sumForCov) => // Cov(X,Y) = Sum(((Xi - Avg(X)) * ((Yi-Avg(Y))) / (N-1) val covariance = sumForCov / (numSamples - 1) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala index dbcf9017f1748..234ecbc460638 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala @@ -525,7 +525,7 @@ final class OnlineLDAOptimizer extends LDAOptimizer with Logging { updateLambda(batchResult, batchSize) logphatOption.foreach(_ /= nonEmptyDocsN.toDouble) - logphatOption.foreach(updateAlpha(_, nonEmptyDocsN)) + logphatOption.foreach(updateAlpha(_, nonEmptyDocsN.toDouble)) this } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala index ed6e3ea966b26..17b28ed3eba5d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala @@ -106,7 +106,7 @@ class StreamingKMeansModel @Since("1.2.0") ( val numNewPoints = pointStats.iterator.map { case (_, (_, n)) => n }.sum - math.pow(decayFactor, numNewPoints) + math.pow(decayFactor, numNewPoints.toDouble) } // apply discount to weights diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala index 06c7754691953..79f482347289a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala @@ -91,8 +91,8 @@ class AssociationRules private[fpm] ( .map { case (antecedent, ((consequent, freqUnion), freqAntecedent)) => new Rule(antecedent.toArray, consequent.toArray, - freqUnion, - freqAntecedent, + freqUnion.toDouble, + freqAntecedent.toDouble, // the consequent contains always only one element itemSupport.get(consequent.head)) }.filter(_.confidence >= minConfidence) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index 2bd4877ffc72e..37bf9d45f6646 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -633,7 +633,7 @@ class RowMatrix @Since("1.0.0") ( val gamma = if (threshold < 1e-6) { Double.PositiveInfinity } else { - 10 * math.log(numCols()) / threshold + 10 * math.log(numCols().toDouble) / threshold } val summary = Statistics.colStats(rows.map((_, 1.0)), Seq("normL2")) @@ -823,7 +823,8 @@ class RowMatrix @Since("1.0.0") ( + s"as it's bigger than maxResultSize ($maxDriverResultSizeInBytes Bytes)") val numerator = math.log(rows.getNumPartitions) - val denominator = math.log(maxDriverResultSizeInBytes) - math.log(aggregatedObjectSizeInBytes) + val denominator = math.log(maxDriverResultSizeInBytes.toDouble) - + math.log(aggregatedObjectSizeInBytes.toDouble) val desiredTreeDepth = math.ceil(numerator / denominator) if (desiredTreeDepth > 4) { diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/stat/correlation/SpearmanCorrelation.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/correlation/SpearmanCorrelation.scala index aa0bf51ebcd25..28c2b5d5027ab 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/correlation/SpearmanCorrelation.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/correlation/SpearmanCorrelation.scala @@ -70,7 +70,7 @@ private[stat] object SpearmanCorrelation extends Correlation with Logging { val output = flush() preCol = j preVal = v - startRank = rank + startRank = rank.toDouble cachedUids += uid output } else { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala index ead9f887fe811..d42df3e2f0ddf 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala @@ -201,7 +201,7 @@ private[spark] object ChiSqTest extends Logging { counts.foreach { case ((label, value), c) => val i = value2Index(value) val j = label2Index(label) - contingency.update(i, j, c) + contingency.update(i, j, c.toDouble) } ChiSqTest.chiSquaredMatrix(contingency, methodName) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTestMethod.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTestMethod.scala index 8f3d0f8b3214c..cf0fd388fa749 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTestMethod.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTestMethod.scala @@ -131,7 +131,7 @@ private[stat] object StudentTTest extends StreamingTestMethod with Logging { statsA: StatCounter, statsB: StatCounter): StreamingTestResult = { def studentDF(sample1: StatisticalSummaryValues, sample2: StatisticalSummaryValues): Double = - sample1.getN + sample2.getN - 2 + sample1.getN + sample2.getN - 2.0 new StreamingTestResult( tTester.get.homoscedasticTTest(statsA, statsB), diff --git a/pom.xml b/pom.xml index ac096a19804db..fce9c2b54e03a 100644 --- a/pom.xml +++ b/pom.xml @@ -197,7 +197,7 @@ 2.6 - 3.13.0 + 3.14.0 2.11.1 4.1.17 @@ -2978,7 +2978,7 @@ TODO(SPARK-33805): Undo the corresponding deprecated usage suppression rule after fixed. --> -Wconf:msg=^(?=.*?method|value|type|object|trait|inheritance)(?=.*?deprecated)(?=.*?since 2.13).+$:e - -Wconf:msg=^(?=.*?Widening conversion from)(?=.*?is deprecated because it loses precision).+$:s + -Wconf:msg=^(?=.*?Widening conversion from)(?=.*?is deprecated because it loses precision).+$:e -Wconf:cat=deprecation&msg=Auto-application to \`\(\)\` is deprecated:e