apache · timlee0119 · Sep 20, 2024 · Sep 20, 2024 · Sep 21, 2024 · dongjoon-hyun
diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -1357,7 +1357,6 @@ package object config {
 
   private[spark] val SHUFFLE_ACCURATE_BLOCK_SKEWED_FACTOR =
     ConfigBuilder("spark.shuffle.accurateBlockSkewedFactor")
-      .internal()
       .doc("A shuffle block is considered as skewed and will be accurately recorded in " +
         "HighlyCompressedMapStatus if its size is larger than this factor multiplying " +
         "the median shuffle block size or SHUFFLE_ACCURATE_BLOCK_THRESHOLD. It is " +

diff --git a/docs/configuration.md b/docs/configuration.md
@@ -1222,6 +1222,19 @@ Apart from these, the following properties are also available, and may be useful
   </td>
   <td>2.2.1</td>
 </tr>
+<tr>
+  <td><code>spark.shuffle.accurateBlockSkewedFactor</code></td>
+  <td>-1.0</td>
+  <td>
+    A shuffle block is considered as skewed and will be accurately recorded in
+    <code>HighlyCompressedMapStatus</code> if its size is larger than this factor multiplying
+    the median shuffle block size or <code>spark.shuffle.accurateBlockThreshold</code>. It is
+    recommended to set this parameter to be the same as
+    <code>spark.sql.adaptive.skewJoin.skewedPartitionFactor</code>. The default value of -1.0
+    disables this feature.
+  </td>
+  <td>3.3.0</td>
+</tr>
 <tr>
   <td><code>spark.shuffle.registration.timeout</code></td>
   <td>5000</td>