
Add support for using the arena allocator #871

Merged: 7 commits, Sep 30, 2020
Changes from 4 commits
docs/configs.md (1 addition, 0 deletions)

@@ -33,6 +33,7 @@ Name | Description | Default Value
<a name="memory.gpu.debug"></a>spark.rapids.memory.gpu.debug|Provides a log of GPU memory allocations and frees. If set to STDOUT or STDERR the logging will go there. Setting it to NONE disables logging. All other values are reserved for possible future expansion and in the meantime will disable logging.|NONE
<a name="memory.gpu.maxAllocFraction"></a>spark.rapids.memory.gpu.maxAllocFraction|The fraction of total GPU memory that limits the maximum size of the RMM pool. The value must be greater than or equal to the setting for spark.rapids.memory.gpu.allocFraction. Note that this limit will be reduced by the reserve memory configured in spark.rapids.memory.gpu.reserve.|1.0
<a name="memory.gpu.pooling.enabled"></a>spark.rapids.memory.gpu.pooling.enabled|Should RMM act as a pooling allocator for GPU memory, or should it just pass through to CUDA memory allocation directly.|true
<a name="memory.gpu.pooling.strategy"></a>spark.rapids.memory.gpu.pooling.strategy|The strategy to use for the RMM pooling allocator. Valid values are "DEFAULT" and "ARENA". This configuration only takes effect when spark.rapids.memory.gpu.pooling.enabled is set to true.|ARENA
<a name="memory.gpu.reserve"></a>spark.rapids.memory.gpu.reserve|The amount of GPU memory that should remain unallocated by RMM and left for system use such as memory needed for kernels, kernel launches or JIT compilation.|1073741824
<a name="memory.host.spillStorageSize"></a>spark.rapids.memory.host.spillStorageSize|Amount of off-heap host memory to use for buffering spilled GPU data before spilling to local disk|1073741824
<a name="memory.pinnedPool.size"></a>spark.rapids.memory.pinnedPool.size|The size of the pinned memory pool in bytes unless otherwise specified. Use 0 to disable the pool.|0
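As a usage illustration (not part of this PR), the settings in the table above could be applied when building a Spark job's configuration. Only the `spark.rapids.*` keys and their defaults come from the table; the job setup itself is hypothetical:

```scala
import org.apache.spark.SparkConf

// Hypothetical job setup; only the spark.rapids.* keys and default values
// come from the configuration table above.
val conf = new SparkConf()
  .set("spark.rapids.memory.gpu.pooling.enabled", "true")
  .set("spark.rapids.memory.gpu.pooling.strategy", "ARENA") // or "DEFAULT"
  .set("spark.rapids.memory.gpu.reserve", "1073741824")     // 1 GiB left for kernels/JIT
```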
@@ -203,8 +203,18 @@ object GpuDeviceManager extends Logging {
 var init = RmmAllocationMode.CUDA_DEFAULT
 val features = ArrayBuffer[String]()
 if (conf.isPooledMemEnabled) {
-  init = init | RmmAllocationMode.POOL
-  features += "POOLED"
+  init = conf.pooledMemStrategy match {
+    case c if "default".equalsIgnoreCase(c) =>
+      features += "POOLED"
+      init | RmmAllocationMode.POOL
+    case c if "arena".equalsIgnoreCase(c) =>
+      features += "ARENA"
+      init | RmmAllocationMode.ARENA
+    case c =>
+      logWarning(
+        s"RMM pooled memory strategy set to '$c' is not supported and is being ignored.")
+      init
Member:
  This should throw IllegalArgumentException or something similar, otherwise this is still proceeding (albeit with a log message in an executor log that the user probably will not immediately see).

Author (Collaborator):
  Done.

+  }
 }
if (conf.isUvmEnabled) {
init = init | RmmAllocationMode.CUDA_MANAGED_MEMORY
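A sketch of the fail-fast behavior the reviewer asked for (resolved as "Done." in a later commit not shown in this diff). The flag values below are placeholders standing in for the real constants in ai.rapids.cudf.RmmAllocationMode, and this is not the actual follow-up change:

```scala
object StrategySketch {
  // Placeholder bit flags; the real values live in ai.rapids.cudf.RmmAllocationMode.
  val CUDA_DEFAULT = 0
  val POOL = 1
  val ARENA = 4

  // Unknown strategies raise instead of being silently ignored.
  def resolve(init: Int, strategy: String): Int = strategy match {
    case s if "default".equalsIgnoreCase(s) => init | POOL
    case s if "arena".equalsIgnoreCase(s)   => init | ARENA
    case s => throw new IllegalArgumentException(
      s"Unsupported RMM pooling strategy: $s")
  }
}
```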
@@ -276,7 +286,7 @@ object GpuDeviceManager extends Logging {
private[this] val devId = getDeviceId.getOrElse {
throw new IllegalStateException("Device ID is not set")
}

override def newThread(runnable: Runnable): Thread = {
factory.newThread(() => {
Cuda.setDevice(devId)
@@ -321,6 +321,13 @@ object RapidsConf {
.booleanConf
.createWithDefault(true)

val POOLED_MEM_STRATEGY = conf("spark.rapids.memory.gpu.pooling.strategy")
.doc("The strategy to use for the RMM pooling allocator. Valid values are \"DEFAULT\" and " +
"\"ARENA\". This configuration only takes effect when " +
"spark.rapids.memory.gpu.pooling.enabled is set to true.")
.stringConf
.createWithDefault("ARENA")

val CONCURRENT_GPU_TASKS = conf("spark.rapids.sql.concurrentGpuTasks")
.doc("Set the number of tasks that can execute concurrently per GPU. " +
"Tasks may temporarily block when the number of concurrent tasks in the executor " +
@@ -867,6 +874,8 @@ class RapidsConf(conf: Map[String, String]) extends Logging {

lazy val isPooledMemEnabled: Boolean = get(POOLED_MEM)

lazy val pooledMemStrategy: String = get(POOLED_MEM_STRATEGY)

lazy val rmmAllocFraction: Double = get(RMM_ALLOC_FRACTION)

lazy val rmmAllocMaxFraction: Double = get(RMM_ALLOC_MAX_FRACTION)
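The new setting is surfaced as a plain string with a default, which is why the GpuDeviceManager change matches on it case-insensitively. A minimal standalone stand-in for this lookup pattern (the class and map below are illustrative, not the real RapidsConf):

```scala
// Illustrative stand-in for RapidsConf: lazy lookups backed by a key/value
// map, falling back to the documented defaults.
class MiniRapidsConf(conf: Map[String, String]) {
  lazy val isPooledMemEnabled: Boolean =
    conf.getOrElse("spark.rapids.memory.gpu.pooling.enabled", "true").toBoolean
  lazy val pooledMemStrategy: String =
    conf.getOrElse("spark.rapids.memory.gpu.pooling.strategy", "ARENA")
}
```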
@@ -32,6 +32,7 @@ class GpuDeviceManagerSuite extends FunSuite with Arm {
val maxPoolFraction = 0.2
val conf = new SparkConf()
.set(RapidsConf.POOLED_MEM.key, "true")
.set(RapidsConf.POOLED_MEM_STRATEGY.key, "ARENA")
.set(RapidsConf.RMM_ALLOC_FRACTION.key, initPoolFraction.toString)
.set(RapidsConf.RMM_ALLOC_MAX_FRACTION.key, maxPoolFraction.toString)
.set(RapidsConf.RMM_ALLOC_RESERVE.key, "0")