filecoin-project · cryptonemo · Sep 29, 2021 · Sep 28, 2021 · cryptonemo · Sep 29, 2021
@@ -181,7 +181,7 @@ jobs:
             RUST_TEST_THREADS: 1
             FIL_PROOFS_USE_GPU_COLUMN_BUILDER: true
             FIL_PROOFS_USE_GPU_TREE_BUILDER: true
-            BELLMAN_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
+            FIL_PROOFS_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
 
   test_no_gpu:
     executor: default

@@ -73,6 +73,14 @@ To disable `multicore sdr` so that `hwloc` is not required, you can build proofs
 
 Note that the `multicore-sdr` feature is omitted from the specified feature list, which removes it from being used by default.
 
+There is experimental support for CUDA behind the `cuda` feature (disabled by default). You will need to install `nvcc`.  On Ubuntu, this can be achieved with `apt install nvidia-cuda-toolkit`.  To enable CUDA support, you can build proofs like this:
+
+```
+> cargo build --release --all --features cuda
+```
+
+This builds the library with both CUDA and OpenCL support.  CUDA will then be preferred at run time, but it can be disabled with the `FIL_PROOFS_GPU_FRAMEWORK` environment variable (see more information in the `GPU usage` section below).
+
 
 ## Building for Arm64
 
@@ -303,6 +311,20 @@ FIL_PROOFS_COLUMN_WRITE_BATCH_SIZE=Y
 
 Note that this value affects the degree of parallelism used when persisting the column tree to disk, and may exhaust system file descriptors if the limit is not adjusted appropriately (e.g. using `ulimit -n`).  If persisting the tree is failing due to a 'bad file descriptor' error, try adjusting this value to something larger (e.g. 524288, or 1048576).  Increasing this value processes larger chunks at once, which results in larger (but fewer) disk writes in parallel.
 
+When the library is built with both CUDA and OpenCL support, you can choose which one to use at run time.  Use the environment variable:
+
+```
+FIL_PROOFS_GPU_FRAMEWORK=cuda
+```
+
+You can set it to `opencl` to use OpenCL instead.  The default is `cuda`, which is also used when the variable is unset or set to any other (invalid) value.
+
+CUDA kernels are compiled at build time.  By default, they are built for recent architectures, Turing (`sm_75`) and Ampere (`sm_80`, `sm_86`).  This increases the overall build time by several minutes.  You can reduce it by compiling them only for the specific architecture you need.  For example, if you only need the CUDA kernels to work on the Turing architecture, you can set
+
+`FIL_PROOFS_CUDA_NVCC_ARGS="--fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75"`
+
+Note that this environment variable is forwarded to underlying dependencies, which might not automatically be rebuilt.  If you change this variable, it is best to start from a clean build.
+
 ### Memory
 
 At the moment the default configuration is set to reduce memory consumption as much as possible so there's not much to do from the user side. We are now storing Merkle trees on disk, which were the main source of memory consumption.  You should expect a maximum RSS between 1-2 sector sizes, if you experience peaks beyond that range please report an issue (you can check the max RSS with the `/usr/bin/time -v` command).

@@ -68,8 +68,41 @@ fn cache(s: &str) -> String {
     cache_name
 }
 
+/// Sets `env_var` to `value` unless `env::var` can already read it (i.e. it is properly set).
+fn set_env_var_if_unset(env_var: &str, value: &str) {
+    if env::var(env_var).is_err() {
+        env::set_var(env_var, value);
+    }
+}
+
+/// Set the GPU framework for the dependencies.
+///
+/// If a GPU framework (CUDA or OpenCL) is selected via `{PREFIX}_GPU_FRAMEWORK`, it needs to be
+/// communicated to some of the dependencies. This is done via environment variables.
+///
+/// If one of those environment variables is already set, it won't be overridden.
+fn set_gpu_framework() {
+    if let Ok(framework) = env::var(format!("{}_GPU_FRAMEWORK", PREFIX)) {
+        set_env_var_if_unset("BELLMAN_GPU_FRAMEWORK", &framework);
+        set_env_var_if_unset("NEPTUNE_GPU_FRAMEWORK", &framework);
+    }
+}
+
+/// Forward the CUDA nvcc compile flags from `{PREFIX}_CUDA_NVCC_ARGS` (if set) to the dependencies.
+///
+/// If `BELLMAN_CUDA_NVCC_ARGS` or `NEPTUNE_CUDA_NVCC_ARGS` is already set, it won't be overridden.
+fn set_cuda_nvcc_args() {
+    if let Ok(nvcc_args) = env::var(format!("{}_CUDA_NVCC_ARGS", PREFIX)) {
+        set_env_var_if_unset("BELLMAN_CUDA_NVCC_ARGS", &nvcc_args);
+        set_env_var_if_unset("NEPTUNE_CUDA_NVCC_ARGS", &nvcc_args);
+    }
+}
+
 impl Settings {
     fn new() -> Result<Settings, ConfigError> {
+        set_gpu_framework();
+        set_cuda_nvcc_args();
+
         let mut s = Config::new();
 
         s.merge(File::with_name(SETTINGS_PATH).required(false))?;