Commit
Add mutex to KernelMap Cache (#382)
* Format

* Add KernelMap excessive test

* Auto update version

* Fix

* Fix for tidy

* Add docstring; Update test code

* Update docstring

* Fix for tidy

* Add changelog

* Auto update version

Co-authored-by: Dev version update bot <github-actions[bot]@users.noreply.github.com>
chaeyeunpark and github-actions[bot] committed Nov 2, 2022
1 parent 7242c1b · commit 101eb24
Showing 4 changed files with 117 additions and 15 deletions.
5 changes: 4 additions & 1 deletion .github/CHANGELOG.md
@@ -34,6 +34,9 @@
 
 ### Bug fixes
 
+* Use mutex when accessing cache in KernelMap.
+  [(#382)](https://github.com/PennyLaneAI/pennylane-lightning/pull/382)
+
 ### Contributors
 
 This release contains contributions from (in alphabetical order):
@@ -804,4 +807,4 @@ Initial release.
 
 This release contains contributions from (in alphabetical order):
 
-Tom Bromley, Josh Izaac, Nathan Killoran, Antal Száva
+Tom Bromley, Josh Izaac, Nathan Killoran, Antal Száva
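
The bug-fix entry in the diff above describes a data race on a shared memoization cache. As a minimal illustrative sketch of the guarded pattern it introduces (not the library code; CachedLookup, computeExpensive, and the int/string types are hypothetical stand-ins):

#include <map>
#include <mutex>
#include <string>

// Mutex-guarded memoization cache: every read or write of the shared map
// happens under the lock, so concurrent callers never observe it mid-update.
class CachedLookup {
  public:
    auto get(int key) -> std::string {
        std::unique_lock lock(mutex_); // CTAD requires C++17
        if (auto it = cache_.find(key); it != cache_.end()) {
            return it->second; // hit: the shared map is read only under the lock
        }
        lock.unlock(); // run the slow computation without blocking other readers
        std::string value = computeExpensive(key);
        lock.lock();
        cache_.emplace(key, value); // publish under the lock; emplace keeps the
        return value;               // first value if another thread raced ahead
    }

  private:
    static auto computeExpensive(int key) -> std::string {
        return std::to_string(key); // stand-in for the real work
    }
    std::map<int, std::string> cache_;
    std::mutex mutex_;
};

int main() {
    CachedLookup cache;
    return cache.get(42) == "42" ? 0 : 1; // smoke test
}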
2 changes: 1 addition & 1 deletion pennylane_lightning/_version.py
@@ -16,4 +16,4 @@
 Version number (major.minor.patch[-label])
 """
 
-__version__ = "0.27.0-dev13"
+__version__ = "0.27.0-dev14"
58 changes: 45 additions & 13 deletions pennylane_lightning/src/simulator/KernelMap.hpp
@@ -26,6 +26,7 @@
 
 #include <deque>
 #include <functional>
+#include <mutex>
 #include <unordered_map>
 #include <utility>
 
@@ -161,7 +162,10 @@ template <class Operation, size_t cache_size = 16> class OperationKernelMap {
 
   private:
     EnumDispatchKernalMap kernel_map_;
+
+    /* TODO: Cache logic can be improved */
     mutable std::deque<std::tuple<size_t, uint32_t, EnumKernelMap>> cache_;
+    mutable std::mutex cache_mutex_;
 
     /**
      * @brief Allowed kernels for a given memory model
@@ -183,6 +187,43 @@
         // LCOV_EXCL_STOP
     } {}
 
+    /**
+     * @brief Construct and update kernel map cache for the given number of
+     * qubits and dispatch key.
+     *
+     * @param num_qubits Number of qubits
+     * @param dispatch_key Dispatch key for cache
+     *
+     * @return Constructed element of the cache.
+     */
+    [[nodiscard]] auto updateCache(const size_t num_qubits,
+                                   uint32_t dispatch_key) const
+        -> std::unordered_map<Operation, Gates::KernelType> {
+        std::unordered_map<Operation, Gates::KernelType> kernel_for_op;
+
+        Util::for_each_enum<Operation>([&](Operation op) {
+            const auto key = std::make_pair(op, dispatch_key);
+            const auto &set = kernel_map_.at(key);
+            kernel_for_op.emplace(op, set.getKernel(num_qubits));
+        });
+
+        std::unique_lock cache_lock(cache_mutex_);
+
+        const auto cache_iter =
+            std::find_if(cache_.begin(), cache_.end(), [=](const auto &elt) {
+                return (std::get<0>(elt) == num_qubits) &&
+                       (std::get<1>(elt) == dispatch_key);
+            });
+
+        if (cache_iter == cache_.end()) {
+            if (cache_.size() == cache_size) {
+                cache_.pop_back();
+            }
+            cache_.emplace_front(num_qubits, dispatch_key, kernel_for_op);
+        }
+        return kernel_for_op;
+    }
+
   public:
     /**
      * @brief Get a singleton instance.
@@ -314,27 +355,18 @@ template <class Operation, size_t cache_size = 16> class OperationKernelMap {
     [[nodiscard]] auto getKernelMap(size_t num_qubits, Threading threading,
                                     CPUMemoryModel memory_model) const
         -> EnumKernelMap {
-        // TODO: Add mutex for cache_ when we goto multithread.
         const uint32_t dispatch_key = toDispatchKey(threading, memory_model);
 
+        std::unique_lock cache_lock(cache_mutex_);
+
         const auto cache_iter =
             std::find_if(cache_.begin(), cache_.end(), [=](const auto &elt) {
                 return (std::get<0>(elt) == num_qubits) &&
                        (std::get<1>(elt) == dispatch_key);
             });
         if (cache_iter == cache_.end()) {
-            std::unordered_map<Operation, Gates::KernelType> kernel_for_op;
-
-            Util::for_each_enum<Operation>([&](Operation op) {
-                const auto key = std::make_pair(op, dispatch_key);
-                const auto &set = kernel_map_.at(key);
-                kernel_for_op.emplace(op, set.getKernel(num_qubits));
-            });
-            if (cache_.size() == cache_size) {
-                cache_.pop_back();
-            }
-            cache_.emplace_front(num_qubits, dispatch_key, kernel_for_op);
-            return kernel_for_op;
+            cache_lock.unlock();
+            return updateCache(num_qubits, dispatch_key);
         }
         return std::get<2>(*cache_iter);
     }
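
Two details of the change above are easy to miss: getKernelMap releases cache_lock before calling updateCache, so the per-operation kernel resolution never runs under the lock, and updateCache re-checks the cache before inserting, so the worst case under contention is duplicated work rather than a corrupted deque. Below is a hedged sketch of a call site, built only from identifiers visible in this diff and the test that follows (includes, namespace qualifications, and surrounding setup are assumed):

// Illustrative call site; after this change, concurrent calls are safe.
auto &instance = OperationKernelMap<Gates::GateOperation>::getInstance();
const auto kernel_map = instance.getKernelMap(
    12, Threading::MultiThread, CPUMemoryModel::Aligned256);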
67 changes: 67 additions & 0 deletions pennylane_lightning/src/tests/Test_KernelMap.cpp
@@ -137,3 +137,70 @@ TEST_CASE("Test KernelMap functionalities", "[KernelMap]") {
                           Util::LightningException, "does not exist");
     }
 }
+
+TEST_CASE("Test KernelMap is consistent in extreme usecase", "[KernelMap]") {
+    using Gates::GateOperation;
+    using Gates::KernelType;
+    using EnumKernelMap =
+        OperationKernelMap<Gates::GateOperation>::EnumKernelMap;
+    auto &instance = OperationKernelMap<Gates::GateOperation>::getInstance();
+
+    const auto num_qubits = std::vector<size_t>{4, 6, 8, 10, 12, 14, 16};
+    const auto threadings =
+        std::vector<Threading>{Threading::SingleThread, Threading::MultiThread};
+    const auto memory_models = std::vector<CPUMemoryModel>{
+        CPUMemoryModel::Unaligned, CPUMemoryModel::Aligned256,
+        CPUMemoryModel::Aligned512};
+
+    std::random_device rd;
+
+    std::vector<EnumKernelMap> records;
+
+    records.push_back(instance.getKernelMap(12, Threading::SingleThread,
+                                            CPUMemoryModel::Aligned256));
+
+    constexpr size_t num_iter = 8096;
+
+#ifdef _OPENMP
+#pragma omp parallel default(none)                                            \
+    shared(instance, records, rd, num_qubits, threadings, memory_models)      \
+    firstprivate(num_iter)
+#endif
+    {
+        std::mt19937 re;
+
+#ifdef _OPENMP
+#pragma omp critical
+#endif
+        { re.seed(rd()); }
+
+        std::uniform_int_distribution<size_t> num_qubit_dist(
+            0, num_qubits.size() - 1);
+        std::uniform_int_distribution<size_t> threading_dist(
+            0, threadings.size() - 1);
+        std::uniform_int_distribution<size_t> memory_model_dist(
+            0, memory_models.size() - 1);
+
+        std::vector<EnumKernelMap> res;
+
+#ifdef _OPENMP
+#pragma omp for
+#endif
+        for (size_t i = 0; i < num_iter; i++) {
+            const auto num_qubit = num_qubits[num_qubit_dist(re)];
+            const auto threading = threadings[threading_dist(re)];
+            const auto memory_model = memory_models[memory_model_dist(re)];
+
+            res.push_back(
+                instance.getKernelMap(num_qubit, threading, memory_model));
+        }
+#ifdef _OPENMP
+#pragma omp critical
+#endif
+        { records.insert(records.end(), res.begin(), res.end()); }
+    }
+    records.push_back(instance.getKernelMap(12, Threading::SingleThread,
+                                            CPUMemoryModel::Aligned256));
+
+    REQUIRE(records.front() == records.back());
+}
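
The test above exercises the concurrent path only when built with OpenMP. One pattern worth noting: each thread seeds its own std::mt19937 from the shared std::random_device inside a critical section, because concurrent calls on a single random_device object are not synchronized. A standalone sketch of that seeding pattern (the -fopenmp build flag is a toolchain assumption):

#include <cstdio>
#include <random>

#include <omp.h>

// Per-thread RNG seeding: the shared std::random_device is read under a
// critical section; each thread then draws from its privately seeded engine
// without further locking.
int main() {
    std::random_device rd;

#pragma omp parallel default(none) shared(rd)
    {
        std::mt19937 re;

#pragma omp critical
        { re.seed(rd()); }

        std::uniform_int_distribution<int> dist(0, 9);
        std::printf("thread %d drew %d\n", omp_get_thread_num(), dist(re));
    }
    return 0;
}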
