Skip to content

Commit

Permalink
Fix sync issues with calls to CUSV APIs on aarch64 (#823)
Browse files Browse the repository at this point in the history
### Before submitting

Please complete the following checklist when submitting a PR:

- [x] All new features must include a unit test.
If you've fixed a bug or added code that should be tested, add a test to
the
      [`tests`](../tests) directory!

- [x] All new functions and code must be clearly commented and
documented.
If you do make documentation changes, make sure that the docs build and
      render correctly by running `make docs`.

- [x] Ensure that the test suite passes, by running `make test`.

- [x] Add a new entry to the `.github/CHANGELOG.md` file, summarizing
the
      change, and including a link back to the PR.

- [x] Ensure that code is properly formatted by running `make format`. 

When all the above are checked, delete everything above the dashed
line and fill in the pull request template.


------------------------------------------------------------------------------------------------------------

**Context:** Fixes the known sync issues on aarch64 + GraceHopper when
using custatevec API calls.

**Description of the Change:** Adds stream sync to all CUSV async API
calls.

**Benefits:** Fixes #793 

**Possible Drawbacks:** The added sync points may introduce (minimal)
overhead for smaller problems.

**Related GitHub Issues:**

---------

Co-authored-by: Lee J. O'Riordan <lee@xanadu.au>
Co-authored-by: ringo-but-quantum <github-ringo-but-quantum@xanadu.ai>
Co-authored-by: Ali Asadi <10773383+maliasadi@users.noreply.github.com>
  • Loading branch information
4 people committed Aug 2, 2024
1 parent cfc3006 commit 619b807
Show file tree
Hide file tree
Showing 8 changed files with 41 additions and 6 deletions.
6 changes: 6 additions & 0 deletions .github/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,12 @@

### Bug fixes

* Fix cuQuantum SDK path pass-through in CMake.
[(#831)](https://github.com/PennyLaneAI/pennylane-lightning/pull/831)

* Fix CUDA sync issues on aarch64+GraceHopper.
[(#823)](https://github.com/PennyLaneAI/pennylane-lightning/pull/823)

* Check for the number of wires for Hermitian observables in Lightning-Tensor. Only 1-wire Hermitian observables are supported as of `cuTensorNet-v24.03.0`.
[(#806)](https://github.com/PennyLaneAI/pennylane-lightning/pull/806)

Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/tests_lgpumpi_cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ jobs:
- name: Build and run unit tests
run: |
source /etc/profile.d/modules.sh && module use /opt/modules/ && module load ${{ matrix.mpilib }}/cuda-${{ matrix.cuda_version_maj }}.${{ matrix.cuda_version_min }}
export CUQUANTUM_SDK=$(python -c "import site; print( f'{site.getsitepackages()[0]}/cuquantum/lib')")
export CUQUANTUM_SDK=$(python -c "import site; print( f'{site.getsitepackages()[0]}/cuquantum')")
cmake . -BBuild \
-DPL_BACKEND=lightning_gpu \
-DENABLE_PYTHON=OFF \
Expand All @@ -134,6 +134,7 @@ jobs:
-DCMAKE_CXX_COMPILER=mpicxx \
-DCMAKE_CUDA_COMPILER=$(which nvcc) \
-DCMAKE_CUDA_ARCHITECTURES="86" \
-DCUQUANTUM_SDK=${CUQUANTUM_SDK} \
-DPython_EXECUTABLE:FILE="${{ steps.python_path.outputs.python }}" \
-G Ninja
cmake --build ./Build
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/tests_lgpumpi_python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ jobs:
- name: Build and install package
env:
CUQUANTUM_SDK: $(python -c "import site; print( f'{site.getsitepackages()[0]}/cuquantum/lib')")
CUQUANTUM_SDK: $(python -c "import site; print( f'{site.getsitepackages()[0]}/cuquantum')")
run: |
source /etc/profile.d/modules.sh && module use /opt/modules/ && module load ${{ matrix.mpilib }}/cuda-${{ matrix.cuda_version_maj }}.${{ matrix.cuda_version_min }}
CMAKE_ARGS="-DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DENABLE_MPI=ON -DCMAKE_CUDA_COMPILER=$(which nvcc) -DCMAKE_CUDA_ARCHITECTURES=${{ env.CI_CUDA_ARCH }} -DPython_EXECUTABLE=${{ steps.python_path.outputs.python }}" \
Expand Down
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ Then the `cuStateVec`_ library can be installed and set a ``CUQUANTUM_SDK`` envi
.. code-block:: console
python -m pip install wheel custatevec-cu12
export CUQUANTUM_SDK=$(python -c "import site; print( f'{site.getsitepackages()[0]}/cuquantum/lib')")
export CUQUANTUM_SDK=$(python -c "import site; print( f'{site.getsitepackages()[0]}/cuquantum')")
The Lightning-GPU can then be installed with ``pip``:

Expand Down Expand Up @@ -386,7 +386,7 @@ Then the `cutensornet`_ library can be installed and set a ``CUQUANTUM_SDK`` env
.. code-block:: console
pip install cutensornet-cu12
export CUQUANTUM_SDK=$(python -c "import site; print( f'{site.getsitepackages()[0]}/cuquantum/lib')")
export CUQUANTUM_SDK=$(python -c "import site; print( f'{site.getsitepackages()[0]}/cuquantum')")
The Lightning-Tensor can then be installed with ``pip``:

Expand Down
4 changes: 4 additions & 0 deletions cmake/support_pllgpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ endmacro()

# Macro to aid in finding cuStateVec lib
macro(findCustatevec external_libs)
set(CUQUANTUM_ENV "$ENV{CUQUANTUM_SDK}")
find_library(CUSTATEVEC_LIB
NAMES libcustatevec.so.1 custatevec.so.1
HINTS /usr/lib
Expand All @@ -58,6 +59,8 @@ macro(findCustatevec external_libs)
lib64
${CUQUANTUM_SDK}/lib
${CUQUANTUM_SDK}/lib64
${CUQUANTUM_ENV}/lib
${CUQUANTUM_ENV}/lib64
${CUDAToolkit_LIBRARY_DIR}
${CUDA_TOOLKIT_ROOT_DIR}/lib
${CUDA_TOOLKIT_ROOT_DIR}/lib64
Expand All @@ -74,6 +77,7 @@ macro(findCustatevec external_libs)
/opt/cuda
include
${CUQUANTUM_SDK}/include
${CUQUANTUM_ENV}/include
${CUDAToolkit_INCLUDE_DIRS}
${CUDA_TOOLKIT_ROOT_DIR}/include
${Python_SITELIB}/cuquantum/include
Expand Down
2 changes: 1 addition & 1 deletion pennylane_lightning/core/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@
Version number (major.minor.patch[-label])
"""

__version__ = "0.38.0-dev24"
__version__ = "0.38.0-dev25"
Original file line number Diff line number Diff line change
Expand Up @@ -1359,6 +1359,8 @@ class StateVectorCudaManaged
/* const int32_t* */ ctrlsInt.data(),
/* const int32_t* */ nullptr,
/* const uint32_t */ ctrls.size()));
PL_CUDA_IS_SUCCESS(cudaStreamSynchronize(
BaseType::getDataBuffer().getDevTag().getStreamID()));
}

/**
Expand Down Expand Up @@ -1419,6 +1421,9 @@ class StateVectorCudaManaged
/* custatevecComputeType_t */ compute_type,
/* std::size_t* */ &extraWorkspaceSizeInBytes));

PL_CUDA_IS_SUCCESS(cudaStreamSynchronize(
BaseType::getDataBuffer().getDevTag().getStreamID()));

// allocate external workspace if necessary
// LCOV_EXCL_START
if (extraWorkspaceSizeInBytes > 0) {
Expand All @@ -1445,6 +1450,9 @@ class StateVectorCudaManaged
/* custatevecComputeType_t */ compute_type,
/* void* */ extraWorkspace,
/* std::size_t */ extraWorkspaceSizeInBytes));

PL_CUDA_IS_SUCCESS(cudaStreamSynchronize(
BaseType::getDataBuffer().getDevTag().getStreamID()));
// LCOV_EXCL_START
if (extraWorkspaceSizeInBytes)
PL_CUDA_IS_SUCCESS(cudaFree(extraWorkspace));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ class Measurements final
/* const int32_t* */ maskBitString,
/* const int32_t* */ maskOrdering,
/* const uint32_t */ maskLen));
PL_CUDA_IS_SUCCESS(cudaStreamSynchronize(
this->_statevector.getDataBuffer().getDevTag().getStreamID()));

if constexpr (std::is_same_v<CFP_t, cuDoubleComplex> ||
std::is_same_v<CFP_t, double2>) {
Expand Down Expand Up @@ -252,6 +254,8 @@ class Measurements final
this->_statevector.getCusvHandle(), this->_statevector.getData(),
data_type, num_qubits, &sampler, num_samples,
&extraWorkspaceSizeInBytes));
PL_CUDA_IS_SUCCESS(cudaStreamSynchronize(
this->_statevector.getDataBuffer().getDevTag().getStreamID()));

// allocate external workspace if necessary
if (extraWorkspaceSizeInBytes > 0)
Expand All @@ -262,12 +266,16 @@ class Measurements final
PL_CUSTATEVEC_IS_SUCCESS(custatevecSamplerPreprocess(
this->_statevector.getCusvHandle(), sampler, extraWorkspace,
extraWorkspaceSizeInBytes));
PL_CUDA_IS_SUCCESS(cudaStreamSynchronize(
this->_statevector.getDataBuffer().getDevTag().getStreamID()));

// sample bit strings
PL_CUSTATEVEC_IS_SUCCESS(custatevecSamplerSample(
this->_statevector.getCusvHandle(), sampler, bitStrings.data(),
bitOrdering.data(), bitStringLen, rand_nums.data(), num_samples,
CUSTATEVEC_SAMPLER_OUTPUT_ASCENDING_ORDER));
PL_CUDA_IS_SUCCESS(cudaStreamSynchronize(
this->_statevector.getDataBuffer().getDevTag().getStreamID()));

// destroy descriptor and handle
PL_CUSTATEVEC_IS_SUCCESS(custatevecSamplerDestroy(sampler));
Expand Down Expand Up @@ -497,6 +505,9 @@ class Measurements final
const_cast<const int32_t **>(basisBits_ptr.data()),
/* const uint32_t */ n_basisBits.data()));

PL_CUDA_IS_SUCCESS(cudaStreamSynchronize(
this->_statevector.getDataBuffer().getDevTag().getStreamID()));

std::complex<PrecisionT> result{0, 0};

if constexpr (std::is_same_v<PrecisionT, double>) {
Expand Down Expand Up @@ -804,6 +815,8 @@ class Measurements final
/* const uint32_t */ tgtsInt.size(),
/* custatevecComputeType_t */ compute_type,
/* std::size_t* */ &extraWorkspaceSizeInBytes));
PL_CUDA_IS_SUCCESS(cudaStreamSynchronize(
this->_statevector.getDataBuffer().getDevTag().getStreamID()));

// LCOV_EXCL_START
if (extraWorkspaceSizeInBytes > 0) {
Expand Down Expand Up @@ -832,6 +845,9 @@ class Measurements final
/* void* */ extraWorkspace,
/* std::size_t */ extraWorkspaceSizeInBytes));

PL_CUDA_IS_SUCCESS(cudaStreamSynchronize(
this->_statevector.getDataBuffer().getDevTag().getStreamID()));

// LCOV_EXCL_START
if (extraWorkspaceSizeInBytes)
PL_CUDA_IS_SUCCESS(cudaFree(extraWorkspace));
Expand All @@ -840,4 +856,4 @@ class Measurements final
return static_cast<PrecisionT>(expect.x);
}
}; // class Measurements
} // namespace Pennylane::LightningGPU::Measures
} // namespace Pennylane::LightningGPU::Measures

0 comments on commit 619b807

Please sign in to comment.