Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix sync issues with calls to CUSV APIs on aarch64 #823

Merged
merged 15 commits
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pennylane_lightning/core/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@
Version number (major.minor.patch[-label])
"""

__version__ = "0.38.0-dev20"
__version__ = "0.38.0-dev21"
Original file line number Diff line number Diff line change
Expand Up @@ -1359,6 +1359,7 @@ class StateVectorCudaManaged
/* const int32_t* */ ctrlsInt.data(),
/* const int32_t* */ nullptr,
/* const uint32_t */ ctrls.size()));
PL_CUDA_IS_SUCCESS(cudaStreamSynchronize(BaseType::getDataBuffer().getDevTag().getStreamID()));
}

/**
Expand Down Expand Up @@ -1419,6 +1420,8 @@ class StateVectorCudaManaged
/* custatevecComputeType_t */ compute_type,
/* std::size_t* */ &extraWorkspaceSizeInBytes));

PL_CUDA_IS_SUCCESS(cudaStreamSynchronize(BaseType::getDataBuffer().getDevTag().getStreamID()));

// allocate external workspace if necessary
// LCOV_EXCL_START
if (extraWorkspaceSizeInBytes > 0) {
Expand All @@ -1445,6 +1448,8 @@ class StateVectorCudaManaged
/* custatevecComputeType_t */ compute_type,
/* void* */ extraWorkspace,
/* std::size_t */ extraWorkspaceSizeInBytes));

PL_CUDA_IS_SUCCESS(cudaStreamSynchronize(BaseType::getDataBuffer().getDevTag().getStreamID()));
// LCOV_EXCL_START
if (extraWorkspaceSizeInBytes)
PL_CUDA_IS_SUCCESS(cudaFree(extraWorkspace));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ class Measurements final
/* const int32_t* */ maskBitString,
/* const int32_t* */ maskOrdering,
/* const uint32_t */ maskLen));
PL_CUDA_IS_SUCCESS(cudaStreamSynchronize(this->_statevector.getDataBuffer().getDevTag().getStreamID()));
multiphaseCFD marked this conversation as resolved.
Show resolved Hide resolved

if constexpr (std::is_same_v<CFP_t, cuDoubleComplex> ||
std::is_same_v<CFP_t, double2>) {
Expand Down Expand Up @@ -252,6 +253,7 @@ class Measurements final
this->_statevector.getCusvHandle(), this->_statevector.getData(),
data_type, num_qubits, &sampler, num_samples,
&extraWorkspaceSizeInBytes));
PL_CUDA_IS_SUCCESS(cudaStreamSynchronize(this->_statevector.getDataBuffer().getDevTag().getStreamID()));

// allocate external workspace if necessary
if (extraWorkspaceSizeInBytes > 0)
Expand All @@ -262,12 +264,14 @@ class Measurements final
PL_CUSTATEVEC_IS_SUCCESS(custatevecSamplerPreprocess(
this->_statevector.getCusvHandle(), sampler, extraWorkspace,
extraWorkspaceSizeInBytes));
PL_CUDA_IS_SUCCESS(cudaStreamSynchronize(this->_statevector.getDataBuffer().getDevTag().getStreamID()));

// sample bit strings
PL_CUSTATEVEC_IS_SUCCESS(custatevecSamplerSample(
this->_statevector.getCusvHandle(), sampler, bitStrings.data(),
bitOrdering.data(), bitStringLen, rand_nums.data(), num_samples,
CUSTATEVEC_SAMPLER_OUTPUT_ASCENDING_ORDER));
PL_CUDA_IS_SUCCESS(cudaStreamSynchronize(this->_statevector.getDataBuffer().getDevTag().getStreamID()));

// destroy descriptor and handle
PL_CUSTATEVEC_IS_SUCCESS(custatevecSamplerDestroy(sampler));
Expand Down Expand Up @@ -497,6 +501,8 @@ class Measurements final
const_cast<const int32_t **>(basisBits_ptr.data()),
/* const uint32_t */ n_basisBits.data()));

PL_CUDA_IS_SUCCESS(cudaStreamSynchronize(this->_statevector.getDataBuffer().getDevTag().getStreamID()));

std::complex<PrecisionT> result{0, 0};

if constexpr (std::is_same_v<PrecisionT, double>) {
Expand Down Expand Up @@ -804,6 +810,7 @@ class Measurements final
/* const uint32_t */ tgtsInt.size(),
/* custatevecComputeType_t */ compute_type,
/* std::size_t* */ &extraWorkspaceSizeInBytes));
PL_CUDA_IS_SUCCESS(cudaStreamSynchronize(this->_statevector.getDataBuffer().getDevTag().getStreamID()));

// LCOV_EXCL_START
if (extraWorkspaceSizeInBytes > 0) {
Expand Down Expand Up @@ -832,6 +839,8 @@ class Measurements final
/* void* */ extraWorkspace,
/* std::size_t */ extraWorkspaceSizeInBytes));

PL_CUDA_IS_SUCCESS(cudaStreamSynchronize(this->_statevector.getDataBuffer().getDevTag().getStreamID()));

// LCOV_EXCL_START
if (extraWorkspaceSizeInBytes)
PL_CUDA_IS_SUCCESS(cudaFree(extraWorkspace));
Expand All @@ -840,4 +849,4 @@ class Measurements final
return static_cast<PrecisionT>(expect.x);
}
}; // class Measurements
} // namespace Pennylane::LightningGPU::Measures
} // namespace Pennylane::LightningGPU::Measures
Loading