ITS: fix compiler warnings #8566

Merged 1 commit on Apr 13, 2022
Changes from all commits
12 changes: 6 additions & 6 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexerTraitsGPU.cu
@@ -102,14 +102,14 @@ namespace gpu
{

template <typename... Args>
-GPUd() void printOnThread(const int tId, const char* str, Args... args)
+GPUd() void printOnThread(const unsigned int tId, const char* str, Args... args)
{
if (blockIdx.x * blockDim.x + threadIdx.x == tId) {
printf(str, args...);
}
}

-GPUd() void printVectorOnThread(const char* name, Vector<int>& vector, size_t size, const int tId = 0)
+GPUd() void printVectorOnThread(const char* name, Vector<int>& vector, size_t size, const unsigned int tId = 0)
{
if (blockIdx.x * blockDim.x + threadIdx.x == tId) {
printf("vector %s :", name);
@@ -120,7 +120,7 @@ GPUd() void printVectorOnThread(const char* name, Vector<int>& vector, size_t si
}
}

-GPUg() void printVectorKernel(DeviceStoreVertexerGPU& store, const int threadId)
+GPUg() void printVectorKernel(DeviceStoreVertexerGPU& store, const unsigned int threadId)
{
if (blockIdx.x * blockDim.x + threadIdx.x == threadId) {
for (int i{0}; i < store.getConfig().histConf.nBinsXYZ[0] - 1; ++i) {
@@ -138,7 +138,7 @@ GPUg() void printVectorKernel(DeviceStoreVertexerGPU& store, const int threadId)
}
}

-GPUg() void dumpMaximaKernel(DeviceStoreVertexerGPU& store, const int threadId)
+GPUg() void dumpMaximaKernel(DeviceStoreVertexerGPU& store, const unsigned int threadId)
{
if (blockIdx.x * blockDim.x + threadIdx.x == threadId) {
printf("XmaxBin: %d at index: %d | YmaxBin: %d at index: %d | ZmaxBin: %d at index: %d\n",
@@ -232,8 +232,8 @@ GPUg() void trackletSelectionKernel(
GPUg() void computeCentroidsKernel(DeviceStoreVertexerGPU& store,
const float pairCut)
{
-const int nLines = store.getNExclusiveFoundLines()[store.getClusters()[1].size() - 1] + store.getNFoundLines()[store.getClusters()[1].size() - 1];
-const int maxIterations{nLines * (nLines - 1) / 2};
+const size_t nLines = store.getNExclusiveFoundLines()[store.getClusters()[1].size() - 1] + store.getNFoundLines()[store.getClusters()[1].size() - 1];
+const size_t maxIterations{nLines * (nLines - 1) / 2};
for (size_t currentThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; currentThreadIndex < maxIterations; currentThreadIndex += blockDim.x * gridDim.x) {
int iFirstLine = currentThreadIndex / nLines;
int iSecondLine = currentThreadIndex % nLines;
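The signature changes in this file switch the thread-id parameters from int to unsigned int and the line counters to size_t, presumably because they are compared against blockIdx.x * blockDim.x + threadIdx.x (an unsigned expression) and against size()-derived quantities. A minimal, hypothetical stand-alone sketch of the signed/unsigned comparison warning being silenced; this is illustrative only, not code from the PR:

    #include <cstdio>

    // Hypothetical reproduction: the global thread index below has type
    // unsigned int, so comparing it with a signed int parameter is flagged
    // as a signed/unsigned comparison by typical compilers; widening the
    // parameter to unsigned int removes the warning.
    __global__ void printOnThreadWarns(const int tId) // warns
    {
      if (blockIdx.x * blockDim.x + threadIdx.x == tId) {
        printf("thread %d\n", tId);
      }
    }

    __global__ void printOnThreadClean(const unsigned int tId) // no warning
    {
      if (blockIdx.x * blockDim.x + threadIdx.x == tId) {
        printf("thread %u\n", tId);
      }
    }

    int main()
    {
      printOnThreadClean<<<1, 32>>>(0u);
      cudaDeviceSynchronize();
      return 0;
    }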
@@ -183,7 +183,7 @@ GPUhdi() void Line::getDCAComponents(const Line& line, const float point[3], flo

inline bool Line::operator==(const Line& rhs) const
{
-bool val;
+bool val{false};
for (int i{0}; i < 3; ++i) {
val &= this->originPoint[i] == rhs.originPoint[i];
}
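The one-line change above value-initialises val, which was previously read by the &= in the loop before ever being written, the classic "may be used uninitialized" warning (and formally undefined behaviour). A hedged sketch of the same pattern, not taken from the PR; note that an accumulator combined with &= is conventionally seeded with true so the conjunction can ever hold:

    #include <array>

    // Hypothetical illustration (not the PR's code): reading an uninitialised
    // bool through &= is what the compiler reports as "may be used
    // uninitialized"; seeding the accumulator fixes the warning, and for a
    // logical-AND fold the conventional seed is true.
    bool allEqual(const std::array<float, 3>& a, const std::array<float, 3>& b)
    {
      bool val{true}; // seed for an AND-accumulation
      for (int i{0}; i < 3; ++i) {
        val &= (a[i] == b[i]);
      }
      return val;
    }

    int main()
    {
      std::array<float, 3> p{1.f, 2.f, 3.f};
      return allEqual(p, p) ? 0 : 1;
    }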
@@ -81,7 +81,7 @@ class VertexerTraits
{
public:
VertexerTraits() = default;
-~VertexerTraits() = default;
+virtual ~VertexerTraits() = default;

GPUhd() static constexpr int4 getEmptyBinsRect()
{
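Making the VertexerTraits destructor virtual addresses the warning family around non-virtual destructors in polymorphic base classes (-Wnon-virtual-dtor / -Wdelete-non-virtual-dtor), since the class appears to be used as a base for GPU-specialised traits. A minimal, hypothetical sketch of that warning class, not code from the repository:

    #include <memory>

    // Hypothetical illustration: a polymorphic base with a non-virtual
    // destructor draws -Wnon-virtual-dtor, and deleting a derived object
    // through such a base pointer is undefined behaviour
    // (-Wdelete-non-virtual-dtor). Declaring the destructor virtual fixes both.
    struct TraitsBase {
      virtual void compute() {}
      virtual ~TraitsBase() = default; // without "virtual" the deletion below would warn
    };

    struct TraitsGPU : TraitsBase {
      void compute() override {}
      ~TraitsGPU() override = default;
    };

    int main()
    {
      std::unique_ptr<TraitsBase> t = std::make_unique<TraitsGPU>();
      t->compute();
      return 0; // ~TraitsGPU() runs because ~TraitsBase() is virtual
    }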
2 changes: 1 addition & 1 deletion GPU/GPUbenchmark/Shared/Kernels.h
@@ -63,7 +63,7 @@ class GPUbenchmark final
float runDistributed(void (*kernel)(chunk_t**, size_t*, T...),
std::vector<std::pair<float, float>>& chunkRanges,
int nLaunches,
-int nBlocks,
+size_t nBlocks,
int nThreads,
T&... args);

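Widening nBlocks to size_t in this declaration is paired with the same change in the out-of-line definition in GPU/GPUbenchmark/cuda/Kernels.cu further down; the two signatures have to agree, and the wider type lets size()-derived block counts flow through without signedness friction. A small, hypothetical header/implementation sketch of the pattern (names are illustrative, not the benchmark's API):

    #include <cstddef>
    #include <vector>

    // Hypothetical sketch (not repository code): the parameter type must be
    // changed consistently in the declaration and the definition, and size_t
    // matches counts that originate from container sizes.
    class Benchmark
    {
     public:
      float runDistributed(std::size_t nBlocks, int nThreads); // declaration (header)
    };

    float Benchmark::runDistributed(std::size_t nBlocks, int nThreads) // definition (implementation file)
    {
      return static_cast<float>(nBlocks * nThreads);
    }

    int main()
    {
      std::vector<int> chunks(1024);
      Benchmark b;
      return b.runDistributed(chunks.size(), 256) > 0.f ? 0 : 1;
    }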
27 changes: 14 additions & 13 deletions GPU/GPUbenchmark/cuda/Kernels.cu
@@ -42,7 +42,7 @@ bool checkTestChunks(std::vector<std::pair<float, float>>& chunks, size_t availM
bool check{false};

sort(chunks.begin(), chunks.end());
-for (auto iChunk{0}; iChunk < chunks.size(); ++iChunk) { // Check boundaries
+for (size_t iChunk{0}; iChunk < chunks.size(); ++iChunk) { // Check boundaries
if (chunks[iChunk].first + chunks[iChunk].second > availMemSizeGB) {
check = false;
break;
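This and the remaining loop-counter changes in this file follow the same pattern: auto iChunk{0} deduces int, so the comparison against a size()-returned size_t draws a signed/unsigned comparison warning, and declaring the counter as size_t makes the comparison homogeneous. A hedged stand-alone sketch of the fixed form (not the PR's code):

    #include <cstddef>
    #include <utility>
    #include <vector>

    // Hypothetical illustration: with auto the literal 0 deduces int, and
    // comparing it against vector::size() (a size_t) is what the compiler
    // flags; using size_t for the counter silences the warning.
    int main()
    {
      std::vector<std::pair<float, float>> chunks(4, {1.f, 2.f});
      float total{0.f};
      for (size_t iChunk{0}; iChunk < chunks.size(); ++iChunk) {
        total += chunks[iChunk].first + chunks[iChunk].second;
      }
      return total > 0.f ? 0 : 1;
    }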
@@ -465,20 +465,20 @@ std::vector<float> GPUbenchmark<chunk_t>::runConcurrent(void (*kernel)(chunk_t*,
for (auto iStream{0}; iStream < dimStreams; ++iStream) {
GPUCHECK(cudaStreamCreate(&(streams.at(iStream)))); // round-robin on stream pool
}
-for (auto iChunk{0}; iChunk < nChunks; ++iChunk) {
+for (size_t iChunk{0}; iChunk < nChunks; ++iChunk) {
GPUCHECK(cudaEventCreate(&(starts[iChunk])));
GPUCHECK(cudaEventCreate(&(stops[iChunk])));
}

// Warm up on every chunk
-for (auto iChunk{0}; iChunk < nChunks; ++iChunk) {
+for (size_t iChunk{0}; iChunk < nChunks; ++iChunk) {
auto& chunk = chunkRanges[iChunk];
chunk_t* chunkPtr = getCustomPtr<chunk_t>(mState.scratchPtr, chunk.first);
(*kernel)<<<nBlocks, nThreads, 0, streams[iChunk % dimStreams]>>>(chunkPtr, getBufferCapacity<chunk_t>(chunk.second, mOptions.prime), args...);
}
auto start = std::chrono::high_resolution_clock::now();

-for (auto iChunk{0}; iChunk < nChunks; ++iChunk) {
+for (size_t iChunk{0}; iChunk < nChunks; ++iChunk) {
auto& chunk = chunkRanges[iChunk];
chunk_t* chunkPtr = getCustomPtr<chunk_t>(mState.scratchPtr, chunk.first);
GPUCHECK(cudaEventRecord(starts[iChunk], streams[iChunk % dimStreams]));
@@ -488,7 +488,7 @@ std::vector<float> GPUbenchmark<chunk_t>::runConcurrent(void (*kernel)(chunk_t*,
GPUCHECK(cudaEventRecord(stops[iChunk], streams[iChunk % dimStreams]));
}

-for (auto iChunk{0}; iChunk < nChunks; ++iChunk) {
+for (size_t iChunk{0}; iChunk < nChunks; ++iChunk) {
GPUCHECK(cudaEventSynchronize(stops[iChunk]));
GPUCHECK(cudaEventElapsedTime(&(results.at(iChunk)), starts[iChunk], stops[iChunk]));
GPUCHECK(cudaEventDestroy(starts[iChunk]));
@@ -512,7 +512,7 @@ template <typename... T>
float GPUbenchmark<chunk_t>::runDistributed(void (*kernel)(chunk_t**, size_t*, T...),
std::vector<std::pair<float, float>>& chunkRanges,
int nLaunches,
-int nBlocks,
+size_t nBlocks,
int nThreads,
T&... args)
{
@@ -521,7 +521,7 @@ float GPUbenchmark<chunk_t>::runDistributed(void (*kernel)(chunk_t**, size_t*, T
std::vector<size_t> perBlockCapacity(nBlocks); // Capacity of sub-buffer for block

float totChunkGB{0.f};
-int totComputedBlocks{0};
+size_t totComputedBlocks{0};

for (size_t iChunk{0}; iChunk < chunkRanges.size(); ++iChunk) {
chunkPtrs[iChunk] = getCustomPtr<chunk_t>(mState.scratchPtr, chunkRanges[iChunk].first);
@@ -666,10 +666,11 @@ void GPUbenchmark<chunk_t>::runTest(Test test, Mode mode, KernelConfig config)
}
nThreads *= mOptions.threadPoolFraction;

-void (*kernel)(chunk_t*, size_t);
-void (*kernel_distributed)(chunk_t**, size_t*);
-void (*kernel_rand)(chunk_t*, size_t, int);
-void (*kernel_rand_distributed)(chunk_t**, size_t*, int);
+void (*kernel)(chunk_t*, size_t) = &gpu::read_k<chunk_t>;                                   // Initialising to a default value
+void (*kernel_distributed)(chunk_t**, size_t*) = &gpu::read_dist_k<chunk_t>;                // Initialising to a default value
+void (*kernel_rand)(chunk_t*, size_t, int) = &gpu::rand_read_k<chunk_t>;                    // Initialising to a default value
+void (*kernel_rand_distributed)(chunk_t**, size_t*, int) = &gpu::rand_read_dist_k<chunk_t>; // Initialising to a default value

bool is_random{false};

if (mode != Mode::Distributed) {
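The four kernel function pointers are now given default targets at the point of declaration; previously they were only assigned inside the branches that follow, so the compiler could report them as possibly uninitialised at the later launch site. A hypothetical, host-only reduction of that warning; the kernels and branch structure below are stand-ins, not the benchmark's real control flow:

    #include <cstddef>
    #include <cstdio>

    // Hypothetical stand-ins for the benchmark kernels.
    void readKernel(int* buf, std::size_t n)
    {
      for (std::size_t i = 0; i < n; ++i) {
        buf[i] += 1;
      }
    }

    void writeKernel(int* buf, std::size_t n)
    {
      for (std::size_t i = 0; i < n; ++i) {
        buf[i] = 0;
      }
    }

    int main()
    {
      bool doWrite = false;
      // Initialising to a default target: if the pointer were only assigned in
      // some branches, the call below could be flagged as maybe-uninitialized.
      void (*kernel)(int*, std::size_t) = &readKernel;
      if (doWrite) {
        kernel = &writeKernel;
      }
      int data[8]{};
      kernel(data, 8);
      std::printf("data[0] = %d\n", data[0]);
      return 0;
    }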
@@ -744,7 +745,7 @@ void GPUbenchmark<chunk_t>::runTest(Test test, Mode mode, KernelConfig config)
if (!mOptions.raw) {
std::cout << " │ - per chunk throughput:\n";
}
-for (auto iChunk{0}; iChunk < mState.testChunks.size(); ++iChunk) { // loop over single chunks separately
+for (size_t iChunk{0}; iChunk < mState.testChunks.size(); ++iChunk) { // loop over single chunks separately
auto& chunk = mState.testChunks[iChunk];
float result{0.f};
if (!is_random) {
@@ -793,7 +794,7 @@ void GPUbenchmark<chunk_t>::runTest(Test test, Mode mode, KernelConfig config)
mOptions.prime);
}
float sum{0};
-for (auto iChunk{0}; iChunk < mState.testChunks.size(); ++iChunk) {
+for (size_t iChunk{0}; iChunk < mState.testChunks.size(); ++iChunk) {
auto& chunk = mState.testChunks[iChunk];
float chunkSize = (float)getBufferCapacity<chunk_t>(chunk.second, mOptions.prime) * sizeof(chunk_t) / (float)GB;
auto throughput = computeThroughput(test, results[iChunk], chunkSize, mState.getNKernelLaunches());