-
Notifications
You must be signed in to change notification settings - Fork 0
/
bitonic.cu
78 lines (66 loc) · 2.21 KB
/
bitonic.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
// implementation of bitonic sort
// based on "Fast in-place, comparison-based sorting with CUDA: a study with bitonic sort" by Peters et al.
// author: Shrihan Dadi (sdadi2)
#include <iostream>
#include <sstream>
#include <algorithm>
#include <cuda_runtime.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include "common.cuh"
using namespace std;
// Print the command-line synopsis to stdout and terminate the process
// with exit status 1. Never returns.
void usage() {
    std::cout << "usage: bitonic [k]" << std::endl
              << "where 2^k is the size of the vector to generate for sorting" << std::endl;
    exit(1);
}
// Kernel for normalized bitonic sort.
//
// Each thread derives its flat index across a 1D grid and hands it to
// bitonicSwap (defined outside this file) together with the vector and the
// current phase/step. The host launches this kernel once per (phase, step)
// pair.
//
//   vec   - pointer to the float data being sorted
//   size  - element count, forwarded unchanged to bitonicSwap
//   phase - current phase, forwarded unchanged to bitonicSwap
//   step  - current step within the phase, forwarded unchanged
//
// NOTE(review): no bounds check is done here; presumably bitonicSwap guards
// against out-of-range indices or the launch covers exactly the needed
// threads - confirm against common.cuh.
__global__ void bitonicSort(float vec[], size_t size, unsigned int phase, unsigned int step) {
    // Widen before multiplying so the block offset cannot wrap in 32 bits.
    const size_t blockOffset = static_cast<size_t>(blockDim.x) * blockIdx.x;
    const size_t globalIdx = blockOffset + threadIdx.x;
    bitonicSwap(vec, size, phase, step, globalIdx);
}
// Report a failed CUDA runtime call on stderr and exit with status 1.
// `what` names the operation for the error message.
static void checkCudaOrDie(cudaError_t err, const char* what) {
    if (err != cudaSuccess) {
        cerr << "CUDA error (" << what << "): " << cudaGetErrorString(err) << endl;
        exit(1);
    }
}

// Entry point: parses k from the command line, builds a vector of 2^k floats
// via genVec, sorts it on the GPU by launching bitonicSort once per
// (phase, step) pair, and prints the elapsed GPU time measured with CUDA
// events. When DEBUG is defined, the result is additionally checked with
// sorted(). Returns 0 on success; exits with status 1 on bad arguments or on
// a CUDA error.
int main(int argc, char** argv) {
    if (argc != 2) {
        usage();
    }

    // read k from argv[1] where 2^k is the size of the vector to generate
    istringstream ss(argv[1]);
    unsigned int k;
    if (!(ss >> k) || k > sizeof(size_t) * 8 - 1) {
        usage();
    }

    // generate vector
    // Shift a size_t, not an int: `1 << k` is undefined/overflows for k >= 31
    // even though the check above admits larger k.
    size_t size = static_cast<size_t>(1) << k;
    thrust::host_vector<float> vec = genVec(size);

    // sort with normalized bitonic sort
    cout << "Sorting vector of size " << size << "..." << endl;
    thrust::device_vector<float> gpuVec = vec;
    float* gpuVecPtr = thrust::raw_pointer_cast(gpuVec.data());

    // One thread per pair of elements (size / 2 threads in total).
    size_t numBlocks = max((size_t) 1, (size / 2) / NUM_THREADS);
    size_t numThreads = min(size / 2, (size_t) NUM_THREADS);

    // time sorting
    cudaEvent_t start, stop;
    checkCudaOrDie(cudaEventCreate(&start), "cudaEventCreate(start)");
    checkCudaOrDie(cudaEventCreate(&stop), "cudaEventCreate(stop)");
    checkCudaOrDie(cudaEventRecord(start), "cudaEventRecord(start)");
    // For k == 0 the outer loop body never runs, so no kernel is launched
    // with a zero-sized block.
    for (unsigned int phase = 1; phase <= k; phase++) {
        for (unsigned int step = phase; step >= 1; step--) {
            bitonicSort<<<numBlocks, numThreads>>>(gpuVecPtr, size, phase, step);
        }
    }
    // Kernel launches do not return a status; pick up any launch failure here.
    // Checked once after the loops so the timed region is not perturbed.
    cudaError_t launchErr = cudaGetLastError();
    checkCudaOrDie(cudaEventRecord(stop), "cudaEventRecord(stop)");
    checkCudaOrDie(launchErr, "bitonicSort launch");

    // copy gpuVec back into vec
    vec = gpuVec;

    // get time to sort
    checkCudaOrDie(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)");
    float milliseconds = 0.0f;
    checkCudaOrDie(cudaEventElapsedTime(&milliseconds, start, stop), "cudaEventElapsedTime");

    // release the timing events
    cudaEventDestroy(start);
    cudaEventDestroy(stop);

    // print out time to sort
    cout << "Time: " << milliseconds << " ms" << endl;

#ifdef DEBUG
    if (!sorted(vec)) {
        cout << "vec is not sorted!" << endl;
    }
#endif
    return 0;
}