-
Notifications
You must be signed in to change notification settings - Fork 152
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
WGS84 to UTM benchmark (C++) (#1216)
Closes #1215 Depends on #1191 New benchmark for C++ API for WGS84->UTM transform. Below results were run on a single CPU core for Proj (ignore gbench's claims to the contrary), and on a single H100 80GB GPU. Machine was a DGX H100. ``` (rapids) coder ➜ ~/cuspatial $ CUDA_VISIBLE_DEVICES=1 cpp/build/latest/cuproj/benchmarks/WGS_TO_UTM_BENCH 2023-08-02T00:08:52+00:00 Running cpp/build/latest/cuproj/benchmarks/WGS_TO_UTM_BENCH Run on (224 X 3800 MHz CPU s) CPU Caches: L1 Data 48 KiB (x112) L1 Instruction 32 KiB (x112) L2 Unified 2048 KiB (x112) L3 Unified 107520 KiB (x2) Load Average: 1.70, 6.21, 14.43 --------------------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations UserCounters... --------------------------------------------------------------------------------------------------------------------- proj_utm_benchmark/forward_double/100 0.013 ms 0.013 ms 57756 items_per_second=7.95314M/s proj_utm_benchmark/forward_double/1000 0.114 ms 0.114 ms 6118 items_per_second=8.73369M/s proj_utm_benchmark/forward_double/10000 1.24 ms 1.24 ms 588 items_per_second=8.07697M/s proj_utm_benchmark/forward_double/100000 12.0 ms 12.0 ms 58 items_per_second=8.35593M/s proj_utm_benchmark/forward_double/1000000 120 ms 120 ms 6 items_per_second=8.36301M/s proj_utm_benchmark/forward_double/10000000 1213 ms 1213 ms 1 items_per_second=8.24563M/s proj_utm_benchmark/forward_double/100000000 11977 ms 11976 ms 1 items_per_second=8.35038M/s proj_utm_benchmark/forward_double/1000000000 119680 ms 119677 ms 1 items_per_second=8.35579M/s cuproj_utm_benchmark/forward_float/100/manual_time 0.011 ms 0.040 ms 66283 items_per_second=9.47605M/s cuproj_utm_benchmark/forward_float/1000/manual_time 0.012 ms 0.041 ms 56799 items_per_second=81.2019M/s cuproj_utm_benchmark/forward_float/10000/manual_time 0.013 ms 0.042 ms 55571 items_per_second=793.482M/s cuproj_utm_benchmark/forward_float/100000/manual_time 0.013 ms 0.042 ms 53048 
items_per_second=7.5779G/s cuproj_utm_benchmark/forward_float/1000000/manual_time 0.027 ms 0.056 ms 25842 items_per_second=36.9063G/s cuproj_utm_benchmark/forward_float/10000000/manual_time 0.170 ms 0.198 ms 4130 items_per_second=58.8554G/s cuproj_utm_benchmark/forward_float/100000000/manual_time 1.60 ms 1.62 ms 439 items_per_second=62.6581G/s cuproj_utm_benchmark/forward_float/1000000000/manual_time 15.9 ms 15.9 ms 44 items_per_second=63.0518G/s cuproj_utm_benchmark/forward_double/100/manual_time 0.012 ms 0.041 ms 57960 items_per_second=8.30297M/s cuproj_utm_benchmark/forward_double/1000/manual_time 0.015 ms 0.044 ms 47605 items_per_second=68.0791M/s cuproj_utm_benchmark/forward_double/10000/manual_time 0.015 ms 0.044 ms 47353 items_per_second=676.864M/s cuproj_utm_benchmark/forward_double/100000/manual_time 0.016 ms 0.045 ms 43394 items_per_second=6.19684G/s cuproj_utm_benchmark/forward_double/1000000/manual_time 0.042 ms 0.070 ms 16621 items_per_second=23.7599G/s cuproj_utm_benchmark/forward_double/10000000/manual_time 0.304 ms 0.332 ms 2302 items_per_second=32.863G/s cuproj_utm_benchmark/forward_double/100000000/manual_time 2.93 ms 2.96 ms 240 items_per_second=34.087G/s cuproj_utm_benchmark/forward_double/1000000000/manual_time 29.3 ms 29.3 ms 24 items_per_second=34.13G/s Authors: - Mark Harris (https://github.com/harrism) Approvers: - Michael Wang (https://github.com/isVoid) URL: #1216
- Loading branch information
Showing
7 changed files
with
312 additions
and
47 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
/* | ||
* Copyright (c) 2019-2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include <benchmark/benchmark.h> | ||
#include <rmm/mr/device/cuda_memory_resource.hpp> | ||
#include <rmm/mr/device/owning_wrapper.hpp> | ||
#include <rmm/mr/device/per_device_resource.hpp> | ||
#include <rmm/mr/device/pool_memory_resource.hpp> | ||
|
||
namespace cuspatial { | ||
|
||
namespace { | ||
// memory resource factory helpers | ||
inline auto make_cuda() { return std::make_shared<rmm::mr::cuda_memory_resource>(); } | ||
|
||
inline auto make_pool() | ||
{ | ||
return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda()); | ||
} | ||
} // namespace | ||
|
||
/** | ||
* @brief Google Benchmark fixture for libcuspatial benchmarks | ||
* | ||
* libcuspatial benchmarks should use a fixture derived from this fixture class to | ||
* ensure that the RAPIDS Memory Manager pool mode is used in benchmarks, which | ||
* eliminates memory allocation / deallocation performance overhead from the | ||
* benchmark. | ||
* | ||
* The SetUp and TearDown methods of this fixture initialize RMM into pool mode | ||
* and finalize it, respectively. These methods are called automatically by | ||
* Google Benchmark | ||
* | ||
* Example: | ||
* | ||
* template <class T> | ||
* class my_benchmark : public cuspatial::benchmark { | ||
* public: | ||
* using TypeParam = T; | ||
* }; | ||
* | ||
* Then: | ||
* | ||
* BENCHMARK_TEMPLATE_DEFINE_F(my_benchmark, my_test_name, int) | ||
* (::benchmark::State& state) { | ||
* for (auto _ : state) { | ||
* // benchmark stuff | ||
* } | ||
* } | ||
* | ||
* BENCHMARK_REGISTER_F(my_benchmark, my_test_name)->Range(128, 512); | ||
*/ | ||
class benchmark : public ::benchmark::Fixture { | ||
public: | ||
virtual void SetUp(const ::benchmark::State& state) override | ||
{ | ||
mr = make_pool(); | ||
rmm::mr::set_current_device_resource(mr.get()); // set default resource to pool | ||
} | ||
|
||
virtual void TearDown(const ::benchmark::State& state) override | ||
{ | ||
// reset default resource to the initial resource | ||
rmm::mr::set_current_device_resource(nullptr); | ||
mr.reset(); | ||
} | ||
|
||
// eliminate partial override warnings (see benchmark/benchmark.h) | ||
void SetUp(::benchmark::State& st) override { SetUp(const_cast<const ::benchmark::State&>(st)); } | ||
void TearDown(::benchmark::State& st) override | ||
{ | ||
TearDown(const_cast<const ::benchmark::State&>(st)); | ||
} | ||
|
||
std::shared_ptr<rmm::mr::device_memory_resource> mr; | ||
}; | ||
|
||
}; // namespace cuspatial |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include <cuproj/projection_factories.hpp> | ||
#include <cuspatial/geometry/vec_2d.hpp> | ||
|
||
#include <cuproj_test/convert_coordinates.hpp> | ||
#include <cuproj_test/coordinate_generator.cuh> | ||
|
||
#include <benchmarks/fixture/benchmark_fixture.hpp> | ||
#include <benchmarks/synchronization/synchronization.hpp> | ||
|
||
#include <rmm/cuda_stream_view.hpp> | ||
#include <rmm/device_vector.hpp> | ||
|
||
#include <thrust/host_vector.h> | ||
|
||
#include <type_traits> | ||
|
||
template <typename T> | ||
using coordinate = typename cuspatial::vec_2d<T>; | ||
|
||
static char const* epsg_src = "EPSG:4326"; | ||
static char const* epsg_dst = "EPSG:32756"; | ||
|
||
template <typename T> | ||
auto make_input(std::size_t grid_side) | ||
{ | ||
// Sydney Harbour | ||
coordinate<T> min_corner{-33.9, 151.2}; | ||
coordinate<T> max_corner{-33.7, 151.3}; | ||
|
||
auto input = cuproj_test::make_grid_array<coordinate<T>, rmm::device_vector<coordinate<T>>>( | ||
min_corner, max_corner, grid_side, grid_side); | ||
|
||
return input; | ||
} | ||
|
||
template <typename T> | ||
static void cuproj_wgs_to_utm_benchmark(benchmark::State& state) | ||
{ | ||
auto const num_points = state.range(0); | ||
|
||
auto const grid_side{static_cast<std::size_t>(sqrt(num_points))}; | ||
|
||
auto input = make_input<T>(grid_side); | ||
|
||
rmm::device_vector<coordinate<T>> output(input.size()); | ||
|
||
auto proj = cuproj::make_projection<coordinate<T>>(epsg_src, epsg_dst); | ||
|
||
for (auto _ : state) { | ||
cuda_event_timer raii(state, true); | ||
proj.transform(input.begin(), | ||
input.end(), | ||
output.begin(), | ||
cuproj::direction::FORWARD, | ||
rmm::cuda_stream_default); | ||
} | ||
|
||
state.SetItemsProcessed(num_points * state.iterations()); | ||
} | ||
|
||
/**
 * @brief Benchmarks the PROJ (CPU) forward transform from `epsg_src` to `epsg_dst`
 * in double precision, as a baseline for the cuproj benchmark.
 *
 * The input is generated on the device with make_input, copied to the host and
 * converted to PJ_COORD before each timed call. The PROJ transformation object
 * and context are destroyed before returning, and the benchmark is skipped with
 * an error if the transformation cannot be created.
 *
 * @param state Google Benchmark state; `state.range(0)` is the requested number of points.
 */
void proj_wgs_to_utm_benchmark(benchmark::State& state)
{
  using T               = double;
  auto const num_points = state.range(0);

  // Truncates: the generated input may hold fewer points than requested.
  auto const grid_side{static_cast<std::size_t>(sqrt(num_points))};

  auto d_input = make_input<T>(grid_side);
  auto input   = thrust::host_vector<coordinate<T>>(d_input);

  std::vector<PJ_COORD> pj_input(input.size());

  PJ_CONTEXT* C = proj_context_create();
  PJ* P         = proj_create_crs_to_crs(C, epsg_src, epsg_dst, nullptr);
  if (P == nullptr) {
    // Nothing to benchmark without a transformation; release the context and bail out.
    proj_context_destroy(C);
    state.SkipWithError("proj_create_crs_to_crs failed");
    return;
  }

  for (auto _ : state) {
    // proj_trans_array is given only pj_input, i.e. it transforms in place, so the
    // source coordinates are reloaded (untimed) before every timed call.
    state.PauseTiming();
    cuproj_test::convert_coordinates(input, pj_input);
    state.ResumeTiming();
    proj_trans_array(P, PJ_FWD, pj_input.size(), pj_input.data());
  }

  // Report the number of coordinates actually transformed per iteration.
  state.SetItemsProcessed(static_cast<int64_t>(pj_input.size()) * state.iterations());

  // Release PROJ resources: the transformation first, then its context.
  proj_destroy(P);
  proj_context_destroy(C);
}
|
||
// Fixture for the PROJ (CPU) benchmark; adds nothing to the stock Google Benchmark fixture.
class proj_utm_benchmark : public ::benchmark::Fixture {
};
|
||
// Benchmark size range. Adjust for GPUs/CPUs with more or less memory.
// In double precision each (x, y) point takes 16 bytes, and both an input and an
// output buffer are needed: 10^8 points -> 3.2GB+, 10^9 points -> 32GB+.
// An H100 80GB has plenty of room for 10^9 points.

constexpr int range_min{100};
constexpr int range_max{100'000'000};
|
||
// PROJ baseline, double precision: sizes range_min..range_max in powers of ten,
// reported in milliseconds.
BENCHMARK_DEFINE_F(proj_utm_benchmark, forward_double)(::benchmark::State& st)
{
  proj_wgs_to_utm_benchmark(st);
}
BENCHMARK_REGISTER_F(proj_utm_benchmark, forward_double)
  ->Unit(benchmark::kMillisecond)
  ->RangeMultiplier(10)
  ->Range(range_min, range_max);
|
||
class cuproj_utm_benchmark : public cuspatial::benchmark {}; | ||
|
||
#define UTM_CUPROJ_BENCHMARK_DEFINE(name, type) \ | ||
BENCHMARK_DEFINE_F(cuproj_utm_benchmark, name)(::benchmark::State & state) \ | ||
{ \ | ||
cuproj_wgs_to_utm_benchmark<type>(state); \ | ||
} \ | ||
BENCHMARK_REGISTER_F(cuproj_utm_benchmark, name) \ | ||
->RangeMultiplier(10) \ | ||
->Range(range_min, range_max) \ | ||
->UseManualTime() \ | ||
->Unit(benchmark::kMillisecond); | ||
|
||
UTM_CUPROJ_BENCHMARK_DEFINE(forward_float, float); | ||
UTM_CUPROJ_BENCHMARK_DEFINE(forward_double, double); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <proj.h> | ||
|
||
#include <algorithm> | ||
#include <type_traits> | ||
|
||
namespace cuproj_test { | ||
|
||
// Convert coordinates from a x-y struct to a PJ_COORD struct or vice versa | ||
template <typename InVector, typename OutVector> | ||
void convert_coordinates(InVector const& in, OutVector& out) | ||
{ | ||
using in_coord_type = typename InVector::value_type; | ||
using out_coord_type = typename OutVector::value_type; | ||
|
||
static_assert( | ||
(std::is_same_v<out_coord_type, PJ_COORD> != std::is_same_v<in_coord_type, PJ_COORD>), | ||
"Invalid coordinate vector conversion"); | ||
|
||
if constexpr (std::is_same_v<in_coord_type, PJ_COORD>) { | ||
using T = typename out_coord_type::value_type; | ||
auto proj_coord_to_coordinate = [](auto const& c) { | ||
return out_coord_type{static_cast<T>(c.xy.x), static_cast<T>(c.xy.y)}; | ||
}; | ||
std::transform(in.begin(), in.end(), out.begin(), proj_coord_to_coordinate); | ||
} else if constexpr (std::is_same_v<out_coord_type, PJ_COORD>) { | ||
auto coordinate_to_proj_coord = [](auto const& c) { return PJ_COORD{c.x, c.y, 0, 0}; }; | ||
std::transform(in.begin(), in.end(), out.begin(), coordinate_to_proj_coord); | ||
} | ||
} | ||
|
||
} // namespace cuproj_test |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.