From ea3cf2efd318cf78737120785737e69e582c0107 Mon Sep 17 00:00:00 2001 From: Raymond Douglass Date: Fri, 2 Oct 2020 10:42:38 -0400 Subject: [PATCH 01/18] DOC v0.17 Updates --- CHANGELOG.md | 8 ++++++++ cpp/CMakeLists.txt | 2 +- docs/source/conf.py | 4 ++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 419f25c15..09f1e680b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +# cuSpatial 0.17.0 (Date TBD) + +## New Features + +## Improvements + +## Bug Fixes + # cuSpatial 0.16.0 (Date TBD) ## New Features diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 66c16dac0..5516c91d7 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -15,7 +15,7 @@ #============================================================================= cmake_minimum_required(VERSION 3.12 FATAL_ERROR) -project(CUDA_SPATIAL VERSION 0.16.0 LANGUAGES C CXX CUDA) +project(CUDA_SPATIAL VERSION 0.17.0 LANGUAGES C CXX CUDA) if(NOT CMAKE_CUDA_COMPILER) message(SEND_ERROR "CMake cannot locate a CUDA compiler") diff --git a/docs/source/conf.py b/docs/source/conf.py index 79a7e7d81..115c3a734 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -70,9 +70,9 @@ # built documents. # # The short X.Y version. -version = '0.16' +version = '0.17' # The full version, including alpha/beta/rc tags. -release = '0.16.0' +release = '0.17.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. From 3952952da3fe3a8ae05565f9f6d4be322f328a7e Mon Sep 17 00:00:00 2001 From: Mark Sadang Date: Fri, 6 Nov 2020 12:28:22 -0800 Subject: [PATCH 02/18] Gpuciscripts clean and update --- ci/checks/changelog.sh | 2 +- ci/checks/style.sh | 7 ++-- ci/cpu/build.sh | 35 +++++++++++-------- ci/cpu/cuspatial/build_cuspatial.sh | 2 +- ci/cpu/libcuspatial/build_libcuspatial.sh | 2 +- ci/cpu/upload_anaconda.sh | 4 +-- ci/docs/build.sh | 21 +++++++----- ci/gpu/build.sh | 41 ++++++++++++----------- ci/local/README.md | 3 +- 9 files changed, 66 insertions(+), 51 deletions(-) diff --git a/ci/checks/changelog.sh b/ci/checks/changelog.sh index 9e8453203..b9a22679f 100644 --- a/ci/checks/changelog.sh +++ b/ci/checks/changelog.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2018, NVIDIA CORPORATION. +# COPYRIGHT (c) 2020, NVIDIA CORPORATION. ######################### # cuSpatial CHANGELOG Tester # ######################### diff --git a/ci/checks/style.sh b/ci/checks/style.sh index 16638fdd2..0a92d4eb7 100644 --- a/ci/checks/style.sh +++ b/ci/checks/style.sh @@ -1,17 +1,18 @@ #!/bin/bash -# Copyright (c) 2018, NVIDIA CORPORATION. +# COPYRIGHT (c) 2020, NVIDIA CORPORATION. ##################### # cuSpatial Style Tester # ##################### # Ignore errors and set path set +e -PATH=/conda/bin:$PATH +PATH=/opt/conda/bin:$PATH LC_ALL=C.UTF-8 LANG=C.UTF-8 # Activate common conda env -source activate gdf +. /opt/conda/etc/profile.d/conda.sh +conda activate rapids # Run isort and get results/return code ISORT=`isort --check-only python/**/*.py` diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index d77f2ce71..fd28ebca8 100644 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -1,18 +1,18 @@ #!/bin/bash -# Copyright (c) 2018, NVIDIA CORPORATION. +# COPYRIGHT (c) 2020, NVIDIA CORPORATION. ###################################### # cuSpatial CPU conda build script for CI # ###################################### set -e # Logger function for build status output -function logger() { +function gpuci_logger() { echo -e "\n>>>> $@\n" } # Set path and build parallel level -export PATH=/conda/bin:/usr/local/cuda/bin:$PATH -export PARALLEL_LEVEL=4 +export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH +export PARALLEL_LEVEL=-4 export CUDF_HOME="${WORKSPACE}/cudf" # Set home to the job's workspace @@ -26,21 +26,28 @@ if [[ "$BUILD_MODE" = "branch" && "$SOURCE_BRANCH" = branch-* ]] ; then export VERSION_SUFFIX=`date +%y%m%d` fi +# Setup 'gpuci_conda_retry' for build retries (results in 2 total attempts) +export GPUCI_CONDA_RETRY_MAX=1 +export GPUCI_CONDA_RETRY_SLEEP=30 + ################################################################################ # SETUP - Check environment ################################################################################ -logger "Get env..." +gpuci_logger "Get env" env -logger "Activate conda env..." -source activate gdf +gpuci_logger "Activate conda env" +. /opt/conda/etc/profile.d/conda.sh +conda activate rapids -logger "Check versions..." +gpuci_logger "Check versions" python --version -gcc --version -g++ --version -conda list +$CC --version +$CXX --version +conda info +conda config --show-sources +conda list --show-channel-urls # FIX Added to deal with Anancoda SSL verification issues during conda builds conda config --set ssl_verify False @@ -49,17 +56,17 @@ conda config --set ssl_verify False # BUILD - Conda package builds (conda deps: libcupatial <- cuspatial) ########################################################################################## -logger "Build conda pkg for libcuspatial..." +gpuci_logger "Build conda pkg for libcuspatial" cd $WORKSPACE source ci/cpu/libcuspatial/build_libcuspatial.sh -logger "Build conda pkg for cuspatial..." +gpuci_logger "Build conda pkg for cuspatial" source ci/cpu/cuspatial/build_cuspatial.sh ################################################################################ # UPLOAD - Conda packages ################################################################################ -logger "Upload conda pkgs..." +gpuci_logger "Upload conda pkgs" source ci/cpu/upload_anaconda.sh diff --git a/ci/cpu/cuspatial/build_cuspatial.sh b/ci/cpu/cuspatial/build_cuspatial.sh index 15f833f9c..e99434c5a 100644 --- a/ci/cpu/cuspatial/build_cuspatial.sh +++ b/ci/cpu/cuspatial/build_cuspatial.sh @@ -1,5 +1,5 @@ set -e echo "Building cuspatial" -conda build conda/recipes/cuspatial --python=$PYTHON +gpuci_conda_retry build conda/recipes/cuspatial --python=$PYTHON diff --git a/ci/cpu/libcuspatial/build_libcuspatial.sh b/ci/cpu/libcuspatial/build_libcuspatial.sh index f4a395746..c81c936c0 100644 --- a/ci/cpu/libcuspatial/build_libcuspatial.sh +++ b/ci/cpu/libcuspatial/build_libcuspatial.sh @@ -1,5 +1,5 @@ set -e echo "Building libcuspatial" -conda build conda/recipes/libcuspatial +gpuci_conda_retry build conda/recipes/libcuspatial diff --git a/ci/cpu/upload_anaconda.sh b/ci/cpu/upload_anaconda.sh index 8f6c3bb6d..6f24cbc4f 100644 --- a/ci/cpu/upload_anaconda.sh +++ b/ci/cpu/upload_anaconda.sh @@ -2,8 +2,8 @@ set -e -export LIBCUSPATIAL_FILE=`conda build conda/recipes/libcuspatial --output` -export CUSPATIAL_FILE=`conda build conda/recipes/cuspatial --python=$PYTHON --output` +export LIBCUSPATIAL_FILE=`gpuci_conda_retry build conda/recipes/libcuspatial --output` +export CUSPATIAL_FILE=`gpuci_conda_retry build conda/recipes/cuspatial --python=$PYTHON --output` CUDA_REL=${CUDA_VERSION%.*} diff --git a/ci/docs/build.sh b/ci/docs/build.sh index b600aede9..193ddd19e 100644 --- a/ci/docs/build.sh +++ b/ci/docs/build.sh @@ -11,34 +11,37 @@ if [ -z "$PROJECT_WORKSPACE" ]; then fi export DOCS_WORKSPACE=$WORKSPACE/docs -export PATH=/conda/bin:/usr/local/cuda/bin:$PATH +export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH export HOME=$WORKSPACE export PROJECT_WORKSPACE=/rapids/cuspatial export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache" export NIGHTLY_VERSION=$(echo $BRANCH_VERSION | awk -F. '{print $2}') export PROJECTS=(cuspatial) -logger "Check environment..." +gpuci_logger "Check environment" env -logger "Check GPU usage..." +gpuci_logger "Check GPU usage" nvidia-smi -logger "Activate conda env..." -source activate rapids +gpuci_logger "Activate conda env" +. /opt/conda/etc/profile.d/conda.sh +conda activate rapids # TODO: Move installs to docs-build-env meta package -conda install -c anaconda markdown beautifulsoup4 jq +gpuci_conda_retry install -c anaconda markdown beautifulsoup4 jq pip install sphinx-markdown-tables -logger "Check versions..." +gpuci_logger "Check versions" python --version $CC --version $CXX --version -conda list +conda info +conda config --show-sources +conda list --show-channel-urls # Build Python docs -logger "Build Sphinx docs..." +gpuci_logger "Build Sphinx docs" cd $PROJECT_WORKSPACE/docs make html diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index b00f07176..505eccd4c 100644 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2018, NVIDIA CORPORATION. +# COPYRIGHT (c) 2020, NVIDIA CORPORATION. ######################################### # cuSpatial GPU build and test script for CI # ######################################### @@ -8,7 +8,7 @@ NUMARGS=$# ARGS=$* # Logger function for build status output -function logger() { +function gpuci_logger() { echo -e "\n>>>> $@\n" } @@ -18,8 +18,8 @@ function hasArg { } # Set path and build parallel level -export PATH=/conda/bin:/usr/local/cuda/bin:$PATH -export PARALLEL_LEVEL=4 +export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH +export PARALLEL_LEVEL=-4 export CUDA_REL=${CUDA_VERSION%.*} export CUDF_HOME="${WORKSPACE}/cudf" export CUSPATIAL_HOME="${WORKSPACE}" @@ -36,32 +36,35 @@ export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` # SETUP - Check environment ################################################################################ -logger "Check environment..." +gpuci_logger "Check environment" env -logger "Check GPU usage..." +gpuci_logger "Check GPU usage" nvidia-smi -logger "Activate conda env..." -source activate gdf -conda install "cudf=${MINOR_VERSION}.*" "cudatoolkit=$CUDA_REL" \ +gpuci_logger "Activate conda env" +. /opt/conda/etc/profile.d/conda.sh +conda activate rapids +gpuci_conda_retry install "cudf=${MINOR_VERSION}.*" "cudatoolkit=$CUDA_REL" \ "rapids-build-env=$MINOR_VERSION.*" # https://docs.rapids.ai/maintainers/depmgmt/ # conda remove -f rapids-build-env -# conda install "your-pkg=1.0.0" +# gpuci_conda_retry install "your-pkg=1.0.0" -logger "Check versions..." +gpuci_logger "Check versions" python --version $CC --version $CXX --version -conda list +conda info +conda config --show-sources +conda list --show-channel-urls ################################################################################ # cuSpatial currently requires a the cudf repo for private headers ################################################################################ -logger "Clone cudf" +gpuci_logger "Clone cudf" git clone https://github.com/rapidsai/cudf.git -b branch-$MINOR_VERSION ${CUDF_HOME} cd $CUDF_HOME git submodule update --init --remote --recursive @@ -70,7 +73,7 @@ git submodule update --init --remote --recursive # BUILD - Build libcuspatial and cuSpatial from source ################################################################################ -logger "Build cuSpatial" +gpuci_logger "Build cuSpatial" cd $WORKSPACE ./build.sh clean libcuspatial cuspatial tests @@ -79,12 +82,12 @@ cd $WORKSPACE ############################################################################### if hasArg --skip-tests; then - logger "Skipping tests..." + gpuci_logger "Skipping tests" else - logger "Check GPU usage..." + gpuci_logger "Check GPU usage" nvidia-smi - logger "GoogleTests..." + gpuci_logger "GoogleTests" cd $WORKSPACE/cpp/build for gt in ${WORKSPACE}/cpp/build/gtests/* ; do @@ -93,10 +96,10 @@ else ${gt} --gtest_output=xml:${WORKSPACE}/test-results/ done - logger "Download/Generate Test Data" + gpuci_logger "Download/Generate Test Data" #TODO - logger "Test cuSpatial" + gpuci_logger "Test cuSpatial" #TODO #Python Unit tests for cuSpatial diff --git a/ci/local/README.md b/ci/local/README.md index ed24794a5..27595e7f3 100644 --- a/ci/local/README.md +++ b/ci/local/README.md @@ -30,7 +30,8 @@ For a full list of available gpuCI docker images, visit our [DockerHub](https:// Style Check: ```bash $ bash ci/local/build.sh -r ~/rapids/cuspatial -s -$ source activate rapids # Activate gpuCI conda environment +$ . /opt/conda/etc/profile.d/conda.sh +$ conda activate rapids # Activate gpuCI conda environment $ cd rapids $ flake8 python ``` From 6c2b52bd4d4d75ffad1a1afd16029cabdbe0dc0e Mon Sep 17 00:00:00 2001 From: Keith Kraus Date: Wed, 11 Nov 2020 00:52:49 -0500 Subject: [PATCH 03/18] Fix build issues related to libcudf/cudf changes (#322) Due to libcudf splitting into multiple shared libraries to fix debug builds, libcuspatial was left in a broken state. We don't fully understand why, but it seems like some combination of the linker pruning the `libcudf.so` dependency since it finds no symbols it uses in it BEFORE it adds `DT_NEEDED` entries from `libcudf.so` where it does find symbols. Added some linker flags to temporarily fix the build while we continue investigating a more proper solution from the libcudf side. Also fixed a minor gcc9 issue related to a copy elision. Also fixed usage of a removed cudf Python API. --- CHANGELOG.md | 1 + cpp/CMakeLists.txt | 14 +++++--------- cpp/benchmarks/CMakeLists.txt | 9 ++++++--- cpp/src/spatial/polygon_bounding_box.cu | 2 +- cpp/tests/CMakeLists.txt | 17 ++++++++--------- python/cuspatial/cuspatial/core/gis.py | 4 ++-- 6 files changed, 23 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15828ed31..b9e7dec8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ - PR #294 Fix include of deprecated RMM header file. - PR #296 Updates for RMM being header only. - PR #298 Fix Python docs to render first argument of each public function. +- PR #322 Fix build issues related to libcudf split build changes # cuSpatial 0.15.0 (26 Aug 2020) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5516c91d7..0b403575b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -201,9 +201,6 @@ endif (CUDF_INCLUDE AND CUDF_LIBRARY) find_package(GDAL REQUIRED) -message(STATUS "GDAL: GDAL_LIBRARIES set to ${GDAL_LIBRARIES}") -message(STATUS "GDAL: GDAL_INCLUDE_DIRS set to ${GDAL_INCLUDE_DIRS}") - if(NOT GDAL_FOUND) message(FATAL_ERROR "GDAL not found, please check your settings.") endif(NOT GDAL_FOUND) @@ -254,7 +251,6 @@ endif(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES) include_directories("${CMAKE_BINARY_DIR}/include" "${CMAKE_SOURCE_DIR}/include" "${CMAKE_SOURCE_DIR}/src" - "${GDAL_INCLUDE_DIRS}" "${RMM_INCLUDE}" "${CUDF_INCLUDE}") @@ -268,9 +264,7 @@ endif(CONDA_INCLUDE_DIRS) link_directories("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable containing the link directories for nvcc "${CMAKE_BINARY_DIR}/lib" "${FLATBUFFERS_LIBRARY_DIR}" - "${GDAL_LIBRARIES}" - "${GTEST_LIBRARY_DIR}" - "${CUDF_LIBRARY}") + "${GTEST_LIBRARY_DIR}") if(CONDA_LINK_DIRS) link_directories("${CONDA_LINK_DIRS}") @@ -316,8 +310,10 @@ endif(USE_NVTX) ################################################################################################### # - link libraries -------------------------------------------------------------------------------- -target_link_libraries(cuspatial cudf cudart cuda cusparse nvrtc GDAL::GDAL) - +target_link_libraries(cuspatial cudart cusparse GDAL::GDAL) +# Because libcudf.so doesn't contain any symbols, the linker will determine that it's okay to prune +# it before copying `DT_NEEDED` entries from it +target_link_libraries(cuspatial "-Wl,--no-as-needed" cudf "-Wl,--as-needed") ################################################################################################### # - install targets ------------------------------------------------------------------------------- diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index b13a75d6b..1547ea90c 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -27,9 +27,12 @@ function(ConfigureBench CMAKE_BENCH_NAME CMAKE_BENCH_SRC) ${CMAKE_BENCH_SRC} "${CMAKE_CURRENT_SOURCE_DIR}/synchronization/synchronization.cpp") set_target_properties(${CMAKE_BENCH_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) - target_link_libraries(${CMAKE_BENCH_NAME} benchmark benchmark_main pthread cuspatial cudf - cudftestutil cudart cuda "${ARROW_LIB}" ${ZLIB_LIBRARIES} - nvrtc GDAL::GDAL) + # By default the linker doesn't transitively add `DT_NEEDED` entries in executables like it does + # for shared libraries, so to work around current libcudf build behavior we're adding the linker + # flag + target_link_libraries(${CMAKE_BENCH_NAME} "-Wl,--copy-dt-needed-entries") + target_link_libraries(${CMAKE_BENCH_NAME} benchmark benchmark_main pthread cuspatial + cudftestutil) set_target_properties(${CMAKE_BENCH_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/gbenchmarks") endfunction(ConfigureBench) diff --git a/cpp/src/spatial/polygon_bounding_box.cu b/cpp/src/spatial/polygon_bounding_box.cu index dd31d486c..073f3e074 100644 --- a/cpp/src/spatial/polygon_bounding_box.cu +++ b/cpp/src/spatial/polygon_bounding_box.cu @@ -87,7 +87,7 @@ std::unique_ptr compute_polygon_bounding_boxes(cudf::column_view co point_ids.begin(), thrust::maximum()); - return std::move(point_ids); + return point_ids; }(); auto type = cudf::data_type{cudf::type_to_id()}; diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 90c2e7678..01bbef9ac 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -42,9 +42,12 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC) add_executable(${CMAKE_TEST_NAME} ${CMAKE_TEST_SRC}) set_target_properties(${CMAKE_TEST_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) - target_link_libraries(${CMAKE_TEST_NAME} gmock gtest gmock_main gtest_main pthread cuspatial cudf - cudftestutil cudart cuda "${ARROW_LIB}" ${ZLIB_LIBRARIES} - nvrtc GDAL::GDAL) + # By default the linker doesn't transitively add `DT_NEEDED` entries in executables like it does + # for shared libraries, so to work around current libcudf build behavior we're adding the linker + # flag + target_link_libraries(${CMAKE_TEST_NAME} "-Wl,--copy-dt-needed-entries") + target_link_libraries(${CMAKE_TEST_NAME} gmock gtest gmock_main gtest_main pthread cuspatial + cudftestutil) if(USE_NVTX) target_link_libraries(${CMAKE_TEST_NAME} ${NVTX_LIBRARY}) endif(USE_NVTX) @@ -67,7 +70,6 @@ include_directories("${CMAKE_BINARY_DIR}/include" "${CMAKE_SOURCE_DIR}/include" "${CMAKE_SOURCE_DIR}" "${CMAKE_SOURCE_DIR}/src" - "${GDAL_INCLUDE_DIRS}" "${GTEST_INCLUDE_DIR}" "${RMM_INCLUDE}" "${CUDF_INCLUDE}") @@ -76,12 +78,9 @@ include_directories("${CMAKE_BINARY_DIR}/include" # - library paths --------------------------------------------------------------------------------- link_directories("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable containing the link directories for nvcc - "${CMAKE_BINARY_DIR}/lib" - "${GDAL_LIBRARIES}" + "${CMAKE_BINARY_DIR}" "${CONDA_LINK_DIRS}" - "${GTEST_LIBRARY_DIR}" - "${CUDF_LIBRARY}" - "${CUSPATIAL_LIBRARY}") + "${GTEST_LIBRARY_DIR}") set(CARTESIAN_PRODUCT_GROUP_INDEX_ITERATOR_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/spatial/cartesian_product_group_index_iterator_test.cpp") diff --git a/python/cuspatial/cuspatial/core/gis.py b/python/cuspatial/cuspatial/core/gis.py index 947618414..e7634c5a8 100644 --- a/python/cuspatial/cuspatial/core/gis.py +++ b/python/cuspatial/cuspatial/core/gis.py @@ -91,7 +91,7 @@ def directed_hausdorff_distance(xs, ys, points_per_space): ) result = result.data_array_view result = result.reshape(num_spaces, num_spaces) - return DataFrame.from_gpu_matrix(result) + return DataFrame(result) def haversine_distance(p1_lon, p1_lat, p2_lon, p2_lat): @@ -249,7 +249,7 @@ def point_in_polygon( result = gis_utils.pip_bitmap_column_to_binary_array( polygon_bitmap_column=result, width=len(poly_offsets) ) - result = DataFrame.from_gpu_matrix(result) + result = DataFrame(result) result = result._apply_support_method("astype", dtype="bool") result.columns = [x for x in list(reversed(poly_offsets.index))] result = result[list(reversed(result.columns))] From d1eed197910ec8ed964c40750bb30538c7face88 Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Wed, 11 Nov 2020 20:21:41 -0600 Subject: [PATCH 04/18] Add cuda to target_link_libraries (#323) --- CHANGELOG.md | 1 + cpp/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b9e7dec8a..72109d771 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ - PR #296 Updates for RMM being header only. - PR #298 Fix Python docs to render first argument of each public function. - PR #322 Fix build issues related to libcudf split build changes +- PR #323 Add cuda to target_link_libraries # cuSpatial 0.15.0 (26 Aug 2020) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 0b403575b..b8ea063fc 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -310,7 +310,7 @@ endif(USE_NVTX) ################################################################################################### # - link libraries -------------------------------------------------------------------------------- -target_link_libraries(cuspatial cudart cusparse GDAL::GDAL) +target_link_libraries(cuspatial cuda cudart cusparse GDAL::GDAL) # Because libcudf.so doesn't contain any symbols, the linker will determine that it's okay to prune # it before copying `DT_NEEDED` entries from it target_link_libraries(cuspatial "-Wl,--no-as-needed" cudf "-Wl,--as-needed") From 45ad13f8735bd86b4591367487b19be7568f31da Mon Sep 17 00:00:00 2001 From: Keith Kraus Date: Wed, 11 Nov 2020 23:15:00 -0500 Subject: [PATCH 05/18] Pin cmake policies to cmake 3.17 version (#310) Planning to upgrade to cmake 3.18 in the near future and the CUDA_ARCHITECTURES change is a breaking change, so pinning to 3.17 policies to prevent the breakages. --- CHANGELOG.md | 1 + cpp/CMakeLists.txt | 3 ++- cpp/benchmarks/CMakeLists.txt | 18 +++++++++++++++--- cpp/tests/CMakeLists.txt | 3 --- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 72109d771..86380a990 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## New Features ## Improvements +- PR #310 Pin cmake policies to cmake 3.17 version ## Bug Fixes diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b8ea063fc..ecaaa0134 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -13,7 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. #============================================================================= -cmake_minimum_required(VERSION 3.12 FATAL_ERROR) + +cmake_minimum_required(VERSION 3.12...3.17 FATAL_ERROR) project(CUDA_SPATIAL VERSION 0.17.0 LANGUAGES C CXX CUDA) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 1547ea90c..a027c7d77 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -1,6 +1,18 @@ -cmake_minimum_required(VERSION 3.12 FATAL_ERROR) - -project(CUSPATIAL_BENCHMARKS LANGUAGES C CXX CUDA) +#============================================================================= +# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= if(NOT CMAKE_CUDA_COMPILER) message(SEND_ERROR "CMake cannot locate a CUDA compiler") diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 01bbef9ac..d039a6235 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -13,9 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. #============================================================================= -cmake_minimum_required(VERSION 3.12 FATAL_ERROR) - -project(CUSPATIAL_TESTS LANGUAGES C CXX CUDA) if(NOT CMAKE_CUDA_COMPILER) message(SEND_ERROR "CMake cannot locate a CUDA compiler") From ec8941b343e3844bb40f3b52c9595f956c83eaff Mon Sep 17 00:00:00 2001 From: Jianting Zhang Date: Thu, 12 Nov 2020 18:34:48 -0500 Subject: [PATCH 06/18] Zero initialize device_uvector before scatter (#320) Zero initialize device_uvector before scatter Co-authored-by: ptaylor --- CHANGELOG.md | 1 + cpp/src/indexing/construction/detail/phase_2.cuh | 2 ++ 2 files changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 86380a990..6b4fce3f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - PR #310 Pin cmake policies to cmake 3.17 version ## Bug Fixes +- PR #320 Fix a quadtree construction bug - needs zero out device_uvector before scatter # cuSpatial 0.16.0 (Date TBD) diff --git a/cpp/src/indexing/construction/detail/phase_2.cuh b/cpp/src/indexing/construction/detail/phase_2.cuh index 5c1ee3fbf..cfe48f5b4 100644 --- a/cpp/src/indexing/construction/detail/phase_2.cuh +++ b/cpp/src/indexing/construction/detail/phase_2.cuh @@ -193,6 +193,8 @@ inline rmm::device_uvector compute_parent_positions( position_map.begin()); // line 2 of algorithm in Fig. 5 in ref. rmm::device_uvector parent_pos(num_child_nodes, stream); + thrust::uninitialized_fill( + rmm::exec_policy(stream)->on(stream), parent_pos.begin(), parent_pos.end(), 0); thrust::scatter(rmm::exec_policy(stream)->on(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + num_parent_nodes, From f09566414d3ed1ca816d62283ac722680b343307 Mon Sep 17 00:00:00 2001 From: Mark Sadang Date: Thu, 12 Nov 2020 20:33:27 -0800 Subject: [PATCH 07/18] Updated build.sh and upload.sh --- ci/cpu/build.sh | 43 +++++++---------- ci/cpu/cuspatial/build_cuspatial.sh | 5 -- ci/cpu/libcuspatial/build_libcuspatial.sh | 5 -- ci/cpu/upload.sh | 56 +++++++++++++++++++++++ ci/cpu/upload_anaconda.sh | 39 ---------------- 5 files changed, 73 insertions(+), 75 deletions(-) delete mode 100644 ci/cpu/cuspatial/build_cuspatial.sh delete mode 100644 ci/cpu/libcuspatial/build_libcuspatial.sh create mode 100644 ci/cpu/upload.sh delete mode 100644 ci/cpu/upload_anaconda.sh diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index fd28ebca8..57e4945de 100644 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -1,18 +1,18 @@ #!/bin/bash -# COPYRIGHT (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2018, NVIDIA CORPORATION. ###################################### # cuSpatial CPU conda build script for CI # ###################################### set -e # Logger function for build status output -function gpuci_logger() { +function logger() { echo -e "\n>>>> $@\n" } # Set path and build parallel level -export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH -export PARALLEL_LEVEL=-4 +export PATH=/conda/bin:/usr/local/cuda/bin:$PATH +export PARALLEL_LEVEL=4 export CUDF_HOME="${WORKSPACE}/cudf" # Set home to the job's workspace @@ -26,28 +26,21 @@ if [[ "$BUILD_MODE" = "branch" && "$SOURCE_BRANCH" = branch-* ]] ; then export VERSION_SUFFIX=`date +%y%m%d` fi -# Setup 'gpuci_conda_retry' for build retries (results in 2 total attempts) -export GPUCI_CONDA_RETRY_MAX=1 -export GPUCI_CONDA_RETRY_SLEEP=30 - ################################################################################ # SETUP - Check environment ################################################################################ -gpuci_logger "Get env" +logger "Get env..." env -gpuci_logger "Activate conda env" -. /opt/conda/etc/profile.d/conda.sh -conda activate rapids +logger "Activate conda env..." +source activate gdf -gpuci_logger "Check versions" +logger "Check versions..." python --version -$CC --version -$CXX --version -conda info -conda config --show-sources -conda list --show-channel-urls +gcc --version +g++ --version +conda list # FIX Added to deal with Anancoda SSL verification issues during conda builds conda config --set ssl_verify False @@ -56,17 +49,15 @@ conda config --set ssl_verify False # BUILD - Conda package builds (conda deps: libcupatial <- cuspatial) ########################################################################################## -gpuci_logger "Build conda pkg for libcuspatial" -cd $WORKSPACE -source ci/cpu/libcuspatial/build_libcuspatial.sh +echo "Building libcuspatial" +conda build conda/recipes/libcuspatial -gpuci_logger "Build conda pkg for cuspatial" -source ci/cpu/cuspatial/build_cuspatial.sh +echo "Building cuspatial" +conda build conda/recipes/cuspatial --python=$PYTHON ################################################################################ # UPLOAD - Conda packages ################################################################################ -gpuci_logger "Upload conda pkgs" -source ci/cpu/upload_anaconda.sh - +logger "Upload conda pkgs..." +source ci/cpu/upload.sh diff --git a/ci/cpu/cuspatial/build_cuspatial.sh b/ci/cpu/cuspatial/build_cuspatial.sh deleted file mode 100644 index e99434c5a..000000000 --- a/ci/cpu/cuspatial/build_cuspatial.sh +++ /dev/null @@ -1,5 +0,0 @@ -set -e - -echo "Building cuspatial" -gpuci_conda_retry build conda/recipes/cuspatial --python=$PYTHON - diff --git a/ci/cpu/libcuspatial/build_libcuspatial.sh b/ci/cpu/libcuspatial/build_libcuspatial.sh deleted file mode 100644 index c81c936c0..000000000 --- a/ci/cpu/libcuspatial/build_libcuspatial.sh +++ /dev/null @@ -1,5 +0,0 @@ -set -e - -echo "Building libcuspatial" -gpuci_conda_retry build conda/recipes/libcuspatial - diff --git a/ci/cpu/upload.sh b/ci/cpu/upload.sh new file mode 100644 index 000000000..f82cfd3f5 --- /dev/null +++ b/ci/cpu/upload.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +set -e + +# Setup 'gpuci_retry' for upload retries (results in 4 total attempts) +export GPUCI_RETRY_MAX=3 +export GPUCI_RETRY_SLEEP=30 + +# Set default label options if they are not defined elsewhere +export LABEL_OPTION=${LABEL_OPTION:-"--label main"} + +# Skip uploads unless BUILD_MODE == "branch" +if [ ${BUILD_MODE} != "branch" ]; then + echo "Skipping upload" + return 0 +fi + +# Skip uploads if there is no upload key +if [ -z "$MY_UPLOAD_KEY" ]; then + echo "No upload key" + return 0 +fi + +################################################################################ +# SETUP - Get conda file output locations +################################################################################ + +gpuci_logger "Get conda file output locations" +export LIBCUSPATIAL_FILE=`conda build conda/recipes/libcuspatial --output` +export CUSPATIAL_FILE=`conda build conda/recipes/cuspatial --python=$PYTHON --output` + +################################################################################ +# UPLOAD - Conda packages +################################################################################ + +gpuci_logger "Starting conda uploads" + +if [ "$UPLOAD_LIBCUSPATIAL" == "1" ]; then + LABEL_OPTION="--label main" + echo "LABEL_OPTION=${LABEL_OPTION}" + + test -e ${LIBCUSPATIAL_FILE} + echo "Upload libcuspatial" + echo ${LIBCUSPATIAL_FILE} + anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${LIBCUSPATIAL_FILE} +fi + +if [ "$UPLOAD_CUSPATIAL" == "1" ]; then + LABEL_OPTION="--label main" + echo "LABEL_OPTION=${LABEL_OPTION}" + + test -e ${CUSPATIAL_FILE} + echo "Upload cuspatial" + echo ${CUSPATIAL_FILE} + anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUSPATIAL_FILE} +fi diff --git a/ci/cpu/upload_anaconda.sh b/ci/cpu/upload_anaconda.sh deleted file mode 100644 index 6f24cbc4f..000000000 --- a/ci/cpu/upload_anaconda.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -set -e - -export LIBCUSPATIAL_FILE=`gpuci_conda_retry build conda/recipes/libcuspatial --output` -export CUSPATIAL_FILE=`gpuci_conda_retry build conda/recipes/cuspatial --python=$PYTHON --output` - -CUDA_REL=${CUDA_VERSION%.*} - -if [ ${BUILD_MODE} != "branch" ]; then - echo "Skipping upload" - return 0 -fi - -if [ -z "$MY_UPLOAD_KEY" ]; then - echo "No upload key" - return 0 -fi - -if [ "$UPLOAD_LIBCUSPATIAL" == "1" ]; then - LABEL_OPTION="--label main" - echo "LABEL_OPTION=${LABEL_OPTION}" - - test -e ${LIBCUSPATIAL_FILE} - echo "Upload libcuspatial" - echo ${LIBCUSPATIAL_FILE} - anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${LIBCUSPATIAL_FILE} -fi - -if [ "$UPLOAD_CUSPATIAL" == "1" ]; then - LABEL_OPTION="--label main" - echo "LABEL_OPTION=${LABEL_OPTION}" - - test -e ${CUSPATIAL_FILE} - echo "Upload cuspatial" - echo ${CUSPATIAL_FILE} - anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUSPATIAL_FILE} -fi - From 96a7b576b57c6f5e33ea8da67bac1ad6a320b486 Mon Sep 17 00:00:00 2001 From: Mark Sadang Date: Thu, 12 Nov 2020 20:44:41 -0800 Subject: [PATCH 08/18] Changed conda to gpuci_conda_retry --- ci/cpu/build.sh | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 57e4945de..6002d8dab 100644 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -5,19 +5,20 @@ ###################################### set -e -# Logger function for build status output -function logger() { - echo -e "\n>>>> $@\n" -} - # Set path and build parallel level -export PATH=/conda/bin:/usr/local/cuda/bin:$PATH -export PARALLEL_LEVEL=4 -export CUDF_HOME="${WORKSPACE}/cudf" +export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH +export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} # Set home to the job's workspace export HOME=$WORKSPACE +# Determine CUDA release version +export CUDA_REL=${CUDA_VERSION%.*} + +# Setup 'gpuci_conda_retry' for build retries (results in 2 total attempts) +export GPUCI_CONDA_RETRY_MAX=1 +export GPUCI_CONDA_RETRY_SLEEP=30 + # Switch to project root; also root of repo checkout cd $WORKSPACE @@ -30,17 +31,22 @@ fi # SETUP - Check environment ################################################################################ -logger "Get env..." +gpuci_logger "Check environment variables" env -logger "Activate conda env..." -source activate gdf +gpuci_logger "Activate conda env" +. /opt/conda/etc/profile.d/conda.sh +conda activate rapids -logger "Check versions..." +gpuci_logger "Check compiler versions" python --version -gcc --version -g++ --version -conda list +$CC --version +$CXX --version + +gpuci_logger "Check conda environment" +conda info +conda config --show-sources +conda list --show-channel-urls # FIX Added to deal with Anancoda SSL verification issues during conda builds conda config --set ssl_verify False @@ -49,11 +55,11 @@ conda config --set ssl_verify False # BUILD - Conda package builds (conda deps: libcupatial <- cuspatial) ########################################################################################## -echo "Building libcuspatial" -conda build conda/recipes/libcuspatial +gpuci_logger "Building conda pkd for libcuspatial" +gpuci_conda_retry build conda/recipes/libcuspatial -echo "Building cuspatial" -conda build conda/recipes/cuspatial --python=$PYTHON +gpuci_logger "Building conda pkg for cuspatial" +gpuci_conda_retry build conda/recipes/cuspatial --python=$PYTHON ################################################################################ # UPLOAD - Conda packages From 80a011f364c61b4812fa79a634f3f71fb39d7af3 Mon Sep 17 00:00:00 2001 From: Mark Sadang Date: Thu, 12 Nov 2020 20:45:41 -0800 Subject: [PATCH 09/18] Updated gpuci_logger for build.sh --- ci/cpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 6002d8dab..8c21379b6 100644 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -65,5 +65,5 @@ gpuci_conda_retry build conda/recipes/cuspatial --python=$PYTHON # UPLOAD - Conda packages ################################################################################ -logger "Upload conda pkgs..." +gpuci_logger "Upload conda pkgs..." source ci/cpu/upload.sh From 54a76dbc218245626160337b72a5c6750b0c019a Mon Sep 17 00:00:00 2001 From: Mark Sadang Date: Fri, 13 Nov 2020 07:55:59 -0800 Subject: [PATCH 10/18] Added gpuci_logger function --- ci/cpu/build.sh | 5 +++++ ci/cpu/upload.sh | 2 ++ 2 files changed, 7 insertions(+) diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 8c21379b6..d95f7bb48 100644 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -5,6 +5,11 @@ ###################################### set -e +# Logger function for build status output +function gpuci_logger() { + echo -e "\n>>>> $@\n" +} + # Set path and build parallel level export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} diff --git a/ci/cpu/upload.sh b/ci/cpu/upload.sh index f82cfd3f5..eb33c9428 100644 --- a/ci/cpu/upload.sh +++ b/ci/cpu/upload.sh @@ -21,6 +21,8 @@ if [ -z "$MY_UPLOAD_KEY" ]; then return 0 fi +CUDA_REL=${CUDA_VERSION%.*} + ################################################################################ # SETUP - Get conda file output locations ################################################################################ From 575973822dea18500e8a1e9dc573861b9e69d559 Mon Sep 17 00:00:00 2001 From: Mark Sadang Date: Fri, 13 Nov 2020 07:56:57 -0800 Subject: [PATCH 11/18] removed cuda_rel --- ci/cpu/upload.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/ci/cpu/upload.sh b/ci/cpu/upload.sh index eb33c9428..f82cfd3f5 100644 --- a/ci/cpu/upload.sh +++ b/ci/cpu/upload.sh @@ -21,8 +21,6 @@ if [ -z "$MY_UPLOAD_KEY" ]; then return 0 fi -CUDA_REL=${CUDA_VERSION%.*} - ################################################################################ # SETUP - Get conda file output locations ################################################################################ From 3b56913c2ab9570b6a17d5d63616d485c84767c7 Mon Sep 17 00:00:00 2001 From: Mark Sadang Date: Fri, 13 Nov 2020 08:20:56 -0800 Subject: [PATCH 12/18] added cpuci_retry --- ci/cpu/upload.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/cpu/upload.sh b/ci/cpu/upload.sh index f82cfd3f5..17a3ddd9b 100644 --- a/ci/cpu/upload.sh +++ b/ci/cpu/upload.sh @@ -42,7 +42,7 @@ if [ "$UPLOAD_LIBCUSPATIAL" == "1" ]; then test -e ${LIBCUSPATIAL_FILE} echo "Upload libcuspatial" echo ${LIBCUSPATIAL_FILE} - anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${LIBCUSPATIAL_FILE} + gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${LIBCUSPATIAL_FILE} fi if [ "$UPLOAD_CUSPATIAL" == "1" ]; then @@ -52,5 +52,5 @@ if [ "$UPLOAD_CUSPATIAL" == "1" ]; then test -e ${CUSPATIAL_FILE} echo "Upload cuspatial" echo ${CUSPATIAL_FILE} - anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUSPATIAL_FILE} + gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUSPATIAL_FILE} fi From 12976055c18be1a58efe14bcd2333a7d3778352b Mon Sep 17 00:00:00 2001 From: Mark Sadang Date: Fri, 13 Nov 2020 10:27:07 -0800 Subject: [PATCH 13/18] removed gpuci_logger and replaced parallel_level --- ci/cpu/build.sh | 5 ----- ci/gpu/build.sh | 7 +------ 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index d95f7bb48..8c21379b6 100644 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -5,11 +5,6 @@ ###################################### set -e -# Logger function for build status output -function gpuci_logger() { - echo -e "\n>>>> $@\n" -} - # Set path and build parallel level export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 505eccd4c..6d10cda3d 100644 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -7,11 +7,6 @@ set -e NUMARGS=$# ARGS=$* -# Logger function for build status output -function gpuci_logger() { - echo -e "\n>>>> $@\n" -} - # Arg parsing function function hasArg { (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ") @@ -19,7 +14,7 @@ function hasArg { # Set path and build parallel level export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH -export PARALLEL_LEVEL=-4 +export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} export CUDA_REL=${CUDA_VERSION%.*} export CUDF_HOME="${WORKSPACE}/cudf" export CUSPATIAL_HOME="${WORKSPACE}" From 5d251e3505756deca66f17ae13eecae7092aa58d Mon Sep 17 00:00:00 2001 From: Mark Sadang Date: Thu, 19 Nov 2020 15:49:42 -0800 Subject: [PATCH 14/18] readded cudf_home deletion --- ci/cpu/build.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 8c21379b6..291fd9c0a 100644 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -8,6 +8,7 @@ set -e # Set path and build parallel level export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} +export CUDF_HOME="${WORKSPACE}/cudf" # Set home to the job's workspace export HOME=$WORKSPACE From d4ed2d83d5b560cbab27e614868db92ec53ee6ea Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 24 Nov 2020 13:15:04 +1100 Subject: [PATCH 15/18] Convert cudaStream_t to rmm::cuda_stream_view (#325) This PR converts all usage of cudaStream_t in cuSpatial to rmm::cuda_stream_view, following on from rapidsai/cudf#6646 and rapidsai/cudf#6648 Also reorders stream parameters to occur before MR parameters in all functions. --- CHANGELOG.md | 3 +- .../synchronization/synchronization.cpp | 34 ++++---- .../synchronization/synchronization.hpp | 34 +++++--- cpp/include/cuspatial/cubic_spline.hpp | 26 +++--- .../indexing/construction/detail/phase_1.cuh | 39 ++++----- .../indexing/construction/detail/phase_2.cuh | 49 +++++------ .../construction/detail/utilities.cuh | 3 +- .../indexing/construction/point_quadtree.cu | 57 ++++++------- cpp/src/interpolate/cubic_spline.cu | 81 +++++++++---------- cpp/src/io/shp/polygon_shapefile_reader.cu | 11 +-- cpp/src/join/detail/intersection.cuh | 13 +-- cpp/src/join/detail/traversal.cuh | 17 ++-- cpp/src/join/quadtree_point_in_polygon.cu | 27 ++++--- .../quadtree_point_to_nearest_polyline.cu | 29 +++---- cpp/src/join/quadtree_poly_filtering.cu | 39 +++++---- cpp/src/spatial/hausdorff.cu | 22 ++--- cpp/src/spatial/haversine.cu | 10 ++- cpp/src/spatial/lonlat_to_cartesian.cu | 23 +++--- cpp/src/spatial/point_in_polygon.cu | 44 +++++----- cpp/src/spatial/polygon_bounding_box.cu | 30 +++---- cpp/src/spatial/polyline_bounding_box.cu | 48 +++++------ cpp/src/spatial_window/spatial_window.cu | 14 ++-- cpp/src/trajectory/derive_trajectories.cu | 31 +++---- .../trajectory/trajectory_bounding_boxes.cu | 31 +++---- .../trajectory_distances_and_speeds.cu | 44 +++++----- cpp/tests/trajectory/trajectory_utilities.cuh | 2 +- 26 files changed, 407 insertions(+), 354 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b4fce3f9..3782fd51d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,9 +4,10 @@ ## Improvements - PR #310 Pin cmake policies to cmake 3.17 version +- PR #325 Convert `cudaStream_t` to `rmm::cuda_stream_view` ## Bug Fixes -- PR #320 Fix a quadtree construction bug - needs zero out device_uvector before scatter +- PR #320 Fix quadtree construction bug: zero out `device_uvector` before `scatter` # cuSpatial 0.16.0 (Date TBD) diff --git a/cpp/benchmarks/synchronization/synchronization.cpp b/cpp/benchmarks/synchronization/synchronization.cpp index 4894f3fc6..c5a88bd64 100644 --- a/cpp/benchmarks/synchronization/synchronization.cpp +++ b/cpp/benchmarks/synchronization/synchronization.cpp @@ -16,47 +16,45 @@ #include "synchronization.hpp" -#include +#include -#define RMM_CUDA_ASSERT_OK(expr) \ - do { \ - cudaError_t const status = (expr); \ - assert(cudaSuccess == status); \ - } while (0); +#include +#include cuda_event_timer::cuda_event_timer(benchmark::State& state, bool flush_l2_cache, - cudaStream_t stream) + rmm::cuda_stream_view stream) : p_state(&state), stream(stream) { // flush all of L2$ if (flush_l2_cache) { int current_device = 0; - RMM_CUDA_TRY(cudaGetDevice(¤t_device)); + CUDA_TRY(cudaGetDevice(¤t_device)); int l2_cache_bytes = 0; - RMM_CUDA_TRY(cudaDeviceGetAttribute(&l2_cache_bytes, cudaDevAttrL2CacheSize, current_device)); + CUDA_TRY(cudaDeviceGetAttribute(&l2_cache_bytes, cudaDevAttrL2CacheSize, current_device)); if (l2_cache_bytes > 0) { const int memset_value = 0; rmm::device_buffer l2_cache_buffer(l2_cache_bytes, stream); - RMM_CUDA_TRY(cudaMemsetAsync(l2_cache_buffer.data(), memset_value, l2_cache_bytes, stream)); + CUDA_TRY( + cudaMemsetAsync(l2_cache_buffer.data(), memset_value, l2_cache_bytes, stream.value())); } } - RMM_CUDA_TRY(cudaEventCreate(&start)); - RMM_CUDA_TRY(cudaEventCreate(&stop)); - RMM_CUDA_TRY(cudaEventRecord(start, stream)); + CUDA_TRY(cudaEventCreate(&start)); + CUDA_TRY(cudaEventCreate(&stop)); + CUDA_TRY(cudaEventRecord(start, stream.value())); } cuda_event_timer::~cuda_event_timer() { - RMM_CUDA_ASSERT_OK(cudaEventRecord(stop, stream)); - RMM_CUDA_ASSERT_OK(cudaEventSynchronize(stop)); + CUDA_TRY(cudaEventRecord(stop, stream.value())); + CUDA_TRY(cudaEventSynchronize(stop)); float milliseconds = 0.0f; - RMM_CUDA_ASSERT_OK(cudaEventElapsedTime(&milliseconds, start, stop)); + CUDA_TRY(cudaEventElapsedTime(&milliseconds, start, stop)); p_state->SetIterationTime(milliseconds / (1000.0f)); - RMM_CUDA_ASSERT_OK(cudaEventDestroy(start)); - RMM_CUDA_ASSERT_OK(cudaEventDestroy(stop)); + CUDA_TRY(cudaEventDestroy(start)); + CUDA_TRY(cudaEventDestroy(stop)); } diff --git a/cpp/benchmarks/synchronization/synchronization.hpp b/cpp/benchmarks/synchronization/synchronization.hpp index 64bf660f0..5e84e9fb9 100644 --- a/cpp/benchmarks/synchronization/synchronization.hpp +++ b/cpp/benchmarks/synchronization/synchronization.hpp @@ -17,7 +17,7 @@ /** * @file synchronization.hpp * @brief This is the header file for `cuda_event_timer`. - */ + **/ /** * @brief This class serves as a wrapper for using `cudaEvent_t` as the user @@ -33,7 +33,7 @@ for (auto _ : state){ - cudaStream_t stream = 0; + rmm::cuda_stream_view stream{}; // default stream, could be another stream // Create (Construct) an object of this class. You HAVE to pass in the // benchmark::State object you are using. It measures the time from its @@ -44,23 +44,29 @@ cuda_event_timer raii(state, true, stream); // flush_l2_cache = true // Now perform the operations that is to be benchmarked - sample_kernel<<<1, 256, 0, stream>>>(); // Possibly launching a CUDA kernel + sample_kernel<<<1, 256, 0, stream.value()>>>(); // Possibly launching a CUDA kernel } } // Register the function as a benchmark. You will need to set the `UseManualTime()` - // flag in order to use the timer embeded in this class. + // flag in order to use the timer embedded in this class. BENCHMARK(sample_cuda_benchmark)->UseManualTime(); - */ + **/ -#pragma once +#ifndef CUDF_BENCH_SYNCHRONIZATION_H +#define CUDF_BENCH_SYNCHRONIZATION_H // Google Benchmark library #include -#include + +#include + +#include + +#include class cuda_event_timer { public: @@ -73,14 +79,16 @@ class cuda_event_timer { * @param[in] flush_l2_cache_ whether or not to flush the L2 cache before * every iteration. * @param[in] stream_ The CUDA stream we are measuring time on. - */ - cuda_event_timer(benchmark::State& state, bool flush_l2_cache, cudaStream_t stream_ = 0); + **/ + cuda_event_timer(benchmark::State& state, + bool flush_l2_cache, + rmm::cuda_stream_view stream = rmm::cuda_stream_default); - // The user will HAVE to provide a benchmark::State object to set + // The user must provide a benchmark::State object to set // the timer so we disable the default c'tor. cuda_event_timer() = delete; - // The d'tor stops the timer and performs a synchroniazation. + // The d'tor stops the timer and performs a synchronization. // Time of the benchmark::State object provided to the c'tor // will be set to the value given by `cudaEventElapsedTime`. ~cuda_event_timer(); @@ -88,6 +96,8 @@ class cuda_event_timer { private: cudaEvent_t start; cudaEvent_t stop; - cudaStream_t stream; + rmm::cuda_stream_view stream; benchmark::State* p_state; }; + +#endif diff --git a/cpp/include/cuspatial/cubic_spline.hpp b/cpp/include/cuspatial/cubic_spline.hpp index 666488426..f168d8afe 100644 --- a/cpp/include/cuspatial/cubic_spline.hpp +++ b/cpp/include/cuspatial/cubic_spline.hpp @@ -18,6 +18,8 @@ #include #include +#include + #include namespace cuspatial { @@ -43,16 +45,19 @@ namespace cuspatial { * @param[in] offsets the exclusive scan of the spline sizes, prefixed by * 0. For example, for 3 splines of 5 vertices each, the offsets input array * is {0, 5, 10, 15}. + * @param[in] mr The memory resource to use for allocating output * * @return cudf::table_view of coefficients for spline interpolation. The size * of the table is ((M-n), 4) where M is `t.size()` and and n is * `ids.size()-1`. **/ -std::unique_ptr cubicspline_interpolate(cudf::column_view const& query_points, - cudf::column_view const& spline_ids, - cudf::column_view const& offsets, - cudf::column_view const& source_points, - cudf::table_view const& coefficients); +std::unique_ptr cubicspline_interpolate( + cudf::column_view const& query_points, + cudf::column_view const& spline_ids, + cudf::column_view const& offsets, + cudf::column_view const& source_points, + cudf::table_view const& coefficients, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Compute cubic interpolations of a set of points based on their @@ -69,11 +74,14 @@ std::unique_ptr cubicspline_interpolate(cudf::column_view const& q * identify which specific spline a given query_point is interpolated with. * @param[in] coefficients table of spline coefficients produced by * cubicspline_coefficients. + * @param[in] mr The memory resource to use for allocating output * * @return cudf::column `y` coordinates interpolated from `x` and `coefs`. **/ -std::unique_ptr cubicspline_coefficients(cudf::column_view const& t, - cudf::column_view const& y, - cudf::column_view const& ids, - cudf::column_view const& offsets); +std::unique_ptr cubicspline_coefficients( + cudf::column_view const& t, + cudf::column_view const& y, + cudf::column_view const& ids, + cudf::column_view const& offsets, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace cuspatial diff --git a/cpp/src/indexing/construction/detail/phase_1.cuh b/cpp/src/indexing/construction/detail/phase_1.cuh index 1e7d9f623..1679ec724 100644 --- a/cpp/src/indexing/construction/detail/phase_1.cuh +++ b/cpp/src/indexing/construction/detail/phase_1.cuh @@ -24,6 +24,7 @@ #include #include +#include #include #include @@ -57,11 +58,11 @@ compute_point_keys_and_sorted_indices(cudf::column_view const &x, T y_max, T scale, int8_t max_depth, - rmm::mr::device_memory_resource *mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { rmm::device_uvector keys(x.size(), stream); - thrust::transform(rmm::exec_policy(stream)->on(stream), + thrust::transform(rmm::exec_policy(stream)->on(stream.value()), make_zip_iterator(x.begin(), y.begin()), make_zip_iterator(x.begin(), y.begin()) + x.size(), keys.begin(), @@ -77,12 +78,12 @@ compute_point_keys_and_sorted_indices(cudf::column_view const &x, auto indices = make_fixed_width_column(keys.size(), stream, mr); - thrust::sequence(rmm::exec_policy(stream)->on(stream), + thrust::sequence(rmm::exec_policy(stream)->on(stream.value()), indices->mutable_view().begin(), indices->mutable_view().end()); // Sort the codes and point indices - thrust::stable_sort_by_key(rmm::exec_policy(stream)->on(stream), + thrust::stable_sort_by_key(rmm::exec_policy(stream)->on(stream.value()), keys.begin(), keys.end(), indices->mutable_view().begin()); @@ -106,9 +107,9 @@ inline cudf::size_type build_tree_level(InputIterator1 keys_begin, OutputIterator1 keys_out, OutputIterator2 vals_out, BinaryOp binary_op, - cudaStream_t stream) + rmm::cuda_stream_view stream) { - auto result = thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), + auto result = thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream.value()), keys_begin, keys_end, vals_in, @@ -133,7 +134,7 @@ build_tree_levels(int8_t max_depth, KeysIterator keys_begin, ValsIterator quad_point_count_begin, ValsIterator quad_child_count_begin, - cudaStream_t stream) + rmm::cuda_stream_view stream) { // begin/end offsets cudf::size_type begin{0}; @@ -193,7 +194,7 @@ reverse_tree_levels(rmm::device_uvector const &quad_keys_in, std::vector const &begin_pos, std::vector const &end_pos, int8_t max_depth, - cudaStream_t stream) + rmm::cuda_stream_view stream) { rmm::device_uvector quad_keys(quad_keys_in.size(), stream); rmm::device_uvector quad_levels(quad_keys_in.size(), stream); @@ -205,19 +206,19 @@ reverse_tree_levels(rmm::device_uvector const &quad_keys_in, cudf::size_type level_end = end_pos[level]; cudf::size_type level_begin = begin_pos[level]; cudf::size_type num_quads = level_end - level_begin; - thrust::fill(rmm::exec_policy(stream)->on(stream), + thrust::fill(rmm::exec_policy(stream)->on(stream.value()), quad_levels.begin() + offset, quad_levels.begin() + offset + num_quads, level); - thrust::copy(rmm::exec_policy(stream)->on(stream), + thrust::copy(rmm::exec_policy(stream)->on(stream.value()), quad_keys_in.begin() + level_begin, quad_keys_in.begin() + level_end, quad_keys.begin() + offset); - thrust::copy(rmm::exec_policy(stream)->on(stream), + thrust::copy(rmm::exec_policy(stream)->on(stream.value()), quad_point_count_in.begin() + level_begin, quad_point_count_in.begin() + level_end, quad_point_count.begin() + offset); - thrust::copy(rmm::exec_policy(stream)->on(stream), + thrust::copy(rmm::exec_policy(stream)->on(stream.value()), quad_child_count_in.begin() + level_begin, quad_child_count_in.begin() + level_end, quad_child_count.begin() + offset); @@ -255,15 +256,15 @@ inline auto make_full_levels(cudf::column_view const &x, T scale, int8_t max_depth, cudf::size_type min_size, - rmm::mr::device_memory_resource *mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { // Compute point keys and sort into bottom-level quadrants // (i.e. quads at level `max_depth - 1`) // Compute Morton code (z-order) keys for each point auto keys_and_indices = compute_point_keys_and_sorted_indices( - x, y, x_min, x_max, y_min, y_max, scale, max_depth, mr, stream); + x, y, x_min, x_max, y_min, y_max, scale, max_depth, stream, mr); auto &point_keys = keys_and_indices.first; auto &point_indices = keys_and_indices.second; @@ -291,8 +292,10 @@ inline auto make_full_levels(cudf::column_view const &x, quad_child_count.resize(num_bottom_quads * (max_depth + 1), stream); // Zero out the quad_child_count vector because we're reusing the point_keys vector - thrust::fill( - rmm::exec_policy(stream)->on(stream), quad_child_count.begin(), quad_child_count.end(), 0); + thrust::fill(rmm::exec_policy(stream)->on(stream.value()), + quad_child_count.begin(), + quad_child_count.end(), + 0); // // Compute "full" quads for the tree at each level. Starting from the quadrant diff --git a/cpp/src/indexing/construction/detail/phase_2.cuh b/cpp/src/indexing/construction/detail/phase_2.cuh index cfe48f5b4..153e793d6 100644 --- a/cpp/src/indexing/construction/detail/phase_2.cuh +++ b/cpp/src/indexing/construction/detail/phase_2.cuh @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -43,10 +44,10 @@ namespace detail { inline rmm::device_uvector compute_leaf_positions(cudf::column_view const &indicator, cudf::size_type num_valid_nodes, - cudaStream_t stream) + rmm::cuda_stream_view stream) { rmm::device_uvector leaf_pos(num_valid_nodes, stream); - auto result = thrust::copy_if(rmm::exec_policy(stream)->on(stream), + auto result = thrust::copy_if(rmm::exec_policy(stream)->on(stream.value()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + num_valid_nodes, indicator.begin(), @@ -64,12 +65,12 @@ inline rmm::device_uvector flatten_point_keys( cudf::column_view const &indicator, cudf::size_type num_valid_nodes, int8_t max_depth, - cudaStream_t stream) + rmm::cuda_stream_view stream) { rmm::device_uvector flattened_keys(num_valid_nodes, stream); auto keys_and_levels = make_zip_iterator(quad_keys.begin(), quad_level.begin(), indicator.begin()); - thrust::transform(rmm::exec_policy(stream)->on(stream), + thrust::transform(rmm::exec_policy(stream)->on(stream.value()), keys_and_levels, keys_and_levels + num_valid_nodes, flattened_keys.begin(), @@ -98,7 +99,7 @@ inline rmm::device_uvector compute_flattened_first_point_positions( cudf::column_view const &indicator, cudf::size_type num_valid_nodes, int8_t max_depth, - cudaStream_t stream) + rmm::cuda_stream_view stream) { // Sort initial indices and temporary point counts by the flattened keys auto sorted_order_and_point_counts = [&]() { @@ -106,24 +107,24 @@ inline rmm::device_uvector compute_flattened_first_point_positions( flatten_point_keys(quad_keys, quad_level, indicator, num_valid_nodes, max_depth, stream); rmm::device_uvector initial_sort_indices(num_valid_nodes, stream); - thrust::sequence(rmm::exec_policy(stream)->on(stream), + thrust::sequence(rmm::exec_policy(stream)->on(stream.value()), initial_sort_indices.begin(), initial_sort_indices.end()); rmm::device_uvector quad_point_count_tmp(num_valid_nodes, stream); - thrust::copy(rmm::exec_policy(stream)->on(stream), + thrust::copy(rmm::exec_policy(stream)->on(stream.value()), quad_point_count.begin(), quad_point_count.end(), quad_point_count_tmp.begin()); // sort indices and temporary point counts thrust::stable_sort_by_key( - rmm::exec_policy(stream)->on(stream), + rmm::exec_policy(stream)->on(stream.value()), flattened_keys.begin(), flattened_keys.end(), make_zip_iterator(initial_sort_indices.begin(), quad_point_count_tmp.begin())); - thrust::remove_if(rmm::exec_policy(stream)->on(stream), + thrust::remove_if(rmm::exec_policy(stream)->on(stream.value()), quad_point_count_tmp.begin(), quad_point_count_tmp.begin() + num_valid_nodes, quad_point_count_tmp.begin(), @@ -150,7 +151,7 @@ inline rmm::device_uvector compute_flattened_first_point_positions( rmm::device_uvector quad_point_offsets_tmp(leaf_offsets.size(), stream); - thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream), + thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream.value()), quad_point_count_tmp.begin(), quad_point_count_tmp.end(), quad_point_offsets_tmp.begin()); @@ -158,14 +159,14 @@ inline rmm::device_uvector compute_flattened_first_point_positions( auto counts_and_offsets = make_zip_iterator(quad_point_count_tmp.begin(), quad_point_offsets_tmp.begin()); - thrust::stable_sort_by_key(rmm::exec_policy(stream)->on(stream), + thrust::stable_sort_by_key(rmm::exec_policy(stream)->on(stream.value()), initial_sort_indices.begin(), initial_sort_indices.end(), counts_and_offsets); rmm::device_uvector quad_point_offsets(num_valid_nodes, stream); - thrust::scatter(rmm::exec_policy(stream)->on(stream), + thrust::scatter(rmm::exec_policy(stream)->on(stream.value()), counts_and_offsets, counts_and_offsets + leaf_offsets.size(), leaf_offsets.begin(), @@ -180,22 +181,22 @@ inline rmm::device_uvector compute_parent_positions( rmm::device_uvector const &quad_child_count, cudf::size_type num_parent_nodes, cudf::size_type num_child_nodes, - cudaStream_t stream) + rmm::cuda_stream_view stream) { // Compute parent node start positions // Wraped in an IIFE so `position_map` is freed on return auto parent_pos = [&]() { rmm::device_uvector position_map(num_parent_nodes, stream); // line 1 of algorithm in Fig. 5 in ref. - thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream), + thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream.value()), quad_child_count.begin(), quad_child_count.begin() + num_parent_nodes, position_map.begin()); // line 2 of algorithm in Fig. 5 in ref. rmm::device_uvector parent_pos(num_child_nodes, stream); thrust::uninitialized_fill( - rmm::exec_policy(stream)->on(stream), parent_pos.begin(), parent_pos.end(), 0); - thrust::scatter(rmm::exec_policy(stream)->on(stream), + rmm::exec_policy(stream)->on(stream.value()), parent_pos.begin(), parent_pos.end(), 0); + thrust::scatter(rmm::exec_policy(stream)->on(stream.value()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + num_parent_nodes, position_map.begin(), @@ -204,7 +205,7 @@ inline rmm::device_uvector compute_parent_positions( }(); // line 3 of algorithm in Fig. 5 in ref. - thrust::inclusive_scan(rmm::exec_policy(stream)->on(stream), + thrust::inclusive_scan(rmm::exec_policy(stream)->on(stream.value()), parent_pos.begin(), parent_pos.begin() + num_child_nodes, parent_pos.begin(), @@ -236,7 +237,7 @@ inline std::pair remove_unqualified_quads( cudf::size_type num_child_nodes, cudf::size_type min_size, cudf::size_type level_1_size, - cudaStream_t stream) + rmm::cuda_stream_view stream) { // compute parent node start positions auto parent_positions = @@ -248,7 +249,7 @@ inline std::pair remove_unqualified_quads( // Start counting nodes at level 2, since children of the root node should not // be discarded. auto num_invalid_parent_nodes = - thrust::count_if(rmm::exec_policy(stream)->on(stream), + thrust::count_if(rmm::exec_policy(stream)->on(stream.value()), parent_point_counts, parent_point_counts + (num_parent_nodes - level_1_size), // i.e. quad_point_count[parent_pos] <= min_size @@ -267,7 +268,7 @@ inline std::pair remove_unqualified_quads( quad_levels.begin() + level_1_size); auto last_valid = - thrust::remove_if(rmm::exec_policy(stream)->on(stream), + thrust::remove_if(rmm::exec_policy(stream)->on(stream.value()), tree, tree + num_child_nodes, parent_point_counts, @@ -306,21 +307,21 @@ inline std::unique_ptr construct_non_leaf_indicator( cudf::size_type num_valid_nodes, cudf::size_type min_size, rmm::mr::device_memory_resource *mr, - cudaStream_t stream) + rmm::cuda_stream_view stream) { // // Construct the indicator output column auto is_quad = make_fixed_width_column(num_valid_nodes, stream, mr); // line 6 of algorithm in Fig. 5 in ref. - thrust::transform(rmm::exec_policy(stream)->on(stream), + thrust::transform(rmm::exec_policy(stream)->on(stream.value()), quad_point_count.begin(), quad_point_count.begin() + num_parent_nodes, is_quad->mutable_view().begin(), thrust::placeholders::_1 > min_size); // line 7 of algorithm in Fig. 5 in ref. - thrust::replace_if(rmm::exec_policy(stream)->on(stream), + thrust::replace_if(rmm::exec_policy(stream)->on(stream.value()), quad_point_count.begin(), quad_point_count.begin() + num_parent_nodes, is_quad->view().begin(), @@ -330,7 +331,7 @@ inline std::unique_ptr construct_non_leaf_indicator( if (num_valid_nodes > num_parent_nodes) { // zero-fill the rest of the indicator column because // device_memory_resources aren't required to initialize allocations - thrust::fill(rmm::exec_policy(stream)->on(stream), + thrust::fill(rmm::exec_policy(stream)->on(stream.value()), is_quad->mutable_view().begin() + num_parent_nodes, is_quad->mutable_view().end(), 0); diff --git a/cpp/src/indexing/construction/detail/utilities.cuh b/cpp/src/indexing/construction/detail/utilities.cuh index 61a732181..b41c7a1a7 100644 --- a/cpp/src/indexing/construction/detail/utilities.cuh +++ b/cpp/src/indexing/construction/detail/utilities.cuh @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -53,7 +54,7 @@ struct tuple_sum { template inline std::unique_ptr make_fixed_width_column( cudf::size_type size, - cudaStream_t stream = 0, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { return cudf::make_fixed_width_column( diff --git a/cpp/src/indexing/construction/point_quadtree.cu b/cpp/src/indexing/construction/point_quadtree.cu index e55ab30db..279df59fd 100644 --- a/cpp/src/indexing/construction/point_quadtree.cu +++ b/cpp/src/indexing/construction/point_quadtree.cu @@ -23,6 +23,7 @@ #include +#include #include /* @@ -49,11 +50,11 @@ inline std::unique_ptr make_quad_tree(rmm::device_uvector int8_t max_depth, cudf::size_type min_size, cudf::size_type level_1_size, - rmm::mr::device_memory_resource *mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { // count the number of child nodes - auto num_child_nodes = thrust::reduce(rmm::exec_policy(stream)->on(stream), + auto num_child_nodes = thrust::reduce(rmm::exec_policy(stream)->on(stream.value()), quad_child_count.begin(), quad_child_count.begin() + num_parent_nodes); @@ -90,7 +91,7 @@ inline std::unique_ptr make_quad_tree(rmm::device_uvector auto quad_child_pos = make_fixed_width_column(num_valid_nodes, stream, mr); // line 9 of algorithm in Fig. 5 in ref. - thrust::replace_if(rmm::exec_policy(stream)->on(stream), + thrust::replace_if(rmm::exec_policy(stream)->on(stream.value()), quad_child_count.begin(), quad_child_count.begin() + num_valid_nodes, is_quad->view().begin(), @@ -98,7 +99,7 @@ inline std::unique_ptr make_quad_tree(rmm::device_uvector 0); // line 10 of algorithm in Fig. 5 in ref. - thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream), + thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream.value()), quad_child_count.begin(), quad_child_count.end(), quad_child_pos->mutable_view().begin(), @@ -111,7 +112,7 @@ inline std::unique_ptr make_quad_tree(rmm::device_uvector // for each value in `is_quad` copy from `quad_child_pos` if true, else // `quad_point_pos` - thrust::transform(rmm::exec_policy(stream)->on(stream), + thrust::transform(rmm::exec_policy(stream)->on(stream.value()), offsets_iter, offsets_iter + num_valid_nodes, offsets->mutable_view().template begin(), @@ -130,7 +131,7 @@ inline std::unique_ptr make_quad_tree(rmm::device_uvector auto lengths_iter = make_zip_iterator(is_quad->view().begin(), // quad_child_count.begin(), quad_point_count.begin()); - thrust::transform(rmm::exec_policy(stream)->on(stream), + thrust::transform(rmm::exec_policy(stream)->on(stream.value()), lengths_iter, lengths_iter + num_valid_nodes, lengths->mutable_view().template begin(), @@ -143,7 +144,7 @@ inline std::unique_ptr make_quad_tree(rmm::device_uvector auto keys = make_fixed_width_column(num_valid_nodes, stream, mr); // Copy quad keys to keys output column - thrust::copy(rmm::exec_policy(stream)->on(stream), + thrust::copy(rmm::exec_policy(stream)->on(stream.value()), quad_keys.begin(), quad_keys.end(), keys->mutable_view().begin()); @@ -152,7 +153,7 @@ inline std::unique_ptr make_quad_tree(rmm::device_uvector auto levels = make_fixed_width_column(num_valid_nodes, stream, mr); // Copy quad levels to levels output column - thrust::copy(rmm::exec_policy(stream)->on(stream), + thrust::copy(rmm::exec_policy(stream)->on(stream.value()), quad_levels.begin(), quad_levels.end(), levels->mutable_view().begin()); @@ -174,8 +175,8 @@ inline std::unique_ptr make_leaf_tree( rmm::device_uvector const &quad_keys, rmm::device_uvector const &quad_point_count, cudf::size_type num_top_quads, - rmm::mr::device_memory_resource *mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { auto keys = make_fixed_width_column(num_top_quads, stream, mr); auto levels = make_fixed_width_column(num_top_quads, stream, mr); @@ -184,31 +185,31 @@ inline std::unique_ptr make_leaf_tree( auto offsets = make_fixed_width_column(num_top_quads, stream, mr); // copy quad keys from the front of the quad_keys list - thrust::copy(rmm::exec_policy(stream)->on(stream), + thrust::copy(rmm::exec_policy(stream)->on(stream.value()), quad_keys.begin(), quad_keys.begin() + num_top_quads, keys->mutable_view().begin()); // copy point counts from the front of the quad_point_count list - thrust::copy(rmm::exec_policy(stream)->on(stream), + thrust::copy(rmm::exec_policy(stream)->on(stream.value()), quad_point_count.begin(), quad_point_count.begin() + num_top_quads, lengths->mutable_view().begin()); // All leaves are children of the root node (level 0) - thrust::fill(rmm::exec_policy(stream)->on(stream), + thrust::fill(rmm::exec_policy(stream)->on(stream.value()), levels->mutable_view().begin(), levels->mutable_view().end(), 0); // Quad node indicators are false for leaf nodes - thrust::fill(rmm::exec_policy(stream)->on(stream), + thrust::fill(rmm::exec_policy(stream)->on(stream.value()), is_quad->mutable_view().begin(), is_quad->mutable_view().end(), false); // compute offsets from lengths - thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream), + thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream.value()), lengths->view().begin(), lengths->view().end(), offsets->mutable_view().begin()); @@ -252,8 +253,8 @@ struct dispatch_construct_quadtree { double scale, int8_t max_depth, cudf::size_type min_size, - rmm::mr::device_memory_resource *mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { // Construct the full set of non-empty subquadrants starting from the lowest level. // Corresponds to "Phase 1" of quadtree construction in ref. @@ -266,8 +267,8 @@ struct dispatch_construct_quadtree { scale, max_depth, min_size, - mr, - stream); + stream, + mr); auto &point_indices = std::get<0>(quads); auto &quad_keys = std::get<1>(quads); @@ -281,7 +282,7 @@ struct dispatch_construct_quadtree { // Optimization: return early if the top level nodes are all leaves if (num_parent_nodes <= 0) { return std::make_pair(std::move(point_indices), - make_leaf_tree(quad_keys, quad_point_count, num_top_quads, mr, stream)); + make_leaf_tree(quad_keys, quad_point_count, num_top_quads, stream, mr)); } // Corresponds to "Phase 2" of quadtree construction in ref. @@ -294,8 +295,8 @@ struct dispatch_construct_quadtree { max_depth, min_size, level_1_size, - mr, - stream)); + stream, + mr)); } }; @@ -311,8 +312,8 @@ std::pair, std::unique_ptr> quadtree_ double scale, int8_t max_depth, cudf::size_type min_size, - rmm::mr::device_memory_resource *mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { return cudf::type_dispatcher(x.type(), dispatch_construct_quadtree{}, @@ -325,8 +326,8 @@ std::pair, std::unique_ptr> quadtree_ scale, max_depth, min_size, - mr, - stream); + stream, + mr); } } // namespace detail @@ -362,7 +363,7 @@ std::pair, std::unique_ptr> quadtree_ std::make_unique(std::move(cols))); } return detail::quadtree_on_points( - x, y, x_min, x_max, y_min, y_max, scale, max_depth, min_size, mr, cudaStream_t{0}); + x, y, x_min, x_max, y_min, y_max, scale, max_depth, min_size, rmm::cuda_stream_default, mr); } } // namespace cuspatial diff --git a/cpp/src/interpolate/cubic_spline.cu b/cpp/src/interpolate/cubic_spline.cu index 5219545ce..500d5c2d6 100644 --- a/cpp/src/interpolate/cubic_spline.cu +++ b/cpp/src/interpolate/cubic_spline.cu @@ -23,6 +23,8 @@ #include #include +#include + #include namespace { // anonymous @@ -35,8 +37,8 @@ struct parallel_search { cudf::column_view const& curve_ids, cudf::column_view const& prefixes, cudf::column_view const& query_coords, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { const T* p_t = t.data(); const int32_t* p_curve_ids = curve_ids.data(); @@ -46,7 +48,7 @@ struct parallel_search { curve_ids.type(), t.size(), cudf::mask_state::UNALLOCATED, stream, mr); int32_t* p_result = result->mutable_view().data(); thrust::for_each( - rmm::exec_policy(stream)->on(stream), + rmm::exec_policy(stream)->on(stream.value()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(query_coords.size()), [p_t, p_curve_ids, p_prefixes, p_query_coords, p_result] __device__(int index) { @@ -87,8 +89,8 @@ struct interpolate { cudf::column_view const& ids, cudf::column_view const& coef_indices, cudf::table_view const& coefficients, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { const T* p_t = t.data(); const int32_t* p_ids = ids.data(); @@ -101,7 +103,7 @@ struct interpolate { cudf::make_numeric_column(t.type(), t.size(), cudf::mask_state::UNALLOCATED, stream, mr); T* p_result = result->mutable_view().data(); thrust::for_each( - rmm::exec_policy(stream)->on(stream), + rmm::exec_policy(stream)->on(stream.value()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(t.size()), [p_t, p_ids, p_coef_indices, p_d3, p_d2, p_d1, p_d0, p_result] __device__(int index) { @@ -134,8 +136,8 @@ struct coefficients_compute { cudf::mutable_column_view const& d2, cudf::mutable_column_view const& d1, cudf::mutable_column_view const& d0, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { const T* p_t = t.data(); const T* p_y = y.data(); @@ -148,7 +150,7 @@ struct coefficients_compute { T* p_d1 = d1.data(); T* p_d0 = d0.data(); thrust::for_each( - rmm::exec_policy(stream)->on(stream), + rmm::exec_policy(stream)->on(stream.value()), thrust::make_counting_iterator(1), thrust::make_counting_iterator(prefixes.size()), [p_t, p_y, p_prefixes, p_h, p_i, p_z, p_d3, p_d2, p_d1, p_d0] __device__(int index) { @@ -181,8 +183,8 @@ struct coefficients_compute { cudf::mutable_column_view const& d2, cudf::mutable_column_view const& d1, cudf::mutable_column_view const& d0, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUSPATIAL_FAIL("Non-floating point operation is not supported."); } @@ -201,8 +203,8 @@ struct compute_spline_tridiagonals { cudf::mutable_column_view const& u, cudf::mutable_column_view const& h, cudf::mutable_column_view const& i, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { const T* p_t = t.data(); const T* p_y = y.data(); @@ -212,7 +214,7 @@ struct compute_spline_tridiagonals { T* p_u = u.data(); T* p_h = h.data(); T* p_i = i.data(); - thrust::for_each(rmm::exec_policy(stream)->on(stream), + thrust::for_each(rmm::exec_policy(stream)->on(stream.value()), thrust::make_counting_iterator(1), thrust::make_counting_iterator(prefixes.size()), [p_t, p_y, p_prefixes, p_d, p_dlu, p_u, p_h, p_i] __device__(int index) { @@ -240,8 +242,8 @@ struct compute_spline_tridiagonals { cudf::mutable_column_view const& u, cudf::mutable_column_view const& h, cudf::mutable_column_view const& i, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUSPATIAL_FAIL("Non-floating point operation is not supported."); } @@ -278,8 +280,8 @@ std::unique_ptr cubicspline_interpolate(cudf::column_view const& q cudf::column_view const& prefixes, cudf::column_view const& source_points, cudf::table_view const& coefficients, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto coefficient_indices = cudf::type_dispatcher(query_points.type(), parallel_search{}, @@ -287,8 +289,8 @@ std::unique_ptr cubicspline_interpolate(cudf::column_view const& q curve_ids, prefixes, source_points, - mr, - stream); + stream, + mr); // TPRINT(coefficient_indices->mutable_view(), "parallel_search_"); // TPRINT(query_points, "query_points_"); // TPRINT(curve_ids, "curve_ids_"); @@ -300,8 +302,8 @@ std::unique_ptr cubicspline_interpolate(cudf::column_view const& q curve_ids, coefficient_indices->view(), coefficients, - mr, - stream); + stream, + mr); // TPRINT(query_points, "query_points_"); // TPRINT(curve_ids, "curve_ids_"); // TPRINT(prefixes, "prefixes_"); @@ -338,18 +340,12 @@ std::unique_ptr cubicspline_interpolate(cudf::column_view const& q * of the table is ((M-n), 4) where M is `t.size()` and and n is * `ids.size()-1`. **/ -std::unique_ptr cubicspline_coefficients(cudf::column_view const& t, - cudf::column_view const& y, - cudf::column_view const& ids, - cudf::column_view const& offsets, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream); std::unique_ptr cubicspline_coefficients(cudf::column_view const& t, cudf::column_view const& y, cudf::column_view const& ids, cudf::column_view const& prefixes, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { // rmm::device_vector::iterator t_rd = rmm::device_vector(t.data()); // TPRINT(t, "t_"); @@ -392,8 +388,8 @@ std::unique_ptr cubicspline_coefficients(cudf::column_view const& t u_buffer, h_buffer, i_buffer, - mr, - stream); + stream, + mr); // TPRINT(h_buffer, "h_i"); // TPRINT(i_buffer, "i_i"); @@ -468,8 +464,8 @@ std::unique_ptr cubicspline_coefficients(cudf::column_view const& t d2, d1, d0, - mr, - stream); + stream, + mr); // TPRINT(h_buffer, "h_buffer_"); // TPRINT(i_buffer, "i_buffer_"); @@ -497,25 +493,22 @@ std::unique_ptr cubicspline_interpolate(cudf::column_view const& q cudf::column_view const& curve_ids, cudf::column_view const& prefixes, cudf::column_view const& source_points, - cudf::table_view const& coefficients) + cudf::table_view const& coefficients, + rmm::mr::device_memory_resource* mr) { - return cuspatial::detail::cubicspline_interpolate(query_points, - curve_ids, - prefixes, - source_points, - coefficients, - rmm::mr::get_current_device_resource(), - 0); + return cuspatial::detail::cubicspline_interpolate( + query_points, curve_ids, prefixes, source_points, coefficients, rmm::cuda_stream_default, mr); } // Calls the coeffiecients function using default memory resources. std::unique_ptr cubicspline_coefficients(cudf::column_view const& t, cudf::column_view const& y, cudf::column_view const& ids, - cudf::column_view const& prefixes) + cudf::column_view const& prefixes, + rmm::mr::device_memory_resource* mr) { return cuspatial::detail::cubicspline_coefficients( - t, y, ids, prefixes, rmm::mr::get_current_device_resource(), 0); + t, y, ids, prefixes, rmm::cuda_stream_default, mr); } } // namespace cuspatial diff --git a/cpp/src/io/shp/polygon_shapefile_reader.cu b/cpp/src/io/shp/polygon_shapefile_reader.cu index 504a8d935..4c2cd8697 100644 --- a/cpp/src/io/shp/polygon_shapefile_reader.cu +++ b/cpp/src/io/shp/polygon_shapefile_reader.cu @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -34,7 +35,7 @@ namespace detail { template std::unique_ptr make_column(std::vector source, - cudaStream_t stream, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { auto tid = cudf::type_to_id(); @@ -50,7 +51,7 @@ std::tuple, read_polygon_shapefile(std::string const& filename); std::vector> read_polygon_shapefile( - std::string const& filename, rmm::mr::device_memory_resource* mr, cudaStream_t stream) + std::string const& filename, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUSPATIAL_EXPECTS(not filename.empty(), "Filename cannot be empty."); @@ -62,13 +63,13 @@ std::vector> read_polygon_shapefile( auto ys = make_column(std::get<3>(poly_vectors), stream, mr); // transform polygon lengths to polygon offsets - thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream), + thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream.value()), polygon_offsets->view().begin(), polygon_offsets->view().end(), polygon_offsets->mutable_view().begin()); // transform ring lengths to ring offsets - thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream), + thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream.value()), ring_offsets->view().begin(), ring_offsets->view().end(), ring_offsets->mutable_view().begin()); @@ -87,7 +88,7 @@ std::vector> read_polygon_shapefile( std::vector> read_polygon_shapefile( std::string const& filename, rmm::mr::device_memory_resource* mr) { - return detail::read_polygon_shapefile(filename, mr, 0); + return detail::read_polygon_shapefile(filename, rmm::cuda_stream_default, mr); } } // namespace cuspatial diff --git a/cpp/src/join/detail/intersection.cuh b/cpp/src/join/detail/intersection.cuh index 621d197d9..c3db09369 100644 --- a/cpp/src/join/detail/intersection.cuh +++ b/cpp/src/join/detail/intersection.cuh @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -43,11 +44,11 @@ template inline cudf::size_type copy_leaf_intersections(InputIterator input_begin, InputIterator input_end, OutputIterator output_begin, - cudaStream_t stream) + rmm::cuda_stream_view stream) { return thrust::distance( output_begin, - thrust::copy_if(rmm::exec_policy(stream)->on(stream), + thrust::copy_if(rmm::exec_policy(stream)->on(stream.value()), input_begin, input_end, output_begin, @@ -58,10 +59,10 @@ template inline cudf::size_type remove_non_quad_intersections(InputIterator input_begin, InputIterator input_end, OutputIterator output_begin, - cudaStream_t stream) + rmm::cuda_stream_view stream) { return thrust::distance(output_begin, - thrust::remove_if(rmm::exec_policy(stream)->on(stream), + thrust::remove_if(rmm::exec_policy(stream)->on(stream.value()), input_begin, input_end, output_begin, @@ -87,7 +88,7 @@ inline std::pair find_intersections( T y_min, T scale, int8_t max_depth, - cudaStream_t stream) + rmm::cuda_stream_view stream) { auto d_keys = cudf::column_device_view::create(quadtree.column(0), stream); auto d_levels = cudf::column_device_view::create(quadtree.column(1), stream); @@ -97,7 +98,7 @@ inline std::pair find_intersections( auto d_poly_x_max = cudf::column_device_view::create(poly_bbox.column(2), stream); auto d_poly_y_max = cudf::column_device_view::create(poly_bbox.column(3), stream); - thrust::transform(rmm::exec_policy(stream)->on(stream), + thrust::transform(rmm::exec_policy(stream)->on(stream.value()), make_zip_iterator(node_indices, poly_indices), make_zip_iterator(node_indices, poly_indices) + num_pairs, node_pairs, diff --git a/cpp/src/join/detail/traversal.cuh b/cpp/src/join/detail/traversal.cuh index 81b2247de..26a4035e3 100644 --- a/cpp/src/join/detail/traversal.cuh +++ b/cpp/src/join/detail/traversal.cuh @@ -20,6 +20,7 @@ #include "utility/z_order.cuh" #include +#include #include #include @@ -60,14 +61,14 @@ descend_quadtree(LengthsIter counts, rmm::device_uvector &parent_levels, rmm::device_uvector &parent_node_indices, rmm::device_uvector &parent_poly_indices, - cudaStream_t stream) + rmm::cuda_stream_view stream) { // Use the current parent node indices as the lookup into the global child counts auto parent_counts = thrust::make_permutation_iterator(counts, parent_node_indices.begin()); // scan on the number of child nodes to compute the offsets // note: size is `num_quads + 1` so the last element is `num_children` rmm::device_uvector parent_offsets(num_quads + 1, stream); - thrust::inclusive_scan(rmm::exec_policy(stream)->on(stream), + thrust::inclusive_scan(rmm::exec_policy(stream)->on(stream.value()), parent_counts, parent_counts + num_quads, parent_offsets.begin() + 1); @@ -79,9 +80,9 @@ descend_quadtree(LengthsIter counts, rmm::device_uvector parent_indices(num_children, stream); // fill with zeroes thrust::fill( - rmm::exec_policy(stream)->on(stream), parent_indices.begin(), parent_indices.end(), 0); + rmm::exec_policy(stream)->on(stream.value()), parent_indices.begin(), parent_indices.end(), 0); // use the parent_offsets as the map to scatter sequential parent_indices - thrust::scatter(rmm::exec_policy(stream)->on(stream), + thrust::scatter(rmm::exec_policy(stream)->on(stream.value()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + num_quads, parent_offsets.begin(), @@ -89,7 +90,7 @@ descend_quadtree(LengthsIter counts, // inclusive scan with maximum functor to fill the empty elements with their left-most non-empty // elements. `parent_indices` is now a full array of the sequence index of each quadrant's parent - thrust::inclusive_scan(rmm::exec_policy(stream)->on(stream), + thrust::inclusive_scan(rmm::exec_policy(stream)->on(stream.value()), parent_indices.begin(), parent_indices.begin() + num_children, parent_indices.begin(), @@ -102,7 +103,7 @@ descend_quadtree(LengthsIter counts, rmm::device_uvector child_poly_indices(num_children, stream); // `parent_indices` is a gather map to retrieve non-leaf quads' respective child nodes - thrust::gather(rmm::exec_policy(stream)->on(stream), + thrust::gather(rmm::exec_policy(stream)->on(stream.value()), parent_indices.begin(), parent_indices.begin() + num_children, // curr level iterator @@ -117,7 +118,7 @@ descend_quadtree(LengthsIter counts, child_poly_indices.begin())); rmm::device_uvector relative_child_offsets(num_children, stream); - thrust::exclusive_scan_by_key(rmm::exec_policy(stream)->on(stream), + thrust::exclusive_scan_by_key(rmm::exec_policy(stream)->on(stream.value()), parent_indices.begin(), parent_indices.begin() + num_children, thrust::constant_iterator(1), @@ -125,7 +126,7 @@ descend_quadtree(LengthsIter counts, // compute child quad indices using parent and relative child offsets auto child_offsets_iter = thrust::make_permutation_iterator(offsets, child_quad_indices.begin()); - thrust::transform(rmm::exec_policy(stream)->on(stream), + thrust::transform(rmm::exec_policy(stream)->on(stream.value()), child_offsets_iter, child_offsets_iter + num_children, relative_child_offsets.begin(), diff --git a/cpp/src/join/quadtree_point_in_polygon.cu b/cpp/src/join/quadtree_point_in_polygon.cu index 8bc6ee395..4cf89fc8f 100644 --- a/cpp/src/join/quadtree_point_in_polygon.cu +++ b/cpp/src/join/quadtree_point_in_polygon.cu @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -105,8 +106,8 @@ struct compute_quadtree_point_in_polygon { cudf::column_view const &ring_offsets, cudf::column_view const &poly_points_x, cudf::column_view const &poly_points_y, - rmm::mr::device_memory_resource *mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { auto quad_lengths = quadtree.column(3); auto quad_offsets = quadtree.column(4); @@ -126,7 +127,7 @@ struct compute_quadtree_point_in_polygon { // `inclusive_scan` is the total number of points to be tested against any polygon. rmm::device_uvector local_point_offsets(num_poly_quad_pairs + 1, stream); - thrust::inclusive_scan(rmm::exec_policy(stream)->on(stream), + thrust::inclusive_scan(rmm::exec_policy(stream)->on(stream.value()), quad_lengths_iter, quad_lengths_iter + num_poly_quad_pairs, local_point_offsets.begin() + 1); @@ -154,7 +155,7 @@ struct compute_quadtree_point_in_polygon { // pp_pairs.append((polygon, point)) // ``` // - thrust::transform(rmm::exec_policy(stream)->on(stream), + thrust::transform(rmm::exec_policy(stream)->on(stream.value()), counting_iter, counting_iter + num_total_points, poly_and_point_indices, @@ -171,7 +172,7 @@ struct compute_quadtree_point_in_polygon { // Compute the number of intersections by removing (poly, point) pairs that don't intersect auto num_intersections = thrust::distance( poly_and_point_indices, - thrust::remove_if(rmm::exec_policy(stream)->on(stream), + thrust::remove_if(rmm::exec_policy(stream)->on(stream.value()), poly_and_point_indices, poly_and_point_indices + num_total_points, test_poly_point_intersection{ @@ -189,13 +190,13 @@ struct compute_quadtree_point_in_polygon { // `idxs.begin() + num_intersections`. // populate the polygon indices column - thrust::copy(rmm::exec_policy(stream)->on(stream), + thrust::copy(rmm::exec_policy(stream)->on(stream.value()), poly_idxs.begin(), poly_idxs.begin() + num_intersections, poly_idx_col->mutable_view().template begin()); // populate the point indices column - thrust::copy(rmm::exec_policy(stream)->on(stream), + thrust::copy(rmm::exec_policy(stream)->on(stream.value()), point_idxs.begin(), point_idxs.begin() + num_intersections, point_idx_col->mutable_view().template begin()); @@ -219,8 +220,8 @@ std::unique_ptr quadtree_point_in_polygon(cudf::table_view const &p cudf::column_view const &ring_offsets, cudf::column_view const &poly_points_x, cudf::column_view const &poly_points_y, - rmm::mr::device_memory_resource *mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { return cudf::type_dispatcher(point_x.type(), compute_quadtree_point_in_polygon{}, @@ -233,8 +234,8 @@ std::unique_ptr quadtree_point_in_polygon(cudf::table_view const &p ring_offsets, poly_points_x, poly_points_y, - mr, - stream); + stream, + mr); } } // namespace detail @@ -285,8 +286,8 @@ std::unique_ptr quadtree_point_in_polygon(cudf::table_view const &p ring_offsets, poly_points_x, poly_points_y, - mr, - cudaStream_t{0}); + rmm::cuda_stream_default, + mr); } } // namespace cuspatial diff --git a/cpp/src/join/quadtree_point_to_nearest_polyline.cu b/cpp/src/join/quadtree_point_to_nearest_polyline.cu index f5ec9298b..c07361ee9 100644 --- a/cpp/src/join/quadtree_point_to_nearest_polyline.cu +++ b/cpp/src/join/quadtree_point_to_nearest_polyline.cu @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -140,8 +141,8 @@ struct compute_quadtree_point_to_nearest_polyline { cudf::column_view const &poly_offsets, cudf::column_view const &poly_points_x, cudf::column_view const &poly_points_y, - rmm::mr::device_memory_resource *mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { auto poly_idx = poly_quad_pairs.column(0); auto quad_idx = poly_quad_pairs.column(1); @@ -151,20 +152,20 @@ struct compute_quadtree_point_to_nearest_polyline { rmm::device_uvector d_poly_idx(num_pairs, stream); - thrust::copy(rmm::exec_policy(stream)->on(stream), + thrust::copy(rmm::exec_policy(stream)->on(stream.value()), poly_idx.begin(), poly_idx.end(), d_poly_idx.begin()); rmm::device_uvector d_quad_idx(num_pairs, stream); - thrust::copy(rmm::exec_policy(stream)->on(stream), + thrust::copy(rmm::exec_policy(stream)->on(stream.value()), quad_idx.begin(), quad_idx.end(), d_quad_idx.begin()); // sort (d_poly_idx, d_quad_idx) using d_quad_idx as key => (quad_idxs, poly_idxs) - thrust::sort_by_key(rmm::exec_policy(stream)->on(stream), + thrust::sort_by_key(rmm::exec_policy(stream)->on(stream.value()), d_quad_idx.begin(), d_quad_idx.end(), d_poly_idx.begin()); @@ -176,7 +177,7 @@ struct compute_quadtree_point_to_nearest_polyline { uint32_t num_quads = thrust::distance(d_poly_idx_offsets.begin(), - thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), + thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream.value()), d_quad_idx.begin(), d_quad_idx.end(), thrust::constant_iterator(1), @@ -187,7 +188,7 @@ struct compute_quadtree_point_to_nearest_polyline { d_quad_idx.resize(num_quads, stream); d_poly_idx_offsets.resize(num_quads, stream); - thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream), + thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream.value()), d_poly_idx_offsets.begin(), d_poly_idx_offsets.end(), d_poly_idx_offsets.begin()); @@ -196,7 +197,7 @@ struct compute_quadtree_point_to_nearest_polyline { auto poly_index_col = make_fixed_width_column(point_x.size(), stream, mr); auto distance_col = make_fixed_width_column(point_x.size(), stream, mr); - find_nearest_polyline_kernel<<>>( + find_nearest_polyline_kernel<<>>( d_quad_idx.begin(), d_poly_idx_offsets.size(), d_poly_idx_offsets.begin(), @@ -234,8 +235,8 @@ std::unique_ptr quadtree_point_to_nearest_polyline( cudf::column_view const &poly_offsets, cudf::column_view const &poly_points_x, cudf::column_view const &poly_points_y, - rmm::mr::device_memory_resource *mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { return cudf::type_dispatcher(point_x.type(), compute_quadtree_point_to_nearest_polyline{}, @@ -247,8 +248,8 @@ std::unique_ptr quadtree_point_to_nearest_polyline( poly_offsets, poly_points_x, poly_points_y, - mr, - stream); + stream, + mr); } } // namespace detail @@ -297,8 +298,8 @@ std::unique_ptr quadtree_point_to_nearest_polyline( poly_offsets, poly_points_x, poly_points_y, - mr, - cudaStream_t{0}); + rmm::cuda_stream_default, + mr); } } // namespace cuspatial diff --git a/cpp/src/join/quadtree_poly_filtering.cu b/cpp/src/join/quadtree_poly_filtering.cu index 286af023b..0695074d5 100644 --- a/cpp/src/join/quadtree_poly_filtering.cu +++ b/cpp/src/join/quadtree_poly_filtering.cu @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -51,8 +52,8 @@ inline std::unique_ptr join_quadtree_and_bboxes(cudf::table_view co T y_max, T scale, int8_t max_depth, - rmm::mr::device_memory_resource *mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { auto const node_levels = quadtree.column(1); // uint8_t auto const node_counts = quadtree.column(3); // uint32_t @@ -60,7 +61,7 @@ inline std::unique_ptr join_quadtree_and_bboxes(cudf::table_view co // Count the number of top-level nodes to start. // This could be provided explicitly, but count_if should be fast enough. - auto num_top_level_leaves = thrust::count_if(rmm::exec_policy(stream)->on(stream), + auto num_top_level_leaves = thrust::count_if(rmm::exec_policy(stream)->on(stream.value()), node_levels.begin(), node_levels.end(), thrust::placeholders::_1 == 0); @@ -177,12 +178,12 @@ inline std::unique_ptr join_quadtree_and_bboxes(cudf::table_view co cols.push_back(make_fixed_width_column(num_results, stream, mr)); cols.push_back(make_fixed_width_column(num_results, stream, mr)); - thrust::copy(rmm::exec_policy(stream)->on(stream), + thrust::copy(rmm::exec_policy(stream)->on(stream.value()), out_poly_idxs.begin(), out_poly_idxs.begin() + num_results, cols.at(0)->mutable_view().begin()); - thrust::copy(rmm::exec_policy(stream)->on(stream), + thrust::copy(rmm::exec_policy(stream)->on(stream.value()), out_node_idxs.begin(), out_node_idxs.begin() + num_results, cols.at(1)->mutable_view().begin()); @@ -200,8 +201,8 @@ struct dispatch_quadtree_bounding_box_join { double y_max, double scale, int8_t max_depth, - rmm::mr::device_memory_resource *mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { return join_quadtree_and_bboxes(quadtree, poly_bbox, @@ -211,8 +212,8 @@ struct dispatch_quadtree_bounding_box_join { static_cast(y_max), static_cast(scale), max_depth, - mr, - stream); + stream, + mr); } template ::value> * = nullptr, @@ -232,8 +233,8 @@ std::unique_ptr join_quadtree_and_bounding_boxes(cudf::table_view c double y_max, double scale, int8_t max_depth, - rmm::mr::device_memory_resource *mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { return cudf::type_dispatcher(poly_bbox.column(0).type(), dispatch_quadtree_bounding_box_join{}, @@ -245,8 +246,8 @@ std::unique_ptr join_quadtree_and_bounding_boxes(cudf::table_view c y_max, scale, max_depth, - mr, - stream); + stream, + mr); } } // namespace detail @@ -277,8 +278,16 @@ std::unique_ptr join_quadtree_and_bounding_boxes(cudf::table_view c return std::make_unique(std::move(cols)); } - return detail::join_quadtree_and_bounding_boxes( - quadtree, poly_bbox, x_min, x_max, y_min, y_max, scale, max_depth, mr, cudaStream_t{0}); + return detail::join_quadtree_and_bounding_boxes(quadtree, + poly_bbox, + x_min, + x_max, + y_min, + y_max, + scale, + max_depth, + rmm::cuda_stream_default, + mr); } } // namespace cuspatial diff --git a/cpp/src/spatial/hausdorff.cu b/cpp/src/spatial/hausdorff.cu index 86dd71d2e..362dfb110 100644 --- a/cpp/src/spatial/hausdorff.cu +++ b/cpp/src/spatial/hausdorff.cu @@ -14,23 +14,25 @@ * limitations under the License. */ +#include "utility/scatter_output_iterator.cuh" +#include "utility/size_from_offsets.cuh" + #include #include #include -#include "utility/scatter_output_iterator.cuh" -#include "utility/size_from_offsets.cuh" #include #include #include #include +#include +#include + #include #include #include -#include - #include namespace cuspatial { @@ -69,8 +71,8 @@ struct hausdorff_functor { cudf::column_view const& xs, cudf::column_view const& ys, cudf::column_view const& space_offsets, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { size_type num_points = xs.size(); size_type num_spaces = space_offsets.size(); @@ -126,14 +128,14 @@ struct hausdorff_functor { auto num_cartesian = num_points * num_points; - thrust::inclusive_scan_by_key(rmm::exec_policy(stream)->on(stream), + thrust::inclusive_scan_by_key(rmm::exec_policy(stream)->on(stream.value()), gpc_key_iter, gpc_key_iter + num_cartesian, hausdorff_acc_iter, scatter_out, thrust::equal_to>()); - thrust::transform(rmm::exec_policy(stream)->on(stream), + thrust::transform(rmm::exec_policy(stream)->on(stream.value()), result_temp_iter, result_temp_iter + num_results, result->mutable_view().begin(), @@ -160,10 +162,8 @@ std::unique_ptr directed_hausdorff_distance(cudf::column_view cons CUSPATIAL_EXPECTS(xs.size() >= points_per_space.size(), "At least one point is required for each space"); - cudaStream_t stream = 0; - return cudf::type_dispatcher( - xs.type(), detail::hausdorff_functor(), xs, ys, points_per_space, mr, stream); + xs.type(), detail::hausdorff_functor(), xs, ys, points_per_space, rmm::cuda_stream_default, mr); } } // namespace cuspatial diff --git a/cpp/src/spatial/haversine.cu b/cpp/src/spatial/haversine.cu index 81e6af1d4..b76ec1584 100644 --- a/cpp/src/spatial/haversine.cu +++ b/cpp/src/spatial/haversine.cu @@ -25,6 +25,7 @@ #include #include +#include #include #include @@ -65,7 +66,7 @@ struct haversine_functor { cudf::column_view const& b_lon, cudf::column_view const& b_lat, double radius, - cudaStream_t stream, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { if (a_lon.is_empty()) { return cudf::empty_like(a_lon); } @@ -81,7 +82,7 @@ struct haversine_functor { auto input_iter = thrust::make_zip_iterator(input_tuple); - thrust::transform(rmm::exec_policy(stream)->on(stream), + thrust::transform(rmm::exec_policy(stream)->on(stream.value()), input_iter, input_iter + result->size(), result->mutable_view().begin(), @@ -108,7 +109,7 @@ std::unique_ptr haversine_distance(cudf::column_view const& a_lon, cudf::column_view const& b_lon, cudf::column_view const& b_lat, double radius, - cudaStream_t stream, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUSPATIAL_EXPECTS(radius > 0, "radius must be positive."); @@ -138,7 +139,8 @@ std::unique_ptr haversine_distance(cudf::column_view const& a_lon, double radius, rmm::mr::device_memory_resource* mr) { - return cuspatial::detail::haversine_distance(a_lon, a_lat, b_lon, b_lat, radius, 0, mr); + return cuspatial::detail::haversine_distance( + a_lon, a_lat, b_lon, b_lat, radius, rmm::cuda_stream_default, mr); } } // namespace cuspatial diff --git a/cpp/src/spatial/lonlat_to_cartesian.cu b/cpp/src/spatial/lonlat_to_cartesian.cu index 16d4831ee..2887765ff 100644 --- a/cpp/src/spatial/lonlat_to_cartesian.cu +++ b/cpp/src/spatial/lonlat_to_cartesian.cu @@ -14,11 +14,15 @@ * limitations under the License. */ +#include + #include #include #include #include -#include + +#include + #include #include @@ -55,8 +59,8 @@ struct lonlat_to_cartesian_functor { double origin_lat, cudf::column_view const& input_lon, cudf::column_view const& input_lat, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto size = input_lon.size(); auto tid = cudf::type_to_id(); @@ -81,7 +85,7 @@ struct lonlat_to_cartesian_functor { lat_to_y(origin_lat - lat)); }; - thrust::transform(rmm::exec_policy(stream)->on(stream), + thrust::transform(rmm::exec_policy(stream)->on(stream.value()), input_zip, input_zip + input_lon.size(), output_zip, @@ -100,8 +104,8 @@ pair_of_columns lonlat_to_cartesian(double origin_lon, double origin_lat, cudf::column_view const& input_lon, cudf::column_view const& input_lat, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { return cudf::type_dispatcher(input_lon.type(), lonlat_to_cartesian_functor(), @@ -109,8 +113,8 @@ pair_of_columns lonlat_to_cartesian(double origin_lon, origin_lat, input_lon, input_lat, - mr, - stream); + stream, + mr); } } // namespace detail @@ -132,7 +136,8 @@ pair_of_columns lonlat_to_cartesian(double origin_lon, CUSPATIAL_EXPECTS(not input_lon.has_nulls() && not input_lat.has_nulls(), "input cannot contain nulls"); - return detail::lonlat_to_cartesian(origin_lon, origin_lat, input_lon, input_lat, mr, 0); + return detail::lonlat_to_cartesian( + origin_lon, origin_lat, input_lon, input_lat, rmm::cuda_stream_default, mr); } } // namespace cuspatial diff --git a/cpp/src/spatial/point_in_polygon.cu b/cpp/src/spatial/point_in_polygon.cu index 370e4a381..827132f89 100644 --- a/cpp/src/spatial/point_in_polygon.cu +++ b/cpp/src/spatial/point_in_polygon.cu @@ -14,14 +14,15 @@ * limitations under the License. */ +#include + #include #include #include #include #include -#include - +#include #include #include @@ -112,8 +113,8 @@ struct point_in_polygon_functor { cudf::column_view const& poly_ring_offsets, cudf::column_view const& poly_points_x, cudf::column_view const& poly_points_y, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto size = test_points_x.size(); auto tid = cudf::type_to_id(); @@ -129,17 +130,18 @@ struct point_in_polygon_functor { auto kernel = point_in_polygon_kernel; - kernel<<>>(test_points_x.size(), - test_points_x.begin(), - test_points_y.begin(), - poly_offsets.size(), - poly_offsets.begin(), - poly_ring_offsets.size(), - poly_ring_offsets.begin(), - poly_points_x.size(), - poly_points_x.begin(), - poly_points_y.begin(), - results->mutable_view().begin()); + kernel<<>>( + test_points_x.size(), + test_points_x.begin(), + test_points_y.begin(), + poly_offsets.size(), + poly_offsets.begin(), + poly_ring_offsets.size(), + poly_ring_offsets.begin(), + poly_points_x.size(), + poly_points_x.begin(), + poly_points_y.begin(), + results->mutable_view().begin()); return results; } @@ -157,8 +159,8 @@ std::unique_ptr point_in_polygon(cudf::column_view const& test_poi cudf::column_view const& poly_ring_offsets, cudf::column_view const& poly_points_x, cudf::column_view const& poly_points_y, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { return cudf::type_dispatcher(test_points_x.type(), point_in_polygon_functor(), @@ -168,8 +170,8 @@ std::unique_ptr point_in_polygon(cudf::column_view const& test_poi poly_ring_offsets, poly_points_x, poly_points_y, - mr, - stream); + stream, + mr); } } // namespace detail @@ -212,8 +214,8 @@ std::unique_ptr point_in_polygon(cudf::column_view const& test_poi poly_ring_offsets, poly_points_x, poly_points_y, - mr, - 0); + rmm::cuda_stream_default, + mr); } } // namespace cuspatial diff --git a/cpp/src/spatial/polygon_bounding_box.cu b/cpp/src/spatial/polygon_bounding_box.cu index 073f3e074..c3ba056b3 100644 --- a/cpp/src/spatial/polygon_bounding_box.cu +++ b/cpp/src/spatial/polygon_bounding_box.cu @@ -25,6 +25,7 @@ #include #include +#include #include #include @@ -58,8 +59,8 @@ std::unique_ptr compute_polygon_bounding_boxes(cudf::column_view co cudf::column_view const &ring_offsets, cudf::column_view const &x, cudf::column_view const &y, - rmm::mr::device_memory_resource *mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { auto num_polygons = poly_offsets.size(); // Wrapped in an IEFE so `first_ring_offsets` is freed on return @@ -68,20 +69,20 @@ std::unique_ptr compute_polygon_bounding_boxes(cudf::column_view co rmm::device_vector first_ring_offsets(num_polygons); // Gather the first ring offset for each polygon - thrust::gather(rmm::exec_policy(stream)->on(stream), + thrust::gather(rmm::exec_policy(stream)->on(stream.value()), poly_offsets.begin(), poly_offsets.end(), ring_offsets.begin(), first_ring_offsets.begin()); // Scatter the first ring offset into a list of point_ids for reduction - thrust::scatter(rmm::exec_policy(stream)->on(stream), + thrust::scatter(rmm::exec_policy(stream)->on(stream.value()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + num_polygons, first_ring_offsets.begin(), point_ids.begin()); - thrust::inclusive_scan(rmm::exec_policy(stream)->on(stream), + thrust::inclusive_scan(rmm::exec_policy(stream)->on(stream.value()), point_ids.begin(), point_ids.end(), point_ids.begin(), @@ -112,7 +113,7 @@ std::unique_ptr compute_polygon_bounding_boxes(cudf::column_view co auto points_iter = thrust::make_zip_iterator(thrust::make_tuple(x.begin(), y.begin())); auto points_squared_iter = thrust::make_transform_iterator(points_iter, point_to_square{}); - thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), + thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream.value()), point_ids.begin(), point_ids.end(), points_squared_iter, @@ -147,10 +148,10 @@ struct dispatch_compute_polygon_bounding_boxes { cudf::column_view const &ring_offsets, cudf::column_view const &x, cudf::column_view const &y, - rmm::mr::device_memory_resource *mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { - return compute_polygon_bounding_boxes(poly_offsets, ring_offsets, x, y, mr, stream); + return compute_polygon_bounding_boxes(poly_offsets, ring_offsets, x, y, stream, mr); } }; @@ -162,8 +163,8 @@ std::unique_ptr polygon_bounding_boxes(cudf::column_view const &pol cudf::column_view const &ring_offsets, cudf::column_view const &x, cudf::column_view const &y, - rmm::mr::device_memory_resource *mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { return cudf::type_dispatcher(x.type(), dispatch_compute_polygon_bounding_boxes{}, @@ -171,8 +172,8 @@ std::unique_ptr polygon_bounding_boxes(cudf::column_view const &pol ring_offsets, x, y, - mr, - stream); + stream, + mr); } } // namespace detail @@ -201,7 +202,8 @@ std::unique_ptr polygon_bounding_boxes(cudf::column_view const &pol return std::make_unique(std::move(cols)); } - return detail::polygon_bounding_boxes(poly_offsets, ring_offsets, x, y, mr, cudaStream_t{0}); + return detail::polygon_bounding_boxes( + poly_offsets, ring_offsets, x, y, rmm::cuda_stream_default, mr); } } // namespace cuspatial diff --git a/cpp/src/spatial/polyline_bounding_box.cu b/cpp/src/spatial/polyline_bounding_box.cu index 54aa4162b..0f3edc313 100644 --- a/cpp/src/spatial/polyline_bounding_box.cu +++ b/cpp/src/spatial/polyline_bounding_box.cu @@ -14,6 +14,17 @@ * limitations under the License. */ +#include + +#include +#include +#include +#include +#include + +#include +#include + #include #include #include @@ -23,19 +34,9 @@ #include #include -#include -#include -#include -#include -#include - -#include - #include #include -#include - namespace cuspatial { namespace { @@ -57,20 +58,20 @@ std::unique_ptr compute_polyline_bounding_boxes(cudf::column_view c cudf::column_view const &x, cudf::column_view const &y, T expansion_radius, - rmm::mr::device_memory_resource *mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { auto num_polygons = poly_offsets.size(); rmm::device_vector point_ids(x.size()); // Scatter the polyline offsets into a list of point_ids for reduction - thrust::scatter(rmm::exec_policy(stream)->on(stream), + thrust::scatter(rmm::exec_policy(stream)->on(stream.value()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + num_polygons, poly_offsets.begin(), point_ids.begin()); - thrust::inclusive_scan(rmm::exec_policy(stream)->on(stream), + thrust::inclusive_scan(rmm::exec_policy(stream)->on(stream.value()), point_ids.begin(), point_ids.end(), point_ids.begin(), @@ -99,7 +100,7 @@ std::unique_ptr compute_polyline_bounding_boxes(cudf::column_view c auto points_squared_iter = thrust::make_transform_iterator(points_iter, point_to_square{expansion_radius}); - thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), + thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream.value()), point_ids.begin(), point_ids.end(), points_squared_iter, @@ -134,11 +135,11 @@ struct dispatch_compute_polyline_bounding_boxes { cudf::column_view const &x, cudf::column_view const &y, double expansion_radius, - rmm::mr::device_memory_resource *mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { return compute_polyline_bounding_boxes( - poly_offsets, x, y, static_cast(expansion_radius), mr, stream); + poly_offsets, x, y, static_cast(expansion_radius), stream, mr); } }; @@ -150,8 +151,8 @@ std::unique_ptr polyline_bounding_boxes(cudf::column_view const &po cudf::column_view const &x, cudf::column_view const &y, double expansion_radius, - rmm::mr::device_memory_resource *mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { return cudf::type_dispatcher(x.type(), dispatch_compute_polyline_bounding_boxes{}, @@ -159,8 +160,8 @@ std::unique_ptr polyline_bounding_boxes(cudf::column_view const &po x, y, expansion_radius, - mr, - cudaStream_t{0}); + rmm::cuda_stream_default, + mr); } } // namespace detail @@ -187,7 +188,8 @@ std::unique_ptr polyline_bounding_boxes(cudf::column_view const &po cols.push_back(cudf::empty_like(y)); return std::make_unique(std::move(cols)); } - return detail::polyline_bounding_boxes(poly_offsets, x, y, expansion_radius, mr, cudaStream_t{0}); + return detail::polyline_bounding_boxes( + poly_offsets, x, y, expansion_radius, rmm::cuda_stream_default, mr); } } // namespace cuspatial diff --git a/cpp/src/spatial_window/spatial_window.cu b/cpp/src/spatial_window/spatial_window.cu index 2f2d57d91..869f9c295 100644 --- a/cpp/src/spatial_window/spatial_window.cu +++ b/cpp/src/spatial_window/spatial_window.cu @@ -14,12 +14,14 @@ * limitations under the License. */ +#include + #include #include #include #include -#include +#include #include #include @@ -71,7 +73,7 @@ struct spatial_window_dispatch { double window_max_y, cudf::column_view const& x, cudf::column_view const& y, - cudaStream_t stream, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { auto device_x = cudf::column_device_view::create(x, stream); @@ -83,8 +85,8 @@ struct spatial_window_dispatch { static_cast(window_max_y), *device_x, *device_y}, - mr, - stream); + stream, + mr); } template points_in_spatial_window(double window_min_x, double window_max_y, cudf::column_view const& x, cudf::column_view const& y, - cudaStream_t stream, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUSPATIAL_EXPECTS(x.type() == y.type(), "Type mismatch between x and y arrays"); @@ -149,7 +151,7 @@ std::unique_ptr points_in_spatial_window(double window_min_x, rmm::mr::device_memory_resource* mr) { return detail::points_in_spatial_window( - window_min_x, window_max_x, window_min_y, window_max_y, x, y, 0, mr); + window_min_x, window_max_x, window_min_y, window_max_y, x, y, rmm::cuda_stream_default, mr); } } // namespace cuspatial diff --git a/cpp/src/trajectory/derive_trajectories.cu b/cpp/src/trajectory/derive_trajectories.cu index 822448f18..7a63d74a1 100644 --- a/cpp/src/trajectory/derive_trajectories.cu +++ b/cpp/src/trajectory/derive_trajectories.cu @@ -14,10 +14,8 @@ * limitations under the License. */ -#include - -#include -#include +#include +#include #include #include @@ -25,8 +23,11 @@ #include #include -#include -#include +#include +#include + +#include +#include #include #include @@ -39,20 +40,20 @@ std::pair, std::unique_ptr> derive_tr cudf::column_view const& x, cudf::column_view const& y, cudf::column_view const& timestamp, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto sorted = cudf::detail::sort_by_key(cudf::table_view{{object_id, x, y, timestamp}}, cudf::table_view{{object_id, timestamp}}, {}, {}, - mr, - stream); + stream, + mr); auto policy = rmm::exec_policy(stream); auto sorted_id = sorted->get_column(0).view(); rmm::device_vector lengths(object_id.size()); - auto grouped = thrust::reduce_by_key(policy->on(stream), + auto grouped = thrust::reduce_by_key(policy->on(stream.value()), sorted_id.begin(), sorted_id.end(), thrust::make_constant_iterator(1), @@ -65,8 +66,10 @@ std::pair, std::unique_ptr> derive_tr stream, mr); - thrust::exclusive_scan( - policy->on(stream), lengths.begin(), lengths.end(), offsets->mutable_view().begin()); + thrust::exclusive_scan(policy->on(stream.value()), + lengths.begin(), + lengths.end(), + offsets->mutable_view().begin()); return std::make_pair(std::move(sorted), std::move(offsets)); } @@ -97,6 +100,6 @@ std::pair, std::unique_ptr> derive_tr return std::make_pair(std::make_unique(std::move(cols)), cudf::make_empty_column(cudf::data_type{cudf::type_id::INT32})); } - return detail::derive_trajectories(object_id, x, y, timestamp, mr, 0); + return detail::derive_trajectories(object_id, x, y, timestamp, rmm::cuda_stream_default, mr); } } // namespace cuspatial diff --git a/cpp/src/trajectory/trajectory_bounding_boxes.cu b/cpp/src/trajectory/trajectory_bounding_boxes.cu index cc411853e..ec1528168 100644 --- a/cpp/src/trajectory/trajectory_bounding_boxes.cu +++ b/cpp/src/trajectory/trajectory_bounding_boxes.cu @@ -14,7 +14,8 @@ * limitations under the License. */ -#include +#include +#include #include #include @@ -25,8 +26,9 @@ #include #include -#include -#include +#include + +#include namespace cuspatial { @@ -39,8 +41,8 @@ struct dispatch_element { cudf::column_view const& object_id, cudf::column_view const& x, cudf::column_view const& y, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto policy = rmm::exec_policy(stream); @@ -71,7 +73,7 @@ struct dispatch_element { cols.at(3)->mutable_view().begin()) // bbox_y2 ); - thrust::fill(policy->on(stream), + thrust::fill(policy->on(stream.value()), bboxes, bboxes + num_trajectories, thrust::make_tuple(std::numeric_limits::max(), @@ -80,7 +82,7 @@ struct dispatch_element { std::numeric_limits::min())); thrust::reduce_by_key( - policy->on(stream), // execution policy + policy->on(stream.value()), // execution policy object_id.begin(), // keys_first object_id.end(), // keys_last points, // values_first @@ -96,7 +98,7 @@ struct dispatch_element { }); // check for errors - CHECK_CUDA(stream); + CHECK_CUDA(stream.value()); return std::make_unique(std::move(cols)); } @@ -107,8 +109,8 @@ struct dispatch_element { cudf::column_view const& object_id, cudf::column_view const& x, cudf::column_view const& y, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUSPATIAL_FAIL("X and Y must be floating point types"); } @@ -121,11 +123,11 @@ std::unique_ptr trajectory_bounding_boxes(cudf::size_type num_traje cudf::column_view const& object_id, cudf::column_view const& x, cudf::column_view const& y, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { return cudf::type_dispatcher( - x.type(), dispatch_element{}, num_trajectories, object_id, x, y, mr, stream); + x.type(), dispatch_element{}, num_trajectories, object_id, x, y, stream, mr); } } // namespace detail @@ -151,7 +153,8 @@ std::unique_ptr trajectory_bounding_boxes(cudf::size_type num_traje return std::make_unique(std::move(cols)); } - return detail::trajectory_bounding_boxes(num_trajectories, object_id, x, y, mr, 0); + return detail::trajectory_bounding_boxes( + num_trajectories, object_id, x, y, rmm::cuda_stream_default, mr); } } // namespace cuspatial diff --git a/cpp/src/trajectory/trajectory_distances_and_speeds.cu b/cpp/src/trajectory/trajectory_distances_and_speeds.cu index 18ee2fb02..91057738e 100644 --- a/cpp/src/trajectory/trajectory_distances_and_speeds.cu +++ b/cpp/src/trajectory/trajectory_distances_and_speeds.cu @@ -14,7 +14,8 @@ * limitations under the License. */ -#include +#include +#include #include #include @@ -25,8 +26,9 @@ #include #include -#include -#include +#include + +#include namespace cuspatial { @@ -42,7 +44,7 @@ struct duplicate_first_element_func { }; template -auto duplicate_first_element_iterator(cudf::column_view const& col, cudaStream_t stream) +auto duplicate_first_element_iterator(cudf::column_view const& col, rmm::cuda_stream_view stream) { auto d_col = cudf::column_device_view::create(col, stream); return thrust::make_transform_iterator(thrust::make_counting_iterator(-1), @@ -58,8 +60,8 @@ struct dispatch_timestamp { cudf::column_view const& x, cudf::column_view const& y, cudf::column_view const& timestamp, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto policy = rmm::exec_policy(stream); @@ -102,7 +104,7 @@ struct dispatch_timestamp { // Compute duration and distance difference between adjacent elements that // share the same object id - thrust::adjacent_difference(policy->on(stream), // execution policy + thrust::adjacent_difference(policy->on(stream.value()), timestamp_point_and_id, // first timestamp_point_and_id + durations.size(), // last duration_and_distance_1, // result @@ -146,7 +148,7 @@ struct dispatch_timestamp { // Reduce the intermediate durations and kilometer distances into meter // distances and speeds in meters/second - thrust::reduce_by_key(policy->on(stream), // execution policy + thrust::reduce_by_key(policy->on(stream.value()), object_id.begin(), // keys_first object_id.end(), // keys_last duration_and_distance_2 + 1, // values_first @@ -165,7 +167,7 @@ struct dispatch_timestamp { }); // check for errors - CHECK_CUDA(stream); + CHECK_CUDA(stream.value()); return std::make_unique(std::move(cols)); } @@ -177,8 +179,8 @@ struct dispatch_timestamp { cudf::column_view const& timestamp, cudf::column_view const& x, cudf::column_view const& y, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUSPATIAL_FAIL("Timestamp must be a timestamp type"); } @@ -192,8 +194,8 @@ struct dispatch_element { cudf::column_view const& x, cudf::column_view const& y, cudf::column_view const& timestamp, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { return cudf::type_dispatcher(timestamp.type(), dispatch_timestamp{}, @@ -202,8 +204,8 @@ struct dispatch_element { x, y, timestamp, - mr, - stream); + stream, + mr); } template @@ -213,8 +215,8 @@ struct dispatch_element { cudf::column_view const& x, cudf::column_view const& y, cudf::column_view const& timestamp, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUSPATIAL_FAIL("X and Y must be floating point types"); } @@ -228,11 +230,11 @@ std::unique_ptr trajectory_distances_and_speeds(cudf::size_type num cudf::column_view const& x, cudf::column_view const& y, cudf::column_view const& timestamp, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { return cudf::type_dispatcher( - x.type(), dispatch_element{}, num_trajectories, object_id, x, y, timestamp, mr, stream); + x.type(), dispatch_element{}, num_trajectories, object_id, x, y, timestamp, stream, mr); } } // namespace detail @@ -262,7 +264,7 @@ std::unique_ptr trajectory_distances_and_speeds(cudf::size_type num } return detail::trajectory_distances_and_speeds( - num_trajectories, object_id, x, y, timestamp, mr, 0); + num_trajectories, object_id, x, y, timestamp, rmm::cuda_stream_default, mr); } } // namespace cuspatial diff --git a/cpp/tests/trajectory/trajectory_utilities.cuh b/cpp/tests/trajectory/trajectory_utilities.cuh index a1f70b78f..f0aa797fc 100644 --- a/cpp/tests/trajectory/trajectory_utilities.cuh +++ b/cpp/tests/trajectory/trajectory_utilities.cuh @@ -64,7 +64,7 @@ std::unique_ptr make_test_trajectories_table( ); auto sorted = cudf::detail::sort_by_key( - cudf::table_view{{id, x, y, ts}}, cudf::table_view{{id, ts}}, {}, {}, mr, 0); + cudf::table_view{{id, x, y, ts}}, cudf::table_view{{id, ts}}, {}, {}, 0, mr); return sorted; } From 0dcf24b53b7a40dfc3adb4e23665ac77d02583b3 Mon Sep 17 00:00:00 2001 From: Mark Sadang Date: Wed, 2 Dec 2020 15:43:25 -0800 Subject: [PATCH 16/18] updated changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15828ed31..fe3dcda24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ## New Features ## Improvements - +- PR #1119 Improvments to gpuCI scripts ## Bug Fixes # cuSpatial 0.16.0 (Date TBD) From c66749ea718b27bc374819802cfdc177209c1a3f Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Fri, 4 Dec 2020 04:52:27 -0600 Subject: [PATCH 17/18] Fix point in polygon test for cudf::gather breaking change(#328) Update `cudf::gather` callsite in `point_in_polygon_test_large` to account for the new out of bounds policy parameter. Authors: - ptaylor Approvers: - Mark Harris - Christopher Harris URL: https://github.com/rapidsai/cuspatial/pull/328 --- CHANGELOG.md | 1 + cpp/tests/join/point_in_polygon_test_large.cpp | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3782fd51d..e5aa0f5ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ ## Bug Fixes - PR #320 Fix quadtree construction bug: zero out `device_uvector` before `scatter` +- PR #328 Fix point in polygon test for cudf::gather breaking change # cuSpatial 0.16.0 (Date TBD) diff --git a/cpp/tests/join/point_in_polygon_test_large.cpp b/cpp/tests/join/point_in_polygon_test_large.cpp index 62c4d856f..a9a785c1c 100644 --- a/cpp/tests/join/point_in_polygon_test_large.cpp +++ b/cpp/tests/join/point_in_polygon_test_large.cpp @@ -158,7 +158,8 @@ TYPED_TEST(PIPRefineTestLarge, TestLarge) auto &quadtree = std::get<1>(quadtree_pair); auto &point_indices = std::get<0>(quadtree_pair); - auto points = cudf::gather(cudf::table_view{{x, y}}, *point_indices, this->mr()); + auto points = cudf::gather( + cudf::table_view{{x, y}}, *point_indices, cudf::out_of_bounds_policy::DONT_CHECK, this->mr()); fixed_width_column_wrapper poly_offsets({0, 1, 2, 3}); fixed_width_column_wrapper ring_offsets({0, 4, 10, 14}); From 9aeebc84e93fe161d0d0664ff6926a70ffc5dd7d Mon Sep 17 00:00:00 2001 From: Ray Douglass <3107146+raydouglass@users.noreply.github.com> Date: Thu, 10 Dec 2020 10:15:57 -0500 Subject: [PATCH 18/18] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 08fdd4067..e94e47f39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -# cuSpatial 0.17.0 (Date TBD) +# cuSpatial 0.17.0 (10 Dec 2020) ## New Features