Skip to content

Commit

Permalink
Merge branch 'branch-24.10' of github.com:rapidsai/cudf into setup-py…
Browse files Browse the repository at this point in the history
…libcudf-package-24.10
  • Loading branch information
lithomas1 committed Jul 31, 2024
2 parents 606d15e + 9336c17 commit 33be4ce
Show file tree
Hide file tree
Showing 13 changed files with 88 additions and 39 deletions.
3 changes: 0 additions & 3 deletions ci/run_cudf_memcheck_ctests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@ export LIBCUDF_MEMCHECK_ENABLED=1
for gt in ./*_TEST ; do
test_name=$(basename ${gt})
# Run gtests with compute-sanitizer
if [[ "$test_name" == "ERROR_TEST" ]] || [[ "$test_name" == "STREAM_IDENTIFICATION_TEST" ]]; then
continue
fi
echo "Running compute-sanitizer on $test_name"
compute-sanitizer --tool memcheck ${gt} "$@"
done
Expand Down
5 changes: 4 additions & 1 deletion cpp/src/column/column_view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,10 @@ void prefetch_col_data(ColumnView& col, void const* data_ptr, std::string_view k
key, data_ptr, col.size() * size_of(col.type()), cudf::get_default_stream());
} else if (col.type().id() == type_id::STRING) {
strings_column_view scv{col};

if (data_ptr == nullptr) {
// Do not call chars_size if the data_ptr is nullptr.
return;
}
cudf::experimental::prefetch::detail::prefetch_noexcept(
key,
data_ptr,
Expand Down
14 changes: 14 additions & 0 deletions cpp/src/utilities/prefetch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,20 @@ cudaError_t prefetch_noexcept(std::string_view key,
rmm::cuda_stream_view stream,
rmm::cuda_device_id device_id) noexcept
{
// Don't try to prefetch nullptrs or empty data. Sometimes libcudf has column
// views that use nullptrs with a nonzero size as an optimization.
if (ptr == nullptr) {
if (prefetch_config::instance().debug) {
std::cerr << "Skipping prefetch of nullptr" << std::endl;
}
return cudaSuccess;
}
if (size == 0) {
if (prefetch_config::instance().debug) {
std::cerr << "Skipping prefetch of size 0" << std::endl;
}
return cudaSuccess;
}
if (prefetch_config::instance().get(key)) {
if (prefetch_config::instance().debug) {
std::cerr << "Prefetching " << size << " bytes for key " << key << " at location " << ptr
Expand Down
4 changes: 4 additions & 0 deletions cpp/tests/error/error_handling_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ CUDF_KERNEL void test_kernel(int* data) { data[threadIdx.x] = threadIdx.x; }
// calls.
TEST(StreamCheck, FailedKernel)
{
if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { GTEST_SKIP(); }

rmm::cuda_stream stream;
int a;
test_kernel<<<0, 0, 0, stream.value()>>>(&a);
Expand All @@ -61,6 +63,8 @@ TEST(StreamCheck, FailedKernel)

TEST(StreamCheck, CatchFailedKernel)
{
if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { GTEST_SKIP(); }

rmm::cuda_stream stream;
int a;
test_kernel<<<0, 0, 0, stream.value()>>>(&a);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ void test_cudaLaunchKernel()
} catch (std::runtime_error&) {
return;
}
if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; }
throw std::runtime_error("No exception raised for kernel on default stream!");
}

Expand Down
2 changes: 1 addition & 1 deletion dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -714,7 +714,7 @@ dependencies:
common:
- output_types: [conda, requirements, pyproject]
packages:
- polars>=1.0
- polars>=1.0,<1.3
run_dask_cudf:
common:
- output_types: [conda, requirements, pyproject]
Expand Down
52 changes: 35 additions & 17 deletions java/src/main/java/ai/rapids/cudf/ColumnView.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -5034,8 +5034,8 @@ private static NestedColumnVector createNestedColumnVector(DType type, long rows
// DATA MOVEMENT
/////////////////////////////////////////////////////////////////////////////

private static HostColumnVectorCore copyToHostNestedHelper(
ColumnView deviceCvPointer, HostMemoryAllocator hostMemoryAllocator) {
private static HostColumnVectorCore copyToHostAsyncNestedHelper(
Cuda.Stream stream, ColumnView deviceCvPointer, HostMemoryAllocator hostMemoryAllocator) {
if (deviceCvPointer == null) {
return null;
}
Expand All @@ -5056,20 +5056,20 @@ private static HostColumnVectorCore copyToHostNestedHelper(
currValidity = deviceCvPointer.getValid();
if (currData != null) {
hostData = hostMemoryAllocator.allocate(currData.length);
hostData.copyFromDeviceBuffer(currData);
hostData.copyFromDeviceBufferAsync(currData, stream);
}
if (currValidity != null) {
hostValid = hostMemoryAllocator.allocate(currValidity.length);
hostValid.copyFromDeviceBuffer(currValidity);
hostValid.copyFromDeviceBufferAsync(currValidity, stream);
}
if (currOffsets != null) {
hostOffsets = hostMemoryAllocator.allocate(currOffsets.length);
hostOffsets.copyFromDeviceBuffer(currOffsets);
hostOffsets.copyFromDeviceBufferAsync(currOffsets, stream);
}
int numChildren = deviceCvPointer.getNumChildren();
for (int i = 0; i < numChildren; i++) {
try(ColumnView childDevPtr = deviceCvPointer.getChildColumnView(i)) {
children.add(copyToHostNestedHelper(childDevPtr, hostMemoryAllocator));
children.add(copyToHostAsyncNestedHelper(stream, childDevPtr, hostMemoryAllocator));
}
}
currNullCount = deviceCvPointer.getNullCount();
Expand Down Expand Up @@ -5103,11 +5103,20 @@ private static HostColumnVectorCore copyToHostNestedHelper(
}
}

/**
 * Synchronously copy this column's data to host memory.
 * <p>
 * Issues the copies on the default stream and blocks on that stream before
 * returning, so the result is safe to examine immediately.
 *
 * @param hostMemoryAllocator allocator used for the host-side buffers
 * @return the column data resident in host memory
 */
public HostColumnVector copyToHost(HostMemoryAllocator hostMemoryAllocator) {
  final HostColumnVector hostVector = copyToHostAsync(Cuda.DEFAULT_STREAM, hostMemoryAllocator);
  Cuda.DEFAULT_STREAM.sync();
  return hostVector;
}

/**
* Copy the data to the host.
* Copy the data to the host asynchronously. The caller MUST synchronize on the stream
* before examining the result.
*/
public HostColumnVector copyToHost(HostMemoryAllocator hostMemoryAllocator) {
try (NvtxRange toHost = new NvtxRange("ensureOnHost", NvtxColor.BLUE)) {
public HostColumnVector copyToHostAsync(Cuda.Stream stream,
HostMemoryAllocator hostMemoryAllocator) {
try (NvtxRange toHost = new NvtxRange("toHostAsync", NvtxColor.BLUE)) {
HostMemoryBuffer hostDataBuffer = null;
HostMemoryBuffer hostValidityBuffer = null;
HostMemoryBuffer hostOffsetsBuffer = null;
Expand All @@ -5127,16 +5136,16 @@ public HostColumnVector copyToHost(HostMemoryAllocator hostMemoryAllocator) {
if (!type.isNestedType()) {
if (valid != null) {
hostValidityBuffer = hostMemoryAllocator.allocate(valid.getLength());
hostValidityBuffer.copyFromDeviceBuffer(valid);
hostValidityBuffer.copyFromDeviceBufferAsync(valid, stream);
}
if (offsets != null) {
hostOffsetsBuffer = hostMemoryAllocator.allocate(offsets.length);
hostOffsetsBuffer.copyFromDeviceBuffer(offsets);
hostOffsetsBuffer.copyFromDeviceBufferAsync(offsets, stream);
}
// If a strings column is all null values there is no data buffer allocated
if (data != null) {
hostDataBuffer = hostMemoryAllocator.allocate(data.length);
hostDataBuffer.copyFromDeviceBuffer(data);
hostDataBuffer.copyFromDeviceBufferAsync(data, stream);
}
HostColumnVector ret = new HostColumnVector(type, rows, Optional.of(nullCount),
hostDataBuffer, hostValidityBuffer, hostOffsetsBuffer);
Expand All @@ -5145,21 +5154,21 @@ public HostColumnVector copyToHost(HostMemoryAllocator hostMemoryAllocator) {
} else {
if (data != null) {
hostDataBuffer = hostMemoryAllocator.allocate(data.length);
hostDataBuffer.copyFromDeviceBuffer(data);
hostDataBuffer.copyFromDeviceBufferAsync(data, stream);
}

if (valid != null) {
hostValidityBuffer = hostMemoryAllocator.allocate(valid.getLength());
hostValidityBuffer.copyFromDeviceBuffer(valid);
hostValidityBuffer.copyFromDeviceBufferAsync(valid, stream);
}
if (offsets != null) {
hostOffsetsBuffer = hostMemoryAllocator.allocate(offsets.getLength());
hostOffsetsBuffer.copyFromDeviceBuffer(offsets);
hostOffsetsBuffer.copyFromDeviceBufferAsync(offsets, stream);
}
List<HostColumnVectorCore> children = new ArrayList<>();
for (int i = 0; i < getNumChildren(); i++) {
try (ColumnView childDevPtr = getChildColumnView(i)) {
children.add(copyToHostNestedHelper(childDevPtr, hostMemoryAllocator));
children.add(copyToHostAsyncNestedHelper(stream, childDevPtr, hostMemoryAllocator));
}
}
HostColumnVector ret = new HostColumnVector(type, rows, Optional.of(nullCount),
Expand Down Expand Up @@ -5192,10 +5201,19 @@ public HostColumnVector copyToHost(HostMemoryAllocator hostMemoryAllocator) {
}
}

/**
 * Synchronously copy the data to host memory, allocating the host buffers
 * with the default host memory allocator.
 *
 * @return the column data resident in host memory
 */
public HostColumnVector copyToHost() {
  HostMemoryAllocator allocator = DefaultHostMemoryAllocator.get();
  return copyToHost(allocator);
}

/**
 * Asynchronously copy the data to host memory on the given stream, allocating
 * the host buffers with the default host memory allocator. The caller MUST
 * synchronize on the stream before examining the result.
 *
 * @param stream the CUDA stream on which the copies are enqueued
 * @return the column data being copied to host memory
 */
public HostColumnVector copyToHostAsync(Cuda.Stream stream) {
  HostMemoryAllocator allocator = DefaultHostMemoryAllocator.get();
  return copyToHostAsync(stream, allocator);
}

/**
* Calculate the total space required to copy the data to the host. This should be padded to
* the alignment that the CPU requires.
Expand Down
4 changes: 4 additions & 0 deletions java/src/main/java/ai/rapids/cudf/HostColumnVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ public interface EventHandler {
public HostColumnVector(DType type, long rows, Optional<Long> nullCount,
HostMemoryBuffer hostDataBuffer, HostMemoryBuffer hostValidityBuffer,
HostMemoryBuffer offsetBuffer, List<HostColumnVectorCore> nestedHcv) {
// NOTE: This constructor MUST NOT examine the contents of any host buffers, as they may be
// asynchronously written by the device.
super(type, rows, nullCount, hostDataBuffer, hostValidityBuffer, offsetBuffer, nestedHcv);
refCount = 0;
incRefCountInternal(true);
Expand All @@ -100,6 +102,8 @@ public HostColumnVector(DType type, long rows, Optional<Long> nullCount,
HostColumnVector(DType type, long rows, Optional<Long> nullCount,
HostMemoryBuffer hostDataBuffer, HostMemoryBuffer hostValidityBuffer,
HostMemoryBuffer offsetBuffer) {
// NOTE: This constructor MUST NOT examine the contents of any host buffers, as they may be
// asynchronously written by the device.
super(type, rows, nullCount, hostDataBuffer, hostValidityBuffer, offsetBuffer, new ArrayList<>());
assert !type.equals(DType.LIST) : "This constructor should not be used for list type";
if (nullCount.isPresent() && nullCount.get() > 0 && hostValidityBuffer == null) {
Expand Down
4 changes: 3 additions & 1 deletion java/src/main/java/ai/rapids/cudf/HostColumnVectorCore.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -47,6 +47,8 @@ public class HostColumnVectorCore implements AutoCloseable {
public HostColumnVectorCore(DType type, long rows,
Optional<Long> nullCount, HostMemoryBuffer data, HostMemoryBuffer validity,
HostMemoryBuffer offsets, List<HostColumnVectorCore> nestedChildren) {
// NOTE: This constructor MUST NOT examine the contents of any host buffers, as they may be
// asynchronously written by the device.
this.offHeap = new OffHeapState(data, validity, offsets);
MemoryCleaner.register(this, offHeap);
this.type = type;
Expand Down
5 changes: 3 additions & 2 deletions java/src/main/java/ai/rapids/cudf/JCudfSerialization.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -907,8 +907,9 @@ private static ColumnBufferProvider[] providersFrom(ColumnVector[] columns) {
boolean success = false;
try {
for (int i = 0; i < columns.length; i++) {
onHost[i] = columns[i].copyToHost();
onHost[i] = columns[i].copyToHostAsync(Cuda.DEFAULT_STREAM);
}
Cuda.DEFAULT_STREAM.sync();
ColumnBufferProvider[] ret = providersFrom(onHost, true);
success = true;
return ret;
Expand Down
19 changes: 17 additions & 2 deletions python/cudf/cudf/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

import rmm.mr

from cudf._lib import pylibcudf

from .fast_slow_proxy import is_proxy_object
from .magics import load_ipython_extension
from .profiler import Profiler
Expand All @@ -16,6 +18,19 @@

LOADED = False

# Keys passed to pylibcudf.experimental.enable_prefetching by
# _enable_managed_prefetching when the RMM mode includes "managed".
# NOTE(review): presumably these match prefetch keys registered on the C++
# side (cpp/src/utilities/prefetch.cpp) — confirm they stay in sync.
_SUPPORTED_PREFETCHES = {
    "column_view::get_data",
    "mutable_column_view::get_data",
    "gather",
    "hash_join",
}


def _enable_managed_prefetching(rmm_mode):
    """Enable libcudf prefetching for each supported key, but only when the
    configured RMM mode uses managed memory (i.e. ``rmm_mode`` contains
    ``"managed"``); otherwise this is a no-op."""
    if "managed" not in rmm_mode:
        return
    for prefetch_key in _SUPPORTED_PREFETCHES:
        pylibcudf.experimental.enable_prefetching(prefetch_key)


def install():
"""Enable Pandas Accelerator Mode."""
Expand All @@ -33,7 +48,7 @@ def install():
f"cudf.pandas detected an already configured memory resource, ignoring 'CUDF_PANDAS_RMM_MODE'={str(rmm_mode)}",
UserWarning,
)
return rmm_mode
return

free_memory, _ = rmm.mr.available_device_memory()
free_memory = int(round(float(free_memory) * 0.80 / 256) * 256)
Expand All @@ -57,7 +72,7 @@ def install():
elif rmm_mode != "cuda":
raise ValueError(f"Unsupported {rmm_mode=}")
rmm.mr.set_current_device_resource(new_mr)
return rmm_mode
_enable_managed_prefetching(rmm_mode)


def pytest_load_initial_conftests(early_config, parser, args):
Expand Down
12 changes: 1 addition & 11 deletions python/cudf/cudf/pandas/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,17 +72,7 @@ def main():

args = parser.parse_args()

rmm_mode = install()
if "managed" in rmm_mode:
for key in {
"column_view::get_data",
"mutable_column_view::get_data",
"gather",
"hash_join",
}:
from cudf._lib import pylibcudf

pylibcudf.experimental.enable_prefetching(key)
install()
with profile(args.profile, args.line_profile, args.args[0]) as fn:
args.args[0] = fn
if args.module:
Expand Down
2 changes: 1 addition & 1 deletion python/cudf_polars/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ license = { text = "Apache 2.0" }
requires-python = ">=3.9"
dependencies = [
"cudf==24.10.*,>=0.0.0a0",
"polars>=1.0",
"polars>=1.0,<1.3",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
classifiers = [
"Intended Audience :: Developers",
Expand Down

0 comments on commit 33be4ce

Please sign in to comment.