Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use std::overflow_error when output would exceed column size limit #13323

Merged
merged 24 commits into from
May 24, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
b71ad44
Use std::overflow_error when output would exceed column size limit
davidwendt May 9, 2023
3a05f80
Merge branch 'branch-23.06' into std-overflow-error
davidwendt May 10, 2023
4575640
change error in dlpack parm check
davidwendt May 10, 2023
7f959c1
add more std::overflow_error
davidwendt May 10, 2023
18219f3
Merge branch 'branch-23.06' into std-overflow-error
davidwendt May 10, 2023
e0ee490
remove unneeded punctuation from exception messages
davidwendt May 10, 2023
8d96cbf
add missing "the" to the exception message
davidwendt May 10, 2023
6b4f6d5
fix doxygen to match new exception
davidwendt May 11, 2023
73e02d6
Merge branch 'branch-23.06' into std-overflow-error
davidwendt May 11, 2023
f3df6b5
Merge branch 'branch-23.06' into std-overflow-error
davidwendt May 11, 2023
bbc1a93
fix doxygen throw text
davidwendt May 11, 2023
c9a1743
Merge branch 'branch-23.06' into std-overflow-error
davidwendt May 15, 2023
a99c9fb
Merge branch 'branch-23.06' into std-overflow-error
davidwendt May 15, 2023
275e701
Merge branch 'branch-23.06' into std-overflow-error
davidwendt May 16, 2023
0c38383
Merge branch 'branch-23.06' into std-overflow-error
davidwendt May 16, 2023
941e9a9
Merge branch 'branch-23.06' into std-overflow-error
davidwendt May 18, 2023
5ef54db
use size_t instead of uint64
davidwendt May 18, 2023
15d2569
Merge branch 'branch-23.06' into std-overflow-error
davidwendt May 18, 2023
76196c6
Merge branch 'branch-23.06' into std-overflow-error
davidwendt May 19, 2023
972977f
Merge branch 'branch-23.06' into std-overflow-error
davidwendt May 20, 2023
53f2539
Merge branch 'std-overflow-error' of github.com:davidwendt/cudf into …
davidwendt May 23, 2023
515b647
Merge branch 'branch-23.06' into std-overflow-error
davidwendt May 23, 2023
660b44e
remove MAX_JOIN_SIZE
davidwendt May 24, 2023
bed7749
Merge branch 'branch-23.06' into std-overflow-error
davidwendt May 24, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cpp/include/cudf/column/column.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@ class column {
_size{[&]() {
CUDF_EXPECTS(
other.size() <= static_cast<std::size_t>(std::numeric_limits<size_type>::max()),
"The device_uvector size exceeds the maximum size_type.");
"The device_uvector size exceeds the column size limit",
std::overflow_error);
return static_cast<size_type>(other.size());
}()},
_data{other.release()},
Expand Down
5 changes: 3 additions & 2 deletions cpp/include/cudf/column/column_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -435,8 +435,9 @@ class column_view : public detail::column_view_base {
cudf::data_type{cudf::type_to_id<T>()}, data.size(), data.data(), nullptr, 0, 0, {})
{
CUDF_EXPECTS(
data.size() < static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max()),
"Data exceeds the maximum size of a column view.");
data.size() <= static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max()),
"Data exceeds the column size limit",
std::overflow_error);
}

/**
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/detail/sizes_to_offsets_iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ std::pair<std::unique_ptr<column>, size_type> make_offsets_child_column(
auto const total_elements = sizes_to_offsets(input_itr, input_itr + count + 1, d_offsets, stream);
CUDF_EXPECTS(
total_elements <= static_cast<decltype(total_elements)>(std::numeric_limits<size_type>::max()),
"Size of output exceeds column size limit",
"Size of output exceeds the column size limit",
std::overflow_error);

offsets_column->set_null_count(0);
Expand Down
8 changes: 3 additions & 5 deletions cpp/include/cudf/filling.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -131,10 +131,8 @@ std::unique_ptr<table> repeat(
* count = 2
* return = [4,4,5,5,6,6]
* ```
* @throws cudf::logic_error if the data type of @p count is not size_type.
* @throws cudf::logic_error if @p count is invalid or @p count is negative.
* @throws cudf::logic_error if @p input_table.num_rows() * @p count overflows
* size_type.
* @throws cudf::logic_error if @p count is negative.
* @throws std::overflow_error if @p input_table.num_rows() * @p count overflows size_type.
*
* @param input_table Input table
* @param count Number of repetitions
Expand Down
5 changes: 3 additions & 2 deletions cpp/include/cudf/strings/detail/gather.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -324,7 +324,8 @@ std::unique_ptr<cudf::column> gather(strings_column_view const& strings,
size_t{0},
thrust::plus{});
CUDF_EXPECTS(total_bytes < static_cast<std::size_t>(std::numeric_limits<size_type>::max()),
"total size of output strings is too large for a cudf column");
"total size of output strings exceeds the column limit",
std::overflow_error);

// In-place convert output sizes into offsets
thrust::exclusive_scan(rmm::exec_policy_nosync(stream),
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/strings/detail/strings_children.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ auto make_strings_children(SizeAndExecuteFunction size_and_exec_fn,
// Convert the sizes to offsets
auto const bytes =
cudf::detail::sizes_to_offsets(d_offsets, d_offsets + strings_count + 1, d_offsets, stream);
CUDF_EXPECTS(bytes <= static_cast<int64_t>(std::numeric_limits<size_type>::max()),
"Size of output exceeds column size limit",
CUDF_EXPECTS(bytes <= std::numeric_limits<size_type>::max(),
"Size of output exceeds the column size limit",
std::overflow_error);

// Now build the chars column
Expand Down
2 changes: 0 additions & 2 deletions cpp/include/cudf/strings/repeat_strings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@

#include <rmm/mr/device/per_device_resource.hpp>

#include <optional>

namespace cudf {
namespace strings {
/**
Expand Down
1 change: 1 addition & 0 deletions cpp/include/nvtext/minhash.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ std::unique_ptr<cudf::column> minhash(
* @throw std::invalid_argument if the width < 2
* @throw std::invalid_argument if hash_function is not HASH_MURMUR3
* @throw std::invalid_argument if seeds is empty
* @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit
*
* @param input Strings column to compute minhash
* @param seeds Seed values used for the MurmurHash3_32 algorithm
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/nvtext/subword_tokenize.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,8 @@ struct tokenizer_result {
* strings column as working memory.
*
* @throw cudf::logic_error if `stride > max_sequence_length`
* @throw cudf::logic_error if `max_sequence_length * max_rows_tensor` is
* larger than the max value for cudf::size_type
* @throw std::overflow_error if `max_sequence_length * max_rows_tensor`
* exceeds the column size limit
*
* @param strings The input strings to tokenize.
* @param vocabulary_table The vocabulary table pre-loaded into this object.
Expand Down
12 changes: 5 additions & 7 deletions cpp/src/copying/concatenate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ std::unique_ptr<column> fused_concatenate(host_span<column_view const> views,
auto const output_size = std::get<3>(device_views);

CUDF_EXPECTS(output_size <= static_cast<std::size_t>(std::numeric_limits<size_type>::max()),
"Total number of concatenated rows exceeds size_type range",
"Total number of concatenated rows exceeds the column size limit",
std::overflow_error);

// Allocate output
Expand Down Expand Up @@ -369,9 +369,9 @@ class traverse_children {
std::size_t{},
[](size_t a, auto const& b) -> size_t { return a + b.size(); }) +
1;
// note: output text must include "exceeds size_type range" for python error handling
CUDF_EXPECTS(total_offset_count <= static_cast<size_t>(std::numeric_limits<size_type>::max()),
"Total number of concatenated offsets exceeds size_type range");
"Total number of concatenated offsets exceeds the column size limit",
std::overflow_error);
}
};

Expand Down Expand Up @@ -399,9 +399,8 @@ void traverse_children::operator()<cudf::string_view>(host_span<column_view cons
? scv.chars_size()
: cudf::detail::get_value<offset_type>(scv.offsets(), scv.size(), stream));
});
// note: output text must include "exceeds size_type range" for python error handling
CUDF_EXPECTS(total_char_count <= static_cast<size_t>(std::numeric_limits<size_type>::max()),
"Total number of concatenated chars exceeds size_type range",
"Total number of concatenated chars exceeds the column size limit",
std::overflow_error);
}

Expand Down Expand Up @@ -471,9 +470,8 @@ void bounds_and_type_check(host_span<column_view const> cols, rmm::cuda_stream_v
std::accumulate(cols.begin(), cols.end(), std::size_t{}, [](size_t a, auto const& b) {
return a + static_cast<size_t>(b.size());
});
// note: output text must include "exceeds size_type range" for python error handling
CUDF_EXPECTS(total_row_count <= static_cast<size_t>(std::numeric_limits<size_type>::max()),
"Total number of concatenated rows exceeds size_type range",
"Total number of concatenated rows exceeds the column size limit",
std::overflow_error);

// traverse children
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/copying/gather.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -65,7 +65,8 @@ std::unique_ptr<table> gather(table_view const& source_table,
rmm::mr::device_memory_resource* mr)
{
CUDF_EXPECTS(gather_map.size() <= static_cast<size_t>(std::numeric_limits<size_type>::max()),
"invalid gather map size");
"gather map size exceeds the column size limit",
std::overflow_error);
auto map_col = column_view(data_type{type_to_id<size_type>()},
static_cast<size_type>(gather_map.size()),
gather_map.data());
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/copying/scatter.cu
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,8 @@ std::unique_ptr<table> scatter(table_view const& source,
rmm::mr::device_memory_resource* mr)
{
CUDF_EXPECTS(scatter_map.size() <= static_cast<size_t>(std::numeric_limits<size_type>::max()),
"invalid scatter map size");
"scatter map size exceeds the column size limit",
std::overflow_error);
auto map_col = column_view(data_type{type_to_id<size_type>()},
static_cast<size_type>(scatter_map.size()),
scatter_map.data());
Expand Down
14 changes: 8 additions & 6 deletions cpp/src/filling/repeat.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -63,7 +63,8 @@ struct count_accessor {
auto count = p_count->value(stream);
// static_cast is necessary due to bool
CUDF_EXPECTS(static_cast<int64_t>(count) <= std::numeric_limits<cudf::size_type>::max(),
"count should not exceed size_type's limit.");
"count should not exceed the column size limit",
std::overflow_error);
return static_cast<cudf::size_type>(count);
}

Expand All @@ -86,7 +87,8 @@ struct count_checker {
auto max = thrust::reduce(
rmm::exec_policy(stream), count.begin<T>(), count.end<T>(), 0, thrust::maximum<T>());
CUDF_EXPECTS(max <= std::numeric_limits<cudf::size_type>::max(),
"count should not have values larger than size_type maximum.");
"count exceeds the column size limit",
std::overflow_error);
}
}

Expand Down Expand Up @@ -136,9 +138,9 @@ std::unique_ptr<table> repeat(table_view const& input_table,
rmm::mr::device_memory_resource* mr)
{
CUDF_EXPECTS(count >= 0, "count value should be non-negative");
CUDF_EXPECTS(
static_cast<int64_t>(input_table.num_rows()) * count <= std::numeric_limits<size_type>::max(),
"The resulting table has more rows than size_type's limit.");
CUDF_EXPECTS(input_table.num_rows() <= std::numeric_limits<size_type>::max() / count,
"The resulting table exceeds the column size limit",
std::overflow_error);

if ((input_table.num_rows() == 0) || (count == 0)) { return cudf::empty_like(input_table); }

Expand Down
12 changes: 7 additions & 5 deletions cpp/src/interop/dlpack.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -173,13 +173,15 @@ std::unique_ptr<table> from_dlpack(DLManagedTensor const* managed_tensor,
}
CUDF_EXPECTS(tensor.shape[0] >= 0,
"DLTensor first dim should be of shape greater than or equal to 0.");
CUDF_EXPECTS(tensor.shape[0] < std::numeric_limits<size_type>::max(),
"DLTensor first dim exceeds size supported by cudf");
CUDF_EXPECTS(tensor.shape[0] <= std::numeric_limits<size_type>::max(),
"DLTensor first dim exceeds the column size limit",
std::overflow_error);
if (tensor.ndim > 1) {
CUDF_EXPECTS(tensor.shape[1] >= 0,
"DLTensor second dim should be of shape greater than or equal to 0.");
CUDF_EXPECTS(tensor.shape[1] < std::numeric_limits<size_type>::max(),
"DLTensor second dim exceeds size supported by cudf");
CUDF_EXPECTS(tensor.shape[1] <= std::numeric_limits<size_type>::max(),
"DLTensor second dim exceeds the column size limit",
std::overflow_error);
}
size_t const num_columns = (tensor.ndim == 2) ? static_cast<size_t>(tensor.shape[1]) : 1;

Expand Down
3 changes: 2 additions & 1 deletion cpp/src/io/utilities/row_selection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ std::pair<uint64_t, size_type> skip_rows_num_rows_from_options(
auto const rows_to_skip = std::min(skip_rows_opt, num_source_rows);
if (not num_rows_opt.has_value()) {
CUDF_EXPECTS(num_source_rows - rows_to_skip <= std::numeric_limits<size_type>::max(),
"The requested number of rows to read exceeds the largest cudf column size");
"The requested number of rows exceeds the column size limit",
std::overflow_error);
return {rows_to_skip, num_source_rows - rows_to_skip};
}
// Limit the number of rows to the end of the input
Expand Down
3 changes: 1 addition & 2 deletions cpp/src/io/utilities/row_selection.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@ namespace cudf::io::detail {
* @param num_source_rows number of rows in the ORC file(s)
* @return A std::pair containing the number of rows to skip and the number of rows to read
*
* @throw cudf::logic_error when the requested number of rows to read exceeds the largest cudf
* column size
* @throw std::overflow_error if the requested number of rows exceeds the column size limit
*/
std::pair<uint64_t, size_type> skip_rows_num_rows_from_options(
uint64_t skip_rows_opt, std::optional<size_type> const& num_rows_opt, uint64_t num_source_rows);
Expand Down
10 changes: 6 additions & 4 deletions cpp/src/join/hash_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -376,8 +376,9 @@ hash_join<Hasher>::hash_join(cudf::table_view const& build,
{
CUDF_FUNC_RANGE();
CUDF_EXPECTS(0 != build.num_columns(), "Hash join build table is empty");
CUDF_EXPECTS(build.num_rows() < cudf::detail::MAX_JOIN_SIZE,
"Build column size is too big for hash join");
CUDF_EXPECTS(build.num_rows() <= cudf::detail::MAX_JOIN_SIZE,
davidwendt marked this conversation as resolved.
Show resolved Hide resolved
"Build column size exceeds the column size limit",
std::overflow_error);

if (_is_empty) { return; }

Expand Down Expand Up @@ -558,8 +559,9 @@ hash_join<Hasher>::compute_hash_join(cudf::table_view const& probe,
rmm::mr::device_memory_resource* mr) const
{
CUDF_EXPECTS(0 != probe.num_columns(), "Hash join probe table is empty");
CUDF_EXPECTS(probe.num_rows() < cudf::detail::MAX_JOIN_SIZE,
"Probe column size is too big for hash join");
CUDF_EXPECTS(probe.num_rows() <= cudf::detail::MAX_JOIN_SIZE,
"Probe column size exceeds the column size limit",
std::overflow_error);

CUDF_EXPECTS(_build.num_columns() == probe.num_columns(),
"Mismatch in number of columns to be joined on");
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/lists/sequences.cu
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,8 @@ std::unique_ptr<column> sequences(column_view const& starts,

auto const n_elements = cudf::detail::sizes_to_offsets(
sizes_input_it, sizes_input_it + n_lists + 1, offsets_begin, stream);
CUDF_EXPECTS(n_elements <= static_cast<int64_t>(std::numeric_limits<size_type>::max()),
"Size of output exceeds column size limit",
CUDF_EXPECTS(n_elements <= std::numeric_limits<size_type>::max(),
"Size of output exceeds the column size limit",
std::overflow_error);

auto child = type_dispatcher(starts.type(),
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/strings/case.cu
Original file line number Diff line number Diff line change
Expand Up @@ -255,8 +255,8 @@ std::unique_ptr<column> convert_case(strings_column_view const& input,
// convert sizes to offsets
auto const bytes =
cudf::detail::sizes_to_offsets(d_offsets, d_offsets + input.size() + 1, d_offsets, stream);
CUDF_EXPECTS(bytes <= static_cast<int64_t>(std::numeric_limits<size_type>::max()),
"Size of output exceeds column size limit",
CUDF_EXPECTS(bytes <= std::numeric_limits<size_type>::max(),
"Size of output exceeds the column size limit",
std::overflow_error);

auto chars = create_chars_child_column(static_cast<size_type>(bytes), stream, mr);
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/strings/copying/concatenate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -216,9 +216,11 @@ std::unique_ptr<column> concatenate(host_span<column_view const> columns,
if (strings_count == 0) { return make_empty_column(type_id::STRING); }

CUDF_EXPECTS(offsets_count <= static_cast<std::size_t>(std::numeric_limits<size_type>::max()),
"total number of strings is too large for cudf column");
"total number of strings exceeds the column size limit",
std::overflow_error);
CUDF_EXPECTS(total_bytes <= static_cast<std::size_t>(std::numeric_limits<size_type>::max()),
"total size of strings is too large for cudf column");
"total size of strings exceeds the column size limit",
std::overflow_error);

bool const has_nulls =
std::any_of(columns.begin(), columns.end(), [](auto const& col) { return col.has_nulls(); });
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/strings/regex/utilities.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,8 @@ auto make_strings_children(SizeAndExecuteFunction size_and_exec_fn,

auto const char_bytes =
cudf::detail::sizes_to_offsets(d_offsets, d_offsets + strings_count + 1, d_offsets, stream);
CUDF_EXPECTS(char_bytes <= static_cast<int64_t>(std::numeric_limits<size_type>::max()),
"Size of output exceeds column size limit",
CUDF_EXPECTS(char_bytes <= std::numeric_limits<size_type>::max(),
"Size of output exceeds the column size limit",
std::overflow_error);

// Now build the chars column
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/strings/repeat_strings.cu
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ std::unique_ptr<string_scalar> repeat_string(string_scalar const& input,
if (repeat_times == 1) { return std::make_unique<string_scalar>(input, stream, mr); }

CUDF_EXPECTS(input.size() <= std::numeric_limits<size_type>::max() / repeat_times,
"The output string has size that exceeds the maximum allowed size.");
"The output size exceeds the column size limit",
std::overflow_error);

auto const str_size = input.size();
auto const iter = thrust::make_counting_iterator(0);
Expand Down
9 changes: 4 additions & 5 deletions cpp/src/text/minhash.cu
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,10 @@ std::unique_ptr<cudf::column> minhash(cudf::strings_column_view const& input,
CUDF_EXPECTS(hash_function == cudf::hash_id::HASH_MURMUR3,
"Only murmur3 hash algorithm supported",
std::invalid_argument);
CUDF_EXPECTS(
(static_cast<std::size_t>(input.size()) * seeds.size()) <
static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max()),
"The number of seeds times the number of input rows must not exceed maximum of size_type",
std::invalid_argument);
CUDF_EXPECTS((static_cast<std::size_t>(input.size()) * seeds.size()) <
static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max()),
"The number of seeds times the number of input rows exceeds the column size limit",
std::overflow_error);

auto output_type = cudf::data_type{cudf::type_to_id<cudf::hash_value_type>()};
if (input.is_empty()) { return cudf::make_empty_column(output_type); }
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/text/ngrams_tokenize.cu
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ std::unique_ptr<cudf::column> ngrams_tokenize(cudf::strings_column_view const& s
chars_offsets.begin(), chars_offsets.end(), chars_offsets.begin(), stream);
CUDF_EXPECTS(
output_chars_size <= static_cast<int64_t>(std::numeric_limits<cudf::size_type>::max()),
"Size of output exceeds column size limit",
"Size of output exceeds the column size limit",
std::overflow_error);

// This will contain the size in bytes of each ngram to generate
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/text/normalize.cu
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,9 @@ std::unique_ptr<cudf::column> normalize_characters(cudf::strings_column_view con
}();

CUDF_EXPECTS(
result.first->size() <= static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max()),
"output too large for strings column");
result.first->size() < static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max()),
"output exceeds the column size limit",
std::overflow_error);

// convert the result into a strings column
// - the cp_chars are the new 4-byte code-point values for all the characters in the output
Expand Down
Loading