Skip to content

Commit

Permalink
Merge branch 'branch-24.10' into rmm-adaptor-constructor
Browse files Browse the repository at this point in the history
  • Loading branch information
bdice authored Jul 29, 2024
2 parents 9c93768 + f8eb63e commit c053aa8
Show file tree
Hide file tree
Showing 41 changed files with 1,337 additions and 382 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/pr_issue_status_automation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ on:

jobs:
get-project-id:
uses: rapidsai/shared-workflows/.github/workflows/project-get-item-id.yaml@branch-24.08
uses: rapidsai/shared-workflows/.github/workflows/project-get-item-id.yaml@branch-24.10
if: github.event.pull_request.state == 'open'
secrets: inherit
permissions:
Expand All @@ -34,7 +34,7 @@ jobs:

update-status:
# This job sets the PR and its linked issues to "In Progress" status
uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@branch-24.08
uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@branch-24.10
if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }}
needs: get-project-id
with:
Expand All @@ -50,7 +50,7 @@ jobs:

update-sprint:
# This job sets the PR and its linked issues to the current "Weekly Sprint"
uses: rapidsai/shared-workflows/.github/workflows/project-get-set-iteration-field.yaml@branch-24.08
uses: rapidsai/shared-workflows/.github/workflows/project-get-set-iteration-field.yaml@branch-24.10
if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }}
needs: get-project-id
with:
Expand Down
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,7 @@ add_library(
src/interop/dlpack.cpp
src/interop/from_arrow.cu
src/interop/arrow_utilities.cpp
src/interop/decimal_conversion_utilities.cu
src/interop/to_arrow.cu
src/interop/to_arrow_device.cu
src/interop/to_arrow_host.cu
Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake/thirdparty/get_thread_pool.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ function(find_and_configure_thread_pool)
include(${rapids-cmake-dir}/cpm/bs_thread_pool.cmake)

# Find or install thread-pool
rapids_cpm_bs_thread_pool(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports)
rapids_cpm_bs_thread_pool()

endfunction()

Expand Down
4 changes: 4 additions & 0 deletions cpp/examples/tpch/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,7 @@ target_compile_features(tpch_q6 PRIVATE cxx_std_17)
add_executable(tpch_q9 q9.cpp)
target_link_libraries(tpch_q9 PRIVATE cudf::cudf)
target_compile_features(tpch_q9 PRIVATE cxx_std_17)

add_executable(tpch_q10 q10.cpp)
target_link_libraries(tpch_q10 PRIVATE cudf::cudf)
target_compile_features(tpch_q10 PRIVATE cxx_std_17)
2 changes: 1 addition & 1 deletion cpp/examples/tpch/q1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ int main(int argc, char const** argv)
auto shipdate_upper =
cudf::timestamp_scalar<cudf::timestamp_D>(days_since_epoch(1998, 9, 2), true);
auto const shipdate_upper_literal = cudf::ast::literal(shipdate_upper);
auto lineitem_pred = std::make_unique<cudf::ast::operation>(
auto const lineitem_pred = std::make_unique<cudf::ast::operation>(
cudf::ast::ast_operator::LESS_EQUAL, shipdate_ref, shipdate_upper_literal);

// Read out the `lineitem` table from parquet file
Expand Down
166 changes: 166 additions & 0 deletions cpp/examples/tpch/q10.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "../utilities/timer.hpp"
#include "utils.hpp"

#include <cudf/ast/expressions.hpp>
#include <cudf/column/column.hpp>
#include <cudf/scalar/scalar.hpp>

/**
* @file q10.cpp
* @brief Implement query 10 of the TPC-H benchmark.
*
* create view customer as select * from '/tables/scale-1/customer.parquet';
* create view orders as select * from '/tables/scale-1/orders.parquet';
* create view lineitem as select * from '/tables/scale-1/lineitem.parquet';
* create view nation as select * from '/tables/scale-1/nation.parquet';
*
* select
* c_custkey,
* c_name,
* sum(l_extendedprice * (1 - l_discount)) as revenue,
* c_acctbal,
* n_name,
* c_address,
* c_phone,
* c_comment
* from
* customer,
* orders,
* lineitem,
* nation
* where
* c_custkey = o_custkey
* and l_orderkey = o_orderkey
* and o_orderdate >= date '1993-10-01'
* and o_orderdate < date '1994-01-01'
* and l_returnflag = 'R'
* and c_nationkey = n_nationkey
* group by
* c_custkey,
* c_name,
* c_acctbal,
* c_phone,
* n_name,
* c_address,
* c_comment
* order by
* revenue desc;
*/

/**
 * @brief Calculate the revenue column as `extendedprice * (1 - discount)`
 *
 * @param extendedprice The extended price column
 * @param discount The discount column
 * @param stream The CUDA stream used for device memory operations and kernel launches.
 * @param mr Device memory resource used to allocate the returned column's device memory.
 * @return The revenue column, produced with a FLOAT64 output type
 */
[[nodiscard]] std::unique_ptr<cudf::column> calc_revenue(
  cudf::column_view const& extendedprice,
  cudf::column_view const& discount,
  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource())
{
  // Build the scalar on `stream` (not the default stream) so that its device-side value is
  // ordered with the binary operations launched on `stream` below.
  auto const one = cudf::numeric_scalar<double>(1, true, stream);

  // `1 - discount` is only an intermediate result, so it is allocated from the current device
  // resource; `mr` is reserved for the column that is returned to the caller.
  auto const one_minus_discount =
    cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type(), stream);

  auto const revenue_type = cudf::data_type{cudf::type_id::FLOAT64};
  return cudf::binary_operation(extendedprice,
                                one_minus_discount->view(),
                                cudf::binary_operator::MUL,
                                revenue_type,
                                stream,
                                mr);
}
/**
 * @brief Entry point: runs TPC-H query 10 on the parquet tables found in the dataset directory
 * given by the parsed arguments and writes the result to `q10.parquet`.
 *
 * @param argc Number of command-line arguments
 * @param argv Command-line arguments, forwarded to `parse_args`
 * @return 0 on completion
 */
int main(int argc, char const** argv)
{
  auto const args = parse_args(argc, argv);

  // Install the memory resource requested via the parsed arguments as the current device resource
  auto resource = create_memory_resource(args.memory_resource_type);
  rmm::mr::set_current_device_resource(resource.get());

  cudf::examples::timer timer;

  // Define the column projection and filter predicate for the `orders` table:
  //   o_orderdate >= 1993-10-01 AND o_orderdate < 1994-01-01
  std::vector<std::string> const orders_cols = {"o_custkey", "o_orderkey", "o_orderdate"};
  auto const o_orderdate_ref                 = cudf::ast::column_reference(std::distance(
    orders_cols.begin(), std::find(orders_cols.begin(), orders_cols.end(), "o_orderdate")));
  auto o_orderdate_lower =
    cudf::timestamp_scalar<cudf::timestamp_D>(days_since_epoch(1993, 10, 1), true);
  auto const o_orderdate_lower_limit = cudf::ast::literal(o_orderdate_lower);
  auto const o_orderdate_pred_lower  = cudf::ast::operation(
    cudf::ast::ast_operator::GREATER_EQUAL, o_orderdate_ref, o_orderdate_lower_limit);
  auto o_orderdate_upper =
    cudf::timestamp_scalar<cudf::timestamp_D>(days_since_epoch(1994, 1, 1), true);
  auto const o_orderdate_upper_limit = cudf::ast::literal(o_orderdate_upper);
  auto const o_orderdate_pred_upper =
    cudf::ast::operation(cudf::ast::ast_operator::LESS, o_orderdate_ref, o_orderdate_upper_limit);
  auto const orders_pred = std::make_unique<cudf::ast::operation>(
    cudf::ast::ast_operator::LOGICAL_AND, o_orderdate_pred_lower, o_orderdate_pred_upper);

  // Define the column projection and filter predicate for the `lineitem` table:
  //   l_returnflag = 'R'
  // The column index is looked up in the projection list (instead of being hard-coded) so the
  // predicate stays correct if the projection is ever reordered.
  std::vector<std::string> const lineitem_cols = {
    "l_extendedprice", "l_discount", "l_orderkey", "l_returnflag"};
  auto const l_returnflag_ref = cudf::ast::column_reference(std::distance(
    lineitem_cols.begin(), std::find(lineitem_cols.begin(), lineitem_cols.end(), "l_returnflag")));
  auto r_scalar               = cudf::string_scalar("R");
  auto const r_literal        = cudf::ast::literal(r_scalar);
  auto const lineitem_pred    = std::make_unique<cudf::ast::operation>(
    cudf::ast::ast_operator::EQUAL, l_returnflag_ref, r_literal);

  // Read out the tables from parquet files
  // while pushing down the column projections and filter predicates.
  // The predicates are `const` and are only borrowed by `read_parquet`, so no `std::move` is used.
  auto const customer = read_parquet(
    args.dataset_dir + "/customer.parquet",
    {"c_custkey", "c_name", "c_nationkey", "c_acctbal", "c_address", "c_phone", "c_comment"});
  auto const orders = read_parquet(args.dataset_dir + "/orders.parquet", orders_cols, orders_pred);
  auto const lineitem =
    read_parquet(args.dataset_dir + "/lineitem.parquet", lineitem_cols, lineitem_pred);
  auto const nation = read_parquet(args.dataset_dir + "/nation.parquet", {"n_name", "n_nationkey"});

  // Perform the joins
  auto const join_a       = apply_inner_join(customer, nation, {"c_nationkey"}, {"n_nationkey"});
  auto const join_b       = apply_inner_join(lineitem, orders, {"l_orderkey"}, {"o_orderkey"});
  auto const joined_table = apply_inner_join(join_a, join_b, {"c_custkey"}, {"o_custkey"});

  // Calculate and append the `revenue` column
  auto revenue =
    calc_revenue(joined_table->column("l_extendedprice"), joined_table->column("l_discount"));
  joined_table->append(revenue, "revenue");

  // Perform the groupby operation
  auto const groupedby_table = apply_groupby(
    joined_table,
    groupby_context_t{
      {"c_custkey", "c_name", "c_acctbal", "c_phone", "n_name", "c_address", "c_comment"},
      {
        {"revenue", {{cudf::aggregation::Kind::SUM, "revenue"}}},
      }});

  // Perform the order by operation
  auto const orderedby_table =
    apply_orderby(groupedby_table, {"revenue"}, {cudf::order::DESCENDING});

  timer.print_elapsed_millis();

  // Write query result to a parquet file
  orderedby_table->to_parquet("q10.parquet");
  return 0;
}
20 changes: 10 additions & 10 deletions cpp/examples/tpch/q5.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,14 @@
* region
* where
* c_custkey = o_custkey
* and l_orderkey = o_orderkey
* and l_suppkey = s_suppkey
* and c_nationkey = s_nationkey
* and s_nationkey = n_nationkey
* and n_regionkey = r_regionkey
* and r_name = 'ASIA'
* and o_orderdate >= date '1994-01-01'
* and o_orderdate < date '1995-01-01'
* and l_orderkey = o_orderkey
* and l_suppkey = s_suppkey
* and c_nationkey = s_nationkey
* and s_nationkey = n_nationkey
* and n_regionkey = r_regionkey
* and r_name = 'ASIA'
* and o_orderdate >= date '1994-01-01'
* and o_orderdate < date '1995-01-01'
* group by
* n_name
* order by
Expand Down Expand Up @@ -109,7 +109,7 @@ int main(int argc, char const** argv)
auto const o_orderdate_upper_limit = cudf::ast::literal(o_orderdate_upper);
auto const o_orderdate_pred_upper =
cudf::ast::operation(cudf::ast::ast_operator::LESS, o_orderdate_ref, o_orderdate_upper_limit);
auto orders_pred = std::make_unique<cudf::ast::operation>(
auto const orders_pred = std::make_unique<cudf::ast::operation>(
cudf::ast::ast_operator::LOGICAL_AND, o_orderdate_pred_lower, o_orderdate_pred_upper);

// Define the column projection and filter predicate for the `region` table
Expand All @@ -118,7 +118,7 @@ int main(int argc, char const** argv)
region_cols.begin(), std::find(region_cols.begin(), region_cols.end(), "r_name")));
auto r_name_value = cudf::string_scalar("ASIA");
auto const r_name_literal = cudf::ast::literal(r_name_value);
auto region_pred = std::make_unique<cudf::ast::operation>(
auto const region_pred = std::make_unique<cudf::ast::operation>(
cudf::ast::ast_operator::EQUAL, r_name_ref, r_name_literal);

// Read out the tables from parquet files
Expand Down
2 changes: 1 addition & 1 deletion cpp/examples/tpch/q6.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ int main(int argc, char const** argv)
cudf::ast::ast_operator::GREATER_EQUAL, shipdate_ref, shipdate_lower_literal);
auto const shipdate_pred_b =
cudf::ast::operation(cudf::ast::ast_operator::LESS, shipdate_ref, shipdate_upper_literal);
auto lineitem_pred = std::make_unique<cudf::ast::operation>(
auto const lineitem_pred = std::make_unique<cudf::ast::operation>(
cudf::ast::ast_operator::LOGICAL_AND, shipdate_pred_a, shipdate_pred_b);
auto lineitem =
read_parquet(args.dataset_dir + "/lineitem.parquet", lineitem_cols, std::move(lineitem_pred));
Expand Down
4 changes: 0 additions & 4 deletions cpp/include/cudf/detail/reshape.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ namespace CUDF_EXPORT cudf {
namespace detail {
/**
* @copydoc cudf::tile
*
* @param stream CUDA stream used for device memory operations and kernel launches
*/
std::unique_ptr<table> tile(table_view const& input,
size_type count,
Expand All @@ -38,8 +36,6 @@ std::unique_ptr<table> tile(table_view const& input,

/**
* @copydoc cudf::interleave_columns
*
* @param stream CUDA stream used for device memory operations and kernel launches
*/
std::unique_ptr<column> interleave_columns(table_view const& input,
rmm::cuda_stream_view,
Expand Down
17 changes: 11 additions & 6 deletions cpp/include/cudf/reshape.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,14 @@ namespace CUDF_EXPORT cudf {
* @throws cudf::logic_error if input contains no columns.
* @throws cudf::logic_error if input columns dtypes are not identical.
*
* @param[in] input Table containing columns to interleave
* @param[in] mr Device memory resource used to allocate the returned column's device memory
*
* @param input Table containing columns to interleave
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return The interleaved columns as a single column
*/
std::unique_ptr<column> interleave_columns(
table_view const& input,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -68,15 +69,17 @@ std::unique_ptr<column> interleave_columns(
* return = [[8, 4, 7, 8, 4, 7], [5, 2, 3, 5, 2, 3]]
* ```
*
* @param[in] input Table containing rows to be repeated
* @param[in] count Number of times to tile "rows". Must be non-negative
* @param[in] mr Device memory resource used to allocate the returned table's device memory
* @param input Table containing rows to be repeated
* @param count Number of times to tile "rows". Must be non-negative
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned table's device memory
*
* @return The table containing the tiled "rows"
*/
std::unique_ptr<table> tile(
table_view const& input,
size_type count,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -95,13 +98,15 @@ enum class flip_endianness : bool { NO, YES };
*
* @param input_column Column to be converted to lists of bytes
* @param endian_configuration Whether to retain or flip the endianness of the elements
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
*
* @return The column containing the lists of bytes
*/
std::unique_ptr<column> byte_cast(
column_view const& input_column,
flip_endianness endian_configuration,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
Expand Down
Loading

0 comments on commit c053aa8

Please sign in to comment.