rapidsai · rapids-bot · Jan 25, 2024 · Oct 6, 2023 · Oct 6, 2023 · Oct 6, 2023
@@ -1280,11 +1280,11 @@ inline __device__ bool setupLocalPageInfo(page_state_s* const s,
           if (((s->col.data_type & 7) == BYTE_ARRAY) && (s->col.str_dict_index)) {
             // String dictionary: use index
             s->dict_base = reinterpret_cast<uint8_t const*>(s->col.str_dict_index);
-            s->dict_size = s->col.page_info[0].num_input_values * sizeof(string_index_pair);
+            s->dict_size = s->col.dict_page->num_input_values * sizeof(string_index_pair);
           } else {
             s->dict_base =
-              s->col.page_info[0].page_data;  // dictionary is always stored in the first page
-            s->dict_size = s->col.page_info[0].uncompressed_page_size;
+              s->col.dict_page->page_data;  // dictionary is always stored in the first page
+            s->dict_size = s->col.dict_page->uncompressed_page_size;
           }
           s->dict_run  = 0;
           s->dict_val  = 0;

@@ -342,8 +342,10 @@ struct gpuParsePageHeader {
  * @param[in] num_chunks Number of column chunks
  */
 // blockDim {128,1,1}
-__global__ void __launch_bounds__(128)
-  gpuDecodePageHeaders(ColumnChunkDesc* chunks, int32_t num_chunks, int32_t* error_code)
+__global__ void __launch_bounds__(128) gpuDecodePageHeaders(ColumnChunkDesc* chunks,
+                                                            chunk_page_info* chunk_pages,
+                                                            int32_t num_chunks,
+                                                            int32_t* error_code)
 {
   using cudf::detail::warp_size;
   gpuParsePageHeader parse_page_header;
@@ -382,11 +384,10 @@ __global__ void __launch_bounds__(128)
       bs->page.str_bytes           = 0;
       bs->page.kernel_mask         = 0;
     }
-    num_values     = bs->ck.num_values;
-    page_info      = bs->ck.page_info;
-    num_dict_pages = bs->ck.num_dict_pages;
-    max_num_pages  = (page_info) ? bs->ck.max_num_pages : 0;
-    values_found   = 0;
+    num_values    = bs->ck.num_values;
+    page_info     = chunk_pages ? chunk_pages[chunk].pages : nullptr;
+    max_num_pages = (page_info) ? bs->ck.max_num_pages : 0;
+    values_found  = 0;
     __syncwarp();
     while (values_found < num_values && bs->cur < bs->end) {
       int index_out = -1;
@@ -484,9 +485,9 @@ __global__ void __launch_bounds__(128)
   if (!lane_id && ck->num_dict_pages > 0 && ck->str_dict_index) {
     // Data type to describe a string
     string_index_pair* dict_index = ck->str_dict_index;
-    uint8_t const* dict           = ck->page_info[0].page_data;
-    int dict_size                 = ck->page_info[0].uncompressed_page_size;
-    int num_entries               = ck->page_info[0].num_input_values;
+    uint8_t const* dict           = ck->dict_page->page_data;
+    int dict_size                 = ck->dict_page->uncompressed_page_size;
+    int num_entries               = ck->dict_page->num_input_values;
     int pos = 0, cur = 0;
     for (int i = 0; i < num_entries; i++) {
       int len = 0;
@@ -507,13 +508,15 @@ __global__ void __launch_bounds__(128)
 }
 
 void __host__ DecodePageHeaders(ColumnChunkDesc* chunks,
+                                chunk_page_info* chunk_pages,
                                 int32_t num_chunks,
                                 int32_t* error_code,
                                 rmm::cuda_stream_view stream)
 {
   dim3 dim_block(128, 1);
   dim3 dim_grid((num_chunks + 3) >> 2, 1);  // 1 chunk per warp, 4 warps per block
-  gpuDecodePageHeaders<<<dim_grid, dim_block, 0, stream.value()>>>(chunks, num_chunks, error_code);
+  gpuDecodePageHeaders<<<dim_grid, dim_block, 0, stream.value()>>>(
+    chunks, chunk_pages, num_chunks, error_code);
 }
 
 void __host__ BuildStringDictionaryIndex(ColumnChunkDesc* chunks,

@@ -538,10 +538,10 @@ __global__ void __launch_bounds__(preprocess_block_size) gpuComputePageStringSiz
         if (col.str_dict_index) {
           // String dictionary: use index
           dict_base = reinterpret_cast<const uint8_t*>(col.str_dict_index);
-          dict_size = col.page_info[0].num_input_values * sizeof(string_index_pair);
+          dict_size = col.dict_page->num_input_values * sizeof(string_index_pair);
         } else {
-          dict_base = col.page_info[0].page_data;  // dictionary is always stored in the first page
-          dict_size = col.page_info[0].uncompressed_page_size;
+          dict_base = col.dict_page->page_data;  // dictionary is always stored in the first page
+          dict_size = col.dict_page->uncompressed_page_size;
         }
 
         // FIXME: need to return an error condition...this won't actually do anything

@@ -294,6 +294,21 @@ struct PageInfo {
   uint32_t kernel_mask;
 };
 
+/**
+ * @brief Return the column schema id as the key for a PageInfo struct.
+ */
+struct get_page_key {
+  __device__ int32_t operator()(PageInfo const& page) const { return page.src_col_schema; }
+};
+
+/**
+ * @brief Return and iterator that returns they keys for a vector of pages.
+ */
+inline auto make_page_key_iterator(cudf::detail::hostdevice_vector<PageInfo> const& pages)
+{
+  return thrust::make_transform_iterator(pages.d_begin(), get_page_key{});
+}
+
 /**
  * @brief Struct describing a particular chunk of column data
  */
@@ -330,7 +345,7 @@ struct ColumnChunkDesc {
       num_data_pages(0),
       num_dict_pages(0),
       max_num_pages(0),
-      page_info(nullptr),
+      dict_page(nullptr),
       str_dict_index(nullptr),
       valid_map_base{nullptr},
       column_data_base{nullptr},
@@ -345,22 +360,21 @@ struct ColumnChunkDesc {
   {
   }
 
-  uint8_t const* compressed_data{};                  // pointer to compressed column chunk data
-  size_t compressed_size{};                          // total compressed data size for this chunk
-  size_t num_values{};                               // total number of values in this column
-  size_t start_row{};                                // starting row of this chunk
-  uint32_t num_rows{};                               // number of rows in this chunk
+  uint8_t const* compressed_data{};  // pointer to compressed column chunk data
+  size_t compressed_size{};          // total compressed data size for this chunk
+  size_t num_values{};               // total number of values in this column
+  size_t start_row{};                // file-wide, absolute starting row of this chunk
+  uint32_t num_rows{};               // number of rows in this chunk
   int16_t max_level[level_type::NUM_LEVEL_TYPES]{};  // max definition/repetition level
   int16_t max_nesting_depth{};                       // max nesting depth of the output
   uint16_t data_type{};  // basic column data type, ((type_length << 3) |
                          // parquet::Type)
   uint8_t
-    level_bits[level_type::NUM_LEVEL_TYPES]{};   // bits to encode max definition/repetition levels
-  int32_t num_data_pages{};                      // number of data pages
-  int32_t num_dict_pages{};                      // number of dictionary pages
-  int32_t max_num_pages{};                       // size of page_info array
-  PageInfo* page_info{};                         // output page info for up to num_dict_pages +
-                                                 // num_data_pages (dictionary pages first)
+    level_bits[level_type::NUM_LEVEL_TYPES]{};  // bits to encode max definition/repetition levels
+  int32_t num_data_pages{};                     // number of data pages
+  int32_t num_dict_pages{};                     // number of dictionary pages
+  int32_t max_num_pages{};                      // size of page_info array
+  PageInfo* dict_page{};
   string_index_pair* str_dict_index{};           // index for string dictionary
   bitmask_type** valid_map_base{};               // base pointers of valid bit map for this column
   void** column_data_base{};                     // base pointers of column data
@@ -375,6 +389,13 @@ struct ColumnChunkDesc {
   int32_t src_col_schema{};  // my schema index in the file
 };
 
+/**
+ * @brief A utility structure for use in decoding page headers.
+ */
+struct chunk_page_info {
+  PageInfo* pages;
+};
+
 /**
  * @brief Struct describing an encoder column
  */
@@ -520,11 +541,13 @@ constexpr bool is_string_col(ColumnChunkDesc const& chunk)
  * @brief Launches kernel for parsing the page headers in the column chunks
  *
  * @param[in] chunks List of column chunks
+ * @param[in] chunk_pages List of pages associated with the chunks, in chunk-sorted order
  * @param[in] num_chunks Number of column chunks
  * @param[out] error_code Error code for kernel failures
  * @param[in] stream CUDA stream to use
  */
 void DecodePageHeaders(ColumnChunkDesc* chunks,
+                       chunk_page_info* chunk_pages,
                        int32_t num_chunks,
                        int32_t* error_code,
                        rmm::cuda_stream_view stream);