diff --git a/cpp/.clang-format b/.clang-format similarity index 100% rename from cpp/.clang-format rename to .clang-format diff --git a/.gitattributes b/.gitattributes index fbfe7434d50..ed8e5e1425a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,5 @@ python/cudf/cudf/_version.py export-subst +python/strings_udf/strings_udf/_version.py export-subst python/cudf_kafka/cudf_kafka/_version.py export-subst python/custreamz/custreamz/_version.py export-subst python/dask_cudf/dask_cudf/_version.py export-subst diff --git a/.gitignore b/.gitignore index 29df683e9ec..0d63c76bf9f 100644 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,8 @@ python/cudf_kafka/*/_lib/**/*.cpp python/cudf_kafka/*/_lib/**/*.h python/custreamz/*/_lib/**/*.cpp python/custreamz/*/_lib/**/*.h +python/strings_udf/strings_udf/_lib/*.cpp +python/strings_udf/strings_udf/*.ptx .Python env/ develop-eggs/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 08e35fb47b0..a1debd52720 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,7 +9,6 @@ repos: # project can specify its own first/third-party packages. args: ["--config-root=python/", "--resolve-all-configs"] files: python/.* - exclude: (__init__.py|setup.py)$ types_or: [python, cython, pyi] - repo: https://github.com/psf/black rev: 22.3.0 diff --git a/build.sh b/build.sh index eee3ee512fa..ac283d01fc9 100755 --- a/build.sh +++ b/build.sh @@ -17,7 +17,7 @@ ARGS=$* # script, and that this script resides in the repo dir! REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats" +VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz strings_udf -v -g -n -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats" HELP="$0 [clean] [libcudf] [cudf] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"\\\"] clean - remove all existing build artifacts and configuration (start over) @@ -335,6 +335,15 @@ if buildAll || hasArg cudf; then fi fi +if buildAll || hasArg strings_udf; then + + cd ${REPODIR}/python/strings_udf + python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1} + if [[ ${INSTALL_TARGET} != "" ]]; then + python setup.py install --single-version-externally-managed --record=record.txt -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1} + fi +fi + # Build and install the dask_cudf Python package if buildAll || hasArg dask_cudf; then diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 0cdea59b2de..514b2463685 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -82,6 +82,14 @@ fi if [ "$BUILD_LIBCUDF" == '1' ]; then gpuci_logger "Build conda pkg for libcudf" gpuci_conda_retry mambabuild --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/libcudf $CONDA_BUILD_ARGS + + # BUILD_LIBCUDF == 1 means this job is being run on the cpu_build jobs + # that is where we must also build the strings_udf package + gpuci_logger "Build conda pkg for strings_udf (python 3.8)" + gpuci_conda_retry mambabuild --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/strings_udf $CONDA_BUILD_ARGS 
--python=3.8 + gpuci_logger "Build conda pkg for strings_udf (python 3.9)" + gpuci_conda_retry mambabuild --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/strings_udf $CONDA_BUILD_ARGS --python=3.9 + mkdir -p ${CONDA_BLD_DIR}/libcudf/work cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/libcudf/work gpuci_logger "sccache stats" @@ -110,6 +118,10 @@ if [ "$BUILD_CUDF" == '1' ]; then gpuci_logger "Build conda pkg for custreamz" gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/custreamz --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL + + gpuci_logger "Build conda pkg for strings_udf" + gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/strings_udf --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL + fi ################################################################################ # UPLOAD - Conda packages diff --git a/ci/cpu/upload.sh b/ci/cpu/upload.sh index 29f6265ec63..5fe35ef7466 100755 --- a/ci/cpu/upload.sh +++ b/ci/cpu/upload.sh @@ -33,6 +33,19 @@ if [[ "$BUILD_LIBCUDF" == "1" && "$UPLOAD_LIBCUDF" == "1" ]]; then export LIBCUDF_FILES=$(conda build --no-build-id --croot "${CONDA_BLD_DIR}" conda/recipes/libcudf --output) LIBCUDF_FILES=$(echo "$LIBCUDF_FILES" | sed 's/.*libcudf-example.*//') # skip libcudf-example pkg upload gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing --no-progress $LIBCUDF_FILES + + # since strings_udf compiles libcudf code, we require it be built in the same environment as libcudf + # however since libcudf is agnostic to the python version that is present, we must vary it explicitly + # here if we want packages for both python 3.8 and 3.9 + export STRINGS_UDF_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/strings_udf --python=3.8 --output) + test -e ${STRINGS_UDF_FILE} + echo "Upload strings_udf (python 3.8): ${STRINGS_UDF_FILE}" + gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${STRINGS_UDF_FILE} --no-progress + + export STRINGS_UDF_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/strings_udf --python=3.9 --output) + test -e ${STRINGS_UDF_FILE} + echo "Upload strings_udf (python 3.9): ${STRINGS_UDF_FILE}" + gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${STRINGS_UDF_FILE} --no-progress fi if [[ "$BUILD_CUDF" == "1" && "$UPLOAD_CUDF" == "1" ]]; then diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 60dc8e2ae33..b491399f630 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -81,7 +81,6 @@ gpuci_logger "Check conda environment" conda info conda config --show-sources conda list --show-channel-urls - gpuci_logger "Check compiler versions" python --version @@ -123,11 +122,11 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then install_dask ################################################################################ - # BUILD - Build libcudf, cuDF, libcudf_kafka, and dask_cudf from source + # BUILD - Build libcudf, cuDF, libcudf_kafka, dask_cudf, and strings_udf from source ################################################################################ gpuci_logger "Build from source" - "$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests --ptds + "$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka strings_udf benchmarks tests --ptds ################################################################################ # TEST - Run 
GoogleTest
@@ -185,7 +184,11 @@ else
    gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/cudf_kafka --python=$PYTHON -c ${CONDA_ARTIFACT_PATH}
    gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/custreamz --python=$PYTHON -c ${CONDA_ARTIFACT_PATH}

-    gpuci_logger "Installing cudf, dask-cudf, cudf_kafka and custreamz"
+    # the CUDA component of strings_udf must be built on CUDA 11.5 just like libcudf,
+    # but because there is no separate Python package, we must also build the Python package on the 11.5 jobs.
+    # this means that at this point (on the GPU test jobs) the whole package is already built and has been
+    # copied by CI from the upstream 11.5 jobs into $CONDA_ARTIFACT_PATH
+    gpuci_logger "Installing cudf, dask-cudf, cudf_kafka, and custreamz"
    gpuci_mamba_retry install cudf dask-cudf cudf_kafka custreamz -c "${CONDA_BLD_DIR}" -c "${CONDA_ARTIFACT_PATH}"

    gpuci_logger "GoogleTests"
@@ -249,6 +252,8 @@ fi
 cd "$WORKSPACE/python/cudf/cudf"
 # It is essential to cd into $WORKSPACE/python/cudf/cudf as `pytest-xdist` + `coverage` seem to work only at this directory level.

+gpuci_logger "Check conda packages"
+conda list
 gpuci_logger "Python py.test for cuDF"
 py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config="$WORKSPACE/python/cudf/.coveragerc" --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term --dist=loadscope tests

@@ -260,6 +265,31 @@ cd "$WORKSPACE/python/custreamz"
 gpuci_logger "Python py.test for cuStreamz"
 py.test -n 8 --cache-clear --basetemp="$WORKSPACE/custreamz-cuda-tmp" --junitxml="$WORKSPACE/junit-custreamz.xml" -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:"$WORKSPACE/python/custreamz/custreamz-coverage.xml" --cov-report term custreamz

+gpuci_logger "Installing strings_udf"
+gpuci_mamba_retry install strings_udf -c "${CONDA_BLD_DIR}" -c "${CONDA_ARTIFACT_PATH}"
+
+cd "$WORKSPACE/python/strings_udf/strings_udf"
+gpuci_logger "Python py.test for strings_udf"
+
+# We do not want to exit with a nonzero exit code in the case where no
+# strings_udf tests are run because that will always happen when the local CUDA
+# version is not 11.5. We need to suppress the exit code because this script is
+# run with set -e and we're already setting a trap that we don't want to
+# override here.
+
+STRINGS_UDF_PYTEST_RETCODE=0
+py.test -n 8 --cache-clear --basetemp="$WORKSPACE/strings-udf-cuda-tmp" --junitxml="$WORKSPACE/junit-strings-udf.xml" -v --cov-config=.coveragerc --cov=strings_udf --cov-report=xml:"$WORKSPACE/python/strings_udf/strings-udf-coverage.xml" --cov-report term tests || STRINGS_UDF_PYTEST_RETCODE=$?
+
+if [ ${STRINGS_UDF_PYTEST_RETCODE} -eq 5 ]; then
+  echo "No strings UDF tests were run, but this script will continue to execute."
+elif [ ${STRINGS_UDF_PYTEST_RETCODE} -ne 0 ]; then
+  exit ${STRINGS_UDF_PYTEST_RETCODE}
+else
+  cd "$WORKSPACE/python/cudf/cudf"
+  gpuci_logger "Python py.test retest cuDF UDFs"
+  py.test tests/test_udf_masked_ops.py -n 8 --cache-clear
+fi
+
 # Run benchmarks with both cudf and pandas to ensure compatibility is maintained.
 # Benchmarks are run in DEBUG_ONLY mode, meaning that only small data sizes are used.
 # Therefore, these runs only verify that benchmarks are valid.
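Note on the exit-code handling above: pytest exits with status 5 when it collects zero tests, which is exactly what happens here whenever the local CUDA version is not 11.5. A minimal standalone sketch of the same pattern (the test path is illustrative, not part of this patch):

    RETCODE=0
    py.test tests || RETCODE=$?      # capture the status instead of letting `set -e` abort
    if [ ${RETCODE} -eq 5 ]; then
        echo "No tests were collected; continuing."   # benign: status 5 == nothing collected
    elif [ ${RETCODE} -ne 0 ]; then
        exit ${RETCODE}              # genuine test failures still fail the job
    fi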
diff --git a/ci/gpu/test-notebooks.sh b/ci/gpu/test-notebooks.sh index 1a5c2614000..36d093d0d28 100755 --- a/ci/gpu/test-notebooks.sh +++ b/ci/gpu/test-notebooks.sh @@ -1,4 +1,5 @@ #!/bin/bash +# Copyright (c) 2020-2022, NVIDIA CORPORATION. NOTEBOOKS_DIR="$WORKSPACE/notebooks" NBTEST="$WORKSPACE/ci/utils/nbtest.sh" diff --git a/ci/local/build.sh b/ci/local/build.sh index 345db967264..f6479cd76cc 100755 --- a/ci/local/build.sh +++ b/ci/local/build.sh @@ -1,4 +1,5 @@ #!/bin/bash +# Copyright (c) 2019-2022, NVIDIA CORPORATION. GIT_DESCRIBE_TAG=`git describe --tags` MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 34783a414bd..8fad4e08c56 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -37,6 +37,9 @@ sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' # Python update sed_runner 's/'"cudf_version .*)"'/'"cudf_version ${NEXT_FULL_TAG})"'/g' python/cudf/CMakeLists.txt +# Strings UDF update +sed_runner 's/'"strings_udf_version .*)"'/'"strings_udf_version ${NEXT_FULL_TAG})"'/g' python/strings_udf/CMakeLists.txt + # cpp libcudf_kafka update sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/libcudf_kafka/CMakeLists.txt diff --git a/ci/utils/nbtest.sh b/ci/utils/nbtest.sh index 1b39f267c65..2a94e2d0695 100755 --- a/ci/utils/nbtest.sh +++ b/ci/utils/nbtest.sh @@ -1,4 +1,5 @@ #!/bin/bash +# Copyright (c) 2020-2022, NVIDIA CORPORATION. MAGIC_OVERRIDE_CODE=" def my_run_line_magic(*args, **kwargs): diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml index 57dbfdf6d0a..69b9f6b24ba 100644 --- a/conda/environments/cudf_dev_cuda11.5.yml +++ b/conda/environments/cudf_dev_cuda11.5.yml @@ -21,7 +21,7 @@ dependencies: - python>=3.8,<3.10 - numba>=0.56.2 - numpy - - pandas>=1.0,<1.5.0dev0 + - pandas>=1.0,<1.6.0dev0 - pyarrow=9 - fastavro>=0.22.9 - python-snappy>=0.6.0 diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 65e97d3755c..9b8e379b25e 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -48,7 +48,7 @@ requirements: - protobuf>=3.20.1,<3.21.0a0 - python - typing_extensions - - pandas >=1.0,<1.5.0dev0 + - pandas >=1.0,<1.6.0dev0 - cupy >=9.5.0,<12.0.0a0 - numba >=0.56.2 - numpy diff --git a/conda/recipes/strings_udf/build.sh b/conda/recipes/strings_udf/build.sh new file mode 100644 index 00000000000..2de1325347b --- /dev/null +++ b/conda/recipes/strings_udf/build.sh @@ -0,0 +1,4 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. + +# This assumes the script is executed from the root of the repo directory +./build.sh strings_udf diff --git a/conda/recipes/strings_udf/conda_build_config.yaml b/conda/recipes/strings_udf/conda_build_config.yaml new file mode 100644 index 00000000000..d9c3f21448f --- /dev/null +++ b/conda/recipes/strings_udf/conda_build_config.yaml @@ -0,0 +1,14 @@ +c_compiler_version: + - 9 + +cxx_compiler_version: + - 9 + +sysroot_version: + - "2.17" + +cmake_version: + - ">=3.20.1,!=3.23.0" + +cuda_compiler: + - nvcc diff --git a/conda/recipes/strings_udf/meta.yaml b/conda/recipes/strings_udf/meta.yaml new file mode 100644 index 00000000000..e29fb55ce63 --- /dev/null +++ b/conda/recipes/strings_udf/meta.yaml @@ -0,0 +1,65 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. 
+ +{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} +{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} +{% set py_version=environ.get('CONDA_PY', 38) %} +{% set cuda_version='.'.join(environ.get('CUDA', '11.5').split('.')[:2]) %} +{% set cuda_major=cuda_version.split('.')[0] %} + +package: + name: strings_udf + version: {{ version }} + +source: + git_url: ../../.. + +build: + number: {{ GIT_DESCRIBE_NUMBER }} + string: cuda_{{ cuda_major }}_py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + script_env: + - VERSION_SUFFIX + - PARALLEL_LEVEL + # libcudf's run_exports pinning is looser than we would like + ignore_run_exports: + - libcudf + ignore_run_exports_from: + - {{ compiler('cuda') }} + +requirements: + build: + - cmake {{ cmake_version }} + - {{ compiler('c') }} + - {{ compiler('cxx') }} + - {{ compiler('cuda') }} {{ cuda_version }} + - sysroot_{{ target_platform }} {{ sysroot_version }} + host: + - python + - cython >=0.29,<0.30 + - scikit-build>=0.13.1 + - setuptools + - numba >=0.54 + - libcudf ={{ version }} + - cudf ={{ version }} + - cudatoolkit ={{ cuda_version }} + run: + - python + - typing_extensions + - numba >=0.54 + - numpy + - libcudf ={{ version }} + - cudf ={{ version }} + - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }} + - cachetools + - ptxcompiler # [linux64] # CUDA enhanced compatibility. See https://github.com/rapidsai/ptxcompiler +test: # [linux64] + requires: # [linux64] + - cudatoolkit {{ cuda_version }}.* # [linux64] + imports: # [linux64] + - strings_udf # [linux64] + +about: + home: https://rapids.ai/ + license: Apache-2.0 + license_family: APACHE + license_file: LICENSE + summary: strings_udf library diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ae33ad260d2..7efa186aede 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -328,6 +328,7 @@ add_library( src/io/csv/writer_impl.cu src/io/functions.cpp src/io/json/json_gpu.cu + src/io/json/json_tree.cu src/io/json/nested_json_gpu.cu src/io/json/reader_impl.cu src/io/json/experimental/read_json.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index bbd51546668..d1ff177a25e 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -223,8 +223,10 @@ ConfigureBench(MERGE_BENCH merge/merge.cpp) ConfigureBench(NULLMASK_BENCH null_mask/set_null_mask.cpp) # ################################################################################################## -# * parquet writer chunks benchmark --------------------------------------------------------------- -ConfigureBench(PARQUET_WRITER_CHUNKS_BENCH io/parquet/parquet_writer_chunks.cpp) +# * parquet writer benchmark ---------------------------------------------------------------------- +ConfigureNVBench( + PARQUET_WRITER_NVBENCH io/parquet/parquet_writer.cpp io/parquet/parquet_writer_chunks.cpp +) # ################################################################################################## # * parquet reader benchmark ---------------------------------------------------------------------- @@ -238,11 +240,7 @@ ConfigureNVBench(ORC_READER_NVBENCH io/orc/orc_reader_input.cpp io/orc/orc_reade # ################################################################################################## # * csv reader benchmark -------------------------------------------------------------------------- -ConfigureBench(CSV_READER_BENCH io/csv/csv_reader.cpp) - -# 
##################################################################################################
-# * parquet writer benchmark ----------------------------------------------------------------------
-ConfigureBench(PARQUET_WRITER_BENCH io/parquet/parquet_writer.cpp)
+ConfigureNVBench(CSV_READER_NVBENCH io/csv/csv_reader_input.cpp io/csv/csv_reader_options.cpp)

 # ##################################################################################################
 # * orc writer benchmark --------------------------------------------------------------------------
diff --git a/cpp/benchmarks/io/csv/csv_reader.cpp b/cpp/benchmarks/io/csv/csv_reader.cpp
deleted file mode 100644
index 17b4a342dea..00000000000
--- a/cpp/benchmarks/io/csv/csv_reader.cpp
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <benchmarks/common/generate_input.hpp>
-#include <benchmarks/fixture/benchmark_fixture.hpp>
-#include <benchmarks/io/cuio_common.hpp>
-#include <benchmarks/synchronization/synchronization.hpp>
-
-#include <cudf/io/csv.hpp>
-
-// to enable, run cmake with -DBUILD_BENCHMARKS=ON
-
-constexpr size_t data_size = 256 << 20;
-constexpr cudf::size_type num_cols = 64;
-
-namespace cudf_io = cudf::io;
-
-class CsvRead : public cudf::benchmark {
-};
-
-void BM_csv_read_varying_input(benchmark::State& state)
-{
-  auto const data_types  = get_type_or_group(state.range(0));
-  auto const source_type = static_cast<io_type>(state.range(1));
-
-  auto const tbl =
-    create_random_table(cycle_dtypes(data_types, num_cols), table_size_bytes{data_size});
-  auto const view = tbl->view();
-
-  cuio_source_sink_pair source_sink(source_type);
-  cudf_io::csv_writer_options options =
-    cudf_io::csv_writer_options::builder(source_sink.make_sink_info(), view).include_header(true);
-  cudf_io::write_csv(options);
-
-  cudf_io::csv_reader_options const read_options =
-    cudf_io::csv_reader_options::builder(source_sink.make_source_info());
-
-  auto mem_stats_logger = cudf::memory_stats_logger();
-  for (auto _ : state) {
-    try_drop_l3_cache();
-    cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
-    cudf_io::read_csv(read_options);
-  }
-
-  state.SetBytesProcessed(data_size * state.iterations());
-  state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
-  state.counters["encoded_file_size"] = source_sink.size();
-}
-
-void BM_csv_read_varying_options(benchmark::State& state)
-{
-  auto const col_sel    = static_cast<column_selection>(state.range(0));
-  auto const row_sel    = static_cast<row_selection>(state.range(1));
-  auto const num_chunks = state.range(2);
-
-  auto const data_types =
-    dtypes_for_column_selection(get_type_or_group({int32_t(type_group_id::INTEGRAL),
-                                                   int32_t(type_group_id::FLOATING_POINT),
-                                                   int32_t(type_group_id::FIXED_POINT),
-                                                   int32_t(type_group_id::TIMESTAMP),
-                                                   int32_t(type_group_id::DURATION),
-                                                   int32_t(cudf::type_id::STRING)}),
-                                col_sel);
-  auto const cols_to_read = select_column_indexes(data_types.size(), col_sel);
-
-  auto const tbl  = create_random_table(data_types, table_size_bytes{data_size});
-  auto const view = tbl->view();
-
-  cuio_source_sink_pair source_sink(io_type::HOST_BUFFER);
- 
cudf_io::csv_writer_options options = - cudf_io::csv_writer_options::builder(source_sink.make_sink_info(), view) - .include_header(true) - .line_terminator("\r\n"); - cudf_io::write_csv(options); - - cudf_io::csv_reader_options read_options = - cudf_io::csv_reader_options::builder(source_sink.make_source_info()) - .use_cols_indexes(cols_to_read) - .thousands('\'') - .windowslinetermination(true) - .comment('#') - .prefix("BM_"); - - size_t const chunk_size = source_sink.size() / num_chunks; - cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; - auto mem_stats_logger = cudf::memory_stats_logger(); - for (auto _ : state) { - try_drop_l3_cache(); - cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 - for (int32_t chunk = 0; chunk < num_chunks; ++chunk) { - // only read the header in the first chunk - read_options.set_header(chunk == 0 ? 0 : -1); - - auto const is_last_chunk = chunk == (num_chunks - 1); - switch (row_sel) { - case row_selection::ALL: break; - case row_selection::BYTE_RANGE: - read_options.set_byte_range_offset(chunk * chunk_size); - read_options.set_byte_range_size(chunk_size); - if (is_last_chunk) read_options.set_byte_range_size(0); - break; - case row_selection::NROWS: - read_options.set_skiprows(chunk * chunk_row_cnt); - read_options.set_nrows(chunk_row_cnt); - if (is_last_chunk) read_options.set_nrows(-1); - break; - case row_selection::SKIPFOOTER: - read_options.set_skiprows(chunk * chunk_row_cnt); - read_options.set_skipfooter(view.num_rows() - (chunk + 1) * chunk_row_cnt); - if (is_last_chunk) read_options.set_skipfooter(0); - break; - default: CUDF_FAIL("Unsupported row selection method"); - } - - cudf_io::read_csv(read_options); - } - } - - auto const data_processed = data_size * cols_to_read.size() / view.num_columns(); - state.SetBytesProcessed(data_processed * state.iterations()); - state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage(); - state.counters["encoded_file_size"] = source_sink.size(); -} - -#define CSV_RD_BM_INPUTS_DEFINE(name, type_or_group, src_type) \ - BENCHMARK_DEFINE_F(CsvRead, name) \ - (::benchmark::State & state) { BM_csv_read_varying_input(state); } \ - BENCHMARK_REGISTER_F(CsvRead, name) \ - ->Args({int32_t(type_or_group), src_type}) \ - ->Unit(benchmark::kMillisecond) \ - ->UseManualTime(); - -RD_BENCHMARK_DEFINE_ALL_SOURCES(CSV_RD_BM_INPUTS_DEFINE, integral, type_group_id::INTEGRAL); -RD_BENCHMARK_DEFINE_ALL_SOURCES(CSV_RD_BM_INPUTS_DEFINE, floats, type_group_id::FLOATING_POINT); -RD_BENCHMARK_DEFINE_ALL_SOURCES(CSV_RD_BM_INPUTS_DEFINE, decimal, type_group_id::FIXED_POINT); -RD_BENCHMARK_DEFINE_ALL_SOURCES(CSV_RD_BM_INPUTS_DEFINE, timestamps, type_group_id::TIMESTAMP); -RD_BENCHMARK_DEFINE_ALL_SOURCES(CSV_RD_BM_INPUTS_DEFINE, durations, type_group_id::DURATION); -RD_BENCHMARK_DEFINE_ALL_SOURCES(CSV_RD_BM_INPUTS_DEFINE, string, cudf::type_id::STRING); - -BENCHMARK_DEFINE_F(CsvRead, column_selection) -(::benchmark::State& state) { BM_csv_read_varying_options(state); } -BENCHMARK_REGISTER_F(CsvRead, column_selection) - ->ArgsProduct({{int32_t(column_selection::ALL), - int32_t(column_selection::ALTERNATE), - int32_t(column_selection::FIRST_HALF), - int32_t(column_selection::SECOND_HALF)}, - {int32_t(row_selection::ALL)}, - {1}}) - ->Unit(benchmark::kMillisecond) - ->UseManualTime(); - -BENCHMARK_DEFINE_F(CsvRead, row_selection) -(::benchmark::State& state) { BM_csv_read_varying_options(state); } -BENCHMARK_REGISTER_F(CsvRead, row_selection) - 
->ArgsProduct({{int32_t(column_selection::ALL)},
-                 {int32_t(row_selection::BYTE_RANGE),
-                  int32_t(row_selection::NROWS),
-                  int32_t(row_selection::SKIPFOOTER)},
-                 {1, 8}})
-  ->Unit(benchmark::kMillisecond)
-  ->UseManualTime();
diff --git a/cpp/benchmarks/io/csv/csv_reader_input.cpp b/cpp/benchmarks/io/csv/csv_reader_input.cpp
new file mode 100644
index 00000000000..4f895e13f1b
--- /dev/null
+++ b/cpp/benchmarks/io/csv/csv_reader_input.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/fixture/rmm_pool_raii.hpp>
+#include <benchmarks/io/cuio_common.hpp>
+#include <benchmarks/io/nvbench_helpers.hpp>
+
+#include <cudf/io/csv.hpp>
+#include <cudf/utilities/default_stream.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+constexpr size_t data_size = 256 << 20;
+constexpr cudf::size_type num_cols = 64;
+
+template <typename DataType>
+void csv_read_common(DataType const& data_types,
+                     cudf::io::io_type const& source_type,
+                     nvbench::state& state)
+{
+  auto const tbl =
+    create_random_table(cycle_dtypes(data_types, num_cols), table_size_bytes{data_size});
+  auto const view = tbl->view();
+
+  cuio_source_sink_pair source_sink(source_type);
+  cudf::io::csv_writer_options options =
+    cudf::io::csv_writer_options::builder(source_sink.make_sink_info(), view).include_header(true);
+
+  cudf::io::write_csv(options);
+
+  cudf::io::csv_reader_options const read_options =
+    cudf::io::csv_reader_options::builder(source_sink.make_source_info());
+
+  auto const mem_stats_logger = cudf::memory_stats_logger();  // init stats logger
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
+             [&](nvbench::launch& launch, auto& timer) {
+               try_drop_l3_cache();  // Drop L3 cache for accurate measurement
+
+               timer.start();
+               cudf::io::read_csv(read_options);
+               timer.stop();
+             });
+
+  auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
+  state.add_element_count(static_cast<double>(data_size) / time, "bytes_per_second");
+  state.add_buffer_size(
+    mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
+  state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
+}
+
+template <data_type DataType>
+void BM_csv_read_input(nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>>)
+{
+  cudf::rmm_pool_raii rmm_pool;
+
+  auto const d_type      = get_type_or_group(static_cast<int32_t>(DataType));
+  auto const source_type = io_type::FILEPATH;
+
+  csv_read_common(d_type, source_type, state);
+}
+
+template <cudf::io::io_type IO>
+void BM_csv_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IO>>)
+{
+  cudf::rmm_pool_raii rmm_pool;
+
+  auto const d_type = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
+                                         static_cast<int32_t>(data_type::FLOAT),
+                                         static_cast<int32_t>(data_type::DECIMAL),
+                                         static_cast<int32_t>(data_type::TIMESTAMP),
+                                         static_cast<int32_t>(data_type::DURATION),
+                                         static_cast<int32_t>(data_type::STRING)});
+  auto const source_type = IO;
+
+  csv_read_common(d_type, source_type, state);
+}
+
+using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
+                                            data_type::FLOAT,
+                                            data_type::DECIMAL,
+                                            data_type::TIMESTAMP,
+                                            data_type::DURATION,
+                                            data_type::STRING>;
+
+using io_list =
+  nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::DEVICE_BUFFER>;
+
+NVBENCH_BENCH_TYPES(BM_csv_read_input, 
NVBENCH_TYPE_AXES(d_type_list))
+  .set_name("csv_read_data_type")
+  .set_type_axes_names({"data_type"})
+  .set_min_samples(4);
+
+NVBENCH_BENCH_TYPES(BM_csv_read_io, NVBENCH_TYPE_AXES(io_list))
+  .set_name("csv_read_io")
+  .set_type_axes_names({"io"})
+  .set_min_samples(4);
diff --git a/cpp/benchmarks/io/csv/csv_reader_options.cpp b/cpp/benchmarks/io/csv/csv_reader_options.cpp
new file mode 100644
index 00000000000..b569dc65f3d
--- /dev/null
+++ b/cpp/benchmarks/io/csv/csv_reader_options.cpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/fixture/rmm_pool_raii.hpp>
+#include <benchmarks/io/cuio_common.hpp>
+#include <benchmarks/io/nvbench_helpers.hpp>
+
+#include <cudf/io/csv.hpp>
+#include <cudf/utilities/default_stream.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+constexpr size_t data_size = 256 << 20;
+
+template <column_selection ColSelection, row_selection RowSelection>
+void BM_csv_read_varying_options(
+  nvbench::state& state,
+  nvbench::type_list<nvbench::enum_type<ColSelection>, nvbench::enum_type<RowSelection>>)
+{
+  cudf::rmm_pool_raii rmm_pool;
+
+  auto const data_types =
+    dtypes_for_column_selection(get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
+                                                   static_cast<int32_t>(data_type::FLOAT),
+                                                   static_cast<int32_t>(data_type::DECIMAL),
+                                                   static_cast<int32_t>(data_type::TIMESTAMP),
+                                                   static_cast<int32_t>(data_type::DURATION),
+                                                   static_cast<int32_t>(data_type::STRING)}),
+                                ColSelection);
+  auto const cols_to_read = select_column_indexes(data_types.size(), ColSelection);
+  auto const num_chunks   = state.get_int64("num_chunks");
+
+  auto const tbl  = create_random_table(data_types, table_size_bytes{data_size});
+  auto const view = tbl->view();
+
+  cuio_source_sink_pair source_sink(io_type::HOST_BUFFER);
+  cudf::io::csv_writer_options options =
+    cudf::io::csv_writer_options::builder(source_sink.make_sink_info(), view)
+      .include_header(true)
+      .line_terminator("\r\n");
+  cudf::io::write_csv(options);
+
+  cudf::io::csv_reader_options read_options =
+    cudf::io::csv_reader_options::builder(source_sink.make_source_info())
+      .use_cols_indexes(cols_to_read)
+      .thousands('\'')
+      .windowslinetermination(true)
+      .comment('#')
+      .prefix("BM_");
+
+  size_t const chunk_size             = source_sink.size() / num_chunks;
+  cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
+  auto const mem_stats_logger         = cudf::memory_stats_logger();
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
+             [&](nvbench::launch& launch, auto& timer) {
+               try_drop_l3_cache();  // Drop L3 cache for accurate measurement
+
+               timer.start();
+               for (int32_t chunk = 0; chunk < num_chunks; ++chunk) {
+                 // only read the header in the first chunk
+                 read_options.set_header(chunk == 0 ? 
0 : -1);
+
+                 auto const is_last_chunk = chunk == (num_chunks - 1);
+                 switch (RowSelection) {
+                   case row_selection::ALL: break;
+                   case row_selection::BYTE_RANGE:
+                     read_options.set_byte_range_offset(chunk * chunk_size);
+                     read_options.set_byte_range_size(chunk_size);
+                     if (is_last_chunk) read_options.set_byte_range_size(0);
+                     break;
+                   case row_selection::NROWS:
+                     read_options.set_skiprows(chunk * chunk_row_cnt);
+                     read_options.set_nrows(chunk_row_cnt);
+                     if (is_last_chunk) read_options.set_nrows(-1);
+                     break;
+                   case row_selection::SKIPFOOTER:
+                     read_options.set_skiprows(chunk * chunk_row_cnt);
+                     read_options.set_skipfooter(view.num_rows() - (chunk + 1) * chunk_row_cnt);
+                     if (is_last_chunk) read_options.set_skipfooter(0);
+                     break;
+                   default: CUDF_FAIL("Unsupported row selection method");
+                 }
+
+                 cudf::io::read_csv(read_options);
+               }
+               timer.stop();
+             });
+
+  auto const elapsed_time   = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
+  auto const data_processed = data_size * cols_to_read.size() / view.num_columns();
+  state.add_element_count(static_cast<double>(data_processed) / elapsed_time, "bytes_per_second");
+  state.add_buffer_size(
+    mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
+  state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
+}
+
+using col_selections = nvbench::enum_type_list<column_selection::ALL,
+                                               column_selection::ALTERNATE,
+                                               column_selection::FIRST_HALF,
+                                               column_selection::SECOND_HALF>;
+
+using row_selections = nvbench::
+  enum_type_list<row_selection::BYTE_RANGE, row_selection::NROWS, row_selection::SKIPFOOTER>;
+
+NVBENCH_BENCH_TYPES(BM_csv_read_varying_options,
+                    NVBENCH_TYPE_AXES(col_selections, nvbench::enum_type_list<row_selection::ALL>))
+  .set_name("csv_read_column_selection")
+  .set_type_axes_names({"column_selection", "row_selection"})
+  .set_min_samples(4)
+  .add_int64_axis("num_chunks", {1});
+
+NVBENCH_BENCH_TYPES(BM_csv_read_varying_options,
+                    NVBENCH_TYPE_AXES(nvbench::enum_type_list<column_selection::ALL>,
+                                      row_selections))
+  .set_name("csv_read_row_selection")
+  .set_type_axes_names({"column_selection", "row_selection"})
+  .set_min_samples(4)
+  .add_int64_axis("num_chunks", {1, 8});
diff --git a/cpp/benchmarks/io/json/nested_json.cpp b/cpp/benchmarks/io/json/nested_json.cpp
index 1e84d0ee7c5..e2d4c3b77d8 100644
--- a/cpp/benchmarks/io/json/nested_json.cpp
+++ b/cpp/benchmarks/io/json/nested_json.cpp
@@ -15,6 +15,7 @@
  */

 #include
+#include
 #include
 #include

@@ -71,15 +72,21 @@ void BM_NESTED_JSON(nvbench::state& state)
   state.add_element_count(input.size());

   // Run algorithm
+  auto const mem_stats_logger = cudf::memory_stats_logger();
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
     // Allocate device-side temporary storage & run algorithm
     cudf::io::json::detail::parse_nested_json(input, default_options, cudf::default_stream_value);
   });
+
+  auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
+  state.add_element_count(static_cast<double>(string_size) / time, "bytes_per_second");
+  state.add_buffer_size(
+    mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
 }

 NVBENCH_BENCH(BM_NESTED_JSON)
   .set_name("nested_json_gpu_parser")
-  .add_int64_power_of_two_axis("string_size", nvbench::range(20, 31, 1));
+  .add_int64_power_of_two_axis("string_size", nvbench::range(20, 30, 1));

 }  // namespace cudf
diff --git a/cpp/benchmarks/io/nvbench_helpers.hpp b/cpp/benchmarks/io/nvbench_helpers.hpp
index 6c52c9a91cb..3ebef4fe0bd 100644
--- a/cpp/benchmarks/io/nvbench_helpers.hpp
+++ b/cpp/benchmarks/io/nvbench_helpers.hpp
@@ -122,7 +122,11 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
[](auto value) { switch (value) { case row_selection::ALL: return "ALL"; + case row_selection::BYTE_RANGE: return "BYTE_RANGE"; case row_selection::NROWS: return "NROWS"; + case row_selection::SKIPFOOTER: return "SKIPFOOTER"; + case row_selection::STRIPES: return "STRIPES"; + case row_selection::ROW_GROUPS: return "ROW_GROUPS"; default: return "Unknown"; } }, @@ -139,18 +143,6 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS( }, [](auto) { return std::string{}; }) -NVBENCH_DECLARE_ENUM_TYPE_STRINGS( - cudf::io::statistics_freq, - [](auto value) { - switch (value) { - case cudf::io::statistics_freq::STATISTICS_NONE: return "STATISTICS_NONE"; - case cudf::io::statistics_freq::STATISTICS_ROWGROUP: return "ORC_STATISTICS_STRIPE"; - case cudf::io::statistics_freq::STATISTICS_PAGE: return "ORC_STATISTICS_ROW_GROUP"; - default: return "Unknown"; - } - }, - [](auto) { return std::string{}; }) - enum class converts_strings : bool { YES, NO }; enum class uses_pandas_metadata : bool { YES, NO }; diff --git a/cpp/benchmarks/io/orc/orc_writer.cpp b/cpp/benchmarks/io/orc/orc_writer.cpp index 73f6812ac8e..ddf699b0eaa 100644 --- a/cpp/benchmarks/io/orc/orc_writer.cpp +++ b/cpp/benchmarks/io/orc/orc_writer.cpp @@ -26,6 +26,18 @@ #include +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + cudf::io::statistics_freq, + [](auto value) { + switch (value) { + case cudf::io::statistics_freq::STATISTICS_NONE: return "STATISTICS_NONE"; + case cudf::io::statistics_freq::STATISTICS_ROWGROUP: return "ORC_STATISTICS_STRIPE"; + case cudf::io::statistics_freq::STATISTICS_PAGE: return "ORC_STATISTICS_ROW_GROUP"; + default: return "Unknown"; + } + }, + [](auto) { return std::string{}; }) + constexpr int64_t data_size = 512 << 20; constexpr cudf::size_type num_cols = 64; @@ -38,7 +50,7 @@ void BM_orc_write_encode(nvbench::state& state, nvbench::type_list #include +#include #include -#include +#include #include +#include + +#include // to enable, run cmake with -DBUILD_BENCHMARKS=ON +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + cudf::io::statistics_freq, + [](auto value) { + switch (value) { + case cudf::io::statistics_freq::STATISTICS_NONE: return "STATISTICS_NONE"; + case cudf::io::statistics_freq::STATISTICS_ROWGROUP: return "STATISTICS_ROWGROUP"; + case cudf::io::statistics_freq::STATISTICS_PAGE: return "STATISTICS_PAGE"; + default: return "Unknown"; + } + }, + [](auto) { return std::string{}; }) + constexpr size_t data_size = 512 << 20; constexpr cudf::size_type num_cols = 64; -namespace cudf_io = cudf::io; +template +void BM_parq_write_encode(nvbench::state& state, nvbench::type_list>) +{ + cudf::rmm_pool_raii rmm_pool; -class ParquetWrite : public cudf::benchmark { -}; + auto const data_types = get_type_or_group(static_cast(DataType)); + cudf::size_type const cardinality = state.get_int64("cardinality"); + cudf::size_type const run_length = state.get_int64("run_length"); + auto const compression = cudf::io::compression_type::SNAPPY; + auto const sink_type = io_type::VOID; -void BM_parq_write_varying_inout(benchmark::State& state) + auto const tbl = + create_random_table(cycle_dtypes(data_types, num_cols), + table_size_bytes{data_size}, + data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); + auto const view = tbl->view(); + + std::size_t encoded_file_size = 0; + + auto const mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value())); + state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync, + [&](nvbench::launch& launch, auto& timer) { + 
cuio_source_sink_pair source_sink(sink_type);
+
+               timer.start();
+               cudf::io::parquet_writer_options opts =
+                 cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
+                   .compression(compression);
+               cudf::io::write_parquet(opts);
+               timer.stop();
+
+               encoded_file_size = source_sink.size();
+             });
+
+  auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
+  state.add_element_count(static_cast<double>(data_size) / time, "bytes_per_second");
+  state.add_buffer_size(
+    mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
+  state.add_buffer_size(encoded_file_size, "encoded_file_size", "encoded_file_size");
+}
+
+template <cudf::io::io_type IO, cudf::io::compression_type Compression>
+void BM_parq_write_io_compression(
+  nvbench::state& state,
+  nvbench::type_list<nvbench::enum_type<IO>, nvbench::enum_type<Compression>>)
 {
-  auto const data_types = get_type_or_group(state.range(0));
-  cudf::size_type const cardinality = state.range(1);
-  cudf::size_type const run_length  = state.range(2);
-  cudf_io::compression_type const compression =
-    state.range(3) ? cudf_io::compression_type::SNAPPY : cudf_io::compression_type::NONE;
-  auto const sink_type = static_cast<io_type>(state.range(4));
+  cudf::rmm_pool_raii rmm_pool;
+
+  auto const data_types = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
+                                             static_cast<int32_t>(data_type::FLOAT),
+                                             static_cast<int32_t>(data_type::DECIMAL),
+                                             static_cast<int32_t>(data_type::TIMESTAMP),
+                                             static_cast<int32_t>(data_type::DURATION),
+                                             static_cast<int32_t>(data_type::STRING),
+                                             static_cast<int32_t>(data_type::LIST),
+                                             static_cast<int32_t>(data_type::STRUCT)});
+
+  cudf::size_type const cardinality = state.get_int64("cardinality");
+  cudf::size_type const run_length  = state.get_int64("run_length");
+  auto const compression            = Compression;
+  auto const sink_type              = IO;

   auto const tbl =
     create_random_table(cycle_dtypes(data_types, num_cols),
@@ -46,80 +111,114 @@ void BM_parq_write_varying_inout(benchmark::State& state)
                         data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
   auto const view = tbl->view();

-  cuio_source_sink_pair source_sink(sink_type);
-  auto mem_stats_logger = cudf::memory_stats_logger();
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
-    cudf_io::parquet_writer_options opts =
-      cudf_io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
-        .compression(compression);
-    cudf_io::write_parquet(opts);
-  }
-
-  state.SetBytesProcessed(data_size * state.iterations());
-  state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
-  state.counters["encoded_file_size"] = source_sink.size();
+  std::size_t encoded_file_size = 0;
+
+  auto const mem_stats_logger = cudf::memory_stats_logger();
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync,
+             [&](nvbench::launch& launch, auto& timer) {
+               cuio_source_sink_pair source_sink(sink_type);
+
+               timer.start();
+               cudf::io::parquet_writer_options opts =
+                 cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
+                   .compression(compression);
+               cudf::io::write_parquet(opts);
+               timer.stop();
+
+               encoded_file_size = source_sink.size();
+             });
+
+  auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
+  state.add_element_count(static_cast<double>(data_size) / time, "bytes_per_second");
+  state.add_buffer_size(
+    mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
+  state.add_buffer_size(encoded_file_size, "encoded_file_size", "encoded_file_size");
 }

-void 
BM_parq_write_varying_options(benchmark::State& state)
+template <cudf::io::statistics_freq Statistics, cudf::io::compression_type Compression>
+void BM_parq_write_varying_options(
+  nvbench::state& state,
+  nvbench::type_list<nvbench::enum_type<Statistics>, nvbench::enum_type<Compression>>)
 {
-  auto const compression  = static_cast<cudf::io::compression_type>(state.range(0));
-  auto const enable_stats = static_cast<cudf::io::statistics_freq>(state.range(1));
-  auto const file_path    = state.range(2) != 0 ? "unused_path.parquet" : "";
-
-  auto const data_types = get_type_or_group({int32_t(type_group_id::INTEGRAL_SIGNED),
-                                             int32_t(type_group_id::FLOATING_POINT),
-                                             int32_t(type_group_id::FIXED_POINT),
-                                             int32_t(type_group_id::TIMESTAMP),
-                                             int32_t(type_group_id::DURATION),
-                                             int32_t(cudf::type_id::STRING),
-                                             int32_t(cudf::type_id::LIST)});
+  auto const enable_stats = Statistics;
+  auto const compression  = Compression;
+  auto const file_path    = state.get_string("file_path");
+
+  auto const data_types = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL_SIGNED),
+                                             static_cast<int32_t>(data_type::FLOAT),
+                                             static_cast<int32_t>(data_type::DECIMAL),
+                                             static_cast<int32_t>(data_type::TIMESTAMP),
+                                             static_cast<int32_t>(data_type::DURATION),
+                                             static_cast<int32_t>(data_type::STRING),
+                                             static_cast<int32_t>(data_type::LIST)});

   auto const tbl  = create_random_table(data_types, table_size_bytes{data_size});
   auto const view = tbl->view();

-  cuio_source_sink_pair source_sink(io_type::FILEPATH);
+  std::size_t encoded_file_size = 0;
+
   auto mem_stats_logger = cudf::memory_stats_logger();
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
-    cudf_io::parquet_writer_options const options =
-      cudf_io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
-        .compression(compression)
-        .stats_level(enable_stats)
-        .column_chunks_file_paths({file_path});
-    cudf_io::write_parquet(options);
-  }
-
-  state.SetBytesProcessed(data_size * state.iterations());
-  state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
-  state.counters["encoded_file_size"] = source_sink.size();
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync,
+             [&](nvbench::launch& launch, auto& timer) {
+               cuio_source_sink_pair source_sink(io_type::FILEPATH);
+
+               timer.start();
+               cudf::io::parquet_writer_options const options =
+                 cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
+                   .compression(compression)
+                   .stats_level(enable_stats)
+                   .column_chunks_file_paths({file_path});
+               cudf::io::write_parquet(options);
+               timer.stop();
+
+               encoded_file_size = source_sink.size();
+             });
+
+  auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
+  state.add_element_count(static_cast<double>(data_size) / time, "bytes_per_second");
+  state.add_buffer_size(
+    mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
+  state.add_buffer_size(encoded_file_size, "encoded_file_size", "encoded_file_size");
 }

-#define PARQ_WR_BM_INOUTS_DEFINE(name, type_or_group, sink_type)                              \
-  BENCHMARK_DEFINE_F(ParquetWrite, name)                                                      \
-  (::benchmark::State & state) { BM_parq_write_varying_inout(state); }                        \
-  BENCHMARK_REGISTER_F(ParquetWrite, name)                                                    \
-    ->ArgsProduct({{int32_t(type_or_group)}, {0, 1000}, {1, 32}, {true, false}, {sink_type}}) \
-    ->Unit(benchmark::kMillisecond)                                                           \
-    ->UseManualTime();
-
-WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, integral, type_group_id::INTEGRAL);
-WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, floats, type_group_id::FLOATING_POINT);
-WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, decimal, 
type_group_id::FIXED_POINT);
-WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, timestamps, type_group_id::TIMESTAMP);
-WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, durations, type_group_id::DURATION);
-WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, string, cudf::type_id::STRING);
-WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, list, cudf::type_id::LIST);
-WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, struct, cudf::type_id::STRUCT);
-
-BENCHMARK_DEFINE_F(ParquetWrite, writer_options)
-(::benchmark::State& state) { BM_parq_write_varying_options(state); }
-BENCHMARK_REGISTER_F(ParquetWrite, writer_options)
-  ->ArgsProduct({{int32_t(cudf::io::compression_type::NONE),
-                  int32_t(cudf::io::compression_type::SNAPPY)},
-                 {int32_t(cudf::io::statistics_freq::STATISTICS_NONE),
-                  int32_t(cudf::io::statistics_freq::STATISTICS_ROWGROUP),
-                  int32_t(cudf::io::statistics_freq::STATISTICS_PAGE)},
-                 {false, true}})
-  ->Unit(benchmark::kMillisecond)
-  ->UseManualTime();
+using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
+                                            data_type::FLOAT,
+                                            data_type::DECIMAL,
+                                            data_type::TIMESTAMP,
+                                            data_type::DURATION,
+                                            data_type::STRING,
+                                            data_type::LIST,
+                                            data_type::STRUCT>;
+
+using io_list = nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::VOID>;
+
+using compression_list =
+  nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;
+
+using stats_list = nvbench::enum_type_list<cudf::io::statistics_freq::STATISTICS_NONE,
+                                           cudf::io::statistics_freq::STATISTICS_ROWGROUP,
+                                           cudf::io::statistics_freq::STATISTICS_PAGE>;
+
+NVBENCH_BENCH_TYPES(BM_parq_write_encode, NVBENCH_TYPE_AXES(d_type_list))
+  .set_name("parquet_write_encode")
+  .set_type_axes_names({"data_type"})
+  .set_min_samples(4)
+  .add_int64_axis("cardinality", {0, 1000})
+  .add_int64_axis("run_length", {1, 32});
+
+NVBENCH_BENCH_TYPES(BM_parq_write_io_compression, NVBENCH_TYPE_AXES(io_list, compression_list))
+  .set_name("parquet_write_io_compression")
+  .set_type_axes_names({"io", "compression"})
+  .set_min_samples(4)
+  .add_int64_axis("cardinality", {0, 1000})
+  .add_int64_axis("run_length", {1, 32});
+
+NVBENCH_BENCH_TYPES(BM_parq_write_varying_options, NVBENCH_TYPE_AXES(stats_list, compression_list))
+  .set_name("parquet_write_options")
+  .set_type_axes_names({"statistics", "compression"})
+  .set_min_samples(4)
+  .add_string_axis("file_path", {"unused_path.parquet", ""});
diff --git a/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp b/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp
index e22696b9c01..6c8500a2a70 100644
--- a/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp
@@ -16,50 +16,61 @@

 #include
 #include
+#include
 #include
-#include
 #include
 #include
 #include
+#include
+
+#include

 // to enable, run cmake with -DBUILD_BENCHMARKS=ON

 constexpr int64_t data_size = 512 << 20;

-namespace cudf_io = cudf::io;
+void PQ_write(nvbench::state& state)
+{
+  cudf::rmm_pool_raii rmm_pool;

-class ParquetWrite : public cudf::benchmark {
-};
-class ParquetWriteChunked : public cudf::benchmark {
-};
+  cudf::size_type const num_cols = state.get_int64("num_cols");

-void PQ_write(benchmark::State& state)
-{
-  cudf::size_type num_cols = state.range(0);
-
-  auto tbl = create_random_table(cycle_dtypes({cudf::type_id::INT32}, num_cols),
-                                 table_size_bytes{data_size});
-  cudf::table_view view = tbl->view();
-
-  auto mem_stats_logger = cudf::memory_stats_logger();
-  cuio_source_sink_pair source_sink(io_type::VOID);
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
-    cudf_io::parquet_writer_options opts =
-      cudf_io::parquet_writer_options::builder(source_sink.make_sink_info(), view);
-    cudf_io::write_parquet(opts);
-  }
+  auto const tbl  = create_random_table(cycle_dtypes({cudf::type_id::INT32}, num_cols),
+                                        table_size_bytes{data_size});
+  auto const view = 
tbl->view();
+
+  std::size_t encoded_file_size = 0;
+  auto const mem_stats_logger   = cudf::memory_stats_logger();
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync,
+             [&](nvbench::launch& launch, auto& timer) {
+               cuio_source_sink_pair source_sink(io_type::VOID);

-  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * state.range(0));
-  state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
-  state.counters["encoded_file_size"] = source_sink.size();
+               timer.start();
+               cudf::io::parquet_writer_options opts =
+                 cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view);
+               cudf::io::write_parquet(opts);
+               timer.stop();
+
+               encoded_file_size = source_sink.size();
+             });
+
+  auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
+  state.add_element_count(static_cast<double>(data_size) / time, "bytes_per_second");
+  state.add_buffer_size(
+    mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
+  state.add_buffer_size(encoded_file_size, "encoded_file_size", "encoded_file_size");
 }

-void PQ_write_chunked(benchmark::State& state)
+void PQ_write_chunked(nvbench::state& state)
 {
-  cudf::size_type num_cols   = state.range(0);
-  cudf::size_type num_tables = state.range(1);
+  cudf::rmm_pool_raii rmm_pool;
+
+  cudf::size_type const num_cols   = state.get_int64("num_cols");
+  cudf::size_type const num_tables = state.get_int64("num_chunks");

   std::vector<std::unique_ptr<cudf::table>> tables;
   for (cudf::size_type idx = 0; idx < num_tables; idx++) {
@@ -67,47 +78,41 @@ void PQ_write_chunked(benchmark::State& state)
                                          table_size_bytes{size_t(data_size / num_tables)}));
   }

-  auto mem_stats_logger = cudf::memory_stats_logger();
-  cuio_source_sink_pair source_sink(io_type::VOID);
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
-    cudf_io::chunked_parquet_writer_options opts =
-      cudf_io::chunked_parquet_writer_options::builder(source_sink.make_sink_info());
-    cudf_io::parquet_chunked_writer writer(opts);
-    std::for_each(tables.begin(), tables.end(), [&writer](std::unique_ptr<cudf::table> const& tbl) {
-      writer.write(*tbl);
+  auto const mem_stats_logger   = cudf::memory_stats_logger();
+  std::size_t encoded_file_size = 0;
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.exec(
+    nvbench::exec_tag::timer | nvbench::exec_tag::sync, [&](nvbench::launch& launch, auto& timer) {
+      cuio_source_sink_pair source_sink(io_type::VOID);
+
+      timer.start();
+      cudf::io::chunked_parquet_writer_options opts =
+        cudf::io::chunked_parquet_writer_options::builder(source_sink.make_sink_info());
+      cudf::io::parquet_chunked_writer writer(opts);
+      std::for_each(tables.begin(),
+                    tables.end(),
+                    [&writer](std::unique_ptr<cudf::table> const& tbl) { writer.write(*tbl); });
+      writer.close();
+      timer.stop();
+
+      encoded_file_size = source_sink.size();
     });
-    writer.close();
-  }

-  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * state.range(0));
-  state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
-  state.counters["encoded_file_size"] = source_sink.size();
+  auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
+  state.add_element_count(static_cast<double>(data_size) / time, "bytes_per_second");
+  state.add_buffer_size(
+    mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
+  state.add_buffer_size(encoded_file_size, 
"encoded_file_size", "encoded_file_size"); } -#define PWBM_BENCHMARK_DEFINE(name, size, num_columns) \ - BENCHMARK_DEFINE_F(ParquetWrite, name)(::benchmark::State & state) { PQ_write(state); } \ - BENCHMARK_REGISTER_F(ParquetWrite, name) \ - ->Args({num_columns}) \ - ->Unit(benchmark::kMillisecond) \ - ->UseManualTime() - -PWBM_BENCHMARK_DEFINE(3Gb8Cols, data_size, 8); -PWBM_BENCHMARK_DEFINE(3Gb1024Cols, data_size, 1024); - -#define PWCBM_BENCHMARK_DEFINE(name, num_columns, num_chunks) \ - BENCHMARK_DEFINE_F(ParquetWriteChunked, name)(::benchmark::State & state) \ - { \ - PQ_write_chunked(state); \ - } \ - BENCHMARK_REGISTER_F(ParquetWriteChunked, name) \ - ->Args({num_columns, num_chunks}) \ - ->Unit(benchmark::kMillisecond) \ - ->UseManualTime() \ - ->Iterations(4) - -PWCBM_BENCHMARK_DEFINE(3Gb8Cols64Chunks, 8, 8); -PWCBM_BENCHMARK_DEFINE(3Gb1024Cols64Chunks, 1024, 8); - -PWCBM_BENCHMARK_DEFINE(3Gb8Cols128Chunks, 8, 64); -PWCBM_BENCHMARK_DEFINE(3Gb1024Cols128Chunks, 1024, 64); +NVBENCH_BENCH(PQ_write) + .set_name("parquet_write_num_cols") + .set_min_samples(4) + .add_int64_axis("num_cols", {8, 1024}); + +NVBENCH_BENCH(PQ_write_chunked) + .set_name("parquet_chunked_write") + .set_min_samples(4) + .add_int64_axis("num_cols", {8, 1024}) + .add_int64_axis("num_chunks", {8, 64}); diff --git a/cpp/cmake/thirdparty/get_thrust.cmake b/cpp/cmake/thirdparty/get_thrust.cmake index cbdaf5520ff..379b1521bf0 100644 --- a/cpp/cmake/thirdparty/get_thrust.cmake +++ b/cpp/cmake/thirdparty/get_thrust.cmake @@ -41,8 +41,8 @@ function(find_and_configure_thrust VERSION) CPM_ARGS GIT_REPOSITORY https://github.com/NVIDIA/thrust.git GIT_TAG ${VERSION} - GIT_SHALLOW TRUE ${cpm_thrust_disconnect_update} PATCH_COMMAND patch --reject-file=- -p1 -N < - ${CUDF_SOURCE_DIR}/cmake/thrust.patch || true + GIT_SHALLOW TRUE ${cpm_thrust_disconnect_update} + PATCH_COMMAND patch --reject-file=- -p1 -N < ${CUDF_SOURCE_DIR}/cmake/thrust.patch || true OPTIONS "THRUST_INSTALL TRUE" ) @@ -80,6 +80,6 @@ function(find_and_configure_thrust VERSION) endif() endfunction() -set(CUDF_MIN_VERSION_Thrust 1.17.0) +set(CUDF_MIN_VERSION_Thrust 1.17.2) find_and_configure_thrust(${CUDF_MIN_VERSION_Thrust}) diff --git a/cpp/doxygen/developer_guide/DOCUMENTATION.md b/cpp/doxygen/developer_guide/DOCUMENTATION.md index c9f38d5849b..8a7d89c8dbd 100644 --- a/cpp/doxygen/developer_guide/DOCUMENTATION.md +++ b/cpp/doxygen/developer_guide/DOCUMENTATION.md @@ -437,7 +437,7 @@ We recommend installing Doxygen using conda (`conda install doxygen`) or a Linux Alternatively you can [build and install doxygen from source](https://www.doxygen.nl/manual/install.html). To build the libcudf HTML documentation simply run the `doxygen` command from the `cpp/doxygen` directory containing the `Doxyfile`. -The libcudf documentation can also be built using `make docs_cudf` from the cmake build directory (e.g. `cpp/build`). +The libcudf documentation can also be built using `cmake --build . --target docs_cudf` from the cmake build directory (e.g. `cpp/build`). Doxygen reads and processes all appropriate source files under the `cpp/include/` directory. The output is generated in the `cpp/doxygen/html/` directory. You can load the local `index.html` file generated there into any web browser to view the result. 
diff --git a/cpp/examples/basic/src/process_csv.cpp b/cpp/examples/basic/src/process_csv.cpp
index 1d6e718717d..5a3914da453 100644
--- a/cpp/examples/basic/src/process_csv.cpp
+++ b/cpp/examples/basic/src/process_csv.cpp
@@ -1,3 +1,19 @@
+/*
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 #include <cudf/groupby.hpp>
 #include <cudf/io/csv.hpp>
 #include <cudf/table/table.hpp>
diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp
index 73724b99589..66ac6d74cff 100644
--- a/cpp/include/cudf/io/json.hpp
+++ b/cpp/include/cudf/io/json.hpp
@@ -38,6 +38,22 @@ namespace io {

 class json_reader_options_builder;

+/**
+ * @brief Allows specifying the target types for nested JSON data via json_reader_options'
+ * `set_dtypes` method.
+ */
+struct schema_element {
+  /**
+   * @brief The type that this column should be converted to
+   */
+  data_type type;
+
+  /**
+   * @brief Allows specifying this column's child columns target type
+   */
+  std::map<std::string, schema_element> child_types;
+};
+
 /**
  * @brief Input arguments to the `read_json` interface.
  *
@@ -65,7 +81,10 @@ class json_reader_options {
   source_info _source;

   // Data types of the column; empty to infer dtypes
-  std::variant<std::vector<data_type>, std::map<std::string, data_type>> _dtypes;
+  std::variant<std::vector<data_type>,
+               std::map<std::string, data_type>,
+               std::map<std::string, schema_element>>
+    _dtypes;

   // Specify the compression format of the source or infer from file extension
   compression_type _compression = compression_type::AUTO;
@@ -83,6 +102,9 @@ class json_reader_options {
   // Whether to use the experimental reader
   bool _experimental = false;

+  // Whether to keep the quote characters of string values
+  bool _keep_quotes = false;
+
   /**
    * @brief Constructor from source info.
    *
@@ -120,7 +142,10 @@ class json_reader_options {
    *
    * @returns Data types of the columns
    */
-  std::variant<std::vector<data_type>, std::map<std::string, data_type>> const& get_dtypes() const
+  std::variant<std::vector<data_type>,
+               std::map<std::string, data_type>,
+               std::map<std::string, schema_element>> const&
+  get_dtypes() const
   {
     return _dtypes;
   }
@@ -203,6 +228,13 @@ class json_reader_options {
    */
   bool is_enabled_experimental() const { return _experimental; }

+  /**
+   * @brief Whether the experimental reader should keep quotes of string values.
+   *
+   * @returns true if the experimental reader should keep quotes, false otherwise
+   */
+  bool is_enabled_keep_quotes() const { return _keep_quotes; }
+
   /**
    * @brief Set data types for columns to be read.
    *
@@ -217,6 +249,13 @@ class json_reader_options {
    */
   void set_dtypes(std::map<std::string, data_type> types) { _dtypes = std::move(types); }

+  /**
+   * @brief Set data types for a potentially nested column hierarchy.
+   *
+   * @param types Map of column names to schema_element to support arbitrary nesting of data types
+   */
+  void set_dtypes(std::map<std::string, schema_element> types) { _dtypes = std::move(types); }
+
   /**
    * @brief Set the compression type.
    *
@@ -258,6 +297,14 @@ class json_reader_options {
    * @param val Boolean value to enable/disable the experimental reader
    */
   void enable_experimental(bool val) { _experimental = val; }
+
+  /**
+   * @brief Set whether the experimental reader should keep quotes of string values.
+ * + * @param val Boolean value to indicate whether the experimental reader should keep quotes + * of string values + */ + void enable_keep_quotes(bool val) { _keep_quotes = val; } }; /** @@ -305,6 +352,18 @@ class json_reader_options_builder { return *this; } + /** + * @brief Set data types for columns to be read. + * + * @param types Column name -> schema_element map + * @return this for chaining + */ + json_reader_options_builder& dtypes(std::map types) + { + options._dtypes = std::move(types); + return *this; + } + /** * @brief Set the compression type. * @@ -377,6 +436,19 @@ class json_reader_options_builder { return *this; } + /** + * @brief Set whether the experimental reader should keep quotes of string values. + * + * @param val Boolean value to indicate whether the experimental reader should keep quotes + * of string values + * @return this for chaining + */ + json_reader_options_builder& keep_quotes(bool val) + { + options._keep_quotes = val; + return *this; + } + /** * @brief move json_reader_options member once it's built. */ diff --git a/cpp/include/cudf/io/orc_metadata.hpp b/cpp/include/cudf/io/orc_metadata.hpp index e5b89cc0f91..d974eaa103a 100644 --- a/cpp/include/cudf/io/orc_metadata.hpp +++ b/cpp/include/cudf/io/orc_metadata.hpp @@ -107,7 +107,7 @@ struct double_statistics : minmax_statistics, sum_statistics { * order. The `sum` is the total length of elements in the column. * Note: According to ORC specs, the sum should be signed, but pyarrow uses unsigned value */ -struct string_statistics : minmax_statistics, sum_statistics { +struct string_statistics : minmax_statistics, sum_statistics { }; /** @@ -163,6 +163,7 @@ struct column_statistics; */ struct column_statistics { std::optional number_of_values; ///< number of statistics + std::optional has_null; ///< column has any nulls std::variant; auto find_key = static_cast(key).value(stream); auto keys_view = column_device_view::create(input.keys(), stream); - auto iter = thrust::equal_range(thrust::device, // segfaults: rmm::exec_policy(stream) and - // thrust::cuda::par.on(stream) + auto iter = thrust::equal_range(rmm::exec_policy(cudf::default_stream_value), keys_view->begin(), keys_view->end(), find_key); diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu index 5850b3af1c4..eeb27c2ac05 100644 --- a/cpp/src/interop/to_arrow.cu +++ b/cpp/src/interop/to_arrow.cu @@ -30,6 +30,7 @@ #include #include +#include #include #include @@ -153,7 +154,8 @@ std::shared_ptr dispatch_to_arrow::operator()( auto count = thrust::make_counting_iterator(0); - thrust::for_each(count, + thrust::for_each(rmm::exec_policy(cudf::default_stream_value), + count, count + input.size(), [in = input.begin(), out = buf.data()] __device__(auto in_idx) { auto const out_idx = in_idx * 2; diff --git a/cpp/src/io/fst/agent_dfa.cuh b/cpp/src/io/fst/agent_dfa.cuh index d847598d6dd..e02c7ff85fa 100644 --- a/cpp/src/io/fst/agent_dfa.cuh +++ b/cpp/src/io/fst/agent_dfa.cuh @@ -422,7 +422,7 @@ struct AgentDFA { OffsetT const num_total_symbols, StateIndexT& state, CallbackOpT& callback_op, - cub::Int2Type /**/) + cub::Int2Type) { using StateTransitionOpT = StateTransitionOp; diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 6f702a489a0..aabaa941daf 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -287,6 +287,7 @@ raw_orc_statistics read_raw_orc_statistics(source_info const& src_info) column_statistics::column_statistics(cudf::io::orc::column_statistics&& cs) { number_of_values = cs.number_of_values; + 
has_null = cs.has_null; if (cs.int_stats) { type_specific_stats = *cs.int_stats; } else if (cs.double_stats) { diff --git a/cpp/src/io/json/experimental/read_json.cpp b/cpp/src/io/json/experimental/read_json.cpp index ceac40ba4f9..7d78bd34b19 100644 --- a/cpp/src/io/json/experimental/read_json.cpp +++ b/cpp/src/io/json/experimental/read_json.cpp @@ -47,9 +47,6 @@ table_with_metadata read_json(host_span> sources, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto const dtypes_empty = - std::visit([](const auto& dtypes) { return dtypes.empty(); }, reader_opts.get_dtypes()); - CUDF_EXPECTS(dtypes_empty, "user specified dtypes are not yet supported"); CUDF_EXPECTS(reader_opts.get_byte_range_offset() == 0 and reader_opts.get_byte_range_size() == 0, "specifying a byte range is not yet supported"); diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu new file mode 100644 index 00000000000..3a26a1479e5 --- /dev/null +++ b/cpp/src/io/json/json_tree.cu @@ -0,0 +1,714 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nested_json.hpp" +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace cudf::io::json { +namespace detail { + +// The node that a token represents +struct token_to_node { + __device__ auto operator()(PdaTokenT const token) -> NodeT + { + switch (token) { + case token_t::StructBegin: return NC_STRUCT; + case token_t::ListBegin: return NC_LIST; + case token_t::StringBegin: return NC_STR; + case token_t::ValueBegin: return NC_VAL; + case token_t::FieldNameBegin: return NC_FN; + default: return NC_ERR; + }; + } +}; + +// Convert token indices to node range for each valid node. 
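+// Illustrative example (assuming StringBegin/StringEnd are emitted at the two
+// quote positions): for the input
+//     {"a":"x"}
+// the value's quotes sit at offsets 5 and 7, so with include_quote_char == true
+// the node range is [5, 8) ("x" with quotes), and with include_quote_char ==
+// false it is [6, 7) (just x).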
+struct node_ranges { + device_span tokens; + device_span token_indices; + bool include_quote_char; + __device__ auto operator()(size_type i) -> thrust::tuple + { + // Whether a token expects to be followed by its respective end-of-* token partner + auto is_begin_of_section = [] __device__(PdaTokenT const token) { + switch (token) { + case token_t::StringBegin: + case token_t::ValueBegin: + case token_t::FieldNameBegin: return true; + default: return false; + }; + }; + // The end-of-* partner token for a given beginning-of-* token + auto end_of_partner = [] __device__(PdaTokenT const token) { + switch (token) { + case token_t::StringBegin: return token_t::StringEnd; + case token_t::ValueBegin: return token_t::ValueEnd; + case token_t::FieldNameBegin: return token_t::FieldNameEnd; + default: return token_t::ErrorBegin; + }; + }; + // Includes quote char for end-of-string token or Skips the quote char for + // beginning-of-field-name token + auto get_token_index = [include_quote_char = include_quote_char] __device__( + PdaTokenT const token, SymbolOffsetT const token_index) { + constexpr SymbolOffsetT quote_char_size = 1; + switch (token) { + // Strip off quote char included for StringBegin + case token_t::StringBegin: return token_index + (include_quote_char ? 0 : quote_char_size); + // Strip off or Include trailing quote char for string values for StringEnd + case token_t::StringEnd: return token_index + (include_quote_char ? quote_char_size : 0); + // Strip off quote char included for FieldNameBegin + case token_t::FieldNameBegin: return token_index + quote_char_size; + default: return token_index; + }; + }; + PdaTokenT const token = tokens[i]; + // The section from the original JSON input that this token demarcates + SymbolOffsetT range_begin = get_token_index(token, token_indices[i]); + SymbolOffsetT range_end = range_begin + 1; // non-leaf, non-field nodes ignore this value. + if (is_begin_of_section(token)) { + if ((i + 1) < tokens.size() && end_of_partner(token) == tokens[i + 1]) { + // Update the range_end for this pair of tokens + range_end = get_token_index(tokens[i + 1], token_indices[i + 1]); + } + } + return thrust::make_tuple(range_begin, range_end); + } +}; + +// Generates a tree representation of the given tokens, token_indices. 
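+// Illustrative example: for the input [{"a": 1}] the tree has four nodes,
+//     node id:    0        1          2      3
+//     category:   NC_LIST  NC_STRUCT  NC_FN  NC_VAL
+//     level:      0        1          2      3
+//     parent id:  -1       0          1      2
+// with node_range_begin/end giving each node's span in the input text.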
+tree_meta_t get_tree_representation(device_span tokens, + device_span token_indices, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + // Whether a token does represent a node in the tree representation + auto is_node = [] __device__(PdaTokenT const token) -> bool { + switch (token) { + case token_t::StructBegin: + case token_t::ListBegin: + case token_t::StringBegin: + case token_t::ValueBegin: + case token_t::FieldNameBegin: + case token_t::ErrorBegin: return true; + default: return false; + }; + }; + + // Whether the token pops from the parent node stack + auto does_pop = [] __device__(PdaTokenT const token) { + switch (token) { + case token_t::StructMemberEnd: + case token_t::StructEnd: + case token_t::ListEnd: return true; + default: return false; + }; + }; + + // Whether the token pushes onto the parent node stack + auto does_push = [] __device__(PdaTokenT const token) { + switch (token) { + case token_t::FieldNameBegin: + case token_t::StructBegin: + case token_t::ListBegin: return true; + default: return false; + }; + }; + + auto num_tokens = tokens.size(); + auto is_node_it = thrust::make_transform_iterator( + tokens.begin(), + [is_node] __device__(auto t) -> size_type { return static_cast(is_node(t)); }); + auto num_nodes = thrust::count_if( + rmm::exec_policy(stream), tokens.begin(), tokens.begin() + num_tokens, is_node); + + // Node categories: copy_if with transform. + rmm::device_uvector node_categories(num_nodes, stream, mr); + auto node_categories_it = + thrust::make_transform_output_iterator(node_categories.begin(), token_to_node{}); + auto node_categories_end = thrust::copy_if(rmm::exec_policy(stream), + tokens.begin(), + tokens.begin() + num_tokens, + node_categories_it, + is_node); + CUDF_EXPECTS(node_categories_end - node_categories_it == num_nodes, + "node category count mismatch"); + + // Node levels: transform_exclusive_scan, copy_if. + rmm::device_uvector token_levels(num_tokens, stream); + auto push_pop_it = thrust::make_transform_iterator( + tokens.begin(), [does_push, does_pop] __device__(PdaTokenT const token) -> size_type { + return does_push(token) - does_pop(token); + }); + thrust::exclusive_scan( + rmm::exec_policy(stream), push_pop_it, push_pop_it + num_tokens, token_levels.begin()); + + rmm::device_uvector node_levels(num_nodes, stream, mr); + auto node_levels_end = thrust::copy_if(rmm::exec_policy(stream), + token_levels.begin(), + token_levels.begin() + num_tokens, + tokens.begin(), + node_levels.begin(), + is_node); + CUDF_EXPECTS(node_levels_end - node_levels.begin() == num_nodes, "node level count mismatch"); + + // Node ranges: copy_if with transform. 
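+  // The transform_output_iterator applies node_ranges{} on write, so the single
+  // copy_if pass below both filters token positions down to node positions and
+  // converts each surviving token into its (range_begin, range_end) pair without
+  // materializing an intermediate buffer of node token indices.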
+ rmm::device_uvector node_range_begin(num_nodes, stream, mr); + rmm::device_uvector node_range_end(num_nodes, stream, mr); + auto node_range_tuple_it = + thrust::make_zip_iterator(node_range_begin.begin(), node_range_end.begin()); + // Whether the tokenizer stage should keep quote characters for string values + // If the tokenizer keeps the quote characters, they may be stripped during type casting + constexpr bool include_quote_char = true; + auto node_range_out_it = thrust::make_transform_output_iterator( + node_range_tuple_it, node_ranges{tokens, token_indices, include_quote_char}); + + auto node_range_out_end = + thrust::copy_if(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + num_tokens, + node_range_out_it, + [is_node, tokens_gpu = tokens.begin()] __device__(size_type i) -> bool { + return is_node(tokens_gpu[i]); + }); + CUDF_EXPECTS(node_range_out_end - node_range_out_it == num_nodes, "node range count mismatch"); + + // Node parent ids: previous push token_id transform, stable sort, segmented scan with Max, + // reorder, copy_if. This one is sort of logical stack. But more generalized. + // TODO: make it own function. + rmm::device_uvector parent_token_ids(num_tokens, stream); + rmm::device_uvector initial_order(num_tokens, stream); + + thrust::sequence(rmm::exec_policy(stream), initial_order.begin(), initial_order.end()); + thrust::tabulate(rmm::exec_policy(stream), + parent_token_ids.begin(), + parent_token_ids.end(), + [does_push, tokens_gpu = tokens.begin()] __device__(auto i) -> size_type { + return (i > 0) && does_push(tokens_gpu[i - 1]) ? i - 1 : -1; + // -1, not sentinel used here because of max operation below + }); + + auto out_pid = thrust::make_zip_iterator(parent_token_ids.data(), initial_order.data()); + // Uses radix sort for builtin types. + thrust::stable_sort_by_key(rmm::exec_policy(stream), + token_levels.data(), + token_levels.data() + token_levels.size(), + out_pid); + + // SegmentedScan Max. + thrust::inclusive_scan_by_key(rmm::exec_policy(stream), + token_levels.data(), + token_levels.data() + token_levels.size(), + parent_token_ids.data(), + parent_token_ids.data(), + thrust::equal_to{}, + thrust::maximum{}); + // Reusing token_levels memory & use scatter to restore the original order. + + std::swap(token_levels, parent_token_ids); + auto& sorted_parent_token_ids = token_levels; + thrust::scatter(rmm::exec_policy(stream), + sorted_parent_token_ids.begin(), + sorted_parent_token_ids.end(), + initial_order.data(), + parent_token_ids.data()); + + rmm::device_uvector node_ids_gpu(num_tokens, stream); + thrust::exclusive_scan( + rmm::exec_policy(stream), is_node_it, is_node_it + num_tokens, node_ids_gpu.begin()); + + rmm::device_uvector parent_node_ids(num_nodes, stream, mr); + auto parent_node_ids_it = thrust::make_transform_iterator( + parent_token_ids.begin(), + [node_ids_gpu = node_ids_gpu.begin()] __device__(size_type const pid) -> NodeIndexT { + return pid < 0 ? 
parent_node_sentinel : node_ids_gpu[pid]; + }); + auto parent_node_ids_end = thrust::copy_if(rmm::exec_policy(stream), + parent_node_ids_it, + parent_node_ids_it + parent_token_ids.size(), + tokens.begin(), + parent_node_ids.begin(), + is_node); + CUDF_EXPECTS(parent_node_ids_end - parent_node_ids.begin() == num_nodes, + "parent node id gather mismatch"); + + return {std::move(node_categories), + std::move(parent_node_ids), + std::move(node_levels), + std::move(node_range_begin), + std::move(node_range_end)}; +} + +/** + * @brief Generates unique node_type id for each node. + * Field nodes with the same name are assigned the same node_type id. + * List, Struct, and String nodes are assigned their category values as node_type ids. + * + * All inputs and outputs are in node_id order. + * @param d_input JSON string in device memory + * @param d_tree Tree representation of the JSON + * @param stream CUDA stream used for device memory operations and kernel launches. + * @return Vector of node_type ids + */ +rmm::device_uvector hash_node_type_with_field_name(device_span d_input, + tree_meta_t const& d_tree, + rmm::cuda_stream_view stream) +{ + CUDF_FUNC_RANGE(); + using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor>; + using hash_map_type = + cuco::static_map; + auto num_nodes = d_tree.node_categories.size(); + + constexpr size_type empty_node_index_sentinel = -1; + hash_map_type key_map{compute_hash_table_size(num_nodes), // TODO reduce oversubscription + cuco::sentinel::empty_key{empty_node_index_sentinel}, + cuco::sentinel::empty_value{empty_node_index_sentinel}, + hash_table_allocator_type{default_allocator{}, stream}, + stream.value()}; + auto d_hasher = [d_input = d_input.data(), + node_range_begin = d_tree.node_range_begin.data(), + node_range_end = d_tree.node_range_end.data()] __device__(auto node_id) { + auto const field_name = cudf::string_view(d_input + node_range_begin[node_id], + node_range_end[node_id] - node_range_begin[node_id]); + return cudf::detail::default_hash{}(field_name); + }; + auto d_equal = [d_input = d_input.data(), + node_range_begin = d_tree.node_range_begin.data(), + node_range_end = d_tree.node_range_end.data()] __device__(auto node_id1, + auto node_id2) { + auto const field_name1 = cudf::string_view( + d_input + node_range_begin[node_id1], node_range_end[node_id1] - node_range_begin[node_id1]); + auto const field_name2 = cudf::string_view( + d_input + node_range_begin[node_id2], node_range_end[node_id2] - node_range_begin[node_id2]); + return field_name1 == field_name2; + }; + auto is_field_name_node = [node_categories = d_tree.node_categories.data()] __device__( + auto node_id) { return node_categories[node_id] == node_t::NC_FN; }; + // key-value pairs: uses node_id itself as node_type. (unique node_id for a field name due to + // hashing) + auto iter = cudf::detail::make_counting_transform_iterator( + 0, [] __device__(size_type i) { return cuco::make_pair(i, i); }); + + key_map.insert_if(iter, + iter + num_nodes, + thrust::counting_iterator(0), // stencil + is_field_name_node, + d_hasher, + d_equal, + stream.value()); + auto get_hash_value = + [key_map = key_map.get_device_view(), d_hasher, d_equal] __device__(auto node_id) -> size_type { + auto it = key_map.find(node_id, d_hasher, d_equal); + return (it == key_map.end()) ? size_type{0} : it->second.load(cuda::std::memory_order_relaxed); + }; + // convert field nodes to node indices, and other nodes to enum value. 
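+  // Illustrative example (the hash-assigned ids are hypothetical; they depend on
+  // insertion order): for node categories
+  //     [NC_STRUCT, NC_FN("a"), NC_STR, NC_FN("b"), NC_STR, NC_FN("a"), NC_STR]
+  // the node types could become
+  //     [NC_STRUCT, NUM_NODE_CLASSES + 1, NC_STR, NUM_NODE_CLASSES + 3, NC_STR,
+  //      NUM_NODE_CLASSES + 1, NC_STR]
+  // i.e. both "a" fields share one type while "a" and "b" remain distinct.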
+ rmm::device_uvector node_type(num_nodes, stream); + thrust::tabulate(rmm::exec_policy(stream), + node_type.begin(), + node_type.end(), + [node_categories = d_tree.node_categories.data(), + is_field_name_node, + get_hash_value] __device__(auto node_id) -> size_type { + if (is_field_name_node(node_id)) + return static_cast(NUM_NODE_CLASSES) + get_hash_value(node_id); + else + return static_cast(node_categories[node_id]); + }); + return node_type; +} + +/** + * @brief Translates sorted parent_node_ids to parent_indices with indices from scatter_indices + * + * @param scatter_indices The sorted order of parent_node_ids + * @param parent_node_ids The sorted parent_node_ids + * @param stream CUDA stream used for device memory operations and kernel launches + * @return Translated parent_indices pointing to sorted node_ids positions + */ +rmm::device_uvector translate_sorted_parent_node_indices( + device_span scatter_indices, + device_span parent_node_ids, + rmm::cuda_stream_view stream) +{ + auto const num_nodes = scatter_indices.size(); + auto const gather_indices = cudf::detail::scatter_to_gather( + scatter_indices.begin(), scatter_indices.end(), num_nodes, stream); + + rmm::device_uvector parent_indices(num_nodes, stream); + // gather, except parent sentinels + thrust::transform(rmm::exec_policy(stream), + parent_node_ids.begin(), + parent_node_ids.end(), + parent_indices.begin(), + [gather_indices = gather_indices.data()] __device__(auto parent_node_id) { + return (parent_node_id == parent_node_sentinel) + ? parent_node_sentinel + : gather_indices[parent_node_id]; + }); + return parent_indices; +}; + +/** + * @brief Generates column id and parent column id for each node from the node_level sorted inputs + * + * 4. Per-Level Processing: Propagate parent node ids for each level. + * For each level, + * a. gather col_id from previous level results. input=col_id, gather_map is parent_indices. + * b. stable sort by {parent_col_id, node_type} + * c. scan sum of unique {parent_col_id, node_type} + * d. scatter the col_id back to stable node_level order (using scatter_indices) + * + * pre-condition: All input arguments are stable sorted by node_level (stable in node_id order) + * post-condition: Returned column_id, parent_col_id are level sorted. + * @param node_type Unique id to identify node type, field with different name has different id. 
+ * @param parent_indices Parent node indices in the sorted node_level order + * @param d_level_boundaries The boundaries of each level in the sorted node_level order + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return column_id, parent_column_id + */ +std::pair, rmm::device_uvector> generate_column_id( + device_span node_type, // level sorted + device_span parent_indices, // level sorted + device_span d_level_boundaries, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + + auto const num_nodes = node_type.size(); + rmm::device_uvector scatter_indices(num_nodes, stream); + thrust::sequence(rmm::exec_policy(stream), scatter_indices.begin(), scatter_indices.end()); + rmm::device_uvector col_id(num_nodes, stream, mr); + rmm::device_uvector parent_col_id(num_nodes, stream); + // scatter 1 to level_boundaries alone, useful for scan later + thrust::scatter(rmm::exec_policy(stream), + thrust::make_constant_iterator(1), + thrust::make_constant_iterator(1) + d_level_boundaries.size() - 1, + d_level_boundaries.begin(), + col_id.begin()); + auto level_boundaries = cudf::detail::make_std_vector_async(d_level_boundaries, stream); + // Initialize First level node's node col_id to 0 + thrust::fill(rmm::exec_policy(stream), col_id.begin(), col_id.begin() + level_boundaries[0], 0); + // Initialize First level node's parent_col_id to parent_node_sentinel sentinel + thrust::fill(rmm::exec_policy(stream), + parent_col_id.begin(), + parent_col_id.begin() + level_boundaries[0], + parent_node_sentinel); + + // Per-level processing + auto const num_levels = level_boundaries.size(); + for (size_t level = 1; level < num_levels; level++) { + // Gather the each node's parent's column id for the nodes of the current level + thrust::gather(rmm::exec_policy(stream), + parent_indices.data() + level_boundaries[level - 1], + parent_indices.data() + level_boundaries[level], + col_id.data(), + parent_col_id.data() + level_boundaries[level - 1]); + + // To invoke Radix sort for keys {parent_col_id, node_type} instead of merge sort, + // we need to split to 2 Radix sorts. + // Secondary sort on node_type + thrust::stable_sort_by_key( + rmm::exec_policy(stream), + node_type.data() + level_boundaries[level - 1], + node_type.data() + level_boundaries[level], + thrust::make_zip_iterator(parent_col_id.begin() + level_boundaries[level - 1], + scatter_indices.begin())); + // Primary sort on parent_col_id + thrust::stable_sort_by_key( + rmm::exec_policy(stream), + parent_col_id.begin() + level_boundaries[level - 1], + parent_col_id.begin() + level_boundaries[level], + thrust::make_zip_iterator(node_type.data() + level_boundaries[level - 1], + scatter_indices.begin())); + + auto start_it = thrust::make_zip_iterator(parent_col_id.begin() + level_boundaries[level - 1], + node_type.data() + level_boundaries[level - 1]); + auto adjacent_pair_it = thrust::make_zip_iterator(start_it - 1, start_it); + // Compares two adjacent items, beginning with the first and second item from the current level. + // Writes flags to the index of the rhs item. + // First index holds next col_id from previous level. 
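+    // Illustrative example (hypothetical values): suppose this level's sorted
+    // {parent_col_id, node_type} keys are  A A B B C  and the previous level's
+    // scan left 3 in this level's first slot (its last col_id, 2, plus the
+    // scattered 1). The transform writes difference flags into the remaining
+    // slots, giving  3 0 1 0 1, and the inclusive_scan below turns that into
+    // col ids  3 3 4 4 5.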
+ thrust::transform(rmm::exec_policy(stream), + adjacent_pair_it + 1, + adjacent_pair_it + level_boundaries[level] - level_boundaries[level - 1], + col_id.data() + level_boundaries[level - 1] + 1, + [] __device__(auto adjacent_pair) -> size_type { + auto const lhs = thrust::get<0>(adjacent_pair); + auto const rhs = thrust::get<1>(adjacent_pair); + return lhs != rhs ? 1 : 0; + }); + + // includes previous level last col_id to continue the index. + thrust::inclusive_scan(rmm::exec_policy(stream), + col_id.data() + level_boundaries[level - 1], + col_id.data() + level_boundaries[level] + (level != num_levels - 1), + // +1 only for not-last-levels, for next level start col_id + col_id.data() + level_boundaries[level - 1]); + // scatter to restore original order. + auto const num_nodes_per_level = level_boundaries[level] - level_boundaries[level - 1]; + { + rmm::device_uvector tmp_col_id(num_nodes_per_level, stream); + rmm::device_uvector tmp_parent_col_id(num_nodes_per_level, stream); + thrust::scatter(rmm::exec_policy(stream), + thrust::make_zip_iterator(col_id.begin() + level_boundaries[level - 1], + parent_col_id.data() + level_boundaries[level - 1]), + thrust::make_zip_iterator(col_id.begin() + level_boundaries[level], + parent_col_id.data() + level_boundaries[level]), + scatter_indices.begin(), + thrust::make_zip_iterator(tmp_col_id.begin(), tmp_parent_col_id.begin())); + thrust::copy(rmm::exec_policy(stream), + tmp_col_id.begin(), + tmp_col_id.end(), + col_id.begin() + level_boundaries[level - 1]); + thrust::copy(rmm::exec_policy(stream), + tmp_parent_col_id.begin(), + tmp_parent_col_id.end(), + parent_col_id.begin() + level_boundaries[level - 1]); + } + thrust::sequence(rmm::exec_policy(stream), + scatter_indices.begin(), + scatter_indices.begin() + num_nodes_per_level); + } + + return {std::move(col_id), std::move(parent_col_id)}; +} + +/** + * @brief Computes row indices of each node in the hierarchy. + * 5. Generate row_offset. + * a. stable_sort by parent_col_id. + * b. scan_by_key {parent_col_id} (required only on nodes who's parent is list) + * c. propagate to non-list leaves from parent list node by recursion + * + * pre-condition: + * scatter_indices is a sequence, representing node_id. + * d_tree.node_categories, d_tree.parent_node_ids, parent_col_id are in order of node_id. + * post-condition: row_offsets is in order of node_id. + * parent_col_id and scatter_indices are sorted by parent_col_id. (unused after this function) + * @param scatter_indices node_id + * @param parent_col_id parent node's column id + * @param d_tree Tree representation of the JSON string + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return row_offsets + */ +rmm::device_uvector compute_row_offsets(device_span scatter_indices, + rmm::device_uvector&& parent_col_id, + tree_meta_t& d_tree, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + auto const num_nodes = d_tree.node_categories.size(); + // TODO generate scatter_indices sequences here itself + thrust::stable_sort_by_key( + rmm::exec_policy(stream), parent_col_id.begin(), parent_col_id.end(), scatter_indices.begin()); + rmm::device_uvector row_offsets(num_nodes, stream, mr); + // TODO is it possible to generate list child_offsets too here? 
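+  // Illustrative example: with parent_col_id sorted as  5 5 5 7 7  and a
+  // constant input of 1, the exclusive scan-by-key below yields  0 1 2 0 1,
+  // i.e. each node's row index within its parent column.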
+ thrust::exclusive_scan_by_key( + rmm::exec_policy(stream), + parent_col_id.begin(), // TODO: is there any way to limit this to list parents alone? + parent_col_id.end(), + thrust::make_constant_iterator(1), + row_offsets.begin()); + + // Using scatter instead of sort. + auto& temp_storage = parent_col_id; // reuse parent_col_id as temp storage + thrust::scatter(rmm::exec_policy(stream), + row_offsets.begin(), + row_offsets.end(), + scatter_indices.begin(), + temp_storage.begin()); + row_offsets = std::move(temp_storage); + + // Propagate row offsets to non-list leaves from list's immediate children node by recursion + thrust::transform_if( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_nodes), + row_offsets.begin(), + [node_categories = d_tree.node_categories.data(), + parent_node_ids = d_tree.parent_node_ids.begin(), + row_offsets = row_offsets.begin()] __device__(size_type node_id) { + auto parent_node_id = parent_node_ids[node_id]; + while (parent_node_id != parent_node_sentinel and + node_categories[parent_node_id] != node_t::NC_LIST) { + node_id = parent_node_id; + parent_node_id = parent_node_ids[parent_node_id]; + } + return row_offsets[node_id]; + }, + [node_categories = d_tree.node_categories.data(), + parent_node_ids = d_tree.parent_node_ids.begin()] __device__(size_type node_id) { + auto parent_node_id = parent_node_ids[node_id]; + return parent_node_id != parent_node_sentinel and + !(node_categories[parent_node_id] == node_t::NC_LIST); + }); + return row_offsets; +} + +/** +@note +This algorithm assigns a unique column id to each node in the tree. +The row offset is the row index of the node in that column id. +Algorithm: +1. Convert node_category+fieldname to node_type. + a. Create a hashmap to hash field name and assign unique node id as values. + b. Convert the node categories to node types. + Node type is defined as node category enum value if it is not a field node, + otherwise it is the unique node id assigned by the hashmap (value shifted by #NUM_CATEGORY). +2. Preprocessing: Translate parent node ids after sorting by level. + a. sort by level + b. get gather map of sorted indices + c. translate parent_node_ids to new sorted indices +3. Find level boundaries. + copy_if index of first unique values of sorted levels. +4. Per-Level Processing: Propagate parent node ids for each level. + For each level, + a. gather col_id from previous level results. input=col_id, gather_map is parent_indices. + b. stable sort by {parent_col_id, node_type} + c. scan sum of unique {parent_col_id, node_type} + d. scatter the col_id back to stable node_level order (using scatter_indices) + Restore original node_id order +5. Generate row_offset. + a. stable_sort by parent_col_id. + b. scan_by_key {parent_col_id} (required only on nodes whose parent is a list) + c. propagate to non-list leaves from parent list node by recursion +**/ +std::tuple, rmm::device_uvector> +records_orient_tree_traversal(device_span d_input, + tree_meta_t& d_tree, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + // 1. Convert node_category + field_name to node_type. + + auto num_nodes = d_tree.node_categories.size(); + rmm::device_uvector node_type = + hash_node_type_with_field_name(d_input, d_tree, stream); + // TODO two-level hashing: one for field names + // and another for {node-level, node_category} + field hash for the entire path + + // 2. Preprocessing: Translate parent node ids after sorting by level. + // a. 
sort by level + // b. get gather map of sorted indices + // c. translate parent_node_ids to sorted indices + + rmm::device_uvector scatter_indices(num_nodes, stream); + thrust::sequence(rmm::exec_policy(stream), scatter_indices.begin(), scatter_indices.end()); + + rmm::device_uvector parent_node_ids(d_tree.parent_node_ids, stream); // make a copy + auto out_pid = + thrust::make_zip_iterator(scatter_indices.data(), parent_node_ids.data(), node_type.data()); + // Uses cub radix sort. sort by level + thrust::stable_sort_by_key(rmm::exec_policy(stream), + d_tree.node_levels.data(), + d_tree.node_levels.data() + num_nodes, + out_pid); + + rmm::device_uvector parent_indices = + translate_sorted_parent_node_indices(scatter_indices, parent_node_ids, stream); + // TODO optimize memory usage: parent_node_ids is no longer needed + + // 3. Find level boundaries. + auto level_boundaries = [&]() { + // Already node_levels is sorted + auto max_level = d_tree.node_levels.back_element(stream); + rmm::device_uvector level_boundaries(max_level + 1, stream); + // TODO try reduce_by_key + auto level_end = + thrust::copy_if(rmm::exec_policy(stream), + thrust::make_counting_iterator(1), + thrust::make_counting_iterator(num_nodes + 1), + level_boundaries.begin(), + [num_nodes, node_levels = d_tree.node_levels.begin()] __device__(auto index) { + return index == num_nodes || node_levels[index] != node_levels[index - 1]; + }); + CUDF_EXPECTS(thrust::distance(level_boundaries.begin(), level_end) == max_level + 1, + "num_levels != max_level + 1"); + return level_boundaries; + }; + + // 4. Per-Level Processing: Propagate parent node ids for each level. + auto [col_id, parent_col_id] = generate_column_id(node_type, // level sorted + parent_indices, // level sorted + level_boundaries(), + stream, + mr); + + // restore original order of col_id, parent_col_id and used d_tree members + { + rmm::device_uvector tmp_col_id(num_nodes, stream); + rmm::device_uvector tmp_parent_col_id(num_nodes, stream); + rmm::device_uvector tmp_node_levels(num_nodes, stream); + thrust::scatter( + rmm::exec_policy(stream), + thrust::make_zip_iterator(col_id.begin(), parent_col_id.begin(), d_tree.node_levels.begin()), + thrust::make_zip_iterator(col_id.end(), parent_col_id.end(), d_tree.node_levels.end()), + scatter_indices.begin(), + thrust::make_zip_iterator( + tmp_col_id.begin(), tmp_parent_col_id.begin(), tmp_node_levels.begin())); + col_id = std::move(tmp_col_id); + parent_col_id = std::move(tmp_parent_col_id); + d_tree.node_levels = std::move(tmp_node_levels); + thrust::sequence(rmm::exec_policy(stream), scatter_indices.begin(), scatter_indices.end()); + } + + // 5. Generate row_offset. 
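+  // Illustrative end-to-end example (the concrete ids are hypothetical): for the
+  // input [{"a": 1}, {"a": 2}], one column id is assigned per distinct path,
+  // e.g. list -> 0, struct -> 1, field "a" -> 2, value -> 3, and the two structs
+  // (and, after propagation, their values) receive row offsets 0 and 1.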
+ auto row_offsets = + compute_row_offsets(scatter_indices, std::move(parent_col_id), d_tree, stream, mr); + return std::tuple{std::move(col_id), std::move(row_offsets)}; +} + +} // namespace detail +} // namespace cudf::io::json diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 4e930f86591..548f5c4e9e9 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -21,10 +21,13 @@ #include #include #include +#include #include #include +#include +#include #include namespace cudf::io::json { @@ -58,7 +61,7 @@ using NodeT = char; /// Type used to index into the nodes within the tree of structs, lists, field names, and value /// nodes -using NodeIndexT = uint32_t; +using NodeIndexT = size_type; /// Type large enough to represent tree depth from [0, max-tree-depth); may be an unsigned type using TreeDepthT = StackLevelT; @@ -67,14 +70,14 @@ using TreeDepthT = StackLevelT; * @brief Struct that encapsulate all information of a columnar tree representation. */ struct tree_meta_t { - std::vector node_categories; - std::vector parent_node_ids; - std::vector node_levels; - std::vector node_range_begin; - std::vector node_range_end; + rmm::device_uvector node_categories; + rmm::device_uvector parent_node_ids; + rmm::device_uvector node_levels; + rmm::device_uvector node_range_begin; + rmm::device_uvector node_range_end; }; -constexpr NodeIndexT parent_node_sentinel = std::numeric_limits::max(); +constexpr NodeIndexT parent_node_sentinel = -1; /** * @brief Class of a node (or a node "category") within the tree representation @@ -125,6 +128,7 @@ struct json_column { // Following "items" as the default child column's name of a list column // Using the struct's field names std::map child_columns; + std::vector column_order; // Counting the current number of items in this column row_offset_t current_offset = 0; @@ -140,19 +144,7 @@ struct json_column { * * @param up_to_row_offset The row offset up to which to fill with nulls. */ - void null_fill(row_offset_t up_to_row_offset) - { - // Fill all the rows up to up_to_row_offset with "empty"/null rows - validity.resize(word_index(up_to_row_offset) + 1); - std::fill_n(std::back_inserter(string_offsets), - up_to_row_offset - string_offsets.size(), - (string_offsets.size() > 0) ? string_offsets.back() : 0); - std::fill_n(std::back_inserter(string_lengths), up_to_row_offset - string_lengths.size(), 0); - std::fill_n(std::back_inserter(child_offsets), - up_to_row_offset + 1 - child_offsets.size(), - (child_offsets.size() > 0) ? child_offsets.back() : 0); - current_offset = up_to_row_offset; - } + void null_fill(row_offset_t up_to_row_offset); /** * @brief Recursively iterates through the tree of columns making sure that all child columns of a @@ -160,26 +152,7 @@ struct json_column { * * @param min_row_count The minimum number of rows to be filled. 
*/ - void level_child_cols_recursively(row_offset_t min_row_count) - { - // Fill this columns with nulls up to the given row count - null_fill(min_row_count); - - // If this is a struct column, we need to level all its child columns - if (type == json_col_t::StructColumn) { - for (auto it = std::begin(child_columns); it != std::end(child_columns); it++) { - it->second.level_child_cols_recursively(min_row_count); - } - } - // If this is a list column, we need to make sure that its child column levels its children - else if (type == json_col_t::ListColumn) { - auto it = std::begin(child_columns); - // Make that child column fill its child columns up to its own row count - if (it != std::end(child_columns)) { - it->second.level_child_cols_recursively(it->second.current_offset); - } - } - } + void level_child_cols_recursively(row_offset_t min_row_count); /** * @brief Appends the row at the given index to the column, filling all rows between the column's @@ -193,42 +166,10 @@ struct json_column { * the offsets */ void append_row(uint32_t row_index, - json_col_t const& row_type, + json_col_t row_type, uint32_t string_offset, uint32_t string_end, - uint32_t child_count) - { - // If, thus far, the column's type couldn't be inferred, we infer it to the given type - if (type == json_col_t::Unknown) { type = row_type; } - - // We shouldn't run into this, as we shouldn't be asked to append an "unknown" row type - // CUDF_EXPECTS(type != json_col_t::Unknown, "Encountered invalid JSON token sequence"); - - // Fill all the omitted rows with "empty"/null rows (if needed) - null_fill(row_index); - - // Table listing what we intend to use for a given column type and row type combination - // col type | row type => {valid, FAIL, null} - // ----------------------------------------------- - // List | List => valid - // List | Struct => FAIL - // List | String => null - // Struct | List => FAIL - // Struct | Struct => valid - // Struct | String => null - // String | List => null - // String | Struct => null - // String | String => valid - bool const is_valid = (type == row_type); - if (static_cast(validity.size()) < word_index(current_offset)) - validity.push_back({}); - set_bit_unsafe(&validity.back(), intra_word_index(current_offset)); - valid_count += (is_valid) ? 1U : 0U; - string_offsets.push_back(string_offset); - string_lengths.push_back(string_end - string_offset); - child_offsets.push_back((child_offsets.size() > 0) ? child_offsets.back() + child_count : 0); - current_offset++; - }; + uint32_t child_count); }; /** @@ -243,6 +184,10 @@ enum token_t : PdaTokenT { ListBegin, /// End-of-list token (on encounter of semantic ']') ListEnd, + // Beginning-of-struct-member token + StructMemberBegin, + // End-of-struct-member token + StructMemberEnd, /// Beginning-of-field-name token (on encounter of first quote) FieldNameBegin, /// End-of-field-name token (on encounter of a field name's second quote) @@ -298,6 +243,39 @@ std::pair, rmm::device_uvector> ge rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Parses the given JSON string and generates a tree representation of the given input. 
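+ *
+ * The members of the returned tree_meta_t are parallel arrays indexed by node
+ * id (nodes in token order): each node's category, its parent's node id
+ * (parent_node_sentinel for roots), its tree depth, and the begin/end offsets
+ * of its text span in the input string.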
+ * + * @param tokens Vector of token types in the json string + * @param token_indices The indices within the input string corresponding to each token + * @param stream The CUDA stream to which kernels are dispatched + * @param mr Optional, resource with which to allocate + * @return A tree representation of the input JSON string as vectors of node type, parent index, + * level, begin index, and end index in the input JSON string + */ +tree_meta_t get_tree_representation( + device_span tokens, + device_span token_indices, + rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Traverse the tree representation of the JSON input in records orient format and populate + * the output columns indices and row offsets within that column. + * + * @param d_input The JSON input + * @param d_tree A tree representation of the input JSON string as vectors of node type, parent + * index, level, begin index, and end index in the input JSON string + * @param stream The CUDA stream to which kernels are dispatched + * @param mr Optional, resource with which to allocate + * @return A tuple of the output column indices and the row offsets within each column for each node + */ +std::tuple, rmm::device_uvector> +records_orient_tree_traversal( + device_span d_input, + tree_meta_t& d_tree, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Parses the given JSON string and generates table from the given input. * diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu index 63b1ad32ba1..552cd1e6167 100644 --- a/cpp/src/io/json/nested_json_gpu.cu +++ b/cpp/src/io/json/nested_json_gpu.cu @@ -18,11 +18,15 @@ #include #include -#include +#include +#include #include +#include #include +#include #include +#include #include #include #include @@ -30,10 +34,12 @@ #include #include +#include #include #include #include +#include #include #include @@ -129,9 +135,9 @@ std::array, TT_NUM_STATES> const trans // Translation table (i.e., for each transition, what are the symbols that we output) std::array, NUM_SYMBOL_GROUPS>, TT_NUM_STATES> const translation_table{ {/* IN_STATE { [ } ] " \ OTHER */ - /* TT_OOS */ {{{'{'}, {'['}, {'}'}, {']'}, {'x'}, {'x'}, {'x'}}}, - /* TT_STR */ {{{'x'}, {'x'}, {'x'}, {'x'}, {'x'}, {'x'}, {'x'}}}, - /* TT_ESC */ {{{'x'}, {'x'}, {'x'}, {'x'}, {'x'}, {'x'}, {'x'}}}}}; + /* TT_OOS */ {{{'{'}, {'['}, {'}'}, {']'}, {}, {}, {}}}, + /* TT_STR */ {{{}, {}, {}, {}, {}, {}, {}}}, + /* TT_ESC */ {{{}, {}, {}, {}, {}, {}, {}}}}}; // The DFA's starting state constexpr auto start_state = static_cast(TT_OOS); @@ -472,17 +478,19 @@ auto get_transition_table(bool newline_delimited_json) */ auto get_translation_table() { - constexpr auto StructBegin = token_t::StructBegin; - constexpr auto StructEnd = token_t::StructEnd; - constexpr auto ListBegin = token_t::ListBegin; - constexpr auto ListEnd = token_t::ListEnd; - constexpr auto FieldNameBegin = token_t::FieldNameBegin; - constexpr auto FieldNameEnd = token_t::FieldNameEnd; - constexpr auto StringBegin = token_t::StringBegin; - constexpr auto StringEnd = token_t::StringEnd; - constexpr auto ValueBegin = token_t::ValueBegin; - constexpr auto ValueEnd = token_t::ValueEnd; - constexpr auto ErrorBegin = token_t::ErrorBegin; + constexpr auto StructBegin = token_t::StructBegin; + constexpr auto StructEnd = token_t::StructEnd; + constexpr auto ListBegin = 
token_t::ListBegin; + constexpr auto ListEnd = token_t::ListEnd; + constexpr auto StructMemberBegin = token_t::StructMemberBegin; + constexpr auto StructMemberEnd = token_t::StructMemberEnd; + constexpr auto FieldNameBegin = token_t::FieldNameBegin; + constexpr auto FieldNameEnd = token_t::FieldNameEnd; + constexpr auto StringBegin = token_t::StringBegin; + constexpr auto StringEnd = token_t::StringEnd; + constexpr auto ValueBegin = token_t::ValueBegin; + constexpr auto ValueEnd = token_t::ValueEnd; + constexpr auto ErrorBegin = token_t::ErrorBegin; std::array, NUM_PDA_SGIDS>, PD_NUM_STATES> pda_tlt; pda_tlt[static_cast(pda_state_t::PD_BOV)] = {{ /*ROOT*/ @@ -521,78 +529,80 @@ auto get_translation_table() {}, // WHITE_SPACE {}, // LINE_BREAK {ValueBegin}}}; // OTHER - pda_tlt[static_cast(pda_state_t::PD_BOA)] = {{ /*ROOT*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {ErrorBegin}, // WHITE_SPACE - {ErrorBegin}, // LINE_BREAK - {ErrorBegin}, // OTHER - /*LIST*/ - {StructBegin}, // OPENING_BRACE - {ListBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ListEnd}, // CLOSING_BRACKET - {StringBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {}, // WHITE_SPACE - {}, // LINE_BREAK - {ValueBegin}, // OTHER - /*STRUCT*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {StructEnd}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {FieldNameBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {}, // WHITE_SPACE - {}, // LINE_BREAK - {ErrorBegin}}}; // OTHER - pda_tlt[static_cast(pda_state_t::PD_LON)] = {{ /*ROOT*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {ValueEnd}, // WHITE_SPACE - {ValueEnd}, // LINE_BREAK - {}, // OTHER - /*LIST*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ValueEnd, ListEnd}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ValueEnd}, // COMMA - {ErrorBegin}, // COLON - {ValueEnd}, // WHITE_SPACE - {ValueEnd}, // LINE_BREAK - {}, // OTHER - /*STRUCT*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ValueEnd, StructEnd}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ValueEnd}, // COMMA - {ErrorBegin}, // COLON - {ValueEnd}, // WHITE_SPACE - {ValueEnd}, // LINE_BREAK - {}}}; // OTHER + pda_tlt[static_cast(pda_state_t::PD_BOA)] = { + { /*ROOT*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {ErrorBegin}, // WHITE_SPACE + {ErrorBegin}, // LINE_BREAK + {ErrorBegin}, // OTHER + /*LIST*/ + {StructBegin}, // OPENING_BRACE + {ListBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ListEnd}, // CLOSING_BRACKET + {StringBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {}, // WHITE_SPACE + {}, // LINE_BREAK + {ValueBegin}, // OTHER + /*STRUCT*/ + {ErrorBegin}, // 
OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {StructEnd}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {StructMemberBegin, FieldNameBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {}, // WHITE_SPACE + {}, // LINE_BREAK + {ErrorBegin}}}; // OTHER + pda_tlt[static_cast(pda_state_t::PD_LON)] = { + { /*ROOT*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {ValueEnd}, // WHITE_SPACE + {ValueEnd}, // LINE_BREAK + {}, // OTHER + /*LIST*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ValueEnd, ListEnd}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ValueEnd}, // COMMA + {ErrorBegin}, // COLON + {ValueEnd}, // WHITE_SPACE + {ValueEnd}, // LINE_BREAK + {}, // OTHER + /*STRUCT*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ValueEnd, StructMemberEnd, StructEnd}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ValueEnd, StructMemberEnd}, // COMMA + {ErrorBegin}, // COLON + {ValueEnd}, // WHITE_SPACE + {ValueEnd}, // LINE_BREAK + {}}}; // OTHER pda_tlt[static_cast(pda_state_t::PD_STR)] = {{ /*ROOT*/ {}, // OPENING_BRACE @@ -668,79 +678,81 @@ auto get_translation_table() {}, // LINE_BREAK {}}}; // OTHER - pda_tlt[static_cast(pda_state_t::PD_PVL)] = {{ /*ROOT*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {}, // WHITE_SPACE - {}, // LINE_BREAK - {ErrorBegin}, // OTHER - /*LIST*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ListEnd}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {}, // COMMA - {ErrorBegin}, // COLON - {}, // WHITE_SPACE - {}, // LINE_BREAK - {ErrorBegin}, // OTHER - /*STRUCT*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {StructEnd}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {}, // COMMA - {ErrorBegin}, // COLON - {}, // WHITE_SPACE - {}, // LINE_BREAK - {ErrorBegin}}}; // OTHER - - pda_tlt[static_cast(pda_state_t::PD_BFN)] = {{ /*ROOT*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {ErrorBegin}, // WHITE_SPACE - {ErrorBegin}, // LINE_BREAK - {ErrorBegin}, // OTHER - /*LIST*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {ErrorBegin}, // WHITE_SPACE - {ErrorBegin}, // LINE_BREAK - {ErrorBegin}, // OTHER - /*STRUCT*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {FieldNameBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {}, // WHITE_SPACE - {}, // LINE_BREAK - {ErrorBegin}}}; // OTHER + 
pda_tlt[static_cast(pda_state_t::PD_PVL)] = { + { /*ROOT*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {}, // WHITE_SPACE + {}, // LINE_BREAK + {ErrorBegin}, // OTHER + /*LIST*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ListEnd}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {}, // COMMA + {ErrorBegin}, // COLON + {}, // WHITE_SPACE + {}, // LINE_BREAK + {ErrorBegin}, // OTHER + /*STRUCT*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {StructMemberEnd, StructEnd}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {StructMemberEnd}, // COMMA + {ErrorBegin}, // COLON + {}, // WHITE_SPACE + {}, // LINE_BREAK + {ErrorBegin}}}; // OTHER + + pda_tlt[static_cast(pda_state_t::PD_BFN)] = { + { /*ROOT*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {ErrorBegin}, // WHITE_SPACE + {ErrorBegin}, // LINE_BREAK + {ErrorBegin}, // OTHER + /*LIST*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {ErrorBegin}, // WHITE_SPACE + {ErrorBegin}, // LINE_BREAK + {ErrorBegin}, // OTHER + /*STRUCT*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {StructMemberBegin, FieldNameBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {}, // WHITE_SPACE + {}, // LINE_BREAK + {ErrorBegin}}}; // OTHER pda_tlt[static_cast(pda_state_t::PD_FLN)] = {{ /*ROOT*/ {ErrorBegin}, // OPENING_BRACE @@ -908,13 +920,107 @@ struct JSONToStackOp { } }; +void json_column::null_fill(row_offset_t up_to_row_offset) +{ + // Fill all the rows up to up_to_row_offset with "empty"/null rows + validity.resize(word_index(up_to_row_offset) + 1); + std::fill_n(std::back_inserter(string_offsets), + up_to_row_offset - string_offsets.size(), + (string_offsets.size() > 0) ? string_offsets.back() : 0); + std::fill_n(std::back_inserter(string_lengths), up_to_row_offset - string_lengths.size(), 0); + std::fill_n(std::back_inserter(child_offsets), + up_to_row_offset + 1 - child_offsets.size(), + (child_offsets.size() > 0) ? 
child_offsets.back() : 0); + current_offset = up_to_row_offset; +} + +void json_column::level_child_cols_recursively(row_offset_t min_row_count) +{ + // Fill this columns with nulls up to the given row count + null_fill(min_row_count); + + // If this is a struct column, we need to level all its child columns + if (type == json_col_t::StructColumn) { + for (auto it = std::begin(child_columns); it != std::end(child_columns); it++) { + it->second.level_child_cols_recursively(min_row_count); + } + } + // If this is a list column, we need to make sure that its child column levels its children + else if (type == json_col_t::ListColumn) { + auto it = std::begin(child_columns); + // Make that child column fill its child columns up to its own row count + if (it != std::end(child_columns)) { + it->second.level_child_cols_recursively(it->second.current_offset); + } + } +}; + +void json_column::append_row(uint32_t row_index, + json_col_t row_type, + uint32_t string_offset, + uint32_t string_end, + uint32_t child_count) +{ + // If, thus far, the column's type couldn't be inferred, we infer it to the given type + if (type == json_col_t::Unknown) { + type = row_type; + } + // If, at some point within a column, we encounter a nested type (list or struct), + // we change that column's type to that respective nested type and invalidate all previous rows + else if (type == json_col_t::StringColumn && + (row_type == json_col_t::ListColumn || row_type == json_col_t::StructColumn)) { + // Change the column type + type = row_type; + + // Invalidate all previous entries, as they were _not_ of the nested type to which we just + // converted + std::fill_n(validity.begin(), validity.size(), 0); + valid_count = 0U; + } + // If this is a nested column but we're trying to insert either (a) a list node into a struct + // column or (b) a struct node into a list column, we fail + CUDF_EXPECTS(not((type == json_col_t::ListColumn and row_type == json_col_t::StructColumn) or + (type == json_col_t::StructColumn and row_type == json_col_t::ListColumn)), + "A mix of lists and structs within the same column is not supported"); + + // We shouldn't run into this, as we shouldn't be asked to append an "unknown" row type + CUDF_EXPECTS(type != json_col_t::Unknown, "Encountered invalid JSON token sequence"); + + // Fill all the omitted rows with "empty"/null rows (if needed) + null_fill(row_index); + + // Table listing what we intend to use for a given column type and row type combination + // col type | row type => {valid, FAIL, null} + // ----------------------------------------------- + // List | List => valid + // List | Struct => FAIL + // List | String => null + // Struct | List => FAIL + // Struct | Struct => valid + // Struct | String => null + // String | List => valid (we switch col type to list, null'ing all previous rows) + // String | Struct => valid (we switch col type to list, null'ing all previous rows) + // String | String => valid + bool const is_valid = (type == row_type); + if (static_cast(validity.size()) < word_index(current_offset)) validity.push_back({}); + if (is_valid) { set_bit_unsafe(&validity.back(), intra_word_index(current_offset)); } + valid_count += (is_valid) ? 1U : 0U; + string_offsets.push_back(string_offset); + string_lengths.push_back(string_end - string_offset); + child_offsets.push_back((child_offsets.size() > 0) ? 
child_offsets.back() + child_count : 0); + current_offset++; +}; + namespace detail { void get_stack_context(device_span json_in, SymbolT* d_top_of_stack, rmm::cuda_stream_view stream) { - constexpr std::size_t single_item = 1; + // Range of encapsulating function that comprises: + // -> DFA simulation for filtering out brackets and braces inside of quotes + // -> Logical stack to infer the stack context + CUDF_FUNC_RANGE(); // Symbol representing the JSON-root (i.e., we're at nesting level '0') constexpr StackSymbolT root_symbol = '_'; @@ -922,7 +1028,7 @@ void get_stack_context(device_span json_in, constexpr StackSymbolT read_symbol = 'x'; // Number of stack operations in the input (i.e., number of '{', '}', '[', ']' outside of quotes) - hostdevice_vector num_stack_ops(single_item, stream); + rmm::device_scalar d_num_stack_ops(stream); // Sequence of stack symbols and their position in the original input (sparse representation) rmm::device_uvector stack_ops{json_in.size(), stream}; @@ -945,14 +1051,17 @@ void get_stack_context(device_span json_in, static_cast(json_in.size()), stack_ops.data(), stack_op_indices.data(), - num_stack_ops.device_ptr(), + d_num_stack_ops.data(), to_stack_op::start_state, stream); + // Copy back to actual number of stack operations + auto const num_stack_ops = d_num_stack_ops.value(stream); + // stack operations with indices are converted to top of the stack for each character in the input fst::sparse_stack_op_to_top_of_stack( stack_ops.data(), - device_span{stack_op_indices.data(), stack_op_indices.size()}, + device_span{stack_op_indices.data(), num_stack_ops}, JSONToStackOp{}, d_top_of_stack, root_symbol, @@ -967,26 +1076,28 @@ std::pair, rmm::device_uvector> ge rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - rmm::device_uvector tokens{json_in.size(), stream, mr}; - rmm::device_uvector tokens_indices{json_in.size(), stream, mr}; - rmm::device_scalar num_written_tokens{stream, mr}; + // Range of encapsulating function that parses to internal columnar data representation + CUDF_FUNC_RANGE(); auto const new_line_delimited_json = options.is_enabled_lines(); - // Memory holding the top-of-stack stack context for the input - rmm::device_uvector stack_op_indices{json_in.size(), stream}; - - // Identify what is the stack context for each input character (is it: JSON-root, struct, or list) - get_stack_context(json_in, stack_op_indices.data(), stream); - // Prepare for PDA transducer pass, merging input symbols with stack symbols - rmm::device_uvector pda_sgids{json_in.size(), stream}; - auto zip_in = thrust::make_zip_iterator(json_in.data(), stack_op_indices.data()); - thrust::transform(rmm::exec_policy(stream), - zip_in, - zip_in + json_in.size(), - pda_sgids.data(), - tokenizer_pda::PdaSymbolToSymbolGroupId{}); + rmm::device_uvector pda_sgids = [json_in, stream]() { + rmm::device_uvector pda_sgids{json_in.size(), stream}; + // Memory holding the top-of-stack stack context for the input + rmm::device_uvector stack_op_indices{json_in.size(), stream}; + + // Identify what is the stack context for each input character (JSON-root, struct, or list) + get_stack_context(json_in, stack_op_indices.data(), stream); + + auto zip_in = thrust::make_zip_iterator(json_in.data(), stack_op_indices.data()); + thrust::transform(rmm::exec_policy(stream), + zip_in, + zip_in + json_in.size(), + pda_sgids.data(), + tokenizer_pda::PdaSymbolToSymbolGroupId{}); + return pda_sgids; + }(); // PDA transducer alias using ToTokenStreamFstT = @@ -1006,6 +1117,9 @@ std::pair, 
rmm::device_uvector> ge stream}; // Perform a PDA-transducer pass
+ rmm::device_scalar num_written_tokens{stream};
+ rmm::device_uvector tokens{json_in.size(), stream, mr};
+ rmm::device_uvector tokens_indices{json_in.size(), stream, mr};
json_to_tokens_fst.Transduce(pda_sgids.begin(), static_cast(json_in.size()), tokens.data(), @@ -1014,7 +1128,7 @@ std::pair, rmm::device_uvector> ge tokenizer_pda::start_state, stream);
- auto num_total_tokens = num_written_tokens.value(stream);
+ auto const num_total_tokens = num_written_tokens.value(stream);
tokens.resize(num_total_tokens, stream); tokens_indices.resize(num_total_tokens, stream); @@ -1030,6 +1144,8 @@ std::pair, rmm::device_uvector> ge * @param[in] input The JSON input in host memory * @param[in] d_input The JSON input in device memory * @param[in] options Parsing options specifying the parsing behaviour
+ * @param[in] include_quote_char Whether to include the original quote chars around string values,
+ * which makes it possible to distinguish string values from numeric and literal values
* @param[in] stream The CUDA stream to which kernels are dispatched * @param[in] mr Optional, resource with which to allocate * @return The columnar representation of the data from the given JSON input @@ -1039,9 +1155,13 @@ void make_json_column(json_column& root_column, host_span input, device_span d_input, cudf::io::json_reader_options const& options,
+ bool include_quote_char,
rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) {
+ // Range of encapsulating function that parses to internal columnar data representation
+ CUDF_FUNC_RANGE();
+
// Default name for a list's child column std::string const list_child_name = "element"; @@ -1079,12 +1199,20 @@ void make_json_column(json_column& root_column, }; };
- // Skips the quote char if the token is a beginning-of-string or beginning-of-field-name token
- auto get_token_index = [](PdaTokenT const token, SymbolOffsetT const token_index) {
- constexpr SymbolOffsetT skip_quote_char = 1;
+ // Depending on whether we want to include the quotes of strings or not, respectively, we:
+ // (a) strip off the beginning quote included in StringBegin and FieldNameBegin, or
+ // (b) include the end quote excluded from StringEnd and strip off the beginning quote
+ // included in FieldNameBegin
+ auto get_token_index = [include_quote_char](PdaTokenT const token,
+ SymbolOffsetT const token_index) {
+ constexpr SymbolOffsetT quote_char_size = 1;
switch (token) {
- case token_t::StringBegin: return token_index + skip_quote_char;
- case token_t::FieldNameBegin: return token_index + skip_quote_char;
+ // Optionally strip off quote char included for StringBegin
+ case token_t::StringBegin: return token_index + (include_quote_char ? 0 : quote_char_size);
+ // Optionally include trailing quote char for string values excluded for StringEnd
+ case token_t::StringEnd: return token_index + (include_quote_char ?
quote_char_size : 0); + // Strip off quote char included for FieldNameBegin + case token_t::FieldNameBegin: return token_index + quote_char_size; default: return token_index; }; }; @@ -1116,6 +1244,8 @@ void make_json_column(json_column& root_column, case token_t::StructEnd: return "StructEnd"; case token_t::ListBegin: return "ListBegin"; case token_t::ListEnd: return "ListEnd"; + case token_t::StructMemberBegin: return "StructMemberBegin"; + case token_t::StructMemberEnd: return "StructMemberEnd"; case token_t::FieldNameBegin: return "FieldNameBegin"; case token_t::FieldNameEnd: return "FieldNameEnd"; case token_t::StringBegin: return "StringBegin"; @@ -1169,6 +1299,7 @@ void make_json_column(json_column& root_column, if (current_data_path.top().column->child_columns.size() == 0) { current_data_path.top().column->child_columns.emplace(std::string{list_child_name}, json_column{json_col_t::Unknown}); + current_data_path.top().column->column_order.push_back(list_child_name); } current_data_path.top().current_selected_col = ¤t_data_path.top().column->child_columns.begin()->second; @@ -1208,6 +1339,7 @@ void make_json_column(json_column& root_column, // The field name's column does not exist yet, so we have to append the child column to the // struct column + struct_col->column_order.push_back(field_name); return &struct_col->child_columns.emplace(field_name, json_column{}).first->second; }; @@ -1401,16 +1533,38 @@ void make_json_column(json_column& root_column, root_column.level_child_cols_recursively(root_column.current_offset); } +/** + * @brief Retrieves the parse_options to be used for type inference and type casting + * + * @param options The reader options to influence the relevant type inference and type casting + * options + */ +auto parsing_options(cudf::io::json_reader_options const& options) +{ + auto parse_opts = cudf::io::parse_options{',', '\n', '\"', '.'}; + + auto const stream = cudf::default_stream_value; + parse_opts.dayfirst = options.is_enabled_dayfirst(); + parse_opts.keepquotes = options.is_enabled_keep_quotes(); + parse_opts.trie_true = cudf::detail::create_serialized_trie({"true"}, stream); + parse_opts.trie_false = cudf::detail::create_serialized_trie({"false"}, stream); + parse_opts.trie_na = cudf::detail::create_serialized_trie({"", "null"}, stream); + return parse_opts; +} + std::pair, std::vector> json_column_to_cudf_column( json_column const& json_col, device_span d_input, + cudf::io::json_reader_options const& options, + std::optional schema, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + // Range of orchestrating/encapsulating function + CUDF_FUNC_RANGE(); + auto make_validity = [stream, mr](json_column const& json_col) -> std::pair { - if (json_col.current_offset == json_col.valid_count) { return {rmm::device_buffer{}, 0}; } - return {rmm::device_buffer{json_col.validity.data(), bitmask_allocation_size_bytes(json_col.current_offset), stream, @@ -1418,31 +1572,85 @@ std::pair, std::vector> json_column_to json_col.current_offset - json_col.valid_count}; }; + auto get_child_schema = [schema](auto child_name) -> std::optional { + if (schema.has_value()) { + auto const result = schema.value().child_types.find(child_name); + if (result != std::end(schema.value().child_types)) { return result->second; } + } + return {}; + }; + switch (json_col.type) { case json_col_t::StringColumn: { - // move string_offsets to GPU and transform to string column - auto const col_size = json_col.string_offsets.size(); - using char_length_pair_t = thrust::pair; 
+ auto const col_size = json_col.string_offsets.size(); CUDF_EXPECTS(json_col.string_offsets.size() == json_col.string_lengths.size(), "string offset, string length mismatch"); - rmm::device_uvector d_string_data(col_size, stream); + + // Move string_offsets and string_lengths to GPU rmm::device_uvector d_string_offsets = cudf::detail::make_device_uvector_async(json_col.string_offsets, stream); rmm::device_uvector d_string_lengths = cudf::detail::make_device_uvector_async(json_col.string_lengths, stream); + + // Prepare iterator that returns (string_offset, string_length)-tuples auto offset_length_it = thrust::make_zip_iterator(d_string_offsets.begin(), d_string_lengths.begin()); - thrust::transform(rmm::exec_policy(stream), - offset_length_it, - offset_length_it + col_size, - d_string_data.data(), - [data = d_input.data()] __device__(auto ip) { - return char_length_pair_t{data + thrust::get<0>(ip), thrust::get<1>(ip)}; - }); - auto str_col_ptr = make_strings_column(d_string_data, stream, mr); - auto [result_bitmask, null_count] = make_validity(json_col); - str_col_ptr->set_null_mask(result_bitmask, null_count); - return {std::move(str_col_ptr), {{"offsets"}, {"chars"}}}; + + // Prepare iterator that returns (string_offset, string_length)-pairs needed by inference + auto string_ranges_it = + thrust::make_transform_iterator(offset_length_it, [] __device__(auto ip) { + return thrust::pair{ + thrust::get<0>(ip), static_cast(thrust::get<1>(ip))}; + }); + + // Prepare iterator that returns (string_ptr, string_length)-pairs needed by type conversion + auto string_spans_it = thrust::make_transform_iterator( + offset_length_it, [data = d_input.data()] __device__(auto ip) { + return thrust::pair{ + data + thrust::get<0>(ip), static_cast(thrust::get<1>(ip))}; + }); + + data_type target_type{}; + + if (schema.has_value()) { +#ifdef NJP_DEBUG_PRINT + std::cout << "-> explicit type: " + << (schema.has_value() ? 
std::to_string(static_cast(schema->type.id())) + : "n/a"); +#endif + target_type = schema.value().type; + } + // Infer column type, if we don't have an explicit type for it + else { + target_type = cudf::io::detail::infer_data_type( + parsing_options(options).json_view(), d_input, string_ranges_it, col_size, stream); + } + + // Convert strings to the inferred data type + auto col = experimental::detail::parse_data(string_spans_it, + col_size, + target_type, + make_validity(json_col).first, + parsing_options(options).view(), + stream, + mr); + + // Reset nullable if we do not have nulls + // This is to match the existing JSON reader's behaviour: + // - Non-string columns will always be returned as nullable + // - String columns will be returned as nullable, iff there's at least one null entry + if (target_type.id() == type_id::STRING and col->null_count() == 0) { + col->set_null_mask(rmm::device_buffer{0, stream, mr}, 0); + } + + // For string columns return ["offsets", "char"] schema + if (target_type.id() == type_id::STRING) { + return {std::move(col), {{"offsets"}, {"chars"}}}; + } + // Non-string leaf-columns (e.g., numeric) do not have child columns in the schema + else { + return {std::move(col), {}}; + } break; } case json_col_t::StructColumn: { @@ -1450,10 +1658,12 @@ std::pair, std::vector> json_column_to std::vector column_names{}; size_type num_rows{json_col.current_offset}; // Create children columns - for (auto const& col : json_col.child_columns) { - column_names.emplace_back(col.first); - auto const& child_col = col.second; - auto [child_column, names] = json_column_to_cudf_column(child_col, d_input, stream, mr); + for (auto const& col_name : json_col.column_order) { + auto const& col = json_col.child_columns.find(col_name); + column_names.emplace_back(col->first); + auto const& child_col = col->second; + auto [child_column, names] = json_column_to_cudf_column( + child_col, d_input, options, get_child_schema(col_name), stream, mr); CUDF_EXPECTS(num_rows == child_column->size(), "All children columns must have the same size"); child_columns.push_back(std::move(child_column)); @@ -1478,7 +1688,12 @@ std::pair, std::vector> json_column_to std::make_unique(data_type{type_id::INT32}, num_rows, d_offsets.release()); // Create children column auto [child_column, names] = - json_column_to_cudf_column(json_col.child_columns.begin()->second, d_input, stream, mr); + json_column_to_cudf_column(json_col.child_columns.begin()->second, + d_input, + options, + get_child_schema(json_col.child_columns.begin()->first), + stream, + mr); column_names.back().children = names; auto [result_bitmask, null_count] = make_validity(json_col); return {make_lists_column(num_rows - 1, @@ -1502,6 +1717,9 @@ table_with_metadata parse_nested_json(host_span input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + // Range of orchestrating/encapsulating function + CUDF_FUNC_RANGE(); + auto const new_line_delimited_json = options.is_enabled_lines(); // Allocate device memory for the JSON input & copy over to device @@ -1516,6 +1734,10 @@ table_with_metadata parse_nested_json(host_span input, constexpr uint32_t token_end_offset_zero = 0; constexpr uint32_t node_init_child_count_zero = 0; + // Whether the tokenizer stage should keep quote characters for string values + // If the tokenizer keeps the quote characters, they may be stripped during type casting + constexpr bool include_quote_chars = true; + // We initialize the very root node and root column, which represent the JSON document being 
// parsed. That root node is a list node and that root column is a list column. The column has the // root node as its only row. The values parsed from the JSON input will be treated as follows: @@ -1529,7 +1751,8 @@ table_with_metadata parse_nested_json(host_span input, // Push the root node onto the stack for the data path data_path.push({&root_column, row_offset_zero, nullptr, node_init_child_count_zero}); - make_json_column(root_column, data_path, input, d_input, options, stream, mr); + make_json_column( + root_column, data_path, input, d_input, options, include_quote_chars, stream, mr); // data_root refers to the root column of the data represented by the given JSON string auto const& data_root = @@ -1550,14 +1773,61 @@ table_with_metadata parse_nested_json(host_span input, std::vector out_column_names; // Iterate over the struct's child columns and convert to cudf column - for (auto const& [col_name, json_col] : root_struct_col.child_columns) { + size_type column_index = 0; + for (auto const& col_name : root_struct_col.column_order) { + auto const& json_col = root_struct_col.child_columns.find(col_name)->second; // Insert this columns name into the schema out_column_names.emplace_back(col_name); + std::optional child_schema_element = std::visit( + cudf::detail::visitor_overload{ + [column_index](const std::vector& user_dtypes) -> std::optional { + auto ret = (static_cast(column_index) < user_dtypes.size()) + ? std::optional{{user_dtypes[column_index]}} + : std::optional{}; +#ifdef NJP_DEBUG_PRINT + std::cout << "Column by index: #" << column_index << ", type id: " + << (ret.has_value() ? std::to_string(static_cast(ret->type.id())) : "n/a") + << ", with " << (ret.has_value() ? ret->child_types.size() : 0) << " children" + << "\n"; +#endif + return ret; + }, + [col_name]( + std::map const& user_dtypes) -> std::optional { + auto ret = (user_dtypes.find(col_name) != std::end(user_dtypes)) + ? std::optional{{user_dtypes.find(col_name)->second}} + : std::optional{}; +#ifdef NJP_DEBUG_PRINT + std::cout << "Column by flat name: '" << col_name << "', type id: " + << (ret.has_value() ? std::to_string(static_cast(ret->type.id())) : "n/a") + << ", with " << (ret.has_value() ? ret->child_types.size() : 0) << " children" + << "\n"; +#endif + return ret; + }, + [col_name](std::map const& user_dtypes) + -> std::optional { + auto ret = (user_dtypes.find(col_name) != std::end(user_dtypes)) + ? user_dtypes.find(col_name)->second + : std::optional{}; +#ifdef NJP_DEBUG_PRINT + std::cout << "Column by nested name: #" << col_name << ", type id: " + << (ret.has_value() ? std::to_string(static_cast(ret->type.id())) : "n/a") + << ", with " << (ret.has_value() ? 
ret->child_types.size() : 0) << " children" + << "\n"; +#endif + return ret; + }}, + options.get_dtypes()); + // Get this JSON column's cudf column and schema info - auto [cudf_col, col_name_info] = json_column_to_cudf_column(json_col, d_input, stream, mr); + auto [cudf_col, col_name_info] = + json_column_to_cudf_column(json_col, d_input, options, child_schema_element, stream, mr); out_column_names.back().children = std::move(col_name_info); out_columns.emplace_back(std::move(cudf_col)); + + column_index++; } return table_with_metadata{std::make_unique(std::move(out_columns)), diff --git a/cpp/src/io/json/reader_impl.cu b/cpp/src/io/json/reader_impl.cu index da6e7621449..48b2af81fcd 100644 --- a/cpp/src/io/json/reader_impl.cu +++ b/cpp/src/io/json/reader_impl.cu @@ -432,6 +432,18 @@ std::vector get_data_types(json_reader_options const& reader_opts, return it->second; }); return sorted_dtypes; + }, + [&](const std::map& dtypes) { + std::vector sorted_dtypes; + std::transform(std::cbegin(column_names), + std::cend(column_names), + std::back_inserter(sorted_dtypes), + [&](auto const& column_name) { + auto const it = dtypes.find(column_name); + CUDF_EXPECTS(it != dtypes.end(), "Must specify types for all columns"); + return it->second.type; + }); + return sorted_dtypes; }}, reader_opts.get_dtypes()); } else { diff --git a/cpp/src/io/orc/orc.cpp b/cpp/src/io/orc/orc.cpp index 3f63c8240ae..2f49416e681 100644 --- a/cpp/src/io/orc/orc.cpp +++ b/cpp/src/io/orc/orc.cpp @@ -184,7 +184,8 @@ void ProtobufReader::read(column_statistics& s, size_t maxlen) make_field_reader(6, s.decimal_stats), make_field_reader(7, s.date_stats), make_field_reader(8, s.binary_stats), - make_field_reader(9, s.timestamp_stats)); + make_field_reader(9, s.timestamp_stats), + make_field_reader(10, s.has_null)); function_builder(s, maxlen, op); } diff --git a/cpp/src/io/orc/orc.hpp b/cpp/src/io/orc/orc.hpp index a007750d264..2018024f566 100644 --- a/cpp/src/io/orc/orc.hpp +++ b/cpp/src/io/orc/orc.hpp @@ -122,7 +122,7 @@ struct column_statistics { std::optional date_stats; std::optional binary_stats; std::optional timestamp_stats; - // TODO: hasNull (issue #7087) + std::optional has_null; }; struct StripeStatistics { @@ -423,6 +423,12 @@ inline uint8_t ProtobufReader::get() return (m_cur < m_end) ? 
*m_cur++ : 0; };
+template <>
+inline bool ProtobufReader::get()
+{
+ return static_cast(get());
+};
+
template <> inline uint32_t ProtobufReader::get() { diff --git a/cpp/src/io/orc/stats_enc.cu b/cpp/src/io/orc/stats_enc.cu index fb1db395922..bbff689082e 100644 --- a/cpp/src/io/orc/stats_enc.cu +++ b/cpp/src/io/orc/stats_enc.cu @@ -281,7 +281,7 @@ __global__ void __launch_bounds__(encode_threads_per_block) // optional sint64 sum = 3; // sum will store the total length of all strings // } if (s->chunk.has_minmax && s->chunk.has_sum) {
- uint32_t sz = (pb_put_uint(cur, 3, s->chunk.sum.i_val) - cur) +
+ uint32_t sz = (pb_put_int(cur, 3, s->chunk.sum.i_val) - cur) +
(pb_put_uint(cur, 1, s->chunk.min_value.str_val.length) - cur) + (pb_put_uint(cur, 2, s->chunk.max_value.str_val.length) - cur) + s->chunk.min_value.str_val.length + s->chunk.max_value.str_val.length; @@ -291,7 +291,7 @@ __global__ void __launch_bounds__(encode_threads_per_block) cur, 1, s->chunk.min_value.str_val.ptr, s->chunk.min_value.str_val.length); cur = pb_put_binary( cur, 2, s->chunk.max_value.str_val.ptr, s->chunk.max_value.str_val.length);
- cur = pb_put_uint(cur, 3, s->chunk.sum.i_val);
+ cur = pb_put_int(cur, 3, s->chunk.sum.i_val);
} break; case dtype_bool: diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index 4fa407f4e88..c9cc0f04b3c 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -1770,7 +1770,8 @@ __global__ void __launch_bounds__(block_size) // Adjust seconds only for negative timestamps with positive nanoseconds. // Alternative way to represent negative timestamps is with negative nanoseconds // in which case the adjustment is not needed.
- if (seconds < 0 && nanos > 0) { seconds -= 1; }
+ // Comparing with 999999 instead of zero to match the Apache writer.
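// A small worked example with assumed values: a timestamp of -2.5s relative to
// the ORC epoch arrives as seconds = -2 (truncated toward zero) and
// nanos = 500000000, so the reader must apply seconds -= 1 to reconstruct
// -3s + 0.5s = -2.5s. Values with nanos <= 999999 are presumably encoded by the
// Apache writer such that no correction applies, hence the threshold below.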
+ if (seconds < 0 and nanos > 999999) { seconds -= 1; } duration_ns d_ns{nanos}; duration_s d_s{seconds}; diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 77984ee3c27..cdee066a06a 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -478,15 +478,16 @@ inline __device__ void PackLiteralsShuffle( constexpr uint32_t MASK2T = 1; // mask for 2 thread leader constexpr uint32_t MASK4T = 3; // mask for 4 thread leader constexpr uint32_t MASK8T = 7; // mask for 8 thread leader - uint64_t vt; + uint64_t v64; if (t > (count | 0x1f)) { return; } switch (w) { case 1: - v |= shuffle_xor(v, 1) << 1; - v |= shuffle_xor(v, 2) << 2; - v |= shuffle_xor(v, 4) << 4; + v |= shuffle_xor(v, 1) << 1; // grab bit 1 from neighbor + v |= shuffle_xor(v, 2) << 2; // grab bits 2-3 from 2 lanes over + v |= shuffle_xor(v, 4) << 4; // grab bits 4-7 from 4 lanes over + // sub-warp leader writes the combined bits if (t < count && !(t & MASK8T)) { dst[(t * w) >> 3] = v; } return; case 2: @@ -511,14 +512,13 @@ inline __device__ void PackLiteralsShuffle( case 5: v |= shuffle_xor(v, 1) << 5; v |= shuffle_xor(v, 2) << 10; - vt = shuffle_xor(v, 4); - vt = vt << 20 | v; + v64 = static_cast(shuffle_xor(v, 4)) << 20 | v; if (t < count && !(t & MASK8T)) { - dst[(t >> 3) * 5 + 0] = vt; - dst[(t >> 3) * 5 + 1] = vt >> 8; - dst[(t >> 3) * 5 + 2] = vt >> 16; - dst[(t >> 3) * 5 + 3] = vt >> 24; - dst[(t >> 3) * 5 + 4] = vt >> 32; + dst[(t >> 3) * 5 + 0] = v64; + dst[(t >> 3) * 5 + 1] = v64 >> 8; + dst[(t >> 3) * 5 + 2] = v64 >> 16; + dst[(t >> 3) * 5 + 3] = v64 >> 24; + dst[(t >> 3) * 5 + 4] = v64 >> 32; } return; case 6: @@ -535,14 +535,13 @@ inline __device__ void PackLiteralsShuffle( return; case 10: v |= shuffle_xor(v, 1) << 10; - vt = shuffle_xor(v, 2); - vt = vt << 20 | v; + v64 = static_cast(shuffle_xor(v, 2)) << 20 | v; if (t < count && !(t & MASK4T)) { - dst[(t >> 2) * 5 + 0] = vt; - dst[(t >> 2) * 5 + 1] = vt >> 8; - dst[(t >> 2) * 5 + 2] = vt >> 16; - dst[(t >> 2) * 5 + 3] = vt >> 24; - dst[(t >> 2) * 5 + 4] = vt >> 32; + dst[(t >> 2) * 5 + 0] = v64; + dst[(t >> 2) * 5 + 1] = v64 >> 8; + dst[(t >> 2) * 5 + 2] = v64 >> 16; + dst[(t >> 2) * 5 + 3] = v64 >> 24; + dst[(t >> 2) * 5 + 4] = v64 >> 32; } return; case 12: @@ -560,14 +559,13 @@ inline __device__ void PackLiteralsShuffle( } return; case 20: - vt = shuffle_xor(v, 1); - vt = vt << 20 | v; + v64 = static_cast(shuffle_xor(v, 1)) << 20 | v; if (t < count && !(t & MASK2T)) { - dst[(t >> 1) * 5 + 0] = vt; - dst[(t >> 1) * 5 + 1] = vt >> 8; - dst[(t >> 1) * 5 + 2] = vt >> 16; - dst[(t >> 1) * 5 + 3] = vt >> 24; - dst[(t >> 1) * 5 + 4] = vt >> 32; + dst[(t >> 1) * 5 + 0] = v64; + dst[(t >> 1) * 5 + 1] = v64 >> 8; + dst[(t >> 1) * 5 + 2] = v64 >> 16; + dst[(t >> 1) * 5 + 3] = v64 >> 24; + dst[(t >> 1) * 5 + 4] = v64 >> 32; } return; case 24: @@ -590,10 +588,14 @@ inline __device__ void PackLiteralsRoundRobin( { // Scratch space to temporarily write to. Needed because we will use atomics to write 32 bit // words but the destination mem may not be a multiple of 4 bytes. - // TODO (dm): This assumes blockdim = 128 and max bits per value = 16. Reduce magic numbers. - // To allow up to 24 bit this needs to be sized at 96 words. - __shared__ uint32_t scratch[64]; - if (t < 64) { scratch[t] = 0; } + // TODO (dm): This assumes blockdim = 128. Reduce magic numbers. 
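// Sizing sketch under the stated assumption (blockdim = 128) and with
// MAX_DICT_BITS = 24 from parquet_gpu.hpp: NUM_BYTES = 128 * 24 / 8 = 384,
// so SCRATCH_SIZE = 384 / sizeof(uint32_t) = 96 words, the "96 words" the
// removed TODO asked for; the old fixed 64-word array only covered bit
// widths up to 16.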
+ constexpr uint32_t NUM_THREADS = 128; // this needs to match gpuEncodePages block_size parameter + constexpr uint32_t NUM_BYTES = (NUM_THREADS * MAX_DICT_BITS) >> 3; + constexpr uint32_t SCRATCH_SIZE = NUM_BYTES / sizeof(uint32_t); + __shared__ uint32_t scratch[SCRATCH_SIZE]; + for (uint32_t i = t; i < SCRATCH_SIZE; i += NUM_THREADS) { + scratch[i] = 0; + } __syncthreads(); if (t <= count) { @@ -615,10 +617,9 @@ inline __device__ void PackLiteralsRoundRobin( auto available_bytes = (count * w + 7) / 8; auto scratch_bytes = reinterpret_cast(&scratch[0]); - if (t < available_bytes) { dst[t] = scratch_bytes[t]; } - if (t + 128 < available_bytes) { dst[t + 128] = scratch_bytes[t + 128]; } - // would need the following for up to 24 bits - // if (t + 256 < available_bytes) { dst[t + 256] = scratch_bytes[t + 256]; } + for (uint32_t i = t; i < available_bytes; i += NUM_THREADS) { + dst[i] = scratch_bytes[i]; + } __syncthreads(); } @@ -628,6 +629,7 @@ inline __device__ void PackLiteralsRoundRobin( inline __device__ void PackLiterals( uint8_t* dst, uint32_t v, uint32_t count, uint32_t w, uint32_t t) { + if (w > 24) { CUDF_UNREACHABLE("Unsupported bit width"); } switch (w) { case 1: case 2: @@ -644,11 +646,9 @@ inline __device__ void PackLiterals( // bit widths that lie on easy boundaries can be handled either directly // (8, 16, 24) or through fast shuffle operations. PackLiteralsShuffle(dst, v, count, w, t); - break; + return; default: - if (w > 16) { CUDF_UNREACHABLE("Unsupported bit width"); } - // less efficient bit packing that uses atomics, but can handle arbitrary - // bit widths up to 16. used for repetition and definition level encoding + // bit packing that uses atomics, but can handle arbitrary bit widths up to 24. PackLiteralsRoundRobin(dst, v, count, w, t); } } diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index d0d367df962..8f4cd5c6f3b 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -45,8 +45,11 @@ namespace parquet { using cudf::io::detail::string_index_pair; +// Largest number of bits to use for dictionary keys +constexpr int MAX_DICT_BITS = 24; + // Total number of unsigned 24 bit values -constexpr size_type MAX_DICT_SIZE = (1 << 24) - 1; +constexpr size_type MAX_DICT_SIZE = (1 << MAX_DICT_BITS) - 1; /** * @brief Struct representing an input column in the file. diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 2bfd7c1ba4d..9514b053451 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -1051,31 +1051,23 @@ auto build_chunk_dictionaries(hostdevice_2dvector& chunks, // Make decision about which chunks have dictionary for (auto& ck : h_chunks) { if (not ck.use_dictionary) { continue; } - std::tie(ck.use_dictionary, ck.dict_rle_bits) = [&]() { + std::tie(ck.use_dictionary, ck.dict_rle_bits) = [&]() -> std::pair { // calculate size of chunk if dictionary is used // If we have N unique values then the idx for the last value is N - 1 and nbits is the number // of bits required to encode indices into the dictionary auto max_dict_index = (ck.num_dict_entries > 0) ? 
ck.num_dict_entries - 1 : 0; - auto nbits = CompactProtocolReader::NumRequiredBits(max_dict_index); + auto nbits = std::max(CompactProtocolReader::NumRequiredBits(max_dict_index), 1); - // We don't use dictionary if the indices are > 24 bits because that's the maximum bitpacking - // bitsize we efficiently support - if (nbits > 24) { return std::pair(false, 0); } - - // Only these bit sizes are allowed for RLE encoding because it's compute optimized - constexpr auto allowed_bitsizes = - std::array{1, 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24}; - - // ceil to (1/2/3/4/5/6/8/10/12/16/20/24) - auto rle_bits = *std::lower_bound(allowed_bitsizes.begin(), allowed_bitsizes.end(), nbits); - auto rle_byte_size = util::div_rounding_up_safe(ck.num_values * rle_bits, 8); + // We don't use dictionary if the indices are > MAX_DICT_BITS bits because that's the maximum + // bitpacking bitsize we efficiently support + if (nbits > MAX_DICT_BITS) { return {false, 0}; } + auto rle_byte_size = util::div_rounding_up_safe(ck.num_values * nbits, 8); auto dict_enc_size = ck.uniq_data_size + rle_byte_size; + if (ck.plain_data_size <= dict_enc_size) { return {false, 0}; } - bool use_dict = (ck.plain_data_size > dict_enc_size); - if (not use_dict) { rle_bits = 0; } - return std::pair(use_dict, rle_bits); + return {true, nbits}; }(); } diff --git a/cpp/src/io/text/multibyte_split.cu b/cpp/src/io/text/multibyte_split.cu index 5313d7a89ba..133c5fe9826 100644 --- a/cpp/src/io/text/multibyte_split.cu +++ b/cpp/src/io/text/multibyte_split.cu @@ -302,6 +302,73 @@ __global__ __launch_bounds__(THREADS_PER_TILE) void multibyte_split_kernel( } } +__global__ __launch_bounds__(THREADS_PER_TILE) void byte_split_kernel( + cudf::size_type base_tile_idx, + int64_t base_input_offset, + int64_t base_offset_offset, + cudf::io::text::detail::scan_tile_state_view tile_output_offsets, + char delim, + cudf::device_span chunk_input_chars, + int64_t byte_range_end, + cudf::split_device_span output_offsets) +{ + using InputLoad = + cub::BlockLoad; + using OffsetScan = cub::BlockScan; + using OffsetScanCallback = cudf::io::text::detail::scan_tile_state_callback; + + __shared__ union { + typename InputLoad::TempStorage input_load; + typename OffsetScan::TempStorage offset_scan; + } temp_storage; + + int32_t const tile_idx = base_tile_idx + blockIdx.x; + int32_t const tile_input_offset = blockIdx.x * ITEMS_PER_TILE; + int32_t const thread_input_offset = tile_input_offset + threadIdx.x * ITEMS_PER_THREAD; + int32_t const thread_input_size = chunk_input_chars.size() - thread_input_offset; + + // STEP 1: Load inputs + + char thread_chars[ITEMS_PER_THREAD]; + + InputLoad(temp_storage.input_load) + .Load(chunk_input_chars.data() + tile_input_offset, + thread_chars, + chunk_input_chars.size() - tile_input_offset); + + // STEP 2: Flag matches + + cutoff_offset thread_offset; + uint32_t thread_match_mask[(ITEMS_PER_THREAD + 31) / 32]{}; + + for (int32_t i = 0; i < ITEMS_PER_THREAD; i++) { + auto const is_match = i < thread_input_size and thread_chars[i] == delim; + auto const match_end = base_input_offset + thread_input_offset + i + 1; + auto const is_past_range = match_end >= byte_range_end; + thread_match_mask[i / 32] |= uint32_t{is_match} << (i % 32); + thread_offset = thread_offset + cutoff_offset{is_match, is_past_range}; + } + + // STEP 3: Scan flags to determine absolute thread output offset + + auto prefix_callback = OffsetScanCallback(tile_output_offsets, tile_idx); + + __syncthreads(); // required before temp_memory re-use + 
OffsetScan(temp_storage.offset_scan).ExclusiveSum(thread_offset, thread_offset, prefix_callback); + + // Step 4: Assign outputs from each thread using match offsets. + + for (int32_t i = 0; i < ITEMS_PER_THREAD; i++) { + auto const is_match = (thread_match_mask[i / 32] >> (i % 32)) & 1u; + if (is_match && !thread_offset.is_past_end()) { + auto const match_end = base_input_offset + thread_input_offset + i + 1; + auto const is_past_range = match_end >= byte_range_end; + output_offsets[thread_offset.offset() - base_offset_offset] = match_end; + thread_offset = thread_offset + cutoff_offset{true, is_past_range}; + } + } +} + } // namespace namespace cudf { @@ -615,19 +682,35 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source cudaStreamWaitEvent(scan_stream.value(), last_launch_event); - multibyte_split_kernel<<>>( // - base_tile_idx, - chunk_offset, - offset_storage.size(), - tile_multistates, - tile_offsets, - {device_delim.data(), static_cast(device_delim.size())}, - *chunk, - byte_range_end, - offset_output); + if (delimiter.size() == 1) { + // the single-byte case allows for a much more efficient kernel, so we special-case it + byte_split_kernel<<>>( // + base_tile_idx, + chunk_offset, + offset_storage.size(), + tile_offsets, + delimiter[0], + *chunk, + byte_range_end, + offset_output); + } else { + multibyte_split_kernel<<>>( // + base_tile_idx, + chunk_offset, + offset_storage.size(), + tile_multistates, + tile_offsets, + {device_delim.data(), static_cast(device_delim.size())}, + *chunk, + byte_range_end, + offset_output); + } // load the next chunk auto next_chunk = reader->get_next_chunk(ITEMS_PER_CHUNK, read_stream); diff --git a/cpp/src/io/utilities/column_type_histogram.hpp b/cpp/src/io/utilities/column_type_histogram.hpp index 99762595693..8bd2d3a89cf 100644 --- a/cpp/src/io/utilities/column_type_histogram.hpp +++ b/cpp/src/io/utilities/column_type_histogram.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -25,14 +25,14 @@ namespace io { * @brief Per-column histogram struct containing detected occurrences of each dtype */ struct column_type_histogram { - cudf::size_type null_count; - cudf::size_type float_count; - cudf::size_type datetime_count; - cudf::size_type string_count; - cudf::size_type negative_small_int_count; - cudf::size_type positive_small_int_count; - cudf::size_type big_int_count; - cudf::size_type bool_count; + cudf::size_type null_count{}; + cudf::size_type float_count{}; + cudf::size_type datetime_count{}; + cudf::size_type string_count{}; + cudf::size_type negative_small_int_count{}; + cudf::size_type positive_small_int_count{}; + cudf::size_type big_int_count{}; + cudf::size_type bool_count{}; }; } // namespace io diff --git a/cpp/src/io/utilities/parsing_utils.cuh b/cpp/src/io/utilities/parsing_utils.cuh index a3699acb934..388c9b28001 100644 --- a/cpp/src/io/utilities/parsing_utils.cuh +++ b/cpp/src/io/utilities/parsing_utils.cuh @@ -42,6 +42,16 @@ using cudf::device_span; namespace cudf { namespace io { +/** + * @brief Non-owning view for json type inference options + */ +struct json_inference_options_view { + char quote_char; + cudf::detail::trie_view trie_true; + cudf::detail::trie_view trie_false; + cudf::detail::trie_view trie_na; +}; + /** * @brief Structure for holding various options used when parsing and * converting CSV/json data to cuDF data type values. @@ -79,6 +89,14 @@ struct parse_options { cudf::detail::optional_trie trie_na; bool multi_delimiter; + [[nodiscard]] json_inference_options_view json_view() const + { + return {quotechar, + cudf::detail::make_trie_view(trie_true), + cudf::detail::make_trie_view(trie_false), + cudf::detail::make_trie_view(trie_na)}; + } + [[nodiscard]] parse_options_view view() const { return {delimiter, @@ -545,12 +563,22 @@ __inline__ __device__ T decode_value(char const* begin, char const* end, parse_options_view const& opts) { + // If this is a string value, remove quotes + if ((thrust::distance(begin, end) >= 2 && *begin == '\"' && *thrust::prev(end) == '\"')) { + thrust::advance(begin, 1); + thrust::advance(end, -1); + } return to_timestamp(begin, end, opts.dayfirst); } template ())> __inline__ __device__ T decode_value(char const* begin, char const* end, parse_options_view const&) { + // If this is a string value, remove quotes + if ((thrust::distance(begin, end) >= 2 && *begin == '\"' && *thrust::prev(end) == '\"')) { + thrust::advance(begin, 1); + thrust::advance(end, -1); + } return to_duration(begin, end); } diff --git a/cpp/src/io/utilities/type_inference.cuh b/cpp/src/io/utilities/type_inference.cuh new file mode 100644 index 00000000000..578c72fc316 --- /dev/null +++ b/cpp/src/io/utilities/type_inference.cuh @@ -0,0 +1,315 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include + +#include + +#include + +namespace cudf::io::detail { +/** + * @brief Custom column_type_histogram sum reduction callable + */ +struct custom_sum { + __device__ inline cudf::io::column_type_histogram operator()( + cudf::io::column_type_histogram const& lhs, cudf::io::column_type_histogram const& rhs) + { + return {lhs.null_count + rhs.null_count, + lhs.float_count + rhs.float_count, + lhs.datetime_count + rhs.datetime_count, + lhs.string_count + rhs.string_count, + lhs.negative_small_int_count + rhs.negative_small_int_count, + lhs.positive_small_int_count + rhs.positive_small_int_count, + lhs.big_int_count + rhs.big_int_count, + lhs.bool_count + rhs.bool_count}; + } +}; + +/** + * @brief Returns true if the input character is a valid digit. + * Supports both decimal and hexadecimal digits (uppercase and lowercase). + * + * @param c Character to check + * @param is_hex Whether to check as a hexadecimal + * + * @return `true` if it is digit-like, `false` otherwise + */ +__device__ __inline__ bool is_digit(char const c, bool const is_hex = false) +{ + if (c >= '0' && c <= '9') return true; + + if (is_hex) { + if (c >= 'A' && c <= 'F') return true; + if (c >= 'a' && c <= 'f') return true; + } + + return false; +} + +/** + * @brief Returns true if the counters indicate a potentially valid float. + * False positives are possible because positions are not taken into account. + * For example, field "e.123-" would match the pattern. + */ +__device__ __inline__ bool is_like_float(std::size_t len, + uint32_t digit_cnt, + uint32_t decimal_cnt, + uint32_t dash_cnt, + uint32_t exponent_cnt) +{ + // Can't have more than one exponent and one decimal point + if (decimal_cnt > 1) return false; + if (exponent_cnt > 1) return false; + // Without the exponent or a decimal point, this is an integer, not a float + if (decimal_cnt == 0 && exponent_cnt == 0) return false; + + // Can only have one '-' per component + if (dash_cnt > 1 + exponent_cnt) return false; + + // If anything other than these characters is present, it's not a float + if (digit_cnt + decimal_cnt + dash_cnt + exponent_cnt != len) return false; + + // Needs at least 1 digit, 2 if exponent is present + if (digit_cnt < 1 + exponent_cnt) return false; + + return true; +} + +/** + * @brief Constructs column type histogram for a given column string input `data`. + * + * @tparam BlockSize Number of threads in each block + * @tparam OptionsView Type of inference options view + * @tparam ColumnStringIter Iterator type whose `value_type` is a + * `thrust::tuple`, where `offset_t` and `length_t` are of integral type and + * `offset_t` needs to be convertible to `std::size_t`. 
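 * For instance (an illustrative pairing, mirroring how the JSON reader builds
 * its inference input),
 *   thrust::make_zip_iterator(d_string_offsets.begin(), d_string_lengths.begin())
 * over device vectors of offsets and lengths yields such tuples.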
+ *
+ * @param[in] options View of inference options
+ * @param[in] data JSON string input
+ * @param[in] column_strings_begin The beginning of an offset-length tuple sequence
+ * @param[in] size Size of the string input
+ * @param[out] column_info Histogram of column type counters
+ */
+template
+__global__ void infer_column_type_kernel(OptionsView options,
+ device_span data,
+ ColumnStringIter column_strings_begin,
+ std::size_t size,
+ cudf::io::column_type_histogram* column_info)
+{
+ auto thread_type_histogram = cudf::io::column_type_histogram{};
+
+ for (auto idx = threadIdx.x + blockDim.x * blockIdx.x; idx < size;
+ idx += gridDim.x * blockDim.x) {
+ auto const field_offset = thrust::get<0>(*(column_strings_begin + idx));
+ auto const field_len = thrust::get<1>(*(column_strings_begin + idx));
+ auto const field_begin = data.begin() + field_offset;
+
+ if (cudf::detail::serialized_trie_contains(
+ options.trie_na, {field_begin, static_cast(field_len)})) {
+ ++thread_type_histogram.null_count;
+ continue;
+ }
+
+ // Handling strings
+ if (field_len >= 2 and *field_begin == options.quote_char and
+ field_begin[field_len - 1] == options.quote_char) {
+ ++thread_type_histogram.string_count;
+ continue;
+ }
+
+ uint32_t digit_count = 0;
+ uint32_t decimal_count = 0;
+ uint32_t slash_count = 0;
+ uint32_t dash_count = 0;
+ uint32_t plus_count = 0;
+ uint32_t colon_count = 0;
+ uint32_t exponent_count = 0;
+ uint32_t other_count = 0;
+
+ auto const maybe_hex =
+ (field_len > 2 && field_begin[0] == '0' && field_begin[1] == 'x') ||
+ (field_len > 3 && field_begin[0] == '-' && field_begin[1] == '0' && field_begin[2] == 'x');
+ auto const field_end = field_begin + field_len;
+
+ for (auto pos = field_begin; pos < field_end; ++pos) {
+ if (is_digit(*pos, maybe_hex)) {
+ digit_count++;
+ continue;
+ }
+ // Looking for unique characters that will help identify column types
+ switch (*pos) {
+ case '.': decimal_count++; break;
+ case '-': dash_count++; break;
+ case '+': plus_count++; break;
+ case '/': slash_count++; break;
+ case ':': colon_count++; break;
+ case 'e':
+ case 'E':
+ if (!maybe_hex && pos > field_begin && pos < field_end - 1) exponent_count++;
+ break;
+ default: other_count++; break;
+ }
+ }
+
+ // All characters must be digits in an integer, except for the starting sign and 'x' in the
+ // hexadecimal prefix
+ auto const int_req_number_cnt =
+ static_cast(field_len) -
+ ((*field_begin == '-' || *field_begin == '+') && field_len > 1) - maybe_hex;
+ if (cudf::detail::serialized_trie_contains(
+ options.trie_true, {field_begin, static_cast(field_len)}) ||
+ cudf::detail::serialized_trie_contains(
+ options.trie_false, {field_begin, static_cast(field_len)})) {
+ ++thread_type_histogram.bool_count;
+ } else if (digit_count == int_req_number_cnt) {
+ auto const is_negative = (*field_begin == '-');
+ char const* data_begin = field_begin + (is_negative || (*field_begin == '+'));
+ cudf::size_type* ptr = cudf::io::gpu::infer_integral_field_counter(
+ data_begin, data_begin + digit_count, is_negative, thread_type_histogram);
+ ++*ptr;
+ } else if (is_like_float(
+ field_len, digit_count, decimal_count, dash_count + plus_count, exponent_count)) {
+ ++thread_type_histogram.float_count;
+ }
+ // All invalid JSON values are treated as string
+ else {
+ ++thread_type_histogram.string_count;
+ }
+ } // grid-stride for loop
+
+ using BlockReduce = cub::BlockReduce;
+ __shared__ typename BlockReduce::TempStorage temp_storage;
+ auto const block_type_histogram =
+
BlockReduce(temp_storage).Reduce(thread_type_histogram, custom_sum{});
+ if (threadIdx.x == 0) {
+ atomicAdd(&column_info->null_count, block_type_histogram.null_count);
+ atomicAdd(&column_info->float_count, block_type_histogram.float_count);
+ atomicAdd(&column_info->datetime_count, block_type_histogram.datetime_count);
+ atomicAdd(&column_info->string_count, block_type_histogram.string_count);
+ atomicAdd(&column_info->negative_small_int_count,
+ block_type_histogram.negative_small_int_count);
+ atomicAdd(&column_info->positive_small_int_count,
+ block_type_histogram.positive_small_int_count);
+ atomicAdd(&column_info->big_int_count, block_type_histogram.big_int_count);
+ atomicAdd(&column_info->bool_count, block_type_histogram.bool_count);
+ }
+}
+
+/**
+ * @brief Constructs column type histogram for a given column string input `data`.
+ *
+ * @tparam OptionsView Type of inference options view
+ * @tparam ColumnStringIter Iterator type whose `value_type` is a
+ * `thrust::tuple`, where `offset_t` and `length_t` are of integral type and
+ * `offset_t` needs to be convertible to `std::size_t`.
+ *
+ * @param options View of inference options
+ * @param data JSON string input
+ * @param column_strings_begin The beginning of an offset-length tuple sequence
+ * @param size Size of the string input
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @return A histogram containing column-specific type counters
+ */
+template
+cudf::io::column_type_histogram infer_column_type(OptionsView const& options,
+ cudf::device_span data,
+ ColumnStringIter column_strings_begin,
+ std::size_t const size,
+ rmm::cuda_stream_view stream)
+{
+ constexpr int block_size = 128;
+
+ auto const grid_size = (size + block_size - 1) / block_size;
+ auto d_column_info = rmm::device_scalar(stream);
+ CUDF_CUDA_TRY(cudaMemsetAsync(
+ d_column_info.data(), 0, sizeof(cudf::io::column_type_histogram), stream.value()));
+
+ infer_column_type_kernel<<>>(
+ options, data, column_strings_begin, size, d_column_info.data());
+
+ return d_column_info.value(stream);
+}
+
+/**
+ * @brief Infers data type for a given JSON string input `data`.
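 * The mapping from the aggregated histogram to a type is, roughly (see
 * get_type_id below): an all-null column becomes INT8, any quoted field forces
 * STRING, floats or integers mixed with nulls become FLOAT64, remaining
 * integers become INT64 or UINT64 depending on magnitude and sign, and pure
 * true/false fields become BOOL8.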
+ *
+ * @throw cudf::logic_error if input size is 0
+ * @throw cudf::logic_error if date time is not inferred as string
+ * @throw cudf::logic_error if data type inference failed
+ *
+ * @tparam OptionsView Type of inference options view
+ * @tparam ColumnStringIter Iterator type whose `value_type` is convertible to
+ * `thrust::tuple`
+ *
+ * @param options View of inference options
+ * @param data JSON string input
+ * @param column_strings_begin The beginning of an offset-length tuple sequence
+ * @param size Size of the string input
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @return The inferred data type
+ */
+template
+cudf::data_type infer_data_type(OptionsView const& options,
+ device_span data,
+ ColumnStringIter column_strings_begin,
+ std::size_t const size,
+ rmm::cuda_stream_view stream)
+{
+ CUDF_EXPECTS(size != 0, "No data available for data type inference.\n");
+
+ auto const h_column_info = infer_column_type(options, data, column_strings_begin, size, stream);
+
+ auto get_type_id = [&](auto const& cinfo) {
+ auto int_count_total =
+ cinfo.big_int_count + cinfo.negative_small_int_count + cinfo.positive_small_int_count;
+ if (cinfo.null_count == static_cast(size)) {
+ // Entire column is NULL; allocate the smallest amount of memory
+ return type_id::INT8;
+ } else if (cinfo.string_count > 0) {
+ return type_id::STRING;
+ } else if (cinfo.datetime_count > 0) {
+ CUDF_FAIL("Date time is inferred as string.\n");
+ } else if (cinfo.float_count > 0 || (int_count_total > 0 && cinfo.null_count > 0)) {
+ return type_id::FLOAT64;
+ } else if (cinfo.big_int_count == 0 && int_count_total != 0) {
+ return type_id::INT64;
+ } else if (cinfo.big_int_count != 0 && cinfo.negative_small_int_count != 0) {
+ return type_id::STRING;
+ } else if (cinfo.big_int_count != 0) {
+ return type_id::UINT64;
+ } else if (cinfo.bool_count > 0) {
+ return type_id::BOOL8;
+ }
+ CUDF_FAIL("Data type inference failed.\n");
+ };
+ return cudf::data_type{get_type_id(h_column_info)};
+}
+} // namespace cudf::io::detail
diff --git a/cpp/src/lists/segmented_sort.cu b/cpp/src/lists/segmented_sort.cu index 04456d970bf..ea35977e8e4 100644 --- a/cpp/src/lists/segmented_sort.cu +++ b/cpp/src/lists/segmented_sort.cu @@ -46,6 +46,17 @@ namespace lists { namespace detail { struct SegmentedSortColumn {
+ /**
+ * @brief Compile-time check for allowing radix sort for column type.
+ *
+ * Floating point is not included here because of the special handling of NaNs.
+ */
+ template
+ static constexpr bool is_radix_sort_supported()
+ {
+ return std::is_integral();
+ }
+
template void SortPairsAscending(KeyT const* keys_in, KeyT* keys_out, @@ -133,7 +144,7 @@ struct SegmentedSortColumn { } template
- std::enable_if_t(), std::unique_ptr> operator()(
+ std::enable_if_t(), std::unique_ptr> operator()(
column_view const& child, column_view const& segment_offsets, order column_order, @@ -152,7 +163,7 @@ struct SegmentedSortColumn { } template
- std::enable_if_t(), std::unique_ptr> operator()(
+ std::enable_if_t(), std::unique_ptr> operator()(
column_view const& child, column_view const& offsets, order column_order, @@ -253,14 +264,14 @@ std::unique_ptr sort_lists(lists_column_view const& input, }); // for numeric columns, calls Faster segmented radix sort path // for non-numeric columns, calls segmented_sort_by_key.
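// Note: radix sort orders raw bit patterns, so floating-point NaNs (which
// compare unordered) would not land where the comparison-based
// segmented_sort_by_key places them; is_radix_sort_supported() above therefore
// admits only integral child types.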
- auto output_child = type_dispatcher(input.child().type(), - SegmentedSortColumn{}, - input.get_sliced_child(stream), - output_offset->view(), - column_order, - null_precedence, - stream, - mr); + auto output_child = type_dispatcher(input.child().type(), + SegmentedSortColumn{}, + input.get_sliced_child(stream), + output_offset->view(), + column_order, + null_precedence, + stream, + mr); auto null_mask = cudf::detail::copy_bitmask(input.parent(), stream, mr); diff --git a/cpp/src/strings/contains.cu b/cpp/src/strings/contains.cu index 431e9ae26e3..b7d154c4808 100644 --- a/cpp/src/strings/contains.cu +++ b/cpp/src/strings/contains.cu @@ -71,7 +71,7 @@ std::unique_ptr contains_impl(strings_column_view const& input, mr); if (input.is_empty()) { return results; } - auto d_prog = reprog_device::create(pattern, flags, stream); + auto d_prog = reprog_device::create(pattern, flags, capture_groups::NON_CAPTURE, stream); auto d_results = results->mutable_view().data(); auto const d_strings = column_device_view::create(input.parent(), stream); @@ -114,7 +114,7 @@ std::unique_ptr count_re( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { // compile regex into device object - auto d_prog = reprog_device::create(pattern, flags, stream); + auto d_prog = reprog_device::create(pattern, flags, capture_groups::NON_CAPTURE, stream); auto const d_strings = column_device_view::create(input.parent(), stream); diff --git a/cpp/src/strings/extract/extract.cu b/cpp/src/strings/extract/extract.cu index 3889f3e2833..76d2f84b1a0 100644 --- a/cpp/src/strings/extract/extract.cu +++ b/cpp/src/strings/extract/extract.cu @@ -92,7 +92,7 @@ std::unique_ptr
extract(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { // compile regex into device object - auto d_prog = reprog_device::create(pattern, flags, stream); + auto d_prog = reprog_device::create(pattern, flags, capture_groups::EXTRACT, stream); auto const groups = d_prog->group_counts(); CUDF_EXPECTS(groups > 0, "Group indicators not found in regex pattern"); diff --git a/cpp/src/strings/extract/extract_all.cu b/cpp/src/strings/extract/extract_all.cu index b3c251ca5b7..76c2788c1be 100644 --- a/cpp/src/strings/extract/extract_all.cu +++ b/cpp/src/strings/extract/extract_all.cu @@ -106,7 +106,7 @@ std::unique_ptr extract_all_record( auto const d_strings = column_device_view::create(input.parent(), stream); // Compile regex into device object. - auto d_prog = reprog_device::create(pattern, flags, stream); + auto d_prog = reprog_device::create(pattern, flags, capture_groups::EXTRACT, stream); // The extract pattern should always include groups. auto const groups = d_prog->group_counts(); CUDF_EXPECTS(groups > 0, "extract_all requires group indicators in the regex pattern."); diff --git a/cpp/src/strings/regex/regcomp.cpp b/cpp/src/strings/regex/regcomp.cpp index 9b1013bae09..5b86aedc409 100644 --- a/cpp/src/strings/regex/regcomp.cpp +++ b/cpp/src/strings/regex/regcomp.cpp @@ -36,7 +36,7 @@ namespace detail { namespace { // Bitmask of all operators #define OPERATOR_MASK 0200 -enum OperatorType { +enum OperatorType : int32_t { START = 0200, // Start, used for marker on stack LBRA_NC = 0203, // non-capturing group CAT = 0205, // Concatentation, implicit operator @@ -176,6 +176,7 @@ class regex_parser { char32_t const* _expr_ptr; bool _lex_done{false}; regex_flags const _flags; + capture_groups const _capture; int32_t _id_cclass_w{-1}; // alphanumeric [a-zA-Z0-9_] int32_t _id_cclass_W{-1}; // not alphanumeric plus '\n' @@ -528,7 +529,8 @@ class regex_parser { _expr_ptr += 2; return LBRA_NC; } - return LBRA; + return (_capture == capture_groups::NON_CAPTURE) ? static_cast(LBRA_NC) + : static_cast(LBRA); case ')': return RBRA; case '^': { _chr = chr; @@ -752,8 +754,11 @@ class regex_parser { } public: - regex_parser(const char32_t* pattern, regex_flags const flags, reprog& prog) - : _prog(prog), _pattern_begin(pattern), _expr_ptr(pattern), _flags(flags) + regex_parser(const char32_t* pattern, + regex_flags const flags, + capture_groups const capture, + reprog& prog) + : _prog(prog), _pattern_begin(pattern), _expr_ptr(pattern), _flags(flags), _capture(capture) { auto const dot_type = is_dotall(_flags) ? 
ANYNL : ANY; @@ -956,11 +961,14 @@ class regex_compiler { } public: - regex_compiler(const char32_t* pattern, regex_flags const flags, reprog& prog) + regex_compiler(const char32_t* pattern, + regex_flags const flags, + capture_groups const capture, + reprog& prog) : _prog(prog), _last_was_and(false), _bracket_count(0), _flags(flags) { // Parse pattern into items - auto const items = regex_parser(pattern, _flags, _prog).get_items(); + auto const items = regex_parser(pattern, _flags, capture, _prog).get_items(); int cur_subid{}; int push_subid{}; @@ -995,28 +1003,29 @@ class regex_compiler { CUDF_EXPECTS(_bracket_count == 0, "unmatched left parenthesis"); _prog.set_start_inst(_and_stack.top().id_first); - _prog.finalize(); + _prog.optimize(); _prog.check_for_errors(); + _prog.finalize(); _prog.set_groups_count(cur_subid); } }; // Convert pattern into program -reprog reprog::create_from(std::string_view pattern, regex_flags const flags) +reprog reprog::create_from(std::string_view pattern, + regex_flags const flags, + capture_groups const capture) { reprog rtn; auto pattern32 = string_to_char32_vector(pattern); - regex_compiler compiler(pattern32.data(), flags, rtn); + regex_compiler compiler(pattern32.data(), flags, capture, rtn); // for debugging, it can be helpful to call rtn.print(flags) here to dump // out the instructions that have been created from the given pattern return rtn; } -void reprog::finalize() -{ - collapse_nops(); - build_start_ids(); -} +void reprog::optimize() { collapse_nops(); } + +void reprog::finalize() { build_start_ids(); } void reprog::collapse_nops() { diff --git a/cpp/src/strings/regex/regcomp.h b/cpp/src/strings/regex/regcomp.h index 10092137c77..7ad7f481436 100644 --- a/cpp/src/strings/regex/regcomp.h +++ b/cpp/src/strings/regex/regcomp.h @@ -31,7 +31,7 @@ namespace detail { * 03xx are tokens, i.e. operands for operators * ``` */ -enum InstType { +enum InstType : int32_t { CHAR = 0177, // Literal character RBRA = 0201, // Right bracket, ) LBRA = 0202, // Left bracket, ( @@ -109,9 +109,12 @@ class reprog { * * @param pattern Regex pattern encoded as UTF-8 * @param flags For interpreting certain `pattern` characters + * @param capture For controlling how capture groups are processed * @return Instance of reprog */ - static reprog create_from(std::string_view pattern, regex_flags const flags); + static reprog create_from(std::string_view pattern, + regex_flags const flags, + capture_groups const capture = capture_groups::EXTRACT); int32_t add_inst(int32_t type); int32_t add_inst(reinst const& inst); @@ -134,6 +137,7 @@ class reprog { void set_start_inst(int32_t id); [[nodiscard]] int32_t get_start_inst() const; + void optimize(); void finalize(); void check_for_errors(); #ifndef NDEBUG diff --git a/cpp/src/strings/regex/regex.cuh b/cpp/src/strings/regex/regex.cuh index 11cc1a493a0..98631680800 100644 --- a/cpp/src/strings/regex/regex.cuh +++ b/cpp/src/strings/regex/regex.cuh @@ -91,15 +91,19 @@ class reprog_device { std::string_view pattern, rmm::cuda_stream_view stream); /** - * @brief Create the device program instance from a regex pattern. + * @brief Create the device program instance from a regex pattern * - * @param pattern The regex pattern to compile. - * @param re_flags Regex flags for interpreting special characters in the pattern. 
+ * @param pattern The regex pattern to compile + * @param re_flags Regex flags for interpreting special characters in the pattern + * @param capture Control how capture groups are processed * @param stream CUDA stream used for device memory operations and kernel launches - * @return The program device object. + * @return The program device object */ static std::unique_ptr> create( - std::string_view pattern, regex_flags const re_flags, rmm::cuda_stream_view stream); + std::string_view pattern, + regex_flags const re_flags, + capture_groups const capture, + rmm::cuda_stream_view stream); /** * @brief Called automatically by the unique_ptr returned from create(). diff --git a/cpp/src/strings/regex/regexec.cu b/cpp/src/strings/regex/regexec.cu index 5b97271cb21..03247d24ba3 100644 --- a/cpp/src/strings/regex/regexec.cu +++ b/cpp/src/strings/regex/regexec.cu @@ -47,15 +47,19 @@ reprog_device::reprog_device(reprog& prog) std::unique_ptr> reprog_device::create( std::string_view pattern, rmm::cuda_stream_view stream) { - return reprog_device::create(pattern, regex_flags::MULTILINE, stream); + return reprog_device::create( + pattern, regex_flags::MULTILINE, capture_groups::NON_CAPTURE, stream); } // Create instance of the reprog that can be passed into a device kernel std::unique_ptr> reprog_device::create( - std::string_view pattern, regex_flags const flags, rmm::cuda_stream_view stream) + std::string_view pattern, + regex_flags const flags, + capture_groups const capture, + rmm::cuda_stream_view stream) { // compile pattern into host object - reprog h_prog = reprog::create_from(pattern, flags); + reprog h_prog = reprog::create_from(pattern, flags, capture); // compute size to hold all the member data auto const insts_count = h_prog.insts_count(); diff --git a/cpp/src/strings/replace/backref_re.cu b/cpp/src/strings/replace/backref_re.cu index ab391c29ab8..e0a995c26b9 100644 --- a/cpp/src/strings/replace/backref_re.cu +++ b/cpp/src/strings/replace/backref_re.cu @@ -114,7 +114,7 @@ std::unique_ptr replace_with_backrefs(strings_column_view const& input, CUDF_EXPECTS(!replacement.empty(), "Parameter replacement must not be empty"); // compile regex into device object - auto d_prog = reprog_device::create(pattern, flags, stream); + auto d_prog = reprog_device::create(pattern, flags, capture_groups::EXTRACT, stream); // parse the repl string for back-ref indicators auto group_count = std::min(99, d_prog->group_counts()); // group count should NOT exceed 99 diff --git a/cpp/src/strings/replace/multi_re.cu b/cpp/src/strings/replace/multi_re.cu index 149ccb6167d..a5b9ad37e65 100644 --- a/cpp/src/strings/replace/multi_re.cu +++ b/cpp/src/strings/replace/multi_re.cu @@ -145,7 +145,7 @@ std::unique_ptr replace_re( patterns.size()); std::transform( patterns.begin(), patterns.end(), h_progs.begin(), [flags, stream](auto const& ptn) { - return reprog_device::create(ptn, flags, stream); + return reprog_device::create(ptn, flags, capture_groups::NON_CAPTURE, stream); }); // get the longest regex for the dispatcher diff --git a/cpp/src/strings/replace/replace_re.cu b/cpp/src/strings/replace/replace_re.cu index 131fa8decba..fd0049d7c89 100644 --- a/cpp/src/strings/replace/replace_re.cu +++ b/cpp/src/strings/replace/replace_re.cu @@ -115,7 +115,7 @@ std::unique_ptr replace_re( string_view d_repl(replacement.data(), replacement.size()); // compile regex into device object - auto d_prog = reprog_device::create(pattern, flags, stream); + auto d_prog = reprog_device::create(pattern, flags, capture_groups::NON_CAPTURE, 
stream); auto const maxrepl = max_replace_count.value_or(-1); diff --git a/cpp/src/strings/search/findall.cu b/cpp/src/strings/search/findall.cu index ab89936f541..73470bde867 100644 --- a/cpp/src/strings/search/findall.cu +++ b/cpp/src/strings/search/findall.cu @@ -103,7 +103,7 @@ std::unique_ptr findall( auto const d_strings = column_device_view::create(input.parent(), stream); // compile regex into device object - auto const d_prog = reprog_device::create(pattern, flags, stream); + auto const d_prog = reprog_device::create(pattern, flags, capture_groups::NON_CAPTURE, stream); // Create lists offsets column auto offsets = count_matches(*d_strings, *d_prog, strings_count + 1, stream, mr); diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 3710bc6cdfa..e630e842f4e 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -226,12 +226,13 @@ ConfigureTest(ORC_TEST io/orc_test.cpp) ConfigureTest(PARQUET_TEST io/parquet_test.cpp) ConfigureTest(JSON_TEST io/json_test.cpp) ConfigureTest(JSON_TYPE_CAST_TEST io/json_type_cast_test.cu) -ConfigureTest(NESTED_JSON_TEST io/nested_json_test.cpp) +ConfigureTest(NESTED_JSON_TEST io/nested_json_test.cpp io/json_tree.cpp) ConfigureTest(ARROW_IO_SOURCE_TEST io/arrow_io_source_test.cpp) ConfigureTest(MULTIBYTE_SPLIT_TEST io/text/multibyte_split_test.cpp) ConfigureTest(DATA_CHUNK_SOURCE_TEST io/text/data_chunk_source_test.cpp) ConfigureTest(LOGICAL_STACK_TEST io/fst/logical_stack_test.cu) ConfigureTest(FST_TEST io/fst/fst_test.cu) +ConfigureTest(TYPE_INFERENCE_TEST io/type_inference_test.cu) if(CUDF_ENABLE_ARROW_S3) target_compile_definitions(ARROW_IO_SOURCE_TEST PRIVATE "S3_ENABLED") endif() diff --git a/cpp/tests/column/column_test.cu b/cpp/tests/column/column_test.cu index 801cee285b6..51f37ecac6c 100644 --- a/cpp/tests/column/column_test.cu +++ b/cpp/tests/column/column_test.cu @@ -31,6 +31,8 @@ #include #include +#include + #include #include @@ -46,8 +48,10 @@ struct TypedColumnTest : public cudf::test::BaseFixture { { auto typed_data = static_cast(data.data()); auto typed_mask = static_cast(mask.data()); - thrust::sequence(thrust::device, typed_data, typed_data + data.size()); - thrust::sequence(thrust::device, typed_mask, typed_mask + mask.size()); + thrust::sequence( + rmm::exec_policy(cudf::default_stream_value), typed_data, typed_data + data.size()); + thrust::sequence( + rmm::exec_policy(cudf::default_stream_value), typed_mask, typed_mask + mask.size()); } cudf::size_type num_elements() { return _num_elements; } @@ -349,7 +353,7 @@ TYPED_TEST(TypedColumnTest, DeviceUvectorConstructorNoMask) { rmm::device_uvector original{static_cast(this->num_elements()), cudf::default_stream_value}; - thrust::copy(thrust::device, + thrust::copy(rmm::exec_policy(cudf::default_stream_value), static_cast(this->data.data()), static_cast(this->data.data()) + this->num_elements(), original.begin()); @@ -366,7 +370,7 @@ TYPED_TEST(TypedColumnTest, DeviceUvectorConstructorWithMask) { rmm::device_uvector original{static_cast(this->num_elements()), cudf::default_stream_value}; - thrust::copy(thrust::device, + thrust::copy(rmm::exec_policy(cudf::default_stream_value), static_cast(this->data.data()), static_cast(this->data.data()) + this->num_elements(), original.begin()); diff --git a/cpp/tests/column/compound_test.cu b/cpp/tests/column/compound_test.cu index a6d15b50150..58be2b2f316 100644 --- a/cpp/tests/column/compound_test.cu +++ b/cpp/tests/column/compound_test.cu @@ -66,7 +66,7 @@ struct checker_for_level2 { TEST_F(CompoundColumnTest, 
ChildrenLevel1) { rmm::device_uvector data(1000, cudf::default_stream_value); - thrust::sequence(rmm::exec_policy(), data.begin(), data.end(), 1); + thrust::sequence(rmm::exec_policy(cudf::default_stream_value), data.begin(), data.end(), 1); auto null_mask = cudf::create_null_mask(100, cudf::mask_state::UNALLOCATED); rmm::device_buffer data1{data.data() + 100, 100 * sizeof(int32_t), cudf::default_stream_value}; @@ -105,14 +105,14 @@ TEST_F(CompoundColumnTest, ChildrenLevel1) { auto column = cudf::column_device_view::create(parent->view()); - EXPECT_TRUE(thrust::any_of(rmm::exec_policy(), + EXPECT_TRUE(thrust::any_of(rmm::exec_policy(cudf::default_stream_value), thrust::make_counting_iterator(0), thrust::make_counting_iterator(100), checker_for_level1{*column})); } { auto column = cudf::mutable_column_device_view::create(parent->mutable_view()); - EXPECT_TRUE(thrust::any_of(rmm::exec_policy(), + EXPECT_TRUE(thrust::any_of(rmm::exec_policy(cudf::default_stream_value), thrust::make_counting_iterator(0), thrust::make_counting_iterator(100), checker_for_level1{*column})); @@ -122,7 +122,7 @@ TEST_F(CompoundColumnTest, ChildrenLevel1) TEST_F(CompoundColumnTest, ChildrenLevel2) { rmm::device_uvector data(1000, cudf::default_stream_value); - thrust::sequence(rmm::exec_policy(), data.begin(), data.end(), 1); + thrust::sequence(rmm::exec_policy(cudf::default_stream_value), data.begin(), data.end(), 1); auto null_mask = cudf::create_null_mask(100, cudf::mask_state::UNALLOCATED); rmm::device_buffer data11{data.data() + 100, 100 * sizeof(int32_t), cudf::default_stream_value}; @@ -202,14 +202,14 @@ TEST_F(CompoundColumnTest, ChildrenLevel2) { auto column = cudf::column_device_view::create(parent->view()); - EXPECT_TRUE(thrust::any_of(rmm::exec_policy(), + EXPECT_TRUE(thrust::any_of(rmm::exec_policy(cudf::default_stream_value), thrust::make_counting_iterator(0), thrust::make_counting_iterator(100), checker_for_level2{*column})); } { auto column = cudf::mutable_column_device_view::create(parent->mutable_view()); - EXPECT_TRUE(thrust::any_of(rmm::exec_policy(), + EXPECT_TRUE(thrust::any_of(rmm::exec_policy(cudf::default_stream_value), thrust::make_counting_iterator(0), thrust::make_counting_iterator(100), checker_for_level2{*column})); diff --git a/cpp/tests/copying/concatenate_tests.cu b/cpp/tests/copying/concatenate_tests.cu index f5efd616e3d..c06afe85cff 100644 --- a/cpp/tests/copying/concatenate_tests.cu +++ b/cpp/tests/copying/concatenate_tests.cu @@ -522,11 +522,11 @@ TEST_F(OverflowTest, Presliced) // try and concatenate 4 string columns of with ~1/2 billion chars in each auto offsets = cudf::make_fixed_width_column(data_type{type_id::INT32}, num_rows + 1); - thrust::fill(rmm::exec_policy(), + thrust::fill(rmm::exec_policy(cudf::default_stream_value), offsets->mutable_view().begin(), offsets->mutable_view().end(), string_size); - thrust::exclusive_scan(rmm::exec_policy(), + thrust::exclusive_scan(rmm::exec_policy(cudf::default_stream_value), offsets->view().begin(), offsets->view().end(), offsets->mutable_view().begin()); @@ -596,11 +596,11 @@ TEST_F(OverflowTest, Presliced) // try and concatenate 4 struct columns of with ~1/2 billion elements in each auto offsets = cudf::make_fixed_width_column(data_type{type_id::INT32}, num_rows + 1); - thrust::fill(rmm::exec_policy(), + thrust::fill(rmm::exec_policy(cudf::default_stream_value), offsets->mutable_view().begin(), offsets->mutable_view().end(), list_size); - thrust::exclusive_scan(rmm::exec_policy(), + 
thrust::exclusive_scan(rmm::exec_policy(cudf::default_stream_value), offsets->view().begin(), offsets->view().end(), offsets->mutable_view().begin()); @@ -688,11 +688,11 @@ TEST_F(OverflowTest, BigColumnsSmallSlices) constexpr size_type string_size = inner_size / num_rows; auto offsets = cudf::make_fixed_width_column(data_type{type_id::INT32}, num_rows + 1); - thrust::fill(rmm::exec_policy(), + thrust::fill(rmm::exec_policy(cudf::default_stream_value), offsets->mutable_view().begin(), offsets->mutable_view().end(), string_size); - thrust::exclusive_scan(rmm::exec_policy(), + thrust::exclusive_scan(rmm::exec_policy(cudf::default_stream_value), offsets->view().begin(), offsets->view().end(), offsets->mutable_view().begin()); @@ -715,11 +715,11 @@ TEST_F(OverflowTest, BigColumnsSmallSlices) constexpr size_type list_size = inner_size / num_rows; auto offsets = cudf::make_fixed_width_column(data_type{type_id::INT32}, num_rows + 1); - thrust::fill(rmm::exec_policy(), + thrust::fill(rmm::exec_policy(cudf::default_stream_value), offsets->mutable_view().begin(), offsets->mutable_view().end(), list_size); - thrust::exclusive_scan(rmm::exec_policy(), + thrust::exclusive_scan(rmm::exec_policy(cudf::default_stream_value), offsets->view().begin(), offsets->view().end(), offsets->mutable_view().begin()); @@ -742,11 +742,11 @@ TEST_F(OverflowTest, BigColumnsSmallSlices) constexpr size_type list_size = inner_size / num_rows; auto offsets = cudf::make_fixed_width_column(data_type{type_id::INT32}, num_rows + 1); - thrust::fill(rmm::exec_policy(), + thrust::fill(rmm::exec_policy(cudf::default_stream_value), offsets->mutable_view().begin(), offsets->mutable_view().end(), list_size); - thrust::exclusive_scan(rmm::exec_policy(), + thrust::exclusive_scan(rmm::exec_policy(cudf::default_stream_value), offsets->view().begin(), offsets->view().end(), offsets->mutable_view().begin()); diff --git a/cpp/tests/copying/detail_gather_tests.cu b/cpp/tests/copying/detail_gather_tests.cu index 55623eec218..e3cd975ab41 100644 --- a/cpp/tests/copying/detail_gather_tests.cu +++ b/cpp/tests/copying/detail_gather_tests.cu @@ -48,7 +48,8 @@ TYPED_TEST(GatherTest, GatherDetailDeviceVectorTest) { constexpr cudf::size_type source_size{1000}; rmm::device_uvector gather_map(source_size, cudf::default_stream_value); - thrust::sequence(thrust::device, gather_map.begin(), gather_map.end()); + thrust::sequence( + rmm::exec_policy(cudf::default_stream_value), gather_map.begin(), gather_map.end()); auto data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; }); cudf::test::fixed_width_column_wrapper source_column(data, data + source_size); diff --git a/cpp/tests/copying/split_tests.cpp b/cpp/tests/copying/split_tests.cpp index b4add7d4123..85e28a5ec16 100644 --- a/cpp/tests/copying/split_tests.cpp +++ b/cpp/tests/copying/split_tests.cpp @@ -1360,7 +1360,8 @@ TEST_F(ContiguousSplitUntypedTest, ValidityEdgeCase) } } -TEST_F(ContiguousSplitUntypedTest, CalculationOverflow) +// This test requires about 25GB of device memory when used with the arena allocator +TEST_F(ContiguousSplitUntypedTest, DISABLED_VeryLargeColumnTest) { // tests an edge case where buf.elements * buf.element_size overflows an INT32. 
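// A back-of-the-envelope illustration of the overflow (hypothetical numbers,
// not the ones used by this test): 400'000'000 elements of 8 bytes each is
// 3'200'000'000 bytes, which exceeds INT32_MAX (2'147'483'647). The byte count
// therefore has to be computed in a 64-bit type, e.g.
//   auto const nbytes = static_cast<std::size_t>(buf.elements) * buf.element_size;
// rather than in a 32-bit intermediate.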
auto col = cudf::make_fixed_width_column( diff --git a/cpp/tests/device_atomics/device_atomics_test.cu b/cpp/tests/device_atomics/device_atomics_test.cu index d9788388fb1..17e67da6227 100644 --- a/cpp/tests/device_atomics/device_atomics_test.cu +++ b/cpp/tests/device_atomics/device_atomics_test.cu @@ -147,9 +147,11 @@ struct AtomicsTest : public cudf::test::BaseFixture { if (block_size == 0) { block_size = vec_size; } if (is_cas_test) { - gpu_atomicCAS_test<<>>(dev_result.data(), dev_data.data(), vec_size); + gpu_atomicCAS_test<<>>( + dev_result.data(), dev_data.data(), vec_size); } else { - gpu_atomic_test<<>>(dev_result.data(), dev_data.data(), vec_size); + gpu_atomic_test<<>>( + dev_result.data(), dev_data.data(), vec_size); } auto host_result = cudf::detail::make_host_vector_sync(dev_result); @@ -296,7 +298,7 @@ struct AtomicsBitwiseOpTest : public cudf::test::BaseFixture { if (block_size == 0) { block_size = vec_size; } - gpu_atomic_bitwiseOp_test<<>>( + gpu_atomic_bitwiseOp_test<<>>( reinterpret_cast(dev_result.data()), reinterpret_cast(dev_data.data()), vec_size); auto host_result = cudf::detail::make_host_vector_sync(dev_result); diff --git a/cpp/tests/error/error_handling_test.cu b/cpp/tests/error/error_handling_test.cu index a83f72bb9c8..e34cf23eee4 100644 --- a/cpp/tests/error/error_handling_test.cu +++ b/cpp/tests/error/error_handling_test.cu @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -89,7 +90,7 @@ TEST(DeathTest, CudaFatalError) { testing::FLAGS_gtest_death_test_style = "threadsafe"; auto call_kernel = []() { - kernel<<<1, 1>>>(); + kernel<<<1, 1, 0, cudf::default_stream_value.value()>>>(); try { CUDF_CUDA_TRY(cudaDeviceSynchronize()); } catch (const cudf::fatal_cuda_error& fe) { diff --git a/cpp/tests/fixed_point/fixed_point_tests.cu b/cpp/tests/fixed_point/fixed_point_tests.cu index c6f2bb6ce47..a4e0736e22f 100644 --- a/cpp/tests/fixed_point/fixed_point_tests.cu +++ b/cpp/tests/fixed_point/fixed_point_tests.cu @@ -85,8 +85,10 @@ TEST_F(FixedPointTest, DecimalXXThrustOnDevice) std::vector vec1(1000, decimal32{1, scale_type{-2}}); auto d_vec1 = cudf::detail::make_device_uvector_sync(vec1); - auto const sum = thrust::reduce( - rmm::exec_policy(), std::cbegin(d_vec1), std::cend(d_vec1), decimal32{0, scale_type{-2}}); + auto const sum = thrust::reduce(rmm::exec_policy(cudf::default_stream_value), + std::cbegin(d_vec1), + std::cend(d_vec1), + decimal32{0, scale_type{-2}}); EXPECT_EQ(static_cast(sum), 1000); @@ -99,8 +101,10 @@ TEST_F(FixedPointTest, DecimalXXThrustOnDevice) std::vector vec2(1000); std::iota(std::begin(vec2), std::end(vec2), 1); - auto const res1 = thrust::reduce( - rmm::exec_policy(), std::cbegin(d_vec1), std::cend(d_vec1), decimal32{0, scale_type{-2}}); + auto const res1 = thrust::reduce(rmm::exec_policy(cudf::default_stream_value), + std::cbegin(d_vec1), + std::cend(d_vec1), + decimal32{0, scale_type{-2}}); auto const res2 = std::accumulate(std::cbegin(vec2), std::cend(vec2), 0); @@ -108,7 +112,7 @@ TEST_F(FixedPointTest, DecimalXXThrustOnDevice) rmm::device_uvector d_vec3(1000, cudf::default_stream_value); - thrust::transform(rmm::exec_policy(), + thrust::transform(rmm::exec_policy(cudf::default_stream_value), std::cbegin(d_vec1), std::cend(d_vec1), std::begin(d_vec3), diff --git a/cpp/tests/groupby/lists_tests.cu b/cpp/tests/groupby/lists_tests.cu index c37e78f70f2..45c6b8fe2e6 100644 --- a/cpp/tests/groupby/lists_tests.cu +++ b/cpp/tests/groupby/lists_tests.cu @@ -28,6 +28,8 @@ #include #include +#include + #include #include @@ -122,7 
+124,8 @@ inline void test_hash_based_sum_agg(column_view const& keys, // For each row in expected table `t[0, num_rows)`, there must be a match // in the resulting table `t[num_rows, 2 * num_rows)` - EXPECT_TRUE(thrust::all_of(thrust::make_counting_iterator(0), + EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), + thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_rows), func)); } diff --git a/cpp/tests/hash_map/map_test.cu b/cpp/tests/hash_map/map_test.cu index c88f6a28f79..84e64027c5d 100644 --- a/cpp/tests/hash_map/map_test.cu +++ b/cpp/tests/hash_map/map_test.cu @@ -140,16 +140,18 @@ TYPED_TEST(InsertTest, UniqueKeysUniqueValues) { using map_type = typename TypeParam::map_type; using pair_type = typename TypeParam::pair_type; - thrust::tabulate( - rmm::exec_policy(), this->pairs.begin(), this->pairs.end(), unique_pair_generator{}); + thrust::tabulate(rmm::exec_policy(cudf::default_stream_value), + this->pairs.begin(), + this->pairs.end(), + unique_pair_generator{}); // All pairs should be new inserts - EXPECT_TRUE(thrust::all_of(rmm::exec_policy(), + EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), this->pairs.begin(), this->pairs.end(), insert_pair{*this->map})); // All pairs should be present in the map - EXPECT_TRUE(thrust::all_of(rmm::exec_policy(), + EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), this->pairs.begin(), this->pairs.end(), find_pair{*this->map})); @@ -159,23 +161,23 @@ TYPED_TEST(InsertTest, IdenticalKeysIdenticalValues) { using map_type = typename TypeParam::map_type; using pair_type = typename TypeParam::pair_type; - thrust::tabulate(rmm::exec_policy(), + thrust::tabulate(rmm::exec_policy(cudf::default_stream_value), this->pairs.begin(), this->pairs.end(), identical_pair_generator{}); // Insert a single pair - EXPECT_TRUE(thrust::all_of(rmm::exec_policy(), + EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), this->pairs.begin(), this->pairs.begin() + 1, insert_pair{*this->map})); // Identical inserts should all return false (no new insert) - EXPECT_FALSE(thrust::all_of(rmm::exec_policy(), + EXPECT_FALSE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), this->pairs.begin(), this->pairs.end(), insert_pair{*this->map})); // All pairs should be present in the map - EXPECT_TRUE(thrust::all_of(rmm::exec_policy(), + EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), this->pairs.begin(), this->pairs.end(), find_pair{*this->map})); @@ -185,30 +187,30 @@ TYPED_TEST(InsertTest, IdenticalKeysUniqueValues) { using map_type = typename TypeParam::map_type; using pair_type = typename TypeParam::pair_type; - thrust::tabulate(rmm::exec_policy(), + thrust::tabulate(rmm::exec_policy(cudf::default_stream_value), this->pairs.begin(), this->pairs.end(), identical_key_generator{}); // Insert a single pair - EXPECT_TRUE(thrust::all_of(rmm::exec_policy(), + EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), this->pairs.begin(), this->pairs.begin() + 1, insert_pair{*this->map})); // Identical key inserts should all return false (no new insert) - EXPECT_FALSE(thrust::all_of(rmm::exec_policy(), + EXPECT_FALSE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), this->pairs.begin() + 1, this->pairs.end(), insert_pair{*this->map})); // Only first pair is present in map - EXPECT_TRUE(thrust::all_of(rmm::exec_policy(), + EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), this->pairs.begin(), 
this->pairs.begin() + 1, find_pair{*this->map})); - EXPECT_FALSE(thrust::all_of(rmm::exec_policy(), + EXPECT_FALSE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), this->pairs.begin() + 1, this->pairs.end(), find_pair{*this->map})); diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp index 232aaa51ef3..5a0db6e3c64 100644 --- a/cpp/tests/io/json_test.cpp +++ b/cpp/tests/io/json_test.cpp @@ -27,6 +27,9 @@ #include #include #include +#include + +#include #include @@ -156,14 +159,91 @@ void check_float_column(cudf::column_view const& col, struct JsonReaderTest : public cudf::test::BaseFixture { }; -TEST_F(JsonReaderTest, BasicJsonLines) +/** + * @brief Enum class to be used to specify the test case of parametrized tests + */ +enum class json_test_t { + // Run test with the existing JSON lines reader using row-orient input data + json_lines_row_orient, + // Run test with the existing JSON lines reader using record-orient input data + json_lines_record_orient, + // Run test with the nested JSON lines reader using record-orient input data + json_experimental_record_orient +}; + +/** + * @brief Test fixture for parametrized JSON reader tests + */ +struct JsonReaderParamTest : public cudf::test::BaseFixture, + public testing::WithParamInterface { +}; + +/** + * @brief Test fixture for parametrized JSON reader tests, testing record orient-only for existing + * JSON lines reader and the new experimental reader + */ +struct JsonReaderDualTest : public cudf::test::BaseFixture, + public testing::WithParamInterface { +}; + +/** + * @brief Generates a JSON lines string that uses the record orient + * + * @param records An array of a map of key-value pairs + * @param record_delimiter The delimiter to be used to delimit a record + * @param prefix The prefix prepended to the whole string + * @param suffix The suffix to be appended after the whole string + * @return The JSON lines string that uses the record orient + */ +std::string to_records_orient(std::vector> const& records, + std::string record_delimiter, + std::string prefix = "", + std::string suffix = "") { - std::string data = "[1, 1.1]\n[2, 2.2]\n[3, 3.3]\n"; + std::string result = prefix; + for (auto record_it = std::cbegin(records); record_it != std::cend(records); record_it++) { + result += "{"; + for (auto kv_pair_it = std::cbegin(*record_it); kv_pair_it != std::cend(*record_it); + kv_pair_it++) { + auto const& [key, value] = *kv_pair_it; + result += "\"" + key + "\":" + value; + result += (kv_pair_it != std::prev(std::end(*record_it))) ? 
", " : ""; + } + result += "}"; + if (record_it != std::prev(std::end(records))) { result += record_delimiter; } + } + return (result + suffix); +} + +// Parametrize qualifying JSON tests for executing both experimental reader and existing JSON lines +// reader +INSTANTIATE_TEST_CASE_P(JsonReaderParamTest, + JsonReaderParamTest, + ::testing::Values(json_test_t::json_lines_row_orient, + json_test_t::json_lines_record_orient, + json_test_t::json_experimental_record_orient)); + +// Parametrize qualifying JSON tests for executing both experimental reader and existing JSON lines +// reader +INSTANTIATE_TEST_CASE_P(JsonReaderDualTest, + JsonReaderDualTest, + ::testing::Values(json_test_t::json_lines_record_orient, + json_test_t::json_experimental_record_orient)); + +TEST_P(JsonReaderParamTest, BasicJsonLines) +{ + auto const test_opt = GetParam(); + bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient); + std::string row_orient = "[1, 1.1]\n[2, 2.2]\n[3, 3.3]\n"; + std::string record_orient = to_records_orient( + {{{"0", "1"}, {"1", "1.1"}}, {{"0", "2"}, {"1", "2.2"}}, {{"0", "3"}, {"1", "3.3"}}}, "\n"); + std::string data = (test_opt == json_test_t::json_lines_row_orient) ? row_orient : record_orient; cudf_io::json_reader_options in_options = cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()}) .dtypes(std::vector{dtype(), dtype()}) - .lines(true); + .lines(true) + .experimental(test_experimental); cudf_io::table_with_metadata result = cudf_io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 2); @@ -182,19 +262,36 @@ TEST_F(JsonReaderTest, BasicJsonLines) float64_wrapper{{1.1, 2.2, 3.3}, validity}); } -TEST_F(JsonReaderTest, FloatingPoint) +TEST_P(JsonReaderParamTest, FloatingPoint) { + auto const test_opt = GetParam(); + bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient); + std::string row_orient = + "[5.6]\n[0.5679e2]\n[1.2e10]\n[0.07e1]\n[3000e-3]\n[12.34e0]\n[3.1e-001]\n[-73." + "98007199999998]\n"; + std::string record_orient = to_records_orient({{{"0", "5.6"}}, + {{"0", "0.5679e2"}}, + {{"0", "1.2e10"}}, + {{"0", "0.07e1"}}, + {{"0", "3000e-3"}}, + {{"0", "12.34e0"}}, + {{"0", "3.1e-001"}}, + {{"0", "-73.98007199999998"}}}, + "\n"); + std::string data = (test_opt == json_test_t::json_lines_row_orient) ? row_orient : record_orient; + auto filepath = temp_env->get_temp_dir() + "FloatingPoint.json"; { std::ofstream outfile(filepath, std::ofstream::out); - outfile << "[5.6]\n[0.5679e2]\n[1.2e10]\n[0.07e1]\n[3000e-3]\n[12.34e0]\n[3.1e-001]\n[-73." 
- "98007199999998]\n"; + outfile << data; } cudf_io::json_reader_options in_options = cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) .dtypes({dtype()}) - .lines(true); + .lines(true) + .experimental(test_experimental); + cudf_io::table_with_metadata result = cudf_io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 1); @@ -211,14 +308,21 @@ TEST_F(JsonReaderTest, FloatingPoint) ASSERT_EQ((1u << result.tbl->get_column(0).size()) - 1, bitmask[0]); } -TEST_F(JsonReaderTest, JsonLinesStrings) +TEST_P(JsonReaderParamTest, JsonLinesStrings) { - std::string data = "[1, 1.1, \"aa \"]\n[2, 2.2, \" bbb\"]"; + auto const test_opt = GetParam(); + bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient); + std::string row_orient = "[1, 1.1, \"aa \"]\n[2, 2.2, \" bbb\"]"; + std::string record_orient = to_records_orient({{{"0", "1"}, {"1", "1.1"}, {"2", R"("aa ")"}}, + {{"0", "2"}, {"1", "2.2"}, {"2", R"(" bbb")"}}}, + "\n"); + std::string data = (test_opt == json_test_t::json_lines_row_orient) ? row_orient : record_orient; cudf_io::json_reader_options in_options = cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()}) .dtypes({{"2", dtype()}, {"0", dtype()}, {"1", dtype()}}) - .lines(true); + .lines(true) + .experimental(test_experimental); cudf_io::table_with_metadata result = cudf_io::read_json(in_options); @@ -241,8 +345,12 @@ TEST_F(JsonReaderTest, JsonLinesStrings) cudf::test::strings_column_wrapper({"aa ", " bbb"})); } -TEST_F(JsonReaderTest, MultiColumn) +TEST_P(JsonReaderParamTest, MultiColumn) { + auto const test_opt = GetParam(); + bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient); + bool const row_orient = (test_opt == json_test_t::json_lines_row_orient); + constexpr auto num_rows = 10; auto int8_values = random_values(num_rows); auto int16_values = random_values(num_rows); @@ -254,10 +362,25 @@ TEST_F(JsonReaderTest, MultiColumn) auto filepath = temp_env->get_temp_dir() + "MultiColumn.json"; { std::ostringstream line; - for (int i = 0; i < num_rows; ++i) { - line << "[" << std::to_string(int8_values[i]) << "," << int16_values[i] << "," - << int32_values[i] << "," << int64_values[i] << "," << float32_values[i] << "," - << float64_values[i] << "]\n"; + if (row_orient) { + for (int i = 0; i < num_rows; ++i) { + line << "[" << std::to_string(int8_values[i]) << "," << int16_values[i] << "," + << int32_values[i] << "," << int64_values[i] << "," << float32_values[i] << "," + << float64_values[i] << "]\n"; + } + } else { + std::vector> records; + for (int i = 0; i < num_rows; ++i) { + records.push_back({ + {"0", std::to_string(int8_values[i])}, // + {"1", std::to_string(int16_values[i])}, // + {"2", std::to_string(int32_values[i])}, // + {"3", std::to_string(int64_values[i])}, // + {"4", std::to_string(float32_values[i])}, // + {"5", std::to_string(float64_values[i])}, // + }); + } + line << to_records_orient(records, "\n"); } std::ofstream outfile(filepath, std::ofstream::out); outfile << line.str(); @@ -271,7 +394,8 @@ TEST_F(JsonReaderTest, MultiColumn) dtype(), dtype(), dtype()}) - .lines(true); + .lines(true) + .experimental(test_experimental); cudf_io::table_with_metadata result = cudf_io::read_json(in_options); auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); @@ -297,18 +421,33 @@ TEST_F(JsonReaderTest, MultiColumn) check_float_column(view.column(5), float64_values, validity); } -TEST_F(JsonReaderTest, 
Booleans) +TEST_P(JsonReaderParamTest, Booleans) { + auto const test_opt = GetParam(); + bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient); + std::string row_orient = "[true]\n[true]\n[false]\n[false]\n[true]"; + std::string record_orient = to_records_orient( + { + {{"0", "true"}}, + {{"0", "true"}}, + {{"0", "false"}}, + {{"0", "false"}}, + {{"0", "true"}}, + }, + "\n"); + std::string data = (test_opt == json_test_t::json_lines_row_orient) ? row_orient : record_orient; + auto filepath = temp_env->get_temp_dir() + "Booleans.json"; { std::ofstream outfile(filepath, std::ofstream::out); - outfile << "[true]\n[true]\n[false]\n[false]\n[true]"; + outfile << data; } cudf_io::json_reader_options in_options = cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) .dtypes({dtype()}) - .lines(true); + .lines(true) + .experimental(test_experimental); cudf_io::table_with_metadata result = cudf_io::read_json(in_options); // Booleans are the same (integer) data type, but valued at 0 or 1 @@ -322,21 +461,39 @@ TEST_F(JsonReaderTest, Booleans) bool_wrapper{{true, true, false, false, true}, validity}); } -TEST_F(JsonReaderTest, Dates) +TEST_P(JsonReaderParamTest, Dates) { + auto const test_opt = GetParam(); + bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient); + std::string row_orient = + "[05/03/2001]\n[31/10/2010]\n[20/10/1994]\n[18/10/1990]\n[1/1/1970]\n" + "[18/04/1995]\n[14/07/1994]\n[07/06/2006 11:20:30.400]\n" + "[16/09/2005T1:2:30.400PM]\n[2/2/1970]\n"; + std::string record_orient = to_records_orient({{{"0", R"("05/03/2001")"}}, + {{"0", R"("31/10/2010")"}}, + {{"0", R"("20/10/1994")"}}, + {{"0", R"("18/10/1990")"}}, + {{"0", R"("1/1/1970")"}}, + {{"0", R"("18/04/1995")"}}, + {{"0", R"("14/07/1994")"}}, + {{"0", R"("07/06/2006 11:20:30.400")"}}, + {{"0", R"("16/09/2005T1:2:30.400PM")"}}, + {{"0", R"("2/2/1970")"}}}, + "\n"); + std::string data = (test_opt == json_test_t::json_lines_row_orient) ? 
row_orient : record_orient; + auto filepath = temp_env->get_temp_dir() + "Dates.json"; { std::ofstream outfile(filepath, std::ofstream::out); - outfile << "[05/03/2001]\n[31/10/2010]\n[20/10/1994]\n[18/10/1990]\n[1/1/1970]\n"; - outfile << "[18/04/1995]\n[14/07/1994]\n[07/06/2006 11:20:30.400]\n"; - outfile << "[16/09/2005T1:2:30.400PM]\n[2/2/1970]\n"; + outfile << data; } cudf_io::json_reader_options in_options = cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) .dtypes({data_type{type_id::TIMESTAMP_MILLISECONDS}}) .lines(true) - .dayfirst(true); + .dayfirst(true) + .experimental(test_experimental); cudf_io::table_with_metadata result = cudf_io::read_json(in_options); const auto view = result.tbl->view(); @@ -359,21 +516,39 @@ TEST_F(JsonReaderTest, Dates) validity}); } -TEST_F(JsonReaderTest, Durations) +TEST_P(JsonReaderParamTest, Durations) { - auto filepath = temp_env->get_temp_dir() + "Durations.json"; + auto const test_opt = GetParam(); + bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient); + std::string row_orient = + "[-2]\n[-1]\n[0]\n" + "[1 days]\n[0 days 23:01:00]\n[0 days 00:00:00.000000123]\n" + "[0:0:0.000123]\n[0:0:0.000123000]\n[00:00:00.100000001]\n" + "[-2147483648]\n[2147483647]\n"; + std::string record_orient = to_records_orient({{{"0", "-2"}}, + {{"0", "-1"}}, + {{"0", "0"}}, + {{"0", R"("1 days")"}}, + {{"0", R"("0 days 23:01:00")"}}, + {{"0", R"("0 days 00:00:00.000000123")"}}, + {{"0", R"("0:0:0.000123")"}}, + {{"0", R"("0:0:0.000123000")"}}, + {{"0", R"("00:00:00.100000001")"}}, + {{"0", R"(-2147483648)"}}, + {{"0", R"(2147483647)"}}}, + "\n"); + std::string data = (test_opt == json_test_t::json_lines_row_orient) ? row_orient : record_orient; + auto filepath = temp_env->get_temp_dir() + "Durations.json"; { std::ofstream outfile(filepath, std::ofstream::out); - outfile << "[-2]\n[-1]\n[0]\n"; - outfile << "[1 days]\n[0 days 23:01:00]\n[0 days 00:00:00.000000123]\n"; - outfile << "[0:0:0.000123]\n[0:0:0.000123000]\n[00:00:00.100000001]\n"; - outfile << "[-2147483648]\n[2147483647]\n"; + outfile << data; } cudf_io::json_reader_options in_options = cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) .dtypes({data_type{type_id::DURATION_NANOSECONDS}}) - .lines(true); + .lines(true) + .experimental(test_experimental); cudf_io::table_with_metadata result = cudf_io::read_json(in_options); const auto view = result.tbl->view(); @@ -398,13 +573,20 @@ TEST_F(JsonReaderTest, Durations) validity}); } -TEST_F(JsonReaderTest, JsonLinesDtypeInference) +TEST_P(JsonReaderParamTest, JsonLinesDtypeInference) { - std::string data = "[100, 1.1, \"aa \"]\n[200, 2.2, \" bbb\"]"; + auto const test_opt = GetParam(); + bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient); + std::string row_orient = "[100, 1.1, \"aa \"]\n[200, 2.2, \" bbb\"]"; + std::string record_orient = to_records_orient({{{"0", "100"}, {"1", "1.1"}, {"2", R"("aa ")"}}, + {{"0", "200"}, {"1", "2.2"}, {"2", R"(" bbb")"}}}, + "\n"); + std::string data = (test_opt == json_test_t::json_lines_row_orient) ? 
row_orient : record_orient; cudf_io::json_reader_options in_options = cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()}) - .lines(true); + .lines(true) + .experimental(test_experimental); cudf_io::table_with_metadata result = cudf_io::read_json(in_options); @@ -427,15 +609,24 @@ TEST_F(JsonReaderTest, JsonLinesDtypeInference) cudf::test::strings_column_wrapper({"aa ", " bbb"})); } -TEST_F(JsonReaderTest, JsonLinesFileInput) +TEST_P(JsonReaderParamTest, JsonLinesFileInput) { + auto const test_opt = GetParam(); + bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient); + std::string row_orient = "[11, 1.1]\n[22, 2.2]"; + std::string record_orient = + to_records_orient({{{"0", "11"}, {"1", "1.1"}}, {{"0", "22"}, {"1", "2.2"}}}, "\n"); + std::string data = (test_opt == json_test_t::json_lines_row_orient) ? row_orient : record_orient; + const std::string fname = temp_env->get_temp_dir() + "JsonLinesFileTest.json"; std::ofstream outfile(fname, std::ofstream::out); - outfile << "[11, 1.1]\n[22, 2.2]"; + outfile << data; outfile.close(); cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{fname}).lines(true); + cudf_io::json_reader_options::builder(cudf_io::source_info{fname}) + .lines(true) + .experimental(test_experimental); cudf_io::table_with_metadata result = cudf_io::read_json(in_options); @@ -481,15 +672,19 @@ TEST_F(JsonReaderTest, JsonLinesByteRange) int64_wrapper{{3000, 4000, 5000}, validity}); } -TEST_F(JsonReaderTest, JsonLinesObjects) +TEST_P(JsonReaderDualTest, JsonLinesObjects) { - const std::string fname = temp_env->get_temp_dir() + "JsonLinesObjectsTest.json"; + auto const test_opt = GetParam(); + bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient); + const std::string fname = temp_env->get_temp_dir() + "JsonLinesObjectsTest.json"; std::ofstream outfile(fname, std::ofstream::out); outfile << " {\"co\\\"l1\" : 1, \"col2\" : 2.0} \n"; outfile.close(); cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{fname}).lines(true); + cudf_io::json_reader_options::builder(cudf_io::source_info{fname}) + .lines(true) + .experimental(test_experimental); cudf_io::table_with_metadata result = cudf_io::read_json(in_options); @@ -507,12 +702,15 @@ TEST_F(JsonReaderTest, JsonLinesObjects) CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(1), float64_wrapper{{2.0}, validity}); } -TEST_F(JsonReaderTest, JsonLinesObjectsStrings) +TEST_P(JsonReaderDualTest, JsonLinesObjectsStrings) { - auto test_json_objects = [](std::string const& data) { + auto const test_opt = GetParam(); + bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient); + auto test_json_objects = [test_experimental](std::string const& data) { cudf_io::json_reader_options in_options = cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()}) - .lines(true); + .lines(true) + .experimental(test_experimental); cudf_io::table_with_metadata result = cudf_io::read_json(in_options); @@ -545,15 +743,18 @@ TEST_F(JsonReaderTest, JsonLinesObjectsStrings) "{\"col3\":\"bbb\", \"col1\":200, \"col2\":2.2}\n"); } -TEST_F(JsonReaderTest, JsonLinesObjectsMissingData) +TEST_P(JsonReaderDualTest, JsonLinesObjectsMissingData) { + auto const test_opt = GetParam(); + bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient); // Note: columns will be ordered based 
on which fields appear first std::string const data = "{ \"col2\":1.1, \"col3\":\"aaa\"}\n" "{\"col1\":200, \"col3\":\"bbb\"}\n"; cudf_io::json_reader_options in_options = cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()}) - .lines(true); + .lines(true) + .experimental(test_experimental); cudf_io::table_with_metadata result = cudf_io::read_json(in_options); @@ -581,15 +782,18 @@ TEST_F(JsonReaderTest, JsonLinesObjectsMissingData) cudf::test::strings_column_wrapper({"aaa", "bbb"})); } -TEST_F(JsonReaderTest, JsonLinesObjectsOutOfOrder) +TEST_P(JsonReaderDualTest, JsonLinesObjectsOutOfOrder) { + auto const test_opt = GetParam(); + bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient); std::string const data = "{\"col1\":100, \"col2\":1.1, \"col3\":\"aaa\"}\n" "{\"col3\":\"bbb\", \"col1\":200, \"col2\":2.2}\n"; cudf_io::json_reader_options in_options = cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()}) - .lines(true); + .lines(true) + .experimental(test_experimental); cudf_io::table_with_metadata result = cudf_io::read_json(in_options); @@ -675,18 +879,31 @@ TEST_F(JsonReaderTest, ArrowFileSource) int8_wrapper{{9, 8, 7, 6, 5, 4, 3, 2}, validity}); } -TEST_F(JsonReaderTest, InvalidFloatingPoint) +TEST_P(JsonReaderParamTest, InvalidFloatingPoint) { + auto const test_opt = GetParam(); + bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient); + std::string row_orient = "[1.2e1+]\n[3.4e2-]\n[5.6e3e]\n[7.8e3A]\n[9.0Be1]\n[1C.2]"; + std::string record_orient = to_records_orient({{{"0", "1.2e1+"}}, + {{"0", "3.4e2-"}}, + {{"0", "5.6e3e"}}, + {{"0", "7.8e3A"}}, + {{"0", "9.0Be1"}}, + {{"0", "1C.2"}}}, + "\n"); + std::string data = (test_opt == json_test_t::json_lines_row_orient) ? row_orient : record_orient; + const auto filepath = temp_env->get_temp_dir() + "InvalidFloatingPoint.json"; { std::ofstream outfile(filepath, std::ofstream::out); - outfile << "[1.2e1+]\n[3.4e2-]\n[5.6e3e]\n[7.8e3A]\n[9.0Be1]\n[1C.2]"; + outfile << data; } cudf_io::json_reader_options in_options = cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) .dtypes({dtype()}) - .lines(true); + .lines(true) + .experimental(test_experimental); cudf_io::table_with_metadata result = cudf_io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 1); @@ -700,20 +917,30 @@ TEST_F(JsonReaderTest, InvalidFloatingPoint) ASSERT_EQ(0u, col_data.second[0]); } -TEST_F(JsonReaderTest, StringInference) +TEST_P(JsonReaderParamTest, StringInference) { - std::string buffer = "[\"-1\"]"; + auto const test_opt = GetParam(); + bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient); + std::string row_orient = "[\"-1\"]"; + std::string record_orient = to_records_orient({{{"0", R"("-1")"}}}, "\n"); + std::string data = (test_opt == json_test_t::json_lines_row_orient) ? 
row_orient : record_orient; + cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) - .lines(true); + cudf_io::json_reader_options::builder(cudf_io::source_info{data.c_str(), data.size()}) + .lines(true) + .experimental(test_experimental); cudf_io::table_with_metadata result = cudf_io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 1); EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::STRING); } -TEST_F(JsonReaderTest, ParseInRangeIntegers) +TEST_P(JsonReaderParamTest, ParseInRangeIntegers) { + auto const test_opt = GetParam(); + bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient); + bool const row_orient = (test_opt == json_test_t::json_lines_row_orient); + constexpr auto num_rows = 4; std::vector small_int = {0, -10, 20, -30}; std::vector less_equal_int64_max = {std::numeric_limits::max() - 3, @@ -751,19 +978,41 @@ TEST_F(JsonReaderTest, ParseInRangeIntegers) auto filepath = temp_env->get_temp_dir() + "ParseInRangeIntegers.json"; { std::ostringstream line; - for (int i = 0; i < num_rows; ++i) { - line << "[" << small_int[i] << "," << less_equal_int64_max[i] << "," - << greater_equal_int64_min[i] << "," << greater_int64_max[i] << "," - << less_equal_uint64_max[i] << "," << small_int_append_zeros[i] << "," - << less_equal_int64_max_append_zeros[i] << "," << greater_equal_int64_min_append_zeros[i] - << "," << greater_int64_max_append_zeros[i] << "," - << less_equal_uint64_max_append_zeros[i] << "]\n"; + if (row_orient) { + for (int i = 0; i < num_rows; ++i) { + line << "[" << small_int[i] << "," << less_equal_int64_max[i] << "," + << greater_equal_int64_min[i] << "," << greater_int64_max[i] << "," + << less_equal_uint64_max[i] << "," << small_int_append_zeros[i] << "," + << less_equal_int64_max_append_zeros[i] << "," + << greater_equal_int64_min_append_zeros[i] << "," << greater_int64_max_append_zeros[i] + << "," << less_equal_uint64_max_append_zeros[i] << "]\n"; + } + } else { + std::vector> records; + for (int i = 0; i < num_rows; ++i) { + records.push_back({ + {"0", std::to_string(small_int[i])}, // + {"1", std::to_string(less_equal_int64_max[i])}, // + {"2", std::to_string(greater_equal_int64_min[i])}, // + {"3", std::to_string(greater_int64_max[i])}, // + {"4", std::to_string(less_equal_uint64_max[i])}, // + {"5", small_int_append_zeros[i]}, // + {"6", less_equal_int64_max_append_zeros[i]}, // + {"7", greater_equal_int64_min_append_zeros[i]}, // + {"8", greater_int64_max_append_zeros[i]}, // + {"9", less_equal_uint64_max_append_zeros[i]}, // + }); + } + line << to_records_orient(records, "\n"); } + std::ofstream outfile(filepath, std::ofstream::out); outfile << line.str(); } cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}).lines(true); + cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) + .lines(true) + .experimental(test_experimental); cudf_io::table_with_metadata result = cudf_io::read_json(in_options); @@ -782,8 +1031,12 @@ TEST_F(JsonReaderTest, ParseInRangeIntegers) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(input_less_equal_uint64_max, view.column(9)); } -TEST_F(JsonReaderTest, ParseOutOfRangeIntegers) +TEST_P(JsonReaderParamTest, ParseOutOfRangeIntegers) { + auto const test_opt = GetParam(); + bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient); + bool const row_orient = (test_opt == json_test_t::json_lines_row_orient); + 
constexpr auto num_rows = 4; std::vector out_of_range_positive = {"111111111111111111111", "2222222222222222222222", @@ -830,18 +1083,41 @@ TEST_F(JsonReaderTest, ParseOutOfRangeIntegers) auto filepath = temp_env->get_temp_dir() + "ParseOutOfRangeIntegers.json"; { std::ostringstream line; - for (int i = 0; i < num_rows; ++i) { - line << "[" << out_of_range_positive[i] << "," << out_of_range_negative[i] << "," - << greater_uint64_max[i] << "," << less_int64_min[i] << "," << mixed_range[i] << "," - << out_of_range_positive_append_zeros[i] << "," << out_of_range_negative_append_zeros[i] - << "," << greater_uint64_max_append_zeros[i] << "," << less_int64_min_append_zeros[i] - << "," << mixed_range_append_zeros[i] << "]\n"; + if (row_orient) { + for (int i = 0; i < num_rows; ++i) { + line << "[" << out_of_range_positive[i] << "," << out_of_range_negative[i] << "," + << greater_uint64_max[i] << "," << less_int64_min[i] << "," << mixed_range[i] << "," + << out_of_range_positive_append_zeros[i] << "," + << out_of_range_negative_append_zeros[i] << "," << greater_uint64_max_append_zeros[i] + << "," << less_int64_min_append_zeros[i] << "," << mixed_range_append_zeros[i] + << "]\n"; + } + } else { + std::vector> records; + for (int i = 0; i < num_rows; ++i) { + records.push_back({ + {"0", out_of_range_positive[i]}, // + {"1", out_of_range_negative[i]}, // + {"2", greater_uint64_max[i]}, // + {"3", less_int64_min[i]}, // + {"4", mixed_range[i]}, // + {"5", out_of_range_positive_append_zeros[i]}, // + {"6", out_of_range_negative_append_zeros[i]}, // + {"7", greater_uint64_max_append_zeros[i]}, // + {"8", less_int64_min_append_zeros[i]}, // + {"9", mixed_range_append_zeros[i]}, // + }); + } + line << to_records_orient(records, "\n"); } + std::ofstream outfile(filepath, std::ofstream::out); outfile << line.str(); } cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}).lines(true); + cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) + .lines(true) + .experimental(test_experimental); cudf_io::table_with_metadata result = cudf_io::read_json(in_options); @@ -859,20 +1135,30 @@ TEST_F(JsonReaderTest, ParseOutOfRangeIntegers) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(input_mixed_range_append, view.column(9)); } -TEST_F(JsonReaderTest, JsonLinesMultipleFileInputs) +TEST_P(JsonReaderParamTest, JsonLinesMultipleFileInputs) { + auto const test_opt = GetParam(); + bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient); + std::vector row_orient{"[11, 1.1]\n[22, 2.2]\n", "[33, 3.3]\n[44, 4.4]"}; + std::vector record_orient{ + to_records_orient({{{"0", "11"}, {"1", "1.1"}}, {{"0", "22"}, {"1", "2.2"}}}, "\n") + "\n", + to_records_orient({{{"0", "33"}, {"1", "3.3"}}, {{"0", "44"}, {"1", "4.4"}}}, "\n") + "\n"}; + auto const& data = (test_opt == json_test_t::json_lines_row_orient) ? 
row_orient : record_orient; + const std::string file1 = temp_env->get_temp_dir() + "JsonLinesFileTest1.json"; std::ofstream outfile(file1, std::ofstream::out); - outfile << "[11, 1.1]\n[22, 2.2]\n"; + outfile << data[0]; outfile.close(); const std::string file2 = temp_env->get_temp_dir() + "JsonLinesFileTest2.json"; std::ofstream outfile2(file2, std::ofstream::out); - outfile2 << "[33, 3.3]\n[44, 4.4]"; + outfile2 << data[1]; outfile2.close(); cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{{file1, file2}}).lines(true); + cudf_io::json_reader_options::builder(cudf_io::source_info{{file1, file2}}) + .lines(true) + .experimental(test_experimental); cudf_io::table_with_metadata result = cudf_io::read_json(in_options); @@ -1000,10 +1286,168 @@ TEST_F(JsonReaderTest, ExperimentalLinesNoOmissions) json_lines_options.enable_experimental(true); cudf::io::table_with_metadata new_reader_table = cudf::io::read_json(json_lines_options); - // Verify that the data read via non-nested JSON lines reader matches the data read via nested - // JSON reader + // Verify that the data read via non-nested JSON lines reader matches the data read via + // nested JSON reader CUDF_TEST_EXPECT_TABLES_EQUAL(current_reader_table.tbl->view(), new_reader_table.tbl->view()); } } +TEST_F(JsonReaderTest, TestColumnOrder) +{ + std::string const json_string = + // Expected order: + // root: b, c, a, d + // a: 2, 0, 1 + {R"({"b":"b0"} + {"c":"c1","a":{"2":null}} + {"d":"d2","a":{"0":"a2.0", "2":"a2.2"}} + {"b":"b3","a":{"1":null, "2":"a3.2"}})"}; + + std::vector const root_col_names{"b", "c", "a", "d"}; + std::vector const a_child_col_names{"2", "0", "1"}; + + // Initialize parsing options (reading json lines) + cudf::io::json_reader_options json_lines_options = + cudf::io::json_reader_options::builder( + cudf::io::source_info{json_string.c_str(), json_string.size()}) + .lines(true) + .experimental(true); + + // Read in data using nested JSON reader + cudf::io::table_with_metadata new_reader_table = cudf::io::read_json(json_lines_options); + + // Verify root column order (assert to avoid OOB access) + ASSERT_EQ(new_reader_table.metadata.schema_info.size(), root_col_names.size()); + + for (std::size_t i = 0; i < a_child_col_names.size(); i++) { + auto const& root_col_name = root_col_names[i]; + EXPECT_EQ(new_reader_table.metadata.schema_info[i].name, root_col_name); + } + + // Verify nested child column order (assert to avoid OOB access) + ASSERT_EQ(new_reader_table.metadata.schema_info[2].children.size(), a_child_col_names.size()); + for (std::size_t i = 0; i < a_child_col_names.size(); i++) { + auto const& a_child_col_name = a_child_col_names[i]; + EXPECT_EQ(new_reader_table.metadata.schema_info[2].children[i].name, a_child_col_name); + } + + // Verify data of root columns + ASSERT_EQ(root_col_names.size(), new_reader_table.tbl->num_columns()); + column_wrapper root_col_data_b{{"b0", "", "", "b3"}, + {true, false, false, true}}; + column_wrapper root_col_data_c{{"", "c1", "", ""}, + {false, true, false, false}}; + column_wrapper root_col_data_d{{"", "", "d2", ""}, + {false, false, true, false}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(root_col_data_b, new_reader_table.tbl->get_column(0)); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(root_col_data_c, new_reader_table.tbl->get_column(1)); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(root_col_data_d, new_reader_table.tbl->get_column(3)); + + // Verify data of child columns of column 'a' + auto const col_a = new_reader_table.tbl->get_column(2); + 
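// The assertions below rely on two properties that the input above is built to
// exercise: the children of "a" are materialized in first-seen order ("2",
// then "0", then "1"), and the field "1", which only ever appears with a null
// value, is still materialized as a (fully null) column.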
ASSERT_EQ(a_child_col_names.size(), col_a.num_children()); + column_wrapper col_a2{{"", "", "a2.2", "a3.2"}, {false, false, true, true}}; + column_wrapper col_a0{{"", "", "a2.0", ""}, {false, false, true, false}}; + // col a.1 is inferred as all-null + int8_wrapper col_a1{{0, 0, 0, 0}, {false, false, false, false}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(col_a2, col_a.child(0)); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(col_a0, col_a.child(1)); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(col_a1, col_a.child(2)); +} + +TEST_P(JsonReaderParamTest, JsonDtypeSchema) +{ + auto const test_opt = GetParam(); + bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient); + std::string row_orient = "[1, 1.1, \"aa \"]\n[2, 2.2, \" bbb\"]"; + std::string record_orient = to_records_orient({{{"0", "1"}, {"1", "1.1"}, {"2", R"("aa ")"}}, + {{"0", "2"}, {"1", "2.2"}, {"2", R"(" bbb")"}}}, + "\n"); + + std::string data = (test_opt == json_test_t::json_lines_row_orient) ? row_orient : record_orient; + + std::map dtype_schema{ + {"2", {dtype()}}, {"0", {dtype()}}, {"1", {dtype()}}}; + cudf_io::json_reader_options in_options = + cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()}) + .dtypes(dtype_schema) + .lines(true) + .experimental(test_experimental); + + cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + + EXPECT_EQ(result.tbl->num_columns(), 3); + EXPECT_EQ(result.tbl->num_rows(), 2); + + EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::INT32); + EXPECT_EQ(result.tbl->get_column(1).type().id(), cudf::type_id::FLOAT64); + EXPECT_EQ(result.tbl->get_column(2).type().id(), cudf::type_id::STRING); + + EXPECT_EQ(result.metadata.schema_info[0].name, "0"); + EXPECT_EQ(result.metadata.schema_info[1].name, "1"); + EXPECT_EQ(result.metadata.schema_info[2].name, "2"); + + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), int_wrapper{{1, 2}, validity}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(1), float64_wrapper{{1.1, 2.2}, validity}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(2), + cudf::test::strings_column_wrapper({"aa ", " bbb"})); +} + +TEST_F(JsonReaderTest, JsonNestedDtypeSchema) +{ + std::string json_string = R"( [{"a":[123, {"0": 123}], "b":1.0}, {"b":1.1}, {"b":2.1}])"; + + std::map dtype_schema{ + {"a", + { + data_type{cudf::type_id::LIST}, + {{"element", {data_type{cudf::type_id::STRUCT}, {{"0", {dtype()}}}}}}, + }}, + {"b", {dtype()}}, + }; + + cudf_io::json_reader_options in_options = + cudf_io::json_reader_options::builder( + cudf_io::source_info{json_string.data(), json_string.size()}) + .dtypes(dtype_schema) + .lines(false) + .experimental(true); + + cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + + // Make sure we have columns "a" and "b" + ASSERT_EQ(result.tbl->num_columns(), 2); + ASSERT_EQ(result.metadata.schema_info.size(), 2); + EXPECT_EQ(result.metadata.schema_info[0].name, "a"); + EXPECT_EQ(result.metadata.schema_info[1].name, "b"); + // Make sure column "a" is a list column (offsets and elements) + ASSERT_EQ(result.tbl->get_column(0).num_children(), 2); + ASSERT_EQ(result.metadata.schema_info[0].children.size(), 2); + // Make sure column "b" is a leaf column + ASSERT_EQ(result.tbl->get_column(1).num_children(), 0); + ASSERT_EQ(result.metadata.schema_info[1].children.size(), 0); + // Offsets child with no other child columns + 
ASSERT_EQ(result.tbl->get_column(0).child(0).num_children(), 0); + ASSERT_EQ(result.metadata.schema_info[0].children[0].children.size(), 0); + EXPECT_EQ(result.metadata.schema_info[0].children[0].name, "offsets"); + // Elements is the struct column with a single child column "0" + ASSERT_EQ(result.tbl->get_column(0).child(1).num_children(), 1); + ASSERT_EQ(result.metadata.schema_info[0].children[1].children.size(), 1); + EXPECT_EQ(result.metadata.schema_info[0].children[1].name, "element"); + + // Verify column "a" being a list column + EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::LIST); + // Verify column "a->element->0" is a float column + EXPECT_EQ(result.tbl->get_column(0).child(1).child(0).type().id(), cudf::type_id::FLOAT32); + // Verify column "b" is an int column + EXPECT_EQ(result.tbl->get_column(1).type().id(), cudf::type_id::INT32); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0).child(1).child(0), + float_wrapper{{0.0, 123.0}, {false, true}}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(1), + int_wrapper{{1, 1, 2}, {true, true, true}}); +} + CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/io/json_tree.cpp b/cpp/tests/io/json_tree.cpp new file mode 100644 index 00000000000..940d9d8ca0a --- /dev/null +++ b/cpp/tests/io/json_tree.cpp @@ -0,0 +1,826 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include <io/json/nested_json.hpp>
+#include <io/utilities/hostdevice_vector.hpp>
+
+#include <cudf/detail/utilities/vector_factories.hpp>
+#include <cudf/io/json.hpp>
+#include <cudf/scalar/scalar.hpp>
+#include <cudf/utilities/default_stream.hpp>
+#include <cudf/utilities/span.hpp>
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/cudf_gtest.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
+
+#include <stack>
+
+namespace cuio_json = cudf::io::json;
+namespace cudf::io::json {
+// Host copy of tree_meta_t
+struct tree_meta_t2 {
+  std::vector<NodeT> node_categories;
+  std::vector<NodeIndexT> parent_node_ids;
+  std::vector<TreeDepthT> node_levels;
+  std::vector<SymbolOffsetT> node_range_begin;
+  std::vector<SymbolOffsetT> node_range_end;
+};
+}  // namespace cudf::io::json
+
+namespace {
+std::string get_node_string(std::size_t const node_id,
+                            cuio_json::tree_meta_t2 const& tree_rep,
+                            std::string const& json_input)
+{
+  auto node_to_str = [](cuio_json::PdaTokenT const token) {
+    switch (token) {
+      case cuio_json::NC_STRUCT: return "STRUCT";
+      case cuio_json::NC_LIST: return "LIST";
+      case cuio_json::NC_FN: return "FN";
+      case cuio_json::NC_STR: return "STR";
+      case cuio_json::NC_VAL: return "VAL";
+      case cuio_json::NC_ERR: return "ERR";
+      default: return "N/A";
+    };
+  };
+
+  return "<" + std::to_string(node_id) + ":" + node_to_str(tree_rep.node_categories[node_id]) +
+         ":[" + std::to_string(tree_rep.node_range_begin[node_id]) + ", " +
+         std::to_string(tree_rep.node_range_end[node_id]) + ") '" +
+         json_input.substr(tree_rep.node_range_begin[node_id],
+                           tree_rep.node_range_end[node_id] - tree_rep.node_range_begin[node_id]) +
+         "'>";
+}
+
+void print_tree_representation(std::string const& json_input,
+                               cuio_json::tree_meta_t2 const& tree_rep)
+{
+  for (std::size_t i = 0; i < tree_rep.node_categories.size(); i++) {
+    auto parent_id = tree_rep.parent_node_ids[i];
+    std::stack<std::size_t> path;
+    path.push(i);
+    while (parent_id != cuio_json::parent_node_sentinel) {
+      path.push(parent_id);
+      parent_id = tree_rep.parent_node_ids[parent_id];
+    }
+
+    while (path.size()) {
+      auto const node_id = path.top();
+      std::cout << get_node_string(node_id, tree_rep, json_input)
+                << (path.size() > 1 ? " -> " : "");
+      path.pop();
+    }
+    std::cout << "\n";
+  }
+}
+}  // namespace
+
+namespace cudf::io::json {
+namespace test {
+
+tree_meta_t2 to_cpu_tree(tree_meta_t const& d_value, rmm::cuda_stream_view stream)
+{
+  return {cudf::detail::make_std_vector_async(d_value.node_categories, stream),
+          cudf::detail::make_std_vector_async(d_value.parent_node_ids, stream),
+          cudf::detail::make_std_vector_async(d_value.node_levels, stream),
+          cudf::detail::make_std_vector_async(d_value.node_range_begin, stream),
+          cudf::detail::make_std_vector_async(d_value.node_range_end, stream)};
+}
+
+// DEBUG prints
+auto to_cat = [](auto v) -> std::string {
+  switch (v) {
+    case NC_STRUCT: return " S";
+    case NC_LIST: return " L";
+    case NC_STR: return " \"";
+    case NC_VAL: return " V";
+    case NC_FN: return " F";
+    case NC_ERR: return "ER";
+    default: return "UN";
+  };
+};
+auto to_int    = [](auto v) { return std::to_string(static_cast<int>(v)); };
+auto print_vec = [](auto const& cpu, auto const name, auto converter) {
+  for (auto const& v : cpu)
+    printf("%3s,", converter(v).c_str());
+  std::cout << name << std::endl;
+};
+void print_tree(tree_meta_t2 const& cpu_tree)
+{
+  print_vec(cpu_tree.node_categories, "node_categories", to_cat);
+  print_vec(cpu_tree.parent_node_ids, "parent_node_ids", to_int);
+  print_vec(cpu_tree.node_levels, "node_levels", to_int);
+  print_vec(cpu_tree.node_range_begin, "node_range_begin", to_int);
+  print_vec(cpu_tree.node_range_end, "node_range_end", to_int);
+}
+void print_tree(tree_meta_t const& d_gpu_tree)
+{
+  auto const cpu_tree = to_cpu_tree(d_gpu_tree, rmm::cuda_stream_default);
+  print_tree(cpu_tree);
+}
+
+template <typename T>
+bool compare_vector(std::vector<T> const& cpu_vec,
+                    std::vector<T> const& gpu_vec,
+                    std::string const& name)
+{
+  EXPECT_EQ(cpu_vec.size(), gpu_vec.size());
+  bool mismatch = false;
+  if (!std::equal(cpu_vec.begin(), cpu_vec.end(), gpu_vec.begin())) {
+    print_vec(cpu_vec, name + "(cpu)", to_int);
+    print_vec(gpu_vec, name + "(gpu)", to_int);
+    for (auto i = 0lu; i < cpu_vec.size(); i++) {
+      mismatch |= (cpu_vec[i] != gpu_vec[i]);
+      printf("%3s,", (cpu_vec[i] == gpu_vec[i] ? " " : "x"));
+    }
+    printf("\n");
+  }
+  EXPECT_FALSE(mismatch);
+  return mismatch;
+}
+
+template <typename T>
+bool compare_vector(std::vector<T> const& cpu_vec,
+                    rmm::device_uvector<T> const& d_vec,
+                    std::string const& name)
+{
+  auto gpu_vec = cudf::detail::make_std_vector_async(d_vec, cudf::default_stream_value);
+  return compare_vector(cpu_vec, gpu_vec, name);
+}
+
+void compare_trees(tree_meta_t2 const& cpu_tree, tree_meta_t const& d_gpu_tree, bool print = false)
+{
+  auto cpu_num_nodes = cpu_tree.node_categories.size();
+  EXPECT_EQ(cpu_num_nodes, d_gpu_tree.node_categories.size());
+  EXPECT_EQ(cpu_num_nodes, d_gpu_tree.parent_node_ids.size());
+  EXPECT_EQ(cpu_num_nodes, d_gpu_tree.node_levels.size());
+  EXPECT_EQ(cpu_num_nodes, d_gpu_tree.node_range_begin.size());
+  EXPECT_EQ(cpu_num_nodes, d_gpu_tree.node_range_end.size());
+  auto gpu_tree = to_cpu_tree(d_gpu_tree, cudf::default_stream_value);
+  bool mismatch = false;
+
+#define COMPARE_MEMBER(member)                                                       \
+  for (std::size_t i = 0; i < cpu_num_nodes; i++) {                                  \
+    EXPECT_EQ(cpu_tree.member[i], gpu_tree.member[i]) << #member << "[" << i << "]"; \
+  }
+  COMPARE_MEMBER(node_categories);
+  COMPARE_MEMBER(parent_node_ids);
+  COMPARE_MEMBER(node_levels);
+  COMPARE_MEMBER(node_range_begin);
+  COMPARE_MEMBER(node_range_end);
+#undef COMPARE_MEMBER
+
+#define PRINT_VEC(vec, conv) print_vec(vec, #vec, conv);
+#define PRINT_COMPARISON(vec, conv)                                                  \
+  PRINT_VEC(cpu_tree.vec, conv);                                                     \
+  PRINT_VEC(gpu_tree.vec, conv);                                                     \
+  if (!std::equal(cpu_tree.vec.begin(), cpu_tree.vec.end(), gpu_tree.vec.begin())) { \
+    for (auto i = 0lu; i < cpu_tree.vec.size(); i++) {                               \
+      mismatch |= (gpu_tree.vec[i] != cpu_tree.vec[i]);                              \
+      printf("%3s,", (gpu_tree.vec[i] == cpu_tree.vec[i] ? " " : "x"));              \
+    }                                                                                \
+    printf("\n");                                                                    \
+  }
+  if (print) {
+    for (int i = 0; i < int(cpu_num_nodes); i++)
+      printf("%3d,", i);
+    printf(" node_id\n");
+    PRINT_COMPARISON(node_categories, to_cat);   // Works
+    PRINT_COMPARISON(node_levels, to_int);       // Works
+    PRINT_COMPARISON(node_range_begin, to_int);  // Works
+    PRINT_COMPARISON(node_range_end, to_int);    // Works
+    PRINT_COMPARISON(parent_node_ids, to_int);   // Works
+    EXPECT_FALSE(mismatch);
+  }
+#undef PRINT_VEC
+#undef PRINT_COMPARISON
+}
+
+template <typename T>
+auto translate_col_id(T const& col_id)
+{
+  using value_type = typename T::value_type;
+  std::unordered_map<value_type, value_type> col_id_map;
+  std::vector<value_type> new_col_ids(col_id.size());
+  value_type unique_id = 0;
+  for (auto id : col_id) {
+    if (col_id_map.count(id) == 0) { col_id_map[id] = unique_id++; }
+  }
+  for (size_t i = 0; i < col_id.size(); i++) {
+    new_col_ids[i] = col_id_map[col_id[i]];
+  }
+  return new_col_ids;
+}
+
+tree_meta_t2 get_tree_representation_cpu(device_span<PdaTokenT const> tokens_gpu,
+                                         device_span<SymbolOffsetT const> token_indices_gpu1,
+                                         cudf::io::json_reader_options const& options,
+                                         rmm::cuda_stream_view stream)
+{
+  constexpr bool include_quote_char = true;
+  // Copy the JSON tokens to the host
+  thrust::host_vector<PdaTokenT> tokens = cudf::detail::make_host_vector_async(tokens_gpu, stream);
+  thrust::host_vector<SymbolOffsetT> token_indices =
+    cudf::detail::make_host_vector_async(token_indices_gpu1, stream);
+
+  // Make sure tokens have been copied to the host
+  stream.synchronize();
+
+  // DEBUG print
+  [[maybe_unused]] auto to_token_str = [](PdaTokenT token) {
+    switch (token) {
+      case token_t::StructBegin: return " {";
+      case token_t::StructEnd: return " }";
+      case token_t::ListBegin: return " [";
+      case token_t::ListEnd: return " ]";
+      case token_t::FieldNameBegin: return "FB";
+      case token_t::FieldNameEnd: return "FE";
+      case token_t::StringBegin: return "SB";
+      case token_t::StringEnd: return "SE";
+      case token_t::ErrorBegin: return "er";
+      case token_t::ValueBegin: return "VB";
+      case token_t::ValueEnd: return "VE";
+      case token_t::StructMemberBegin: return " <";
+      case token_t::StructMemberEnd: return " >";
+      default: return ".";
+    }
+  };
+  if (std::getenv("NJP_DEBUG_DUMP") != nullptr) {
+    std::cout << "Tokens: \n";
+    for (auto i = 0u; i < tokens.size(); i++) {
+      std::cout << to_token_str(tokens[i]) << " ";
+    }
+    std::cout << std::endl;
+  }
+
+  // Whether a token represents a node in the tree representation
+  auto is_node = [](PdaTokenT const token) {
+    switch (token) {
+      case token_t::StructBegin:
+      case token_t::ListBegin:
+      case token_t::StringBegin:
+      case token_t::ValueBegin:
+      case token_t::FieldNameBegin:
+      case token_t::ErrorBegin: return true;
+      default: return false;
+    };
+  };
+
+  // The node that a token represents
+  auto token_to_node = [](PdaTokenT const token) {
+    switch (token) {
+      case token_t::StructBegin: return NC_STRUCT;
+      case token_t::ListBegin: return NC_LIST;
+      case token_t::StringBegin: return NC_STR;
+      case token_t::ValueBegin: return NC_VAL;
+      case token_t::FieldNameBegin: return NC_FN;
+      default: return NC_ERR;
+    };
+  };
+
+  // Includes the quote char for the end-of-string token, or skips the quote char for the
+  // beginning-of-field-name token
+  auto get_token_index = [include_quote_char](PdaTokenT const token,
+                                              SymbolOffsetT const token_index) {
+    constexpr SymbolOffsetT quote_char_size = 1;
+    switch (token) {
+      // Strip off or include quote char for StringBegin
+      case token_t::StringBegin: return token_index + (include_quote_char ? 0 : quote_char_size);
+      // Strip off or include trailing quote char for string values for StringEnd
+      case token_t::StringEnd: return token_index + (include_quote_char ? quote_char_size : 0);
+      // Strip off quote char included for FieldNameBegin
+      case token_t::FieldNameBegin: return token_index + quote_char_size;
+      default: return token_index;
+    };
+  };
+
+  // Whether a token expects to be followed by its respective end-of-* token partner
+  auto is_begin_of_section = [](PdaTokenT const token) {
+    switch (token) {
+      case token_t::StringBegin:
+      case token_t::ValueBegin:
+      case token_t::FieldNameBegin: return true;
+      default: return false;
+    };
+  };
+
+  // The end-of-* partner token for a given beginning-of-* token
+  auto end_of_partner = [](PdaTokenT const token) {
+    switch (token) {
+      case token_t::StringBegin: return token_t::StringEnd;
+      case token_t::ValueBegin: return token_t::ValueEnd;
+      case token_t::FieldNameBegin: return token_t::FieldNameEnd;
+      default: return token_t::ErrorBegin;
+    };
+  };
+
+  // Whether the token pops from the parent node stack
+  auto does_pop = [](PdaTokenT const token) {
+    switch (token) {
+      case token_t::StructEnd:
+      case token_t::ListEnd: return true;
+      default: return false;
+    };
+  };
+
+  // Whether the token pushes onto the parent node stack
+  auto does_push = [](PdaTokenT const token) {
+    switch (token) {
+      case token_t::StructBegin:
+      case token_t::ListBegin: return true;
+      default: return false;
+    };
+  };
+
+  // The node id sitting on top of the stack becomes the node's parent
+  // The full stack represents the path from the root to the current node
+  std::stack<std::pair<NodeIndexT, bool>> parent_stack;
+
+  constexpr bool field_name_node    = true;
+  constexpr bool no_field_name_node = false;
+
+  std::vector<NodeT> node_categories;
+  std::vector<NodeIndexT> parent_node_ids;
+  std::vector<TreeDepthT> node_levels;
+  std::vector<SymbolOffsetT> node_range_begin;
+  std::vector<SymbolOffsetT> node_range_end;
+
+  std::size_t node_id = 0;
+  for (std::size_t i = 0; i < tokens.size(); i++) {
+    auto token = tokens[i];
+
+    // The section from the original JSON input that this token demarcates
+    std::size_t range_begin = get_token_index(token, token_indices[i]);
+    std::size_t range_end   = range_begin + 1;
+
+    // Identify this node's parent node id
+    std::size_t parent_node_id =
+      (parent_stack.size() > 0) ? parent_stack.top().first : parent_node_sentinel;
+
+    // If this token is the beginning-of-{value, string, field name}, also consume the next
+    // end-of-* token
+    if (is_begin_of_section(token)) {
+      if ((i + 1) < tokens.size() && end_of_partner(token) == tokens[i + 1]) {
+        // Update the range_end for this pair of tokens
+        range_end = get_token_index(tokens[i + 1], token_indices[i + 1]);
+        // We can skip the subsequent end-of-* token
+        i++;
+      }
+    }
+
+    // Emit node if this token becomes a node in the tree
+    if (is_node(token)) {
+      node_categories.push_back(token_to_node(token));
+      parent_node_ids.push_back(parent_node_id);
+      node_levels.push_back(parent_stack.size());
+      node_range_begin.push_back(range_begin);
+      node_range_end.push_back(range_end);
+    }
+
+    // Modify the stack if needed
+    if (token == token_t::FieldNameBegin) {
+      parent_stack.push({node_id, field_name_node});
+    } else {
+      if (does_push(token)) {
+        parent_stack.push({node_id, no_field_name_node});
+      } else if (does_pop(token)) {
+        CUDF_EXPECTS(parent_stack.size() >= 1, "Invalid JSON input.");
+        parent_stack.pop();
+      }
+
+      // If what we're left with is a field name on top of stack, we need to pop it
+      if (parent_stack.size() >= 1 && parent_stack.top().second == field_name_node) {
+        parent_stack.pop();
+      }
+    }
+
+    // Update node_id
+    if (is_node(token)) { node_id++; }
+  }
+
+  return {std::move(node_categories),
+          std::move(parent_node_ids),
+          std::move(node_levels),
+          std::move(node_range_begin),
+          std::move(node_range_end)};
+}
+
+std::tuple<std::vector<NodeIndexT>, std::vector<size_type>> records_orient_tree_traversal_cpu(
+  host_span<SymbolT const> input, tree_meta_t2 const& tree, rmm::cuda_stream_view stream)
+{
+  std::vector<NodeIndexT> node_ids(tree.parent_node_ids.size());
+  std::iota(node_ids.begin(), node_ids.end(), 0);
+
+  if (std::getenv("NJP_DEBUG_DUMP") != nullptr) {
+    for (int i = 0; i < int(tree.node_range_begin.size()); i++) {
+      printf("%3s ",
+             std::string(input.data() + tree.node_range_begin[i],
+                         tree.node_range_end[i] - tree.node_range_begin[i])
+               .c_str());
+    }
+    printf(" (JSON)\n");
+    print_vec(tree.node_categories, "node_categories", to_cat);
+    print_vec(node_ids, "cpu.node_ids", to_int);
+  }
+
+  // print_vec(tree.parent_node_ids, "tree.parent_node_ids (before)");
+  constexpr NodeIndexT top_node = -1;
+  // CPU version of the algorithm
+  // Calculate row offsets too.
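+  // Column-id assignment below, in brief (worked example; the concrete ids depend on
+  // first-visit order): two nodes land in the same column iff their root-to-node paths of
+  // (level, category[, field name]) compare equal. For records-orient input
+  // [{"a":1},{"a":2}], the two STRUCT nodes, the two FN("a") nodes and the two value nodes
+  // each share a path, so each pair collapses onto one column id; the enclosing LIST then
+  // yields row offsets 0 and 1 for the two records in the loop further below.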
+  auto hash_path = [&](auto node_id) {
+    size_t seed = 0;
+    while (node_id != top_node) {
+      seed = cudf::detail::hash_combine(seed, std::hash<TreeDepthT>{}(tree.node_levels[node_id]));
+      seed = cudf::detail::hash_combine(seed, std::hash<NodeT>{}(tree.node_categories[node_id]));
+      if (tree.node_categories[node_id] == node_t::NC_FN) {
+        auto field_name =
+          std::string_view(input.data() + tree.node_range_begin[node_id],
+                           tree.node_range_end[node_id] - tree.node_range_begin[node_id]);
+        seed = cudf::detail::hash_combine(seed, std::hash<std::string_view>{}(field_name));
+      }
+      node_id = tree.parent_node_ids[node_id];
+    }
+    return seed;
+  };
+  auto equal_path = [&](auto node_id1, auto node_id2) {
+    bool is_equal = true;
+    while (is_equal and node_id1 != top_node and node_id2 != top_node) {
+      is_equal &= tree.node_levels[node_id1] == tree.node_levels[node_id2];
+      is_equal &= tree.node_categories[node_id1] == tree.node_categories[node_id2];
+      if (is_equal and tree.node_categories[node_id1] == node_t::NC_FN) {
+        auto field_name1 =
+          std::string_view(input.data() + tree.node_range_begin[node_id1],
+                           tree.node_range_end[node_id1] - tree.node_range_begin[node_id1]);
+        auto field_name2 =
+          std::string_view(input.data() + tree.node_range_begin[node_id2],
+                           tree.node_range_end[node_id2] - tree.node_range_begin[node_id2]);
+        is_equal &= field_name1 == field_name2;
+      }
+      node_id1 = tree.parent_node_ids[node_id1];
+      node_id2 = tree.parent_node_ids[node_id2];
+    }
+    return is_equal and node_id1 == top_node and node_id2 == top_node;
+  };
+  std::unordered_map<NodeIndexT, NodeIndexT, decltype(hash_path), decltype(equal_path)> node_id_map(
+    10, hash_path, equal_path);
+  auto unique_col_id = 0;
+  for (auto& node_idx : node_ids) {
+    if (node_id_map.count(node_idx) == 0) {
+      node_id_map[node_idx] = unique_col_id++;  // node_idx;
+      node_idx              = node_id_map[node_idx];
+    } else {
+      node_idx = node_id_map[node_idx];
+    }
+  }
+  // Translate parent_node_ids
+  auto parent_col_ids(tree.parent_node_ids);
+  for (auto& parent_node_id : parent_col_ids) {
+    if (parent_node_id != top_node) parent_node_id = node_ids[parent_node_id];
+  }
+  if (std::getenv("NJP_DEBUG_DUMP") != nullptr) {
+    print_vec(node_ids, "cpu.node_ids (after)", to_int);
+    print_vec(tree.parent_node_ids, "cpu.parent_node_ids (after)", to_int);
+  }
+  // row_offsets
+  std::vector<size_type> row_offsets(tree.parent_node_ids.size(), 0);
+  std::unordered_map<NodeIndexT, size_type> col_id_current_offset;
+  for (std::size_t i = 0; i < tree.parent_node_ids.size(); i++) {
+    auto current_col_id = node_ids[i];
+    auto parent_col_id  = parent_col_ids[i];
+    auto parent_node_id = tree.parent_node_ids[i];
+    if (parent_col_id == top_node) {
+      // row_offsets[current_col_id] = 0; // JSON lines treats top node as list.
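+      // A parent equal to the sentinel means this node is an immediate child of the root,
+      // i.e. one record in records orient / JSON lines input, so each occurrence of this
+      // column starts a new row: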
+      col_id_current_offset[current_col_id]++;
+      row_offsets[i] = col_id_current_offset[current_col_id] - 1;
+    } else {
+      if (tree.node_categories[parent_node_id] == node_t::NC_LIST) {
+        col_id_current_offset[current_col_id]++;
+        row_offsets[i] = col_id_current_offset[current_col_id] - 1;
+      } else {
+        row_offsets[i]                         = col_id_current_offset[parent_col_id] - 1;
+        col_id_current_offset[current_col_id] = col_id_current_offset[parent_col_id];
+      }
+    }
+  }
+  if (std::getenv("NJP_DEBUG_DUMP") != nullptr) {
+    print_vec(row_offsets, "cpu.row_offsets (generated)", to_int);
+  }
+  return {std::move(node_ids), std::move(row_offsets)};
+}
+
+}  // namespace test
+}  // namespace cudf::io::json
+
+namespace json_test = cudf::io::json::test;
+
+// Base test fixture for tests
+struct JsonTest : public cudf::test::BaseFixture {
+};
+
+TEST_F(JsonTest, TreeRepresentation)
+{
+  auto const stream = cudf::default_stream_value;
+
+  // Test input
+  std::string const input = R"(  [{)"
+                            R"("category": "reference",)"
+                            R"("index:": [4,12,42],)"
+                            R"("author": "Nigel Rees",)"
+                            R"("title": "[Sayings of the Century]",)"
+                            R"("price": 8.95)"
+                            R"(},  )"
+                            R"({)"
+                            R"("category": "reference",)"
+                            R"("index": [4,{},null,{"a":[{ }, {}] } ],)"
+                            R"("author": "Nigel Rees",)"
+                            R"("title": "{}[], <=semantic-symbols-string",)"
+                            R"("price": 8.95)"
+                            R"(}] )";
+  // Prepare input & output buffers
+  cudf::string_scalar const d_scalar(input, true, stream);
+  auto const d_input = cudf::device_span<cuio_json::SymbolT const>{
+    d_scalar.data(), static_cast<size_t>(d_scalar.size())};
+
+  cudf::io::json_reader_options const options{};
+
+  // Parse the JSON and get the token stream
+  const auto [tokens_gpu, token_indices_gpu] =
+    cudf::io::json::detail::get_token_stream(d_input, options, stream);
+
+  // Get the JSON's tree representation
+  auto gpu_tree = cuio_json::detail::get_tree_representation(tokens_gpu, token_indices_gpu, stream);
+  // host tree generation
+  auto cpu_tree =
+    cuio_json::test::get_tree_representation_cpu(tokens_gpu, token_indices_gpu, options, stream);
+  json_test::compare_trees(cpu_tree, gpu_tree);
+
+  // Print tree representation
+  if (std::getenv("NJP_DEBUG_DUMP") != nullptr) { print_tree_representation(input, cpu_tree); }
+
+  // Golden sample of node categories
+  std::vector<cuio_json::node_t> golden_node_categories = {
+    cuio_json::NC_LIST, cuio_json::NC_STRUCT, cuio_json::NC_FN,     cuio_json::NC_STR,
+    cuio_json::NC_FN,   cuio_json::NC_LIST,   cuio_json::NC_VAL,    cuio_json::NC_VAL,
+    cuio_json::NC_VAL,  cuio_json::NC_FN,     cuio_json::NC_STR,    cuio_json::NC_FN,
+    cuio_json::NC_STR,  cuio_json::NC_FN,     cuio_json::NC_VAL,    cuio_json::NC_STRUCT,
+    cuio_json::NC_FN,   cuio_json::NC_STR,    cuio_json::NC_FN,     cuio_json::NC_LIST,
+    cuio_json::NC_VAL,  cuio_json::NC_STRUCT, cuio_json::NC_VAL,    cuio_json::NC_STRUCT,
+    cuio_json::NC_FN,   cuio_json::NC_LIST,   cuio_json::NC_STRUCT, cuio_json::NC_STRUCT,
+    cuio_json::NC_FN,   cuio_json::NC_STR,    cuio_json::NC_FN,     cuio_json::NC_STR,
+    cuio_json::NC_FN,   cuio_json::NC_VAL};
+
+  // Golden sample of node ids
+  // clang-format off
+  std::vector<cuio_json::NodeIndexT> golden_parent_node_ids = {
+    cuio_json::parent_node_sentinel, 0, 1, 2,
+    1, 4, 5, 5,
+    5, 1, 9, 1,
+    11, 1, 13, 0,
+    15, 16, 15, 18,
+    19, 19, 19, 19,
+    23, 24, 25, 25,
+    15, 28, 15, 30,
+    15, 32};
+  // clang-format on
+
+  // Golden sample of node levels
+  std::vector<cuio_json::TreeDepthT> golden_node_levels = {0, 1, 2, 3, 2, 3, 4, 4, 4, 2, 3, 2,
+                                                           3, 2, 3, 1, 2, 3, 2, 3, 4, 4, 4, 4,
+                                                           5, 6, 7, 7, 2, 3, 2, 3, 2, 3};
+
+  // Golden sample of the character-ranges from the original input that each node demarcates
+  std::vector<cuio_json::SymbolOffsetT> golden_node_range_begin = {
+    2,   3,   5,   16,  29,  38,  39,  41,  44,  49,  58,  72,  80,  108, 116, 124, 126,
+    137, 150, 158, 159, 161, 164, 169, 171, 174, 175, 180, 189, 198, 212, 220, 255, 263};
+
+  // Golden sample of the character-ranges from the original input that each node demarcates
+  std::vector<cuio_json::SymbolOffsetT> golden_node_range_end = {
+    3,   4,   13,  27,  35,  39,  40,  43,  46,  55,  70,  77,  106, 113, 120, 125, 134,
+    148, 155, 159, 160, 162, 168, 170, 172, 175, 176, 181, 195, 210, 217, 253, 260, 267};
+
+  // Check results against golden samples
+  ASSERT_EQ(golden_node_categories.size(), cpu_tree.node_categories.size());
+  ASSERT_EQ(golden_parent_node_ids.size(), cpu_tree.parent_node_ids.size());
+  ASSERT_EQ(golden_node_levels.size(), cpu_tree.node_levels.size());
+  ASSERT_EQ(golden_node_range_begin.size(), cpu_tree.node_range_begin.size());
+  ASSERT_EQ(golden_node_range_end.size(), cpu_tree.node_range_end.size());
+
+  for (std::size_t i = 0; i < golden_node_categories.size(); i++) {
+    ASSERT_EQ(golden_node_categories[i], cpu_tree.node_categories[i]) << "[" << i << "]";
+    ASSERT_EQ(golden_parent_node_ids[i], cpu_tree.parent_node_ids[i]) << "[" << i << "]";
+    ASSERT_EQ(golden_node_levels[i], cpu_tree.node_levels[i]) << "[" << i << "]";
+    ASSERT_EQ(golden_node_range_begin[i], cpu_tree.node_range_begin[i]) << "[" << i << "]";
+    ASSERT_EQ(golden_node_range_end[i], cpu_tree.node_range_end[i]) << "[" << i << "]";
+  }
+}
+
+TEST_F(JsonTest, TreeRepresentation2)
+{
+  auto const stream = cudf::default_stream_value;
+  // Test input: value end with comma, space, close-brace ", }"
+  std::string const input =
+    //  0         1         2         3         4         5         6         7         8         9
+    //  0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
+    R"([ {}, { "a": { "y" : 6, "z": [] }}, { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11)"
+    "\n}}]";
+  // Prepare input & output buffers
+  cudf::string_scalar d_scalar(input, true, stream);
+  auto d_input = cudf::device_span<cuio_json::SymbolT const>{
+    d_scalar.data(), static_cast<size_t>(d_scalar.size())};
+
+  cudf::io::json_reader_options const options{};
+
+  // Parse the JSON and get the token stream
+  const auto [tokens_gpu, token_indices_gpu] =
+    cudf::io::json::detail::get_token_stream(d_input, options, stream);
+
+  // Get the JSON's tree representation
+  auto gpu_tree = cuio_json::detail::get_tree_representation(tokens_gpu, token_indices_gpu, stream);
+  // host tree generation
+  auto cpu_tree =
+    cuio_json::test::get_tree_representation_cpu(tokens_gpu, token_indices_gpu, options, stream);
+  json_test::compare_trees(cpu_tree, gpu_tree);
+
+  // Print tree representation
+  if (std::getenv("NJP_DEBUG_DUMP") != nullptr) { print_tree_representation(input, cpu_tree); }
+
+  // Golden sample of node categories
+  // clang-format off
+  std::vector<cuio_json::node_t> golden_node_categories = {
+    cuio_json::NC_LIST, cuio_json::NC_STRUCT,
+    cuio_json::NC_STRUCT, cuio_json::NC_FN, cuio_json::NC_STRUCT, cuio_json::NC_FN, cuio_json::NC_VAL, cuio_json::NC_FN, cuio_json::NC_LIST,
+    cuio_json::NC_STRUCT, cuio_json::NC_FN, cuio_json::NC_STRUCT, cuio_json::NC_FN, cuio_json::NC_VAL, cuio_json::NC_FN, cuio_json::NC_VAL,
+    cuio_json::NC_FN, cuio_json::NC_STRUCT, cuio_json::NC_FN, cuio_json::NC_VAL, cuio_json::NC_FN, cuio_json::NC_VAL};
+
+  // Golden sample of node ids
+  std::vector<cuio_json::NodeIndexT> golden_parent_node_ids = {
+    cuio_json::parent_node_sentinel, 0,
+    0, 2, 3, 4, 5, 4, 7,
+    0, 9, 10, 11, 12, 11, 14,
+    9, 16, 17, 18, 17, 20};
+  // clang-format on
+
+  // Golden sample of node levels
+  std::vector<cuio_json::TreeDepthT> golden_node_levels = {
+    0, 1, 1, 2, 3, 4, 5, 4, 5, 1, 2, 3, 4, 5, 4, 5, 2, 3, 4, 5, 4, 5,
+  };
+
+  // Golden sample of the character-ranges from the original input that each node demarcates
+  std::vector<cuio_json::SymbolOffsetT> golden_node_range_begin = {0,  2,  6,  9,  13, 16, 21, 25, 29, 36, 39,
+                                                                   44, 47, 52, 56, 60, 66, 71, 73, 77, 83, 87};
+
+  // Golden sample of the character-ranges from the original input that each node demarcates
+  std::vector<cuio_json::SymbolOffsetT> golden_node_range_end = {1,  3,  7,  10, 14, 17, 22, 26, 30, 37, 40,
+                                                                 45, 48, 53, 57, 61, 67, 72, 74, 79, 84, 89};
+
+  // Check results against golden samples
+  ASSERT_EQ(golden_node_categories.size(), cpu_tree.node_categories.size());
+  ASSERT_EQ(golden_parent_node_ids.size(), cpu_tree.parent_node_ids.size());
+  ASSERT_EQ(golden_node_levels.size(), cpu_tree.node_levels.size());
+  ASSERT_EQ(golden_node_range_begin.size(), cpu_tree.node_range_begin.size());
+  ASSERT_EQ(golden_node_range_end.size(), cpu_tree.node_range_end.size());
+
+  for (std::size_t i = 0; i < golden_node_categories.size(); i++) {
+    ASSERT_EQ(golden_node_categories[i], cpu_tree.node_categories[i]);
+    ASSERT_EQ(golden_parent_node_ids[i], cpu_tree.parent_node_ids[i]);
+    ASSERT_EQ(golden_node_levels[i], cpu_tree.node_levels[i]);
+    ASSERT_EQ(golden_node_range_begin[i], cpu_tree.node_range_begin[i]);
+    ASSERT_EQ(golden_node_range_end[i], cpu_tree.node_range_end[i]);
+  }
+}
+
+TEST_F(JsonTest, TreeRepresentation3)
+{
+  auto const stream = cudf::default_stream_value;
+  // Test input: JSON lines with the same input as TreeRepresentation2
+  std::string const input =
+    R"( {}
+ { "a": { "y" : 6, "z": [] }}
+ { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} )";  // Prepare input & output buffers
+  cudf::string_scalar d_scalar(input, true, stream);
+  auto d_input = cudf::device_span<cuio_json::SymbolT const>{
+    d_scalar.data(), static_cast<size_t>(d_scalar.size())};
+
+  cudf::io::json_reader_options options{};
+  options.enable_lines(true);
+
+  // Parse the JSON and get the token stream
+  const auto [tokens_gpu, token_indices_gpu] =
+    cudf::io::json::detail::get_token_stream(d_input, options, stream);
+
+  // Get the JSON's tree representation
+  auto gpu_tree = cuio_json::detail::get_tree_representation(tokens_gpu, token_indices_gpu, stream);
+  // host tree generation
+  auto cpu_tree =
+    cuio_json::test::get_tree_representation_cpu(tokens_gpu, token_indices_gpu, options, stream);
+  json_test::compare_trees(cpu_tree, gpu_tree);
+
+  // Print tree representation
+  if (std::getenv("NJP_DEBUG_DUMP") != nullptr) { print_tree_representation(input, cpu_tree); }
+}
+
+/**
+ * @brief Test fixture for parametrized JSON tree traversal tests
+ */
+struct JsonTreeTraversalTest : public cudf::test::BaseFixture,
+                               public testing::WithParamInterface<std::tuple<bool, std::string>> {
+};
+
+//
+std::vector<std::string> json_list = {
+  "value",
+  "\"string\"",
+  "[1, 2, 3]",
+  R"({"a": 1, "b": 2, "c": 3})",
+  // input a: {x:i, y:i, z:[]}, b: {x:i, z:i}
+  R"([ {}, { "a": { "y" : 6, "z": [] }}, { "a" : { "x" : 8, "y": 9}, "b" : {"x": 10, "z": 11}}])",
+  // input a: {x:i, y:i, z:[]}, b: {x:i, z: {p: i, q: i}}
+  R"([ {}, { "a": { "y" : 1, "z": [] }},
+ { "a": { "x" : 2, "y": 3}, "b" : {"x": 4, "z": [ {"p": 1, "q": 2}]}},
+ { "a": { "y" : 6, "z": [7, 8, 9]}, "b": {"x": 10, "z": [{}, {"q": 3}, {"p": 4}]}},
+ { "a": { "z": [12, 13, 14, 15]}},
+ { "a": { "z": [16], "x": 2}}
+ ])"
+  //^row offset  a  a.x  a.y  a.z  b  b.x  b.z
+  // 1           1       1
+  // 2           2  2    2    2    2  2    b.z[] 0  b.z.p 0, b.z.q 0
+  // 3           3  3    3    3    3  3    a.z[] 0, 1, 2  b.z[] 1, 2, 3  b.z.q 2, b.z.p 3
+  // 4           4            a.z[] 3, 4, 5, 6
+  // 5           5       5    a.z[] 7
+};
+
+std::vector<std::string> json_lines_list = {
+  // Test input a: {x:i, y:i, z:[]}, b: {x:i, z:i} with JSON-lines
+  R"( {}
+ { "a": { "y" : 6, "z": [] }}
+ { "a": { "y" : 6, "z": [2, 3, 4, 5] }}
+ { "a": { "z": [4], "y" : 6 }}
+ { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} )"};
+INSTANTIATE_TEST_SUITE_P(Mixed_And_Records,
+                         JsonTreeTraversalTest,
+                         ::testing::Combine(::testing::Values(false),
+                                            ::testing::ValuesIn(json_list)));
+INSTANTIATE_TEST_SUITE_P(JsonLines,
+                         JsonTreeTraversalTest,
+                         ::testing::Combine(::testing::Values(true),
+                                            ::testing::ValuesIn(json_lines_list)));
+
+TEST_P(JsonTreeTraversalTest, CPUvsGPUTraversal)
+{
+  auto [json_lines, input] = GetParam();
+  auto stream              = cudf::default_stream_value;
+  cudf::io::json_reader_options options{};
+  options.enable_lines(json_lines);
+
+  // std::cout << json_lines << input << std::endl;
+  cudf::string_scalar d_scalar(input, true, stream);
+  auto d_input = cudf::device_span<cuio_json::SymbolT const>{
+    d_scalar.data(), static_cast<size_t>(d_scalar.size())};
+
+  // Parse the JSON and get the token stream
+  const auto [tokens_gpu, token_indices_gpu] =
+    cudf::io::json::detail::get_token_stream(d_input, options, stream);
+  // host tree generation
+  auto cpu_tree =
+    cuio_json::test::get_tree_representation_cpu(tokens_gpu, token_indices_gpu, options, stream);
+  // host tree traversal
+  auto [cpu_col_id, cpu_row_offsets] =
+    cuio_json::test::records_orient_tree_traversal_cpu(input, cpu_tree, stream);
+  // gpu tree generation
+  auto gpu_tree = cuio_json::detail::get_tree_representation(tokens_gpu, token_indices_gpu, stream);
+  // Print tree representation
+  if (std::getenv("NJP_DEBUG_DUMP") != nullptr) {
+    printf("BEFORE traversal (gpu_tree):\n");
+    json_test::print_tree(gpu_tree);
+  }
+  // gpu tree traversal
+  auto [gpu_col_id, gpu_row_offsets] =
+    cuio_json::detail::records_orient_tree_traversal(d_input, gpu_tree, stream);
+  // Print tree representation
+  if (std::getenv("NJP_DEBUG_DUMP") != nullptr) {
+    printf("AFTER traversal (gpu_tree):\n");
+    json_test::print_tree(gpu_tree);
+  }
+
+  // convert to a sequence because the gpu col ids may be assigned in random order
+  auto gpu_col_id2 =
+    json_test::translate_col_id(cudf::detail::make_std_vector_async(gpu_col_id, stream));
+  EXPECT_FALSE(json_test::compare_vector(cpu_col_id, gpu_col_id2, "col_id"));
+  EXPECT_FALSE(json_test::compare_vector(cpu_row_offsets, gpu_row_offsets, "row_offsets"));
+}
diff --git a/cpp/tests/io/json_type_cast_test.cu b/cpp/tests/io/json_type_cast_test.cu
index b5c97a5f6c2..43702f1f7e7 100644
--- a/cpp/tests/io/json_type_cast_test.cu
+++ b/cpp/tests/io/json_type_cast_test.cu
@@ -70,7 +70,7 @@ TEST_F(JSONTypeCastTest, String)
   auto d_column = cudf::column_device_view::create(input);
   rmm::device_uvector> svs(d_column->size(), stream);
 
-  thrust::transform(thrust::device,
+  thrust::transform(rmm::exec_policy(cudf::default_stream_value),
                     d_column->pair_begin(),
                     d_column->pair_end(),
                     svs.begin(),
@@ -100,7 +100,7 @@ TEST_F(JSONTypeCastTest, Int)
   cudf::test::strings_column_wrapper data({"1", "null", "3", "true", "5", "false"});
   auto d_column = cudf::column_device_view::create(data);
   rmm::device_uvector> svs(d_column->size(), stream);
 
-  thrust::transform(thrust::device,
+  thrust::transform(rmm::exec_policy(cudf::default_stream_value),
                     d_column->pair_begin(),
                     d_column->pair_end(),
                     svs.begin(),
@@ -137,7 +137,7 @@ TEST_F(JSONTypeCastTest, StringEscapes)
   });
   auto d_column = cudf::column_device_view::create(data);
   rmm::device_uvector> svs(d_column->size(), stream);
 
-  thrust::transform(thrust::device,
+  thrust::transform(rmm::exec_policy(cudf::default_stream_value),
                     d_column->pair_begin(),
                     d_column->pair_end(),
                     svs.begin(),
diff --git 
a/cpp/tests/io/nested_json_test.cpp b/cpp/tests/io/nested_json_test.cpp index f0ececaf4eb..bcfde4eedeb 100644 --- a/cpp/tests/io/nested_json_test.cpp +++ b/cpp/tests/io/nested_json_test.cpp @@ -21,7 +21,9 @@ #include #include #include +#include #include +#include #include #include @@ -137,7 +139,7 @@ TEST_F(JsonTest, StackContext) using StackSymbolT = char; // Prepare cuda stream for data transfers & kernels - constexpr auto stream = cudf::default_stream_value; + auto const stream = cudf::default_stream_value; // Test input std::string const input = R"( [{)" @@ -156,15 +158,11 @@ TEST_F(JsonTest, StackContext) R"(}] )"; // Prepare input & output buffers - rmm::device_uvector d_input(input.size(), stream); + cudf::string_scalar const d_scalar(input, true, stream); + auto const d_input = + cudf::device_span{d_scalar.data(), static_cast(d_scalar.size())}; hostdevice_vector stack_context(input.size(), stream); - ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(d_input.data(), - input.data(), - input.size() * sizeof(SymbolT), - cudaMemcpyHostToDevice, - stream.value())); - // Run algorithm cuio_json::detail::get_stack_context(d_input, stack_context.device_ptr(), stream); @@ -174,7 +172,7 @@ TEST_F(JsonTest, StackContext) // Make sure we copied back the stack context stream.synchronize(); - std::vector golden_stack_context{ + std::vector const golden_stack_context{ '_', '_', '_', '[', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '[', '[', '[', '[', '[', '[', '[', '[', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', @@ -202,21 +200,17 @@ TEST_F(JsonTest, StackContextUtf8) using StackSymbolT = char; // Prepare cuda stream for data transfers & kernels - constexpr auto stream = cudf::default_stream_value; + auto const stream = cudf::default_stream_value; // Test input std::string const input = R"([{"a":{"year":1882,"author": "Bharathi"}, {"a":"filip ʒakotɛ"}}])"; // Prepare input & output buffers - rmm::device_uvector d_input(input.size(), stream); + cudf::string_scalar const d_scalar(input, true, stream); + auto const d_input = + cudf::device_span{d_scalar.data(), static_cast(d_scalar.size())}; hostdevice_vector stack_context(input.size(), stream); - ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(d_input.data(), - input.data(), - input.size() * sizeof(SymbolT), - cudaMemcpyHostToDevice, - stream.value())); - // Run algorithm cuio_json::detail::get_stack_context(d_input, stack_context.device_ptr(), stream); @@ -226,7 +220,7 @@ TEST_F(JsonTest, StackContextUtf8) // Make sure we copied back the stack context stream.synchronize(); - std::vector golden_stack_context{ + std::vector const golden_stack_context{ '_', '[', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', @@ -241,13 +235,6 @@ TEST_F(JsonTest, TokenStream) using cuio_json::PdaTokenT; using cuio_json::SymbolOffsetT; using cuio_json::SymbolT; - - // Prepare cuda stream for data transfers & kernels - constexpr auto stream = cudf::default_stream_value; - - // Default parsing options - cudf::io::json_reader_options default_options{}; - // Test input std::string const input = R"( [{)" R"("category": "reference",)" @@ -264,54 +251,191 @@ TEST_F(JsonTest, TokenStream) R"("price": 8.95)" R"(}] )"; - // Prepare input & output buffers - 
rmm::device_uvector<SymbolT> d_input(input.size(), stream);
+  auto const stream = cudf::default_stream_value;
 
-  ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(d_input.data(),
-                                        input.data(),
-                                        input.size() * sizeof(SymbolT),
-                                        cudaMemcpyHostToDevice,
-                                        stream.value()));
+  // Default parsing options
+  cudf::io::json_reader_options default_options{};
+
+  // Prepare input & output buffers
+  cudf::string_scalar const d_scalar(input, true, stream);
+  auto const d_input =
+    cudf::device_span<SymbolT const>{d_scalar.data(), static_cast<size_t>(d_scalar.size())};
 
   // Parse the JSON and get the token stream
-  const auto [d_tokens_gpu, d_token_indices_gpu] =
+  auto [d_tokens_gpu, d_token_indices_gpu] =
     cuio_json::detail::get_token_stream(d_input, default_options, stream);
-  // Copy back the number of tokens that were written
-  thrust::host_vector tokens_gpu =
+  thrust::host_vector<PdaTokenT> const tokens_gpu =
     cudf::detail::make_host_vector_async(d_tokens_gpu, stream);
-  thrust::host_vector token_indices_gpu =
+  thrust::host_vector<SymbolOffsetT> const token_indices_gpu =
     cudf::detail::make_host_vector_async(d_token_indices_gpu, stream);
 
-  // Make sure we copied back all relevant data
-  stream.synchronize();
+  // Golden token stream sample
+  using token_t = cuio_json::token_t;
+  std::vector<std::pair<std::size_t, cuio_json::PdaTokenT>> const golden_token_stream = {
+    {2, token_t::ListBegin},
+    {3, token_t::StructBegin},
+    {4, token_t::StructMemberBegin},
+    {4, token_t::FieldNameBegin},
+    {13, token_t::FieldNameEnd},
+    {16, token_t::StringBegin},
+    {26, token_t::StringEnd},
+    {27, token_t::StructMemberEnd},
+    {28, token_t::StructMemberBegin},
+    {28, token_t::FieldNameBegin},
+    {35, token_t::FieldNameEnd},
+    {38, token_t::ListBegin},
+    {39, token_t::ValueBegin},
+    {40, token_t::ValueEnd},
+    {41, token_t::ValueBegin},
+    {43, token_t::ValueEnd},
+    {44, token_t::ValueBegin},
+    {46, token_t::ValueEnd},
+    {46, token_t::ListEnd},
+    {47, token_t::StructMemberEnd},
+    {48, token_t::StructMemberBegin},
+    {48, token_t::FieldNameBegin},
+    {55, token_t::FieldNameEnd},
+    {58, token_t::StringBegin},
+    {69, token_t::StringEnd},
+    {70, token_t::StructMemberEnd},
+    {71, token_t::StructMemberBegin},
+    {71, token_t::FieldNameBegin},
+    {77, token_t::FieldNameEnd},
+    {80, token_t::StringBegin},
+    {105, token_t::StringEnd},
+    {106, token_t::StructMemberEnd},
+    {107, token_t::StructMemberBegin},
+    {107, token_t::FieldNameBegin},
+    {113, token_t::FieldNameEnd},
+    {116, token_t::ValueBegin},
+    {120, token_t::ValueEnd},
+    {120, token_t::StructMemberEnd},
+    {120, token_t::StructEnd},
+    {124, token_t::StructBegin},
+    {125, token_t::StructMemberBegin},
+    {125, token_t::FieldNameBegin},
+    {134, token_t::FieldNameEnd},
+    {137, token_t::StringBegin},
+    {147, token_t::StringEnd},
+    {148, token_t::StructMemberEnd},
+    {149, token_t::StructMemberBegin},
+    {149, token_t::FieldNameBegin},
+    {155, token_t::FieldNameEnd},
+    {158, token_t::ListBegin},
+    {159, token_t::ValueBegin},
+    {160, token_t::ValueEnd},
+    {161, token_t::StructBegin},
+    {162, token_t::StructEnd},
+    {164, token_t::ValueBegin},
+    {168, token_t::ValueEnd},
+    {169, token_t::StructBegin},
+    {170, token_t::StructMemberBegin},
+    {170, token_t::FieldNameBegin},
+    {172, token_t::FieldNameEnd},
+    {174, token_t::ListBegin},
+    {175, token_t::StructBegin},
+    {177, token_t::StructEnd},
+    {180, token_t::StructBegin},
+    {181, token_t::StructEnd},
+    {182, token_t::ListEnd},
+    {184, token_t::StructMemberEnd},
+    {184, token_t::StructEnd},
+    {186, token_t::ListEnd},
+    {187, token_t::StructMemberEnd},
+    {188, token_t::StructMemberBegin},
+    {188, token_t::FieldNameBegin},
+    {195, token_t::FieldNameEnd},
+    {198, token_t::StringBegin},
+    {209, token_t::StringEnd},
+    {210, token_t::StructMemberEnd},
+    {211, token_t::StructMemberBegin},
+    {211, token_t::FieldNameBegin},
+    {217, token_t::FieldNameEnd},
+    {220, token_t::StringBegin},
+    {252, token_t::StringEnd},
+    {253, token_t::StructMemberEnd},
+    {254, token_t::StructMemberBegin},
+    {254, token_t::FieldNameBegin},
+    {260, token_t::FieldNameEnd},
+    {263, token_t::ValueBegin},
+    {267, token_t::ValueEnd},
+    {267, token_t::StructMemberEnd},
+    {267, token_t::StructEnd},
+    {268, token_t::ListEnd}};
+
+  // Verify the number of tokens matches
+  ASSERT_EQ(golden_token_stream.size(), tokens_gpu.size());
+  ASSERT_EQ(golden_token_stream.size(), token_indices_gpu.size());
+
+  for (std::size_t i = 0; i < tokens_gpu.size(); i++) {
+    // Ensure the index the tokens are pointing to does match
+    EXPECT_EQ(golden_token_stream[i].first, token_indices_gpu[i]) << "Mismatch at #" << i;
+
+    // Ensure the token category is correct
+    EXPECT_EQ(golden_token_stream[i].second, tokens_gpu[i]) << "Mismatch at #" << i;
+  }
+}
+
+TEST_F(JsonTest, TokenStream2)
+{
+  using cuio_json::PdaTokenT;
+  using cuio_json::SymbolOffsetT;
+  using cuio_json::SymbolT;
+  // value end with comma, space, close-brace ", }"
+  std::string const input =
+    R"([ {}, { "a": { "y" : 6, "z": [] }}, { "a" : { "x" : 8, "y": 9}, "b" : {"x": 10 , "z": 11)"
+    "\n}}]";
+
+  auto const stream = cudf::default_stream_value;
+
+  // Default parsing options
+  cudf::io::json_reader_options default_options{};
+
+  // Prepare input & output buffers
+  cudf::string_scalar const d_scalar(input, true, stream);
+  auto const d_input =
+    cudf::device_span<SymbolT const>{d_scalar.data(), static_cast<size_t>(d_scalar.size())};
+
+  // Parse the JSON and get the token stream
+  auto [d_tokens_gpu, d_token_indices_gpu] =
+    cuio_json::detail::get_token_stream(d_input, default_options, stream);
+  // Copy back the number of tokens that were written
+  thrust::host_vector<PdaTokenT> const tokens_gpu =
+    cudf::detail::make_host_vector_async(d_tokens_gpu, stream);
+  thrust::host_vector<SymbolOffsetT> const token_indices_gpu =
+    cudf::detail::make_host_vector_async(d_token_indices_gpu, stream);
 
   // Golden token stream sample
   using token_t = cuio_json::token_t;
-  std::vector<std::pair<std::size_t, cuio_json::PdaTokenT>> golden_token_stream = {
-    {2, token_t::ListBegin},       {3, token_t::StructBegin},     {4, token_t::FieldNameBegin},
-    {13, token_t::FieldNameEnd},   {16, token_t::StringBegin},    {26, token_t::StringEnd},
-    {28, token_t::FieldNameBegin}, {35, token_t::FieldNameEnd},   {38, token_t::ListBegin},
-    {39, token_t::ValueBegin},     {40, token_t::ValueEnd},       {41, token_t::ValueBegin},
-    {43, token_t::ValueEnd},       {44, token_t::ValueBegin},     {46, token_t::ValueEnd},
-    {46, token_t::ListEnd},        {48, token_t::FieldNameBegin}, {55, token_t::FieldNameEnd},
-    {58, token_t::StringBegin},    {69, token_t::StringEnd},      {71, token_t::FieldNameBegin},
-    {77, token_t::FieldNameEnd},   {80, token_t::StringBegin},    {105, token_t::StringEnd},
-    {107, token_t::FieldNameBegin}, {113, token_t::FieldNameEnd}, {116, token_t::ValueBegin},
-    {120, token_t::ValueEnd},      {120, token_t::StructEnd},     {124, token_t::StructBegin},
-    {125, token_t::FieldNameBegin}, {134, token_t::FieldNameEnd}, {137, token_t::StringBegin},
-    {147, token_t::StringEnd},     {149, token_t::FieldNameBegin}, {155, token_t::FieldNameEnd},
-    {158, token_t::ListBegin},     {159, token_t::ValueBegin},    {160, token_t::ValueEnd},
-    {161, token_t::StructBegin},   {162, token_t::StructEnd},     {164, token_t::ValueBegin},
-    {168, token_t::ValueEnd},      {169, token_t::StructBegin},   {170, token_t::FieldNameBegin},
-    {172, token_t::FieldNameEnd},  {174, token_t::ListBegin},     {175, token_t::StructBegin},
-    {177, token_t::StructEnd},     {180, token_t::StructBegin},   {181, token_t::StructEnd},
-    {182, token_t::ListEnd},       {184, token_t::StructEnd},     {186, token_t::ListEnd},
-    {188, token_t::FieldNameBegin}, {195, token_t::FieldNameEnd}, {198, token_t::StringBegin},
-    {209, token_t::StringEnd},     {211, token_t::FieldNameBegin}, {217, token_t::FieldNameEnd},
-    {220, token_t::StringBegin},   {252, token_t::StringEnd},     {254, token_t::FieldNameBegin},
-    {260, token_t::FieldNameEnd},  {263, token_t::ValueBegin},    {267, token_t::ValueEnd},
-    {267, token_t::StructEnd},     {268, token_t::ListEnd}};
+  // clang-format off
+  std::vector<std::pair<std::size_t, cuio_json::PdaTokenT>> const golden_token_stream = {
+    {0, token_t::ListBegin},
+    {2, token_t::StructBegin}, {3, token_t::StructEnd}, //{}
+    {6, token_t::StructBegin},
+    {8, token_t::StructMemberBegin}, {8, token_t::FieldNameBegin}, {10, token_t::FieldNameEnd}, //a
+    {13, token_t::StructBegin},
+    {15, token_t::StructMemberBegin}, {15, token_t::FieldNameBegin}, {17, token_t::FieldNameEnd}, {21, token_t::ValueBegin}, {22, token_t::ValueEnd}, {22, token_t::StructMemberEnd}, //a.y
+    {24, token_t::StructMemberBegin}, {24, token_t::FieldNameBegin}, {26, token_t::FieldNameEnd}, {29, token_t::ListBegin}, {30, token_t::ListEnd}, {32, token_t::StructMemberEnd}, //a.z
+    {32, token_t::StructEnd},
+    {33, token_t::StructMemberEnd},
+    {33, token_t::StructEnd},
+    {36, token_t::StructBegin},
+    {38, token_t::StructMemberBegin}, {38, token_t::FieldNameBegin}, {40, token_t::FieldNameEnd}, //a
+    {44, token_t::StructBegin},
+    {46, token_t::StructMemberBegin}, {46, token_t::FieldNameBegin}, {48, token_t::FieldNameEnd}, {52, token_t::ValueBegin}, {53, token_t::ValueEnd}, {53, token_t::StructMemberEnd}, //a.x
+    {55, token_t::StructMemberBegin}, {55, token_t::FieldNameBegin}, {57, token_t::FieldNameEnd}, {60, token_t::ValueBegin}, {61, token_t::ValueEnd}, {61, token_t::StructMemberEnd}, //a.y
+    {61, token_t::StructEnd},
+    {62, token_t::StructMemberEnd},
+    {64, token_t::StructMemberBegin}, {64, token_t::FieldNameBegin}, {66, token_t::FieldNameEnd}, //b
+    {70, token_t::StructBegin},
+    {71, token_t::StructMemberBegin}, {71, token_t::FieldNameBegin}, {73, token_t::FieldNameEnd}, {76, token_t::ValueBegin}, {78, token_t::ValueEnd}, {79, token_t::StructMemberEnd}, //b.x
+    {81, token_t::StructMemberBegin}, {81, token_t::FieldNameBegin}, {83, token_t::FieldNameEnd}, {86, token_t::ValueBegin}, {88, token_t::ValueEnd}, {89, token_t::StructMemberEnd}, //b.z
+    {89, token_t::StructEnd},
+    {90, token_t::StructMemberEnd},
+    {90, token_t::StructEnd},
+    {91, token_t::ListEnd}};
+  // clang-format on
 
   // Verify the number of tokens matches
   ASSERT_EQ(golden_token_stream.size(), tokens_gpu.size());
@@ -331,7 +455,7 @@ TEST_F(JsonTest, ExtractColumn)
   using cuio_json::SymbolT;
 
   // Prepare cuda stream for data transfers & kernels
-  constexpr auto stream = cudf::default_stream_value;
+  auto const stream = cudf::default_stream_value;
 
   // Default parsing options
   cudf::io::json_reader_options default_options{};
@@ -346,8 +470,10 @@ TEST_F(JsonTest, ExtractColumn)
   auto const second_column_index = 1;
   EXPECT_EQ(cudf_table.tbl->num_columns(), expected_col_count);
 
-  auto expected_col1 = cudf::test::strings_column_wrapper({"0.0", "0.1", "0.2"});
-  auto expected_col2 = cudf::test::strings_column_wrapper({"1.0", "1.1", "1.2"});
+  auto expected_col1 =
+    cudf::test::fixed_width_column_wrapper<double>({0.0, 0.1, 0.2}, {true, true, true});
+  auto expected_col2 =
+
cudf::test::fixed_width_column_wrapper({1.0, 1.1, 1.2}, {true, true, true}); cudf::column_view parsed_col1 = cudf_table.tbl->get_column(first_column_index); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col1, parsed_col1); cudf::column_view parsed_col2 = cudf_table.tbl->get_column(second_column_index); @@ -357,7 +483,7 @@ TEST_F(JsonTest, ExtractColumn) TEST_F(JsonTest, UTF_JSON) { // Prepare cuda stream for data transfers & kernels - constexpr auto stream = cudf::default_stream_value; + auto const stream = cudf::default_stream_value; // Default parsing options cudf::io::json_reader_options default_options{}; @@ -395,110 +521,65 @@ TEST_F(JsonTest, UTF_JSON) CUDF_EXPECT_NO_THROW(cuio_json::detail::parse_nested_json(utf_pass, default_options, stream)); } -TEST_F(JsonTest, FromParquet) +TEST_F(JsonTest, ExtractColumnWithQuotes) { using cuio_json::SymbolT; - std::string const input = - R"([{"0":{},"1":[],"2":{}},{"1":[[""],[]],"2":{"2":""}},{"0":{"a":"1"},"2":{"0":"W&RR=+I","1":""}}])"; - // Prepare cuda stream for data transfers & kernels - constexpr auto stream = cudf::default_stream_value; + auto const stream = cudf::default_stream_value; // Default parsing options - cudf::io::json_reader_options default_options{}; + cudf::io::json_reader_options options{}; + options.enable_keep_quotes(true); - // Binary parquet data containing the same data as the data represented by the JSON string. - // We could add a dataset to include this file, but we don't want tests in cudf to have data. - const unsigned char parquet_data[] = { - 0x50, 0x41, 0x52, 0x31, 0x15, 0x00, 0x15, 0x18, 0x15, 0x18, 0x2C, 0x15, 0x06, 0x15, 0x00, 0x15, - 0x06, 0x15, 0x06, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x21, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x31, 0x15, 0x00, 0x15, 0x24, 0x15, 0x20, 0x2C, 0x15, 0x08, 0x15, 0x00, 0x15, 0x06, 0x15, 0x06, - 0x00, 0x00, 0x12, 0x18, 0x03, 0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x05, 0x07, 0x04, 0x2D, 0x00, - 0x01, 0x01, 0x15, 0x00, 0x15, 0x22, 0x15, 0x22, 0x2C, 0x15, 0x06, 0x15, 0x00, 0x15, 0x06, 0x15, - 0x06, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x04, 0x07, 0x00, 0x00, 0x00, 0x57, 0x26, 0x52, - 0x52, 0x3D, 0x2B, 0x49, 0x15, 0x00, 0x15, 0x14, 0x15, 0x14, 0x2C, 0x15, 0x06, 0x15, 0x00, 0x15, - 0x06, 0x15, 0x06, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x04, 0x00, 0x00, 0x00, 0x00, 0x15, - 0x00, 0x15, 0x14, 0x15, 0x14, 0x2C, 0x15, 0x06, 0x15, 0x00, 0x15, 0x06, 0x15, 0x06, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x03, 0x02, 0x00, 0x00, 0x00, 0x00, 0x15, 0x02, 0x19, 0xCC, 0x48, 0x06, - 0x73, 0x63, 0x68, 0x65, 0x6D, 0x61, 0x15, 0x06, 0x00, 0x35, 0x02, 0x18, 0x01, 0x30, 0x15, 0x02, - 0x00, 0x15, 0x0C, 0x25, 0x02, 0x18, 0x01, 0x61, 0x25, 0x00, 0x00, 0x35, 0x02, 0x18, 0x01, 0x31, - 0x15, 0x02, 0x15, 0x06, 0x00, 0x35, 0x04, 0x18, 0x04, 0x6C, 0x69, 0x73, 0x74, 0x15, 0x02, 0x00, - 0x35, 0x00, 0x18, 0x07, 0x65, 0x6C, 0x65, 0x6D, 0x65, 0x6E, 0x74, 0x15, 0x02, 0x15, 0x06, 0x00, - 0x35, 0x04, 0x18, 0x04, 0x6C, 0x69, 0x73, 0x74, 0x15, 0x02, 0x00, 0x15, 0x0C, 0x25, 0x00, 0x18, - 0x07, 0x65, 0x6C, 0x65, 0x6D, 0x65, 0x6E, 0x74, 0x25, 0x00, 0x00, 0x35, 0x00, 0x18, 0x01, 0x32, - 0x15, 0x06, 0x00, 0x15, 0x0C, 0x25, 0x02, 0x18, 0x01, 0x30, 0x25, 0x00, 0x00, 0x15, 0x0C, 0x25, - 0x02, 0x18, 0x01, 0x31, 0x25, 0x00, 0x00, 0x15, 0x0C, 0x25, 0x02, 0x18, 0x01, 0x32, 0x25, 0x00, - 0x00, 0x16, 0x06, 0x19, 0x1C, 0x19, 0x5C, 0x26, 0x00, 0x1C, 0x15, 0x0C, 0x19, 0x25, 0x00, 0x06, - 0x19, 0x28, 0x01, 0x30, 0x01, 0x61, 0x15, 0x00, 0x16, 0x06, 0x16, 0x3A, 0x16, 0x3A, 0x26, 0x08, - 0x3C, 0x36, 0x04, 0x28, 0x01, 0x31, 0x18, 0x01, 0x31, 
0x00, 0x00, 0x00, 0x26, 0x00, 0x1C, 0x15, - 0x0C, 0x19, 0x25, 0x00, 0x06, 0x19, 0x58, 0x01, 0x31, 0x04, 0x6C, 0x69, 0x73, 0x74, 0x07, 0x65, - 0x6C, 0x65, 0x6D, 0x65, 0x6E, 0x74, 0x04, 0x6C, 0x69, 0x73, 0x74, 0x07, 0x65, 0x6C, 0x65, 0x6D, - 0x65, 0x6E, 0x74, 0x15, 0x02, 0x16, 0x08, 0x16, 0x46, 0x16, 0x42, 0x26, 0x42, 0x3C, 0x36, 0x00, - 0x28, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x26, 0x00, 0x1C, 0x15, 0x0C, 0x19, 0x25, 0x00, 0x06, - 0x19, 0x28, 0x01, 0x32, 0x01, 0x30, 0x15, 0x00, 0x16, 0x06, 0x16, 0x44, 0x16, 0x44, 0x26, 0x84, - 0x01, 0x3C, 0x36, 0x04, 0x28, 0x07, 0x57, 0x26, 0x52, 0x52, 0x3D, 0x2B, 0x49, 0x18, 0x07, 0x57, - 0x26, 0x52, 0x52, 0x3D, 0x2B, 0x49, 0x00, 0x00, 0x00, 0x26, 0x00, 0x1C, 0x15, 0x0C, 0x19, 0x25, - 0x00, 0x06, 0x19, 0x28, 0x01, 0x32, 0x01, 0x31, 0x15, 0x00, 0x16, 0x06, 0x16, 0x36, 0x16, 0x36, - 0x26, 0xC8, 0x01, 0x3C, 0x36, 0x04, 0x28, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x26, 0x00, 0x1C, - 0x15, 0x0C, 0x19, 0x25, 0x00, 0x06, 0x19, 0x28, 0x01, 0x32, 0x01, 0x32, 0x15, 0x00, 0x16, 0x06, - 0x16, 0x36, 0x16, 0x36, 0x26, 0xFE, 0x01, 0x3C, 0x36, 0x04, 0x28, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x00, 0x16, 0xAC, 0x02, 0x16, 0x06, 0x00, 0x19, 0x1C, 0x18, 0x06, 0x70, 0x61, 0x6E, 0x64, 0x61, - 0x73, 0x18, 0xFE, 0x04, 0x7B, 0x22, 0x69, 0x6E, 0x64, 0x65, 0x78, 0x5F, 0x63, 0x6F, 0x6C, 0x75, - 0x6D, 0x6E, 0x73, 0x22, 0x3A, 0x20, 0x5B, 0x7B, 0x22, 0x6B, 0x69, 0x6E, 0x64, 0x22, 0x3A, 0x20, - 0x22, 0x72, 0x61, 0x6E, 0x67, 0x65, 0x22, 0x2C, 0x20, 0x22, 0x6E, 0x61, 0x6D, 0x65, 0x22, 0x3A, - 0x20, 0x6E, 0x75, 0x6C, 0x6C, 0x2C, 0x20, 0x22, 0x73, 0x74, 0x61, 0x72, 0x74, 0x22, 0x3A, 0x20, - 0x30, 0x2C, 0x20, 0x22, 0x73, 0x74, 0x6F, 0x70, 0x22, 0x3A, 0x20, 0x33, 0x2C, 0x20, 0x22, 0x73, - 0x74, 0x65, 0x70, 0x22, 0x3A, 0x20, 0x31, 0x7D, 0x5D, 0x2C, 0x20, 0x22, 0x63, 0x6F, 0x6C, 0x75, - 0x6D, 0x6E, 0x5F, 0x69, 0x6E, 0x64, 0x65, 0x78, 0x65, 0x73, 0x22, 0x3A, 0x20, 0x5B, 0x7B, 0x22, - 0x6E, 0x61, 0x6D, 0x65, 0x22, 0x3A, 0x20, 0x6E, 0x75, 0x6C, 0x6C, 0x2C, 0x20, 0x22, 0x66, 0x69, - 0x65, 0x6C, 0x64, 0x5F, 0x6E, 0x61, 0x6D, 0x65, 0x22, 0x3A, 0x20, 0x6E, 0x75, 0x6C, 0x6C, 0x2C, - 0x20, 0x22, 0x70, 0x61, 0x6E, 0x64, 0x61, 0x73, 0x5F, 0x74, 0x79, 0x70, 0x65, 0x22, 0x3A, 0x20, - 0x22, 0x75, 0x6E, 0x69, 0x63, 0x6F, 0x64, 0x65, 0x22, 0x2C, 0x20, 0x22, 0x6E, 0x75, 0x6D, 0x70, - 0x79, 0x5F, 0x74, 0x79, 0x70, 0x65, 0x22, 0x3A, 0x20, 0x22, 0x6F, 0x62, 0x6A, 0x65, 0x63, 0x74, - 0x22, 0x2C, 0x20, 0x22, 0x6D, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x3A, 0x20, 0x7B, - 0x22, 0x65, 0x6E, 0x63, 0x6F, 0x64, 0x69, 0x6E, 0x67, 0x22, 0x3A, 0x20, 0x22, 0x55, 0x54, 0x46, - 0x2D, 0x38, 0x22, 0x7D, 0x7D, 0x5D, 0x2C, 0x20, 0x22, 0x63, 0x6F, 0x6C, 0x75, 0x6D, 0x6E, 0x73, - 0x22, 0x3A, 0x20, 0x5B, 0x7B, 0x22, 0x6E, 0x61, 0x6D, 0x65, 0x22, 0x3A, 0x20, 0x22, 0x30, 0x22, - 0x2C, 0x20, 0x22, 0x66, 0x69, 0x65, 0x6C, 0x64, 0x5F, 0x6E, 0x61, 0x6D, 0x65, 0x22, 0x3A, 0x20, - 0x22, 0x30, 0x22, 0x2C, 0x20, 0x22, 0x70, 0x61, 0x6E, 0x64, 0x61, 0x73, 0x5F, 0x74, 0x79, 0x70, - 0x65, 0x22, 0x3A, 0x20, 0x22, 0x6F, 0x62, 0x6A, 0x65, 0x63, 0x74, 0x22, 0x2C, 0x20, 0x22, 0x6E, - 0x75, 0x6D, 0x70, 0x79, 0x5F, 0x74, 0x79, 0x70, 0x65, 0x22, 0x3A, 0x20, 0x22, 0x6F, 0x62, 0x6A, - 0x65, 0x63, 0x74, 0x22, 0x2C, 0x20, 0x22, 0x6D, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, - 0x3A, 0x20, 0x6E, 0x75, 0x6C, 0x6C, 0x7D, 0x2C, 0x20, 0x7B, 0x22, 0x6E, 0x61, 0x6D, 0x65, 0x22, - 0x3A, 0x20, 0x22, 0x31, 0x22, 0x2C, 0x20, 0x22, 0x66, 0x69, 0x65, 0x6C, 0x64, 0x5F, 0x6E, 0x61, - 0x6D, 0x65, 0x22, 0x3A, 0x20, 0x22, 0x31, 0x22, 0x2C, 0x20, 0x22, 0x70, 0x61, 
0x6E, 0x64, 0x61, - 0x73, 0x5F, 0x74, 0x79, 0x70, 0x65, 0x22, 0x3A, 0x20, 0x22, 0x6C, 0x69, 0x73, 0x74, 0x5B, 0x6C, - 0x69, 0x73, 0x74, 0x5B, 0x75, 0x6E, 0x69, 0x63, 0x6F, 0x64, 0x65, 0x5D, 0x5D, 0x22, 0x2C, 0x20, - 0x22, 0x6E, 0x75, 0x6D, 0x70, 0x79, 0x5F, 0x74, 0x79, 0x70, 0x65, 0x22, 0x3A, 0x20, 0x22, 0x6F, - 0x62, 0x6A, 0x65, 0x63, 0x74, 0x22, 0x2C, 0x20, 0x22, 0x6D, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x22, 0x3A, 0x20, 0x6E, 0x75, 0x6C, 0x6C, 0x7D, 0x2C, 0x20, 0x7B, 0x22, 0x6E, 0x61, 0x6D, - 0x65, 0x22, 0x3A, 0x20, 0x22, 0x32, 0x22, 0x2C, 0x20, 0x22, 0x66, 0x69, 0x65, 0x6C, 0x64, 0x5F, - 0x6E, 0x61, 0x6D, 0x65, 0x22, 0x3A, 0x20, 0x22, 0x32, 0x22, 0x2C, 0x20, 0x22, 0x70, 0x61, 0x6E, - 0x64, 0x61, 0x73, 0x5F, 0x74, 0x79, 0x70, 0x65, 0x22, 0x3A, 0x20, 0x22, 0x6F, 0x62, 0x6A, 0x65, - 0x63, 0x74, 0x22, 0x2C, 0x20, 0x22, 0x6E, 0x75, 0x6D, 0x70, 0x79, 0x5F, 0x74, 0x79, 0x70, 0x65, - 0x22, 0x3A, 0x20, 0x22, 0x6F, 0x62, 0x6A, 0x65, 0x63, 0x74, 0x22, 0x2C, 0x20, 0x22, 0x6D, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x3A, 0x20, 0x6E, 0x75, 0x6C, 0x6C, 0x7D, 0x5D, 0x2C, - 0x20, 0x22, 0x63, 0x72, 0x65, 0x61, 0x74, 0x6F, 0x72, 0x22, 0x3A, 0x20, 0x7B, 0x22, 0x6C, 0x69, - 0x62, 0x72, 0x61, 0x72, 0x79, 0x22, 0x3A, 0x20, 0x22, 0x70, 0x79, 0x61, 0x72, 0x72, 0x6F, 0x77, - 0x22, 0x2C, 0x20, 0x22, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x22, 0x3A, 0x20, 0x22, 0x38, - 0x2E, 0x30, 0x2E, 0x31, 0x22, 0x7D, 0x2C, 0x20, 0x22, 0x70, 0x61, 0x6E, 0x64, 0x61, 0x73, 0x5F, - 0x76, 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x22, 0x3A, 0x20, 0x22, 0x31, 0x2E, 0x34, 0x2E, 0x33, - 0x22, 0x7D, 0x00, 0x29, 0x5C, 0x1C, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x1C, 0x00, - 0x00, 0x1C, 0x00, 0x00, 0x00, 0x0B, 0x04, 0x00, 0x00, 0x50, 0x41, 0x52, 0x31}; - - // Read in the data via parquet reader - cudf::io::parquet_reader_options read_opts = cudf::io::parquet_reader_options::builder( - cudf::io::source_info{reinterpret_cast(parquet_data), sizeof(parquet_data)}); - auto result = cudf::io::read_parquet(read_opts); - - // Read in the data via the JSON parser + std::string const input = R"( [{"a":"0.0", "b":1.0}, {"b":1.1}, {"b":2.1, "a":"2.0"}] )"; + // Get the JSON's tree representation auto const cudf_table = cuio_json::detail::parse_nested_json( - cudf::host_span{input.data(), input.size()}, default_options, stream); + cudf::host_span{input.data(), input.size()}, options, stream); + + auto constexpr expected_col_count = 2; + EXPECT_EQ(cudf_table.tbl->num_columns(), expected_col_count); + + auto expected_col1 = + cudf::test::strings_column_wrapper({R"("0.0")", R"()", R"("2.0")"}, {true, false, true}); + auto expected_col2 = + cudf::test::fixed_width_column_wrapper({1.0, 1.1, 2.1}, {true, true, true}); + cudf::column_view parsed_col1 = cudf_table.tbl->get_column(0); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col1, parsed_col1); + cudf::column_view parsed_col2 = cudf_table.tbl->get_column(1); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col2, parsed_col2); +} - // Verify that the data read via parquet matches the data read via JSON - CUDF_TEST_EXPECT_TABLES_EQUAL(cudf_table.tbl->view(), result.tbl->view()); +TEST_F(JsonTest, ExpectFailMixStructAndList) +{ + using cuio_json::SymbolT; + + // Prepare cuda stream for data transfers & kernels + auto const stream = cudf::default_stream_value; - // Verify that the schema read via parquet matches the schema read via JSON - cudf::test::expect_metadata_equal(cudf_table.metadata, result.metadata); + // Default parsing options + cudf::io::json_reader_options options{}; + 
options.enable_keep_quotes(true); + + std::vector const inputs_fail{ + R"( [{"a":[123], "b":1.0}, {"b":1.1}, {"b":2.1, "a":{"0":123}}] )", + R"( [{"a":{"0":"foo"}, "b":1.0}, {"b":1.1}, {"b":2.1, "a":[123]}] )", + R"( [{"a":{"0":null}, "b":1.0}, {"b":1.1}, {"b":2.1, "a":[123]}] )"}; + + std::vector const inputs_succeed{ + R"( [{"a":[123, {"0": 123}], "b":1.0}, {"b":1.1}, {"b":2.1}] )", + R"( [{"a":[123, "123"], "b":1.0}, {"b":1.1}, {"b":2.1}] )"}; + + for (auto const& input : inputs_fail) { + CUDF_EXPECT_THROW_MESSAGE( + auto const cudf_table = cuio_json::detail::parse_nested_json( + cudf::host_span{input.data(), input.size()}, options, stream), + "A mix of lists and structs within the same column is not supported"); + } + + for (auto const& input : inputs_succeed) { + CUDF_EXPECT_NO_THROW( + auto const cudf_table = cuio_json::detail::parse_nested_json( + cudf::host_span{input.data(), input.size()}, options, stream)); + } } diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index 76ffc92e243..a658ed0a55d 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -1246,7 +1246,7 @@ TEST_F(OrcStatisticsTest, Overflow) not_too_small_seq, not_too_small_seq + num_rows, validity); table_view tbl({col1, col2, col3, col4}); - auto filepath = temp_env->get_temp_filepath("OrcStatsMerge.orc"); + auto filepath = temp_env->get_temp_filepath("OrcStatsOverflow.orc"); cudf_io::orc_writer_options out_opts = cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, tbl); @@ -1264,6 +1264,63 @@ TEST_F(OrcStatisticsTest, Overflow) check_sum_exist(3, true); check_sum_exist(4, true); } + +TEST_F(OrcStatisticsTest, HasNull) +{ + // cudf's ORC writer doesn't yet support the ability to encode the hasNull value in statistics so + // we're embedding a file created using pyorc + // + // Method to create file: + // >>> import pyorc + // >>> output = open("./temp.orc", "wb") + // >>> writer = pyorc.Writer(output, pyorc.Struct(a=pyorc.BigInt(), b=pyorc.BigInt())) + // >>> writer.write((1, 3)) + // >>> writer.write((2, 4)) + // >>> writer.write((None, 5)) + // >>> writer.close() + // + // Contents of file: + // >>> import pyarrow.orc as po + // >>> po.ORCFile('new.orc').read() + // pyarrow.Table + // a: int64 + // b: int64 + // ---- + // a: [[1,2,null]] + // b: [[3,4,5]] + auto nulls_orc = std::array{ + 0x4F, 0x52, 0x43, 0x1D, 0x00, 0x00, 0x0A, 0x0C, 0x0A, 0x04, 0x00, 0x00, 0x00, 0x00, 0x12, 0x04, + 0x08, 0x03, 0x50, 0x00, 0x2C, 0x00, 0x00, 0xE3, 0x12, 0xE7, 0x62, 0x67, 0x80, 0x00, 0x21, 0x1E, + 0x0E, 0x26, 0x21, 0x36, 0x0E, 0x26, 0x01, 0x16, 0x09, 0xB6, 0x00, 0x46, 0x00, 0x2C, 0x00, 0x00, + 0xE3, 0x12, 0xE7, 0x62, 0x67, 0x80, 0x00, 0x21, 0x1E, 0x0E, 0x66, 0x21, 0x36, 0x0E, 0x36, 0x01, + 0x2E, 0x09, 0x89, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0xFF, 0xE0, 0x05, 0x00, 0x00, 0xFF, 0xC0, + 0x07, 0x00, 0x00, 0x46, 0x01, 0x24, 0x05, 0x00, 0x00, 0xFF, 0xE0, 0x09, 0x00, 0x00, 0x46, 0x02, + 0x68, 0xA0, 0x68, 0x00, 0x00, 0xE3, 0x62, 0xE3, 0x60, 0x13, 0x60, 0x90, 0x10, 0xE4, 0x02, 0xD1, + 0x8C, 0x12, 0x92, 0x60, 0x9A, 0x09, 0x4C, 0x33, 0x00, 0xC5, 0x59, 0xC1, 0x34, 0x23, 0x98, 0x66, + 0x04, 0xD2, 0x6C, 0x60, 0x3E, 0x13, 0x94, 0xCF, 0x24, 0xC1, 0x2E, 0xC4, 0x02, 0x52, 0x07, 0x24, + 0x99, 0x60, 0xA4, 0x14, 0x73, 0x68, 0x88, 0x33, 0x00, 0x46, 0x00, 0x00, 0xE3, 0x52, 0xE2, 0x62, + 0xE1, 0x60, 0x0E, 0x60, 0xE0, 0xE2, 0xE1, 0x60, 0x12, 0x62, 0xE3, 0x60, 0x12, 0x60, 0x91, 0x60, + 0x0B, 0x60, 0x04, 0xF2, 0x98, 0x81, 0x3C, 0x36, 0x01, 0x2E, 0x09, 0x89, 0x00, 0x06, 0x00, 0xB4, + 0x00, 0x00, 0xE3, 0x60, 
0x16, 0x98, 0xC6, 0x28, 0xC5, 0xC5, 0xC1, 0x2C, 0xE0, 0x2C, 0x21, 0xA3, + 0x60, 0xAE, 0xC1, 0xAC, 0x24, 0xC4, 0xC1, 0x23, 0xC4, 0xC4, 0xC8, 0x24, 0xC5, 0x98, 0x28, 0xC5, + 0x98, 0xA4, 0xC0, 0xA0, 0xC1, 0x60, 0xC0, 0xA0, 0xC4, 0xC1, 0xC1, 0x82, 0xCE, 0x32, 0x60, 0xB6, + 0x62, 0xE1, 0x60, 0x0E, 0x60, 0xB0, 0xE2, 0xE1, 0x60, 0x12, 0x62, 0xE3, 0x60, 0x12, 0x60, 0x91, + 0x60, 0x0B, 0x60, 0x04, 0xF2, 0x98, 0x81, 0x3C, 0x36, 0x01, 0x2E, 0x09, 0x89, 0x00, 0x06, 0x87, + 0x09, 0x7E, 0x1E, 0x8C, 0x49, 0xAC, 0x86, 0x7A, 0xE6, 0x7A, 0xA6, 0x00, 0x08, 0x5D, 0x10, 0x01, + 0x18, 0x80, 0x80, 0x04, 0x22, 0x02, 0x00, 0x0C, 0x28, 0x26, 0x30, 0x06, 0x82, 0xF4, 0x03, 0x03, + 0x4F, 0x52, 0x43, 0x17, + }; + + auto const stats = cudf_io::read_parsed_orc_statistics( + cudf_io::source_info{reinterpret_cast(nulls_orc.data()), nulls_orc.size()}); + + EXPECT_EQ(stats.file_stats[1].has_null, true); + EXPECT_EQ(stats.file_stats[2].has_null, false); + + EXPECT_EQ(stats.stripes_stats[0][1].has_null, true); + EXPECT_EQ(stats.stripes_stats[0][2].has_null, false); +} + struct OrcWriterTestStripes : public OrcWriterTest, public ::testing::WithParamInterface> { diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index c5000bc0add..cf22ab8a525 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -388,11 +388,10 @@ class ParquetSizedTest : public ::testing::TestWithParam { // test the allowed bit widths for dictionary encoding // values chosen to trigger 1, 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, and 24 bit dictionaries -INSTANTIATE_TEST_SUITE_P( - ParquetDictionaryTest, - ParquetSizedTest, - testing::Values(2, 4, 8, 16, 32, 64, 256, 1024, 4096, 65536, 128 * 1024, 2 * 1024 * 1024), - testing::PrintToStringParamName()); +INSTANTIATE_TEST_SUITE_P(ParquetDictionaryTest, + ParquetSizedTest, + testing::Range(1, 25), + testing::PrintToStringParamName()); namespace { // Generates a vector of uniform random values of type T @@ -4594,12 +4593,61 @@ TEST_F(ParquetReaderTest, StructByteArray) CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } +TEST_F(ParquetWriterTest, SingleValueDictionaryTest) +{ + constexpr unsigned int expected_bits = 1; + constexpr unsigned int nrows = 1'000'000U; + + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return "a unique string value suffixed with 1"; }); + auto const col0 = cudf::test::strings_column_wrapper(elements, elements + nrows); + auto const expected = table_view{{col0}}; + + auto const filepath = temp_env->get_temp_filepath("SingleValueDictionaryTest.parquet"); + // set row group size so that there will be only one row group + // no compression so we can easily read page data + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .compression(cudf::io::compression_type::NONE) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .row_group_size_rows(nrows); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options default_in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto const result = cudf::io::read_parquet(default_in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + + // make sure dictionary was used + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::FileMetaData fmd; + + read_footer(source, &fmd); + auto used_dict = [&fmd]() { + for (auto enc : fmd.row_groups[0].columns[0].meta_data.encodings) { + if (enc 
== cudf::io::parquet::Encoding::PLAIN_DICTIONARY or + enc == cudf::io::parquet::Encoding::RLE_DICTIONARY) { + return true; + } + } + return false; + }; + EXPECT_TRUE(used_dict()); + + // and check that the correct number of bits was used + auto const oi = read_offset_index(source, fmd.row_groups[0].columns[0]); + auto const nbits = read_dict_bits(source, oi.page_locations[0]); + EXPECT_EQ(nbits, expected_bits); +} + TEST_P(ParquetSizedTest, DictionaryTest) { - constexpr int nrows = 3'000'000; + const unsigned int cardinality = (1 << (GetParam() - 1)) + 1; + const unsigned int nrows = std::max(cardinality * 3 / 2, 3'000'000U); - auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return "a unique string value suffixed with " + std::to_string(i % GetParam()); + auto elements = cudf::detail::make_counting_transform_iterator(0, [cardinality](auto i) { + return "a unique string value suffixed with " + std::to_string(i % cardinality); }); auto const col0 = cudf::test::strings_column_wrapper(elements, elements + nrows); auto const expected = table_view{{col0}}; @@ -4608,16 +4656,16 @@ TEST_P(ParquetSizedTest, DictionaryTest) // set row group size so that there will be only one row group // no compression so we can easily read page data cudf::io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .compression(cudf::io::compression_type::NONE) .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) .row_group_size_rows(nrows) - .row_group_size_bytes(256 * 1024 * 1024); + .row_group_size_bytes(512 * 1024 * 1024); cudf::io::write_parquet(out_opts); cudf::io::parquet_reader_options default_in_opts = - cudf::io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto const result = cudf_io::read_parquet(default_in_opts); + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto const result = cudf::io::read_parquet(default_in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); @@ -4640,16 +4688,7 @@ TEST_P(ParquetSizedTest, DictionaryTest) // and check that the correct number of bits was used auto const oi = read_offset_index(source, fmd.row_groups[0].columns[0]); auto const nbits = read_dict_bits(source, oi.page_locations[0]); - auto const expected_bits = - cudf::io::parquet::CompactProtocolReader::NumRequiredBits(GetParam() - 1); - - // copied from writer_impl.cu - constexpr auto allowed_bitsizes = - std::array{1, 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24}; - auto const rle_bits = - *std::lower_bound(allowed_bitsizes.begin(), allowed_bitsizes.end(), expected_bits); - - EXPECT_EQ(nbits, rle_bits); + EXPECT_EQ(nbits, GetParam()); } CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/io/text/multibyte_split_test.cpp b/cpp/tests/io/text/multibyte_split_test.cpp index 8ec88696355..43debf3d5b3 100644 --- a/cpp/tests/io/text/multibyte_split_test.cpp +++ b/cpp/tests/io/text/multibyte_split_test.cpp @@ -197,7 +197,7 @@ TEST_F(MultibyteSplitTest, LargeInputMultipleRange) auto host_input = std::string(); auto host_expected = std::vector(); - for (auto i = 0; i < 1000; i++) { + for (auto i = 0; i < (2 * 32 * 128 * 1024); i++) { host_input += "...:|"; } @@ -222,7 +222,7 @@ TEST_F(MultibyteSplitTest, LargeInputSparseMultipleRange) auto host_input = std::string(); auto host_expected = std::vector(); - for (auto i = 0; i < 1000; i++) { + for (auto i = 0; i < (2 * 32 * 128 * 1024); i++) { 
host_input += "....."; } @@ -244,6 +244,57 @@ TEST_F(MultibyteSplitTest, LargeInputSparseMultipleRange) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected->view(), *out, debug_output_level::ALL_ERRORS); } +TEST_F(MultibyteSplitTest, LargeInputMultipleRangeSingleByte) +{ + auto host_input = std::string(); + auto host_expected = std::vector(); + + for (auto i = 0; i < (2 * 32 * 128 * 1024); i++) { + host_input += "...:|"; + } + + auto delimiter = std::string("|"); + auto source = cudf::io::text::make_source(host_input); + + auto byte_ranges = cudf::io::text::create_byte_range_infos_consecutive(host_input.size(), 3); + auto out0 = cudf::io::text::multibyte_split(*source, delimiter, byte_ranges[0]); + auto out1 = cudf::io::text::multibyte_split(*source, delimiter, byte_ranges[1]); + auto out2 = cudf::io::text::multibyte_split(*source, delimiter, byte_ranges[2]); + + auto out_views = std::vector({out0->view(), out1->view(), out2->view()}); + auto out = cudf::concatenate(out_views); + + auto expected = cudf::io::text::multibyte_split(*source, delimiter); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected->view(), *out, debug_output_level::ALL_ERRORS); +} + +TEST_F(MultibyteSplitTest, LargeInputSparseMultipleRangeSingleByte) +{ + auto host_input = std::string(); + auto host_expected = std::vector(); + + for (auto i = 0; i < (2 * 32 * 128 * 1024); i++) { + host_input += "....."; + } + + auto delimiter = std::string("|"); + host_input[host_input.size() / 2] = '|'; + auto source = cudf::io::text::make_source(host_input); + + auto byte_ranges = cudf::io::text::create_byte_range_infos_consecutive(host_input.size(), 3); + auto out0 = cudf::io::text::multibyte_split(*source, delimiter, byte_ranges[0]); + auto out1 = cudf::io::text::multibyte_split(*source, delimiter, byte_ranges[1]); + auto out2 = cudf::io::text::multibyte_split(*source, delimiter, byte_ranges[2]); + + auto out_views = std::vector({out0->view(), out1->view(), out2->view()}); + auto out = cudf::concatenate(out_views); + + auto expected = cudf::io::text::multibyte_split(*source, delimiter); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected->view(), *out, debug_output_level::ALL_ERRORS); +} + TEST_F(MultibyteSplitTest, SmallInputAllPossibleRanges) { using namespace cudf::io::text; diff --git a/cpp/tests/io/type_inference_test.cu b/cpp/tests/io/type_inference_test.cu new file mode 100644 index 00000000000..04bb7507934 --- /dev/null +++ b/cpp/tests/io/type_inference_test.cu @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include + +using cudf::io::parse_options; +using cudf::io::detail::infer_data_type; + +// Base test fixture for tests +struct TypeInference : public cudf::test::BaseFixture { +}; + +TEST_F(TypeInference, Basic) +{ + auto const stream = cudf::default_stream_value; + + auto options = parse_options{',', '\n', '\"'}; + options.trie_true = cudf::detail::create_serialized_trie({"true"}, stream); + options.trie_false = cudf::detail::create_serialized_trie({"false"}, stream); + options.trie_na = cudf::detail::create_serialized_trie({"", "null"}, stream); + + std::string data = R"json([42,52,5])json"; + auto d_data = cudf::make_string_scalar(data); + auto& d_string_scalar = static_cast(*d_data); + + std::size_t constexpr size = 3; + auto const string_offset = std::vector{1, 4, 7}; + auto const string_length = std::vector{2, 2, 1}; + rmm::device_vector d_string_offset{string_offset}; + rmm::device_vector d_string_length{string_length}; + + auto d_col_strings = + thrust::make_zip_iterator(make_tuple(d_string_offset.begin(), d_string_length.begin())); + + auto res_type = + infer_data_type(options.json_view(), + {d_string_scalar.data(), static_cast(d_string_scalar.size())}, + d_col_strings, + size, + stream); + + EXPECT_EQ(res_type, cudf::data_type{cudf::type_id::INT64}); +} + +TEST_F(TypeInference, Null) +{ + auto const stream = cudf::default_stream_value; + + auto options = parse_options{',', '\n', '\"'}; + options.trie_true = cudf::detail::create_serialized_trie({"true"}, stream); + options.trie_false = cudf::detail::create_serialized_trie({"false"}, stream); + options.trie_na = cudf::detail::create_serialized_trie({"", "null"}, stream); + + std::string data = R"json([52,5])json"; + auto d_data = cudf::make_string_scalar(data); + auto& d_string_scalar = static_cast(*d_data); + + std::size_t constexpr size = 3; + auto const string_offset = std::vector{1, 1, 4}; + auto const string_length = std::vector{0, 2, 1}; + rmm::device_vector d_string_offset{string_offset}; + rmm::device_vector d_string_length{string_length}; + + auto d_col_strings = + thrust::make_zip_iterator(make_tuple(d_string_offset.begin(), d_string_length.begin())); + + auto res_type = + infer_data_type(options.json_view(), + {d_string_scalar.data(), static_cast(d_string_scalar.size())}, + d_col_strings, + size, + stream); + + EXPECT_EQ(res_type, + cudf::data_type{cudf::type_id::FLOAT64}); // FLOAT64 to align with pandas's behavior +} + +TEST_F(TypeInference, AllNull) +{ + auto const stream = cudf::default_stream_value; + + auto options = parse_options{',', '\n', '\"'}; + options.trie_true = cudf::detail::create_serialized_trie({"true"}, stream); + options.trie_false = cudf::detail::create_serialized_trie({"false"}, stream); + options.trie_na = cudf::detail::create_serialized_trie({"", "null"}, stream); + + std::string data = R"json([null])json"; + auto d_data = cudf::make_string_scalar(data); + auto& d_string_scalar = static_cast(*d_data); + + std::size_t constexpr size = 3; + auto const string_offset = std::vector{1, 1, 1}; + auto const string_length = std::vector{0, 0, 4}; + rmm::device_vector d_string_offset{string_offset}; + rmm::device_vector d_string_length{string_length}; + + auto d_col_strings = + thrust::make_zip_iterator(make_tuple(d_string_offset.begin(), d_string_length.begin())); + + auto res_type = + infer_data_type(options.json_view(), + {d_string_scalar.data(), static_cast(d_string_scalar.size())}, + 
d_col_strings,
+                    size,
+                    stream);
+
+  EXPECT_EQ(res_type, cudf::data_type{cudf::type_id::INT8});  // INT8 if all nulls
+}
+
+TEST_F(TypeInference, String)
+{
+  auto const stream = cudf::default_stream_value;
+
+  auto options       = parse_options{',', '\n', '\"'};
+  options.trie_true  = cudf::detail::create_serialized_trie({"true"}, stream);
+  options.trie_false = cudf::detail::create_serialized_trie({"false"}, stream);
+  options.trie_na    = cudf::detail::create_serialized_trie({"", "null"}, stream);
+
+  std::string data = R"json(["1990","8","25"])json";
+  auto d_data      = cudf::make_string_scalar(data);
+  auto& d_string_scalar = static_cast<cudf::string_scalar&>(*d_data);
+
+  std::size_t constexpr size = 3;
+  auto const string_offset   = std::vector<std::size_t>{1, 8, 12};
+  auto const string_length   = std::vector<std::size_t>{6, 3, 4};
+  rmm::device_vector<std::size_t> d_string_offset{string_offset};
+  rmm::device_vector<std::size_t> d_string_length{string_length};
+
+  auto d_col_strings =
+    thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin()));
+
+  auto res_type =
+    infer_data_type(options.json_view(),
+                    {d_string_scalar.data(), static_cast<std::size_t>(d_string_scalar.size())},
+                    d_col_strings,
+                    size,
+                    stream);
+
+  EXPECT_EQ(res_type, cudf::data_type{cudf::type_id::STRING});
+}
+
+TEST_F(TypeInference, Bool)
+{
+  auto const stream = cudf::default_stream_value;
+
+  auto options       = parse_options{',', '\n', '\"'};
+  options.trie_true  = cudf::detail::create_serialized_trie({"true"}, stream);
+  options.trie_false = cudf::detail::create_serialized_trie({"false"}, stream);
+  options.trie_na    = cudf::detail::create_serialized_trie({"", "null"}, stream);
+
+  std::string data = R"json([true,false,false])json";
+  auto d_data      = cudf::make_string_scalar(data);
+  auto& d_string_scalar = static_cast<cudf::string_scalar&>(*d_data);
+
+  std::size_t constexpr size = 3;
+  auto const string_offset   = std::vector<std::size_t>{1, 6, 12};
+  auto const string_length   = std::vector<std::size_t>{4, 5, 5};
+  rmm::device_vector<std::size_t> d_string_offset{string_offset};
+  rmm::device_vector<std::size_t> d_string_length{string_length};
+
+  auto d_col_strings =
+    thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin()));
+
+  auto res_type =
+    infer_data_type(options.json_view(),
+                    {d_string_scalar.data(), static_cast<std::size_t>(d_string_scalar.size())},
+                    d_col_strings,
+                    size,
+                    stream);
+
+  EXPECT_EQ(res_type, cudf::data_type{cudf::type_id::BOOL8});
+}
+
+TEST_F(TypeInference, Timestamp)
+{
+  auto const stream = cudf::default_stream_value;
+
+  auto options       = parse_options{',', '\n', '\"'};
+  options.trie_true  = cudf::detail::create_serialized_trie({"true"}, stream);
+  options.trie_false = cudf::detail::create_serialized_trie({"false"}, stream);
+  options.trie_na    = cudf::detail::create_serialized_trie({"", "null"}, stream);
+
+  std::string data = R"json([1970/2/5,1970/8/25])json";
+  auto d_data      = cudf::make_string_scalar(data);
+  auto& d_string_scalar = static_cast<cudf::string_scalar&>(*d_data);
+
+  // two fields in the input, so two (offset, length) pairs
+  std::size_t constexpr size = 2;
+  auto const string_offset   = std::vector<std::size_t>{1, 10};
+  auto const string_length   = std::vector<std::size_t>{8, 9};
+  rmm::device_vector<std::size_t> d_string_offset{string_offset};
+  rmm::device_vector<std::size_t> d_string_length{string_length};
+
+  auto d_col_strings =
+    thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin()));
+
+  auto res_type =
+    infer_data_type(options.json_view(),
+                    {d_string_scalar.data(), static_cast<std::size_t>(d_string_scalar.size())},
+                    d_col_strings,
+                    size,
+                    stream);
+
+  // All date/time values (quoted and unquoted) are inferred as strings for now
+  EXPECT_EQ(res_type, cudf::data_type{cudf::type_id::STRING});
+}
+
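All of the TypeInference cases above describe their input the same way: infer_data_type receives the raw JSON buffer plus a zip iterator over one (offset, length) pair per field, and a zero-length field stands for a null. A minimal host-side sketch of that field encoding, using only standard C++ and the Basic test's [42,52,5] input (an illustration of the layout, not the GPU code path):

// Host-side sketch: how the tests address fields of the JSON buffer
// via (offset, length) pairs.
#include <cstddef>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main()
{
  std::string const data = R"([42,52,5])";
  // one (offset, length) pair per field, matching TypeInference.Basic
  std::vector<std::pair<std::size_t, std::size_t>> fields{{1, 2}, {4, 2}, {7, 1}};
  for (auto const& [offset, length] : fields) {
    // a zero-length field would represent a null, as in TypeInference.Null
    std::cout << "field: '" << data.substr(offset, length) << "'\n";
  }
  // prints: field: '42' / field: '52' / field: '5'
}
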
+TEST_F(TypeInference, InvalidInput) +{ + auto const stream = cudf::default_stream_value; + + auto options = parse_options{',', '\n', '\"'}; + options.trie_true = cudf::detail::create_serialized_trie({"true"}, stream); + options.trie_false = cudf::detail::create_serialized_trie({"false"}, stream); + options.trie_na = cudf::detail::create_serialized_trie({"", "null"}, stream); + + std::string data = R"json([1,2,3,a,5])json"; + auto d_data = cudf::make_string_scalar(data); + auto& d_string_scalar = static_cast(*d_data); + + std::size_t constexpr size = 5; + auto const string_offset = std::vector{1, 3, 5, 7, 9}; + auto const string_length = std::vector{1, 1, 1, 1, 1}; + rmm::device_vector d_string_offset{string_offset}; + rmm::device_vector d_string_length{string_length}; + + auto d_col_strings = + thrust::make_zip_iterator(make_tuple(d_string_offset.begin(), d_string_length.begin())); + + auto res_type = + infer_data_type(options.json_view(), + {d_string_scalar.data(), static_cast(d_string_scalar.size())}, + d_col_strings, + size, + stream); + + // Invalid input is inferred as string for now + EXPECT_EQ(res_type, cudf::data_type{cudf::type_id::STRING}); +} diff --git a/cpp/tests/iterator/iterator_tests.cuh b/cpp/tests/iterator/iterator_tests.cuh index 0bc5d3c4a15..26902b43662 100644 --- a/cpp/tests/iterator/iterator_tests.cuh +++ b/cpp/tests/iterator/iterator_tests.cuh @@ -54,8 +54,14 @@ struct IteratorTest : public cudf::test::BaseFixture { // Get temporary storage size size_t temp_storage_bytes = 0; - cub::DeviceReduce::Reduce( - nullptr, temp_storage_bytes, d_in, dev_result.begin(), num_items, thrust::minimum{}, init); + cub::DeviceReduce::Reduce(nullptr, + temp_storage_bytes, + d_in, + dev_result.begin(), + num_items, + thrust::minimum{}, + init, + cudf::default_stream_value.value()); // Allocate temporary storage rmm::device_buffer d_temp_storage(temp_storage_bytes, cudf::default_stream_value); @@ -67,7 +73,8 @@ struct IteratorTest : public cudf::test::BaseFixture { dev_result.begin(), num_items, thrust::minimum{}, - init); + init, + cudf::default_stream_value.value()); evaluate(expected, dev_result, "cub test"); } @@ -85,14 +92,16 @@ struct IteratorTest : public cudf::test::BaseFixture { // using a temporary vector and calling transform and all_of separately is // equivalent to thrust::equal but compiles ~3x faster auto dev_results = rmm::device_uvector(num_items, cudf::default_stream_value); - thrust::transform(thrust::device, + thrust::transform(rmm::exec_policy(cudf::default_stream_value), d_in, d_in_last, dev_expected.begin(), dev_results.begin(), thrust::equal_to{}); - auto result = thrust::all_of( - thrust::device, dev_results.begin(), dev_results.end(), thrust::identity{}); + auto result = thrust::all_of(rmm::exec_policy(cudf::default_stream_value), + dev_results.begin(), + dev_results.end(), + thrust::identity{}); EXPECT_TRUE(result) << "thrust test"; } diff --git a/cpp/tests/iterator/optional_iterator_test_numeric.cu b/cpp/tests/iterator/optional_iterator_test_numeric.cu index 2756247c368..c5b7393550a 100644 --- a/cpp/tests/iterator/optional_iterator_test_numeric.cu +++ b/cpp/tests/iterator/optional_iterator_test_numeric.cu @@ -112,12 +112,13 @@ TYPED_TEST(NumericOptionalIteratorTest, mean_var_output) // this can be computed with a single reduce and without a temporary output vector // but the approach increases the compile time by ~2x auto results = rmm::device_uvector(d_col->size(), cudf::default_stream_value); - thrust::transform(thrust::device, + 
thrust::transform(rmm::exec_policy(cudf::default_stream_value), it_dev_squared, it_dev_squared + d_col->size(), results.begin(), optional_to_meanvar{}); - auto result = thrust::reduce(thrust::device, results.begin(), results.end(), T_output{}); + auto result = thrust::reduce( + rmm::exec_policy(cudf::default_stream_value), results.begin(), results.end(), T_output{}); if (not std::is_floating_point()) { EXPECT_EQ(expected_value, result) << "optional iterator reduction sum"; diff --git a/cpp/tests/iterator/pair_iterator_test_numeric.cu b/cpp/tests/iterator/pair_iterator_test_numeric.cu index 41dd9b65e42..f570df44286 100644 --- a/cpp/tests/iterator/pair_iterator_test_numeric.cu +++ b/cpp/tests/iterator/pair_iterator_test_numeric.cu @@ -14,6 +14,8 @@ */ #include +#include + #include #include #include @@ -111,7 +113,8 @@ TYPED_TEST(NumericPairIteratorTest, mean_var_output) // GPU test auto it_dev = d_col->pair_begin(); auto it_dev_squared = thrust::make_transform_iterator(it_dev, transformer); - auto result = thrust::reduce(it_dev_squared, + auto result = thrust::reduce(rmm::exec_policy(cudf::default_stream_value), + it_dev_squared, it_dev_squared + d_col->size(), thrust::make_pair(T_output{}, true), sum_if_not_null{}); diff --git a/cpp/tests/join/conditional_join_tests.cu b/cpp/tests/join/conditional_join_tests.cu index 9e75d6c6eb9..bc2a96b5adf 100644 --- a/cpp/tests/join/conditional_join_tests.cu +++ b/cpp/tests/join/conditional_join_tests.cu @@ -24,6 +24,8 @@ #include #include +#include + #include #include #include @@ -256,7 +258,7 @@ struct ConditionalJoinPairReturnTest : public ConditionalJoinTest { thrust::device_vector> reference_pairs( reference.first->size()); - thrust::transform(thrust::device, + thrust::transform(rmm::exec_policy(cudf::default_stream_value), result.first->begin(), result.first->end(), result.second->begin(), @@ -264,7 +266,7 @@ struct ConditionalJoinPairReturnTest : public ConditionalJoinTest { [] __device__(cudf::size_type first, cudf::size_type second) { return thrust::make_pair(first, second); }); - thrust::transform(thrust::device, + thrust::transform(rmm::exec_policy(cudf::default_stream_value), reference.first->begin(), reference.first->end(), reference.second->begin(), @@ -273,11 +275,15 @@ struct ConditionalJoinPairReturnTest : public ConditionalJoinTest { return thrust::make_pair(first, second); }); - thrust::sort(thrust::device, result_pairs.begin(), result_pairs.end()); - thrust::sort(thrust::device, reference_pairs.begin(), reference_pairs.end()); + thrust::sort( + rmm::exec_policy(cudf::default_stream_value), result_pairs.begin(), result_pairs.end()); + thrust::sort( + rmm::exec_policy(cudf::default_stream_value), reference_pairs.begin(), reference_pairs.end()); - EXPECT_TRUE(thrust::equal( - thrust::device, reference_pairs.begin(), reference_pairs.end(), result_pairs.begin())); + EXPECT_TRUE(thrust::equal(rmm::exec_policy(cudf::default_stream_value), + reference_pairs.begin(), + reference_pairs.end(), + result_pairs.begin())); } void compare_to_hash_join(ColumnVector left_data, ColumnVector right_data) @@ -696,9 +702,13 @@ struct ConditionalJoinSingleReturnTest : public ConditionalJoinTest { void _compare_to_hash_join(std::unique_ptr> const& result, std::unique_ptr> const& reference) { - thrust::sort(thrust::device, result->begin(), result->end()); - thrust::sort(thrust::device, reference->begin(), reference->end()); - EXPECT_TRUE(thrust::equal(thrust::device, result->begin(), result->end(), reference->begin())); + 
thrust::sort(rmm::exec_policy(cudf::default_stream_value), result->begin(), result->end()); + thrust::sort( + rmm::exec_policy(cudf::default_stream_value), reference->begin(), reference->end()); + EXPECT_TRUE(thrust::equal(rmm::exec_policy(cudf::default_stream_value), + result->begin(), + result->end(), + reference->begin())); } /* diff --git a/cpp/tests/lists/sort_lists_tests.cpp b/cpp/tests/lists/sort_lists_tests.cpp index ade626a5c2b..a26ae5c2f48 100644 --- a/cpp/tests/lists/sort_lists_tests.cpp +++ b/cpp/tests/lists/sort_lists_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,39 +24,21 @@ template using LCW = cudf::test::lists_column_wrapper; -namespace cudf { -namespace test { - -auto generate_sorted_lists(lists_column_view const& input, - order column_order, - null_order null_precedence) +auto generate_sorted_lists(cudf::lists_column_view const& input, + cudf::order column_order, + cudf::null_order null_precedence) { - return std::pair{lists::sort_lists(input, column_order, null_precedence), - lists::stable_sort_lists(input, column_order, null_precedence)}; + return std::pair{cudf::lists::sort_lists(input, column_order, null_precedence), + cudf::lists::stable_sort_lists(input, column_order, null_precedence)}; } template -struct SortLists : public BaseFixture { +struct SortLists : public cudf::test::BaseFixture { }; -TYPED_TEST_SUITE(SortLists, NumericTypes); -using SortListsInt = SortLists; +using TypesForTest = cudf::test::Concat; +TYPED_TEST_SUITE(SortLists, TypesForTest); -/* -empty case - empty list - single row with empty list - multi row with empty lists -single case - single list with single element - single list with multi element -normal case without nulls -Null cases - null rows - null elements in list. 
-Error: - depth>1 -*/ TYPED_TEST(SortLists, NoNull) { using T = TypeParam; @@ -68,14 +50,14 @@ TYPED_TEST(SortLists, NoNull) // LCW order{{2, 1, 0, 3}, {0}, {1, 2, 0}, {0, 1}}; LCW expected{{1, 2, 3, 4}, {5}, {8, 9, 10}, {6, 7}}; { - auto const [sorted_lists, stable_sorted_lists] = - generate_sorted_lists(lists_column_view{list}, order::ASCENDING, null_order::AFTER); + auto const [sorted_lists, stable_sorted_lists] = generate_sorted_lists( + cudf::lists_column_view{list}, cudf::order::ASCENDING, cudf::null_order::AFTER); CUDF_TEST_EXPECT_COLUMNS_EQUAL(sorted_lists->view(), expected); CUDF_TEST_EXPECT_COLUMNS_EQUAL(stable_sorted_lists->view(), expected); } { - auto const [sorted_lists, stable_sorted_lists] = - generate_sorted_lists(lists_column_view{list}, order::ASCENDING, null_order::BEFORE); + auto const [sorted_lists, stable_sorted_lists] = generate_sorted_lists( + cudf::lists_column_view{list}, cudf::order::ASCENDING, cudf::null_order::BEFORE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(sorted_lists->view(), expected); CUDF_TEST_EXPECT_COLUMNS_EQUAL(stable_sorted_lists->view(), expected); } @@ -84,14 +66,14 @@ TYPED_TEST(SortLists, NoNull) // LCW order{{3, 0, 1, 2}, {0}, {0, 1, 2}, {1, 0}}; LCW expected2{{4, 3, 2, 1}, {5}, {10, 9, 8}, {7, 6}}; { - auto const [sorted_lists, stable_sorted_lists] = - generate_sorted_lists(lists_column_view{list}, order::DESCENDING, null_order::AFTER); + auto const [sorted_lists, stable_sorted_lists] = generate_sorted_lists( + cudf::lists_column_view{list}, cudf::order::DESCENDING, cudf::null_order::AFTER); CUDF_TEST_EXPECT_COLUMNS_EQUAL(sorted_lists->view(), expected2); CUDF_TEST_EXPECT_COLUMNS_EQUAL(stable_sorted_lists->view(), expected2); } { - auto const [sorted_lists, stable_sorted_lists] = - generate_sorted_lists(lists_column_view{list}, order::DESCENDING, null_order::BEFORE); + auto const [sorted_lists, stable_sorted_lists] = generate_sorted_lists( + cudf::lists_column_view{list}, cudf::order::DESCENDING, cudf::null_order::BEFORE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(sorted_lists->view(), expected2); CUDF_TEST_EXPECT_COLUMNS_EQUAL(stable_sorted_lists->view(), expected2); } @@ -111,16 +93,16 @@ TYPED_TEST(SortLists, Null) { LCW expected{{{1, 2, 3, 4}, valids_a.begin()}, {5}, {8, 9, 10}, {6, 7}}; - auto const [sorted_lists, stable_sorted_lists] = - generate_sorted_lists(lists_column_view{list}, order::ASCENDING, null_order::AFTER); + auto const [sorted_lists, stable_sorted_lists] = generate_sorted_lists( + cudf::lists_column_view{list}, cudf::order::ASCENDING, cudf::null_order::AFTER); CUDF_TEST_EXPECT_COLUMNS_EQUAL(sorted_lists->view(), expected); CUDF_TEST_EXPECT_COLUMNS_EQUAL(stable_sorted_lists->view(), expected); } { LCW expected{{{4, 1, 2, 3}, valids_b.begin()}, {5}, {8, 9, 10}, {6, 7}}; - auto const [sorted_lists, stable_sorted_lists] = - generate_sorted_lists(lists_column_view{list}, order::ASCENDING, null_order::BEFORE); + auto const [sorted_lists, stable_sorted_lists] = generate_sorted_lists( + cudf::lists_column_view{list}, cudf::order::ASCENDING, cudf::null_order::BEFORE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(sorted_lists->view(), expected); CUDF_TEST_EXPECT_COLUMNS_EQUAL(stable_sorted_lists->view(), expected); } @@ -129,21 +111,23 @@ TYPED_TEST(SortLists, Null) // LCW order{{3, 0, 1, 2}, {0}, {0, 1, 2}, {1, 0}}; { LCW expected{{{4, 3, 2, 1}, valids_b.begin()}, {5}, {10, 9, 8}, {7, 6}}; - auto const [sorted_lists, stable_sorted_lists] = - generate_sorted_lists(lists_column_view{list}, order::DESCENDING, null_order::AFTER); + auto const [sorted_lists, 
stable_sorted_lists] = generate_sorted_lists( + cudf::lists_column_view{list}, cudf::order::DESCENDING, cudf::null_order::AFTER); CUDF_TEST_EXPECT_COLUMNS_EQUAL(sorted_lists->view(), expected); CUDF_TEST_EXPECT_COLUMNS_EQUAL(stable_sorted_lists->view(), expected); } { LCW expected{{{3, 2, 1, 4}, valids_a.begin()}, {5}, {10, 9, 8}, {7, 6}}; - auto const [sorted_lists, stable_sorted_lists] = - generate_sorted_lists(lists_column_view{list}, order::DESCENDING, null_order::BEFORE); + auto const [sorted_lists, stable_sorted_lists] = generate_sorted_lists( + cudf::lists_column_view{list}, cudf::order::DESCENDING, cudf::null_order::BEFORE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(sorted_lists->view(), expected); CUDF_TEST_EXPECT_COLUMNS_EQUAL(stable_sorted_lists->view(), expected); } } +using SortListsInt = SortLists; + TEST_F(SortListsInt, Empty) { using T = int; @@ -151,21 +135,21 @@ TEST_F(SortListsInt, Empty) { LCW l{}; auto const [sorted_lists, stable_sorted_lists] = - generate_sorted_lists(lists_column_view{l}, {}, {}); + generate_sorted_lists(cudf::lists_column_view{l}, {}, {}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(sorted_lists->view(), l); CUDF_TEST_EXPECT_COLUMNS_EQUAL(stable_sorted_lists->view(), l); } { LCW l{LCW{}}; auto const [sorted_lists, stable_sorted_lists] = - generate_sorted_lists(lists_column_view{l}, {}, {}); + generate_sorted_lists(cudf::lists_column_view{l}, {}, {}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(sorted_lists->view(), l); CUDF_TEST_EXPECT_COLUMNS_EQUAL(stable_sorted_lists->view(), l); } { LCW l{LCW{}, LCW{}}; auto const [sorted_lists, stable_sorted_lists] = - generate_sorted_lists(lists_column_view{l}, {}, {}); + generate_sorted_lists(cudf::lists_column_view{l}, {}, {}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(sorted_lists->view(), l); CUDF_TEST_EXPECT_COLUMNS_EQUAL(stable_sorted_lists->view(), l); } @@ -178,14 +162,14 @@ TEST_F(SortListsInt, Single) { LCW l{1}; auto const [sorted_lists, stable_sorted_lists] = - generate_sorted_lists(lists_column_view{l}, {}, {}); + generate_sorted_lists(cudf::lists_column_view{l}, {}, {}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(sorted_lists->view(), l); CUDF_TEST_EXPECT_COLUMNS_EQUAL(stable_sorted_lists->view(), l); } { LCW l{{1, 2, 3}}; auto const [sorted_lists, stable_sorted_lists] = - generate_sorted_lists(lists_column_view{l}, {}, {}); + generate_sorted_lists(cudf::lists_column_view{l}, {}, {}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(sorted_lists->view(), l); CUDF_TEST_EXPECT_COLUMNS_EQUAL(stable_sorted_lists->view(), l); } @@ -198,22 +182,20 @@ TEST_F(SortListsInt, NullRows) LCW l{{{1, 2, 3}, {4, 5, 6}, {7}}, valids.begin()}; // offset 0, 0, 3, 3 auto const [sorted_lists, stable_sorted_lists] = - generate_sorted_lists(lists_column_view{l}, {}, {}); + generate_sorted_lists(cudf::lists_column_view{l}, {}, {}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(sorted_lists->view(), l); CUDF_TEST_EXPECT_COLUMNS_EQUAL(stable_sorted_lists->view(), l); } -/* // Disabling this test. 
// Reason: After this exception "cudaErrorAssert device-side assert triggered", further tests fail -TEST_F(SortListsInt, Depth) +TEST_F(SortListsInt, DISABLED_Depth) { using T = int; LCW l1{LCW{{1, 2}, {3}}, LCW{{4, 5}}}; // device exception - EXPECT_THROW(sort_lists(lists_column_view{l1}, {}, {}), std::exception); + EXPECT_THROW(cudf::lists::sort_lists(cudf::lists_column_view{l1}, {}, {}), std::exception); } -*/ TEST_F(SortListsInt, Sliced) { @@ -224,7 +206,7 @@ TEST_F(SortListsInt, Sliced) auto const sliced_list = cudf::slice(l, {0, 4})[0]; auto const expected = LCW{{1, 2, 3, 4}, {5, 6, 7}, {8, 9}, {10}}; auto const [sorted_lists, stable_sorted_lists] = - generate_sorted_lists(lists_column_view{sliced_list}, {}, {}); + generate_sorted_lists(cudf::lists_column_view{sliced_list}, {}, {}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(sorted_lists->view(), expected); CUDF_TEST_EXPECT_COLUMNS_EQUAL(stable_sorted_lists->view(), expected); } @@ -233,7 +215,7 @@ TEST_F(SortListsInt, Sliced) auto const sliced_list = cudf::slice(l, {1, 4})[0]; auto const expected = LCW{{5, 6, 7}, {8, 9}, {10}}; auto const [sorted_lists, stable_sorted_lists] = - generate_sorted_lists(lists_column_view{sliced_list}, {}, {}); + generate_sorted_lists(cudf::lists_column_view{sliced_list}, {}, {}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(sorted_lists->view(), expected); CUDF_TEST_EXPECT_COLUMNS_EQUAL(stable_sorted_lists->view(), expected); } @@ -242,7 +224,7 @@ TEST_F(SortListsInt, Sliced) auto const sliced_list = cudf::slice(l, {1, 2})[0]; auto const expected = LCW{{5, 6, 7}}; auto const [sorted_lists, stable_sorted_lists] = - generate_sorted_lists(lists_column_view{sliced_list}, {}, {}); + generate_sorted_lists(cudf::lists_column_view{sliced_list}, {}, {}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(sorted_lists->view(), expected); CUDF_TEST_EXPECT_COLUMNS_EQUAL(stable_sorted_lists->view(), expected); } @@ -251,11 +233,60 @@ TEST_F(SortListsInt, Sliced) auto const sliced_list = cudf::slice(l, {0, 2})[0]; auto const expected = LCW{{1, 2, 3, 4}, {5, 6, 7}}; auto const [sorted_lists, stable_sorted_lists] = - generate_sorted_lists(lists_column_view{sliced_list}, {}, {}); + generate_sorted_lists(cudf::lists_column_view{sliced_list}, {}, {}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(sorted_lists->view(), expected); CUDF_TEST_EXPECT_COLUMNS_EQUAL(stable_sorted_lists->view(), expected); } } -} // namespace test -} // namespace cudf +using SortListsDouble = SortLists; +TEST_F(SortListsDouble, InfinityAndNaN) +{ + auto constexpr NaN = std::numeric_limits::quiet_NaN(); + auto constexpr Inf = std::numeric_limits::infinity(); + + using LCW = cudf::test::lists_column_wrapper; + { + LCW input{-0.0, -NaN, -NaN, NaN, Inf, -Inf, 7, 5, 6, NaN, Inf, -Inf, -NaN, -NaN, -0.0}; + auto [sorted_lists, stable_sorted_lists] = + generate_sorted_lists(cudf::lists_column_view{input}, {}, {}); + LCW expected{-Inf, -Inf, -0, -0, 5, 6, 7, Inf, Inf, -NaN, -NaN, NaN, NaN, -NaN, -NaN}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(sorted_lists->view(), expected); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(stable_sorted_lists->view(), expected); + } + // This data includes a row with over 200 elements to test the + // radix sort is not used in the logic path in segmented_sort. + // Technically radix sort is not expected to be used in either case. 
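The expected rows in the test that follows encode the floating-point ordering these sort tests rely on: -Inf first, finite values in between, +Inf next, and NaNs (whatever their sign bit) after everything else. A small host-side illustration of that ordering contract with std::sort and a NaN-aware comparator; this is only a sketch of the comparison semantics, not cudf's segmented-sort implementation:

#include <algorithm>
#include <cmath>
#include <iostream>
#include <limits>
#include <vector>

int main()
{
  auto const NaN = std::numeric_limits<double>::quiet_NaN();
  auto const Inf = std::numeric_limits<double>::infinity();

  std::vector<double> v{-0.0, NaN, Inf, -Inf, 7, 5, 6, NaN};

  // NaNs compare greater than every other value, so they sort to the end,
  // matching the expected columns in the test below.
  std::sort(v.begin(), v.end(), [](double a, double b) {
    if (std::isnan(a)) return false;  // a is NaN: never "less"
    if (std::isnan(b)) return true;   // b is NaN: any non-NaN a is "less"
    return a < b;
  });

  for (double d : v) std::cout << d << ' ';  // -inf -0 5 6 7 inf nan nan
}
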
+ { + // clang-format off + LCW input{0.0, -0.0, -NaN, -NaN, NaN, Inf, -Inf, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, + NaN, Inf, -Inf, -NaN, -NaN, -0.0, 0.0}; + LCW expected{-Inf, -Inf, 0.0, -0.0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + Inf, Inf, -NaN, -NaN, NaN, NaN, -NaN, -NaN}; + // clang-format on + auto [sorted_lists, stable_sorted_lists] = + generate_sorted_lists(cudf::lists_column_view{input}, {}, {}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(sorted_lists->view(), expected); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(stable_sorted_lists->view(), expected); + } +} diff --git a/cpp/tests/scalar/scalar_device_view_test.cu b/cpp/tests/scalar/scalar_device_view_test.cu index ec005412a61..f4a1c94c3e6 100644 --- a/cpp/tests/scalar/scalar_device_view_test.cu +++ b/cpp/tests/scalar/scalar_device_view_test.cu @@ -59,13 +59,15 @@ TYPED_TEST(TypedScalarDeviceViewTest, Value) auto scalar_device_view1 = cudf::get_scalar_device_view(s1); rmm::device_scalar result{cudf::default_stream_value}; - test_set_value<<<1, 1>>>(scalar_device_view, scalar_device_view1); + test_set_value<<<1, 1, 0, cudf::default_stream_value.value()>>>(scalar_device_view, + scalar_device_view1); CUDF_CHECK_CUDA(0); EXPECT_EQ(s1.value(), value); EXPECT_TRUE(s1.is_valid()); - test_value<<<1, 1>>>(scalar_device_view, scalar_device_view1, result.data()); + test_value<<<1, 1, 0, cudf::default_stream_value.value()>>>( + scalar_device_view, scalar_device_view1, result.data()); CUDF_CHECK_CUDA(0); EXPECT_TRUE(result.value(cudf::default_stream_value)); @@ -84,7 +86,7 @@ TYPED_TEST(TypedScalarDeviceViewTest, ConstructNull) auto scalar_device_view = cudf::get_scalar_device_view(s); rmm::device_scalar result{cudf::default_stream_value}; - test_null<<<1, 1>>>(scalar_device_view, result.data()); + test_null<<<1, 1, 0, cudf::default_stream_value.value()>>>(scalar_device_view, result.data()); CUDF_CHECK_CUDA(0); EXPECT_FALSE(result.value(cudf::default_stream_value)); @@ -104,7 +106,7 @@ TYPED_TEST(TypedScalarDeviceViewTest, SetNull) s.set_valid_async(true); EXPECT_TRUE(s.is_valid()); - test_setnull<<<1, 1>>>(scalar_device_view); + test_setnull<<<1, 1, 0, cudf::default_stream_value.value()>>>(scalar_device_view); CUDF_CHECK_CUDA(0); EXPECT_FALSE(s.is_valid()); @@ -130,7 +132,8 @@ TEST_F(StringScalarDeviceViewTest, Value) rmm::device_scalar result{cudf::default_stream_value}; auto value_v = 
cudf::detail::make_device_uvector_sync(value); - test_string_value<<<1, 1>>>(scalar_device_view, value_v.data(), value.size(), result.data()); + test_string_value<<<1, 1, 0, cudf::default_stream_value.value()>>>( + scalar_device_view, value_v.data(), value.size(), result.data()); CUDF_CHECK_CUDA(0); EXPECT_TRUE(result.value(cudf::default_stream_value)); diff --git a/cpp/tests/strings/contains_tests.cpp b/cpp/tests/strings/contains_tests.cpp index 9ca4fbb6cb7..ba738f7b616 100644 --- a/cpp/tests/strings/contains_tests.cpp +++ b/cpp/tests/strings/contains_tests.cpp @@ -330,6 +330,7 @@ TEST_F(StringsContainsTests, Errors) auto strings_view = cudf::strings_column_view(input); EXPECT_THROW(cudf::strings::contains_re(strings_view, "(3?)+"), cudf::logic_error); + EXPECT_THROW(cudf::strings::contains_re(strings_view, "(?:3?)+"), cudf::logic_error); EXPECT_THROW(cudf::strings::contains_re(strings_view, "3?+"), cudf::logic_error); EXPECT_THROW(cudf::strings::count_re(strings_view, "{3}a"), cudf::logic_error); } diff --git a/cpp/tests/strings/factories_test.cu b/cpp/tests/strings/factories_test.cu index e998be92216..36fdd423168 100644 --- a/cpp/tests/strings/factories_test.cu +++ b/cpp/tests/strings/factories_test.cu @@ -30,6 +30,7 @@ #include #include +#include #include #include @@ -213,7 +214,7 @@ TEST_F(StringsFactoriesTest, StringPairWithNullsAndEmpty) auto d_column = cudf::column_device_view::create(data); rmm::device_uvector pairs(d_column->size(), cudf::default_stream_value); - thrust::transform(thrust::device, + thrust::transform(rmm::exec_policy(cudf::default_stream_value), d_column->pair_begin(), d_column->pair_end(), pairs.data(), diff --git a/cpp/tests/transform/row_bit_count_test.cu b/cpp/tests/transform/row_bit_count_test.cu index 9c3326cf575..61c2fa12895 100644 --- a/cpp/tests/transform/row_bit_count_test.cu +++ b/cpp/tests/transform/row_bit_count_test.cu @@ -27,7 +27,6 @@ #include -#include #include #include #include @@ -54,7 +53,7 @@ TYPED_TEST(RowBitCountTyped, SimpleTypes) // expect size of the type per row auto expected = make_fixed_width_column(data_type{type_id::INT32}, 16); cudf::mutable_column_view mcv(*expected); - thrust::fill(rmm::exec_policy(), + thrust::fill(rmm::exec_policy(cudf::default_stream_value), mcv.begin(), mcv.end(), sizeof(device_storage_type_t) * CHAR_BIT); @@ -77,7 +76,7 @@ TYPED_TEST(RowBitCountTyped, SimpleTypesWithNulls) // expect size of the type + 1 bit per row auto expected = make_fixed_width_column(data_type{type_id::INT32}, 16); cudf::mutable_column_view mcv(*expected); - thrust::fill(rmm::exec_policy(), + thrust::fill(rmm::exec_policy(cudf::default_stream_value), mcv.begin(), mcv.end(), (sizeof(device_storage_type_t) * CHAR_BIT) + 1); @@ -241,13 +240,15 @@ TEST_F(RowBitCount, StructsWithLists_RowsExceedingASingleBlock) // List child column = {0, 1, 2, 3, 4, ..., 2*num_rows}; auto ints = make_numeric_column(data_type{type_id::INT32}, num_rows * 2); auto ints_view = ints->mutable_view(); - thrust::tabulate( - thrust::device, ints_view.begin(), ints_view.end(), thrust::identity{}); + thrust::tabulate(rmm::exec_policy(cudf::default_stream_value), + ints_view.begin(), + ints_view.end(), + thrust::identity{}); // List offsets = {0, 2, 4, 6, 8, ..., num_rows*2}; auto list_offsets = make_numeric_column(data_type{type_id::INT32}, num_rows + 1); auto list_offsets_view = list_offsets->mutable_view(); - thrust::tabulate(thrust::device, + thrust::tabulate(rmm::exec_policy(cudf::default_stream_value), list_offsets_view.begin(), list_offsets_view.end(), times_2{}); 
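The Thrust-policy hunks in this and the surrounding test files all apply one mechanical change: the unparameterized thrust::device policy, which enqueues work on the legacy default stream, is replaced by a policy bound to the stream the library itself uses (rmm::exec_policy(cudf::default_stream_value) also threads RMM's device allocator through). A standalone CUDA sketch of the same idea using raw Thrust, with a hand-created stream standing in for cudf's default stream value:

// Compile with nvcc. Shows work being ordered on an explicit stream via
// thrust::cuda::par.on(...); the diff uses the equivalent rmm::exec_policy.
#include <thrust/device_vector.h>
#include <thrust/sequence.h>
#include <thrust/system/cuda/execution_policy.h>

#include <cuda_runtime_api.h>

int main()
{
  cudaStream_t stream;
  cudaStreamCreate(&stream);

  thrust::device_vector<int> v(1024);
  // Enqueued on `stream`, not the legacy default stream.
  thrust::sequence(thrust::cuda::par.on(stream), v.begin(), v.end());

  cudaStreamSynchronize(stream);
  cudaStreamDestroy(stream);
  return 0;
}
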
@@ -263,7 +264,7 @@ TEST_F(RowBitCount, StructsWithLists_RowsExceedingASingleBlock) // Compute row_bit_count, and compare. auto row_bit_counts = row_bit_count(table_view{{structs_column->view()}}); auto expected_row_bit_counts = make_numeric_column(data_type{type_id::INT32}, num_rows); - thrust::fill_n(thrust::device, + thrust::fill_n(rmm::exec_policy(cudf::default_stream_value), expected_row_bit_counts->mutable_view().begin(), num_rows, CHAR_BIT * (2 * sizeof(int32_t) + sizeof(offset_type))); @@ -612,7 +613,7 @@ TEST_F(RowBitCount, Table) auto expected = cudf::make_fixed_width_column(data_type{type_id::INT32}, t.num_rows()); cudf::mutable_column_view mcv(*expected); thrust::transform( - rmm::exec_policy(), + rmm::exec_policy(cudf::default_stream_value), thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + t.num_rows(), mcv.begin(), diff --git a/cpp/tests/types/type_dispatcher_test.cu b/cpp/tests/types/type_dispatcher_test.cu index eee9cd46e4f..3280339ea85 100644 --- a/cpp/tests/types/type_dispatcher_test.cu +++ b/cpp/tests/types/type_dispatcher_test.cu @@ -70,7 +70,8 @@ __global__ void dispatch_test_kernel(cudf::type_id id, bool* d_result) TYPED_TEST(TypedDispatcherTest, DeviceDispatch) { auto result = cudf::detail::make_zeroed_device_uvector_sync(1); - dispatch_test_kernel<<<1, 1>>>(cudf::type_to_id(), result.data()); + dispatch_test_kernel<<<1, 1, 0, cudf::default_stream_value.value()>>>( + cudf::type_to_id(), result.data()); CUDF_CUDA_TRY(cudaDeviceSynchronize()); EXPECT_EQ(true, result.front_element(cudf::default_stream_value)); } @@ -130,7 +131,7 @@ __global__ void double_dispatch_test_kernel(cudf::type_id id1, cudf::type_id id2 TYPED_TEST(TypedDoubleDispatcherTest, DeviceDoubleDispatch) { auto result = cudf::detail::make_zeroed_device_uvector_sync(1); - double_dispatch_test_kernel<<<1, 1>>>( + double_dispatch_test_kernel<<<1, 1, 0, cudf::default_stream_value.value()>>>( cudf::type_to_id(), cudf::type_to_id(), result.data()); CUDF_CUDA_TRY(cudaDeviceSynchronize()); EXPECT_EQ(true, result.front_element(cudf::default_stream_value)); diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index f978f6bbc66..5106196a58f 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -66,7 +66,7 @@ std::unique_ptr generate_all_row_indices(size_type num_rows) { auto indices = cudf::make_fixed_width_column(data_type{type_id::INT32}, num_rows, mask_state::UNALLOCATED); - thrust::sequence(rmm::exec_policy(), + thrust::sequence(rmm::exec_policy(cudf::default_stream_value), indices->mutable_view().begin(), indices->mutable_view().end(), 0); @@ -132,8 +132,9 @@ std::unique_ptr generate_child_row_indices(lists_column_view const& c, ? (offsets[true_index + 1] - offsets[true_index]) : 0; }); - auto const output_size = - thrust::reduce(rmm::exec_policy(), row_size_iter, row_size_iter + row_indices.size()); + auto const output_size = thrust::reduce(rmm::exec_policy(cudf::default_stream_value), + row_size_iter, + row_size_iter + row_indices.size()); // no output. done. 
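The steps that follow in generate_child_row_indices (exclusive_scan, generate, scatter_if, inclusive_scan_by_key) are a classic expand-by-scan: per-row child counts become runs of consecutive child indices. A host-only sketch with the worked values from the comments in this function (two selected rows of sizes 2 and 3, starting at child offsets 6 and 11), using std algorithms in place of the Thrust calls:

#include <iostream>
#include <numeric>
#include <vector>

int main()
{
  std::vector<int> sizes{2, 3};    // child elements per selected row
  std::vector<int> starts{6, 11};  // each row's first child offset

  // 1) exclusive scan of sizes -> where each row's run begins in the output
  std::vector<int> run_begin(sizes.size());
  std::exclusive_scan(sizes.begin(), sizes.end(), run_begin.begin(), 0);  // {0, 2}
  int const total = run_begin.back() + sizes.back();                      // 5

  // 2) ones everywhere, with each row's first child index scattered in,
  //    plus segment keys built the same way
  std::vector<int> result(total, 1);  // {1, 1, 1, 1, 1}
  std::vector<int> keys(total, 0);
  for (std::size_t i = 0; i < sizes.size(); ++i) {
    if (sizes[i] == 0) continue;      // skip empty rows (the scatter_if stencil)
    result[run_begin[i]] = starts[i];  // {6, 1, 11, 1, 1}
    keys[run_begin[i]]   = 1;          // mark segment boundaries
  }
  std::inclusive_scan(keys.begin(), keys.end(), keys.begin());  // {1, 1, 2, 2, 2}

  // 3) inclusive scan-by-key: consecutive child indices within each segment
  for (int i = 1; i < total; ++i) {
    if (keys[i] == keys[i - 1]) result[i] += result[i - 1];
  }
  for (int v : result) std::cout << v << ' ';  // 6 7 11 12 13
}
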
auto result = cudf::make_fixed_width_column(data_type{type_id::INT32}, output_size, mask_state::UNALLOCATED); @@ -146,7 +147,7 @@ std::unique_ptr generate_child_row_indices(lists_column_view const& c, // auto output_row_start = cudf::make_fixed_width_column( data_type{type_id::INT32}, row_indices.size(), mask_state::UNALLOCATED); - thrust::exclusive_scan(rmm::exec_policy(), + thrust::exclusive_scan(rmm::exec_policy(cudf::default_stream_value), row_size_iter, row_size_iter + row_indices.size(), output_row_start->mutable_view().begin()); @@ -155,7 +156,7 @@ std::unique_ptr generate_child_row_indices(lists_column_view const& c, // // result = [1, 1, 1, 1, 1] // - thrust::generate(rmm::exec_policy(), + thrust::generate(rmm::exec_policy(cudf::default_stream_value), result->mutable_view().begin(), result->mutable_view().end(), [] __device__() { return 1; }); @@ -174,7 +175,7 @@ std::unique_ptr generate_child_row_indices(lists_column_view const& c, auto const true_index = row_indices[index] + offset; return offsets[true_index] - first_offset; }); - thrust::scatter_if(rmm::exec_policy(), + thrust::scatter_if(rmm::exec_policy(cudf::default_stream_value), output_row_iter, output_row_iter + row_indices.size(), output_row_start->view().begin(), @@ -188,18 +189,18 @@ std::unique_ptr generate_child_row_indices(lists_column_view const& c, // auto keys = cudf::make_fixed_width_column(data_type{type_id::INT32}, output_size, mask_state::UNALLOCATED); - thrust::generate(rmm::exec_policy(), + thrust::generate(rmm::exec_policy(cudf::default_stream_value), keys->mutable_view().begin(), keys->mutable_view().end(), [] __device__() { return 0; }); - thrust::scatter_if(rmm::exec_policy(), + thrust::scatter_if(rmm::exec_policy(cudf::default_stream_value), row_size_iter, row_size_iter + row_indices.size(), output_row_start->view().begin(), row_size_iter, keys->mutable_view().begin(), [] __device__(auto row_size) { return row_size != 0; }); - thrust::inclusive_scan(rmm::exec_policy(), + thrust::inclusive_scan(rmm::exec_policy(cudf::default_stream_value), keys->view().begin(), keys->view().end(), keys->mutable_view().begin()); @@ -212,7 +213,7 @@ std::unique_ptr generate_child_row_indices(lists_column_view const& c, // output // result = [6, 7, 11, 12, 13] // - thrust::inclusive_scan_by_key(rmm::exec_policy(), + thrust::inclusive_scan_by_key(rmm::exec_policy(cudf::default_stream_value), keys->view().begin(), keys->view().end(), result->view().begin(), @@ -255,7 +256,9 @@ struct column_property_comparator { auto const true_index = row_indices[index] + offset; return !validity || cudf::bit_is_set(validity, true_index) ? 
0 : 1; }); - return thrust::reduce(rmm::exec_policy(), validity_iter, validity_iter + row_indices.size()); + return thrust::reduce(rmm::exec_policy(cudf::default_stream_value), + validity_iter, + validity_iter + row_indices.size()); } bool compare_common(cudf::column_view const& lhs, @@ -549,7 +552,7 @@ struct column_comparator_impl { lhs.size(), cudf::default_stream_value); // worst case: everything different auto input_iter = thrust::make_counting_iterator(0); auto diff_iter = thrust::copy_if( - rmm::exec_policy(), + rmm::exec_policy(cudf::default_stream_value), input_iter, input_iter + lhs_row_indices.size(), differences.begin(), @@ -640,7 +643,7 @@ struct column_comparator_impl { // auto input_iter = thrust::make_counting_iterator(0); auto diff_iter = thrust::copy_if( - rmm::exec_policy(), + rmm::exec_policy(cudf::default_stream_value), input_iter, input_iter + lhs_row_indices.size(), differences.begin(), @@ -862,7 +865,8 @@ void expect_equal_buffers(void const* lhs, void const* rhs, std::size_t size_byt } auto typed_lhs = static_cast(lhs); auto typed_rhs = static_cast(rhs); - EXPECT_TRUE(thrust::equal(thrust::device, typed_lhs, typed_lhs + size_bytes, typed_rhs)); + EXPECT_TRUE(thrust::equal( + rmm::exec_policy(cudf::default_stream_value), typed_lhs, typed_lhs + size_bytes, typed_rhs)); } /** @@ -957,7 +961,7 @@ std::string nested_offsets_to_string(NestedColumnView const& c, std::string cons // normalize the offset values for the column offset size_type const* d_offsets = offsets.head() + c.offset(); thrust::transform( - rmm::exec_policy(), + rmm::exec_policy(cudf::default_stream_value), d_offsets, d_offsets + output_size, shifted_offsets.begin(), diff --git a/cpp/tests/utilities/table_utilities.cu b/cpp/tests/utilities/table_utilities.cu index e7fe97efa96..e1740eb5023 100644 --- a/cpp/tests/utilities/table_utilities.cu +++ b/cpp/tests/utilities/table_utilities.cu @@ -1,3 +1,19 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + #include #include diff --git a/cpp/tests/utilities_tests/span_tests.cu b/cpp/tests/utilities_tests/span_tests.cu index 78b76ff043e..fc4104c765b 100644 --- a/cpp/tests/utilities_tests/span_tests.cu +++ b/cpp/tests/utilities_tests/span_tests.cu @@ -238,7 +238,7 @@ TEST(SpanTest, CanUseDeviceSpan) auto d_span = device_span(d_message.data().get(), d_message.size()); - simple_device_kernel<<<1, 1>>>(d_span); + simple_device_kernel<<<1, 1, 0, cudf::default_stream_value.value()>>>(d_span); cudaDeviceSynchronize(); @@ -277,8 +277,8 @@ TEST(MdSpanTest, DeviceReadWrite) { auto vector = hostdevice_2dvector(11, 23, cudf::default_stream_value); - readwrite_kernel<<<1, 1>>>(vector); - readwrite_kernel<<<1, 1>>>(vector); + readwrite_kernel<<<1, 1, 0, cudf::default_stream_value.value()>>>(vector); + readwrite_kernel<<<1, 1, 0, cudf::default_stream_value.value()>>>(vector); vector.device_to_host(cudf::default_stream_value, true); EXPECT_EQ(vector[5][6], 30); } diff --git a/cpp/tests/wrappers/timestamps_test.cu b/cpp/tests/wrappers/timestamps_test.cu index 05acf635d90..73bfd15744a 100644 --- a/cpp/tests/wrappers/timestamps_test.cu +++ b/cpp/tests/wrappers/timestamps_test.cu @@ -94,8 +94,8 @@ TYPED_TEST(ChronoColumnTest, ChronoDurationsMatchPrimitiveRepresentation) fixed_width_column_wrapper(chrono_col_data.begin(), chrono_col_data.end()); rmm::device_uvector indices(this->size(), cudf::default_stream_value); - thrust::sequence(rmm::exec_policy(), indices.begin(), indices.end()); - EXPECT_TRUE(thrust::all_of(rmm::exec_policy(), + thrust::sequence(rmm::exec_policy(cudf::default_stream_value), indices.begin(), indices.end()); + EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), indices.begin(), indices.end(), compare_chrono_elements_to_primitive_representation{ @@ -148,10 +148,10 @@ TYPED_TEST(ChronoColumnTest, ChronosCanBeComparedInDeviceCode) generate_timestamps(this->size(), time_point_ms(start_rhs), time_point_ms(stop_rhs)); rmm::device_uvector indices(this->size(), cudf::default_stream_value); - thrust::sequence(rmm::exec_policy(), indices.begin(), indices.end()); + thrust::sequence(rmm::exec_policy(cudf::default_stream_value), indices.begin(), indices.end()); EXPECT_TRUE(thrust::all_of( - rmm::exec_policy(), + rmm::exec_policy(cudf::default_stream_value), indices.begin(), indices.end(), compare_chrono_elements{cudf::binary_operator::LESS, @@ -159,7 +159,7 @@ TYPED_TEST(ChronoColumnTest, ChronosCanBeComparedInDeviceCode) *cudf::column_device_view::create(chrono_rhs_col)})); EXPECT_TRUE(thrust::all_of( - rmm::exec_policy(), + rmm::exec_policy(cudf::default_stream_value), indices.begin(), indices.end(), compare_chrono_elements{cudf::binary_operator::GREATER, @@ -167,7 +167,7 @@ TYPED_TEST(ChronoColumnTest, ChronosCanBeComparedInDeviceCode) *cudf::column_device_view::create(chrono_lhs_col)})); EXPECT_TRUE(thrust::all_of( - rmm::exec_policy(), + rmm::exec_policy(cudf::default_stream_value), indices.begin(), indices.end(), compare_chrono_elements{cudf::binary_operator::LESS_EQUAL, @@ -175,7 +175,7 @@ TYPED_TEST(ChronoColumnTest, ChronosCanBeComparedInDeviceCode) *cudf::column_device_view::create(chrono_lhs_col)})); EXPECT_TRUE(thrust::all_of( - rmm::exec_policy(), + rmm::exec_policy(cudf::default_stream_value), indices.begin(), indices.end(), compare_chrono_elements{cudf::binary_operator::GREATER_EQUAL, diff --git a/docs/cudf/source/_ext/PandasCompat.py b/docs/cudf/source/_ext/PandasCompat.py index 50fc801e4c1..af2b16035c3 100644 --- a/docs/cudf/source/_ext/PandasCompat.py +++ 
b/docs/cudf/source/_ext/PandasCompat.py
@@ -1,3 +1,5 @@
+# Copyright (c) 2021-2022, NVIDIA CORPORATION
+
 # This file is adapted from official sphinx tutorial for `todo` extension:
 # https://www.sphinx-doc.org/en/master/development/tutorials/todo.html
diff --git a/docs/cudf/source/api_docs/index.rst b/docs/cudf/source/api_docs/index.rst
index b77c98f3ac3..ef04167c327 100644
--- a/docs/cudf/source/api_docs/index.rst
+++ b/docs/cudf/source/api_docs/index.rst
@@ -19,4 +19,6 @@ This page provides a list of all publicly accessible modules, methods and classes
     io
     subword_tokenize
     string_handling
+    list_handling
+    struct_handling
     options
diff --git a/docs/cudf/source/api_docs/list_handling.rst b/docs/cudf/source/api_docs/list_handling.rst
new file mode 100644
index 00000000000..f1fb6d1ca74
--- /dev/null
+++ b/docs/cudf/source/api_docs/list_handling.rst
@@ -0,0 +1,21 @@
+List handling
+~~~~~~~~~~~~~
+
+``Series.list`` can be used to access the values of the series as
+lists and apply list methods to it. These can be accessed like
+``Series.list.<function/property>``.
+
+.. currentmodule:: cudf.core.column.lists.ListMethods
+.. autosummary::
+   :toctree: api/
+
+   astype
+   concat
+   contains
+   index
+   get
+   leaves
+   len
+   sort_values
+   take
+   unique
diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst
index 1e53c90b44d..53042041f6d 100644
--- a/docs/cudf/source/api_docs/series.rst
+++ b/docs/cudf/source/api_docs/series.rst
@@ -310,21 +310,6 @@ Timedelta properties
 .. include:: string_handling.rst
 
-..
-   The following is needed to ensure the generated pages are created with the
-   correct template (otherwise they would be created in the Series/Index class page)
-
-..
-   .. currentmodule:: cudf
-   .. autosummary::
-      :toctree: api/
-      :template: autosummary/accessor.rst
-
-      Series.str
-      Series.cat
-      Series.dt
-      Index.str
-
 .. _api.series.cat:
 
 Categorical accessor
@@ -349,42 +334,27 @@ the ``Series.cat`` accessor.
 
 .. _api.series.list:
 
-
-List handling
-~~~~~~~~~~~~~
-
-``Series.list`` can be used to access the values of the series as
-lists and apply list methods to it. These can be accessed like
-``Series.list.<function/property>``.
-
-.. currentmodule:: cudf.core.column.lists.ListMethods
-.. autosummary::
-   :toctree: api/
-
-   concat
-   contains
-   get
-   len
-   sort_values
-   take
-   unique
+.. include:: list_handling.rst
 
 .. _api.series.struct:
 
+.. include:: struct_handling.rst
 
-Struct handling
-~~~~~~~~~~~~~~~
-
-``Series.struct`` can be used to access the values of the series as
-Structs and apply struct methods to it. These can be accessed like
-``Series.struct.<function/property>``.
-
-.. currentmodule:: cudf.core.column.struct.StructMethods
-.. autosummary::
-   :toctree: api/
+..
+   The following is needed to ensure the generated pages are created with the
+   correct template (otherwise they would be created in the Series/Index class page)
 
-   field
-   explode
+..
+   .. currentmodule:: cudf
+   .. autosummary::
+      :toctree: api/
+      :template: autosummary/accessor.rst
+
+      Series.str
+      Series.cat
+      Series.dt
+      Index.str
 
 Serialization / IO / conversion
diff --git a/docs/cudf/source/api_docs/struct_handling.rst b/docs/cudf/source/api_docs/struct_handling.rst
new file mode 100644
index 00000000000..05ba990382a
--- /dev/null
+++ b/docs/cudf/source/api_docs/struct_handling.rst
@@ -0,0 +1,13 @@
+Struct handling
+~~~~~~~~~~~~~~~
+
+``Series.struct`` can be used to access the values of the series as
+Structs and apply struct methods to it. These can be accessed like
+``Series.struct.<function/property>``.
+
+.. currentmodule:: cudf.core.column.struct.StructMethods
+.. 
autosummary:: + :toctree: api/ + + field + explode diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index 4bab7c1a403..dbc2a28c38c 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -3922,7 +3922,7 @@ public Table aggregateWindowsOverRanges(AggregationOverWindow... windowAggregate break; default: throw new IllegalArgumentException("Expected range-based window orderBy's " + - "type: integral (Boolean-exclusive) and timestamp"); + "type: integral (Boolean-exclusive), decimal, and timestamp"); } ColumnWindowOps ops = groupedOps.computeIfAbsent(agg.getColumnIndex(), (idx) -> new ColumnWindowOps()); diff --git a/print_env.sh b/print_env.sh index c929794da2b..6774f94b540 100755 --- a/print_env.sh +++ b/print_env.sh @@ -1,7 +1,8 @@ #!/usr/bin/env bash +# Copyright (c) 2022, NVIDIA CORPORATION. # Reports relevant environment information useful for diagnosing and # debugging cuDF issues. -# Usage: +# Usage: # "./print_env.sh" - prints to stdout # "./print_env.sh > env.txt" - prints to file "env.txt" @@ -14,16 +15,16 @@ git submodule status --recursive else echo "Not inside a git repository" fi -echo +echo echo "***OS Information***" cat /etc/*-release uname -a -echo +echo echo "***GPU Information***" nvidia-smi -echo +echo echo "***CPU***" lscpu @@ -31,15 +32,15 @@ echo echo "***CMake***" which cmake && cmake --version -echo +echo echo "***g++***" which g++ && g++ --version -echo +echo echo "***nvcc***" which nvcc && nvcc --version -echo +echo echo "***Python***" which python && python -c "import sys; print('Python {0}.{1}.{2}'.format(sys.version_info[0], sys.version_info[1], sys.version_info[2]))" diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index f34c11de1e9..28eb380f7cb 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -9,7 +9,6 @@ import rmm -from cudf.api.types import dtype from cudf import api, core, datasets, testing from cudf._version import get_versions from cudf.api.extensions import ( @@ -17,15 +16,28 @@ register_index_accessor, register_series_accessor, ) -from cudf.core.scalar import Scalar +from cudf.api.types import dtype +from cudf.core.algorithms import factorize +from cudf.core.cut import cut +from cudf.core.dataframe import DataFrame, from_dataframe, from_pandas, merge +from cudf.core.dtypes import ( + CategoricalDtype, + Decimal32Dtype, + Decimal64Dtype, + Decimal128Dtype, + IntervalDtype, + ListDtype, + StructDtype, +) +from cudf.core.groupby import Grouper from cudf.core.index import ( BaseIndex, CategoricalIndex, DatetimeIndex, Float32Index, Float64Index, - Index, GenericIndex, + Index, Int8Index, Int16Index, Int32Index, @@ -40,22 +52,8 @@ UInt64Index, interval_range, ) -from cudf.core.dataframe import DataFrame, from_pandas, merge, from_dataframe -from cudf.core.series import Series from cudf.core.missing import NA from cudf.core.multiindex import MultiIndex -from cudf.core.cut import cut -from cudf.core.algorithms import factorize -from cudf.core.dtypes import ( - CategoricalDtype, - Decimal64Dtype, - Decimal32Dtype, - Decimal128Dtype, - IntervalDtype, - ListDtype, - StructDtype, -) -from cudf.core.groupby import Grouper from cudf.core.reshape import ( concat, crosstab, @@ -65,8 +63,9 @@ pivot_table, unstack, ) -from cudf.core.series import isclose -from cudf.core.tools.datetimes import DateOffset, to_datetime +from cudf.core.scalar import Scalar +from cudf.core.series import Series, isclose +from 
cudf.core.tools.datetimes import DateOffset, date_range, to_datetime from cudf.core.tools.numeric import to_numeric from cudf.io import ( from_dlpack, @@ -79,15 +78,9 @@ read_parquet, read_text, ) -from cudf.core.tools.datetimes import date_range +from cudf.options import describe_option, get_option, set_option from cudf.utils.dtypes import _NA_REP -from cudf.utils.utils import set_allocator, clear_cache - -from cudf.options import ( - get_option, - set_option, - describe_option, -) +from cudf.utils.utils import clear_cache, set_allocator try: from cubinlinker.patch import patch_numba_linker_if_needed diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py index bab28433c41..8ecb9a57426 100644 --- a/python/cudf/cudf/_lib/__init__.py +++ b/python/cudf/cudf/_lib/__init__.py @@ -33,9 +33,9 @@ stream_compaction, string_casting, strings, + text, transpose, unary, - text, ) MAX_COLUMN_SIZE = np.iinfo(np.int32).max diff --git a/python/cudf/cudf/_lib/cpp/datetime.pxd b/python/cudf/cudf/_lib/cpp/datetime.pxd index 498fc313cf9..74addb87357 100644 --- a/python/cudf/cudf/_lib/cpp/datetime.pxd +++ b/python/cudf/cudf/_lib/cpp/datetime.pxd @@ -1,3 +1,5 @@ +# Copyright (c) 2020-2022, NVIDIA CORPORATION. + from libcpp.memory cimport unique_ptr from cudf._lib.cpp.column.column cimport column diff --git a/python/cudf/cudf/_lib/strings/__init__.py b/python/cudf/cudf/_lib/strings/__init__.py index aea733c2445..ff558a06d87 100644 --- a/python/cudf/cudf/_lib/strings/__init__.py +++ b/python/cudf/cudf/_lib/strings/__init__.py @@ -62,7 +62,7 @@ startswith_multiple, ) from cudf._lib.strings.findall import findall -from cudf._lib.strings.json import get_json_object, GetJsonObjectOptions +from cudf._lib.strings.json import GetJsonObjectOptions, get_json_object from cudf._lib.strings.padding import ( SideType, center, diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index 549b8bae12a..6898ae4941c 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -101,7 +101,9 @@ def __getitem__(self, key): def __contains__(self, item): return item in self._values - def _copy_type_metadata(self: BaseIndexT, other: BaseIndexT) -> BaseIndexT: + def _copy_type_metadata( + self: BaseIndexT, other: BaseIndexT, *, override_dtypes=None + ) -> BaseIndexT: raise NotImplementedError def get_level_values(self, level): diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py index 231a42a27b7..6d4a2990e34 100644 --- a/python/cudf/cudf/core/_internals/where.py +++ b/python/cudf/cudf/core/_internals/where.py @@ -16,6 +16,7 @@ from cudf.core.missing import NA from cudf.utils.dtypes import ( _can_cast, + _dtype_can_hold_element, find_common_type, is_mixed_with_object_dtype, ) @@ -84,6 +85,12 @@ def _check_and_cast_columns_with_other( other, source_dtype ): common_dtype = source_dtype + elif ( + isinstance(source_col, cudf.core.column.NumericalColumn) + and other_is_scalar + and _dtype_can_hold_element(source_dtype, other) + ): + common_dtype = source_dtype else: common_dtype = find_common_type( [ diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index d438f47e1c4..601ad707ba6 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -755,7 +755,7 @@ def __setitem__(self, key, value): ) if to_add_categories > 0: - raise ValueError( + raise TypeError( "Cannot setitem on a Categorical with a new " 
"category, set the categories first" ) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 4fe365768ef..8b2c51dae90 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -62,6 +62,7 @@ is_string_dtype, is_struct_dtype, ) +from cudf.core._compat import PANDAS_GE_150 from cudf.core.abc import Serializable from cudf.core.buffer import Buffer, DeviceBufferLike, as_device_buffer_like from cudf.core.dtypes import ( @@ -83,6 +84,11 @@ ) from cudf.utils.utils import _array_ufunc, mask_dtype +if PANDAS_GE_150: + from pandas.core.arrays.arrow.extension_types import ArrowIntervalType +else: + from pandas.core.arrays._arrow_utils import ArrowIntervalType + T = TypeVar("T", bound="ColumnBase") # TODO: This workaround allows type hints for `slice`, since `slice` is a # method in ColumnBase. @@ -290,9 +296,7 @@ def from_arrow(cls, array: pa.Array) -> ColumnBase: size=codes.size, ordered=array.type.ordered, ) - elif isinstance( - array.type, pd.core.arrays._arrow_utils.ArrowIntervalType - ): + elif isinstance(array.type, ArrowIntervalType): return cudf.core.column.IntervalColumn.from_arrow(array) result = libcudf.interop.from_arrow(data)[0] @@ -1591,6 +1595,14 @@ def build_list_column( offset: int, optional """ dtype = ListDtype(element_type=elements.dtype) + if size is None: + if indices.size == 0: + size = 0 + else: + # one less because the last element of offsets is the number of + # bytes in the data buffer + size = indices.size - 1 + size = size - offset result = build_column( data=None, diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py index ad73eaf2b93..657403a6082 100644 --- a/python/cudf/cudf/core/column/interval.py +++ b/python/cudf/cudf/core/column/interval.py @@ -131,5 +131,5 @@ def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series": # types into pandas (trying to convert the underlying numerical columns # directly is problematic), so we're stuck with this for now. return pd.Series( - pd.IntervalDtype().__from_arrow__(self.to_arrow()), index=index + self.dtype.to_pandas().__from_arrow__(self.to_arrow()), index=index ) diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 4b74dde129c..a66c11c8bdc 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -34,6 +34,7 @@ is_integer, is_integer_dtype, is_number, + is_scalar, ) from cudf.core.buffer import DeviceBufferLike, as_device_buffer_like from cudf.core.column import ( @@ -128,6 +129,43 @@ def has_nulls(self, include_nan=False): self.nan_count != 0 if include_nan else False ) + def __setitem__(self, key: Any, value: Any): + """ + Set the value of ``self[key]`` to ``value``. + + If ``value`` and ``self`` are of different types, ``value`` is coerced + to ``self.dtype``. + """ + + # Normalize value to scalar/column + device_value = ( + cudf.Scalar( + value, + dtype=self.dtype + if cudf._lib.scalar._is_null_host_scalar(value) + else None, + ) + if is_scalar(value) + else as_column(value) + ) + + if not is_bool_dtype(self.dtype) and is_bool_dtype(device_value.dtype): + raise TypeError(f"Invalid value {value} for dtype {self.dtype}") + else: + device_value = device_value.astype(self.dtype) + + out: Optional[ColumnBase] # If None, no need to perform mimic inplace. 
+ if isinstance(key, slice): + out = self._scatter_by_slice(key, device_value) + else: + key = as_column(key) + if not isinstance(key, cudf.core.column.NumericalColumn): + raise ValueError(f"Invalid scatter map type {key.dtype}.") + out = self._scatter_by_column(key, device_value) + + if out: + self._mimic_inplace(out, inplace=True) + @property def __cuda_array_interface__(self) -> Mapping[str, Any]: output = { diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 172a1ed9edc..287e68531f8 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -23,6 +23,7 @@ from numba import cuda import cudf +import cudf.api.types from cudf import _lib as libcudf from cudf._lib import string_casting as str_cast, strings as libstrings from cudf._lib.column import Column @@ -58,47 +59,47 @@ def str_to_boolean(column: StringColumn): _str_to_numeric_typecast_functions = { - cudf.dtype("int8"): str_cast.stoi8, - cudf.dtype("int16"): str_cast.stoi16, - cudf.dtype("int32"): str_cast.stoi, - cudf.dtype("int64"): str_cast.stol, - cudf.dtype("uint8"): str_cast.stoui8, - cudf.dtype("uint16"): str_cast.stoui16, - cudf.dtype("uint32"): str_cast.stoui, - cudf.dtype("uint64"): str_cast.stoul, - cudf.dtype("float32"): str_cast.stof, - cudf.dtype("float64"): str_cast.stod, - cudf.dtype("bool"): str_to_boolean, + cudf.api.types.dtype("int8"): str_cast.stoi8, + cudf.api.types.dtype("int16"): str_cast.stoi16, + cudf.api.types.dtype("int32"): str_cast.stoi, + cudf.api.types.dtype("int64"): str_cast.stol, + cudf.api.types.dtype("uint8"): str_cast.stoui8, + cudf.api.types.dtype("uint16"): str_cast.stoui16, + cudf.api.types.dtype("uint32"): str_cast.stoui, + cudf.api.types.dtype("uint64"): str_cast.stoul, + cudf.api.types.dtype("float32"): str_cast.stof, + cudf.api.types.dtype("float64"): str_cast.stod, + cudf.api.types.dtype("bool"): str_to_boolean, } _numeric_to_str_typecast_functions = { - cudf.dtype("int8"): str_cast.i8tos, - cudf.dtype("int16"): str_cast.i16tos, - cudf.dtype("int32"): str_cast.itos, - cudf.dtype("int64"): str_cast.ltos, - cudf.dtype("uint8"): str_cast.ui8tos, - cudf.dtype("uint16"): str_cast.ui16tos, - cudf.dtype("uint32"): str_cast.uitos, - cudf.dtype("uint64"): str_cast.ultos, - cudf.dtype("float32"): str_cast.ftos, - cudf.dtype("float64"): str_cast.dtos, - cudf.dtype("bool"): str_cast.from_booleans, + cudf.api.types.dtype("int8"): str_cast.i8tos, + cudf.api.types.dtype("int16"): str_cast.i16tos, + cudf.api.types.dtype("int32"): str_cast.itos, + cudf.api.types.dtype("int64"): str_cast.ltos, + cudf.api.types.dtype("uint8"): str_cast.ui8tos, + cudf.api.types.dtype("uint16"): str_cast.ui16tos, + cudf.api.types.dtype("uint32"): str_cast.uitos, + cudf.api.types.dtype("uint64"): str_cast.ultos, + cudf.api.types.dtype("float32"): str_cast.ftos, + cudf.api.types.dtype("float64"): str_cast.dtos, + cudf.api.types.dtype("bool"): str_cast.from_booleans, } _datetime_to_str_typecast_functions = { # TODO: support Date32 UNIX days - # cudf.dtype("datetime64[D]"): str_cast.int2timestamp, - cudf.dtype("datetime64[s]"): str_cast.int2timestamp, - cudf.dtype("datetime64[ms]"): str_cast.int2timestamp, - cudf.dtype("datetime64[us]"): str_cast.int2timestamp, - cudf.dtype("datetime64[ns]"): str_cast.int2timestamp, + # cudf.api.types.dtype("datetime64[D]"): str_cast.int2timestamp, + cudf.api.types.dtype("datetime64[s]"): str_cast.int2timestamp, + cudf.api.types.dtype("datetime64[ms]"): str_cast.int2timestamp, + 
cudf.api.types.dtype("datetime64[us]"): str_cast.int2timestamp, + cudf.api.types.dtype("datetime64[ns]"): str_cast.int2timestamp, } _timedelta_to_str_typecast_functions = { - cudf.dtype("timedelta64[s]"): str_cast.int2timedelta, - cudf.dtype("timedelta64[ms]"): str_cast.int2timedelta, - cudf.dtype("timedelta64[us]"): str_cast.int2timedelta, - cudf.dtype("timedelta64[ns]"): str_cast.int2timedelta, + cudf.api.types.dtype("timedelta64[s]"): str_cast.int2timedelta, + cudf.api.types.dtype("timedelta64[ms]"): str_cast.int2timedelta, + cudf.api.types.dtype("timedelta64[us]"): str_cast.int2timedelta, + cudf.api.types.dtype("timedelta64[ns]"): str_cast.int2timedelta, } @@ -1642,7 +1643,7 @@ def isnumeric(self) -> SeriesOrIndex: also includes other characters that can represent quantities such as unicode fractions. - >>> s2 = pd.Series(['23', '³', '⅕', '']) + >>> s2 = pd.Series(['23', '³', '⅕', ''], dtype='str') >>> s2.str.isnumeric() 0 True 1 True @@ -3751,8 +3752,9 @@ def endswith(self, pat: str) -> SeriesOrIndex: dtype: bool """ if pat is None: - result_col = column.column_empty( - len(self._column), dtype="bool", masked=True + raise TypeError( + f"expected a string or a sequence-like object, not " + f"{type(pat).__name__}" ) elif is_scalar(pat): result_col = libstrings.endswith( @@ -3813,8 +3815,9 @@ def startswith(self, pat: Union[str, Sequence]) -> SeriesOrIndex: dtype: bool """ if pat is None: - result_col = column.column_empty( - len(self._column), dtype="bool", masked=True + raise TypeError( + f"expected a string or a sequence-like object, not " + f"{type(pat).__name__}" ) elif is_scalar(pat): result_col = libstrings.startswith( @@ -5146,7 +5149,7 @@ def __init__( null_count: int = None, children: Tuple["column.ColumnBase", ...] = (), ): - dtype = cudf.dtype("object") + dtype = cudf.api.types.dtype("object") if size is None: for child in children: @@ -5304,7 +5307,7 @@ def __contains__(self, item: ScalarLike) -> bool: def as_numerical_column( self, dtype: Dtype, **kwargs ) -> "cudf.core.column.NumericalColumn": - out_dtype = cudf.dtype(dtype) + out_dtype = cudf.api.types.dtype(dtype) string_col = self if out_dtype.kind in {"i", "u"}: if not libstrings.is_integer(string_col).all(): @@ -5346,7 +5349,7 @@ def _as_datetime_or_timedelta_column(self, dtype, format): def as_datetime_column( self, dtype: Dtype, **kwargs ) -> "cudf.core.column.DatetimeColumn": - out_dtype = cudf.dtype(dtype) + out_dtype = cudf.api.types.dtype(dtype) # infer on host from the first not na element # or return all null column if all values @@ -5370,7 +5373,7 @@ def as_datetime_column( def as_timedelta_column( self, dtype: Dtype, **kwargs ) -> "cudf.core.column.TimeDeltaColumn": - out_dtype = cudf.dtype(dtype) + out_dtype = cudf.api.types.dtype(dtype) format = "%D days %H:%M:%S" return self._as_datetime_or_timedelta_column(out_dtype, format) @@ -5412,7 +5415,7 @@ def to_pandas( return pd_series def can_cast_safely(self, to_dtype: Dtype) -> bool: - to_dtype = cudf.dtype(to_dtype) + to_dtype = cudf.api.types.dtype(to_dtype) if self.dtype == to_dtype: return True @@ -5585,7 +5588,7 @@ def view(self, dtype) -> "cudf.core.column.ColumnBase": raise ValueError( "Can not produce a view of a string column with nulls" ) - dtype = cudf.dtype(dtype) + dtype = cudf.api.types.dtype(dtype) str_byte_offset = self.base_children[0].element_indexing(self.offset) str_end_byte_offset = self.base_children[0].element_indexing( self.offset + self.size diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 
2f1695e4445..77aeec286a5 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -39,7 +39,7 @@ import cudf import cudf.core.common from cudf import _lib as libcudf -from cudf._typing import ColumnLike, NotImplementedType +from cudf._typing import ColumnLike, Dtype, NotImplementedType from cudf.api.types import ( _is_scalar_or_zero_d_array, is_bool_dtype, @@ -1057,7 +1057,7 @@ def dtypes(self): string object dtype: object """ - return pd.Series(self._dtypes) + return pd.Series(self._dtypes, dtype="object") @property def ndim(self): @@ -6536,9 +6536,14 @@ def _from_columns_like_self( columns: List[ColumnBase], column_names: abc.Iterable[str], index_names: Optional[List[str]] = None, + *, + override_dtypes: Optional[abc.Iterable[Optional[Dtype]]] = None, ) -> DataFrame: result = super()._from_columns_like_self( - columns, column_names, index_names + columns, + column_names, + index_names, + override_dtypes=override_dtypes, ) result._set_column_names_like(self) return result @@ -6972,7 +6977,7 @@ def from_pandas(obj, nan_as_null=None): Converting a Pandas Series to cuDF Series: - >>> psr = pd.Series(['a', 'b', 'c', 'd'], name='apple') + >>> psr = pd.Series(['a', 'b', 'c', 'd'], name='apple', dtype='str') >>> psr 0 a 1 b diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py index ad352dc6543..d770f4f6130 100644 --- a/python/cudf/cudf/core/df_protocol.py +++ b/python/cudf/cudf/core/df_protocol.py @@ -529,6 +529,16 @@ def __init__( self._nan_as_null = nan_as_null self._allow_copy = allow_copy + def __dataframe__( + self, nan_as_null: bool = False, allow_copy: bool = True + ) -> "_CuDFDataFrame": + """ + See the docstring of the `cudf.DataFrame.__dataframe__` for details + """ + return _CuDFDataFrame( + self._df, nan_as_null=nan_as_null, allow_copy=allow_copy + ) + @property def metadata(self): # `index` isn't a regular column, and the protocol doesn't support row diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 1e342871ace..5cff057ce7c 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -10,7 +10,6 @@ import pyarrow as pa from pandas.api import types as pd_types from pandas.api.extensions import ExtensionDtype -from pandas.core.arrays._arrow_utils import ArrowIntervalType from pandas.core.dtypes.dtypes import ( CategoricalDtype as pd_CategoricalDtype, CategoricalDtypeType as pd_CategoricalDtypeType, @@ -18,10 +17,15 @@ import cudf from cudf._typing import Dtype -from cudf.core._compat import PANDAS_GE_130 +from cudf.core._compat import PANDAS_GE_130, PANDAS_GE_150 from cudf.core.abc import Serializable from cudf.core.buffer import DeviceBufferLike +if PANDAS_GE_150: + from pandas.core.arrays.arrow.extension_types import ArrowIntervalType +else: + from pandas.core.arrays._arrow_utils import ArrowIntervalType + def dtype(arbitrary): """ @@ -610,6 +614,12 @@ def from_pandas(cls, pd_dtype: pd.IntervalDtype) -> "IntervalDtype": else: return cls(subtype=pd_dtype.subtype) + def to_pandas(self) -> pd.IntervalDtype: + if PANDAS_GE_130: + return pd.IntervalDtype(subtype=self.subtype, closed=self.closed) + else: + return pd.IntervalDtype(subtype=self.subtype) + def __eq__(self, other): if isinstance(other, str): # This means equality isn't transitive but mimics pandas diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index ec78a8a37cf..40926a1c8cb 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -3,6 +3,7 
@@ from __future__ import annotations import copy +import itertools import operator import pickle import warnings @@ -131,6 +132,8 @@ def _from_columns_like_self( self, columns: List[ColumnBase], column_names: Optional[abc.Iterable[str]] = None, + *, + override_dtypes: Optional[abc.Iterable[Optional[Dtype]]] = None, ): """Construct a Frame from a list of columns with metadata from self. @@ -139,7 +142,7 @@ if column_names is None: column_names = self._column_names frame = self.__class__._from_columns(columns, column_names) - return frame._copy_type_metadata(self) + return frame._copy_type_metadata(self, override_dtypes=override_dtypes) def _mimic_inplace( self: T, result: T, inplace: bool = False @@ -1012,8 +1015,8 @@ def from_arrow(cls, data): ) column_names = data.column_names - pandas_dtypes = None - np_dtypes = None + pandas_dtypes = {} + np_dtypes = {} if isinstance(data.schema.pandas_metadata, dict): metadata = data.schema.pandas_metadata pandas_dtypes = { @@ -1085,42 +1088,45 @@ # There are some special cases that need to be handled # based on metadata. - if pandas_dtypes: - for name in result: - dtype = None - if ( - len(result[name]) == 0 - and pandas_dtypes[name] == "categorical" - ): - # When pandas_dtype is a categorical column and the size - # of column is 0(i.e., empty) then we will have an - # int8 column in result._data[name] returned by libcudf, - # which needs to be type-casted to 'category' dtype. - dtype = "category" - elif ( - pandas_dtypes[name] == "empty" - and np_dtypes[name] == "object" - ): - # When a string column has all null values, pandas_dtype is - # is specified as 'empty' and np_dtypes as 'object', - # hence handling this special case to type-cast the empty - # float column to str column. - dtype = np_dtypes[name] - elif pandas_dtypes[ - name - ] == "object" and cudf.api.types.is_struct_dtype( - np_dtypes[name] - ): - # Incase of struct column, libcudf is not aware of names of - # struct fields, hence renaming the struct fields is - # necessary by extracting the field names from arrow - # struct types. - result[name] = result[name]._rename_fields( - [field.name for field in data[name].type] - ) - - if dtype is not None: - result[name] = result[name].astype(dtype) + for name in result: + if ( + len(result[name]) == 0 + and pandas_dtypes.get(name) == "categorical" + ): + # When pandas_dtype is a categorical column and the size + # of column is 0 (i.e., empty) then we will have an + # int8 column in result._data[name] returned by libcudf, + # which needs to be type-casted to 'category' dtype. + result[name] = result[name].as_categorical_column("category") + elif ( + pandas_dtypes.get(name) == "empty" + and np_dtypes.get(name) == "object" + ): + # When a string column has all null values, pandas_dtype + # is specified as 'empty' and np_dtypes as 'object', + # hence handling this special case to type-cast the empty + # float column to str column. + result[name] = result[name].as_string_column(cudf.dtype("str")) + elif name in data.column_names and isinstance( + data[name].type, + (pa.StructType, pa.ListType, pa.Decimal128Type), + ): + # In case of struct column, libcudf is not aware of names of + # struct fields, hence renaming the struct fields is + # necessary by extracting the field names from arrow + # struct types. + + # In case of decimal column, libcudf is not aware of the + # decimal precision. 
+ + # In case of list column, there is a possibility of nested + # list columns to have struct or decimal columns inside them. + + # All of these cases are handled by calling the + # _with_type_metadata method on the column. + result[name] = result[name]._with_type_metadata( + cudf.utils.dtypes.cudf_dtype_from_pa_type(data[name].type) + ) return cls._from_data({name: result[name] for name in column_names}) @@ -1160,17 +1166,31 @@ def _positions_from_column_names(self, column_names): if name in set(column_names) ] - def _copy_type_metadata(self: T, other: T) -> T: + def _copy_type_metadata( + self: T, + other: T, + *, + override_dtypes: Optional[abc.Iterable[Optional[Dtype]]] = None, + ) -> T: """ Copy type metadata from each column of `other` to the corresponding column of `self`. + + If override_dtypes is provided, any non-None entry + will be used in preference to the relevant column of other to + provide the new dtype. + See `ColumnBase._with_type_metadata` for more information. """ - for name, col, other_col in zip( - self._data.keys(), self._data.values(), other._data.values() - ): + if override_dtypes is None: + override_dtypes = itertools.repeat(None) + dtypes = ( + dtype if dtype is not None else col.dtype + for (dtype, col) in zip(override_dtypes, other._data.values()) + ) + for (name, col), dtype in zip(self._data.items(), dtypes): self._data.set_by_label( - name, col._with_type_metadata(other_col.dtype), validate=False + name, col._with_type_metadata(dtype), validate=False ) return self diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index d1995615e0c..57a10358561 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -184,7 +184,9 @@ def __init__( # whereas _stop is an upper bound. self._end = self._start + self._step * (len(self._range) - 1) - def _copy_type_metadata(self: RangeIndex, other: RangeIndex) -> RangeIndex: + def _copy_type_metadata( + self: RangeIndex, other: RangeIndex, *, override_dtypes=None + ) -> RangeIndex: # There is no metadata to be copied for RangeIndex since it does not # have an underlying column. return self @@ -978,9 +980,11 @@ def _binaryop( # Override just to make mypy happy. 
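The reworked `Frame._copy_type_metadata` resolves each column's target dtype by preferring a non-None entry of `override_dtypes` and falling back to the dtype of the matching column in `other`. A minimal standalone sketch of that selection rule (the dtype strings here are illustrative):

    import itertools

    def resolve_dtypes(override_dtypes, other_dtypes):
        # override_dtypes=None (no overrides at all) behaves like all-None
        if override_dtypes is None:
            override_dtypes = itertools.repeat(None)
        # a non-None override wins; otherwise keep the source column's dtype
        return [
            ovr if ovr is not None else src
            for ovr, src in zip(override_dtypes, other_dtypes)
        ]

    print(resolve_dtypes(None, ["int64", "object"]))                # ['int64', 'object']
    print(resolve_dtypes([None, "category"], ["int64", "object"]))  # ['int64', 'category']
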
@_cudf_nvtx_annotate def _copy_type_metadata( - self: GenericIndex, other: GenericIndex + self: GenericIndex, other: GenericIndex, *, override_dtypes=None ) -> GenericIndex: - return super()._copy_type_metadata(other) + return super()._copy_type_metadata( + other, override_dtypes=override_dtypes + ) @property # type: ignore @_cudf_nvtx_annotate diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 9bda475589a..30b1bc704c8 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -30,7 +30,12 @@ import cudf import cudf._lib as libcudf -from cudf._typing import ColumnLike, DataFrameOrSeries, NotImplementedType +from cudf._typing import ( + ColumnLike, + DataFrameOrSeries, + Dtype, + NotImplementedType, +) from cudf.api.types import ( _is_non_decimal_numeric_dtype, is_bool_dtype, @@ -45,13 +50,19 @@ from cudf.core._base_index import BaseIndex from cudf.core.column import ColumnBase, as_column, full from cudf.core.column_accessor import ColumnAccessor +from cudf.core.dtypes import ListDtype from cudf.core.frame import Frame from cudf.core.groupby.groupby import GroupBy from cudf.core.index import Index, RangeIndex, _index_from_columns from cudf.core.missing import NA from cudf.core.multiindex import MultiIndex from cudf.core.resample import _Resampler -from cudf.core.udf.utils import _compile_or_get, _supported_cols_from_frame +from cudf.core.udf.utils import ( + _compile_or_get, + _get_input_args_from_frame, + _post_process_output_col, + _return_arr_from_dtype, +) from cudf.utils import docutils from cudf.utils.utils import _cudf_nvtx_annotate @@ -327,18 +338,28 @@ def _from_columns_like_self( columns: List[ColumnBase], column_names: Optional[abc.Iterable[str]] = None, index_names: Optional[List[str]] = None, + *, + override_dtypes: Optional[abc.Iterable[Optional[Dtype]]] = None, ): """Construct a `Frame` from a list of columns with metadata from self. If `index_names` is set, the first `len(index_names)` columns are used to construct the index of the frame. + + If override_dtypes is provided then any non-None entry will be + used for the dtype of the matching column in preference to the + dtype of the column in self. """ if column_names is None: column_names = self._column_names frame = self.__class__._from_columns( columns, column_names, index_names ) - return frame._copy_type_metadata(self, include_index=bool(index_names)) + return frame._copy_type_metadata( + self, + include_index=bool(index_names), + override_dtypes=override_dtypes, + ) def _mimic_inplace( self: T, result: T, inplace: bool = False @@ -899,40 +920,44 @@ def clip(self, lower=None, upper=None, inplace=False, axis=1): return self._mimic_inplace(output, inplace=inplace) def _copy_type_metadata( - self: T, other: T, include_index: bool = True + self: T, + other: T, + include_index: bool = True, + *, + override_dtypes: Optional[abc.Iterable[Optional[Dtype]]] = None, ) -> T: """ Copy type metadata from each column of `other` to the corresponding column of `self`. See `ColumnBase._with_type_metadata` for more information. """ - super()._copy_type_metadata(other) - - if include_index: - if self._index is not None and other._index is not None: - self._index._copy_type_metadata(other._index) - # When other._index is a CategoricalIndex, the current index - # will be a NumericalIndex with an underlying CategoricalColumn - # (the above _copy_type_metadata call will have converted the - # column). 
Calling cudf.Index on that column generates the - # appropriate index. - if isinstance( - other._index, cudf.core.index.CategoricalIndex - ) and not isinstance( - self._index, cudf.core.index.CategoricalIndex - ): - self._index = cudf.Index( - cast( - cudf.core.index.NumericIndex, self._index - )._column, - name=self._index.name, - ) - elif isinstance( - other._index, cudf.MultiIndex - ) and not isinstance(self._index, cudf.MultiIndex): - self._index = cudf.MultiIndex._from_data( - self._index._data, name=self._index.name - ) + super()._copy_type_metadata(other, override_dtypes=override_dtypes) + if ( + include_index + and self._index is not None + and other._index is not None + ): + self._index._copy_type_metadata(other._index) + # When other._index is a CategoricalIndex, the current index + # will be a NumericalIndex with an underlying CategoricalColumn + # (the above _copy_type_metadata call will have converted the + # column). Calling cudf.Index on that column generates the + # appropriate index. + if isinstance( + other._index, cudf.core.index.CategoricalIndex + ) and not isinstance( + self._index, cudf.core.index.CategoricalIndex + ): + self._index = cudf.Index( + cast(cudf.core.index.NumericIndex, self._index)._column, + name=self._index.name, + ) + elif isinstance(other._index, cudf.MultiIndex) and not isinstance( + self._index, cudf.MultiIndex + ): + self._index = cudf.MultiIndex._from_data( + self._index._data, name=self._index.name + ) return self @_cudf_nvtx_annotate @@ -1799,30 +1824,19 @@ def _apply(self, func, kernel_getter, *args, **kwargs): ) from e # Mask and data column preallocated - ans_col = cp.empty(len(self), dtype=retty) + ans_col = _return_arr_from_dtype(retty, len(self)) ans_mask = cudf.core.column.column_empty(len(self), dtype="bool") - launch_args = [(ans_col, ans_mask), len(self)] - offsets = [] - - # if _compile_or_get succeeds, it is safe to create a kernel that only - # consumes the columns that are of supported dtype - for col in _supported_cols_from_frame(self).values(): - data = col.data - mask = col.mask - if mask is None: - launch_args.append(data) - else: - launch_args.append((data, mask)) - offsets.append(col.offset) - launch_args += offsets - launch_args += list(args) + output_args = [(ans_col, ans_mask), len(self)] + input_args = _get_input_args_from_frame(self) + launch_args = output_args + input_args + list(args) try: kernel.forall(len(self))(*launch_args) except Exception as e: raise RuntimeError("UDF kernel execution failed.") from e - col = cudf.core.column.as_column(ans_col) + col = _post_process_output_col(ans_col, retty) + col.set_base_mask(libcudf.transform.bools_to_mask(ans_mask)) result = cudf.Series._from_data({None: col}, self._index) @@ -3476,22 +3490,32 @@ def _explode(self, explode_column: Any, ignore_index: bool): idx = None if ignore_index else self._index.copy(deep=True) return self.__class__._from_data(data, index=idx) - explode_column_num = self._column_names.index(explode_column) + column_index = self._column_names.index(explode_column) if not ignore_index and self._index is not None: - explode_column_num += self._index.nlevels + index_offset = self._index.nlevels + else: + index_offset = 0 exploded = libcudf.lists.explode_outer( [ *(self._index._data.columns if not ignore_index else ()), *self._columns, ], - explode_column_num, + column_index + index_offset, ) - + # We must copy inner datatype of the exploded list column to + # maintain struct dtype key names + exploded_dtype = cast( + ListDtype, 
self._columns[column_index].dtype + ).element_type return self._from_columns_like_self( exploded, self._column_names, self._index_names if not ignore_index else None, + override_dtypes=( + exploded_dtype if i == column_index else None + for i in range(len(self._columns)) + ), ) @_cudf_nvtx_annotate diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 06a2cc33c1f..650d1d0d83a 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -20,7 +20,7 @@ from cudf._typing import DataFrameOrSeries from cudf.api.types import is_integer, is_list_like, is_object_dtype from cudf.core import column -from cudf.core._compat import PANDAS_GE_120 +from cudf.core._compat import PANDAS_GE_120, PANDAS_GE_150 from cudf.core.frame import Frame from cudf.core.index import ( BaseIndex, @@ -451,8 +451,8 @@ def __repr__(self): ) ) - if PANDAS_GE_120: - # TODO: Remove this whole `if` block, + if PANDAS_GE_120 and not PANDAS_GE_150: + # Need this whole `if` block, # this is a workaround for the following issue: # https://github.com/pandas-dev/pandas/issues/39984 preprocess_pdf = pd.DataFrame( @@ -1854,7 +1854,9 @@ def _intersection(self, other, sort=None): return midx @_cudf_nvtx_annotate - def _copy_type_metadata(self: MultiIndex, other: MultiIndex) -> MultiIndex: + def _copy_type_metadata( + self: MultiIndex, other: MultiIndex, *, override_dtypes=None + ) -> MultiIndex: res = super()._copy_type_metadata(other) res._names = other._names return res diff --git a/python/cudf/cudf/core/resample.py b/python/cudf/cudf/core/resample.py index 57630e7d4a9..df901f05787 100644 --- a/python/cudf/cudf/core/resample.py +++ b/python/cudf/cudf/core/resample.py @@ -19,6 +19,7 @@ import cudf import cudf._lib.labeling +import cudf.core.index from cudf._typing import DataFrameOrSeries from cudf.core.groupby.groupby import ( DataFrameGroupBy, @@ -40,7 +41,7 @@ def __init__(self, obj, by, axis=None, kind=None): def agg(self, func): result = super().agg(func) if len(self.grouping.bin_labels) != len(result): - index = cudf.Index( + index = cudf.core.index.Index( self.grouping.bin_labels, name=self.grouping.names[0] ) return result._align_to_index( @@ -92,7 +93,7 @@ class SeriesResampler(_Resampler, SeriesGroupBy): class _ResampleGrouping(_Grouping): - bin_labels: cudf.Index + bin_labels: cudf.core.index.Index def _handle_frequency_grouper(self, by): # if `by` is a time frequency grouper, we bin the key column diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 4ab28cab5a0..e94ca8d653d 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -589,7 +589,7 @@ def from_pandas(cls, s, nan_as_null=None): >>> import pandas as pd >>> import numpy as np >>> data = [10, 20, 30, np.nan] - >>> pds = pd.Series(data) + >>> pds = pd.Series(data, dtype='float64') >>> cudf.Series.from_pandas(pds) 0 10.0 1 20.0 diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py index 4608cae3228..c128bc2436c 100644 --- a/python/cudf/cudf/core/udf/__init__.py +++ b/python/cudf/cudf/core/udf/__init__.py @@ -1 +1,65 @@ -from . import typing, lowering +# Copyright (c) 2022, NVIDIA CORPORATION. +import numpy as np +from numba import cuda, types +from numba.cuda.cudaimpl import ( + lower as cuda_lower, + registry as cuda_lowering_registry, +) + +from cudf.core.dtypes import dtype +from cudf.core.udf import api, row_function, utils +from cudf.utils.dtypes import STRING_TYPES + +from . 
import masked_lowering, masked_typing + +_units = ["ns", "ms", "us", "s"] +_datetime_cases = {types.NPDatetime(u) for u in _units} +_timedelta_cases = {types.NPTimedelta(u) for u in _units} + + +_supported_masked_types = ( + types.integer_domain + | types.real_domain + | _datetime_cases + | _timedelta_cases + | {types.boolean} +) + +_STRING_UDFS_ENABLED = False +try: + import strings_udf + + if strings_udf.ENABLED: + from . import strings_typing # isort: skip + from . import strings_lowering # isort: skip + from strings_udf import ptxpath + from strings_udf._lib.cudf_jit_udf import to_string_view_array + from strings_udf._typing import str_view_arg_handler, string_view + + # add an overload of MaskedType.__init__(string_view, bool) + cuda_lower(api.Masked, strings_typing.string_view, types.boolean)( + masked_lowering.masked_constructor + ) + + # add an overload of pack_return(string_view) + cuda_lower(api.pack_return, strings_typing.string_view)( + masked_lowering.pack_return_scalar_impl + ) + + _supported_masked_types |= {strings_typing.string_view} + utils.launch_arg_getters[dtype("O")] = to_string_view_array + utils.masked_array_types[dtype("O")] = string_view + utils.JIT_SUPPORTED_TYPES |= STRING_TYPES + utils.ptx_files.append(ptxpath) + utils.arg_handlers.append(str_view_arg_handler) + row_function.itemsizes[dtype("O")] = string_view.size_bytes + + _STRING_UDFS_ENABLED = True + else: + del strings_udf + +except ImportError as e: + # allow cuDF to work without strings_udf + pass + +masked_typing.register_masked_constructor(_supported_masked_types) diff --git a/python/cudf/cudf/core/udf/_ops.py b/python/cudf/cudf/core/udf/_ops.py index 7307b43fceb..559a5bfad4f 100644 --- a/python/cudf/cudf/core/udf/_ops.py +++ b/python/cudf/cudf/core/udf/_ops.py @@ -1,3 +1,5 @@ +# Copyright (c) 2021-2022, NVIDIA CORPORATION. + import math import operator diff --git a/python/cudf/cudf/core/udf/api.py b/python/cudf/cudf/core/udf/api.py index 23b4d02c57d..7a68cffeed2 100644 --- a/python/cudf/cudf/core/udf/api.py +++ b/python/cudf/cudf/core/udf/api.py @@ -1,3 +1,6 @@ +# Copyright (c) 2021-2022, NVIDIA CORPORATION. + + class Masked: """ Most of the time, MaskedType as defined in typing.py diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/masked_lowering.py similarity index 99% rename from python/cudf/cudf/core/udf/lowering.py rename to python/cudf/cudf/core/udf/masked_lowering.py index 7dfe8427bfd..f825b6538bf 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/masked_lowering.py @@ -18,7 +18,7 @@ comparison_ops, unary_ops, ) -from cudf.core.udf.typing import MaskedType, NAType +from cudf.core.udf.masked_typing import MaskedType, NAType @cuda_lowering_registry.lower_constant(NAType) @@ -62,7 +62,6 @@ def masked_scalar_op_impl(context, builder, sig, args): result = cgutils.create_struct_proxy(masked_return_type)( context, builder ) - # compute output validity valid = builder.and_(m1.valid, m2.valid) result.valid = valid diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/masked_typing.py similarity index 85% rename from python/cudf/cudf/core/udf/typing.py rename to python/cudf/cudf/core/udf/masked_typing.py index 073900d115d..a815a9f6dae 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/masked_typing.py @@ -1,6 +1,7 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. 
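String support in `cudf/core/udf/__init__.py` above is strictly opt-in: the registries consulted by the kernel builder are extended only when the optional `strings_udf` package imports successfully, and cuDF keeps working without it. A stripped-down sketch of that pattern (the registry names here are illustrative, not the actual cuDF internals):

    # Illustrative opt-in plugin pattern: extend shared registries only if
    # the optional extension is importable; otherwise degrade gracefully.
    SUPPORTED_DTYPES = {"int64", "float64", "bool"}
    PTX_FILES = []

    STRING_UDFS_ENABLED = False
    try:
        import strings_udf  # optional compiled extension

        if strings_udf.ENABLED:
            SUPPORTED_DTYPES.add("str")            # dtype becomes JIT-able
            PTX_FILES.append(strings_udf.ptxpath)  # PTX linked into kernels
            STRING_UDFS_ENABLED = True
    except ImportError:
        pass  # no strings_udf installed: string UDFs stay disabled
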
import operator +from typing import Any, Dict from numba import types from numba.core.extending import ( @@ -26,6 +27,12 @@ comparison_ops, unary_ops, ) +from cudf.utils.dtypes import ( + DATETIME_TYPES, + NUMERIC_TYPES, + STRING_TYPES, + TIMEDELTA_TYPES, +) SUPPORTED_NUMBA_TYPES = ( types.Number, @@ -34,29 +41,60 @@ types.NPTimedelta, ) +SUPPORTED_NUMPY_TYPES = ( + NUMERIC_TYPES | DATETIME_TYPES | TIMEDELTA_TYPES | STRING_TYPES +) +supported_type_str = "\n".join(sorted(list(SUPPORTED_NUMPY_TYPES) + ["bool"])) +MASKED_INIT_MAP: Dict[Any, Any] = {} -class MaskedType(types.Type): + +def _format_error_string(err): """ - A Numba type consisting of a value of some primitive type - and a validity boolean, over which we can define math ops + Wrap an error message in newlines and color it red. """ + return "\033[91m" + "\n" + err + "\n" + "\033[0m" - def __init__(self, value): - # MaskedType in Numba shall be parameterized - # with a value type - if isinstance(value, SUPPORTED_NUMBA_TYPES): - self.value_type = value + +def _type_to_masked_type(t): + result = MASKED_INIT_MAP.get(t) + if result is None: + if isinstance(t, SUPPORTED_NUMBA_TYPES): + return t else: # Unsupported Dtype. Numba tends to print out the type info # for whatever operands and operation failed to type and then # output its own error message. Putting the message in the repr # then is one way of getting the true cause to the user - self.value_type = types.Poison( - "\n\n\n Unsupported MaskedType. This is usually caused by " + err = _format_error_string( + "Unsupported MaskedType. This is usually caused by " "attempting to use a column of unsupported dtype in a UDF. " - f"Supported dtypes are {SUPPORTED_NUMBA_TYPES}" + f"Supported dtypes are:\n{supported_type_str}" ) - super().__init__(name=f"Masked{self.value_type}") + return types.Poison(err) + else: + return result + + +MASKED_INIT_MAP[types.pyobject] = types.Poison( + _format_error_string( + "strings_udf library required for usage of string dtypes " + "inside user defined functions." + ) +) + + +# Masked scalars of all types +class MaskedType(types.Type): + """ + A Numba type consisting of a value of some primitive type + and a validity boolean, over which we can define math ops + """ + + def __init__(self, value): + # MaskedType in Numba shall be parameterized + # with a value type + self.value_type = _type_to_masked_type(value) + super().__init__(name=f"Masked({self.value_type})") def __hash__(self): """ @@ -131,44 +169,35 @@ def typeof_masked(val, c): # Implemented typing for Masked(value, valid) - the construction of a Masked # type in a kernel. -@cuda_decl_registry.register -class MaskedConstructor(ConcreteTemplate): - key = api.Masked - units = ["ns", "ms", "us", "s"] - datetime_cases = {types.NPDatetime(u) for u in units} - timedelta_cases = {types.NPTimedelta(u) for u in units} - cases = [ - nb_signature(MaskedType(t), t, types.boolean) - for t in ( - types.integer_domain - | types.real_domain - | datetime_cases - | timedelta_cases - | {types.boolean} - ) - ] +def register_masked_constructor(supported_masked_types): + class MaskedConstructor(ConcreteTemplate): + key = api.Masked + cases = [ + nb_signature(MaskedType(t), t, types.boolean) + for t in supported_masked_types + ] + cuda_decl_registry.register(MaskedConstructor) -# Provide access to `m.value` and `m.valid` in a kernel for a Masked `m`. 
-make_attribute_wrapper(MaskedType, "value", "value") -make_attribute_wrapper(MaskedType, "valid", "valid") - + # Typing for `api.Masked` + @cuda_decl_registry.register_attr + class ClassesTemplate(AttributeTemplate): + key = types.Module(api) -# Typing for `api.Masked` -@cuda_decl_registry.register_attr -class ClassesTemplate(AttributeTemplate): - key = types.Module(api) + def resolve_Masked(self, mod): + return types.Function(MaskedConstructor) - def resolve_Masked(self, mod): - return types.Function(MaskedConstructor) + # Registration of the global is also needed for Numba to type api.Masked + cuda_decl_registry.register_global(api, types.Module(api)) + # For typing bare Masked (as in `from .api import Masked`) + cuda_decl_registry.register_global( + api.Masked, types.Function(MaskedConstructor) + ) -# Registration of the global is also needed for Numba to type api.Masked -cuda_decl_registry.register_global(api, types.Module(api)) -# For typing bare Masked (as in `from .api import Masked` -cuda_decl_registry.register_global( - api.Masked, types.Function(MaskedConstructor) -) +# Provide access to `m.value` and `m.valid` in a kernel for a Masked `m`. +make_attribute_wrapper(MaskedType, "value", "value") +make_attribute_wrapper(MaskedType, "valid", "valid") # Tell numba how `MaskedType` is constructed on the backend in terms diff --git a/python/cudf/cudf/core/udf/row_function.py b/python/cudf/cudf/core/udf/row_function.py index 1d0bd5ac99d..8d887a37706 100644 --- a/python/cudf/cudf/core/udf/row_function.py +++ b/python/cudf/cudf/core/udf/row_function.py @@ -1,5 +1,6 @@ # Copyright (c) 2021-2022, NVIDIA CORPORATION. import math +from typing import Any, Dict import numpy as np from numba import cuda @@ -7,13 +8,13 @@ from numba.types import Record from cudf.core.udf.api import Masked, pack_return +from cudf.core.udf.masked_typing import MaskedType from cudf.core.udf.templates import ( masked_input_initializer_template, row_initializer_template, row_kernel_template, unmasked_input_initializer_template, ) -from cudf.core.udf.typing import MaskedType from cudf.core.udf.utils import ( _all_dtypes_from_frame, _construct_signature, @@ -24,6 +25,8 @@ _supported_dtypes_from_frame, ) +itemsizes: Dict[Any, int] = {} + def _get_frame_row_type(dtype): """ @@ -31,12 +34,10 @@ Models each column and its mask as a MaskedType and models the row as a dictionary like data structure containing these MaskedTypes. - Large parts of this function are copied with comments from the Numba internals and slightly modified to account for validity bools to be present in the final struct. - See numba.np.numpy_support.from_struct_dtype for details. @@ -45,7 +46,9 @@ fields = [] offset = 0 - sizes = [val[0].itemsize for val in dtype.fields.values()] + sizes = [ + itemsizes.get(val[0], val[0].itemsize) for val in dtype.fields.values() + ] for i, (name, info) in enumerate(dtype.fields.items()): # *info* consists of the element dtype, its offset from the beginning # of the record, and an optional "title" containing metadata. 
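The `itemsizes` lookup added to `_get_frame_row_type`, together with the offset arithmetic in the next hunk, means each field of the packed row struct contributes its item size plus one validity byte, with the running offset then aligned for the following member. A simplified model of that layout rule (the fixed 8-byte alignment is illustrative; the real code aligns per the PTX ISA):

    def row_layout(field_sizes, align=8):
        # Each field: value bytes + 1 validity byte, then round the running
        # offset up to the alignment boundary for the next member.
        offsets, offset = [], 0
        for size in field_sizes:
            offsets.append(offset)
            offset += size + 1
            offset = -(-offset // align) * align  # ceil to a multiple of align
        return offsets, offset

    print(row_layout([8, 4]))  # int64 then float32 field -> ([0, 16], 24)
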
@@ -62,7 +65,8 @@ def _get_frame_row_type(dtype): fields.append((name, infos)) # increment offset by itemsize plus one byte for validity - offset += elemdtype.itemsize + 1 + itemsize = itemsizes.get(elemdtype, elemdtype.itemsize) + offset += itemsize + 1 # Align the next member of the struct to be a multiple of the # memory access size, per PTX ISA 7.4/5.4.5 @@ -127,10 +131,8 @@ def _get_row_kernel(frame, func, args): np.dtype(list(_all_dtypes_from_frame(frame).items())) ) scalar_return_type = _get_udf_return_type(row_type, func, args) - # this is the signature for the final full kernel compilation sig = _construct_signature(frame, scalar_return_type, args) - # this row type is used within the kernel to pack up the column and # mask data into the dict like data structure the user udf expects np_field_types = np.dtype( diff --git a/python/cudf/cudf/core/udf/scalar_function.py b/python/cudf/cudf/core/udf/scalar_function.py index a7b887dd2d5..31599f4151e 100644 --- a/python/cudf/cudf/core/udf/scalar_function.py +++ b/python/cudf/cudf/core/udf/scalar_function.py @@ -4,12 +4,12 @@ from numba.np import numpy_support from cudf.core.udf.api import Masked, pack_return +from cudf.core.udf.masked_typing import MaskedType from cudf.core.udf.templates import ( masked_input_initializer_template, scalar_kernel_template, unmasked_input_initializer_template, ) -from cudf.core.udf.typing import MaskedType from cudf.core.udf.utils import ( _construct_signature, _get_kernel, diff --git a/python/cudf/cudf/core/udf/strings_lowering.py b/python/cudf/cudf/core/udf/strings_lowering.py new file mode 100644 index 00000000000..5b69d1a9da3 --- /dev/null +++ b/python/cudf/cudf/core/udf/strings_lowering.py @@ -0,0 +1,125 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. + +import operator + +from numba import types +from numba.core import cgutils +from numba.core.typing import signature as nb_signature +from numba.cuda.cudaimpl import lower as cuda_lower + +from strings_udf._typing import size_type, string_view +from strings_udf.lowering import ( + contains_impl, + count_impl, + endswith_impl, + find_impl, + isalnum_impl, + isalpha_impl, + isdecimal_impl, + isdigit_impl, + islower_impl, + isspace_impl, + isupper_impl, + len_impl, + rfind_impl, + startswith_impl, +) + +from cudf.core.udf.masked_typing import MaskedType + + +@cuda_lower(len, MaskedType(string_view)) +def masked_len_impl(context, builder, sig, args): + ret = cgutils.create_struct_proxy(sig.return_type)(context, builder) + masked_sv_ty = sig.args[0] + masked_sv = cgutils.create_struct_proxy(masked_sv_ty)( + context, builder, value=args[0] + ) + result = len_impl( + context, builder, size_type(string_view), (masked_sv.value,) + ) + ret.value = result + ret.valid = masked_sv.valid + + return ret._getvalue() + + +def create_binary_string_func(op, cuda_func, retty): + """ + Provide a wrapper around numba's low-level extension API which + produces the boilerplate needed to implement a binary function + of two masked strings. 
+ """ + + def masked_binary_func_impl(context, builder, sig, args): + ret = cgutils.create_struct_proxy(sig.return_type)(context, builder) + + lhs_masked = cgutils.create_struct_proxy(sig.args[0])( + context, builder, value=args[0] + ) + rhs_masked = cgutils.create_struct_proxy(sig.args[0])( + context, builder, value=args[1] + ) + + result = cuda_func( + context, + builder, + nb_signature(retty, string_view, string_view), + (lhs_masked.value, rhs_masked.value), + ) + + ret.value = result + ret.valid = builder.and_(lhs_masked.valid, rhs_masked.valid) + + return ret._getvalue() + + cuda_lower(op, MaskedType(string_view), MaskedType(string_view))( + masked_binary_func_impl + ) + + +create_binary_string_func( + "MaskedType.startswith", + startswith_impl, + types.boolean, +) +create_binary_string_func("MaskedType.endswith", endswith_impl, types.boolean) +create_binary_string_func("MaskedType.find", find_impl, size_type) +create_binary_string_func("MaskedType.rfind", rfind_impl, size_type) +create_binary_string_func("MaskedType.count", count_impl, size_type) +create_binary_string_func(operator.contains, contains_impl, types.boolean) + + +def create_masked_unary_identifier_func(op, cuda_func): + """ + Provide a wrapper around numba's low-level extension API which + produces the boilerplate needed to implement a unary function + of a masked string. + """ + + def masked_unary_func_impl(context, builder, sig, args): + ret = cgutils.create_struct_proxy(sig.return_type)(context, builder) + masked_str = cgutils.create_struct_proxy(sig.args[0])( + context, builder, value=args[0] + ) + + result = cuda_func( + context, + builder, + types.boolean(string_view, string_view), + (masked_str.value,), + ) + ret.value = result + ret.valid = masked_str.valid + return ret._getvalue() + + cuda_lower(op, MaskedType(string_view))(masked_unary_func_impl) + + +create_masked_unary_identifier_func("MaskedType.isalnum", isalnum_impl) +create_masked_unary_identifier_func("MaskedType.isalpha", isalpha_impl) +create_masked_unary_identifier_func("MaskedType.isdigit", isdigit_impl) +create_masked_unary_identifier_func("MaskedType.isupper", isupper_impl) +create_masked_unary_identifier_func("MaskedType.islower", islower_impl) +create_masked_unary_identifier_func("MaskedType.isspace", isspace_impl) +create_masked_unary_identifier_func("MaskedType.isdecimal", isdecimal_impl) diff --git a/python/cudf/cudf/core/udf/strings_typing.py b/python/cudf/cudf/core/udf/strings_typing.py new file mode 100644 index 00000000000..1179688651f --- /dev/null +++ b/python/cudf/cudf/core/udf/strings_typing.py @@ -0,0 +1,182 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. 
+ +import operator + +from numba import types +from numba.core.typing import signature as nb_signature +from numba.core.typing.templates import AbstractTemplate, AttributeTemplate +from numba.cuda.cudadecl import registry as cuda_decl_registry + +from strings_udf._typing import ( + StringView, + bool_binary_funcs, + id_unary_funcs, + int_binary_funcs, + size_type, + string_view, +) + +from cudf.core.udf import masked_typing +from cudf.core.udf._ops import comparison_ops +from cudf.core.udf.masked_typing import MaskedType + +masked_typing.MASKED_INIT_MAP[types.pyobject] = string_view +masked_typing.MASKED_INIT_MAP[string_view] = string_view + + +def _is_valid_string_arg(ty): + return ( + isinstance(ty, MaskedType) and isinstance(ty.value_type, StringView) + ) or isinstance(ty, types.StringLiteral) + + +def register_string_function(func): + """ + Helper function wrapping numba's low level extension API. Provides + the boilerplate needed to associate a signature with a function or + operator to be overloaded. + """ + + def deco(generic): + class MaskedStringFunction(AbstractTemplate): + pass + + MaskedStringFunction.generic = generic + cuda_decl_registry.register_global(func)(MaskedStringFunction) + + return deco + + +@register_string_function(len) +def len_typing(self, args, kws): + if isinstance(args[0], MaskedType) and isinstance( + args[0].value_type, StringView + ): + return nb_signature(MaskedType(size_type), args[0]) + elif isinstance(args[0], types.StringLiteral) and len(args) == 1: + return nb_signature(size_type, args[0]) + + +@register_string_function(operator.contains) +def contains_typing(self, args, kws): + if _is_valid_string_arg(args[0]) and _is_valid_string_arg(args[1]): + return nb_signature( + MaskedType(types.boolean), + MaskedType(string_view), + MaskedType(string_view), + ) + + +class MaskedStringViewCmpOp(AbstractTemplate): + """ + return the boolean result of `cmpop` between two strings; + since the typing is the same for every comparison operator, + we can reuse this class for all of them. + """ + + def generic(self, args, kws): + if _is_valid_string_arg(args[0]) and _is_valid_string_arg(args[1]): + return nb_signature( + MaskedType(types.boolean), + MaskedType(string_view), + MaskedType(string_view), + ) + + +for op in comparison_ops: + cuda_decl_registry.register_global(op)(MaskedStringViewCmpOp) + + +def create_masked_binary_attr(attrname, retty): + """ + Helper function wrapping numba's low level extension API. Provides + the boilerplate needed to register a binary function of two masked + string objects as an attribute of one, e.g. `string.func(other)`. + """ + + class MaskedStringViewBinaryAttr(AbstractTemplate): + key = attrname + + def generic(self, args, kws): + return nb_signature( + MaskedType(retty), MaskedType(string_view), recvr=self.this + ) + + def attr(self, mod): + return types.BoundFunction( + MaskedStringViewBinaryAttr, + MaskedType(string_view), + ) + + return attr + + +def create_masked_identifier_attr(attrname): + """ + Helper function wrapping numba's low level extension API. Provides + the boilerplate needed to register a unary function of a masked + string object as an attribute, e.g. `string.func()`. 
+ """ + + class MaskedStringViewIdentifierAttr(AbstractTemplate): + key = attrname + + def generic(self, args, kws): + return nb_signature(MaskedType(types.boolean), recvr=self.this) + + def attr(self, mod): + return types.BoundFunction( + MaskedStringViewIdentifierAttr, + MaskedType(string_view), + ) + + return attr + + +class MaskedStringViewCount(AbstractTemplate): + key = "MaskedType.count" + + def generic(self, args, kws): + return nb_signature( + MaskedType(size_type), MaskedType(string_view), recvr=self.this + ) + + +class MaskedStringViewAttrs(AttributeTemplate): + key = MaskedType(string_view) + + def resolve_count(self, mod): + return types.BoundFunction( + MaskedStringViewCount, MaskedType(string_view) + ) + + def resolve_value(self, mod): + return string_view + + def resolve_valid(self, mod): + return types.boolean + + +# Build attributes for `MaskedType(string_view)` +for func in bool_binary_funcs: + setattr( + MaskedStringViewAttrs, + f"resolve_{func}", + create_masked_binary_attr(f"MaskedType.{func}", types.boolean), + ) + +for func in int_binary_funcs: + setattr( + MaskedStringViewAttrs, + f"resolve_{func}", + create_masked_binary_attr(f"MaskedType.{func}", size_type), + ) + +for func in id_unary_funcs: + setattr( + MaskedStringViewAttrs, + f"resolve_{func}", + create_masked_identifier_attr(f"MaskedType.{func}"), + ) + +cuda_decl_registry.register_attr(MaskedStringViewAttrs) diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py index 5e46c6d0d77..fa79088046c 100644 --- a/python/cudf/cudf/core/udf/utils.py +++ b/python/cudf/cudf/core/udf/utils.py @@ -1,16 +1,18 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. -from typing import Callable +from typing import Any, Callable, Dict, List import cachetools +import cupy as cp import numpy as np from numba import cuda, typeof from numba.core.errors import TypingError from numba.np import numpy_support -from numba.types import Poison, Tuple, boolean, int64, void +from numba.types import CPointer, Poison, Tuple, boolean, int64, void +from cudf.core.column.column import as_column from cudf.core.dtypes import CategoricalDtype -from cudf.core.udf.typing import MaskedType +from cudf.core.udf.masked_typing import MaskedType from cudf.utils import cudautils from cudf.utils.dtypes import ( BOOL_TYPES, @@ -23,11 +25,12 @@ JIT_SUPPORTED_TYPES = ( NUMERIC_TYPES | BOOL_TYPES | DATETIME_TYPES | TIMEDELTA_TYPES ) - libcudf_bitmask_type = numpy_support.from_dtype(np.dtype("int32")) MASK_BITSIZE = np.dtype("int32").itemsize * 8 precompiled: cachetools.LRUCache = cachetools.LRUCache(maxsize=32) +arg_handlers: List[Any] = [] +ptx_files: List[Any] = [] @_cudf_nvtx_annotate @@ -109,6 +112,9 @@ def _supported_cols_from_frame(frame): } +masked_array_types: Dict[Any, Any] = {} + + def _masked_array_type_from_col(col): """ Return a type representing a tuple of arrays, @@ -116,11 +122,18 @@ def _masked_array_type_from_col(col): corresponding to `dtype`, and the second an array of bools representing a mask. 
""" - nb_scalar_ty = numpy_support.from_dtype(col.dtype) + + col_type = masked_array_types.get(col.dtype) + if col_type: + col_type = CPointer(col_type) + else: + nb_scalar_ty = numpy_support.from_dtype(col.dtype) + col_type = nb_scalar_ty[::1] + if col.mask is None: - return nb_scalar_ty[::1] + return col_type else: - return Tuple((nb_scalar_ty[::1], libcudf_bitmask_type[::1])) + return Tuple((col_type, libcudf_bitmask_type[::1])) def _construct_signature(frame, return_type, args): @@ -200,7 +213,6 @@ def _compile_or_get(frame, func, args, kernel_getter=None): # could be a MaskedType or a scalar type. kernel, scalar_return_type = kernel_getter(frame, func, args) - np_return_type = numpy_support.as_dtype(scalar_return_type) precompiled[cache_key] = (kernel, np_return_type) @@ -213,6 +225,37 @@ def _get_kernel(kernel_string, globals_, sig, func): globals_["f_"] = f_ exec(kernel_string, globals_) _kernel = globals_["_kernel"] - kernel = cuda.jit(sig)(_kernel) + kernel = cuda.jit(sig, link=ptx_files, extensions=arg_handlers)(_kernel) return kernel + + +launch_arg_getters: Dict[Any, Any] = {} + + +def _get_input_args_from_frame(fr): + args = [] + offsets = [] + for col in _supported_cols_from_frame(fr).values(): + getter = launch_arg_getters.get(col.dtype) + if getter: + data = getter(col) + else: + data = col.data + if col.mask is not None: + # argument is a tuple of data, mask + args.append((data, col.mask)) + else: + # argument is just the data pointer + args.append(data) + offsets.append(col.offset) + + return args + offsets + + +def _return_arr_from_dtype(dt, size): + return cp.empty(size, dtype=dt) + + +def _post_process_output_col(col, retty): + return as_column(col, retty) diff --git a/python/cudf/cudf/core/window/__init__.py b/python/cudf/cudf/core/window/__init__.py index 76e034e4a5b..8ea3eb0179b 100644 --- a/python/cudf/cudf/core/window/__init__.py +++ b/python/cudf/cudf/core/window/__init__.py @@ -1 +1,3 @@ +# Copyright (c) 2019-2022, NVIDIA CORPORATION + from cudf.core.window.rolling import Rolling diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py index 8d6d0171ee7..fb1cafa5625 100644 --- a/python/cudf/cudf/core/window/rolling.py +++ b/python/cudf/cudf/core/window/rolling.py @@ -10,6 +10,7 @@ from cudf import _lib as libcudf from cudf.api.types import is_integer, is_number from cudf.core import column +from cudf.core._compat import PANDAS_GE_150 from cudf.core.column.column import as_column from cudf.core.mixins import Reducible from cudf.utils import cudautils @@ -215,12 +216,21 @@ def _apply_agg_column(self, source_column, agg_name): following_window = None window = self.window elif isinstance(self.window, BaseIndexer): - start, end = self.window.get_window_bounds( - num_values=len(self.obj), - min_periods=self.min_periods, - center=self.center, - closed=None, - ) + if PANDAS_GE_150: + start, end = self.window.get_window_bounds( + num_values=len(self.obj), + min_periods=self.min_periods, + center=self.center, + closed=None, + step=None, + ) + else: + start, end = self.window.get_window_bounds( + num_values=len(self.obj), + min_periods=self.min_periods, + center=self.center, + closed=None, + ) start = as_column(start, dtype="int32") end = as_column(end, dtype="int32") diff --git a/python/cudf/cudf/io/__init__.py b/python/cudf/cudf/io/__init__.py index 4ec84ecbc74..6d4b44d5ecc 100644 --- a/python/cudf/cudf/io/__init__.py +++ b/python/cudf/cudf/io/__init__.py @@ -7,9 +7,9 @@ from cudf.io.json import read_json from cudf.io.orc import 
read_orc, read_orc_metadata, to_orc from cudf.io.parquet import ( + ParquetDatasetWriter, merge_parquet_filemetadata, read_parquet, - ParquetDatasetWriter, read_parquet_metadata, write_to_dataset, ) diff --git a/python/cudf/cudf/tests/data/orc/TestOrcFile.apache_timestamp.orc b/python/cudf/cudf/tests/data/orc/TestOrcFile.apache_timestamp.orc new file mode 100644 index 00000000000..dd51856c3f7 Binary files /dev/null and b/python/cudf/cudf/tests/data/orc/TestOrcFile.apache_timestamp.orc differ diff --git a/python/cudf/cudf/tests/test_apply_rows.py b/python/cudf/cudf/tests/test_apply_rows.py index f025549971f..8870eb421c7 100644 --- a/python/cudf/cudf/tests/test_apply_rows.py +++ b/python/cudf/cudf/tests/test_apply_rows.py @@ -1,3 +1,5 @@ +# Copyright (c) 2019-2022, NVIDIA CORPORATION. + import pytest import cudf diff --git a/python/cudf/cudf/tests/test_array_ufunc.py b/python/cudf/cudf/tests/test_array_ufunc.py index 3ff5210ed94..b3be097b878 100644 --- a/python/cudf/cudf/tests/test_array_ufunc.py +++ b/python/cudf/cudf/tests/test_array_ufunc.py @@ -10,6 +10,7 @@ import pytest import cudf +from cudf.core._compat import PANDAS_GE_150 from cudf.testing._utils import assert_eq, set_random_null_mask_inplace _UFUNCS = [ @@ -84,14 +85,19 @@ def test_ufunc_index(ufunc): assert_eq(g, e, check_exact=False) else: assert_eq(got, expect, check_exact=False) - except AssertionError: + except AssertionError as e: # TODO: This branch can be removed when # https://github.com/rapidsai/cudf/issues/10178 is resolved if fname in ("power", "float_power"): if (got - expect).abs().max() == 1: pytest.xfail("https://github.com/rapidsai/cudf/issues/10178") elif fname in ("bitwise_and", "bitwise_or", "bitwise_xor"): - pytest.xfail("https://github.com/pandas-dev/pandas/issues/46769") + if PANDAS_GE_150: + raise e + else: + pytest.xfail( + "https://github.com/pandas-dev/pandas/issues/46769" + ) raise diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index c1a08e507b3..2229bcc1938 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -13,6 +13,7 @@ import cudf from cudf import Series +from cudf.core._compat import PANDAS_GE_150 from cudf.core.index import as_index from cudf.testing import _utils as utils from cudf.utils.dtypes import ( @@ -768,7 +769,7 @@ def test_operator_func_between_series_logical( @pytest.mark.parametrize("func", _operators_comparison) @pytest.mark.parametrize("has_nulls", [True, False]) @pytest.mark.parametrize("scalar", [-59.0, np.nan, 0, 59.0]) -@pytest.mark.parametrize("fill_value", [None, True, False, 1.0]) +@pytest.mark.parametrize("fill_value", [None, 1.0]) @pytest.mark.parametrize("use_cudf_scalar", [False, True]) def test_operator_func_series_and_scalar_logical( dtype, func, has_nulls, scalar, fill_value, use_cudf_scalar @@ -1561,7 +1562,8 @@ def test_scalar_null_binops(op, dtype_l, dtype_r): pytest.param( "nanoseconds", marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/36589" + condition=not PANDAS_GE_150, + reason="https://github.com/pandas-dev/pandas/issues/36589", ), ), ], @@ -1668,7 +1670,8 @@ def test_datetime_dateoffset_binaryop_multiple(date_col, kwargs, op): pytest.param( "nanoseconds", marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/36589" + condition=not PANDAS_GE_150, + reason="https://github.com/pandas-dev/pandas/issues/36589", ), ), ], diff --git 
a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py index df18dbb291e..46998c6830a 100644 --- a/python/cudf/cudf/tests/test_categorical.py +++ b/python/cudf/cudf/tests/test_categorical.py @@ -414,7 +414,7 @@ def test_categorical_as_unordered(pd_str_cat, inplace): pytest.param( True, marks=pytest.mark.skipif( - not PANDAS_GE_134, + condition=not PANDAS_GE_134, reason="https://github.com/pandas-dev/pandas/issues/43232", ), ), @@ -454,7 +454,7 @@ def test_categorical_reorder_categories( pytest.param( True, marks=pytest.mark.skipif( - not PANDAS_GE_134, + condition=not PANDAS_GE_134, reason="https://github.com/pandas-dev/pandas/issues/43232", ), ), @@ -491,7 +491,7 @@ def test_categorical_add_categories(pd_str_cat, inplace): pytest.param( True, marks=pytest.mark.skipif( - not PANDAS_GE_134, + condition=not PANDAS_GE_134, reason="https://github.com/pandas-dev/pandas/issues/43232", ), ), diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py index bf1e9de5d1a..8f6dce4828a 100644 --- a/python/cudf/cudf/tests/test_concat.py +++ b/python/cudf/cudf/tests/test_concat.py @@ -9,7 +9,7 @@ import cudf as gd from cudf.api.types import is_categorical_dtype -from cudf.core._compat import PANDAS_LT_140 +from cudf.core._compat import PANDAS_GE_150, PANDAS_LT_140 from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype from cudf.testing._utils import assert_eq, assert_exceptions_equal @@ -811,10 +811,13 @@ def test_concat_join_axis_1(objs, ignore_index, sort, join, axis): axis=axis, ) - # TODO: Remove special handling of check_index_type below - # after the following bug from pandas is fixed: - # https://github.com/pandas-dev/pandas/issues/47501 - assert_eq(expected, actual, check_index_type=not (axis == 1 and sort)) + if PANDAS_GE_150: + assert_eq(expected, actual, check_index_type=True) + else: + # special handling of check_index_type below + # required because: + # https://github.com/pandas-dev/pandas/issues/47501 + assert_eq(expected, actual, check_index_type=not (axis == 1 and sort)) @pytest.mark.parametrize("ignore_index", [True, False]) @@ -881,10 +884,13 @@ def test_concat_join_one_df(ignore_index, sort, join, axis): [gdf1], sort=sort, join=join, ignore_index=ignore_index, axis=axis ) - # TODO: Remove special handling of check_index_type below - # after the following bug from pandas is fixed: - # https://github.com/pandas-dev/pandas/issues/47501 - assert_eq(expected, actual, check_index_type=not (axis == 1 and sort)) + if PANDAS_GE_150: + assert_eq(expected, actual, check_index_type=True) + else: + # special handling of check_index_type below + # required because: + # https://github.com/pandas-dev/pandas/issues/47501 + assert_eq(expected, actual, check_index_type=not (axis == 1 and sort)) @pytest.mark.parametrize( @@ -933,10 +939,13 @@ def test_concat_join_no_overlapping_columns( axis=axis, ) - # TODO: Remove special handling of check_index_type below - # after the following bug from pandas is fixed: - # https://github.com/pandas-dev/pandas/issues/47501 - assert_eq(expected, actual, check_index_type=not (axis == 1 and sort)) + if PANDAS_GE_150: + assert_eq(expected, actual, check_index_type=True) + else: + # special handling of check_index_type below + # required because: + # https://github.com/pandas-dev/pandas/issues/47501 + assert_eq(expected, actual, check_index_type=not (axis == 1 and sort)) 
@pytest.mark.parametrize("ignore_index", [False, True]) @@ -1124,15 +1133,21 @@ def test_concat_join_series(ignore_index, sort, join, axis): axis=axis, ) - # TODO: Remove special handling of check_index_type below - # after the following bugs from pandas are fixed: - # https://github.com/pandas-dev/pandas/issues/46675 - # https://github.com/pandas-dev/pandas/issues/47501 - assert_eq( - expected, - actual, - check_index_type=(axis == 0), - ) + if PANDAS_GE_150: + assert_eq( + expected, + actual, + check_index_type=True, + ) + else: + # special handling of check_index_type required below: + # https://github.com/pandas-dev/pandas/issues/46675 + # https://github.com/pandas-dev/pandas/issues/47501 + assert_eq( + expected, + actual, + check_index_type=(axis == 0), + ) @pytest.mark.parametrize( @@ -1299,7 +1314,8 @@ def test_concat_join_empty_dataframes( pytest.param( "outer", marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/37937" + condition=not PANDAS_GE_150, + reason="https://github.com/pandas-dev/pandas/issues/37937", ), ), ], diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index af719958c1a..d9e9a4dbba1 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -3106,7 +3106,7 @@ def test_to_frame(pdf, gdf): gdf_new_name = gdf.x.to_frame(name=name) pdf_new_name = pdf.x.to_frame(name=name) assert_eq(gdf_new_name, pdf_new_name) - assert gdf_new_name.columns[0] is name + assert gdf_new_name.columns[0] == np.bool(name) def test_dataframe_empty_sort_index(): @@ -4424,8 +4424,8 @@ def test_isin_dataframe(data, values): except ValueError as e: if str(e) == "Lengths must match.": pytest.xfail( - not PANDAS_GE_110, - "https://github.com/pandas-dev/pandas/issues/34256", + condition=not PANDAS_GE_110, + reason="https://github.com/pandas-dev/pandas/issues/34256", ) except TypeError as e: # Can't do isin with different categories @@ -9549,3 +9549,23 @@ def test_non_string_column_name_to_arrow(data): actual = pa.Table.from_pandas(df.to_pandas()) assert expected.equals(actual) + + +def test_complex_types_from_arrow(): + + expected = pa.Table.from_arrays( + [ + pa.array([1, 2, 3]), + pa.array([10, 20, 30]), + pa.array([{"a": 9}, {"b": 10}, {"c": 11}]), + pa.array([[{"a": 1}], [{"b": 2}], [{"c": 3}]]), + pa.array([10, 11, 12]).cast(pa.decimal128(21, 2)), + pa.array([{"a": 9}, {"b": 10, "c": {"g": 43}}, {"c": {"a": 10}}]), + ], + names=["a", "b", "c", "d", "e", "f"], + ) + + df = cudf.DataFrame.from_arrow(expected) + actual = df.to_arrow() + + assert expected.equals(actual) diff --git a/python/cudf/cudf/tests/test_datasets.py b/python/cudf/cudf/tests/test_datasets.py index c9f07eab5dd..98f801d0cba 100644 --- a/python/cudf/cudf/tests/test_datasets.py +++ b/python/cudf/cudf/tests/test_datasets.py @@ -1,3 +1,5 @@ +# Copyright (c) 2019-2022, NVIDIA CORPORATION. 
+ import numpy as np import cudf as gd diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 04ff5b88214..800a8aeeab5 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -657,7 +657,12 @@ def test_to_datetime_errors(data): gd_data = pd_data assert_exceptions_equal( - pd.to_datetime, cudf.to_datetime, ([pd_data],), ([gd_data],) + pd.to_datetime, + cudf.to_datetime, + ([pd_data],), + ([gd_data],), + compare_error_message=False, + expected_error_message="Given date string not likely a datetime.", ) diff --git a/python/cudf/cudf/tests/test_df_protocol.py b/python/cudf/cudf/tests/test_df_protocol.py index c88b6ac9228..7b83eec9b63 100644 --- a/python/cudf/cudf/tests/test_df_protocol.py +++ b/python/cudf/cudf/tests/test_df_protocol.py @@ -124,6 +124,9 @@ def test_from_dataframe(): df2 = cudf.from_dataframe(df1) assert_eq(df1, df2) + df3 = cudf.from_dataframe(df2) + assert_eq(df1, df3) + def test_int_dtype(): data_int = dict(a=[1, 2, 3], b=[9, 10, 11]) diff --git a/python/cudf/cudf/tests/test_dtypes.py b/python/cudf/cudf/tests/test_dtypes.py index 811cae929d8..2f8e1ac5c2f 100644 --- a/python/cudf/cudf/tests/test_dtypes.py +++ b/python/cudf/cudf/tests/test_dtypes.py @@ -6,7 +6,7 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_130 +from cudf.core._compat import PANDAS_GE_130, PANDAS_GE_150 from cudf.core.column import ColumnBase from cudf.core.dtypes import ( CategoricalDtype, @@ -20,6 +20,11 @@ from cudf.testing._utils import assert_eq from cudf.utils.dtypes import np_to_pa_dtype +if PANDAS_GE_150: + from pandas.core.arrays.arrow.extension_types import ArrowIntervalType +else: + from pandas.core.arrays._arrow_utils import ArrowIntervalType + def test_cdt_basic(): psr = pd.Series(["a", "b", "a", "c"], dtype="category") @@ -176,7 +181,7 @@ def closed(request): def test_interval_dtype_pyarrow_round_trip(subtype, closed): - pa_array = pd.core.arrays._arrow_utils.ArrowIntervalType(subtype, closed) + pa_array = ArrowIntervalType(subtype, closed) expect = pa_array got = IntervalDtype.from_arrow(expect).to_arrow() assert expect.equals(got) diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py index 692f40873d7..f1ed17c5df5 100644 --- a/python/cudf/cudf/tests/test_extension_compilation.py +++ b/python/cudf/cudf/tests/test_extension_compilation.py @@ -10,7 +10,7 @@ from cudf import NA from cudf.core.udf.api import Masked -from cudf.core.udf.typing import MaskedType +from cudf.core.udf.masked_typing import MaskedType from cudf.testing._utils import parametrize_numeric_dtypes_pairwise arith_ops = ( diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 911b1d5443e..c4c8e81dda2 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -699,7 +699,8 @@ def test_advanced_groupby_levels(): pytest.param( lambda df: df.groupby(["x", "y", "z"]).sum(), marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/32464" + condition=not PANDAS_GE_150, + reason="https://github.com/pandas-dev/pandas/issues/32464", ), ), lambda df: df.groupby(["x", "y"]).sum(), @@ -1578,8 +1579,10 @@ def test_groupby_list_of_structs(list_agg): ) gdf = cudf.from_pandas(pdf) - with pytest.raises(pd.core.base.DataError): - gdf.groupby("a").agg({"b": list_agg}), + with pytest.raises( + pd.errors.DataError if PANDAS_GE_150 else 
pd.core.base.DataError + ): + gdf.groupby("a").agg({"b": list_agg}) @pytest.mark.parametrize("list_agg", [list, "collect"]) diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py index f3d9180d44d..f6ca4691669 100644 --- a/python/cudf/cudf/tests/test_json.py +++ b/python/cudf/cudf/tests/test_json.py @@ -615,6 +615,48 @@ def test_json_nested_lines(data): ) bytes.seek(0) pdf = pd.read_json(bytes, orient="records", lines=True) - # In the second test-case: - # Pandas omits "f1" in first row, so we have to enforce a common schema - assert df.to_arrow().equals(pa.Table.from_pandas(pdf)) + # In the second test-case we need to take a detour via pyarrow + # Pandas omits "f1" in first row, so we have to enforce a common schema, + # such that pandas would have the f1 member with null + # Also, pyarrow chooses a different ordering for a nested column's + # children, though the key-value pairs are correct. + pa_table_pdf = pa.Table.from_pandas( + pdf, schema=df.to_arrow().schema, safe=False + ) + assert df.to_arrow().equals(pa_table_pdf) + + +def test_json_nested_data(): + json_str = ( + '[{"0":{},"2":{}},{"1":[[""],[]],"2":{"2":""}},' + '{"0":{"a":"1"},"2":{"0":"W&RR=+I","1":""}}]' + ) + df = cudf.read_json( + StringIO(json_str), engine="cudf_experimental", orient="records" + ) + pdf = pd.read_json(StringIO(json_str), orient="records") + pdf.columns = pdf.columns.astype("str") + pa_table_pdf = pa.Table.from_pandas( + pdf, schema=df.to_arrow().schema, safe=False + ) + assert df.to_arrow().equals(pa_table_pdf) + + +def test_json_types_data(): + # 0:<0:string,1:float> + # 1:list<int> + # 2:<0:bool> + json_str = ( + '[{"0":null,"2":{}},' + '{"1":[123],"0":{"0":"foo","1":123.4},"2":{"0":false}},' + '{"0":{},"1":[],"2":{"0":null}}]' + ) + df = cudf.read_json( + StringIO(json_str), engine="cudf_experimental", orient="records" + ) + pdf = pd.read_json(StringIO(json_str), orient="records") + pdf.columns = pdf.columns.astype("str") + pa_table_pdf = pa.Table.from_pandas( + pdf, schema=df.to_arrow().schema, safe=False + ) + assert df.to_arrow().equals(pa_table_pdf) diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index aa4e5393e5b..8ea11382419 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -107,6 +107,51 @@ def test_listdtype_hash(): assert hash(a) != hash(c) +@pytest.fixture(params=["int", "float", "datetime", "timedelta"]) +def leaf_value(request): + if request.param == "int": + return np.int32(1) + elif request.param == "float": + return np.float64(1) + elif request.param == "datetime": + return pd.to_datetime("1900-01-01") + elif request.param == "timedelta": + return pd.to_timedelta("10d") + else: + raise ValueError("Unhandled data type") + + +@pytest.fixture(params=["list", "struct"]) +def list_or_struct(request, leaf_value): + if request.param == "list": + return [[leaf_value], [leaf_value]] + elif request.param == "struct": + return {"a": leaf_value, "b": [leaf_value], "c": {"d": [leaf_value]}} + else: + raise ValueError("Unhandled data type") + + +@pytest.fixture(params=["list", "struct"]) +def nested_list(request, list_or_struct, leaf_value): + if request.param == "list": + return [list_or_struct, list_or_struct] + elif request.param == "struct": + return [ + { + "a": list_or_struct, + "b": leaf_value, + "c": {"d": list_or_struct, "e": leaf_value}, + } + ] + else: + raise ValueError("Unhandled data type") + + +def test_list_dtype_explode(nested_list): + sr = cudf.Series([nested_list]) + assert
sr.dtype.element_type == sr.explode().dtype + + @pytest.mark.parametrize( "data", [ diff --git a/python/cudf/cudf/tests/test_numerical.py b/python/cudf/cudf/tests/test_numerical.py index 160db7053b9..e2fbd55c051 100644 --- a/python/cudf/cudf/tests/test_numerical.py +++ b/python/cudf/cudf/tests/test_numerical.py @@ -5,6 +5,7 @@ import pytest import cudf +from cudf.core._compat import PANDAS_GE_150 from cudf.testing._utils import NUMERIC_TYPES, assert_eq from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes @@ -263,9 +264,12 @@ def test_to_numeric_downcast_large_float_pd_bug(data, downcast): expected = pd.to_numeric(ps, downcast=downcast) got = cudf.to_numeric(gs, downcast=downcast) - # Pandas bug: https://github.com/pandas-dev/pandas/issues/19729 - with pytest.raises(AssertionError, match="Series are different"): + if PANDAS_GE_150: assert_eq(expected, got) + else: + # Pandas bug: https://github.com/pandas-dev/pandas/issues/19729 + with pytest.raises(AssertionError, match="Series are different"): + assert_eq(expected, got) @pytest.mark.parametrize( @@ -342,9 +346,12 @@ def test_to_numeric_downcast_string_large_float(data, downcast): expected = pd.to_numeric(ps, downcast=downcast) got = cudf.to_numeric(gs, downcast=downcast) - # Pandas bug: https://github.com/pandas-dev/pandas/issues/19729 - with pytest.raises(AssertionError, match="Series are different"): + if PANDAS_GE_150: assert_eq(expected, got) + else: + # Pandas bug: https://github.com/pandas-dev/pandas/issues/19729 + with pytest.raises(AssertionError, match="Series are different"): + assert_eq(expected, got) else: expected = pd.Series([np.inf, -np.inf]) with pytest.warns( diff --git a/python/cudf/cudf/tests/test_numpy_interop.py b/python/cudf/cudf/tests/test_numpy_interop.py index 55b5a38c3e5..46324a85bb4 100644 --- a/python/cudf/cudf/tests/test_numpy_interop.py +++ b/python/cudf/cudf/tests/test_numpy_interop.py @@ -1,3 +1,5 @@ +# Copyright (c) 2019-2022, NVIDIA CORPORATION. + import numpy as np import pytest diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 18d159bc423..c6bf17110c2 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -1871,3 +1871,22 @@ def test_orc_writer_negative_timestamp(negative_timestamp_df): assert_eq(negative_timestamp_df, pd.read_orc(buffer)) assert_eq(negative_timestamp_df, pyarrow.orc.ORCFile(buffer).read()) + + +def test_orc_reader_apache_negative_timestamp(datadir): + path = datadir / "TestOrcFile.apache_timestamp.orc" + + pdf = pd.read_orc(path) + gdf = cudf.read_orc(path) + + assert_eq(pdf, gdf) + + +def test_statistics_string_sum(): + strings = ["a string", "another string!"] + buff = BytesIO() + df = cudf.DataFrame({"str": strings}) + df.to_orc(buff) + + file_stats, stripe_stats = cudf.io.orc.read_orc_statistics([buff]) + assert_eq(file_stats[0]["str"].get("sum"), sum(len(s) for s in strings)) diff --git a/python/cudf/cudf/tests/test_query_mask.py b/python/cudf/cudf/tests/test_query_mask.py index ab1c085c6c0..ae5171f28d4 100644 --- a/python/cudf/cudf/tests/test_query_mask.py +++ b/python/cudf/cudf/tests/test_query_mask.py @@ -1,3 +1,5 @@ +# Copyright (c) 2019-2022, NVIDIA CORPORATION. 
+ import numpy as np import pandas as pd import pytest diff --git a/python/cudf/cudf/tests/test_rolling.py b/python/cudf/cudf/tests/test_rolling.py index bede054037d..08188c25ffa 100644 --- a/python/cudf/cudf/tests/test_rolling.py +++ b/python/cudf/cudf/tests/test_rolling.py @@ -8,7 +8,12 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_110, PANDAS_LT_140 +from cudf.core._compat import ( + PANDAS_GE_110, + PANDAS_GE_130, + PANDAS_GE_150, + PANDAS_LT_140, +) from cudf.testing._utils import _create_pandas_series, assert_eq from cudf.testing.dataset_generator import rand_dataframe @@ -214,12 +219,14 @@ def test_rolling_var_std_large(agg, ddof, center, seed, window_size): assert_eq(expect, got, **kwargs) -@pytest.mark.xfail +@pytest.mark.xfail( + condition=not PANDAS_GE_130, + reason="https://github.com/pandas-dev/pandas/issues/37051", +) def test_rolling_var_uniform_window(): """ Pandas adopts an online variance calculation algorithm. This gives a floating point artifact. - https://github.com/pandas-dev/pandas/issues/37051 In cudf, each window is computed independently from the previous window, this gives better numeric precision. @@ -492,7 +499,9 @@ def test_rolling_custom_index_support(): from pandas.api.indexers import BaseIndexer class CustomIndexer(BaseIndexer): - def get_window_bounds(self, num_values, min_periods, center, closed): + def custom_get_window_bounds( + self, num_values, min_periods, center, closed, step=None + ): start = np.empty(num_values, dtype=np.int64) end = np.empty(num_values, dtype=np.int64) @@ -506,6 +515,24 @@ def get_window_bounds(self, num_values, min_periods, center, closed): return start, end + if PANDAS_GE_150: + + def get_window_bounds( + self, num_values, min_periods, center, closed, step + ): + return self.custom_get_window_bounds( + num_values, min_periods, center, closed, step + ) + + else: + + def get_window_bounds( + self, num_values, min_periods, center, closed + ): + return self.custom_get_window_bounds( + num_values, min_periods, center, closed + ) + use_expanding = [True, False, True, False, True] indexer = CustomIndexer(window_size=1, use_expanding=use_expanding) diff --git a/python/cudf/cudf/tests/test_s3.py b/python/cudf/cudf/tests/test_s3.py index 1fdd2dae31d..3219a6ad847 100644 --- a/python/cudf/cudf/tests/test_s3.py +++ b/python/cudf/cudf/tests/test_s3.py @@ -416,16 +416,6 @@ def test_write_parquet(s3_base, s3so, pdf, partition_cols): def test_read_json(s3_base, s3so): fname = "test_json_reader.json" bucket = "json" - # TODO: After following bug is fixed switch - # back to using bytes: - # https://github.com/pandas-dev/pandas/issues/46935 - - # buffer = ( - # b'{"amount": 100, "name": "Alice"}\n' - # b'{"amount": 200, "name": "Bob"}\n' - # b'{"amount": 300, "name": "Charlie"}\n' - # b'{"amount": 400, "name": "Dennis"}\n' - # ) buffer = ( '{"amount": 100, "name": "Alice"}\n' '{"amount": 200, "name": "Bob"}\n' diff --git a/python/cudf/cudf/tests/test_serialize.py b/python/cudf/cudf/tests/test_serialize.py index 61eee6bba43..53318eef1c8 100644 --- a/python/cudf/cudf/tests/test_serialize.py +++ b/python/cudf/cudf/tests/test_serialize.py @@ -8,6 +8,7 @@ import pytest import cudf +from cudf.core._compat import PANDAS_GE_150 from cudf.testing import _utils as utils from cudf.testing._utils import assert_eq @@ -86,13 +87,14 @@ ), ), pd._testing.makeRangeIndex, - pd._testing.makeStringIndex, pd._testing.makeStringSeries, pd._testing.makeTimeDataFrame, pd._testing.makeTimeSeries, 
pd._testing.makeTimedeltaIndex, pd._testing.makeUIntIndex, - pd._testing.makeUnicodeIndex, + pd._testing.makeUnicodeIndex + if not PANDAS_GE_150 + else pd._testing.makeStringIndex, ], ) @pytest.mark.parametrize("to_host", [True, False]) diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index b1ecb38e4d4..c0b99f56238 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -395,7 +395,7 @@ def test_series_describe_numeric(dtype): actual = gs.describe() expected = ps.describe() - assert_eq(expected, actual) + assert_eq(expected, actual, check_dtype=True) @pytest.mark.parametrize("dtype", ["datetime64[ns]"]) @@ -1650,7 +1650,7 @@ def test_isin_numeric(data, values): assert_eq(got, expected) -@pytest.mark.xfail(raises=ValueError) +@pytest.mark.xfail(raises=TypeError) def test_fill_new_category(): gs = cudf.Series(pd.Categorical(["a", "b", "c"])) gs[0:1] = "d" @@ -1937,3 +1937,17 @@ def test_series_ordered_dedup(): expect = pd.Series(sr.to_pandas().unique()) got = cudf.Series(sr._column.unique(preserve_order=True)) assert_eq(expect.values, got.values) + + +@pytest.mark.parametrize("dtype", ["int64", "float64"]) +@pytest.mark.parametrize("bool_scalar", [True, False]) +def test_set_bool_error(dtype, bool_scalar): + sr = cudf.Series([1, 2, 3], dtype=dtype) + psr = sr.to_pandas(nullable=True) + + assert_exceptions_equal( + lfunc=sr.__setitem__, + rfunc=psr.__setitem__, + lfunc_args_and_kwargs=([bool_scalar],), + rfunc_args_and_kwargs=([bool_scalar],), + ) diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py index 733fb4d5e4d..cb455ae831c 100644 --- a/python/cudf/cudf/tests/test_setitem.py +++ b/python/cudf/cudf/tests/test_setitem.py @@ -5,7 +5,7 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_120, PANDAS_LE_122 +from cudf.core._compat import PANDAS_GE_120, PANDAS_GE_150, PANDAS_LE_122 from cudf.testing._utils import assert_eq, assert_exceptions_equal @@ -220,23 +220,25 @@ def test_column_set_unequal_length_object_by_mask(): def test_categorical_setitem_invalid(): - # ps = pd.Series([1, 2, 3], dtype="category") + ps = pd.Series([1, 2, 3], dtype="category") gs = cudf.Series([1, 2, 3], dtype="category") - # TODO: After https://github.com/pandas-dev/pandas/issues/46646 - # is fixed remove the following workaround and - # uncomment assert_exceptions_equal - # WORKAROUND - with pytest.raises( - ValueError, - match="Cannot setitem on a Categorical with a new category, set the " - "categories first", - ): - gs[0] = 5 - - # assert_exceptions_equal( - # lfunc=ps.__setitem__, - # rfunc=gs.__setitem__, - # lfunc_args_and_kwargs=([0, 5], {}), - # rfunc_args_and_kwargs=([0, 5], {}), - # ) + if PANDAS_GE_150: + assert_exceptions_equal( + lfunc=ps.__setitem__, + rfunc=gs.__setitem__, + lfunc_args_and_kwargs=([0, 5], {}), + rfunc_args_and_kwargs=([0, 5], {}), + compare_error_message=False, + expected_error_message="Cannot setitem on a Categorical with a " + "new category, set the categories first", + ) + else: + # Following workaround is needed because: + # https://github.com/pandas-dev/pandas/issues/46646 + with pytest.raises( + ValueError, + match="Cannot setitem on a Categorical with a new category, set " + "the categories first", + ): + gs[0] = 5 diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 046d4498a2a..74d602c2cf1 100644 --- a/python/cudf/cudf/tests/test_string.py +++ 
b/python/cudf/cudf/tests/test_string.py @@ -15,7 +15,7 @@ import cudf from cudf import concat -from cudf.core._compat import PANDAS_GE_110 +from cudf.core._compat import PANDAS_GE_110, PANDAS_GE_150 from cudf.core.column.string import StringColumn from cudf.core.index import StringIndex, as_index from cudf.testing._utils import ( @@ -1769,7 +1769,8 @@ def test_strings_filling_tests(data, width, fillchar): pytest.param( ["hello", "there", "world", "+1234", "-1234", None, "accént", ""], marks=pytest.mark.xfail( - reason="pandas 1.5 upgrade TODO", + condition=not PANDAS_GE_150, + reason="https://github.com/pandas-dev/pandas/issues/20868", ), ), [" ", "\t\r\n ", ""], @@ -2012,10 +2013,32 @@ def test_string_starts_ends(data, pat): ps = pd.Series(data) gs = cudf.Series(data) - assert_eq( - ps.str.startswith(pat), gs.str.startswith(pat), check_dtype=False - ) - assert_eq(ps.str.endswith(pat), gs.str.endswith(pat), check_dtype=False) + if pat is None: + assert_exceptions_equal( + lfunc=ps.str.startswith, + rfunc=gs.str.startswith, + lfunc_args_and_kwargs=([pat],), + rfunc_args_and_kwargs=([pat],), + compare_error_message=False, + expected_error_message="expected a string or a sequence-like " + "object, not NoneType", + ) + assert_exceptions_equal( + lfunc=ps.str.endswith, + rfunc=gs.str.endswith, + lfunc_args_and_kwargs=([pat],), + rfunc_args_and_kwargs=([pat],), + compare_error_message=False, + expected_error_message="expected a string or a sequence-like " + "object, not NoneType", + ) + else: + assert_eq( + ps.str.startswith(pat), gs.str.startswith(pat), check_dtype=False + ) + assert_eq( + ps.str.endswith(pat), gs.str.endswith(pat), check_dtype=False + ) @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 4f385656405..2b96c920765 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -8,6 +8,7 @@ import cudf from cudf.core.missing import NA +from cudf.core.udf import _STRING_UDFS_ENABLED from cudf.core.udf._ops import ( arith_ops, bitwise_ops, @@ -22,6 +23,49 @@ ) +# only run string udf tests if library exists and is enabled +def string_udf_test(f): + if _STRING_UDFS_ENABLED: + return f + else: + return pytest.mark.skip(reason="String UDFs not enabled")(f) + + +@pytest.fixture(scope="module") +def str_udf_data(): + return cudf.DataFrame( + { + "str_col": [ + "abc", + "ABC", + "AbC", + "123", + "123aBc", + "123@.!", + "", + "rapids ai", + "gpu", + "True", + "False", + "1.234", + ".123a", + "0.013", + "1.0", + "01", + "20010101", + "cudf", + "cuda", + "gpu", + ] + } + ) + + +@pytest.fixture(params=["a", "cu", "2", "gpu", "", " "]) +def substr(request): + return request.param + + def run_masked_udf_test(func, data, args=(), **kwargs): gdf = data pdf = data.to_pandas(nullable=True) @@ -537,7 +581,6 @@ def func(row): @pytest.mark.parametrize( "unsupported_col", [ - ["a", "b", "c"], _decimal_series( ["1.0", "2.0", "3.0"], dtype=cudf.Decimal64Dtype(2, 1) ), @@ -682,6 +725,128 @@ def f(x): assert precompiled.currsize == 1 +@string_udf_test +def test_string_udf_len(str_udf_data): + def func(row): + return len(row["str_col"]) + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +@string_udf_test +def test_string_udf_startswith(str_udf_data, substr): + def func(row): + return row["str_col"].startswith(substr) + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +@string_udf_test +def test_string_udf_endswith(str_udf_data, 
substr): + def func(row): + return row["str_col"].endswith(substr) + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +@string_udf_test +def test_string_udf_find(str_udf_data, substr): + def func(row): + return row["str_col"].find(substr) + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +@string_udf_test +def test_string_udf_rfind(str_udf_data, substr): + def func(row): + return row["str_col"].rfind(substr) + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +@string_udf_test +def test_string_udf_contains(str_udf_data, substr): + def func(row): + return substr in row["str_col"] + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +@string_udf_test +@pytest.mark.parametrize("other", ["cudf", "123", "", " "]) +@pytest.mark.parametrize("cmpop", comparison_ops) +def test_string_udf_cmpops(str_udf_data, other, cmpop): + def func(row): + return cmpop(row["str_col"], other) + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +@string_udf_test +def test_string_udf_isalnum(str_udf_data): + def func(row): + return row["str_col"].isalnum() + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +@string_udf_test +def test_string_udf_isalpha(str_udf_data): + def func(row): + return row["str_col"].isalpha() + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +@string_udf_test +def test_string_udf_isdigit(str_udf_data): + def func(row): + return row["str_col"].isdigit() + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +@string_udf_test +def test_string_udf_isdecimal(str_udf_data): + def func(row): + return row["str_col"].isdecimal() + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +@string_udf_test +def test_string_udf_isupper(str_udf_data): + def func(row): + return row["str_col"].isupper() + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +@string_udf_test +def test_string_udf_islower(str_udf_data): + def func(row): + return row["str_col"].islower() + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +@string_udf_test +def test_string_udf_isspace(str_udf_data): + def func(row): + return row["str_col"].isspace() + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +@string_udf_test +def test_string_udf_count(str_udf_data, substr): + def func(row): + return row["str_col"].count(substr) + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + @pytest.mark.parametrize( "data", [[1.0, 0.0, 1.5], [1, 0, 2], [True, False, True]] ) diff --git a/python/cudf/cudf/utils/cudautils.py b/python/cudf/cudf/utils/cudautils.py index 8b9a6be0ffe..e2bd4556ce8 100755 --- a/python/cudf/cudf/utils/cudautils.py +++ b/python/cudf/cudf/utils/cudautils.py @@ -197,13 +197,15 @@ def make_cache_key(udf, sig): """ codebytes = udf.__code__.co_code constants = udf.__code__.co_consts + names = udf.__code__.co_names + if udf.__closure__ is not None: cvars = tuple(x.cell_contents for x in udf.__closure__) cvarbytes = dumps(cvars) else: cvarbytes = b"" - return constants, codebytes, cvarbytes, sig + return names, constants, codebytes, cvarbytes, sig def compile_udf(udf, type_signature): @@ -248,7 +250,7 @@ def compile_udf(udf, type_signature): ptx_code, return_type = cuda.compile_ptx_for_current_device( udf, type_signature, device=True ) - if not isinstance(return_type, cudf.core.udf.typing.MaskedType): + if not isinstance(return_type, cudf.core.udf.masked_typing.MaskedType): output_type = numpy_support.as_dtype(return_type).type else: 
output_type = return_type diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index 29d2337e9d6..92c23d8b97b 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -11,6 +11,7 @@ from pandas.core.dtypes.common import infer_dtype_from_object import cudf +from cudf.api.types import is_bool, is_float, is_integer from cudf.core._compat import PANDAS_GE_120 from cudf.core.missing import NA @@ -671,6 +672,43 @@ def _maybe_convert_to_default_type(dtype): return dtype +def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool: + if not len(rng): + return True + return np.can_cast(rng[0], dtype) and np.can_cast(rng[-1], dtype) + + +def _dtype_can_hold_element(dtype: np.dtype, element) -> bool: + if dtype.kind in {"i", "u"}: + if isinstance(element, range): + if _dtype_can_hold_range(element, dtype): + return True + return False + + elif is_integer(element) or ( + is_float(element) and element.is_integer() + ): + info = np.iinfo(dtype) + if info.min <= element <= info.max: + return True + return False + + elif dtype.kind == "f": + if is_integer(element) or is_float(element): + casted = dtype.type(element) + if np.isnan(casted) or casted == element: + return True + # otherwise e.g. overflow see TestCoercionFloat32 + return False + + elif dtype.kind == "b": + if is_bool(element): + return True + return False + + raise NotImplementedError(f"Unsupported dtype: {dtype}") + + # Type dispatch loops similar to what are found in `np.add.types` # In NumPy, whether or not an op can be performed between two # operands is determined by checking to see if NumPy has a c/c++ diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index 52830fe219f..63bc6d59524 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -13,11 +13,12 @@ import rmm import cudf +import cudf.api.types from cudf.core import column from cudf.core.buffer import as_device_buffer_like # The size of the mask in bytes -mask_dtype = cudf.dtype(np.int32) +mask_dtype = cudf.api.types.dtype(np.int32) mask_bitsize = mask_dtype.itemsize * 8 # Mapping from ufuncs to the corresponding binary operators. 
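The `_dtype_can_hold_element` helper added to `cudf/utils/dtypes.py` above ports pandas' value-based casting rules: integer dtypes bounds-check integers (and whole ranges via `_dtype_can_hold_range`), float dtypes accept values that survive a round-trip cast, and bool dtypes accept only genuine booleans. A minimal sketch of the intended behavior (illustrative only; the helper is private, so calling it directly is an assumption rather than public API):

    import numpy as np
    from cudf.utils.dtypes import _dtype_can_hold_element

    assert _dtype_can_hold_element(np.dtype("int8"), 127)             # within int8 bounds
    assert not _dtype_can_hold_element(np.dtype("int8"), 128)         # overflows int8
    assert _dtype_can_hold_element(np.dtype("uint8"), range(0, 256))  # both endpoints castable
    assert _dtype_can_hold_element(np.dtype("float32"), 1.5)          # round-trips exactly
    assert not _dtype_can_hold_element(np.dtype("bool"), 1)           # only real bools qualify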
diff --git a/python/cudf/setup.cfg b/python/cudf/setup.cfg index 1f7cfeb49ae..8a648097ac8 100644 --- a/python/cudf/setup.cfg +++ b/python/cudf/setup.cfg @@ -25,6 +25,7 @@ known_dask= dask_cuda known_rapids= rmm + strings_udf known_first_party= cudf default_section=THIRDPARTY @@ -41,4 +42,4 @@ skip= buck-out build dist - __init__.py \ No newline at end of file + __init__.py diff --git a/python/cudf/setup.py b/python/cudf/setup.py index cfc31fccc03..93948afc0f6 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -7,12 +7,11 @@ import sys from distutils.spawn import find_executable +import versioneer from setuptools import find_packages from skbuild import setup from skbuild.command.build_ext import build_ext -import versioneer - install_requires = [ "cachetools", "cuda-python>=11.5,<11.7.1", @@ -21,7 +20,7 @@ "numpy", "nvtx>=0.2.1", "packaging", - "pandas>=1.0,<1.5.0dev0", + "pandas>=1.0,<1.6.0dev0", "protobuf>=3.20.1,<3.21.0a0", "typing_extensions", ] diff --git a/python/cudf_kafka/setup.py b/python/cudf_kafka/setup.py index 6416bfb550d..aafb5630b94 100644 --- a/python/cudf_kafka/setup.py +++ b/python/cudf_kafka/setup.py @@ -5,12 +5,11 @@ from distutils.sysconfig import get_python_lib import numpy as np +import versioneer from Cython.Build import cythonize from setuptools import find_packages, setup from setuptools.extension import Extension -import versioneer - install_requires = ["cudf", "cython"] extras_require = {"test": ["pytest", "pytest-xdist"]} diff --git a/python/custreamz/setup.py b/python/custreamz/setup.py index 37a45729921..911f83ff6bd 100644 --- a/python/custreamz/setup.py +++ b/python/custreamz/setup.py @@ -1,8 +1,7 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. -from setuptools import find_packages, setup - import versioneer +from setuptools import find_packages, setup install_requires = ["cudf_kafka", "cudf"] diff --git a/python/dask_cudf/dask_cudf/groupby.py b/python/dask_cudf/dask_cudf/groupby.py index 764b05f149f..159602f195a 100644 --- a/python/dask_cudf/dask_cudf/groupby.py +++ b/python/dask_cudf/dask_cudf/groupby.py @@ -57,10 +57,10 @@ def wrapper(*args, **kwargs): class CudfDataFrameGroupBy(DataFrameGroupBy): @_dask_cudf_nvtx_annotate - def __init__(self, *args, **kwargs): + def __init__(self, *args, sort=None, **kwargs): self.sep = kwargs.pop("sep", "___") self.as_index = kwargs.pop("as_index", True) - super().__init__(*args, **kwargs) + super().__init__(*args, sort=sort, **kwargs) @_dask_cudf_nvtx_annotate def __getitem__(self, key): @@ -280,10 +280,10 @@ def aggregate(self, arg, split_every=None, split_out=1, shuffle=None): class CudfSeriesGroupBy(SeriesGroupBy): @_dask_cudf_nvtx_annotate - def __init__(self, *args, **kwargs): + def __init__(self, *args, sort=None, **kwargs): self.sep = kwargs.pop("sep", "___") self.as_index = kwargs.pop("as_index", True) - super().__init__(*args, **kwargs) + super().__init__(*args, sort=sort, **kwargs) @_dask_cudf_nvtx_annotate @_check_groupby_supported diff --git a/python/dask_cudf/dask_cudf/io/json.py b/python/dask_cudf/dask_cudf/io/json.py index 266693dafe2..6c3c95d1a2e 100644 --- a/python/dask_cudf/dask_cudf/io/json.py +++ b/python/dask_cudf/dask_cudf/io/json.py @@ -1,3 +1,5 @@ +# Copyright (c) 2019-2022, NVIDIA CORPORATION. 
+ from functools import partial import dask diff --git a/python/dask_cudf/dask_cudf/io/tests/test_csv.py b/python/dask_cudf/dask_cudf/io/tests/test_csv.py index 32960a90bd7..564a719fb86 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_csv.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_csv.py @@ -1,3 +1,5 @@ +# Copyright (c) 2019-2022, NVIDIA CORPORATION. + import gzip import os import warnings diff --git a/python/dask_cudf/dask_cudf/io/tests/test_json.py b/python/dask_cudf/dask_cudf/io/tests/test_json.py index 3a1e98feb31..3f854bb343b 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_json.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_json.py @@ -1,3 +1,5 @@ +# Copyright (c) 2019-2022, NVIDIA CORPORATION. + import os import pandas as pd diff --git a/python/dask_cudf/dask_cudf/tests/test_delayed_io.py b/python/dask_cudf/dask_cudf/tests/test_delayed_io.py index 7789664afae..6c68d92a8df 100644 --- a/python/dask_cudf/dask_cudf/tests/test_delayed_io.py +++ b/python/dask_cudf/dask_cudf/tests/test_delayed_io.py @@ -1,6 +1,9 @@ +# Copyright (c) 2019-2022, NVIDIA CORPORATION. + """ Test IO with dask.delayed API """ + import numpy as np import pytest from pandas.testing import assert_frame_equal diff --git a/python/dask_cudf/dask_cudf/tests/test_dispatch.py b/python/dask_cudf/dask_cudf/tests/test_dispatch.py index 6bf4b956404..5d17a605577 100644 --- a/python/dask_cudf/dask_cudf/tests/test_dispatch.py +++ b/python/dask_cudf/dask_cudf/tests/test_dispatch.py @@ -1,3 +1,5 @@ +# Copyright (c) 2021-2022, NVIDIA CORPORATION. + import pandas as pd from dask.dataframe.methods import is_categorical_dtype diff --git a/python/dask_cudf/dask_cudf/tests/test_groupby.py b/python/dask_cudf/dask_cudf/tests/test_groupby.py index a8e9e8e92aa..cc27c7f2a86 100644 --- a/python/dask_cudf/dask_cudf/tests/test_groupby.py +++ b/python/dask_cudf/dask_cudf/tests/test_groupby.py @@ -577,14 +577,18 @@ def test_groupby_categorical_key(): got = gddf.groupby("name", sort=True).agg( {"x": ["mean", "max"], "y": ["mean", "count"]} ) - expect = ddf.groupby("name", sort=True).agg( - {"x": ["mean", "max"], "y": ["mean", "count"]} + # Use `compute` to avoid upstream issue for now + # (See: https://github.com/dask/dask/issues/9515) + expect = ( + ddf.compute() + .groupby("name", sort=True) + .agg({"x": ["mean", "max"], "y": ["mean", "count"]}) ) dd.assert_eq(expect, got) @pytest.mark.parametrize("as_index", [True, False]) -@pytest.mark.parametrize("split_out", [None, 1, 2]) +@pytest.mark.parametrize("split_out", ["use_dask_default", 1, 2]) @pytest.mark.parametrize("split_every", [False, 4]) @pytest.mark.parametrize("npartitions", [1, 10]) def test_groupby_agg_params(npartitions, split_every, split_out, as_index): @@ -602,14 +606,17 @@ def test_groupby_agg_params(npartitions, split_every, split_out, as_index): "c": ["mean", "std", "var"], } + split_kwargs = {"split_every": split_every, "split_out": split_out} + if split_out == "use_dask_default": + split_kwargs.pop("split_out") + # Check `sort=True` behavior if split_out == 1: gf = ( ddf.groupby(["name", "a"], sort=True, as_index=as_index) .aggregate( agg_dict, - split_every=split_every, - split_out=split_out, + **split_kwargs, ) .compute() ) @@ -630,13 +637,11 @@ def test_groupby_agg_params(npartitions, split_every, split_out, as_index): # Full check (`sort=False`) gr = ddf.groupby(["name", "a"], sort=False, as_index=as_index).aggregate( agg_dict, - split_every=split_every, - split_out=split_out, + **split_kwargs, ) pr = pddf.groupby(["name", "a"], sort=False).agg( 
agg_dict, - split_every=split_every, - split_out=split_out, + **split_kwargs, ) # Test `as_index` argument @@ -648,7 +653,9 @@ def test_groupby_agg_params(npartitions, split_every, split_out, as_index): assert ("name", "") in gr.columns and ("a", "") in gr.columns # Check `split_out` argument - assert gr.npartitions == (split_out or 1) + assert gr.npartitions == ( + 1 if split_out == "use_dask_default" else split_out + ) # Compute for easier multiindex handling gf = gr.compute() diff --git a/python/dask_cudf/dask_cudf/tests/test_join.py b/python/dask_cudf/dask_cudf/tests/test_join.py index 8b2d85c59d7..15e383ac4d9 100644 --- a/python/dask_cudf/dask_cudf/tests/test_join.py +++ b/python/dask_cudf/dask_cudf/tests/test_join.py @@ -1,3 +1,5 @@ +# Copyright (c) 2019-2022, NVIDIA CORPORATION. + from functools import partial import numpy as np diff --git a/python/dask_cudf/dask_cudf/tests/test_sort.py b/python/dask_cudf/dask_cudf/tests/test_sort.py index 0b258dd33e7..770a52316b6 100644 --- a/python/dask_cudf/dask_cudf/tests/test_sort.py +++ b/python/dask_cudf/dask_cudf/tests/test_sort.py @@ -1,3 +1,5 @@ +# Copyright (c) 2019-2022, NVIDIA CORPORATION. + import cupy as cp import numpy as np import pytest diff --git a/python/dask_cudf/dask_cudf/tests/test_struct.py b/python/dask_cudf/dask_cudf/tests/test_struct.py index 4164c683b51..6abac4cf53b 100644 --- a/python/dask_cudf/dask_cudf/tests/test_struct.py +++ b/python/dask_cudf/dask_cudf/tests/test_struct.py @@ -1,3 +1,5 @@ +# Copyright (c) 2021-2022, NVIDIA CORPORATION. + import pytest import cudf diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 0477fea8ee9..107fd5e7d33 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -4,9 +4,8 @@ import re import shutil -from setuptools import find_packages, setup - import versioneer +from setuptools import find_packages, setup install_requires = [ "cudf", @@ -14,13 +13,13 @@ "distributed>=2022.7.1", "fsspec>=0.6.0", "numpy", - "pandas>=1.0,<1.5.0dev0", + "pandas>=1.0,<1.6.0dev0", ] extras_require = { "test": [ "numpy", - "pandas>=1.0,<1.5.0dev0", + "pandas>=1.0,<1.6.0dev0", "pytest", "numba>=0.56.2", "dask>=2021.09.1", diff --git a/python/strings_udf/CMakeLists.txt b/python/strings_udf/CMakeLists.txt new file mode 100644 index 00000000000..59d8ae795f2 --- /dev/null +++ b/python/strings_udf/CMakeLists.txt @@ -0,0 +1,43 @@ +# ============================================================================= +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR) + +set(strings_udf_version 22.10.00) + +include(../../fetch_rapids.cmake) + +project( + strings-udf-python + VERSION ${strings_udf_version} + LANGUAGES CXX + # TODO: Building Python extension modules via the python_extension_module requires the C + # language to be enabled here. 
The test project that is built in scikit-build to verify + # various linking options for the python library is hardcoded to build with C, so until + # that is fixed we need to keep C. + C + # TODO: Enabling CUDA will not be necessary once we upgrade to CMake 3.22, which will + # pull in the required languages for the C++ project even if this project does not + # require those languages. + CUDA +) + +find_package(cudf ${strings_udf_version} REQUIRED) + +add_subdirectory(cpp) + +include(rapids-cython) +rapids_cython_init() + +add_subdirectory(strings_udf/_lib) diff --git a/python/strings_udf/cpp/CMakeLists.txt b/python/strings_udf/cpp/CMakeLists.txt new file mode 100644 index 00000000000..d157acfefde --- /dev/null +++ b/python/strings_udf/cpp/CMakeLists.txt @@ -0,0 +1,111 @@ +# ============================================================================= +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +cmake_minimum_required(VERSION 3.20.1) + +include(rapids-cmake) +include(rapids-cpm) +include(rapids-cuda) +include(rapids-find) + +rapids_cpm_init() + +rapids_cuda_init_architectures(STRINGS_UDF) + +# Create a project so that we can enable CUDA architectures in this file. +project( + strings-udf-cpp + VERSION ${strings_udf_version} + LANGUAGES CUDA +) + +rapids_find_package( + CUDAToolkit REQUIRED + BUILD_EXPORT_SET strings-udf-exports + INSTALL_EXPORT_SET strings-udf-exports +) + +include(${rapids-cmake-dir}/cpm/libcudacxx.cmake) +rapids_cpm_libcudacxx(BUILD_EXPORT_SET strings-udf-exports INSTALL_EXPORT_SET strings-udf-exports) + +add_library(cudf_strings_udf SHARED src/strings/udf/udf_apis.cu) +target_include_directories( + cudf_strings_udf PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>" +) + +set_target_properties( + cudf_strings_udf + PROPERTIES BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + INTERFACE_POSITION_INDEPENDENT_CODE ON +) + +set(UDF_CXX_FLAGS) +set(UDF_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) +target_compile_options( + cudf_strings_udf PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${UDF_CXX_FLAGS}>" + "$<$<COMPILE_LANGUAGE:CUDA>:${UDF_CUDA_FLAGS}>" +) +target_link_libraries(cudf_strings_udf PUBLIC cudf::cudf CUDA::nvrtc) +install(TARGETS cudf_strings_udf DESTINATION ./strings_udf/_lib/) + +# This function will copy the generated PTX file from its generator-specific location in the build +# tree into a specified location in the build tree from which we can install it.
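+# As an illustrative sketch only (exact object paths depend on the CMake generator and are
+# assumptions here), the helper script generated for a target `shim_70` would contain roughly:
+#   set(ptx_paths "CMakeFiles/shim_70.dir/src/strings/udf/shim.ptx")
+#   file(COPY ${ptx_paths} DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/../strings_udf")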
+function(copy_ptx_to_location target destination) + set(cmake_generated_file + "${CMAKE_CURRENT_BINARY_DIR}/cmake/cp_${target}_$<UPPER_CASE:$<CONFIG>>_ptx.cmake" + ) + file( + GENERATE + OUTPUT "${cmake_generated_file}" + CONTENT + " +set(ptx_paths \"$<TARGET_OBJECTS:${target}>\") +file(COPY \${ptx_paths} DESTINATION \"${destination}\")" + ) + + add_custom_target( + ${target}_cp_ptx ALL + COMMAND ${CMAKE_COMMAND} -P "${cmake_generated_file}" + DEPENDS $<TARGET_OBJECTS:${target}> + COMMENT "Copying PTX files to '${destination}'" + ) +endfunction() + +# Create the shim library for each architecture. +set(SHIM_CUDA_FLAGS --expt-relaxed-constexpr -rdc=true) + +foreach(arch IN LISTS CMAKE_CUDA_ARCHITECTURES) + set(tgt shim_${arch}) + + add_library(${tgt} OBJECT src/strings/udf/shim.cu) + + set_target_properties(${tgt} PROPERTIES CUDA_ARCHITECTURES ${arch} CUDA_PTX_COMPILATION ON) + + target_include_directories(${tgt} PUBLIC include) + target_compile_options(${tgt} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${SHIM_CUDA_FLAGS}>") + target_link_libraries(${tgt} PUBLIC cudf::cudf) + + copy_ptx_to_location(${tgt} "${CMAKE_CURRENT_BINARY_DIR}/../strings_udf") + install( + FILES $<TARGET_OBJECTS:${tgt}> + DESTINATION ./strings_udf + RENAME ${tgt}.ptx + ) +endforeach() diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/char_types.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/char_types.cuh new file mode 100644 index 00000000000..e28111fd1f2 --- /dev/null +++ b/python/strings_udf/cpp/include/cudf/strings/udf/char_types.cuh @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include <cudf/strings/char_types/char_types.hpp> +#include <cudf/strings/detail/char_tables.hpp> +#include <cudf/strings/detail/utf8.hpp> +#include <cudf/strings/string_view.cuh> + +namespace cudf { +namespace strings { +namespace udf { + +/** + * @brief Returns true if all characters in the string are of the type specified. + * + * The output will be false if the string is empty or has at least one character + * not of the specified type. If all characters fit the type then true is returned. + * + * To ignore all but specific types, set the `verify_types` to those types + * which should be checked. Otherwise, the default `ALL_TYPES` will verify all + * characters match `types`. + * + * @code{.pseudo} + * Examples: + * s = ['ab', 'a b', 'a7', 'a B'] + * all_characters_of_type('ab', LOWER) => true + * all_characters_of_type('a b', LOWER) => false + * all_characters_of_type('a7', LOWER) => false + * all_characters_of_type('a B', LOWER) => false + * all_characters_of_type('ab', LOWER, LOWER|UPPER) => true + * all_characters_of_type('a b', LOWER, LOWER|UPPER) => true + * all_characters_of_type('a7', LOWER, LOWER|UPPER) => true + * all_characters_of_type('a B', LOWER, LOWER|UPPER) => false + * @endcode + * + * @param flags_table Table of character-type flags + * @param d_str String for this operation + * @param types The character types to check in the string + * @param verify_types Only verify against these character types. + * Default `ALL_TYPES` means return `true` + * iff all characters match `types`.
+ * @return True if all characters match the type conditions + */ +__device__ inline bool all_characters_of_type( + cudf::strings::detail::character_flags_table_type* flags_table, + string_view d_str, + string_character_types types, + string_character_types verify_types = string_character_types::ALL_TYPES) +{ + bool check = !d_str.empty(); // require at least one character + size_type check_count = 0; + for (auto itr = d_str.begin(); check && (itr != d_str.end()); ++itr) { + auto code_point = cudf::strings::detail::utf8_to_codepoint(*itr); + // lookup flags in table by code-point + auto flag = code_point <= 0x00FFFF ? flags_table[code_point] : 0; + if ((verify_types & flag) || // should flag be verified + (flag == 0 && verify_types == ALL_TYPES)) // special edge case + { + check = (types & flag) > 0; + ++check_count; + } + } + return check && (check_count > 0); +} + +/** + * @brief Returns true if all characters are alphabetic only + * + * @param flags_table Table required for checking character types + * @param d_str Input string to check + * @return True if characters are alphabetic + */ +__device__ inline bool is_alpha(cudf::strings::detail::character_flags_table_type* flags_table, + string_view d_str) +{ + return all_characters_of_type(flags_table, d_str, string_character_types::ALPHA); +} + +/** + * @brief Returns true if all characters are alphanumeric only + * + * @param flags_table Table required for checking character types + * @param d_str Input string to check + * @return True if characters are alphanumeric + */ +__device__ inline bool is_alpha_numeric( + cudf::strings::detail::character_flags_table_type* flags_table, string_view d_str) +{ + return all_characters_of_type(flags_table, d_str, string_character_types::ALPHANUM); +} + +/** + * @brief Returns true if all characters are numeric only + * + * @param flags_table Table required for checking character types + * @param d_str Input string to check + * @return True if characters are numeric + */ +__device__ inline bool is_numeric(cudf::strings::detail::character_flags_table_type* flags_table, + string_view d_str) +{ + return all_characters_of_type(flags_table, d_str, string_character_types::NUMERIC); +} + +/** + * @brief Returns true if all characters are digits only + * + * @param flags_table Table required for checking character types + * @param d_str Input string to check + * @return True if characters are digits + */ +__device__ inline bool is_digit(cudf::strings::detail::character_flags_table_type* flags_table, + string_view d_str) +{ + return all_characters_of_type(flags_table, d_str, string_character_types::DIGIT); +} + +/** + * @brief Returns true if all characters are decimal only + * + * @param flags_table Table required for checking character types + * @param d_str Input string to check + * @return True if characters are decimal + */ +__device__ inline bool is_decimal(cudf::strings::detail::character_flags_table_type* flags_table, + string_view d_str) +{ + return all_characters_of_type(flags_table, d_str, string_character_types::DECIMAL); +} + +/** + * @brief Returns true if all characters are spaces only + * + * @param flags_table Table required for checking character types + * @param d_str Input string to check + * @return True if characters are spaces + */ +__device__ inline bool is_space(cudf::strings::detail::character_flags_table_type* flags_table, + string_view d_str) +{ + return all_characters_of_type(flags_table, d_str, string_character_types::SPACE); +} + +/** + * @brief Returns true if all characters are upper
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/search.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/search.cuh
new file mode 100644
index 00000000000..ef15886f1f5
--- /dev/null
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/search.cuh
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cudf/strings/string_view.cuh>
+
+namespace cudf {
+namespace strings {
+namespace udf {
+
+/**
+ * @brief Returns the number of times that the target string appears
+ * in the source string.
+ *
+ * If `start <= 0` the search begins at the beginning of the `source` string.
+ * If `end <= 0` or `end` is greater than the length of the `source` string,
+ * the search stops at the end of the string.
+ *
+ * @param source Source string to search
+ * @param target String to match within source
+ * @param start First character position within source to start the search
+ * @param end Last character position (exclusive) within source to search
+ * @return Number of matches
+ */
+__device__ inline cudf::size_type count(string_view const source,
+                                        string_view const target,
+                                        cudf::size_type start = 0,
+                                        cudf::size_type end   = -1)
+{
+  auto const tgt_length = target.length();
+  auto const src_length = source.length();
+
+  start = start < 0 ? 0 : start;
+  end   = (end < 0 || end > src_length) ? src_length : end;
+
+  if (tgt_length == 0) { return (end - start) + 1; }
+  cudf::size_type count = 0;
+  cudf::size_type pos   = start;
+  while (pos != cudf::string_view::npos) {
+    pos = source.find(target, pos, end - pos);
+    if (pos != cudf::string_view::npos) {
+      ++count;
+      pos += tgt_length;
+    }
+  }
+  return count;
+}
+
+} // namespace udf
+} // namespace strings
+} // namespace cudf
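count() above mirrors Python's str.count: non-overlapping matches, and an empty target matches once between every pair of characters (and at both ends). A short pure-Python model of the same algorithm, checked against str.count:

    # Pure-Python model of udf::count: non-overlapping occurrences of
    # `target` in `source[start:end]`, with Python's empty-target rule.
    def count(source, target, start=0, end=-1):
        start = max(start, 0)
        end = len(source) if (end < 0 or end > len(source)) else end
        if not target:
            return (end - start) + 1
        n, pos = 0, start
        while True:
            pos = source.find(target, pos, end)
            if pos == -1:
                return n
            n += 1
            pos += len(target)  # skip past the match: non-overlapping

    assert count("banana", "an") == "banana".count("an")   # 2
    assert count("aaaa", "aa") == "aaaa".count("aa")        # 2, not 3
    assert count("abc", "") == "abc".count("")              # 4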
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/starts_with.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/starts_with.cuh
new file mode 100644
index 00000000000..38c609ae505
--- /dev/null
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/starts_with.cuh
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cudf/strings/string_view.cuh>
+
+namespace cudf {
+namespace strings {
+namespace udf {
+
+/**
+ * @brief Returns true if the beginning of the specified string
+ * matches the given character array.
+ *
+ * @param dstr String to check
+ * @param tgt Character array encoded in UTF-8
+ * @param bytes Number of bytes to read from `tgt`
+ * @return true if `tgt` matches the beginning of `dstr`
+ */
+__device__ inline bool starts_with(cudf::string_view const dstr,
+                                   char const* tgt,
+                                   cudf::size_type bytes)
+{
+  if (bytes > dstr.size_bytes()) { return false; }
+  auto const start_str = cudf::string_view{dstr.data(), bytes};
+  return start_str.compare(tgt, bytes) == 0;
+}
+
+/**
+ * @brief Returns true if the beginning of the specified string
+ * matches the given target string.
+ *
+ * @param dstr String to check
+ * @param tgt String to match
+ * @return true if `tgt` matches the beginning of `dstr`
+ */
+__device__ inline bool starts_with(cudf::string_view const dstr, cudf::string_view const& tgt)
+{
+  return starts_with(dstr, tgt.data(), tgt.size_bytes());
+}
+
+/**
+ * @brief Returns true if the end of the specified string
+ * matches the given character array.
+ *
+ * @param dstr String to check
+ * @param tgt Character array encoded in UTF-8
+ * @param bytes Number of bytes to read from `tgt`
+ * @return true if `tgt` matches the end of `dstr`
+ */
+__device__ inline bool ends_with(cudf::string_view const dstr,
+                                 char const* tgt,
+                                 cudf::size_type bytes)
+{
+  if (bytes > dstr.size_bytes()) { return false; }
+  auto const end_str = cudf::string_view{dstr.data() + dstr.size_bytes() - bytes, bytes};
+  return end_str.compare(tgt, bytes) == 0;
+}
+
+/**
+ * @brief Returns true if the end of the specified string
+ * matches the given target string.
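Both helpers above work on raw UTF-8 bytes rather than decoded characters: they compare only the first (or last) `bytes` bytes of the string, which is safe because a UTF-8 prefix or suffix match is exactly a byte prefix or suffix match. The same idea on Python bytes objects:

    # What starts_with/ends_with do, modeled on the UTF-8 byte sequence.
    def starts_with(s: bytes, tgt: bytes) -> bool:
        if len(tgt) > len(s):
            return False
        return s[: len(tgt)] == tgt

    def ends_with(s: bytes, tgt: bytes) -> bool:
        if len(tgt) > len(s):
            return False
        return s[len(s) - len(tgt):] == tgt

    assert starts_with("héllo".encode(), "hé".encode())
    assert ends_with(b"shim_70.ptx", b".ptx")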
+ *
+ * @param dstr String to check
+ * @param tgt String to match
+ * @return true if `tgt` matches the end of `dstr`
+ */
+__device__ inline bool ends_with(cudf::string_view const dstr, cudf::string_view const& tgt)
+{
+  return ends_with(dstr, tgt.data(), tgt.size_bytes());
+}
+
+} // namespace udf
+} // namespace strings
+} // namespace cudf
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp b/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp
new file mode 100644
index 00000000000..6de9b91de08
--- /dev/null
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cudf/column/column_view.hpp>
+
+#include <rmm/device_buffer.hpp>
+
+#include <memory>
+
+namespace cudf {
+namespace strings {
+namespace udf {
+
+/**
+ * @brief Return a cudf::string_view array for the given strings column
+ *
+ * @param input Strings column to convert to a string_view array.
+ * @throw cudf::logic_error if input is not a strings column.
+ * @return Device buffer containing the array of cudf::string_view objects
+ */
+std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const input);
+
+} // namespace udf
+} // namespace strings
+} // namespace cudf
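to_string_view_array is the host-side entry point the Python layer uses to turn a strings column into a flat device array of string_view structs that a numba kernel can index. A hedged usage sketch; the Cython binding (cudf_jit_udf.pyx) appears later in this diff, and running this requires a GPU plus a built strings_udf:

    # Sketch: convert a cudf strings column into a device buffer of
    # cudf::string_view structs, ready to hand to a numba kernel.
    import cudf
    from strings_udf._lib.cudf_jit_udf import to_string_view_array

    col = cudf.core.column.as_column(["abc", "defg", ""])
    str_view_buffer = to_string_view_array(col)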
diff --git a/python/strings_udf/cpp/src/strings/udf/shim.cu b/python/strings_udf/cpp/src/strings/udf/shim.cu
new file mode 100644
index 00000000000..656861f9cd6
--- /dev/null
+++ b/python/strings_udf/cpp/src/strings/udf/shim.cu
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/strings/udf/char_types.cuh>
+#include <cudf/strings/udf/search.cuh>
+#include <cudf/strings/udf/starts_with.cuh>
+
+using namespace cudf::strings::udf;
+
+extern "C" __device__ int len(int* nb_retval, void const* str)
+{
+  auto sv    = reinterpret_cast<cudf::string_view const*>(str);
+  *nb_retval = sv->length();
+  return 0;
+}
+
+extern "C" __device__ int startswith(bool* nb_retval, void const* str, void const* substr)
+{
+  auto str_view    = reinterpret_cast<cudf::string_view const*>(str);
+  auto substr_view = reinterpret_cast<cudf::string_view const*>(substr);
+
+  *nb_retval = starts_with(*str_view, *substr_view);
+  return 0;
+}
+
+extern "C" __device__ int endswith(bool* nb_retval, void const* str, void const* substr)
+{
+  auto str_view    = reinterpret_cast<cudf::string_view const*>(str);
+  auto substr_view = reinterpret_cast<cudf::string_view const*>(substr);
+
+  *nb_retval = ends_with(*str_view, *substr_view);
+  return 0;
+}
+
+extern "C" __device__ int contains(bool* nb_retval, void const* str, void const* substr)
+{
+  auto str_view    = reinterpret_cast<cudf::string_view const*>(str);
+  auto substr_view = reinterpret_cast<cudf::string_view const*>(substr);
+
+  *nb_retval = (str_view->find(*substr_view) != cudf::string_view::npos);
+  return 0;
+}
+
+extern "C" __device__ int find(int* nb_retval, void const* str, void const* substr)
+{
+  auto str_view    = reinterpret_cast<cudf::string_view const*>(str);
+  auto substr_view = reinterpret_cast<cudf::string_view const*>(substr);
+
+  *nb_retval = str_view->find(*substr_view);
+  return 0;
+}
+
+extern "C" __device__ int rfind(int* nb_retval, void const* str, void const* substr)
+{
+  auto str_view    = reinterpret_cast<cudf::string_view const*>(str);
+  auto substr_view = reinterpret_cast<cudf::string_view const*>(substr);
+
+  *nb_retval = str_view->rfind(*substr_view);
+  return 0;
+}
+
+extern "C" __device__ int eq(bool* nb_retval, void const* str, void const* rhs)
+{
+  auto str_view = reinterpret_cast<cudf::string_view const*>(str);
+  auto rhs_view = reinterpret_cast<cudf::string_view const*>(rhs);
+
+  *nb_retval = (*str_view == *rhs_view);
+  return 0;
+}
+
+extern "C" __device__ int ne(bool* nb_retval, void const* str, void const* rhs)
+{
+  auto str_view = reinterpret_cast<cudf::string_view const*>(str);
+  auto rhs_view = reinterpret_cast<cudf::string_view const*>(rhs);
+
+  *nb_retval = (*str_view != *rhs_view);
+  return 0;
+}
+
+extern "C" __device__ int ge(bool* nb_retval, void const* str, void const* rhs)
+{
+  auto str_view = reinterpret_cast<cudf::string_view const*>(str);
+  auto rhs_view = reinterpret_cast<cudf::string_view const*>(rhs);
+
+  *nb_retval = (*str_view >= *rhs_view);
+  return 0;
+}
+
+extern "C" __device__ int le(bool* nb_retval, void const* str, void const* rhs)
+{
+  auto str_view = reinterpret_cast<cudf::string_view const*>(str);
+  auto rhs_view = reinterpret_cast<cudf::string_view const*>(rhs);
+
+  *nb_retval = (*str_view <= *rhs_view);
+  return 0;
+}
+
+extern "C" __device__ int gt(bool* nb_retval, void const* str, void const* rhs)
+{
+  auto str_view = reinterpret_cast<cudf::string_view const*>(str);
+  auto rhs_view = reinterpret_cast<cudf::string_view const*>(rhs);
+
+  *nb_retval = (*str_view > *rhs_view);
+  return 0;
+}
+
+extern "C" __device__ int lt(bool* nb_retval, void const* str, void const* rhs)
+{
+  auto str_view = reinterpret_cast<cudf::string_view const*>(str);
+  auto rhs_view = reinterpret_cast<cudf::string_view const*>(rhs);
+
+  *nb_retval = (*str_view < *rhs_view);
+  return 0;
+}
+
+extern "C" __device__ int pyislower(bool* nb_retval, void const* str, std::int64_t chars_table)
+{
+  auto str_view = reinterpret_cast<cudf::string_view const*>(str);
+
+  *nb_retval = is_lower(
+    reinterpret_cast<cudf::strings::detail::character_flags_table_type*>(chars_table), *str_view);
+  return 0;
+}
+
+extern "C" __device__ int pyisupper(bool* nb_retval, void const* str, std::int64_t chars_table)
+{
+  auto str_view = reinterpret_cast<cudf::string_view const*>(str);
+
+  *nb_retval = is_upper(
+    reinterpret_cast<cudf::strings::detail::character_flags_table_type*>(chars_table), *str_view);
+  return 0;
+}
+
+extern "C" __device__ int pyisspace(bool* nb_retval, void const* str, std::int64_t chars_table)
+{
+  auto str_view = reinterpret_cast<cudf::string_view const*>(str);
+
+  *nb_retval = is_space(
+    reinterpret_cast<cudf::strings::detail::character_flags_table_type*>(chars_table), *str_view);
+  return 0;
+}
+
+extern "C" __device__ int pyisdecimal(bool* nb_retval, void const* str, std::int64_t chars_table)
+{
+  auto str_view = reinterpret_cast<cudf::string_view const*>(str);
+
+  *nb_retval = is_decimal(
+    reinterpret_cast<cudf::strings::detail::character_flags_table_type*>(chars_table), *str_view);
+  return 0;
+}
+
+extern "C" __device__ int pyisnumeric(bool* nb_retval, void const* str, std::int64_t chars_table)
+{
+  auto str_view = reinterpret_cast<cudf::string_view const*>(str);
+
+  *nb_retval = is_numeric(
+    reinterpret_cast<cudf::strings::detail::character_flags_table_type*>(chars_table), *str_view);
+  return 0;
+}
+
+extern "C" __device__ int pyisdigit(bool* nb_retval, void const* str, std::int64_t chars_table)
+{
+  auto str_view = reinterpret_cast<cudf::string_view const*>(str);
+
+  *nb_retval = is_digit(
+    reinterpret_cast<cudf::strings::detail::character_flags_table_type*>(chars_table), *str_view);
+  return 0;
+}
+
+extern "C" __device__ int pyisalnum(bool* nb_retval, void const* str, std::int64_t chars_table)
+{
+  auto str_view = reinterpret_cast<cudf::string_view const*>(str);
+
+  *nb_retval = is_alpha_numeric(
+    reinterpret_cast<cudf::strings::detail::character_flags_table_type*>(chars_table), *str_view);
+  return 0;
+}
+
+extern "C" __device__ int pyisalpha(bool* nb_retval, void const* str, std::int64_t chars_table)
+{
+  auto str_view = reinterpret_cast<cudf::string_view const*>(str);
+
+  *nb_retval = is_alpha(
+    reinterpret_cast<cudf::strings::detail::character_flags_table_type*>(chars_table), *str_view);
+  return 0;
+}
+
+extern "C" __device__ int pycount(int* nb_retval, void const* str, void const* substr)
+{
+  auto str_view    = reinterpret_cast<cudf::string_view const*>(str);
+  auto substr_view = reinterpret_cast<cudf::string_view const*>(substr);
+
+  *nb_retval = count(*str_view, *substr_view);
+  return 0;
+}
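Every shim above follows the same calling convention: return an int status code and write the logical result through the first pointer argument, with strings passed as opaque void* addresses of cudf::string_view structs. That is exactly the ABI numba expects from cuda.declare_device, which is how lowering.py (later in this diff) binds to these symbols without ever spelling out the out-parameter. A sketch, where the pointer type stands in for the CPointer(string_view) defined in _typing.py:

    # How the Python side declares a shim such as
    #     int startswith(bool* nb_retval, void const* str, void const* substr)
    # numba hides the status return and *nb_retval, so only the logical
    # boolean(str, substr) signature is written out.
    from numba import cuda, types

    _STR_VIEW_PTR = types.CPointer(types.char)  # illustrative stand-in
    _string_view_startswith = cuda.declare_device(
        "startswith", types.boolean(_STR_VIEW_PTR, _STR_VIEW_PTR)
    )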
diff --git a/python/strings_udf/cpp/src/strings/udf/udf_apis.cu b/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
new file mode 100644
index 00000000000..dfef1be39f5
--- /dev/null
+++ b/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/strings/udf/udf_apis.hpp>
+
+#include <cudf/column/column_view.hpp>
+#include <cudf/strings/detail/utilities.cuh>
+#include <cudf/strings/strings_column_view.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+
+namespace cudf {
+namespace strings {
+namespace udf {
+namespace detail {
+
+std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const input,
+                                                         rmm::cuda_stream_view stream)
+{
+  return std::make_unique<rmm::device_buffer>(
+    std::move(cudf::strings::detail::create_string_vector_from_column(
+                cudf::strings_column_view(input), stream)
+                .release()));
+}
+
+} // namespace detail
+
+std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const input)
+{
+  return detail::to_string_view_array(input, rmm::cuda_stream_default);
+}
+
+} // namespace udf
+} // namespace strings
+} // namespace cudf
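One loose end from shim.cu: the pyis* entry points receive libcudf's character-flags table as a raw std::int64_t rather than a typed pointer, because the Python side can only bake a plain integer constant into the generated code. The pointer value comes from tables.pyx (later in this diff); a sketch of the handoff:

    # Sketch: obtain the device address of libcudf's character-flags table.
    from strings_udf._lib.tables import get_character_flags_table_ptr

    tbl_ptr = get_character_flags_table_ptr()  # numpy int64 pointer value
    # lowering.py (below) then passes it as the hidden chars_table argument:
    #     context.get_constant(types.int64, tbl_ptr)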
diff --git a/python/strings_udf/setup.cfg b/python/strings_udf/setup.cfg
new file mode 100644
index 00000000000..9f29b26b5e0
--- /dev/null
+++ b/python/strings_udf/setup.cfg
@@ -0,0 +1,41 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+[versioneer]
+VCS = git
+style = pep440
+versionfile_source = strings_udf/_version.py
+versionfile_build = strings_udf/_version.py
+tag_prefix = v
+parentdir_prefix = strings_udf-
+
+[isort]
+line_length=79
+multi_line_output=3
+include_trailing_comma=True
+force_grid_wrap=0
+combine_as_imports=True
+order_by_type=True
+known_dask=
+    dask
+    distributed
+    dask_cuda
+known_rapids=
+    rmm
+    cudf
+known_first_party=
+    strings_udf
+default_section=THIRDPARTY
+sections=FUTURE,STDLIB,THIRDPARTY,DASK,RAPIDS,FIRSTPARTY,LOCALFOLDER
+skip=
+    thirdparty
+    .eggs
+    .git
+    .hg
+    .mypy_cache
+    .tox
+    .venv
+    _build
+    buck-out
+    build
+    dist
+    __init__.py
diff --git a/python/strings_udf/setup.py b/python/strings_udf/setup.py
new file mode 100644
index 00000000000..c8cafe978f7
--- /dev/null
+++ b/python/strings_udf/setup.py
@@ -0,0 +1,81 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+import os
+import re
+import shutil
+
+import versioneer
+from setuptools import find_packages
+from skbuild import setup
+
+install_requires = ["numba>=0.53.1", "numpy", "cudf"]
+
+extras_require = {
+    "test": [
+        "pytest",
+    ]
+}
+
+
+def get_cuda_version_from_header(cuda_include_dir, delimiter=""):
+
+    cuda_version = None
+
+    with open(os.path.join(cuda_include_dir, "cuda.h"), encoding="utf-8") as f:
+        for line in f.readlines():
+            if re.search(r"#define CUDA_VERSION ", line) is not None:
+                cuda_version = line
+                break
+
+    if cuda_version is None:
+        raise TypeError("CUDA_VERSION not found in cuda.h")
+    cuda_version = int(cuda_version.split()[2])
+    return "%d%s%d" % (
+        cuda_version // 1000,
+        delimiter,
+        (cuda_version % 1000) // 10,
+    )
+
+
+CUDA_HOME = os.environ.get("CUDA_HOME", False)
+if not CUDA_HOME:
+    path_to_cuda_gdb = shutil.which("cuda-gdb")
+    if path_to_cuda_gdb is None:
+        raise OSError(
+            "Could not locate CUDA. "
+            "Please set the environment variable "
+            "CUDA_HOME to the path to the CUDA installation "
+            "and try again."
+        )
+    CUDA_HOME = os.path.dirname(os.path.dirname(path_to_cuda_gdb))
+
+if not os.path.isdir(CUDA_HOME):
+    raise OSError(f"Invalid CUDA_HOME: directory does not exist: {CUDA_HOME}")
+
+cuda_include_dir = os.path.join(CUDA_HOME, "include")
+
+setup(
+    name="strings_udf",
+    version=versioneer.get_version(),
+    description="Strings UDF Library",
+    url="https://github.com/rapidsai/cudf",
+    author="NVIDIA Corporation",
+    license="Apache 2.0",
+    classifiers=[
+        "Intended Audience :: Developers",
+        "Topic :: Database",
+        "Topic :: Scientific/Engineering",
+        "License :: OSI Approved :: Apache Software License",
+        "Programming Language :: Python",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+    ],
+    packages=find_packages(include=["strings_udf", "strings_udf.*"]),
+    package_data={
+        key: ["*.pxd"] for key in find_packages(include=["strings_udf._lib*"])
+    },
+    cmdclass=versioneer.get_cmdclass(),
+    install_requires=install_requires,
+    extras_require=extras_require,
+    zip_safe=False,
+)
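A worked example of the arithmetic in get_cuda_version_from_header above: cuda.h defines something like `#define CUDA_VERSION 11050`, where the encoding is major*1000 + minor*10.

    # 11050 -> major 11, minor 5; with delimiter="." the function yields "11.5".
    cuda_version = 11050
    major = cuda_version // 1000          # 11
    minor = (cuda_version % 1000) // 10   # 5
    assert "%d%s%d" % (major, ".", minor) == "11.5"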
diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py
new file mode 100644
index 00000000000..94bd2531779
--- /dev/null
+++ b/python/strings_udf/strings_udf/__init__.py
@@ -0,0 +1,75 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+import glob
+import os
+import re
+import subprocess
+import sys
+
+from numba import cuda
+from ptxcompiler.patch import CMD
+
+from . import _version
+
+ENABLED = False
+
+
+def compiler_from_ptx_file(path):
+    """Parse a PTX file header and extract the CUDA version used to compile it.
+
+    Here is an example PTX header that this function should parse:
+
+    // Generated by NVIDIA NVVM Compiler
+    //
+    // Compiler Build ID: CL-30672275
+    // Cuda compilation tools, release 11.5, V11.5.119
+    // Based on NVVM 7
+    """
+    with open(path) as f:
+        text = f.read()
+    major, minor = (
+        re.search(r"Cuda compilation tools, release ([0-9\.]+)", text)
+        .group(1)
+        .split(".")
+    )
+    return int(major), int(minor)
+
+
+# adapted from PTXCompiler
+cp = subprocess.run([sys.executable, "-c", CMD], capture_output=True)
+if cp.returncode == 0:
+    # must have a driver to proceed
+    versions = [int(s) for s in cp.stdout.strip().split()]
+    driver_version = tuple(versions[:2])
+    runtime_version = tuple(versions[2:])
+
+    # CUDA enhanced compatibility not yet enabled
+    if driver_version >= runtime_version:
+        # Load the highest compute capability file available that is no
+        # greater than the current device's.
+        files = glob.glob(
+            os.path.join(os.path.dirname(__file__), "shim_*.ptx")
+        )
+        if len(files) == 0:
+            raise RuntimeError(
+                "This strings_udf installation is missing the necessary PTX "
+                "files. Please file an issue reporting this error and how you "
+                "installed cudf and strings_udf."
+            )
+        dev = cuda.get_current_device()
+        cc = int("".join(str(x) for x in dev.compute_capability))
+        sms = [
+            int(os.path.basename(f).rstrip(".ptx").lstrip("shim_"))
+            for f in files
+        ]
+        selected_sm = max(sm for sm in sms if sm <= cc)
+        ptxpath = os.path.join(
+            os.path.dirname(__file__), f"shim_{selected_sm}.ptx"
+        )
+
+        if driver_version >= compiler_from_ptx_file(ptxpath):
+            ENABLED = True
+        else:
+            del ptxpath
+
+__version__ = _version.get_versions()["version"]
diff --git a/python/strings_udf/strings_udf/_lib/CMakeLists.txt b/python/strings_udf/strings_udf/_lib/CMakeLists.txt
new file mode 100644
index 00000000000..91069a43891
--- /dev/null
+++ b/python/strings_udf/strings_udf/_lib/CMakeLists.txt
@@ -0,0 +1,25 @@
+# =============================================================================
+# Copyright (c) 2022, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# ============================================================================= + +set(cython_sources cudf_jit_udf.pyx tables.pyx) +set(linked_libraries cudf::cudf cudf_strings_udf) +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" +) + +foreach(cython_module IN LISTS _RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/cpp") +endforeach() diff --git a/python/strings_udf/strings_udf/_lib/__init__.py b/python/strings_udf/strings_udf/_lib/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/strings_udf/strings_udf/_lib/cpp/__init__.pxd b/python/strings_udf/strings_udf/_lib/cpp/__init__.pxd new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd b/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd new file mode 100644 index 00000000000..fb8e3a949bf --- /dev/null +++ b/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd @@ -0,0 +1,20 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. + +from libc.stdint cimport uint8_t +from libcpp.memory cimport unique_ptr +from libcpp.string cimport string +from libcpp.vector cimport vector + +from cudf._lib.cpp.column.column cimport column +from cudf._lib.cpp.column.column_view cimport column_view +from cudf._lib.cpp.types cimport size_type +from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer + + +cdef extern from "cudf/strings/udf/udf_apis.hpp" namespace \ + "cudf::strings::udf" nogil: + cdef unique_ptr[device_buffer] to_string_view_array(column_view) except + + +cdef extern from "cudf/strings/detail/char_tables.hpp" namespace \ + "cudf::strings::detail" nogil: + cdef const uint8_t* get_character_flags_table() except + diff --git a/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx new file mode 100644 index 00000000000..bb1892a4d26 --- /dev/null +++ b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx @@ -0,0 +1,24 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. + +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move + +from cudf.core.buffer import Buffer + +from cudf._lib.column cimport Column +from cudf._lib.cpp.column.column cimport column, column_view +from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer + +from strings_udf._lib.cpp.strings_udf cimport ( + to_string_view_array as cpp_to_string_view_array, +) + + +def to_string_view_array(Column strings_col): + cdef unique_ptr[device_buffer] c_buffer + cdef column_view input_view = strings_col.view() + with nogil: + c_buffer = move(cpp_to_string_view_array(input_view)) + + device_buffer = DeviceBuffer.c_from_unique_ptr(move(c_buffer)) + return Buffer(device_buffer) diff --git a/python/strings_udf/strings_udf/_lib/tables.pyx b/python/strings_udf/strings_udf/_lib/tables.pyx new file mode 100644 index 00000000000..5443364a4a7 --- /dev/null +++ b/python/strings_udf/strings_udf/_lib/tables.pyx @@ -0,0 +1,14 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. 
+
+from libc.stdint cimport uint8_t, uintptr_t
+
+from strings_udf._lib.cpp.strings_udf cimport (
+    get_character_flags_table as cpp_get_character_flags_table,
+)
+
+import numpy as np
+
+
+def get_character_flags_table_ptr():
+    cdef const uint8_t* tbl_ptr = cpp_get_character_flags_table()
+    return np.int64(<uintptr_t>tbl_ptr)
diff --git a/python/strings_udf/strings_udf/_typing.py b/python/strings_udf/strings_udf/_typing.py
new file mode 100644
index 00000000000..2e4519a01fe
--- /dev/null
+++ b/python/strings_udf/strings_udf/_typing.py
@@ -0,0 +1,229 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+import operator
+
+import llvmlite.binding as ll
+from numba import types
+from numba.core.datamodel import default_manager
+from numba.core.extending import models, register_model
+from numba.core.typing import signature as nb_signature
+from numba.core.typing.templates import AbstractTemplate, AttributeTemplate
+from numba.cuda.cudadecl import registry as cuda_decl_registry
+from numba.cuda.cudadrv import nvvm
+
+data_layout = nvvm.data_layout
+
+# libcudf size_type
+size_type = types.int32
+
+# workaround for numba < 0.56
+if isinstance(data_layout, dict):
+    data_layout = data_layout[64]
+target_data = ll.create_target_data(data_layout)
+
+
+# String object definitions
+class DString(types.Type):
+    def __init__(self):
+        super().__init__(name="dstring")
+        llty = default_manager[self].get_value_type()
+        self.size_bytes = llty.get_abi_size(target_data)
+
+
+class StringView(types.Type):
+    def __init__(self):
+        super().__init__(name="string_view")
+        llty = default_manager[self].get_value_type()
+        self.size_bytes = llty.get_abi_size(target_data)
+
+
+@register_model(StringView)
+class stringview_model(models.StructModel):
+    # from string_view.hpp:
+    _members = (
+        # const char* _data{}
+        # Pointer to device memory containing the char array for this string
+        ("data", types.CPointer(types.char)),
+        # size_type _bytes{};
+        # Number of bytes in _data for this string
+        ("bytes", size_type),
+        # mutable size_type _length{};
+        # Number of characters in this string (computed)
+        ("length", size_type),
+    )
+
+    def __init__(self, dmm, fe_type):
+        super().__init__(dmm, fe_type, self._members)
+
+
+@register_model(DString)
+class dstring_model(models.StructModel):
+    # from dstring.hpp:
+    # private:
+    #   char* m_data{};
+    #   cudf::size_type m_bytes{};
+    #   cudf::size_type m_size{};
+
+    _members = (
+        ("m_data", types.CPointer(types.char)),
+        ("m_bytes", size_type),
+        ("m_size", size_type),
+    )
+
+    def __init__(self, dmm, fe_type):
+        super().__init__(dmm, fe_type, self._members)
+
+
+any_string_ty = (StringView, DString, types.StringLiteral)
+string_view = StringView()
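The struct model registered above has to match libcudf's cudf::string_view layout byte for byte, since kernels receive raw pointers into the buffer produced by to_string_view_array. A ctypes mirror of the same layout shows why size_bytes matters; the 16-byte figure assumes a 64-bit target:

    # ctypes sketch of the string_view layout registered above.
    import ctypes

    class StringViewMirror(ctypes.Structure):
        _fields_ = [
            ("data", ctypes.c_void_p),   # const char* _data
            ("bytes", ctypes.c_int32),   # size_type _bytes
            ("length", ctypes.c_int32),  # mutable size_type _length (computed)
        ]

    # 8-byte pointer + two 4-byte ints, 8-byte aligned -> 16 bytes total
    assert ctypes.sizeof(StringViewMirror) == 16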
+
+
+class StrViewArgHandler:
+    """
+    As part of Numba's preprocessing step, incoming function arguments are
+    modified based on the associated type for that argument that was used
+    to JIT the kernel. However, it only knows how to handle built-in array
+    types natively. With string UDFs, the jitted type is string_view*,
+    which numba does not know how to handle.
+
+    This class converts string_view* to raw pointer arguments, which Numba
+    knows how to use.
+
+    See numba.cuda.compiler._prepare_args for details.
+    """
+
+    def prepare_args(self, ty, val, **kwargs):
+        if isinstance(ty, types.CPointer) and isinstance(ty.dtype, StringView):
+            return types.uint64, val.ptr
+        else:
+            return ty, val
+
+
+str_view_arg_handler = StrViewArgHandler()
+
+
+# String functions
+@cuda_decl_registry.register_global(len)
+class StringLength(AbstractTemplate):
+    """
+    Provide the length of a cudf::string_view-like struct.
+    """
+
+    def generic(self, args, kws):
+        if isinstance(args[0], any_string_ty) and len(args) == 1:
+            # length:
+            # string_view -> int32
+            # dstring -> int32
+            # literal -> int32
+            return nb_signature(size_type, args[0])
+
+
+def register_stringview_binaryop(op, retty):
+    """
+    Helper function wrapping numba's low level extension API. Provides
+    the boilerplate needed to associate a signature with a function or
+    operator expecting a string.
+    """
+
+    class StringViewBinaryOp(AbstractTemplate):
+        def generic(self, args, kws):
+            if isinstance(args[0], any_string_ty) and isinstance(
+                args[1], any_string_ty
+            ):
+                return nb_signature(retty, string_view, string_view)
+
+    cuda_decl_registry.register_global(op)(StringViewBinaryOp)
+
+
+register_stringview_binaryop(operator.eq, types.boolean)
+register_stringview_binaryop(operator.ne, types.boolean)
+register_stringview_binaryop(operator.lt, types.boolean)
+register_stringview_binaryop(operator.gt, types.boolean)
+register_stringview_binaryop(operator.le, types.boolean)
+register_stringview_binaryop(operator.ge, types.boolean)
+register_stringview_binaryop(operator.contains, types.boolean)
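These registrations are what make ordinary Python operators type-check on string_view values inside a jitted function. A hedged sketch of the end-user code they enable (the actual Series.apply plumbing lives in cudf proper, not in this diff; see also the tests at the end of the diff):

    # Once the typing above and the lowering in lowering.py are registered,
    # a UDF can compare and search strings with plain Python syntax.
    def udf(st):
        return st == "cudf" or "gpu" in st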
+
+
+def create_binary_attr(attrname, retty):
+    """
+    Helper function wrapping numba's low level extension API. Provides
+    the boilerplate needed to register a binary function of two string
+    objects as an attribute of one, e.g. `string.func(other)`.
+    """
+
+    class StringViewBinaryAttr(AbstractTemplate):
+        key = f"StringView.{attrname}"
+
+        def generic(self, args, kws):
+            return nb_signature(retty, string_view, recvr=self.this)
+
+    def attr(self, mod):
+        return types.BoundFunction(StringViewBinaryAttr, string_view)
+
+    return attr
+
+
+def create_identifier_attr(attrname):
+    """
+    Helper function wrapping numba's low level extension API. Provides
+    the boilerplate needed to register a unary function of a string
+    object as an attribute, e.g. `string.func()`.
+    """
+
+    class StringViewIdentifierAttr(AbstractTemplate):
+        key = f"StringView.{attrname}"
+
+        def generic(self, args, kws):
+            return nb_signature(types.boolean, recvr=self.this)
+
+    def attr(self, mod):
+        return types.BoundFunction(StringViewIdentifierAttr, string_view)
+
+    return attr
+
+
+class StringViewCount(AbstractTemplate):
+    key = "StringView.count"
+
+    def generic(self, args, kws):
+        return nb_signature(size_type, string_view, recvr=self.this)
+
+
+@cuda_decl_registry.register_attr
+class StringViewAttrs(AttributeTemplate):
+    key = string_view
+
+    def resolve_count(self, mod):
+        return types.BoundFunction(StringViewCount, string_view)
+
+
+# Build attributes for `MaskedType(string_view)`
+bool_binary_funcs = ["startswith", "endswith"]
+int_binary_funcs = ["find", "rfind"]
+id_unary_funcs = [
+    "isalpha",
+    "isalnum",
+    "isdecimal",
+    "isdigit",
+    "isupper",
+    "islower",
+    "isspace",
+    "isnumeric",
+]
+
+for func in bool_binary_funcs:
+    setattr(
+        StringViewAttrs,
+        f"resolve_{func}",
+        create_binary_attr(func, types.boolean),
+    )
+
+for func in int_binary_funcs:
+    setattr(
+        StringViewAttrs, f"resolve_{func}", create_binary_attr(func, size_type)
+    )
+
+for func in id_unary_funcs:
+    setattr(StringViewAttrs, f"resolve_{func}", create_identifier_attr(func))
+
+cuda_decl_registry.register_attr(StringViewAttrs)
diff --git a/python/strings_udf/strings_udf/_version.py b/python/strings_udf/strings_udf/_version.py
new file mode 100644
index 00000000000..14ff9ec314d
--- /dev/null
+++ b/python/strings_udf/strings_udf/_version.py
@@ -0,0 +1,711 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+# This file helps to compute a version number in source trees obtained from
+# git-archive tarball (such as those provided by githubs download-from-tag
+# feature). Distribution tarballs (built by setup.py sdist) and build
+# directories (produced by setup.py build) will contain a much shorter file
+# that just contains the computed version number.
+
+# This file is released into the public domain. Generated by
+# versioneer-0.23 (https://github.com/python-versioneer/python-versioneer)
+
+"""Git implementation of _version.py."""
+
+import errno
+import functools
+import os
+import re
+import subprocess
+import sys
+from typing import Callable, Dict
+
+
+def get_keywords():
+    """Get the keywords needed to look up the version information."""
+    # these strings will be replaced by git during git-archive.
+    # setup.py/versioneer.py will grep for the variable names, so they must
+    # each be defined on a line of their own. _version.py will just call
+    # get_keywords().
+ git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "v" + cfg.parentdir_prefix = "strings_udf-" + cfg.versionfile_source = "strings_udf/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + + return decorate + + +def run_command( + commands, args, cwd=None, verbose=False, hide_stderr=False, env=None +): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + + popen_kwargs = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen( + [command] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + **popen_kwargs, + ) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. 
When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r"\d", r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. 
"2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix) :] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r"\d", r): + continue + if verbose: + print("picking %s" % r) + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. + env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner( + GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True + ) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + f"{tag_prefix}[[:digit:]]*", + ], + cwd=root, + ) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner( + GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root + ) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. 
Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[: git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? + pieces["error"] = ( + "unable to parse git-describe output: '%s'" % describe_out + ) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix) :] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ + 0 + ].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post( + pieces["closest-tag"] + ) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % ( + post_version + 1, + pieces["distance"], + ) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords( + get_keywords(), cfg.tag_prefix, verbose + ) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. 
+        for _ in cfg.versionfile_source.split("/"):
+            root = os.path.dirname(root)
+    except NameError:
+        return {
+            "version": "0+unknown",
+            "full-revisionid": None,
+            "dirty": None,
+            "error": "unable to find root of source tree",
+            "date": None,
+        }
+
+    try:
+        pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
+        return render(pieces, cfg.style)
+    except NotThisMethod:
+        pass
+
+    try:
+        if cfg.parentdir_prefix:
+            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
+    except NotThisMethod:
+        pass
+
+    return {
+        "version": "0+unknown",
+        "full-revisionid": None,
+        "dirty": None,
+        "error": "unable to compute version",
+        "date": None,
+    }
diff --git a/python/strings_udf/strings_udf/lowering.py b/python/strings_udf/strings_udf/lowering.py
new file mode 100644
index 00000000000..fd965a7a187
--- /dev/null
+++ b/python/strings_udf/strings_udf/lowering.py
@@ -0,0 +1,287 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+import operator
+from functools import partial
+
+from numba import cuda, types
+from numba.core import cgutils
+from numba.core.typing import signature as nb_signature
+from numba.cuda.cudadrv import nvvm
+from numba.cuda.cudaimpl import (
+    lower as cuda_lower,
+    registry as cuda_lowering_registry,
+)
+
+from strings_udf._lib.tables import get_character_flags_table_ptr
+from strings_udf._typing import size_type, string_view
+
+character_flags_table_ptr = get_character_flags_table_ptr()
+
+
+# read-only functions
+# We will provide only one overload for this set of functions, which will
+# expect a string_view. When a literal is encountered, numba will promote it to
+# a string_view whereas when a dstring is encountered, numba will convert it to
+# a view via its native view() method.
+
+_STR_VIEW_PTR = types.CPointer(string_view)
+
+
+# CUDA function declarations
+_string_view_len = cuda.declare_device("len", size_type(_STR_VIEW_PTR))
+
+
+def _declare_binary_func(lhs, rhs, out, name):
+    # Declare a binary function
+    return cuda.declare_device(
+        name,
+        out(lhs, rhs),
+    )
+
+
+# A binary function of the form f(string, string) -> bool
+_declare_bool_str_str_func = partial(
+    _declare_binary_func, _STR_VIEW_PTR, _STR_VIEW_PTR, types.boolean
+)
+
+_declare_size_type_str_str_func = partial(
+    _declare_binary_func, _STR_VIEW_PTR, _STR_VIEW_PTR, size_type
+)
+
+_string_view_contains = _declare_bool_str_str_func("contains")
+_string_view_eq = _declare_bool_str_str_func("eq")
+_string_view_ne = _declare_bool_str_str_func("ne")
+_string_view_ge = _declare_bool_str_str_func("ge")
+_string_view_le = _declare_bool_str_str_func("le")
+_string_view_gt = _declare_bool_str_str_func("gt")
+_string_view_lt = _declare_bool_str_str_func("lt")
+_string_view_startswith = _declare_bool_str_str_func("startswith")
+_string_view_endswith = _declare_bool_str_str_func("endswith")
+_string_view_find = _declare_size_type_str_str_func("find")
+_string_view_rfind = _declare_size_type_str_str_func("rfind")
+
+
+# A binary function of the form f(string, int) -> bool
+_declare_bool_str_int_func = partial(
+    _declare_binary_func, _STR_VIEW_PTR, types.int64, types.boolean
+)
+
+
+_string_view_isdigit = _declare_bool_str_int_func("pyisdigit")
+_string_view_isalnum = _declare_bool_str_int_func("pyisalnum")
+_string_view_isalpha = _declare_bool_str_int_func("pyisalpha")
+_string_view_isdecimal = _declare_bool_str_int_func("pyisdecimal")
+_string_view_isnumeric = _declare_bool_str_int_func("pyisnumeric")
+_string_view_isspace =
_declare_bool_str_int_func("pyisspace") +_string_view_isupper = _declare_bool_str_int_func("pyisupper") +_string_view_islower = _declare_bool_str_int_func("pyislower") + + +_string_view_count = cuda.declare_device( + "pycount", + size_type(_STR_VIEW_PTR, _STR_VIEW_PTR), +) + + +# casts +@cuda_lowering_registry.lower_cast(types.StringLiteral, string_view) +def cast_string_literal_to_string_view(context, builder, fromty, toty, val): + """ + Cast a literal to a string_view + """ + # create an empty string_view + sv = cgutils.create_struct_proxy(string_view)(context, builder) + + # set the empty strview data pointer to point to the literal value + s = context.insert_const_string(builder.module, fromty.literal_value) + sv.data = context.insert_addrspace_conv( + builder, s, nvvm.ADDRSPACE_CONSTANT + ) + sv.length = context.get_constant(size_type, len(fromty.literal_value)) + sv.bytes = context.get_constant( + size_type, len(fromty.literal_value.encode("UTF-8")) + ) + + return sv._getvalue() + + +# String function implementations +def call_len_string_view(st): + return _string_view_len(st) + + +@cuda_lower(len, string_view) +def len_impl(context, builder, sig, args): + sv_ptr = builder.alloca(args[0].type) + builder.store(args[0], sv_ptr) + result = context.compile_internal( + builder, + call_len_string_view, + nb_signature(size_type, _STR_VIEW_PTR), + (sv_ptr,), + ) + + return result + + +def create_binary_string_func(binary_func, retty): + """ + Provide a wrapper around numba's low-level extension API which + produces the boilerplate needed to implement a binary function + of two strings. + """ + + def deco(cuda_func): + @cuda_lower(binary_func, string_view, string_view) + def binary_func_impl(context, builder, sig, args): + lhs_ptr = builder.alloca(args[0].type) + rhs_ptr = builder.alloca(args[1].type) + + builder.store(args[0], lhs_ptr) + builder.store(args[1], rhs_ptr) + result = context.compile_internal( + builder, + cuda_func, + nb_signature(retty, _STR_VIEW_PTR, _STR_VIEW_PTR), + (lhs_ptr, rhs_ptr), + ) + + return result + + return binary_func_impl + + return deco + + +@create_binary_string_func(operator.contains, types.boolean) +def contains_impl(st, substr): + return _string_view_contains(st, substr) + + +@create_binary_string_func(operator.eq, types.boolean) +def eq_impl(st, rhs): + return _string_view_eq(st, rhs) + + +@create_binary_string_func(operator.ne, types.boolean) +def ne_impl(st, rhs): + return _string_view_ne(st, rhs) + + +@create_binary_string_func(operator.ge, types.boolean) +def ge_impl(st, rhs): + return _string_view_ge(st, rhs) + + +@create_binary_string_func(operator.le, types.boolean) +def le_impl(st, rhs): + return _string_view_le(st, rhs) + + +@create_binary_string_func(operator.gt, types.boolean) +def gt_impl(st, rhs): + return _string_view_gt(st, rhs) + + +@create_binary_string_func(operator.lt, types.boolean) +def lt_impl(st, rhs): + return _string_view_lt(st, rhs) + + +@create_binary_string_func("StringView.startswith", types.boolean) +def startswith_impl(sv, substr): + return _string_view_startswith(sv, substr) + + +@create_binary_string_func("StringView.endswith", types.boolean) +def endswith_impl(sv, substr): + return _string_view_endswith(sv, substr) + + +@create_binary_string_func("StringView.count", size_type) +def count_impl(st, substr): + return _string_view_count(st, substr) + + +@create_binary_string_func("StringView.find", size_type) +def find_impl(sv, substr): + return _string_view_find(sv, substr) + + +@create_binary_string_func("StringView.rfind", 
size_type) +def rfind_impl(sv, substr): + return _string_view_rfind(sv, substr) + + +def create_unary_identifier_func(id_func): + """ + Provide a wrapper around numba's low-level extension API which + produces the boilerplate needed to implement a unary function + of a string. + """ + + def deco(cuda_func): + @cuda_lower(id_func, string_view) + def id_func_impl(context, builder, sig, args): + str_ptr = builder.alloca(args[0].type) + builder.store(args[0], str_ptr) + + # Lookup table required for conversion functions + # must be resolved at runtime after context initialization, + # therefore cannot be a global variable + tbl_ptr = context.get_constant( + types.int64, character_flags_table_ptr + ) + result = context.compile_internal( + builder, + cuda_func, + nb_signature(types.boolean, _STR_VIEW_PTR, types.int64), + (str_ptr, tbl_ptr), + ) + + return result + + return id_func_impl + + return deco + + +@create_unary_identifier_func("StringView.isdigit") +def isdigit_impl(st, tbl): + return _string_view_isdigit(st, tbl) + + +@create_unary_identifier_func("StringView.isalnum") +def isalnum_impl(st, tbl): + return _string_view_isalnum(st, tbl) + + +@create_unary_identifier_func("StringView.isalpha") +def isalpha_impl(st, tbl): + return _string_view_isalpha(st, tbl) + + +@create_unary_identifier_func("StringView.isnumeric") +def isnumeric_impl(st, tbl): + return _string_view_isnumeric(st, tbl) + + +@create_unary_identifier_func("StringView.isdecimal") +def isdecimal_impl(st, tbl): + return _string_view_isdecimal(st, tbl) + + +@create_unary_identifier_func("StringView.isspace") +def isspace_impl(st, tbl): + return _string_view_isspace(st, tbl) + + +@create_unary_identifier_func("StringView.isupper") +def isupper_impl(st, tbl): + return _string_view_isupper(st, tbl) + + +@create_unary_identifier_func("StringView.islower") +def islower_impl(st, tbl): + return _string_view_islower(st, tbl) diff --git a/python/strings_udf/strings_udf/tests/test_string_udfs.py b/python/strings_udf/strings_udf/tests/test_string_udfs.py new file mode 100644 index 00000000000..9038f4cc79a --- /dev/null +++ b/python/strings_udf/strings_udf/tests/test_string_udfs.py @@ -0,0 +1,249 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. 
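The tests below all share one harness: a scalar Python function is compiled as a CUDA device function and applied elementwise by a one-dimensional kernel. As a minimal sketch of that pattern, assuming only numba and numpy and using numeric data in place of string_view (the real harness in this file additionally links `strings_udf.ptxpath` and passes the `str_view_arg_handler` extension so kernels can accept arrays of `cudf::string_view`; the names `make_kernel` and `double` here are illustrative only):

    # Toy sketch of the elementwise-kernel harness used by the tests below.
    # Numeric data stands in for string_view, so no PTX linking is needed.
    import numpy as np
    from numba import cuda

    def make_kernel(func):
        scalar = cuda.jit(device=True)(func)  # compile the scalar UDF

        @cuda.jit
        def kernel(inp, out):
            i = cuda.grid(1)  # one thread per element
            if i < out.size:
                out[i] = scalar(inp[i])

        return kernel

    def double(x):  # illustrative scalar function
        return x * 2

    inp = np.arange(10, dtype=np.int64)
    out = np.zeros_like(inp)
    make_kernel(double).forall(len(inp))(inp, out)
    assert (out == inp * 2).all()

`forall` chooses a launch configuration for `len(inp)` threads, which is why each test below only needs to supply the element count.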
+ +import numba +import numpy as np +import pandas as pd +import pytest +from numba import cuda +from numba.core.typing import signature as nb_signature +from numba.types import CPointer, void + +import cudf +from cudf.testing._utils import assert_eq + +import strings_udf +from strings_udf._lib.cudf_jit_udf import to_string_view_array +from strings_udf._typing import str_view_arg_handler, string_view + +if not strings_udf.ENABLED: + pytest.skip("Strings UDF not enabled.", allow_module_level=True) + + +def get_kernel(func, dtype): + """ + Create a kernel for testing a single scalar string function + Allocates an output vector with a dtype specified by the caller + The returned kernel executes the input function on each data + element of the input and returns the output into the output vector + """ + + func = cuda.jit(device=True)(func) + outty = numba.np.numpy_support.from_dtype(dtype) + sig = nb_signature(void, CPointer(string_view), outty[::1]) + + @cuda.jit( + sig, link=[strings_udf.ptxpath], extensions=[str_view_arg_handler] + ) + def kernel(input_strings, output_col): + id = cuda.grid(1) + if id < len(output_col): + st = input_strings[id] + result = func(st) + output_col[id] = result + + return kernel + + +def run_udf_test(data, func, dtype): + """ + Run a test kernel on a set of input data + Converts the input data to a cuDF column and subsequently + to an array of cudf::string_view objects. It then creates + a CUDA kernel using get_kernel which calls the input function, + and then assembles the result back into a cuDF series before + comparing it with the equivalent pandas result + """ + dtype = np.dtype(dtype) + cudf_column = cudf.core.column.as_column(data) + str_view_ary = to_string_view_array(cudf_column) + + output_ary = cudf.core.column.column_empty(len(data), dtype=dtype) + + kernel = get_kernel(func, dtype) + kernel.forall(len(data))(str_view_ary, output_ary) + got = cudf.Series(output_ary, dtype=dtype) + expect = pd.Series(data).apply(func) + assert_eq(expect, got, check_dtype=False) + + +@pytest.fixture(scope="module") +def data(): + return [ + "abc", + "ABC", + "AbC", + "123", + "123aBc", + "123@.!", + "", + "rapids ai", + "gpu", + "True", + "False", + "1.234", + ".123a", + "0.013", + "1.0", + "01", + "20010101", + "cudf", + "cuda", + "gpu", + ] + + +@pytest.fixture(params=["cudf", "cuda", "gpucudf", "abc"]) +def rhs(request): + return request.param + + +@pytest.fixture(params=["c", "cu", "2", "abc", "", "gpu"]) +def substr(request): + return request.param + + +def test_string_udf_eq(data, rhs): + def func(st): + return st == rhs + + run_udf_test(data, func, "bool") + + +def test_string_udf_ne(data, rhs): + def func(st): + return st != rhs + + run_udf_test(data, func, "bool") + + +def test_string_udf_ge(data, rhs): + def func(st): + return st >= rhs + + run_udf_test(data, func, "bool") + + +def test_string_udf_le(data, rhs): + def func(st): + return st <= rhs + + run_udf_test(data, func, "bool") + + +def test_string_udf_gt(data, rhs): + def func(st): + return st > rhs + + run_udf_test(data, func, "bool") + + +def test_string_udf_lt(data, rhs): + def func(st): + return st < rhs + + run_udf_test(data, func, "bool") + + +def test_string_udf_contains(data, substr): + def func(st): + return substr in st + + run_udf_test(data, func, "bool") + + +def test_string_udf_count(data, substr): + def func(st): + return st.count(substr) + + run_udf_test(data, func, "int32") + + +def test_string_udf_find(data, substr): + def func(st): + return st.find(substr) + + run_udf_test(data, func, 
"int32") + + +def test_string_udf_endswith(data, substr): + def func(st): + return st.endswith(substr) + + run_udf_test(data, func, "bool") + + +def test_string_udf_isalnum(data): + def func(st): + return st.isalnum() + + run_udf_test(data, func, "bool") + + +def test_string_udf_isalpha(data): + def func(st): + return st.isalpha() + + run_udf_test(data, func, "bool") + + +def test_string_udf_isdecimal(data): + def func(st): + return st.isdecimal() + + run_udf_test(data, func, "bool") + + +def test_string_udf_isdigit(data): + def func(st): + return st.isdigit() + + run_udf_test(data, func, "bool") + + +def test_string_udf_islower(data): + def func(st): + return st.islower() + + run_udf_test(data, func, "bool") + + +def test_string_udf_isnumeric(data): + def func(st): + return st.isnumeric() + + run_udf_test(data, func, "bool") + + +def test_string_udf_isspace(data): + def func(st): + return st.isspace() + + run_udf_test(data, func, "bool") + + +def test_string_udf_isupper(data): + def func(st): + return st.isupper() + + run_udf_test(data, func, "bool") + + +def test_string_udf_len(data): + def func(st): + return len(st) + + run_udf_test(data, func, "int64") + + +def test_string_udf_rfind(data, substr): + def func(st): + return st.rfind(substr) + + run_udf_test(data, func, "int32") + + +def test_string_udf_startswith(data, substr): + def func(st): + return st.startswith(substr) + + run_udf_test(data, func, "bool") diff --git a/python/strings_udf/versioneer.py b/python/strings_udf/versioneer.py new file mode 100644 index 00000000000..6194b6a5698 --- /dev/null +++ b/python/strings_udf/versioneer.py @@ -0,0 +1,2245 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. + +# Version: 0.23 + +"""The Versioneer - like a rocketeer, but for versions. + +The Versioneer +============== + +* like a rocketeer, but for versions! +* https://github.com/python-versioneer/python-versioneer +* Brian Warner +* License: Public Domain (CC0-1.0) +* Compatible with: Python 3.7, 3.8, 3.9, 3.10 and pypy3 +* [![Latest Version][pypi-image]][pypi-url] +* [![Build Status][travis-image]][travis-url] + +This is a tool for managing a recorded version number in +distutils/setuptools-based python projects. The goal is to +remove the tedious and error-prone "update the embedded version string" +step from your release process. Making a new release should be as easy +as recording a new tag in your version-control +system, and maybe making new tarballs. + + +## Quick Install + +* `pip install versioneer` to somewhere in your $PATH +* add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md)) +* run `versioneer install` in your source tree, commit the results +* Verify version information with `python setup.py version` + +## Version Identifiers + +Source trees come from a variety of places: + +* a version-control system checkout (mostly used by developers) +* a nightly tarball, produced by build automation +* a snapshot tarball, produced by a web-based VCS browser, like github's + "tarball from tag" feature +* a release tarball, produced by "setup.py sdist", distributed through PyPI + +Within each source tree, the version identifier (either a string or a number, +this tool is format-agnostic) can come from a variety of places: + +* ask the VCS tool itself, e.g. 
"git describe" (for checkouts), which knows + about recent "tags" and an absolute revision-id +* the name of the directory into which the tarball was unpacked +* an expanded VCS keyword ($Id$, etc) +* a `_version.py` created by some earlier build step + +For released software, the version identifier is closely related to a VCS +tag. Some projects use tag names that include more than just the version +string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool +needs to strip the tag prefix to extract the version identifier. For +unreleased software (between tags), the version identifier should provide +enough information to help developers recreate the same tree, while also +giving them an idea of roughly how old the tree is (after version 1.2, before +version 1.3). Many VCS systems can report a description that captures this, +for example `git describe --tags --dirty --always` reports things like +"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the +0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has +uncommitted changes). + +The version identifier is used for multiple purposes: + +* to allow the module to self-identify its version: `myproject.__version__` +* to choose a name and prefix for a 'setup.py sdist' tarball + +## Theory of Operation + +Versioneer works by adding a special `_version.py` file into your source +tree, where your `__init__.py` can import it. This `_version.py` knows how to +dynamically ask the VCS tool for version information at import time. + +`_version.py` also contains `$Revision$` markers, and the installation +process marks `_version.py` to have this marker rewritten with a tag name +during the `git archive` command. As a result, generated tarballs will +contain enough information to get the proper version. + +To allow `setup.py` to compute a version too, a `versioneer.py` is added to +the top level of your source tree, next to `setup.py` and the `setup.cfg` +that configures it. This overrides several distutils/setuptools commands to +compute the version when invoked, and changes `setup.py build` and `setup.py +sdist` to replace `_version.py` with a small static file that contains just +the generated version data. + +## Installation + +See [INSTALL.md](./INSTALL.md) for detailed installation instructions. + +## Version-String Flavors + +Code which uses Versioneer can learn about its version string at runtime by +importing `_version` from your main `__init__.py` file and running the +`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can +import the top-level `versioneer.py` and run `get_versions()`. + +Both functions return a dictionary with different flavors of version +information: + +* `['version']`: A condensed version string, rendered using the selected + style. This is the most commonly used value for the project's version + string. The default "pep440" style yields strings like `0.11`, + `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section + below for alternative styles. + +* `['full-revisionid']`: detailed revision identifier. For Git, this is the + full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". + +* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the + commit date in ISO 8601 format. This will be None if the date is not + available. + +* `['dirty']`: a boolean, True if the tree has uncommitted changes. 
Note that
+  this is only accurate if run in a VCS checkout, otherwise it is likely to
+  be False or None
+
+* `['error']`: if the version string could not be computed, this will be set
+  to a string describing the problem, otherwise it will be None. It may be
+  useful to throw an exception in setup.py if this is set, to avoid e.g.
+  creating tarballs with a version string of "unknown".
+
+Some variants are more useful than others. Including `full-revisionid` in a
+bug report should allow developers to reconstruct the exact code being tested
+(or indicate the presence of local changes that should be shared with the
+developers). `version` is suitable for display in an "about" box or a CLI
+`--version` output: it can be easily compared against release notes and lists
+of bugs fixed in various releases.
+
+The installer adds the following text to your `__init__.py` to place a basic
+version in `YOURPROJECT.__version__`:
+
+    from ._version import get_versions
+    __version__ = get_versions()['version']
+    del get_versions
+
+## Styles
+
+The setup.cfg `style=` configuration controls how the VCS information is
+rendered into a version string.
+
+The default style, "pep440", produces a PEP440-compliant string, equal to the
+un-prefixed tag name for actual releases, and containing an additional "local
+version" section with more detail for in-between builds. For Git, this is
+TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags
+--dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the
+tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and
+that this commit is two revisions ("+2") beyond the "0.11" tag. For released
+software (exactly equal to a known tag), the identifier will only contain the
+stripped tag, e.g. "0.11".
+
+Other styles are available. See [details.md](details.md) in the Versioneer
+source tree for descriptions.
+
+## Debugging
+
+Versioneer tries to avoid fatal errors: if something goes wrong, it will tend
+to return a version of "0+unknown". To investigate the problem, run `setup.py
+version`, which will run the version-lookup code in a verbose mode, and will
+display the full contents of `get_versions()` (including the `error` string,
+which may help identify what went wrong).
+
+## Known Limitations
+
+Some situations are known to cause problems for Versioneer. This section
+details the most significant ones. More can be found on the GitHub
+[issues page](https://github.com/python-versioneer/python-versioneer/issues).
+
+### Subprojects
+
+Versioneer has limited support for source trees in which `setup.py` is not in
+the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There
+are two common reasons why `setup.py` might not be in the root:
+
+* Source trees which contain multiple subprojects, such as
+  [Buildbot](https://github.com/buildbot/buildbot), which contains both
+  "master" and "slave" subprojects, each with their own `setup.py`,
+  `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI
+  distributions (and upload multiple independently-installable tarballs).
+* Source trees whose main purpose is to contain a C library, but which also
+  provide bindings to Python (and perhaps other languages) in subdirectories.
+
+Versioneer will look for `.git` in parent directories, and most operations
+should get the right version string.
However `pip` and `setuptools` have bugs +and implementation details which frequently cause `pip install .` from a +subproject directory to fail to find a correct version string (so it usually +defaults to `0+unknown`). + +`pip install --editable .` should work correctly. `setup.py install` might +work too. + +Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in +some later version. + +[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) +is tracking this issue. The discussion in +[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) +describes the issue from the Versioneer side in more detail. +[pip PR#3176](https://github.com/pypa/pip/pull/3176) and +[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve +pip to let Versioneer work correctly. + +Versioneer-0.16 and earlier only looked for a `.git` directory next to the +`setup.cfg`, so subprojects were completely unsupported with those releases. + +### Editable installs with setuptools <= 18.5 + +`setup.py develop` and `pip install --editable .` allow you to install a +project into a virtualenv once, then continue editing the source code (and +test) without re-installing after every change. + +"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a +convenient way to specify executable scripts that should be installed along +with the python package. + +These both work as expected when using modern setuptools. When using +setuptools-18.5 or earlier, however, certain operations will cause +`pkg_resources.DistributionNotFound` errors when running the entrypoint +script, which must be resolved by re-installing the package. This happens +when the install happens with one version, then the egg_info data is +regenerated while a different version is checked out. Many setup.py commands +cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into +a different virtualenv), so this can be surprising. + +[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) +describes this one, but upgrading to a newer version of setuptools should +probably resolve it. + + +## Updating Versioneer + +To upgrade your project to a new release of Versioneer, do the following: + +* install the new Versioneer (`pip install -U versioneer` or equivalent) +* edit `setup.cfg`, if necessary, to include any new configuration settings + indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. +* re-run `versioneer install` in your source tree, to replace + `SRC/_version.py` +* commit any changed files + +## Future Directions + +This tool is designed to make it easily extended to other version-control +systems: all VCS-specific components are in separate directories like +src/git/ . The top-level `versioneer.py` script is assembled from these +components by running make-versioneer.py . In the future, make-versioneer.py +will take a VCS name as an argument, and will construct a version of +`versioneer.py` that is specific to the given VCS. It might also take the +configuration arguments that are currently provided manually during +installation by editing setup.py . Alternatively, it might go the other +direction and include code from all supported VCS systems, reducing the +number of intermediate scripts. 
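Pulling the pieces above together: a project that vendors this file typically wires it into `setup.py` as shown below (a minimal sketch; the project name is illustrative, while `get_version()` and `get_cmdclass()` are the two entry points this file provides):

    # Minimal setup.py for a project vendoring versioneer.py at its root;
    # the project name is illustrative.
    from setuptools import setup

    import versioneer  # the vendored file next to setup.py

    setup(
        name="example",
        version=versioneer.get_version(),    # computed from VCS metadata
        cmdclass=versioneer.get_cmdclass(),  # overridden build/sdist commands
    )

`get_cmdclass()` is what swaps in the command variants described above, so the static `_version.py` lands in built distributions.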
+
+## Similar projects
+
+* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored
+  build-time dependency
+* [miniver](https://github.com/jbweston/miniver) - a lightweight
+  reimplementation of versioneer
+* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based
+  setuptools plugin
+
+## License
+
+To make Versioneer easier to embed, all its code is dedicated to the public
+domain. The `_version.py` that it creates is also in the public domain.
+Specifically, both are released under the Creative Commons "Public Domain
+Dedication" license (CC0-1.0), as described in
+https://creativecommons.org/publicdomain/zero/1.0/ .
+
+[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg
+[pypi-url]: https://pypi.python.org/pypi/versioneer/
+[travis-image]:
+https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg
+[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer
+
+"""
+# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring
+# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements
+# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error
+# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with
+# pylint:disable=attribute-defined-outside-init,too-many-arguments
+
+import configparser
+import errno
+import functools
+import json
+import os
+import re
+import subprocess
+import sys
+from typing import Callable, Dict
+
+
+class VersioneerConfig:
+    """Container for Versioneer configuration parameters."""
+
+
+def get_root():
+    """Get the project root directory.
+
+    We require that all commands are run from the project root, i.e. the
+    directory that contains setup.py, setup.cfg, and versioneer.py .
+    """
+    root = os.path.realpath(os.path.abspath(os.getcwd()))
+    setup_py = os.path.join(root, "setup.py")
+    versioneer_py = os.path.join(root, "versioneer.py")
+    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
+        # allow 'python path/to/setup.py COMMAND'
+        root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0])))
+        setup_py = os.path.join(root, "setup.py")
+        versioneer_py = os.path.join(root, "versioneer.py")
+    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
+        err = (
+            "Versioneer was unable to find the project root directory. "
+            "Versioneer requires setup.py to be executed from "
+            "its immediate directory (like 'python setup.py COMMAND'), "
+            "or in a way that lets it use sys.argv[0] to find the root "
+            "(like 'python path/to/setup.py COMMAND')."
+        )
+        raise VersioneerBadRootError(err)
+    try:
+        # Certain runtime workflows (setup.py install/develop in a setuptools
+        # tree) execute all dependencies in a single python process, so
+        # "versioneer" may be imported multiple times, and python's shared
+        # module-import table will cache the first one. So we can't use
+        # os.path.dirname(__file__), as that will find whichever
+        # versioneer.py was first imported, even in later projects.
+ my_path = os.path.realpath(os.path.abspath(__file__)) + me_dir = os.path.normcase(os.path.splitext(my_path)[0]) + vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) + if me_dir != vsr_dir: + print( + "Warning: build in %s is using versioneer.py from %s" + % (os.path.dirname(my_path), versioneer_py) + ) + except NameError: + pass + return root + + +def get_config_from_root(root): + """Read the project setup.cfg file to determine Versioneer config.""" + # This might raise OSError (if setup.cfg is missing), or + # configparser.NoSectionError (if it lacks a [versioneer] section), or + # configparser.NoOptionError (if it lacks "VCS="). See the docstring at + # the top of versioneer.py for instructions on writing your setup.cfg . + setup_cfg = os.path.join(root, "setup.cfg") + parser = configparser.ConfigParser() + with open(setup_cfg, "r") as cfg_file: + parser.read_file(cfg_file) + VCS = parser.get("versioneer", "VCS") # mandatory + + # Dict-like interface for non-mandatory entries + section = parser["versioneer"] + + cfg = VersioneerConfig() + cfg.VCS = VCS + cfg.style = section.get("style", "") + cfg.versionfile_source = section.get("versionfile_source") + cfg.versionfile_build = section.get("versionfile_build") + cfg.tag_prefix = section.get("tag_prefix") + if cfg.tag_prefix in ("''", '""', None): + cfg.tag_prefix = "" + cfg.parentdir_prefix = section.get("parentdir_prefix") + cfg.verbose = section.get("verbose") + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +# these dictionaries contain VCS-specific tools +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + HANDLERS.setdefault(vcs, {})[method] = f + return f + + return decorate + + +def run_command( + commands, args, cwd=None, verbose=False, hide_stderr=False, env=None +): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + + popen_kwargs = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen( + [command] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + **popen_kwargs, + ) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +LONG_VERSION_PY[ + "git" +] = r''' +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). 
Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.23 (https://github.com/python-versioneer/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys +from typing import Callable, Dict +import functools + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). + git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" + git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" + git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "%(STYLE)s" + cfg.tag_prefix = "%(TAG_PREFIX)s" + cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" + cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + + popen_kwargs = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None), **popen_kwargs) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %%s" %% dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %%s" %% (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %%s (error)" %% dispcmd) + print("stdout was %%s" %% stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. 
+ + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %%s but none started with prefix %%s" %% + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %%d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". 
+ tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%%s', no digits" %% ",".join(refs - tags)) + if verbose: + print("likely tags: %%s" %% ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %%s" %% r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. + env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %%s not under git control" %% root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. 
+ branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%%s'" + %% describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%%s' doesn't start with prefix '%%s'" + print(fmt %% (full_tag, tag_prefix)) + pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" + %% (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], + cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post( + pieces["closest-tag"] + ) + rendered = tag_version + if post_version is not None: + rendered += ".post%%d.dev%%d" %% ( + post_version + 1, pieces["distance"] + ) + else: + rendered += ".post0.dev%%d" %% (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%%d" %% pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%%s'" %% style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. 
+ for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} +''' + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". 
+ tags = {r for r in refs if re.search(r"\d", r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix) :] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r"\d", r): + continue + if verbose: + print("picking %s" % r) + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. + env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner( + GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True + ) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + f"{tag_prefix}[[:digit:]]*", + ], + cwd=root, + ) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner( + GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root + ) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. 
+ branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[: git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? + pieces["error"] = ( + "unable to parse git-describe output: '%s'" % describe_out + ) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix) :] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ + 0 + ].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def do_vcs_install(versionfile_source, ipy): + """Git-specific installation logic for Versioneer. + + For Git, this means creating/changing .gitattributes to mark _version.py + for export-subst keyword substitution. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + files = [versionfile_source] + if ipy: + files.append(ipy) + try: + my_path = __file__ + if my_path.endswith(".pyc") or my_path.endswith(".pyo"): + my_path = os.path.splitext(my_path)[0] + ".py" + versioneer_file = os.path.relpath(my_path) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) + present = False + try: + with open(".gitattributes", "r") as fobj: + for line in fobj: + if line.strip().startswith(versionfile_source): + if "export-subst" in line.strip().split()[1:]: + present = True + break + except OSError: + pass + if not present: + with open(".gitattributes", "a+") as fobj: + fobj.write(f"{versionfile_source} export-subst\n") + files.append(".gitattributes") + run_command(GITS, ["add", "--"] + files) + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. 
+ + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +SHORT_VERSION_PY = """ +# This file was generated by 'versioneer.py' (0.23) from +# revision-control system data, or from the parent directory name of an +# unpacked source archive. Distribution tarballs contain a pre-generated copy +# of this file. + +import json + +version_json = ''' +%s +''' # END VERSION_JSON + + +def get_versions(): + return json.loads(version_json) +""" + + +def versions_from_file(filename): + """Try to determine the version from _version.py if present.""" + try: + with open(filename) as f: + contents = f.read() + except OSError: + raise NotThisMethod("unable to read _version.py") + mo = re.search( + r"version_json = '''\n(.*)''' # END VERSION_JSON", + contents, + re.M | re.S, + ) + if not mo: + mo = re.search( + r"version_json = '''\r\n(.*)''' # END VERSION_JSON", + contents, + re.M | re.S, + ) + if not mo: + raise NotThisMethod("no version_json in _version.py") + return json.loads(mo.group(1)) + + +def write_to_version_file(filename, versions): + """Write the given version number to the given _version.py file.""" + os.unlink(filename) + contents = json.dumps( + versions, sort_keys=True, indent=1, separators=(",", ": ") + ) + with open(filename, "w") as f: + f.write(SHORT_VERSION_PY % contents) + + print("set %s to '%s'" % (filename, versions["version"])) + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            if pieces["branch"] != "master":
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0"
+        if pieces["branch"] != "master":
+            rendered += ".dev0"
+        rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def pep440_split_post(ver):
+    """Split pep440 version string at the post-release segment.
+
+    Returns the release segments before the post-release and the
+    post-release version number (or None if no post-release segment is
+    present).
+    """
+    vc = str.split(ver, ".post")
+    return vc[0], int(vc[1] or 0) if len(vc) == 2 else None
+
+
+def render_pep440_pre(pieces):
+    """TAG[.postN.devDISTANCE] -- No -dirty.
+
+    Exceptions:
+    1: no tags. 0.post0.devDISTANCE
+    """
+    if pieces["closest-tag"]:
+        if pieces["distance"]:
+            # update the post release segment
+            tag_version, post_version = pep440_split_post(
+                pieces["closest-tag"]
+            )
+            rendered = tag_version
+            if post_version is not None:
+                rendered += ".post%d.dev%d" % (
+                    post_version + 1,
+                    pieces["distance"],
+                )
+            else:
+                rendered += ".post0.dev%d" % (pieces["distance"])
+        else:
+            # no commits, use the tag as the version
+            rendered = pieces["closest-tag"]
+    else:
+        # exception #1
+        rendered = "0.post0.dev%d" % pieces["distance"]
+    return rendered
+
+
+def render_pep440_post(pieces):
+    """TAG[.postDISTANCE[.dev0]+gHEX] .
+
+    The ".dev0" means dirty. Note that .dev0 sorts backwards
+    (a dirty tree will appear "older" than the corresponding clean one),
+    but you shouldn't be releasing software with -dirty anyway.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%s" % pieces["short"]
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+        rendered += "+g%s" % pieces["short"]
+    return rendered
+
+
+def render_pep440_post_branch(pieces):
+    """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
+
+    The ".dev0" means not master branch.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["branch"] != "master":
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%s" % pieces["short"]
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["branch"] != "master":
+            rendered += ".dev0"
+        rendered += "+g%s" % pieces["short"]
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def render_pep440_old(pieces):
+    """TAG[.postDISTANCE[.dev0]] .
+
+    The ".dev0" means dirty.
+
+    Exceptions:
+    1: no tags. 
0.postDISTANCE[.dev0]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+    return rendered
+
+
+def render_git_describe(pieces):
+    """TAG[-DISTANCE-gHEX][-dirty].
+
+    Like 'git describe --tags --dirty --always'.
+
+    Exceptions:
+    1: no tags. HEX[-dirty] (note: no 'g' prefix)
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"]:
+            rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render_git_describe_long(pieces):
+    """TAG-DISTANCE-gHEX[-dirty].
+
+    Like 'git describe --tags --dirty --always --long'.
+    The distance/hash is unconditional.
+
+    Exceptions:
+    1: no tags. HEX[-dirty] (note: no 'g' prefix)
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render(pieces, style):
+    """Render the given version pieces into the requested style."""
+    if pieces["error"]:
+        return {
+            "version": "unknown",
+            "full-revisionid": pieces.get("long"),
+            "dirty": None,
+            "error": pieces["error"],
+            "date": None,
+        }
+
+    if not style or style == "default":
+        style = "pep440"  # the default
+
+    if style == "pep440":
+        rendered = render_pep440(pieces)
+    elif style == "pep440-branch":
+        rendered = render_pep440_branch(pieces)
+    elif style == "pep440-pre":
+        rendered = render_pep440_pre(pieces)
+    elif style == "pep440-post":
+        rendered = render_pep440_post(pieces)
+    elif style == "pep440-post-branch":
+        rendered = render_pep440_post_branch(pieces)
+    elif style == "pep440-old":
+        rendered = render_pep440_old(pieces)
+    elif style == "git-describe":
+        rendered = render_git_describe(pieces)
+    elif style == "git-describe-long":
+        rendered = render_git_describe_long(pieces)
+    else:
+        raise ValueError("unknown style '%s'" % style)
+
+    return {
+        "version": rendered,
+        "full-revisionid": pieces["long"],
+        "dirty": pieces["dirty"],
+        "error": None,
+        "date": pieces.get("date"),
+    }
+
+
+class VersioneerBadRootError(Exception):
+    """The project root directory is unknown or missing key files."""
+
+
+def get_versions(verbose=False):
+    """Get the project version from whatever source is available.
+
+    Returns a dict with keys 'version', 'full-revisionid', 'dirty',
+    'error', and 'date'.
+    """
+    if "versioneer" in sys.modules:
+        # see the discussion in cmdclass.py:get_cmdclass()
+        del sys.modules["versioneer"]
+
+    root = get_root()
+    cfg = get_config_from_root(root)
+
+    assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg"
+    handlers = HANDLERS.get(cfg.VCS)
+    assert handlers, "unrecognized VCS '%s'" % cfg.VCS
+    verbose = verbose or cfg.verbose
+    assert (
+        cfg.versionfile_source is not None
+    ), "please set versioneer.versionfile_source"
+    assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix"
+
+    versionfile_abs = os.path.join(root, cfg.versionfile_source)
+
+    # extract version from first of: _version.py, VCS command (e.g. 'git
+    # describe'), parentdir. 
This is meant to work for developers using a
+    # source checkout, for users of a tarball created by 'setup.py sdist',
+    # and for users of a tarball/zipball created by 'git archive' or github's
+    # download-from-tag feature or the equivalent in other VCSes.
+
+    get_keywords_f = handlers.get("get_keywords")
+    from_keywords_f = handlers.get("keywords")
+    if get_keywords_f and from_keywords_f:
+        try:
+            keywords = get_keywords_f(versionfile_abs)
+            ver = from_keywords_f(keywords, cfg.tag_prefix, verbose)
+            if verbose:
+                print("got version from expanded keyword %s" % ver)
+            return ver
+        except NotThisMethod:
+            pass
+
+    try:
+        ver = versions_from_file(versionfile_abs)
+        if verbose:
+            print("got version from file %s %s" % (versionfile_abs, ver))
+        return ver
+    except NotThisMethod:
+        pass
+
+    from_vcs_f = handlers.get("pieces_from_vcs")
+    if from_vcs_f:
+        try:
+            pieces = from_vcs_f(cfg.tag_prefix, root, verbose)
+            ver = render(pieces, cfg.style)
+            if verbose:
+                print("got version from VCS %s" % ver)
+            return ver
+        except NotThisMethod:
+            pass
+
+    try:
+        if cfg.parentdir_prefix:
+            ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
+            if verbose:
+                print("got version from parentdir %s" % ver)
+            return ver
+    except NotThisMethod:
+        pass
+
+    if verbose:
+        print("unable to compute version")
+
+    return {
+        "version": "0+unknown",
+        "full-revisionid": None,
+        "dirty": None,
+        "error": "unable to compute version",
+        "date": None,
+    }
+
+
+def get_version():
+    """Get the short version string for this project."""
+    return get_versions()["version"]
+
+
+def get_cmdclass(cmdclass=None):
+    """Get the custom setuptools subclasses used by Versioneer.
+
+    If the package uses a different cmdclass (e.g. one from numpy), it
+    should be provided as an argument.
+    """
+    if "versioneer" in sys.modules:
+        del sys.modules["versioneer"]
+        # this fixes the "python setup.py develop" case (also 'install' and
+        # 'easy_install .'), in which subdependencies of the main project are
+        # built (using setup.py bdist_egg) in the same python process. Assume
+        # a main project A and a dependency B, which use different versions
+        # of Versioneer. A's setup.py imports A's Versioneer, leaving it in
+        # sys.modules by the time B's setup.py is executed, causing B to run
+        # with the wrong versioneer. Setuptools wraps the sub-dep builds in a
+        # sandbox that restores sys.modules to its pre-build state, so the
+        # parent is protected against the child's "import versioneer". By
+        # removing ourselves from sys.modules here, before the child build
+        # happens, we protect the child from the parent's versioneer too.
+        # Also see
+        # https://github.com/python-versioneer/python-versioneer/issues/52
+
+    cmds = {} if cmdclass is None else cmdclass.copy()
+
+    # we add "version" to setuptools
+    from setuptools import Command
+
+    class cmd_version(Command):
+        description = "report generated version string"
+        user_options = []
+        boolean_options = []
+
+        def initialize_options(self):
+            pass
+
+        def finalize_options(self):
+            pass
+
+        def run(self):
+            vers = get_versions(verbose=True)
+            print("Version: %s" % vers["version"])
+            print(" full-revisionid: %s" % vers.get("full-revisionid"))
+            print(" dirty: %s" % vers.get("dirty"))
+            print(" date: %s" % vers.get("date"))
+            if vers["error"]:
+                print(" error: %s" % vers["error"])
+
+    cmds["version"] = cmd_version
+
+    # we override "build_py" in setuptools
+    #
+    # most invocation pathways end up running build_py:
+    #  distutils/build -> build_py
+    #  distutils/install -> distutils/build ->..
+    #  setuptools/bdist_wheel -> distutils/install ->..
+    #  setuptools/bdist_egg -> distutils/install_lib -> build_py
+    #  setuptools/install -> bdist_egg ->..
+    #  setuptools/develop -> ?
+    #  pip install:
+    #   copies source tree to a tempdir before running egg_info/etc
+    #   if .git isn't copied too, 'git describe' will fail
+    #   then does setup.py bdist_wheel, or sometimes setup.py install
+    #  setup.py egg_info -> ?
+
+    # pip install -e . and setuptools/editable_wheel will invoke build_py
+    # but the build_py command is not expected to copy any files.
+
+    # we override different "build_py" commands for both environments
+    if "build_py" in cmds:
+        _build_py = cmds["build_py"]
+    else:
+        from setuptools.command.build_py import build_py as _build_py
+
+    class cmd_build_py(_build_py):
+        def run(self):
+            root = get_root()
+            cfg = get_config_from_root(root)
+            versions = get_versions()
+            _build_py.run(self)
+            if getattr(self, "editable_mode", False):
+                # During editable installs `.py` and data files are
+                # not copied to build_lib
+                return
+            # now locate _version.py in the new build/ directory and replace
+            # it with an updated value
+            if cfg.versionfile_build:
+                target_versionfile = os.path.join(
+                    self.build_lib, cfg.versionfile_build
+                )
+                print("UPDATING %s" % target_versionfile)
+                write_to_version_file(target_versionfile, versions)
+
+    cmds["build_py"] = cmd_build_py
+
+    if "build_ext" in cmds:
+        _build_ext = cmds["build_ext"]
+    else:
+        from setuptools.command.build_ext import build_ext as _build_ext
+
+    class cmd_build_ext(_build_ext):
+        def run(self):
+            root = get_root()
+            cfg = get_config_from_root(root)
+            versions = get_versions()
+            _build_ext.run(self)
+            if self.inplace:
+                # build_ext --inplace will only build extensions in
+                # build/lib<..> dir with no _version.py to write to.
+                # As in-place builds will already have a _version.py
+                # in the module dir, we do not need to write one.
+                return
+            # now locate _version.py in the new build/ directory and replace
+            # it with an updated value
+            target_versionfile = os.path.join(
+                self.build_lib, cfg.versionfile_build
+            )
+            if not os.path.exists(target_versionfile):
+                print(
+                    f"Warning: {target_versionfile} does not exist, skipping "
+                    "version update. This can happen if you are running "
+                    "build_ext without first running build_py."
+                )
+                return
+            print("UPDATING %s" % target_versionfile)
+            write_to_version_file(target_versionfile, versions)
+
+    cmds["build_ext"] = cmd_build_ext
+
+    if "cx_Freeze" in sys.modules:  # cx_freeze enabled?
+        from cx_Freeze.dist import build_exe as _build_exe
+
+        # nczeczulin reports that py2exe won't like the pep440-style string
+        # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g.
+        # setup(console=[{
+        #   "version": versioneer.get_version().split("+", 1)[0],  # FILEVERSION
+        #   "product_version": versioneer.get_version(),
+        #   ...
+
+        class cmd_build_exe(_build_exe):
+            def run(self):
+                root = get_root()
+                cfg = get_config_from_root(root)
+                versions = get_versions()
+                target_versionfile = cfg.versionfile_source
+                print("UPDATING %s" % target_versionfile)
+                write_to_version_file(target_versionfile, versions)
+
+                _build_exe.run(self)
+                os.unlink(target_versionfile)
+                with open(cfg.versionfile_source, "w") as f:
+                    LONG = LONG_VERSION_PY[cfg.VCS]
+                    f.write(
+                        LONG
+                        % {
+                            "DOLLAR": "$",
+                            "STYLE": cfg.style,
+                            "TAG_PREFIX": cfg.tag_prefix,
+                            "PARENTDIR_PREFIX": cfg.parentdir_prefix,
+                            "VERSIONFILE_SOURCE": cfg.versionfile_source,
+                        }
+                    )
+
+        cmds["build_exe"] = cmd_build_exe
+        del cmds["build_py"]
+
+    if "py2exe" in sys.modules:  # py2exe enabled?
+        from py2exe.distutils_buildexe import py2exe as _py2exe
+
+        class cmd_py2exe(_py2exe):
+            def run(self):
+                root = get_root()
+                cfg = get_config_from_root(root)
+                versions = get_versions()
+                target_versionfile = cfg.versionfile_source
+                print("UPDATING %s" % target_versionfile)
+                write_to_version_file(target_versionfile, versions)
+
+                _py2exe.run(self)
+                os.unlink(target_versionfile)
+                with open(cfg.versionfile_source, "w") as f:
+                    LONG = LONG_VERSION_PY[cfg.VCS]
+                    f.write(
+                        LONG
+                        % {
+                            "DOLLAR": "$",
+                            "STYLE": cfg.style,
+                            "TAG_PREFIX": cfg.tag_prefix,
+                            "PARENTDIR_PREFIX": cfg.parentdir_prefix,
+                            "VERSIONFILE_SOURCE": cfg.versionfile_source,
+                        }
+                    )
+
+        cmds["py2exe"] = cmd_py2exe
+
+    # sdist farms its file list building out to egg_info
+    if "egg_info" in cmds:
+        _egg_info = cmds["egg_info"]
+    else:
+        from setuptools.command.egg_info import egg_info as _egg_info
+
+    class cmd_egg_info(_egg_info):
+        def find_sources(self):
+            # egg_info.find_sources builds the manifest list and writes it
+            # in one shot
+            super().find_sources()
+
+            # Modify the filelist and normalize it
+            root = get_root()
+            cfg = get_config_from_root(root)
+            self.filelist.append("versioneer.py")
+            if cfg.versionfile_source:
+                # There are rare cases where versionfile_source might not be
+                # included by default, so we must be explicit
+                self.filelist.append(cfg.versionfile_source)
+            self.filelist.sort()
+            self.filelist.remove_duplicates()
+
+            # The write method is hidden in the manifest_maker instance that
+            # generated the filelist and was thrown away
+            # We will instead replicate their final normalization (to unicode,
+            # and POSIX-style paths)
+            from setuptools import unicode_utils
+
+            normalized = [
+                unicode_utils.filesys_decode(f).replace(os.sep, "/")
+                for f in self.filelist.files
+            ]
+
+            manifest_filename = os.path.join(self.egg_info, "SOURCES.txt")
+            with open(manifest_filename, "w") as fobj:
+                fobj.write("\n".join(normalized))
+
+    cmds["egg_info"] = cmd_egg_info
+
+    # we override different "sdist" commands for both environments
+    if "sdist" in cmds:
+        _sdist = cmds["sdist"]
+    else:
+        from setuptools.command.sdist import sdist as _sdist
+
+    class cmd_sdist(_sdist):
+        def run(self):
+            versions = get_versions()
+            self._versioneer_generated_versions = versions
+            # unless we update this, the command will keep using the old
+            # version
+            self.distribution.metadata.version = versions["version"]
+            return _sdist.run(self)
+
+        def make_release_tree(self, base_dir, files):
+            root = get_root()
+            cfg = get_config_from_root(root)
+            _sdist.make_release_tree(self, base_dir, files)
+            # now locate _version.py in the new base_dir directory
+            # (remembering that it may be a hardlink) and replace it with an
+            # updated value
+            target_versionfile = os.path.join(base_dir, cfg.versionfile_source)
+            print("UPDATING %s" % 
target_versionfile) + write_to_version_file( + target_versionfile, self._versioneer_generated_versions + ) + + cmds["sdist"] = cmd_sdist + + return cmds + + +CONFIG_ERROR = """ +setup.cfg is missing the necessary Versioneer configuration. You need +a section like: + + [versioneer] + VCS = git + style = pep440 + versionfile_source = src/myproject/_version.py + versionfile_build = myproject/_version.py + tag_prefix = + parentdir_prefix = myproject- + +You will also need to edit your setup.py to use the results: + + import versioneer + setup(version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), ...) + +Please read the docstring in ./versioneer.py for configuration instructions, +edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. +""" + +SAMPLE_CONFIG = """ +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. + +[versioneer] +#VCS = git +#style = pep440 +#versionfile_source = +#versionfile_build = +#tag_prefix = +#parentdir_prefix = + +""" + +OLD_SNIPPET = """ +from ._version import get_versions +__version__ = get_versions()['version'] +del get_versions +""" + +INIT_PY_SNIPPET = """ +from . import {0} +__version__ = {0}.get_versions()['version'] +""" + + +def do_setup(): + """Do main VCS-independent setup function for installing Versioneer.""" + root = get_root() + try: + cfg = get_config_from_root(root) + except ( + OSError, + configparser.NoSectionError, + configparser.NoOptionError, + ) as e: + if isinstance(e, (OSError, configparser.NoSectionError)): + print( + "Adding sample versioneer config to setup.cfg", file=sys.stderr + ) + with open(os.path.join(root, "setup.cfg"), "a") as f: + f.write(SAMPLE_CONFIG) + print(CONFIG_ERROR, file=sys.stderr) + return 1 + + print(" creating %s" % cfg.versionfile_source) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + + ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") + if os.path.exists(ipy): + try: + with open(ipy, "r") as f: + old = f.read() + except OSError: + old = "" + module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] + snippet = INIT_PY_SNIPPET.format(module) + if OLD_SNIPPET in old: + print(" replacing boilerplate in %s" % ipy) + with open(ipy, "w") as f: + f.write(old.replace(OLD_SNIPPET, snippet)) + elif snippet not in old: + print(" appending to %s" % ipy) + with open(ipy, "a") as f: + f.write(snippet) + else: + print(" %s unmodified" % ipy) + else: + print(" %s doesn't exist, ok" % ipy) + ipy = None + + # Make VCS-specific changes. For git, this means creating/changing + # .gitattributes to mark _version.py for export-subst keyword + # substitution. 
+ do_vcs_install(cfg.versionfile_source, ipy) + return 0 + + +def scan_setup_py(): + """Validate the contents of setup.py against Versioneer's expectations.""" + found = set() + setters = False + errors = 0 + with open("setup.py", "r") as f: + for line in f.readlines(): + if "import versioneer" in line: + found.add("import") + if "versioneer.get_cmdclass()" in line: + found.add("cmdclass") + if "versioneer.get_version()" in line: + found.add("get_version") + if "versioneer.VCS" in line: + setters = True + if "versioneer.versionfile_source" in line: + setters = True + if len(found) != 3: + print("") + print("Your setup.py appears to be missing some important items") + print("(but I might be wrong). Please make sure it has something") + print("roughly like the following:") + print("") + print(" import versioneer") + print(" setup( version=versioneer.get_version(),") + print(" cmdclass=versioneer.get_cmdclass(), ...)") + print("") + errors += 1 + if setters: + print("You should remove lines like 'versioneer.VCS = ' and") + print("'versioneer.versionfile_source = ' . This configuration") + print("now lives in setup.cfg, and should be removed from setup.py") + print("") + errors += 1 + return errors + + +if __name__ == "__main__": + cmd = sys.argv[1] + if cmd == "setup": + errors = do_setup() + errors += scan_setup_py() + if errors: + sys.exit(1)
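For readers skimming the render_* helpers vendored above, it may help to see how the same git metadata comes out under a few of the styles. The sketch below is illustrative only and is not part of the patch: the tag, distance, hash, and branch values are invented, and it assumes this versioneer.py is importable as `versioneer` (e.g. from the repo root).

    # Illustrative only -- sample values are invented, not taken from the repo.
    import versioneer

    # The same shape of dict that git_pieces_from_vcs() builds.
    pieces = {
        "error": None,               # render() short-circuits if this is set
        "closest-tag": "22.10.0",    # nearest tag, with tag_prefix stripped
        "distance": 3,               # commits since that tag
        "short": "abc1234",          # abbreviated commit hash
        "long": "abc1234deadbeef",   # full commit hash (hypothetical)
        "dirty": True,               # uncommitted changes in the working tree
        "branch": "feature-x",       # only consulted by the *-branch styles
        "date": "2022-08-01T12:00:00+0000",
    }

    for style in ("pep440", "pep440-pre", "pep440-post", "git-describe"):
        print(style, "->", versioneer.render(pieces, style)["version"])
    # pep440       -> 22.10.0+3.gabc1234.dirty
    # pep440-pre   -> 22.10.0.post0.dev3   (ignores the dirty flag by design)
    # pep440-post  -> 22.10.0.post3.dev0+gabc1234
    # git-describe -> 22.10.0-3-gabc1234-dirty

Note that the pep440-branch and pep440-post-branch styles would additionally insert ".dev0" for this dict, since "branch" is not "master".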