From 3710d9b7597fb37892b0e1b2c3d4226aad39845b Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 21 Jul 2022 16:13:14 -0500 Subject: [PATCH] Fix style checks for string_udfs. --- .../cudf/_lib/cpp/column/column_factories.pxd | 1 + python/cudf/cudf/core/indexed_frame.py | 10 +- python/cudf/cudf/core/udf/masked_lowering.py | 14 +- python/cudf/cudf/core/udf/masked_typing.py | 10 +- python/cudf/cudf/core/udf/row_function.py | 9 +- python/cudf/cudf/core/udf/scalar_function.py | 2 +- python/cudf/cudf/core/udf/strings_lowering.py | 170 ++++++++------- python/cudf/cudf/core/udf/strings_typing.py | 100 +++++---- python/cudf/cudf/core/udf/utils.py | 44 ++-- python/cudf/cudf/tests/test_udf_masked_ops.py | 30 +-- python/cudf/setup.cfg | 3 +- python/strings_udf/cpp/tests/run_tests.sh | 2 + .../strings_udf/_lib/cpp/strings_udf.pxd | 19 +- .../strings_udf/_lib/cudf_jit_udf.pyx | 22 +- .../strings_udf/strings_udf/_lib/tables.pyx | 8 +- python/strings_udf/strings_udf/_typing.py | 197 +++++++----------- python/strings_udf/strings_udf/lowering.py | 182 ++++++++-------- .../strings_udf/tests/test_cmpops.py | 28 ++- .../strings_udf/tests/test_contains.py | 7 +- .../strings_udf/tests/test_count.py | 12 +- .../strings_udf/tests/test_endswith.py | 12 +- .../strings_udf/tests/test_find.py | 12 +- .../strings_udf/tests/test_isalnum.py | 6 +- .../strings_udf/tests/test_isalpha.py | 10 +- .../strings_udf/tests/test_isdecimal.py | 6 +- .../strings_udf/tests/test_isdigit.py | 6 +- .../strings_udf/tests/test_islower.py | 10 +- .../strings_udf/tests/test_isnumeric.py | 6 +- .../strings_udf/tests/test_isspace.py | 6 +- .../strings_udf/tests/test_isupper.py | 10 +- .../strings_udf/tests/test_rfind.py | 12 +- .../strings_udf/tests/test_startswith.py | 12 +- python/strings_udf/strings_udf/tests/utils.py | 5 +- 33 files changed, 512 insertions(+), 471 deletions(-) diff --git a/python/cudf/cudf/_lib/cpp/column/column_factories.pxd b/python/cudf/cudf/_lib/cpp/column/column_factories.pxd index d3148dd28cc..0f22e788bd7 100644 --- a/python/cudf/cudf/_lib/cpp/column/column_factories.pxd +++ b/python/cudf/cudf/_lib/cpp/column/column_factories.pxd @@ -6,6 +6,7 @@ from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.scalar.scalar cimport scalar from cudf._lib.cpp.types cimport data_type, mask_state, size_type + cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil: cdef unique_ptr[column] make_numeric_column(data_type type, size_type size, diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 4fa2a1b13fa..3ef86598e49 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -49,7 +49,13 @@ from cudf.core.index import Index, RangeIndex, _index_from_columns from cudf.core.missing import NA from cudf.core.multiindex import MultiIndex -from cudf.core.udf.utils import _compile_or_get, _launch_arg_from_col, _supported_cols_from_frame, _return_col_from_dtype, _post_process_output_col +from cudf.core.udf.utils import ( + _compile_or_get, + _launch_arg_from_col, + _post_process_output_col, + _return_col_from_dtype, + _supported_cols_from_frame, +) from cudf.utils import docutils from cudf.utils.utils import _cudf_nvtx_annotate @@ -1809,7 +1815,7 @@ def _apply(self, func, kernel_getter, *args, **kwargs): offsets.append(col.offset) launch_args += offsets launch_args += list(args) - + try: kernel.forall(len(self))(*launch_args) except Exception as e: diff --git a/python/cudf/cudf/core/udf/masked_lowering.py b/python/cudf/cudf/core/udf/masked_lowering.py index 26f4040a56a..e1ef5f8e61d 100644 --- a/python/cudf/cudf/core/udf/masked_lowering.py +++ b/python/cudf/cudf/core/udf/masked_lowering.py @@ -5,13 +5,14 @@ from llvmlite import ir from numba.core import cgutils from numba.core.typing import signature as nb_signature -from numba.cuda.cudadrv import nvvm from numba.cuda.cudaimpl import ( lower as cuda_lower, registry as cuda_lowering_registry, ) from numba.extending import lower_builtin, types +from strings_udf._typing import string_view + from cudf.core.udf import api from cudf.core.udf._ops import ( arith_ops, @@ -19,16 +20,9 @@ comparison_ops, unary_ops, ) -from cudf.core.udf.masked_typing import ( - MaskedType, - NAType, -) - -import operator +from cudf.core.udf.masked_typing import MaskedType, NAType -from strings_udf._typing import string_view - @cuda_lowering_registry.lower_constant(NAType) def constant_na(context, builder, ty, pyval): # This handles None, etc. @@ -295,6 +289,7 @@ def pack_return_scalar_impl(context, builder, sig, args): return outdata._getvalue() + @cuda_lower(operator.truth, MaskedType) def masked_scalar_truth_impl(context, builder, sig, args): indata = cgutils.create_struct_proxy(MaskedType(types.boolean))( @@ -368,7 +363,6 @@ def masked_constructor(context, builder, sig, args): return masked._getvalue() - # Allows us to make an instance of MaskedType a global variable # and properly use it inside functions we will later compile @cuda_lowering_registry.lower_constant(MaskedType) diff --git a/python/cudf/cudf/core/udf/masked_typing.py b/python/cudf/cudf/core/udf/masked_typing.py index aa2ef8f8df6..18bf5e3cd50 100644 --- a/python/cudf/cudf/core/udf/masked_typing.py +++ b/python/cudf/cudf/core/udf/masked_typing.py @@ -2,7 +2,7 @@ import operator -from numba import cuda, types +from numba import types from numba.core.extending import ( make_attribute_wrapper, models, @@ -18,6 +18,8 @@ from numba.core.typing.typeof import typeof from numba.cuda.cudadecl import registry as cuda_decl_registry +from strings_udf._typing import StringView, string_view + from cudf.core.missing import NA from cudf.core.udf import api from cudf.core.udf._ops import ( @@ -35,9 +37,6 @@ types.PyObject, ) -import operator - -from strings_udf._typing import string_view, StringView # Masked scalars of all types class MaskedType(types.Type): @@ -131,6 +130,7 @@ def __eq__(self, other): # Require a cast for another masked with a different value type return self.value_type == other.value_type + # For typing a Masked constant value defined outside a kernel (e.g. captured in # a closure). @typeof_impl.register(api.Masked) @@ -163,6 +163,7 @@ class MaskedConstructor(ConcreteTemplate): make_attribute_wrapper(MaskedType, "value", "value") make_attribute_wrapper(MaskedType, "valid", "valid") + # Typing for `api.Masked` @cuda_decl_registry.register_attr class ClassesTemplate(AttributeTemplate): @@ -365,7 +366,6 @@ def generic(self, args, kws): return nb_signature(return_type, args[0]) - for binary_op in arith_ops + bitwise_ops + comparison_ops: # Every op shares the same typing class cuda_decl_registry.register_global(binary_op)(MaskedScalarArithOp) diff --git a/python/cudf/cudf/core/udf/row_function.py b/python/cudf/cudf/core/udf/row_function.py index fb9e9690ce0..028d8bcb5a7 100644 --- a/python/cudf/cudf/core/udf/row_function.py +++ b/python/cudf/cudf/core/udf/row_function.py @@ -6,14 +6,16 @@ from numba.np import numpy_support from numba.types import Record +from strings_udf._typing import DString + from cudf.core.udf.api import Masked, pack_return +from cudf.core.udf.masked_typing import MaskedType from cudf.core.udf.templates import ( masked_input_initializer_template, row_initializer_template, row_kernel_template, unmasked_input_initializer_template, ) -from cudf.core.udf.masked_typing import MaskedType from cudf.core.udf.utils import ( _all_dtypes_from_frame, _construct_signature, @@ -24,7 +26,6 @@ _supported_dtypes_from_frame, ) -from strings_udf._typing import DString dstring = DString() @@ -50,7 +51,7 @@ def _get_frame_row_type(dtype): sizes = [] for field in dtype.fields.values(): - if field[0] == np.dtype('object'): + if field[0] == np.dtype("object"): sizes.append(dstring.size_bytes) else: sizes.append(field[0].itemsize) @@ -71,7 +72,7 @@ def _get_frame_row_type(dtype): fields.append((name, infos)) # increment offset by itemsize plus one byte for validity - if elemdtype == np.dtype('object'): + if elemdtype == np.dtype("object"): itemsize = dstring.size_bytes else: itemsize = elemdtype.itemsize diff --git a/python/cudf/cudf/core/udf/scalar_function.py b/python/cudf/cudf/core/udf/scalar_function.py index 26d88fca7a5..31599f4151e 100644 --- a/python/cudf/cudf/core/udf/scalar_function.py +++ b/python/cudf/cudf/core/udf/scalar_function.py @@ -4,12 +4,12 @@ from numba.np import numpy_support from cudf.core.udf.api import Masked, pack_return +from cudf.core.udf.masked_typing import MaskedType from cudf.core.udf.templates import ( masked_input_initializer_template, scalar_kernel_template, unmasked_input_initializer_template, ) -from cudf.core.udf.masked_typing import MaskedType from cudf.core.udf.utils import ( _construct_signature, _get_kernel, diff --git a/python/cudf/cudf/core/udf/strings_lowering.py b/python/cudf/cudf/core/udf/strings_lowering.py index 77fb5f90dde..02850dae2b2 100644 --- a/python/cudf/cudf/core/udf/strings_lowering.py +++ b/python/cudf/cudf/core/udf/strings_lowering.py @@ -1,33 +1,30 @@ # Copyright (c) 2022, NVIDIA CORPORATION. +import operator + +from numba import types +from numba.core import cgutils +from numba.cuda.cudaimpl import lower as cuda_lower + from strings_udf._typing import string_view from strings_udf.lowering import ( - string_view_len_impl, - string_view_startswith_impl, - string_view_endswith_impl, - string_view_find_impl, - string_view_rfind_impl, string_view_contains_impl, + string_view_endswith_impl, + string_view_find_impl, string_view_isalnum_impl, string_view_isalpha_impl, string_view_isdecimal_impl, string_view_isdigit_impl, - string_view_isupper_impl, string_view_islower_impl, - string_view_isspace_impl + string_view_isspace_impl, + string_view_isupper_impl, + string_view_len_impl, + string_view_rfind_impl, + string_view_startswith_impl, ) -from numba import types from cudf.core.udf.masked_typing import MaskedType -from numba.cuda.cudaimpl import ( - lower as cuda_lower, - registry as cuda_lowering_registry, -) - -from numba.core import cgutils -import operator - @cuda_lower(len, MaskedType(string_view)) def masked_string_view_len_impl(context, builder, sig, args): @@ -36,7 +33,9 @@ def masked_string_view_len_impl(context, builder, sig, args): masked_sv = cgutils.create_struct_proxy(masked_sv_ty)( context, builder, value=args[0] ) - result = string_view_len_impl(context, builder, types.int32(string_view), (masked_sv.value,)) + result = string_view_len_impl( + context, builder, types.int32(string_view), (masked_sv.value,) + ) ret.value = result ret.valid = masked_sv.valid @@ -56,10 +55,10 @@ def masked_string_view_startswith_impl(context, builder, sig, args): context, builder, value=args[1] ) result = string_view_startswith_impl( - context, - builder, - types.boolean(string_view, string_view), - (masked_sv_str.value, masked_sv_substr.value) + context, + builder, + types.boolean(string_view, string_view), + (masked_sv_str.value, masked_sv_substr.value), ) ret.value = result @@ -80,16 +79,17 @@ def masked_string_view_endswith_impl(context, builder, sig, args): context, builder, value=args[1] ) result = string_view_endswith_impl( - context, - builder, - types.boolean(string_view, string_view), - (masked_sv_str.value, masked_sv_substr.value) + context, + builder, + types.boolean(string_view, string_view), + (masked_sv_str.value, masked_sv_substr.value), ) ret.value = result ret.valid = builder.and_(masked_sv_str.valid, masked_sv_substr.valid) return ret._getvalue() + @cuda_lower( "MaskedType.find", MaskedType(string_view), MaskedType(string_view) ) @@ -103,16 +103,17 @@ def masked_string_view_find_impl(context, builder, sig, args): context, builder, value=args[1] ) result = string_view_find_impl( - context, - builder, - types.boolean(string_view, string_view), - (masked_sv_str.value, masked_sv_substr.value) + context, + builder, + types.boolean(string_view, string_view), + (masked_sv_str.value, masked_sv_substr.value), ) ret.value = result ret.valid = builder.and_(masked_sv_str.valid, masked_sv_substr.valid) return ret._getvalue() + @cuda_lower( "MaskedType.rfind", MaskedType(string_view), MaskedType(string_view) ) @@ -126,17 +127,20 @@ def masked_string_view_rfind_impl(context, builder, sig, args): context, builder, value=args[1] ) result = string_view_rfind_impl( - context, - builder, - types.boolean(string_view, string_view), - (masked_sv_str.value, masked_sv_substr.value) + context, + builder, + types.boolean(string_view, string_view), + (masked_sv_str.value, masked_sv_substr.value), ) ret.value = result ret.valid = builder.and_(masked_sv_str.valid, masked_sv_substr.valid) return ret._getvalue() -@cuda_lower(operator.contains, MaskedType(string_view), MaskedType(string_view)) + +@cuda_lower( + operator.contains, MaskedType(string_view), MaskedType(string_view) +) def masked_string_view_contains_impl(context, builder, sig, args): ret = cgutils.create_struct_proxy(sig.return_type)(context, builder) masked_sv_ty = sig.args[0] @@ -147,19 +151,18 @@ def masked_string_view_contains_impl(context, builder, sig, args): context, builder, value=args[1] ) result = string_view_contains_impl( - context, - builder, - types.boolean(string_view, string_view), - (masked_sv_str.value, masked_sv_substr.value) + context, + builder, + types.boolean(string_view, string_view), + (masked_sv_str.value, masked_sv_substr.value), ) ret.value = result ret.valid = builder.and_(masked_sv_str.valid, masked_sv_substr.valid) return ret._getvalue() -@cuda_lower( - "MaskedType.isalnum", MaskedType(string_view) -) + +@cuda_lower("MaskedType.isalnum", MaskedType(string_view)) def masked_string_view_isalnum_impl(context, builder, sig, args): ret = cgutils.create_struct_proxy(sig.return_type)(context, builder) masked_sv_ty = sig.args[0] @@ -168,19 +171,18 @@ def masked_string_view_isalnum_impl(context, builder, sig, args): ) result = string_view_isalnum_impl( - context, - builder, - types.boolean(string_view, string_view), - (masked_sv_str.value,) + context, + builder, + types.boolean(string_view, string_view), + (masked_sv_str.value,), ) ret.value = result ret.valid = masked_sv_str.valid return ret._getvalue() -@cuda_lower( - "MaskedType.isalpha", MaskedType(string_view) -) + +@cuda_lower("MaskedType.isalpha", MaskedType(string_view)) def masked_string_view_isalpha_impl(context, builder, sig, args): ret = cgutils.create_struct_proxy(sig.return_type)(context, builder) masked_sv_ty = sig.args[0] @@ -189,19 +191,18 @@ def masked_string_view_isalpha_impl(context, builder, sig, args): ) result = string_view_isalpha_impl( - context, - builder, - types.boolean(string_view, string_view), - (masked_sv_str.value,) + context, + builder, + types.boolean(string_view, string_view), + (masked_sv_str.value,), ) ret.value = result ret.valid = masked_sv_str.valid return ret._getvalue() -@cuda_lower( - "MaskedType.isdigit", MaskedType(string_view) -) + +@cuda_lower("MaskedType.isdigit", MaskedType(string_view)) def masked_string_view_isdigit_impl(context, builder, sig, args): ret = cgutils.create_struct_proxy(sig.return_type)(context, builder) masked_sv_ty = sig.args[0] @@ -210,19 +211,18 @@ def masked_string_view_isdigit_impl(context, builder, sig, args): ) result = string_view_isdigit_impl( - context, - builder, - types.boolean(string_view, string_view), - (masked_sv_str.value,) + context, + builder, + types.boolean(string_view, string_view), + (masked_sv_str.value,), ) ret.value = result ret.valid = masked_sv_str.valid return ret._getvalue() -@cuda_lower( - "MaskedType.isdecimal", MaskedType(string_view) -) + +@cuda_lower("MaskedType.isdecimal", MaskedType(string_view)) def masked_string_view_isdecimal_impl(context, builder, sig, args): ret = cgutils.create_struct_proxy(sig.return_type)(context, builder) masked_sv_ty = sig.args[0] @@ -231,19 +231,18 @@ def masked_string_view_isdecimal_impl(context, builder, sig, args): ) result = string_view_isdecimal_impl( - context, - builder, - types.boolean(string_view, string_view), - (masked_sv_str.value,) + context, + builder, + types.boolean(string_view, string_view), + (masked_sv_str.value,), ) ret.value = result ret.valid = masked_sv_str.valid return ret._getvalue() -@cuda_lower( - "MaskedType.isupper", MaskedType(string_view) -) + +@cuda_lower("MaskedType.isupper", MaskedType(string_view)) def masked_string_view_isupper_impl(context, builder, sig, args): ret = cgutils.create_struct_proxy(sig.return_type)(context, builder) masked_sv_ty = sig.args[0] @@ -252,19 +251,18 @@ def masked_string_view_isupper_impl(context, builder, sig, args): ) result = string_view_isupper_impl( - context, - builder, - types.boolean(string_view, string_view), - (masked_sv_str.value,) + context, + builder, + types.boolean(string_view, string_view), + (masked_sv_str.value,), ) ret.value = result ret.valid = masked_sv_str.valid return ret._getvalue() -@cuda_lower( - "MaskedType.islower", MaskedType(string_view) -) + +@cuda_lower("MaskedType.islower", MaskedType(string_view)) def masked_string_view_islower_impl(context, builder, sig, args): ret = cgutils.create_struct_proxy(sig.return_type)(context, builder) masked_sv_ty = sig.args[0] @@ -273,10 +271,10 @@ def masked_string_view_islower_impl(context, builder, sig, args): ) result = string_view_islower_impl( - context, - builder, - types.boolean(string_view, string_view), - (masked_sv_str.value,) + context, + builder, + types.boolean(string_view, string_view), + (masked_sv_str.value,), ) ret.value = result @@ -284,9 +282,7 @@ def masked_string_view_islower_impl(context, builder, sig, args): return ret._getvalue() -@cuda_lower( - "MaskedType.isspace", MaskedType(string_view) -) +@cuda_lower("MaskedType.isspace", MaskedType(string_view)) def masked_string_view_isspace_impl(context, builder, sig, args): ret = cgutils.create_struct_proxy(sig.return_type)(context, builder) masked_sv_ty = sig.args[0] @@ -295,10 +291,10 @@ def masked_string_view_isspace_impl(context, builder, sig, args): ) result = string_view_isspace_impl( - context, - builder, - types.boolean(string_view, string_view), - (masked_sv_str.value,) + context, + builder, + types.boolean(string_view, string_view), + (masked_sv_str.value,), ) ret.value = result diff --git a/python/cudf/cudf/core/udf/strings_typing.py b/python/cudf/cudf/core/udf/strings_typing.py index f27b8d36622..69236259641 100644 --- a/python/cudf/cudf/core/udf/strings_typing.py +++ b/python/cudf/cudf/core/udf/strings_typing.py @@ -1,22 +1,18 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -#make_attribute_wrapper(StringView, "data", "data") -from numba.core.typing.templates import ( - AbstractTemplate, - AttributeTemplate, -) - import operator -from cudf.core.udf.masked_typing import MaskedType - -from strings_udf._typing import StringView, string_view -from numba.cuda.cudadecl import registry as cuda_decl_registry from numba import types from numba.core.typing import signature as nb_signature +from numba.core.typing.templates import AbstractTemplate, AttributeTemplate +from numba.cuda.cudadecl import registry as cuda_decl_registry + +from strings_udf._typing import StringView, string_view from cudf.core.udf._ops import comparison_ops +from cudf.core.udf.masked_typing import MaskedType + # String functions @cuda_decl_registry.register_global(len) @@ -31,6 +27,7 @@ def generic(self, args, kws): ): return nb_signature(MaskedType(types.int32), args[0]) + @cuda_decl_registry.register_global(operator.contains) class MaskedStringViewContains(AbstractTemplate): """ @@ -38,12 +35,21 @@ class MaskedStringViewContains(AbstractTemplate): """ def generic(self, args, kws): - if (isinstance(args[0], MaskedType) and isinstance( - args[0].value_type, StringView - ) or isinstance(args[0], types.StringLiteral)) and (isinstance(args[1], MaskedType) and isinstance( - args[1].value_type, StringView - ) or isinstance(args[1], types.StringLiteral)): - return nb_signature(MaskedType(types.boolean), MaskedType(string_view), MaskedType(string_view)) + if ( + isinstance(args[0], MaskedType) + and isinstance(args[0].value_type, StringView) + or isinstance(args[0], types.StringLiteral) + ) and ( + isinstance(args[1], MaskedType) + and isinstance(args[1].value_type, StringView) + or isinstance(args[1], types.StringLiteral) + ): + return nb_signature( + MaskedType(types.boolean), + MaskedType(string_view), + MaskedType(string_view), + ) + class MaskedStringViewCmpOp(AbstractTemplate): """ @@ -53,12 +59,21 @@ class MaskedStringViewCmpOp(AbstractTemplate): """ def generic(self, args, kws): - if (isinstance(args[0], MaskedType) and isinstance( - args[0].value_type, StringView - ) or isinstance(args[0], types.StringLiteral)) and (isinstance(args[1], MaskedType) and isinstance( - args[1].value_type, StringView - ) or isinstance(args[1], types.StringLiteral)): - return nb_signature(MaskedType(types.boolean), MaskedType(string_view), MaskedType(string_view)) + if ( + isinstance(args[0], MaskedType) + and isinstance(args[0].value_type, StringView) + or isinstance(args[0], types.StringLiteral) + ) and ( + isinstance(args[1], MaskedType) + and isinstance(args[1].value_type, StringView) + or isinstance(args[1], types.StringLiteral) + ): + return nb_signature( + MaskedType(types.boolean), + MaskedType(string_view), + MaskedType(string_view), + ) + @cuda_decl_registry.register_global(len) class StringLiteralLength(AbstractTemplate): @@ -71,6 +86,7 @@ def generic(self, args, kws): if isinstance(args[0], types.StringLiteral) and len(args) == 1: return nb_signature(types.int32, args[0]) + class MaskedStringViewStartsWith(AbstractTemplate): key = "MaskedType.startswith" @@ -79,6 +95,7 @@ def generic(self, args, kws): MaskedType(types.boolean), MaskedType(string_view), recvr=self.this ) + class MaskedStringViewEndsWith(AbstractTemplate): key = "MaskedType.endswith" @@ -87,6 +104,7 @@ def generic(self, args, kws): MaskedType(types.boolean), MaskedType(string_view), recvr=self.this ) + class MaskedStringViewFind(AbstractTemplate): key = "MaskedType.find" @@ -95,6 +113,7 @@ def generic(self, args, kws): MaskedType(types.int32), MaskedType(string_view), recvr=self.this ) + class MaskedStringViewRFind(AbstractTemplate): key = "MaskedType.rfind" @@ -103,61 +122,54 @@ def generic(self, args, kws): MaskedType(types.int32), MaskedType(string_view), recvr=self.this ) + class MaskedStringViewIsAlnum(AbstractTemplate): key = "MaskedType.isalnum" def generic(self, args, kws): - return nb_signature( - MaskedType(types.boolean), recvr=self.this - ) + return nb_signature(MaskedType(types.boolean), recvr=self.this) + class MaskedStringViewIsAlpha(AbstractTemplate): key = "MaskedType.isalpha" def generic(self, args, kws): - return nb_signature( - MaskedType(types.boolean), recvr=self.this - ) + return nb_signature(MaskedType(types.boolean), recvr=self.this) + class MaskedStringViewIsDecimal(AbstractTemplate): key = "MaskedType.isdecimal" def generic(self, args, kws): - return nb_signature( - MaskedType(types.boolean), recvr=self.this - ) + return nb_signature(MaskedType(types.boolean), recvr=self.this) + class MaskedStringViewIsDigit(AbstractTemplate): key = "MaskedType.isdigit" def generic(self, args, kws): - return nb_signature( - MaskedType(types.boolean), recvr=self.this - ) + return nb_signature(MaskedType(types.boolean), recvr=self.this) + class MaskedStringViewIsLower(AbstractTemplate): key = "MaskedType.islower" def generic(self, args, kws): - return nb_signature( - MaskedType(types.boolean), recvr=self.this - ) + return nb_signature(MaskedType(types.boolean), recvr=self.this) + class MaskedStringViewIsUpper(AbstractTemplate): key = "MaskedType.isupper" def generic(self, args, kws): - return nb_signature( - MaskedType(types.boolean), recvr=self.this - ) + return nb_signature(MaskedType(types.boolean), recvr=self.this) + class MaskedStringViewIsSpace(AbstractTemplate): key = "MaskedType.isspace" def generic(self, args, kws): - return nb_signature( - MaskedType(types.boolean), recvr=self.this - ) + return nb_signature(MaskedType(types.boolean), recvr=self.this) @cuda_decl_registry.register_attr @@ -219,12 +231,12 @@ def resolve_isspace(self, mod): MaskedStringViewIsSpace, MaskedType(string_view) ) - def resolve_value(self, mod): return string_view def resolve_valid(self, mod): return types.boolean + for op in comparison_ops: cuda_decl_registry.register_global(op)(MaskedStringViewCmpOp) diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py index 936c638fcb0..223a26de401 100644 --- a/python/cudf/cudf/core/udf/utils.py +++ b/python/cudf/cudf/core/udf/utils.py @@ -3,34 +3,36 @@ from typing import Callable import cachetools +import cupy as cp import numpy as np from numba import cuda, typeof from numba.core.errors import TypingError from numba.np import numpy_support -from numba.types import Poison, Tuple, boolean, int64, void, CPointer +from numba.types import CPointer, Poison, Tuple, boolean, int64, void -import cupy as cp +from strings_udf import ptxpath +from strings_udf._typing import str_view_arg_handler, string_view + +from cudf.api.types import is_string_dtype +from cudf.core.column.column import as_column from cudf.core.dtypes import CategoricalDtype from cudf.core.udf.masked_typing import MaskedType -from cudf.core.column.column import as_column from cudf.utils import cudautils from cudf.utils.dtypes import ( BOOL_TYPES, DATETIME_TYPES, NUMERIC_TYPES, - TIMEDELTA_TYPES, STRING_TYPES, + TIMEDELTA_TYPES, ) -from cudf.api.types import is_string_dtype -from strings_udf._typing import str_view_arg_handler, string_view -from strings_udf import ptxpath - from cudf.utils.utils import _cudf_nvtx_annotate -import rmm - JIT_SUPPORTED_TYPES = ( - NUMERIC_TYPES | BOOL_TYPES | DATETIME_TYPES | TIMEDELTA_TYPES | STRING_TYPES + NUMERIC_TYPES + | BOOL_TYPES + | DATETIME_TYPES + | TIMEDELTA_TYPES + | STRING_TYPES ) libcudf_bitmask_type = numpy_support.from_dtype(np.dtype("int32")) MASK_BITSIZE = np.dtype("int32").itemsize * 8 @@ -134,12 +136,7 @@ def _masked_array_type_from_col(col): if col.mask is None: return col_type else: - return Tuple( - ( - col_type, - libcudf_bitmask_type[::1] - ) - ) + return Tuple((col_type, libcudf_bitmask_type[::1])) def _construct_signature(frame, return_type, args): @@ -231,22 +228,31 @@ def _get_kernel(kernel_string, globals_, sig, func): globals_["f_"] = f_ exec(kernel_string, globals_) _kernel = globals_["_kernel"] - kernel = cuda.jit(sig, link=[ptxpath], extensions=[str_view_arg_handler])(_kernel) + kernel = cuda.jit(sig, link=[ptxpath], extensions=[str_view_arg_handler])( + _kernel + ) return kernel + def _launch_arg_from_col(col): from strings_udf._lib.cudf_jit_udf import to_string_view_array - data = col.data if not is_string_dtype(col.dtype) else to_string_view_array(col) + data = ( + col.data + if not is_string_dtype(col.dtype) + else to_string_view_array(col) + ) mask = col.mask if mask is None: return data else: return data, mask + def _return_col_from_dtype(dt, size): return cp.empty(size, dtype=dt) + def _post_process_output_col(col, retty): return as_column(col, retty) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index b599114202c..2650c989ecd 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -786,6 +786,7 @@ def func(row): run_masked_udf_test(func, data, check_dtype=False) + @pytest.mark.parametrize( "data", [ @@ -808,7 +809,7 @@ def test_string_udf_contains(data, substr): data = cudf.DataFrame(data) def func(row): - st = row['str_col'] + st = row["str_col"] return substr in st run_masked_udf_test(func, data, check_dtype=False) @@ -836,11 +837,12 @@ def test_string_udf_cmpops(data, other, cmpop): data = cudf.DataFrame(data) def func(row): - st = row['str_col'] + st = row["str_col"] return cmpop(st, other) run_masked_udf_test(func, data, check_dtype=False) + @pytest.mark.parametrize( "data", [ @@ -861,11 +863,12 @@ def test_string_udf_isalnum(data): data = cudf.DataFrame(data) def func(row): - st = row['str_col'] + st = row["str_col"] return st.isalnum() run_masked_udf_test(func, data, check_dtype=False) + @pytest.mark.parametrize( "data", [ @@ -886,11 +889,12 @@ def test_string_udf_isalpha(data): data = cudf.DataFrame(data) def func(row): - st = row['str_col'] + st = row["str_col"] return st.isalpha() run_masked_udf_test(func, data, check_dtype=False) + @pytest.mark.parametrize( "data", [ @@ -911,11 +915,12 @@ def test_string_udf_isdigit(data): data = cudf.DataFrame(data) def func(row): - st = row['str_col'] + st = row["str_col"] return st.isdigit() run_masked_udf_test(func, data, check_dtype=False) + @pytest.mark.parametrize( "data", [ @@ -930,8 +935,7 @@ def func(row): "@2a", "12.34", "0.123", - ".123" - ".12abc" + ".123" ".12abc", ] } ], @@ -940,7 +944,7 @@ def test_string_udf_isdecimal(data): data = cudf.DataFrame(data) def func(row): - st = row['str_col'] + st = row["str_col"] return st.isdecimal() run_masked_udf_test(func, data, check_dtype=False) @@ -959,7 +963,7 @@ def func(row): "12 ab", "@2a", "12.34", - "ABC DEF" + "ABC DEF", ] } ], @@ -968,7 +972,7 @@ def test_string_udf_isupper(data): data = cudf.DataFrame(data) def func(row): - st = row['str_col'] + st = row["str_col"] return st.isupper() run_masked_udf_test(func, data, check_dtype=False) @@ -987,7 +991,7 @@ def func(row): "12 ab", "@2a", "12.34", - "abc def" + "abc def", ] } ], @@ -996,7 +1000,7 @@ def test_string_udf_islower(data): data = cudf.DataFrame(data) def func(row): - st = row['str_col'] + st = row["str_col"] return st.islower() run_masked_udf_test(func, data, check_dtype=False) @@ -1021,7 +1025,7 @@ def test_string_udf_isspace(data): data = cudf.DataFrame(data) def func(row): - st = row['str_col'] + st = row["str_col"] return st.isspace() run_masked_udf_test(func, data, check_dtype=False) diff --git a/python/cudf/setup.cfg b/python/cudf/setup.cfg index 1f7cfeb49ae..8a648097ac8 100644 --- a/python/cudf/setup.cfg +++ b/python/cudf/setup.cfg @@ -25,6 +25,7 @@ known_dask= dask_cuda known_rapids= rmm + strings_udf known_first_party= cudf default_section=THIRDPARTY @@ -41,4 +42,4 @@ skip= buck-out build dist - __init__.py \ No newline at end of file + __init__.py diff --git a/python/strings_udf/cpp/tests/run_tests.sh b/python/strings_udf/cpp/tests/run_tests.sh index ab8ebaca6c1..4af1217c58a 100755 --- a/python/strings_udf/cpp/tests/run_tests.sh +++ b/python/strings_udf/cpp/tests/run_tests.sh @@ -1,5 +1,7 @@ #!/bin/bash +# Copyright (c) 2022, NVIDIA CORPORATION. + ./udf_cli -u ../tests/ctors.udf -t ../tests/done.txt ./udf_cli -u ../tests/append.udf -t ../tests/done.txt ./udf_cli -u ../tests/insert.udf -t ../tests/done.txt diff --git a/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd b/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd index 4b3c6de4b2d..35ed54bd4c3 100644 --- a/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd +++ b/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd @@ -1,24 +1,25 @@ # Copyright (c) 2021-2022, NVIDIA CORPORATION. -from libcpp.vector cimport vector -from libcpp.string cimport string -from libcpp.memory cimport unique_ptr from libc.stdint cimport uint8_t +from libcpp.memory cimport unique_ptr +from libcpp.string cimport string +from libcpp.vector cimport vector + +from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer -from cudf._lib.cpp.types cimport size_type from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view +from cudf._lib.cpp.types cimport size_type -from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer -# cdef extern from "cudf/strings/udf/udf_apis.hpp": cdef cppclass udf_module - # cdef unique_ptr[udf_module] create_udf_module(string, vector[string]) - cdef unique_ptr[column] call_udf(udf_module, string, size_type, vector[column_view]) + cdef unique_ptr[column] call_udf( + udf_module, string, size_type, vector[column_view]) cdef unique_ptr[device_buffer] to_string_view_array(column_view) cdef unique_ptr[column] from_dstring_array(void*, size_t) -cdef extern from "cudf/strings/detail/char_tables.hpp" namespace "cudf::strings::detail": +cdef extern from "cudf/strings/detail/char_tables.hpp" namespace \ + "cudf::strings::detail": cdef const uint8_t* get_character_flags_table() except + diff --git a/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx index a721fa9d93d..cb0a8424a13 100644 --- a/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx +++ b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx @@ -5,20 +5,19 @@ import os -from libcpp.string cimport string -from libcpp.vector cimport vector from libcpp.memory cimport unique_ptr +from libcpp.string cimport string from libcpp.utility cimport move +from libcpp.vector cimport vector from cudf.core.buffer import Buffer +from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer + +from cudf._lib.column cimport Column from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.types cimport size_type -from cudf._lib.column cimport Column - -from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer - from strings_udf._lib.cpp.strings_udf cimport ( call_udf as cpp_call_udf, create_udf_module as cpp_create_udf_module, @@ -29,7 +28,8 @@ from strings_udf._lib.cpp.strings_udf cimport ( import numpy as np -def process_udf( udf, name, cols ): + +def process_udf(udf, name, cols): cdef string c_udf cdef string c_name cdef size_type c_size @@ -52,11 +52,11 @@ def process_udf( udf, name, cols ): include_path = "-I" + os.environ.get("CONDA_PREFIX") + "/include" c_options.push_back(str(include_path).encode('UTF-8')) - #with nogil: + # with nogil: c_module = move(cpp_create_udf_module(c_udf, c_options)) # c_module will be nullptr if there is a compile error - #with nogil: + # with nogil: c_result = move(cpp_call_udf(c_module.get()[0], c_name, c_size, c_columns)) return Column.from_unique_ptr(move(c_result)) @@ -65,7 +65,7 @@ def process_udf( udf, name, cols ): def to_string_view_array(Column strings_col): cdef unique_ptr[device_buffer] c_buffer - #with nogil: + # with nogil: c_buffer = move(cpp_to_string_view_array(strings_col.view())) buffer = DeviceBuffer.c_from_unique_ptr(move(c_buffer)) @@ -79,7 +79,7 @@ def from_dstring_array(DeviceBuffer d_buffer): cdef unique_ptr[column] c_result # data = - #with nogil: + # with nogil: c_result = move(cpp_from_dstring_array(data, size)) return Column.from_unique_ptr(move(c_result)) diff --git a/python/strings_udf/strings_udf/_lib/tables.pyx b/python/strings_udf/strings_udf/_lib/tables.pyx index f78faeef1ea..b24aa19155d 100644 --- a/python/strings_udf/strings_udf/_lib/tables.pyx +++ b/python/strings_udf/strings_udf/_lib/tables.pyx @@ -3,8 +3,12 @@ # distutils: language = c++ # cython: c_string_type=unicode, c_string_encoding=utf8 -from strings_udf._lib.cpp.strings_udf cimport get_character_flags_table as cpp_get_character_flags_table -from libc.stdint cimport uintptr_t, uint8_t +from libc.stdint cimport uint8_t, uintptr_t + +from strings_udf._lib.cpp.strings_udf cimport ( + get_character_flags_table as cpp_get_character_flags_table, +) + import numpy as np diff --git a/python/strings_udf/strings_udf/_typing.py b/python/strings_udf/strings_udf/_typing.py index 38fc30b41b9..f4ab69cf880 100644 --- a/python/strings_udf/strings_udf/_typing.py +++ b/python/strings_udf/strings_udf/_typing.py @@ -1,18 +1,15 @@ # Copyright (c) 2022, NVIDIA CORPORATION. +import operator + +import llvmlite.binding as ll from numba import cuda, types +from numba.core.datamodel import default_manager from numba.core.extending import models, register_model from numba.core.typing import signature as nb_signature from numba.core.typing.templates import AbstractTemplate, AttributeTemplate from numba.cuda.cudadecl import registry as cuda_decl_registry - -import operator - -import llvmlite.binding as ll from numba.cuda.cudadrv import nvvm -from numba.core.datamodel import default_manager - -import operator data_layout = nvvm.data_layout @@ -21,6 +18,7 @@ data_layout = data_layout[64] target_data = ll.create_target_data(data_layout) + # String object definitions class DString(types.Type): def __init__(self): @@ -121,7 +119,10 @@ def generic(self, args, kws): return nb_signature(types.int32, args[0]) -_string_view_len = cuda.declare_device("len", types.int32(types.CPointer(string_view))) +_string_view_len = cuda.declare_device( + "len", types.int32(types.CPointer(string_view)) +) + @cuda_decl_registry.register_global(operator.contains) class StringViewContains(AbstractTemplate): @@ -137,9 +138,11 @@ def generic(self, args, kws): _string_view_contains = cuda.declare_device( - "contains", types.boolean(types.CPointer(string_view), types.CPointer(string_view)) + "contains", + types.boolean(types.CPointer(string_view), types.CPointer(string_view)), ) + @cuda_decl_registry.register_global(operator.eq) class StringViewEq(AbstractTemplate): """ @@ -156,7 +159,8 @@ def generic(self, args, kws): _string_view_eq = cuda.declare_device( - "eq", types.boolean(types.CPointer(string_view), types.CPointer(string_view)) + "eq", + types.boolean(types.CPointer(string_view), types.CPointer(string_view)), ) @@ -176,7 +180,8 @@ def generic(self, args, kws): _string_view_ne = cuda.declare_device( - "ne", types.boolean(types.CPointer(string_view), types.CPointer(string_view)) + "ne", + types.boolean(types.CPointer(string_view), types.CPointer(string_view)), ) @@ -196,7 +201,8 @@ def generic(self, args, kws): _string_view_ge = cuda.declare_device( - "ge", types.boolean(types.CPointer(string_view), types.CPointer(string_view)) + "ge", + types.boolean(types.CPointer(string_view), types.CPointer(string_view)), ) @@ -216,7 +222,8 @@ def generic(self, args, kws): _string_view_le = cuda.declare_device( - "le", types.boolean(types.CPointer(string_view), types.CPointer(string_view)) + "le", + types.boolean(types.CPointer(string_view), types.CPointer(string_view)), ) @@ -236,7 +243,8 @@ def generic(self, args, kws): _string_view_gt = cuda.declare_device( - "gt", types.boolean(types.CPointer(string_view), types.CPointer(string_view)) + "gt", + types.boolean(types.CPointer(string_view), types.CPointer(string_view)), ) @@ -256,117 +264,100 @@ def generic(self, args, kws): _string_view_lt = cuda.declare_device( - "lt", types.boolean(types.CPointer(string_view), types.CPointer(string_view)) + "lt", + types.boolean(types.CPointer(string_view), types.CPointer(string_view)), ) + class StringViewStartsWith(AbstractTemplate): key = "StringView.startswith" def generic(self, args, kws): - return nb_signature( - types.boolean, string_view, recvr=self.this - ) + return nb_signature(types.boolean, string_view, recvr=self.this) + class StringViewEndsWith(AbstractTemplate): key = "StringView.endswith" def generic(self, args, kws): - return nb_signature( - types.boolean, string_view, recvr=self.this - ) + return nb_signature(types.boolean, string_view, recvr=self.this) + class StringViewFind(AbstractTemplate): key = "StringView.find" def generic(self, args, kws): - return nb_signature( - types.int32, string_view, recvr=self.this - ) + return nb_signature(types.int32, string_view, recvr=self.this) + class StringViewRFind(AbstractTemplate): key = "StringView.rfind" def generic(self, args, kws): - return nb_signature( - types.int32, string_view, recvr=self.this - ) - + return nb_signature(types.int32, string_view, recvr=self.this) + + class StringViewIsAlnum(AbstractTemplate): key = "StringView.isalnum" def generic(self, args, kws): - return nb_signature( - types.boolean, recvr=self.this - ) + return nb_signature(types.boolean, recvr=self.this) + - class StringViewIsAlpha(AbstractTemplate): key = "StringView.isalpha" def generic(self, args, kws): - return nb_signature( - types.boolean, recvr=self.this - ) + return nb_signature(types.boolean, recvr=self.this) + - class StringViewIsDecimal(AbstractTemplate): key = "StringView.isdecimal" def generic(self, args, kws): - return nb_signature( - types.boolean, recvr=self.this - ) + return nb_signature(types.boolean, recvr=self.this) + - class StringViewIsDigit(AbstractTemplate): key = "StringView.isdigit" def generic(self, args, kws): - return nb_signature( - types.boolean, recvr=self.this - ) + return nb_signature(types.boolean, recvr=self.this) + class StringViewIsNumeric(AbstractTemplate): key = "StringView.isnumeric" def generic(self, args, kws): - return nb_signature( - types.boolean, recvr=self.this - ) + return nb_signature(types.boolean, recvr=self.this) + class StringViewIsUpper(AbstractTemplate): key = "StringView.isupper" def generic(self, args, kws): - return nb_signature( - types.boolean, recvr=self.this - ) + return nb_signature(types.boolean, recvr=self.this) + - class StringViewIsLower(AbstractTemplate): key = "StringView.islower" def generic(self, args, kws): - return nb_signature( - types.boolean, recvr=self.this - ) + return nb_signature(types.boolean, recvr=self.this) + - class StringViewIsSpace(AbstractTemplate): key = "StringView.isspace" def generic(self, args, kws): - return nb_signature( - types.boolean, recvr=self.this - ) + return nb_signature(types.boolean, recvr=self.this) + class StringViewCount(AbstractTemplate): key = "StringView.count" def generic(self, args, kws): - return nb_signature( - types.int32, string_view, recvr=self.this - ) + return nb_signature(types.int32, string_view, recvr=self.this) @cuda_decl_registry.register_attr @@ -374,69 +365,44 @@ class StringViewAttrs(AttributeTemplate): key = string_view def resolve_startswith(self, mod): - return types.BoundFunction( - StringViewStartsWith, string_view - ) + return types.BoundFunction(StringViewStartsWith, string_view) def resolve_endswith(self, mod): - return types.BoundFunction( - StringViewEndsWith, string_view - ) + return types.BoundFunction(StringViewEndsWith, string_view) def resolve_find(self, mod): - return types.BoundFunction( - StringViewFind, string_view - ) + return types.BoundFunction(StringViewFind, string_view) def resolve_rfind(self, mod): - return types.BoundFunction( - StringViewRFind, string_view - ) + return types.BoundFunction(StringViewRFind, string_view) def resolve_isalnum(self, mod): - return types.BoundFunction( - StringViewIsAlnum, string_view - ) + return types.BoundFunction(StringViewIsAlnum, string_view) def resolve_isalpha(self, mod): - return types.BoundFunction( - StringViewIsAlpha, string_view - ) + return types.BoundFunction(StringViewIsAlpha, string_view) def resolve_isdecimal(self, mod): - return types.BoundFunction( - StringViewIsDecimal, string_view - ) + return types.BoundFunction(StringViewIsDecimal, string_view) def resolve_isdigit(self, mod): - return types.BoundFunction( - StringViewIsDigit, string_view - ) + return types.BoundFunction(StringViewIsDigit, string_view) def resolve_isnumeric(self, mod): - return types.BoundFunction( - StringViewIsNumeric, string_view - ) + return types.BoundFunction(StringViewIsNumeric, string_view) def resolve_islower(self, mod): - return types.BoundFunction( - StringViewIsLower, string_view - ) + return types.BoundFunction(StringViewIsLower, string_view) def resolve_isupper(self, mod): - return types.BoundFunction( - StringViewIsUpper, string_view - ) + return types.BoundFunction(StringViewIsUpper, string_view) def resolve_isspace(self, mod): - return types.BoundFunction( - StringViewIsSpace, string_view - ) + return types.BoundFunction(StringViewIsSpace, string_view) def resolve_count(self, mod): - return types.BoundFunction( - StringViewCount, string_view - ) + return types.BoundFunction(StringViewCount, string_view) + _string_view_startswith = cuda.declare_device( "startswith", @@ -444,61 +410,54 @@ def resolve_count(self, mod): ) _string_view_endswith = cuda.declare_device( - "endswith", types.boolean(types.CPointer(string_view), types.CPointer(string_view)) + "endswith", + types.boolean(types.CPointer(string_view), types.CPointer(string_view)), ) _string_view_find = cuda.declare_device( "find", - types.int32(types.CPointer(string_view), types.CPointer(string_view)) + types.int32(types.CPointer(string_view), types.CPointer(string_view)), ) _string_view_rfind = cuda.declare_device( "rfind", - types.int32(types.CPointer(string_view), types.CPointer(string_view)) + types.int32(types.CPointer(string_view), types.CPointer(string_view)), ) _string_view_isdigit = cuda.declare_device( - "pyisdigit", - types.boolean(types.CPointer(string_view), types.int64) + "pyisdigit", types.boolean(types.CPointer(string_view), types.int64) ) _string_view_isalnum = cuda.declare_device( - "pyisalnum", - types.boolean(types.CPointer(string_view), types.int64) + "pyisalnum", types.boolean(types.CPointer(string_view), types.int64) ) _string_view_isalpha = cuda.declare_device( - "pyisalpha", - types.boolean(types.CPointer(string_view), types.int64) + "pyisalpha", types.boolean(types.CPointer(string_view), types.int64) ) _string_view_isdecimal = cuda.declare_device( - "pyisdecimal", - types.boolean(types.CPointer(string_view), types.int64) + "pyisdecimal", types.boolean(types.CPointer(string_view), types.int64) ) _string_view_isnumeric = cuda.declare_device( - "pyisnumeric", - types.boolean(types.CPointer(string_view), types.int64) + "pyisnumeric", types.boolean(types.CPointer(string_view), types.int64) ) _string_view_isspace = cuda.declare_device( - "pyisspace", - types.boolean(types.CPointer(string_view), types.int64) + "pyisspace", types.boolean(types.CPointer(string_view), types.int64) ) _string_view_isupper = cuda.declare_device( - "pyisupper", - types.boolean(types.CPointer(string_view), types.int64) + "pyisupper", types.boolean(types.CPointer(string_view), types.int64) ) _string_view_islower = cuda.declare_device( - "pyislower", - types.boolean(types.CPointer(string_view), types.int64) + "pyislower", types.boolean(types.CPointer(string_view), types.int64) ) _string_view_count = cuda.declare_device( "pycount", - types.int32(types.CPointer(string_view), types.CPointer(string_view)) + types.int32(types.CPointer(string_view), types.CPointer(string_view)), ) diff --git a/python/strings_udf/strings_udf/lowering.py b/python/strings_udf/strings_udf/lowering.py index 3f82f48fb15..c3f0c35f5d7 100644 --- a/python/strings_udf/strings_udf/lowering.py +++ b/python/strings_udf/strings_udf/lowering.py @@ -1,43 +1,45 @@ # Copyright (c) 2022, NVIDIA CORPORATION. +import operator + from numba import types from numba.core import cgutils from numba.core.typing import signature as nb_signature from numba.cuda.cudadrv import nvvm -from numba.cuda.cudaimpl import lower as cuda_lower -from numba.cuda.cudaimpl import registry as cuda_lowering_registry +from numba.cuda.cudaimpl import ( + lower as cuda_lower, + registry as cuda_lowering_registry, +) + +from strings_udf._lib.tables import get_character_flags_table_ptr from strings_udf._typing import ( - StringView, - string_view, - _string_view_endswith, - _string_view_len, - _string_view_startswith, _string_view_contains, + _string_view_count, + _string_view_endswith, _string_view_eq, - _string_view_ne, + _string_view_find, _string_view_ge, - _string_view_le, _string_view_gt, - _string_view_lt, - _string_view_find, - _string_view_rfind, - _string_view_isdigit, _string_view_isalnum, _string_view_isalpha, - _string_view_isnumeric, _string_view_isdecimal, + _string_view_isdigit, + _string_view_islower, + _string_view_isnumeric, _string_view_isspace, _string_view_isupper, - _string_view_islower, - _string_view_count, + _string_view_le, + _string_view_len, + _string_view_lt, + _string_view_ne, + _string_view_rfind, + _string_view_startswith, + string_view, ) -import operator - -from strings_udf._lib.tables import get_character_flags_table_ptr - character_flags_table_ptr = get_character_flags_table_ptr() + # String function implementations def call_len_string_view(st): return _string_view_len(st) @@ -72,7 +74,9 @@ def string_view_contains_impl(context, builder, sig, args): builder, call_string_view_contains, nb_signature( - types.boolean, types.CPointer(string_view), types.CPointer(string_view) + types.boolean, + types.CPointer(string_view), + types.CPointer(string_view), ), (sv_ptr, substr_ptr), ) @@ -96,7 +100,9 @@ def string_view_eq_impl(context, builder, sig, args): builder, call_string_view_eq, nb_signature( - types.boolean, types.CPointer(string_view), types.CPointer(string_view) + types.boolean, + types.CPointer(string_view), + types.CPointer(string_view), ), (sv_ptr, rhs_ptr), ) @@ -120,7 +126,9 @@ def string_view_ne_impl(context, builder, sig, args): builder, call_string_view_ne, nb_signature( - types.boolean, types.CPointer(string_view), types.CPointer(string_view) + types.boolean, + types.CPointer(string_view), + types.CPointer(string_view), ), (sv_ptr, rhs_ptr), ) @@ -144,7 +152,9 @@ def string_view_ge_impl(context, builder, sig, args): builder, call_string_view_ge, nb_signature( - types.boolean, types.CPointer(string_view), types.CPointer(string_view) + types.boolean, + types.CPointer(string_view), + types.CPointer(string_view), ), (sv_ptr, rhs_ptr), ) @@ -168,7 +178,9 @@ def string_view_le_impl(context, builder, sig, args): builder, call_string_view_le, nb_signature( - types.boolean, types.CPointer(string_view), types.CPointer(string_view) + types.boolean, + types.CPointer(string_view), + types.CPointer(string_view), ), (sv_ptr, rhs_ptr), ) @@ -192,7 +204,9 @@ def string_view_gt_impl(context, builder, sig, args): builder, call_string_view_gt, nb_signature( - types.boolean, types.CPointer(string_view), types.CPointer(string_view) + types.boolean, + types.CPointer(string_view), + types.CPointer(string_view), ), (sv_ptr, rhs_ptr), ) @@ -216,7 +230,9 @@ def string_view_lt_impl(context, builder, sig, args): builder, call_string_view_lt, nb_signature( - types.boolean, types.CPointer(string_view), types.CPointer(string_view) + types.boolean, + types.CPointer(string_view), + types.CPointer(string_view), ), (sv_ptr, rhs_ptr), ) @@ -225,10 +241,10 @@ def string_view_lt_impl(context, builder, sig, args): # read-only functions -# We will provide only one overload for this set of functions, which will expect a -# string_view. When a literal is encountered, numba will promote it to a string_view -# whereas when a dstring is encountered, numba will convert it to a view via its native -# view() method +# We will provide only one overload for this set of functions, which will +# expect a string_view. When a literal is encountered, numba will promote it to +# a string_view whereas when a dstring is encountered, numba will convert it to +# a view via its native view() method. # casts @cuda_lowering_registry.lower_cast(types.StringLiteral, string_view) @@ -241,7 +257,9 @@ def cast_string_literal_to_string_view(context, builder, fromty, toty, val): # set the empty strview data pointer to point to the literal value s = context.insert_const_string(builder.module, fromty.literal_value) - sv.data = context.insert_addrspace_conv(builder, s, nvvm.ADDRSPACE_CONSTANT) + sv.data = context.insert_addrspace_conv( + builder, s, nvvm.ADDRSPACE_CONSTANT + ) sv.length = context.get_constant(types.int32, len(fromty.literal_value)) sv.bytes = context.get_constant( types.int32, len(fromty.literal_value.encode("UTF-8")) @@ -256,7 +274,9 @@ def call_string_view_startswith(sv, substr): @cuda_lower("StringView.startswith", string_view, string_view) def string_view_startswith_impl(context, builder, sig, args): - sv_ptr, substr_ptr = builder.alloca(args[0].type), builder.alloca(args[1].type) + sv_ptr, substr_ptr = builder.alloca(args[0].type), builder.alloca( + args[1].type + ) builder.store(args[0], sv_ptr) builder.store(args[1], substr_ptr) @@ -265,7 +285,9 @@ def string_view_startswith_impl(context, builder, sig, args): builder, call_string_view_startswith, nb_signature( - types.boolean, types.CPointer(string_view), types.CPointer(string_view) + types.boolean, + types.CPointer(string_view), + types.CPointer(string_view), ), (sv_ptr, substr_ptr), ) @@ -279,7 +301,9 @@ def call_string_view_endswith(sv, substr): @cuda_lower("StringView.endswith", string_view, string_view) def string_view_endswith_impl(context, builder, sig, args): - sv_ptr, substr_ptr = builder.alloca(args[0].type), builder.alloca(args[1].type) + sv_ptr, substr_ptr = builder.alloca(args[0].type), builder.alloca( + args[1].type + ) builder.store(args[0], sv_ptr) builder.store(args[1], substr_ptr) @@ -288,7 +312,9 @@ def string_view_endswith_impl(context, builder, sig, args): builder, call_string_view_endswith, nb_signature( - types.boolean, types.CPointer(string_view), types.CPointer(string_view) + types.boolean, + types.CPointer(string_view), + types.CPointer(string_view), ), (sv_ptr, substr_ptr), ) @@ -299,9 +325,12 @@ def string_view_endswith_impl(context, builder, sig, args): def call_string_view_count(st, substr): return _string_view_count(st, substr) + @cuda_lower("StringView.count", string_view, string_view) def string_view_coount_impl(context, builder, sig, args): - sv_ptr, substr_ptr = builder.alloca(args[0].type), builder.alloca(args[1].type) + sv_ptr, substr_ptr = builder.alloca(args[0].type), builder.alloca( + args[1].type + ) builder.store(args[0], sv_ptr) builder.store(args[1], substr_ptr) @@ -310,7 +339,9 @@ def string_view_coount_impl(context, builder, sig, args): builder, call_string_view_count, nb_signature( - types.int32, types.CPointer(string_view), types.CPointer(string_view) + types.int32, + types.CPointer(string_view), + types.CPointer(string_view), ), (sv_ptr, substr_ptr), ) @@ -321,9 +352,12 @@ def string_view_coount_impl(context, builder, sig, args): def call_string_view_find(sv, substr): return _string_view_find(sv, substr) + @cuda_lower("StringView.find", string_view, string_view) def string_view_find_impl(context, builder, sig, args): - sv_ptr, substr_ptr = builder.alloca(args[0].type), builder.alloca(args[1].type) + sv_ptr, substr_ptr = builder.alloca(args[0].type), builder.alloca( + args[1].type + ) builder.store(args[0], sv_ptr) builder.store(args[1], substr_ptr) @@ -332,19 +366,25 @@ def string_view_find_impl(context, builder, sig, args): builder, call_string_view_find, nb_signature( - types.int32, types.CPointer(string_view), types.CPointer(string_view) + types.int32, + types.CPointer(string_view), + types.CPointer(string_view), ), (sv_ptr, substr_ptr), ) return result + def call_string_view_rfind(sv, substr): return _string_view_rfind(sv, substr) + @cuda_lower("StringView.rfind", string_view, string_view) def string_view_rfind_impl(context, builder, sig, args): - sv_ptr, substr_ptr = builder.alloca(args[0].type), builder.alloca(args[1].type) + sv_ptr, substr_ptr = builder.alloca(args[0].type), builder.alloca( + args[1].type + ) builder.store(args[0], sv_ptr) builder.store(args[1], substr_ptr) @@ -353,13 +393,16 @@ def string_view_rfind_impl(context, builder, sig, args): builder, call_string_view_rfind, nb_signature( - types.int32, types.CPointer(string_view), types.CPointer(string_view) + types.int32, + types.CPointer(string_view), + types.CPointer(string_view), ), (sv_ptr, substr_ptr), ) return result + def call_string_view_isdigit(st, tbl): return _string_view_isdigit(st, tbl) @@ -373,14 +416,13 @@ def string_view_isdigit_impl(context, builder, sig, args): result = context.compile_internal( builder, call_string_view_isdigit, - nb_signature( - types.boolean, types.CPointer(string_view), types.int64 - ), + nb_signature(types.boolean, types.CPointer(string_view), types.int64), (sv_ptr, tbl_ptr), ) return result + def call_string_view_isalnum(st, tbl): return _string_view_isalnum(st, tbl) @@ -394,14 +436,13 @@ def string_view_isalnum_impl(context, builder, sig, args): result = context.compile_internal( builder, call_string_view_isalnum, - nb_signature( - types.boolean, types.CPointer(string_view), types.int64 - ), + nb_signature(types.boolean, types.CPointer(string_view), types.int64), (sv_ptr, tbl_ptr), ) return result + def call_string_view_isalpha(st, tbl): return _string_view_isalpha(st, tbl) @@ -415,14 +456,13 @@ def string_view_isalpha_impl(context, builder, sig, args): result = context.compile_internal( builder, call_string_view_isalpha, - nb_signature( - types.boolean, types.CPointer(string_view), types.int64 - ), + nb_signature(types.boolean, types.CPointer(string_view), types.int64), (sv_ptr, tbl_ptr), ) return result + def call_string_view_isnumeric(st, tbl): return _string_view_isnumeric(st, tbl) @@ -436,14 +476,13 @@ def string_view_isnumeric_impl(context, builder, sig, args): result = context.compile_internal( builder, call_string_view_isnumeric, - nb_signature( - types.boolean, types.CPointer(string_view), types.int64 - ), + nb_signature(types.boolean, types.CPointer(string_view), types.int64), (sv_ptr, tbl_ptr), ) return result + def call_string_view_isdecimal(st, tbl): return _string_view_isdecimal(st, tbl) @@ -457,14 +496,13 @@ def string_view_isdecimal_impl(context, builder, sig, args): result = context.compile_internal( builder, call_string_view_isdecimal, - nb_signature( - types.boolean, types.CPointer(string_view), types.int64 - ), + nb_signature(types.boolean, types.CPointer(string_view), types.int64), (sv_ptr, tbl_ptr), ) return result + def call_string_view_isspace(st, tbl): return _string_view_isspace(st, tbl) @@ -478,14 +516,13 @@ def string_view_isspace_impl(context, builder, sig, args): result = context.compile_internal( builder, call_string_view_isspace, - nb_signature( - types.boolean, types.CPointer(string_view), types.int64 - ), + nb_signature(types.boolean, types.CPointer(string_view), types.int64), (sv_ptr, tbl_ptr), ) return result + def call_string_view_isupper(st, tbl): return _string_view_isupper(st, tbl) @@ -499,9 +536,7 @@ def string_view_isupper_impl(context, builder, sig, args): result = context.compile_internal( builder, call_string_view_isupper, - nb_signature( - types.boolean, types.CPointer(string_view), types.int64 - ), + nb_signature(types.boolean, types.CPointer(string_view), types.int64), (sv_ptr, tbl_ptr), ) @@ -521,26 +556,7 @@ def string_view_islower_impl(context, builder, sig, args): result = context.compile_internal( builder, call_string_view_islower, - nb_signature( - types.boolean, types.CPointer(string_view), types.int64 - ), - (sv_ptr, tbl_ptr), - ) - - return result - -@cuda_lower("StringView.isnumeric", string_view) -def string_view_isnumeric_impl(context, builder, sig, args): - sv_ptr = builder.alloca(args[0].type) - builder.store(args[0], sv_ptr) - tbl_ptr = context.get_constant(types.int64, character_flags_table_ptr) - - result = context.compile_internal( - builder, - call_string_view_isnumeric, - nb_signature( - types.boolean, types.CPointer(string_view), types.int64 - ), + nb_signature(types.boolean, types.CPointer(string_view), types.int64), (sv_ptr, tbl_ptr), ) diff --git a/python/strings_udf/strings_udf/tests/test_cmpops.py b/python/strings_udf/strings_udf/tests/test_cmpops.py index 95e4a9cc66a..b82adb5fbe8 100644 --- a/python/strings_udf/strings_udf/tests/test_cmpops.py +++ b/python/strings_udf/strings_udf/tests/test_cmpops.py @@ -1,11 +1,13 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -from .utils import run_udf_test import pytest -import strings_udf.lowering + +from .utils import run_udf_test -@pytest.mark.parametrize("data", [["cudf", "rapids", "AI", "gpu", "2022", "cuda"]]) +@pytest.mark.parametrize( + "data", [["cudf", "rapids", "AI", "gpu", "2022", "cuda"]] +) @pytest.mark.parametrize("rhs", ["cudf", "cuda", "gpucudf", "abc"]) def test_string_udf_eq(data, rhs): # tests the `==` operator in string udfs @@ -16,7 +18,9 @@ def func(st): run_udf_test(data, func, "bool") -@pytest.mark.parametrize("data", [["cudf", "rapids", "AI", "gpu", "2022", "cuda"]]) +@pytest.mark.parametrize( + "data", [["cudf", "rapids", "AI", "gpu", "2022", "cuda"]] +) @pytest.mark.parametrize("rhs", ["cudf", "cuda", "gpucudf", "abc"]) def test_string_udf_ne(data, rhs): # tests the `!=` operator in string udfs @@ -27,7 +31,9 @@ def func(st): run_udf_test(data, func, "bool") -@pytest.mark.parametrize("data", [["cudf", "rapids", "AI", "gpu", "2022", "cuda"]]) +@pytest.mark.parametrize( + "data", [["cudf", "rapids", "AI", "gpu", "2022", "cuda"]] +) @pytest.mark.parametrize("rhs", ["cudf", "cuda", "gpucudf", "abc"]) def test_string_udf_ge(data, rhs): # tests the `>=` operator in string udfs @@ -38,7 +44,9 @@ def func(st): run_udf_test(data, func, "bool") -@pytest.mark.parametrize("data", [["cudf", "rapids", "AI", "gpu", "2022", "cuda"]]) +@pytest.mark.parametrize( + "data", [["cudf", "rapids", "AI", "gpu", "2022", "cuda"]] +) @pytest.mark.parametrize("rhs", ["cudf", "cuda", "gpucudf", "abc"]) def test_string_udf_le(data, rhs): # tests the `<=` operator in string udfs @@ -49,7 +57,9 @@ def func(st): run_udf_test(data, func, "bool") -@pytest.mark.parametrize("data", [["cudf", "rapids", "AI", "gpu", "2022", "cuda"]]) +@pytest.mark.parametrize( + "data", [["cudf", "rapids", "AI", "gpu", "2022", "cuda"]] +) @pytest.mark.parametrize("rhs", ["cudf", "cuda", "gpucudf", "abc"]) def test_string_udf_gt(data, rhs): # tests the `>` operator in string udfs @@ -60,7 +70,9 @@ def func(st): run_udf_test(data, func, "bool") -@pytest.mark.parametrize("data", [["cudf", "rapids", "AI", "gpu", "2022", "cuda"]]) +@pytest.mark.parametrize( + "data", [["cudf", "rapids", "AI", "gpu", "2022", "cuda"]] +) @pytest.mark.parametrize("rhs", ["cudf", "cuda", "gpucudf", "abc"]) def test_string_udf_lt(data, rhs): # tests the `<` operator in string udfs diff --git a/python/strings_udf/strings_udf/tests/test_contains.py b/python/strings_udf/strings_udf/tests/test_contains.py index ca397209c88..562a4d2535f 100644 --- a/python/strings_udf/strings_udf/tests/test_contains.py +++ b/python/strings_udf/strings_udf/tests/test_contains.py @@ -1,10 +1,13 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -from .utils import run_udf_test import pytest +from .utils import run_udf_test + -@pytest.mark.parametrize("data", [["cudf", "rapids", "AI", "gpu", "2022", "cuda"]]) +@pytest.mark.parametrize( + "data", [["cudf", "rapids", "AI", "gpu", "2022", "cuda"]] +) @pytest.mark.parametrize("substr", ["a", "cu", "2", "abc"]) def test_string_udf_contains(data, substr): # Tests contains for string UDFs diff --git a/python/strings_udf/strings_udf/tests/test_count.py b/python/strings_udf/strings_udf/tests/test_count.py index 0b9b284d9cd..faa70b43450 100644 --- a/python/strings_udf/strings_udf/tests/test_count.py +++ b/python/strings_udf/strings_udf/tests/test_count.py @@ -1,18 +1,18 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -from .utils import run_udf_test import pytest +from .utils import run_udf_test + + @pytest.mark.parametrize( - "data", [ - ["cudf", "rapids", "AI", "gpu", "2022", "cuda"] - ] + "data", [["cudf", "rapids", "AI", "gpu", "2022", "cuda"]] ) -@pytest.mark.parametrize('substr', ['c', 'cu', "2", "abc", ""]) +@pytest.mark.parametrize("substr", ["c", "cu", "2", "abc", ""]) def test_string_udf_count(data, substr): # tests the `count` function in string udfs def func(st): return st.count(substr) - run_udf_test(data, func, 'int32') + run_udf_test(data, func, "int32") diff --git a/python/strings_udf/strings_udf/tests/test_endswith.py b/python/strings_udf/strings_udf/tests/test_endswith.py index 0601429b1bf..5a09077d5f7 100644 --- a/python/strings_udf/strings_udf/tests/test_endswith.py +++ b/python/strings_udf/strings_udf/tests/test_endswith.py @@ -1,18 +1,18 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -from .utils import run_udf_test import pytest +from .utils import run_udf_test + + @pytest.mark.parametrize( - "data", [ - ["cudf", "rapids", "AI", "gpu", "2022", "cuda"] - ] + "data", [["cudf", "rapids", "AI", "gpu", "2022", "cuda"]] ) -@pytest.mark.parametrize('substr', ['c', 'cu', "2", "abc"]) +@pytest.mark.parametrize("substr", ["c", "cu", "2", "abc"]) def test_string_udf_endswith(data, substr): # tests the `endswith` function in string udfs def func(st): return st.endswith(substr) - run_udf_test(data, func, 'bool') + run_udf_test(data, func, "bool") diff --git a/python/strings_udf/strings_udf/tests/test_find.py b/python/strings_udf/strings_udf/tests/test_find.py index 6744def0993..59c03a74f28 100644 --- a/python/strings_udf/strings_udf/tests/test_find.py +++ b/python/strings_udf/strings_udf/tests/test_find.py @@ -1,18 +1,18 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -from .utils import run_udf_test import pytest +from .utils import run_udf_test + + @pytest.mark.parametrize( - "data", [ - ["cudf", "rapids", "AI", "gpu", "2022", "cuda"] - ] + "data", [["cudf", "rapids", "AI", "gpu", "2022", "cuda"]] ) -@pytest.mark.parametrize('substr', ['c', 'cu', "2", "abc", "", "gpu"]) +@pytest.mark.parametrize("substr", ["c", "cu", "2", "abc", "", "gpu"]) def test_string_udf_find(data, substr): # tests the `find` function in string udfs def func(st): return st.find(substr) - run_udf_test(data, func, 'int32') + run_udf_test(data, func, "int32") diff --git a/python/strings_udf/strings_udf/tests/test_isalnum.py b/python/strings_udf/strings_udf/tests/test_isalnum.py index a722def6313..38059eeb1f4 100644 --- a/python/strings_udf/strings_udf/tests/test_isalnum.py +++ b/python/strings_udf/strings_udf/tests/test_isalnum.py @@ -1,8 +1,10 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -from .utils import run_udf_test import pytest +from .utils import run_udf_test + + @pytest.mark.parametrize("data", [["1", "1@2", "123abc", "2.1", "", "0003"]]) def test_string_udf_isalnum(data): # tests the `rfind` function in string udfs @@ -10,4 +12,4 @@ def test_string_udf_isalnum(data): def func(st): return st.isalnum() - run_udf_test(data, func, 'bool') + run_udf_test(data, func, "bool") diff --git a/python/strings_udf/strings_udf/tests/test_isalpha.py b/python/strings_udf/strings_udf/tests/test_isalpha.py index 5faf918afff..222beb1cf4c 100644 --- a/python/strings_udf/strings_udf/tests/test_isalpha.py +++ b/python/strings_udf/strings_udf/tests/test_isalpha.py @@ -1,13 +1,17 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -from .utils import run_udf_test import pytest -@pytest.mark.parametrize("data", [["abc", "1@2", "123abc", "2.1", "@Aa", "ABC"]]) +from .utils import run_udf_test + + +@pytest.mark.parametrize( + "data", [["abc", "1@2", "123abc", "2.1", "@Aa", "ABC"]] +) def test_string_udf_isalpha(data): # tests the `isalpha` function in string udfs def func(st): return st.isalpha() - run_udf_test(data, func, 'bool') + run_udf_test(data, func, "bool") diff --git a/python/strings_udf/strings_udf/tests/test_isdecimal.py b/python/strings_udf/strings_udf/tests/test_isdecimal.py index 53adb3601d7..a1f6ad55376 100644 --- a/python/strings_udf/strings_udf/tests/test_isdecimal.py +++ b/python/strings_udf/strings_udf/tests/test_isdecimal.py @@ -1,8 +1,10 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -from .utils import run_udf_test import pytest +from .utils import run_udf_test + + @pytest.mark.parametrize("data", [["1", "12", "123abc", "2.1", "", "0003"]]) def test_string_udf_isdecimal(data): # tests the `isdecimal` function in string udfs @@ -10,4 +12,4 @@ def test_string_udf_isdecimal(data): def func(st): return st.isdecimal() - run_udf_test(data, func, 'bool') + run_udf_test(data, func, "bool") diff --git a/python/strings_udf/strings_udf/tests/test_isdigit.py b/python/strings_udf/strings_udf/tests/test_isdigit.py index 0a13ba365ff..5c32b9570de 100644 --- a/python/strings_udf/strings_udf/tests/test_isdigit.py +++ b/python/strings_udf/strings_udf/tests/test_isdigit.py @@ -1,8 +1,10 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -from .utils import run_udf_test import pytest +from .utils import run_udf_test + + @pytest.mark.parametrize("data", [["1", "12", "123abc", "2.1", "", "0003"]]) def test_string_udf_isdigit(data): # tests the `isdigit` function in string udfs @@ -10,4 +12,4 @@ def test_string_udf_isdigit(data): def func(st): return st.isdigit() - run_udf_test(data, func, 'bool') + run_udf_test(data, func, "bool") diff --git a/python/strings_udf/strings_udf/tests/test_islower.py b/python/strings_udf/strings_udf/tests/test_islower.py index e37771a8adf..993cd73e51f 100644 --- a/python/strings_udf/strings_udf/tests/test_islower.py +++ b/python/strings_udf/strings_udf/tests/test_islower.py @@ -1,13 +1,17 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -from .utils import run_udf_test import pytest -@pytest.mark.parametrize("data", [["1", "12", "123abc", "2.1", "", "0003", 'abc', 'b a', 'AbC']]) +from .utils import run_udf_test + + +@pytest.mark.parametrize( + "data", [["1", "12", "123abc", "2.1", "", "0003", "abc", "b a", "AbC"]] +) def test_string_udf_islower(data): # tests the `islower` function in string udfs def func(st): return st.islower() - run_udf_test(data, func, 'bool') + run_udf_test(data, func, "bool") diff --git a/python/strings_udf/strings_udf/tests/test_isnumeric.py b/python/strings_udf/strings_udf/tests/test_isnumeric.py index 8480740f1c3..b4df15180a9 100644 --- a/python/strings_udf/strings_udf/tests/test_isnumeric.py +++ b/python/strings_udf/strings_udf/tests/test_isnumeric.py @@ -1,8 +1,10 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -from .utils import run_udf_test import pytest +from .utils import run_udf_test + + @pytest.mark.parametrize("data", [["1", "12", "123abc", "2.1", "", "0003"]]) def test_string_udf_isnumeric(data): # tests the `isnumeric` function in string udfs @@ -10,4 +12,4 @@ def test_string_udf_isnumeric(data): def func(st): return st.isnumeric() - run_udf_test(data, func, 'bool') + run_udf_test(data, func, "bool") diff --git a/python/strings_udf/strings_udf/tests/test_isspace.py b/python/strings_udf/strings_udf/tests/test_isspace.py index db5571c6f8a..3d9a52b081b 100644 --- a/python/strings_udf/strings_udf/tests/test_isspace.py +++ b/python/strings_udf/strings_udf/tests/test_isspace.py @@ -1,8 +1,10 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -from .utils import run_udf_test import pytest +from .utils import run_udf_test + + @pytest.mark.parametrize("data", [["1", " x ", " ", "2.1", "", "0003"]]) def test_string_udf_isspace(data): # tests the `isspace` function in string udfs @@ -10,4 +12,4 @@ def test_string_udf_isspace(data): def func(st): return st.isspace() - run_udf_test(data, func, 'bool') + run_udf_test(data, func, "bool") diff --git a/python/strings_udf/strings_udf/tests/test_isupper.py b/python/strings_udf/strings_udf/tests/test_isupper.py index ad4940a6bf1..bdc2283521b 100644 --- a/python/strings_udf/strings_udf/tests/test_isupper.py +++ b/python/strings_udf/strings_udf/tests/test_isupper.py @@ -1,13 +1,17 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -from .utils import run_udf_test import pytest -@pytest.mark.parametrize("data", [["1", "12", "123abc", "2.1", "", "0003", "ABC", "AbC", " 123ABC"]]) +from .utils import run_udf_test + + +@pytest.mark.parametrize( + "data", [["1", "12", "123abc", "2.1", "", "0003", "ABC", "AbC", " 123ABC"]] +) def test_string_udf_isupper(data): # tests the `isupper` function in string udfs def func(st): return st.isupper() - run_udf_test(data, func, 'bool') + run_udf_test(data, func, "bool") diff --git a/python/strings_udf/strings_udf/tests/test_rfind.py b/python/strings_udf/strings_udf/tests/test_rfind.py index 72e529f274c..8aeb59edd9e 100644 --- a/python/strings_udf/strings_udf/tests/test_rfind.py +++ b/python/strings_udf/strings_udf/tests/test_rfind.py @@ -1,18 +1,18 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -from .utils import run_udf_test import pytest +from .utils import run_udf_test + + @pytest.mark.parametrize( - "data", [ - ["cudf", "rapids", "AI", "gpu", "2022", "cuda"] - ] + "data", [["cudf", "rapids", "AI", "gpu", "2022", "cuda"]] ) -@pytest.mark.parametrize('substr', ['c', 'cu', "2", "abc", "", "gpu"]) +@pytest.mark.parametrize("substr", ["c", "cu", "2", "abc", "", "gpu"]) def test_string_udf_rfind(data, substr): # tests the `rfind` function in string udfs def func(st): return st.rfind(substr) - run_udf_test(data, func, 'int32') + run_udf_test(data, func, "int32") diff --git a/python/strings_udf/strings_udf/tests/test_startswith.py b/python/strings_udf/strings_udf/tests/test_startswith.py index 1822bd78239..6193f81bab8 100644 --- a/python/strings_udf/strings_udf/tests/test_startswith.py +++ b/python/strings_udf/strings_udf/tests/test_startswith.py @@ -1,18 +1,18 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -from .utils import run_udf_test import pytest +from .utils import run_udf_test + + @pytest.mark.parametrize( - "data", [ - ["cudf", "rapids", "AI", "gpu", "2022", "cuda"] - ] + "data", [["cudf", "rapids", "AI", "gpu", "2022", "cuda"]] ) -@pytest.mark.parametrize('substr', ['c', 'cu', "2", "abc"]) +@pytest.mark.parametrize("substr", ["c", "cu", "2", "abc"]) def test_string_udf_startswith(data, substr): # tests the `startswith` function in string udfs def func(st): return st.startswith(substr) - run_udf_test(data, func, 'bool') + run_udf_test(data, func, "bool") diff --git a/python/strings_udf/strings_udf/tests/utils.py b/python/strings_udf/strings_udf/tests/utils.py index cecfe10bb1c..657287d6d8b 100644 --- a/python/strings_udf/strings_udf/tests/utils.py +++ b/python/strings_udf/strings_udf/tests/utils.py @@ -1,13 +1,14 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -import cudf import numba import numpy as np import pandas as pd -from cudf.testing._utils import assert_eq from numba import cuda from numba.core.typing import signature as nb_signature from numba.types import CPointer, void + +import cudf +from cudf.testing._utils import assert_eq from strings_udf import ptxpath from strings_udf._lib.cudf_jit_udf import to_string_view_array from strings_udf._typing import str_view_arg_handler, string_view