Skip to content

Commit

Permalink
Finish convert_datetime
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke committed Oct 2, 2024
1 parent 0411e4e commit 8b475da
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 61 deletions.
28 changes: 7 additions & 21 deletions python/cudf/cudf/_lib/string_casting.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
from cudf._lib.column cimport Column

from cudf._lib.scalar import as_device_scalar

from cudf._lib.scalar cimport DeviceScalar

from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES

from libcpp.memory cimport unique_ptr
Expand All @@ -14,10 +11,6 @@ from libcpp.utility cimport move

from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.scalar.scalar cimport string_scalar
from pylibcudf.libcudf.strings.convert.convert_datetime cimport (
is_timestamp as cpp_is_timestamp,
)
from pylibcudf.libcudf.strings.convert.convert_floats cimport (
from_floats as cpp_from_floats,
to_floats as cpp_to_floats,
Expand Down Expand Up @@ -460,11 +453,10 @@ def int2timestamp(
A Column with date-time represented in string format
"""
cdef string c_timestamp_format = format.encode("UTF-8")
return Column.from_pylibcudf(
plc.strings.convert.convert_datetime.from_timestamps(
input_col.to_pylibcudf(mode="read"),
c_timestamp_format,
format,
names.to_pylibcudf(mode="read")
)
)
Expand All @@ -485,12 +477,11 @@ def timestamp2int(Column input_col, dtype, format):
"""
dtype = dtype_to_pylibcudf_type(dtype)
cdef string c_timestamp_format = format.encode('UTF-8')
return Column.from_pylibcudf(
plc.strings.convert.convert_datetime.to_timestamps(
input_col.to_pylibcudf(mode="read"),
dtype,
c_timestamp_format
format
)
)

Expand All @@ -512,16 +503,11 @@ def istimestamp(Column input_col, str format):
"""
if input_col.size == 0:
return cudf.core.column.column_empty(0, dtype=cudf.dtype("bool"))
cdef column_view input_column_view = input_col.view()
cdef string c_timestamp_format = <string>str(format).encode('UTF-8')
cdef unique_ptr[column] c_result
with nogil:
c_result = move(
cpp_is_timestamp(
input_column_view,
c_timestamp_format))

return Column.from_unique_ptr(move(c_result))
plc_column = plc.strings.convert.convert_datetime.is_timestamp(
input_col.to_pylibcudf(mode="read"),
format
)
return Column.from_pylibcudf(plc_column)


def timedelta2int(Column input_col, dtype, format):
Expand Down
29 changes: 0 additions & 29 deletions python/cudf/cudf/tests/groupby/test_ordering_pandas_compat.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,16 @@ from pylibcudf.types cimport DataType
cpdef Column to_timestamps(
Column input,
DataType timestamp_type,
const string& format
str format
)

cpdef Column from_timestamps(
Column input,
const string& format,
Column timestamps,
str format,
Column input_strings_names
)

cpdef Column is_timestamp(
Column input,
const string& format,
str format,
)
80 changes: 73 additions & 7 deletions python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -15,42 +15,108 @@ from pylibcudf.types import DataType
cpdef Column to_timestamps(
Column input,
DataType timestamp_type,
const string& format
str format
):
"""
Returns a new timestamp column converting a strings column into
timestamps using the provided format pattern.
For details, see :cpp:func:`cudf::strings::to_timestamps`.
Parameters
----------
input : Column
Strings instance for this operation.
timestamp_type : DataType
The timestamp type used for creating the output column.
format : str
String specifying the timestamp format in strings.
Returns
-------
Column
New datetime column
"""
cdef unique_ptr[column] c_result
cdef string c_format = format.encode()
with nogil:
c_result = cpp_convert_datetime.to_timestamps(
input.view(),
timestamp_type.c_obj,
format
c_format
)

return Column.from_libcudf(move(c_result))

cpdef Column from_timestamps(
Column timestamps,
const string& format,
str format,
Column input_strings_names
):
"""
Returns a new strings column converting a timestamp column into
strings using the provided format pattern.
For details, see :cpp:func:`cudf::strings::from_timestamps`.
Parameters
----------
timestamps : Column
Timestamp values to convert
format : str
The string specifying output format.
input_strings_names : Column
The string names to use for weekdays ("%a", "%A") and months ("%b", "%B").
Returns
-------
Column
New strings column with formatted timestamps.
"""
cdef unique_ptr[column] c_result
cdef string c_format = format.encode()
with nogil:
c_result = cpp_convert_datetime.from_timestamps(
input.view(),
format,
timestamps.view(),
c_format,
input_strings_names.view()
)

return Column.from_libcudf(move(c_result))

cpdef Column is_timestamp(
Column input,
const string& format
str format
):
"""
Verifies the given strings column can be parsed to timestamps
using the provided format pattern.
For details, see :cpp:func:`cudf::strings::is_timestamp`.
Parameters
----------
input : Column
Strings instance for this operation.
format : str
String specifying the timestamp format in strings.
Returns
-------
Column
New bool column.
"""
cdef unique_ptr[column] c_result
cdef string c_format = format.encode()
with nogil:
c_result = cpp_convert_datetime.is_timestamp(
input.view(),
format
c_format
)

return Column.from_libcudf(move(c_result))
46 changes: 46 additions & 0 deletions python/pylibcudf/pylibcudf/tests/test_string_convert_datetime.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
import datetime

import pyarrow as pa
import pyarrow.compute as pc
import pylibcudf as plc
import pytest
from utils import assert_column_eq


@pytest.fixture
def fmt():
    """Provide the strftime/strptime pattern shared by the tests here."""
    datetime_pattern = "%Y-%m-%dT%H:%M:%S"
    return datetime_pattern


def test_to_timestamp(fmt):
    """Parse a strings column to second-resolution timestamps.

    The pylibcudf result is compared against ``pyarrow.compute.strptime``
    applied to the same input (one valid string and one null).
    """
    strings = pa.array(["2020-01-01T01:01:01", None])
    seconds_type = plc.DataType(plc.TypeId.TIMESTAMP_SECONDS)
    convert_datetime = plc.strings.convert.convert_datetime

    result = convert_datetime.to_timestamps(
        plc.interop.from_arrow(strings),
        seconds_type,
        fmt,
    )

    # "s" asks pyarrow for the same unit as TIMESTAMP_SECONDS above.
    expected = pc.strptime(strings, fmt, "s")
    assert_column_eq(result, expected)


def test_from_timestamp(fmt):
    """Format a timestamp column to strings and compare to literal output."""
    timestamps = pa.array([datetime.datetime(2020, 1, 1, 1, 1, 1), None])
    # An empty strings column is passed as the names argument.
    empty_names = pa.array([], type=pa.string())
    convert_datetime = plc.strings.convert.convert_datetime

    result = convert_datetime.from_timestamps(
        plc.interop.from_arrow(timestamps),
        fmt,
        plc.interop.from_arrow(empty_names),
    )

    # The expectation is written out by hand because pc.strftime will add
    # the extra %f.
    expected = pa.array(["2020-01-01T01:01:01", None])
    assert_column_eq(result, expected)


def test_is_timestamp(fmt):
    """Check per-row validation of strings against the timestamp format.

    The input holds a string in the full format, a null, and a date-only
    string; the expected output is True, null and False respectively.
    """
    candidates = pa.array(["2020-01-01T01:01:01", None, "2020-01-01"])
    plc_candidates = plc.interop.from_arrow(candidates)
    convert_datetime = plc.strings.convert.convert_datetime

    result = convert_datetime.is_timestamp(plc_candidates, fmt)

    expected = pa.array([True, None, False])
    assert_column_eq(result, expected)

0 comments on commit 8b475da

Please sign in to comment.