Skip to content

Commit

Permalink
Add transpose API to pylibcudf (#16749)
Browse files Browse the repository at this point in the history
Contributes to #15162

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - Matthew Murray (https://github.com/Matt711)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #16749
  • Loading branch information
mroeschke authored Sep 25, 2024
1 parent c1f377a commit 503ce03
Show file tree
Hide file tree
Showing 9 changed files with 95 additions and 22 deletions.
1 change: 1 addition & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ This page provides API documentation for pylibcudf.
table
traits
transform
transpose
types
unary

Expand Down
6 changes: 6 additions & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/transpose.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
=========
transpose
=========

.. automodule:: pylibcudf.transpose
    :members:
30 changes: 8 additions & 22 deletions python/cudf/cudf/_lib/transpose.pyx
Original file line number Diff line number Diff line change
@@ -1,32 +1,18 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr
from libcpp.pair cimport pair
from libcpp.utility cimport move

from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.table.table_view cimport table_view
from pylibcudf.libcudf.transpose cimport transpose as cpp_transpose
import pylibcudf as plc

from cudf._lib.column cimport Column
from cudf._lib.utils cimport columns_from_table_view, table_view_from_columns


def transpose(list source_columns):
"""Transpose m n-row columns into n m-row columns
"""
cdef pair[unique_ptr[column], table_view] c_result
cdef table_view c_input = table_view_from_columns(source_columns)

with nogil:
c_result = move(cpp_transpose(c_input))

# Notice, the data pointer of `result_owner` has been exposed
# through `c_result.second` at this point.
result_owner = Column.from_unique_ptr(
move(c_result.first), data_ptr_exposed=True
)
return columns_from_table_view(
c_result.second,
owners=[result_owner] * c_result.second.num_columns()
input_table = plc.table.Table(
[col.to_pylibcudf(mode="read") for col in source_columns]
)
result_table = plc.transpose.transpose(input_table)
return [
Column.from_pylibcudf(col, data_ptr_exposed=True)
for col in result_table.columns()
]
1 change: 1 addition & 0 deletions python/pylibcudf/pylibcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ set(cython_sources
table.pyx
traits.pyx
transform.pyx
transpose.pyx
types.pyx
unary.pyx
utils.pyx
Expand Down
2 changes: 2 additions & 0 deletions python/pylibcudf/pylibcudf/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ from . cimport (
strings,
traits,
transform,
transpose,
types,
unary,
)
Expand Down Expand Up @@ -72,6 +73,7 @@ __all__ = [
"sorting",
"traits",
"transform",
"transpose",
"types",
"unary",
]
2 changes: 2 additions & 0 deletions python/pylibcudf/pylibcudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
strings,
traits,
transform,
transpose,
types,
unary,
)
Expand Down Expand Up @@ -86,6 +87,7 @@
"sorting",
"traits",
"transform",
"transpose",
"types",
"unary",
]
32 changes: 32 additions & 0 deletions python/pylibcudf/pylibcudf/tests/test_transpose.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

import pyarrow as pa
import pylibcudf as plc
import pytest
from packaging.version import parse


@pytest.mark.skipif(
    parse(pa.__version__) < parse("16.0.0"),
    reason="https://github.com/apache/arrow/pull/40070",
)
@pytest.mark.parametrize("arr", [[], [1, 2, 3], [1, 2], [1]])
def test_transpose(arr):
    """Compare ``plc.transpose.transpose`` with a pandas ``.T`` reference.

    A two-column table (``"a"`` and ``"b"``, both holding ``arr``) is
    round-tripped through pylibcudf and transposed there; the same table
    is transposed with pandas to build the expected result.

    The test is skipped when the installed pyarrow is older than 16.0.0
    (see the Arrow pull request linked in the skip reason).
    """
    source = pa.table({"a": arr, "b": arr})

    # Transpose on the pylibcudf side and bring the result back to Arrow.
    transposed = plc.transpose.transpose(plc.interop.from_arrow(source))
    got = plc.interop.to_arrow(transposed)

    # Reference transpose via pandas; the resulting columns are renamed to
    # empty strings, one per input row.
    reference = pa.Table.from_pandas(
        source.to_pandas().T, preserve_index=False
    )
    reference = reference.rename_columns([""] * len(arr))

    # Rebuild the reference with the result's schema before comparing.
    reference = pa.table(reference, schema=got.schema)
    assert got.equals(reference)
5 changes: 5 additions & 0 deletions python/pylibcudf/pylibcudf/transpose.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
from .table cimport Table


cpdef Table transpose(Table input_table)
38 changes: 38 additions & 0 deletions python/pylibcudf/pylibcudf/transpose.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
from libcpp.memory cimport unique_ptr
from libcpp.pair cimport pair
from libcpp.utility cimport move
from pylibcudf.libcudf cimport transpose as cpp_transpose
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.table.table_view cimport table_view

from .column cimport Column
from .table cimport Table


cpdef Table transpose(Table input_table):
    """Transpose a Table.

    For details, see :cpp:func:`transpose`.

    Parameters
    ----------
    input_table : Table
        Table to transpose

    Returns
    -------
    Table
        Transposed table.
    """
    cdef pair[unique_ptr[column], table_view] c_result
    cdef Column owner_col
    cdef Table owners

    # The libcudf call runs without the GIL held.
    with nogil:
        c_result = move(cpp_transpose.transpose(input_table.view()))

    # libcudf returns a single owning column (``first``) together with a
    # table_view (``second``). Take ownership of that column once ...
    owner_col = Column.from_libcudf(move(c_result.first))

    # ... and list the same Column object once per column of the view, so
    # every column of the returned Table is paired with the same owner.
    # NOTE(review): presumably each view column points into that one
    # column's data -- confirm against the libcudf transpose docs.
    owners = Table([owner_col] * c_result.second.num_columns())

    return Table.from_table_view(c_result.second, owners)

0 comments on commit 503ce03

Please sign in to comment.