Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delegate basic arithmetic and logical binops to libgdf. #14

Merged
merged 5 commits into from
Jun 2, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 10 additions & 20 deletions conda_environments/testing_py35.yml
Original file line number Diff line number Diff line change
@@ -1,25 +1,15 @@
name: pycudf_testing_py35
channels:
- numba
- gpuopenanalytics/label/dev
- defaults
dependencies:
- accelerate_cudalib=2.0=0
- cudatoolkit=8.0=0
- mkl=2017.0.1=0
- numpy=1.12.1=py35_0
- openssl=1.0.2k=1
- pip=9.0.1=py35_1
- py=1.4.33=py35_0
- pytest=3.0.7=py35_0
- python=3.5.3=1
- readline=6.2=2
- setuptools=27.2.0=py35_0
- sqlite=3.13.0=0
- tk=8.5.18=0
- wheel=0.29.0=py35_0
- xz=5.2.2=1
- zlib=1.2.8=3
- llvmlite=0.18
- numba=0.33
- pip:
- flatbuffers==2015.12.22.1
- pytest=3.0.7
- python=3.5.3
- setuptools=27.2.0
- accelerate_cudalib=2.0
- cudatoolkit=8.0
- llvmlite>=0.18
- numpy=1.12.1
- numba>=0.33
- libgdf_cffi>=0.1.0a1.dev
51 changes: 51 additions & 0 deletions pygdf/_gdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""
This file provides bindings to the libgdf library.
"""
import numpy as np

from libgdf_cffi import ffi, libgdf


def columnview(size, data, mask=None, dtype=None):
    """
    Make a column view.

    Allocates a new ``gdf_column`` struct through cffi and fills it in by
    calling ``libgdf.gdf_column_view``.

    Parameters
    ----------
    size
        Forwarded to ``gdf_column_view`` as the column size.
    data
        Buffer exposing ``to_gpu_array()``.  Its ``dtype`` attribute is used
        when *dtype* is not given.
    mask : optional
        Buffer exposing ``to_gpu_array()``, or None.  None is passed to
        libgdf as a NULL pointer.
    dtype : optional
        Numpy dtype (or anything ``np.dtype`` accepts) describing the column.
        Defaults to ``data.dtype``.

    Returns
    -------
    The cffi ``gdf_column*`` object that was populated.

    Raises
    ------
    KeyError
        From ``np_to_gdf_dtype`` if the dtype has no libgdf equivalent.
    """
    def unwrap(buffer):
        # libgdf takes raw addresses; a missing buffer becomes a NULL pointer.
        if buffer is None:
            return ffi.NULL
        devary = buffer.to_gpu_array()
        return ffi.cast('void*', devary.device_ctypes_pointer.value)

    # Test against None explicitly.  A plain (non-structured) ``np.dtype``
    # instance has length 0 and can therefore evaluate as falsy, so the
    # former ``dtype or data.dtype`` silently discarded an explicitly
    # passed dtype instance.
    if dtype is None:
        dtype = data.dtype

    colview = ffi.new('gdf_column*')
    # NOTE(review): the view only stores raw pointers; presumably the caller
    # must keep *data* and *mask* alive while the view is in use -- confirm.
    libgdf.gdf_column_view(colview, unwrap(data), unwrap(mask), size,
                           np_to_gdf_dtype(dtype))

    return colview


def apply_binaryop(binop, lhs, rhs, out):
    """Call the binary operator *binop* on *lhs* and *rhs*, writing to *out*.

    Each of *lhs*, *rhs* and *out* must expose a ``_cffi_view`` attribute.
    The three views are handed to *binop* in the order (lhs, rhs, out).
    Whatever *binop* returns is discarded; this function returns None.
    """
    views = [operand._cffi_view for operand in (lhs, rhs, out)]
    binop(*views)


def apply_unaryop(unaop, inp, out):
    """Call the unary operator *unaop* on *inp*, writing to *out*.

    Both *inp* and *out* must expose a ``_cffi_view`` attribute.  The views
    are handed to *unaop* in the order (inp, out).  Whatever *unaop*
    returns is discarded; this function returns None.
    """
    source_view = inp._cffi_view
    target_view = out._cffi_view
    unaop(source_view, target_view)


def np_to_gdf_dtype(dtype):
    """Translate a numpy dtype into the matching libgdf dtype constant.

    *dtype* is normalised with ``np.dtype`` first, so dtype instances,
    scalar types and dtype strings are all accepted.  A dtype that is not
    in the table below raises ``KeyError``.
    """
    gdf_codes = {
        np.float64: libgdf.GDF_FLOAT64,
        np.float32: libgdf.GDF_FLOAT32,
        np.int64: libgdf.GDF_INT64,
        np.int32: libgdf.GDF_INT32,
        np.int8: libgdf.GDF_INT8,
        # Booleans share the 8-bit integer code.
        np.bool_: libgdf.GDF_INT8,
    }
    scalar_type = np.dtype(dtype).type
    return gdf_codes[scalar_type]

120 changes: 119 additions & 1 deletion pygdf/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

from numba import cuda

from . import cudautils, utils
from libgdf_cffi import libgdf
from . import cudautils, utils, _gdf



class DataFrame(object):
Expand Down Expand Up @@ -499,6 +501,10 @@ def __init__(self, size, dtype, buffer=None, mask=None, null_count=None):
raise ValueError('null_count must be provided')
null_count = 0
self._null_count = null_count
# make cffi view for libgdf
libgdf.gdf_column_view
self._cffi_view = _gdf.columnview(size=self._size, data=self._data,
mask=self._mask)

def __len__(self):
"""Returns the size of the ``Series`` including null values.
Expand Down Expand Up @@ -532,6 +538,100 @@ def __getitem__(self, arg):
else:
raise NotImplementedError(type(arg))

def __bool__(self):
"""Always raise TypeError when converting a Series
into a boolean.
"""
raise TypeError("can't compute boolean for {!r}".format(type(self)))

def _call_binop(self, other, fn, out_dtype):
    """
    Internal helper: run the binary operator *fn* on *self* and *other*.

    A fresh output Series of dtype *out_dtype* and length ``len(self)`` is
    allocated, *fn* is applied through ``_gdf.apply_binaryop`` and the
    output Series is returned.
    """
    # Device array backing the result, sized after *self*.
    result_buffer = cuda.device_array(shape=len(self), dtype=out_dtype)
    result = Series.from_array(result_buffer)
    _gdf.apply_binaryop(fn, self, other, result)
    return result

def _binaryop(self, other, fn):
    """
    Internal helper: apply the binary operator *fn* to *self* and *other*.

    When *other* is a Series the operation is carried out with
    ``self.dtype`` as the output dtype and the resulting Series is
    returned.  For any other operand type ``NotImplemented`` is returned.
    """
    if not isinstance(other, Series):
        # Only Series operands are handled here.
        return NotImplemented
    return self._call_binop(other, fn, self.dtype)

def _call_unaop(self, fn, out_dtype):
    """
    Internal helper: run the unary operator *fn* on *self*.

    A fresh output Series of dtype *out_dtype* and length ``len(self)`` is
    allocated, *fn* is applied through ``_gdf.apply_unaryop`` and the
    output Series is returned.
    """
    # Device array backing the result, sized after *self*.
    result_buffer = cuda.device_array(shape=len(self), dtype=out_dtype)
    result = Series.from_array(result_buffer)
    _gdf.apply_unaryop(fn, self, result)
    return result

def _unaryop(self, fn):
"""
Internal util to call a unary operator *fn* on operands *self*.
Return the output Series. The output dtype is determined by the input
operand.
"""
return self._call_unaop(fn, self.dtype)

def __add__(self, other):
    """Delegate ``self + other`` to ``libgdf.gdf_add_generic``."""
    return self._binaryop(other, libgdf.gdf_add_generic)

def __sub__(self, other):
    """Delegate ``self - other`` to ``libgdf.gdf_sub_generic``."""
    return self._binaryop(other, libgdf.gdf_sub_generic)

def __mul__(self, other):
    """Delegate ``self * other`` to ``libgdf.gdf_mul_generic``."""
    return self._binaryop(other, libgdf.gdf_mul_generic)

def __floordiv__(self, other):
    """Delegate ``self // other`` to ``libgdf.gdf_floordiv_generic``."""
    return self._binaryop(other, libgdf.gdf_floordiv_generic)

def __truediv__(self, other):
    """Delegate ``self / other`` to ``libgdf.gdf_div_generic``."""
    return self._binaryop(other, libgdf.gdf_div_generic)

# The classic division operator shares the true-division implementation.
__div__ = __truediv__

def _compare(self, other, fn):
    """
    Internal helper: apply the comparison operator *fn* to *self* and
    *other*.

    When *other* is a Series the comparison is carried out with
    ``np.bool_`` as the output dtype and the resulting Series is returned.
    For any other operand type ``NotImplemented`` is returned.
    """
    if not isinstance(other, Series):
        # Only Series operands are handled here.
        return NotImplemented
    return self._call_binop(other, fn, np.bool_)

def __eq__(self, other):
    """Delegate ``self == other`` to ``libgdf.gdf_eq_generic``."""
    return self._compare(other, libgdf.gdf_eq_generic)

def __ne__(self, other):
    """Delegate ``self != other`` to ``libgdf.gdf_ne_generic``."""
    return self._compare(other, libgdf.gdf_ne_generic)

def __lt__(self, other):
    """Delegate ``self < other`` to ``libgdf.gdf_lt_generic``."""
    return self._compare(other, libgdf.gdf_lt_generic)

def __le__(self, other):
    """Delegate ``self <= other`` to ``libgdf.gdf_le_generic``."""
    return self._compare(other, libgdf.gdf_le_generic)

def __gt__(self, other):
    """Delegate ``self > other`` to ``libgdf.gdf_gt_generic``."""
    return self._compare(other, libgdf.gdf_gt_generic)

def __ge__(self, other):
    """Delegate ``self >= other`` to ``libgdf.gdf_ge_generic``."""
    return self._compare(other, libgdf.gdf_ge_generic)

@property
def dtype(self):
"""dtype of the Series"""
Expand Down Expand Up @@ -720,6 +820,24 @@ def scale(self):
scaled = cudautils.compute_scale(gpuarr, vmin, vmax)
return Series.from_array(scaled)

# Rounding

def ceil(self):
    """Return a new Series with every value rounded upward.

    Each value becomes the smallest integral value not less than the
    original.  The work is delegated to ``libgdf.gdf_ceil_generic``.
    """
    rounding_op = libgdf.gdf_ceil_generic
    return self._unaryop(rounding_op)

def floor(self):
    """Return a new Series with every value rounded downward.

    Each value becomes the largest integral value not greater than the
    original.  The work is delegated to ``libgdf.gdf_floor_generic``.
    """
    rounding_op = libgdf.gdf_floor_generic
    return self._unaryop(rounding_op)


class BufferSentryError(ValueError):
pass
Expand Down
Loading