Skip to content

Commit

Permalink
Merge pull request #3 from VibhuJawa/branch-0.7
Browse files Browse the repository at this point in the history
update to latest base
  • Loading branch information
VibhuJawa committed May 8, 2019
2 parents b563256 + cda516e commit 3e92606
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 4 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
- PR #1599 Level keyword supported in groupby
- PR #929 Add support operations to dataframe
- PR #1609 Groupby accept list of Series
- PR #1658 Support `group_keys=True` keyword in groupby method

## Improvements

Expand Down Expand Up @@ -85,6 +86,7 @@
- PR #1617 `has_nulls` and `column_dtypes` for `cudf::table`
- PR #1590 Remove CFFI from the build / install process entirely
- PR #1536 Convert gpuarrow CFFI to Cython
- PR #1655 Add `Column._pointer` as a way to access underlying `gdf_column*` of a `Column`

## Bug Fixes

Expand Down Expand Up @@ -134,7 +136,7 @@
- PR #1607 Revert the change that made `column.to_dense_buffer` always return a copy, due to performance concerns
- PR #1618 ORC reader: fix assert & data output when nrows/skiprows isn't aligned to stripe boundaries
- PR #1631 Fix failure of TYPES_TEST on some gcc-7 based systems.
- PR #1641 CSV Reader: Fix skip_blank_lines behavior with Windows line terminators (\r\n)
- PR #1641 CSV Reader: Fix skip_blank_lines behavior with Windows line terminators (\r\n)
- PR #1648 ORC reader: fix non-deterministic output when skiprows is non-zero


Expand Down
2 changes: 2 additions & 0 deletions python/cudf/bindings/cudf_cpp.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ cdef gdf_context* create_context_view(flag_sorted, method, flag_distinct,
flag_sort_result, flag_sort_inplace,
null_sort_behavior)

cpdef uintptr_t column_view_pointer(col)

cpdef check_gdf_error(errcode)

# Import cudf.h header to import all functions
Expand Down
7 changes: 7 additions & 0 deletions python/cudf/bindings/cudf_cpp.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,13 @@ cdef gdf_column* column_view_from_NDArrays(size, data, mask, dtype,
return c_col


cpdef uintptr_t column_view_pointer(col):
    """
    Return pointer to a view of the underlying <gdf_column*>

    The pointer produced by ``column_view_from_column(col)`` is cast to
    ``uintptr_t`` and returned as an integer address.

    NOTE(review): nothing in this function releases the view it obtains;
    confirm against ``column_view_from_column`` whether the caller is
    expected to free it.
    """
    return <uintptr_t> column_view_from_column(col)


cdef gdf_column_to_column_mem(gdf_column* input_col):
gdf_dtype = input_col.dtype
data_ptr = int(<uintptr_t>input_col.data)
Expand Down
9 changes: 8 additions & 1 deletion python/cudf/dataframe/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from cudf.utils import cudautils, utils, ioutils
from cudf.dataframe.buffer import Buffer
from cudf.bindings.cudf_cpp import count_nonzero_mask
from cudf.bindings.cudf_cpp import count_nonzero_mask, column_view_pointer
from cudf.bindings.concat import _column_concat


Expand Down Expand Up @@ -588,3 +588,10 @@ def to_dlpack(self):
"""{docstring}"""
import cudf.io.dlpack as dlpack
return dlpack.to_dlpack(self)

@property
def _pointer(self):
    """
    Pointer to a view of this column's underlying data structure.

    The value is exactly what ``column_view_pointer`` returns when it is
    given this column.
    """
    view_address = column_view_pointer(self)
    return view_address
6 changes: 5 additions & 1 deletion python/cudf/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1771,7 +1771,7 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='',
return df

def groupby(self, by=None, sort=False, as_index=True, method="hash",
level=None):
level=None, group_keys=True):
"""Groupby
Parameters
Expand Down Expand Up @@ -1806,6 +1806,10 @@ def groupby(self, by=None, sort=False, as_index=True, method="hash",
- Since we don't support multiindex, the *by* columns are stored
as regular columns.
"""
if group_keys is not True:
raise NotImplementedError(
"The group_keys keyword is not yet implemented"
)

if by is None and level is None:
raise TypeError('groupby() requires either by or level to be'
Expand Down
8 changes: 7 additions & 1 deletion python/cudf/dataframe/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1568,7 +1568,13 @@ def diff(self, periods=1):
periods)
return Series(output_dary, name=self.name, index=self.index)

def groupby(self, group_series=None, level=None, sort=False):
def groupby(self, group_series=None, level=None, sort=False,
            group_keys=True):
    """
    Group this series and return a ``SeriesGroupBy`` object.

    Parameters
    ----------
    group_series : optional
        Forwarded unchanged to ``SeriesGroupBy``.
    level : optional
        Forwarded unchanged to ``SeriesGroupBy``.
    sort : bool, default False
        Forwarded unchanged to ``SeriesGroupBy``.
    group_keys : bool, default True
        Accepted for pandas API compatibility. Only truthy values are
        supported; the value is not forwarded to ``SeriesGroupBy``.

    Raises
    ------
    NotImplementedError
        If ``group_keys`` is falsy.
    """
    # Test truthiness rather than identity with ``True`` so that equivalent
    # flags (e.g. ``numpy.bool_(True)`` or ``1``) are not rejected.
    if not group_keys:
        raise NotImplementedError(
            "The group_keys keyword is not yet implemented"
        )

    # Imported at call time, exactly as the original implementation does.
    from cudf.groupby.groupby import SeriesGroupBy
    return SeriesGroupBy(self, group_series, level, sort)

Expand Down
6 changes: 6 additions & 0 deletions python/cudf/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@ def test_groupby_default(pdf, gdf):
assert_eq(pdf, gdf)


def test_group_keys_true(pdf, gdf):
    """
    Grouping by ``'y'`` with ``group_keys=True`` and summing should give
    equal results for the ``pdf`` and ``gdf`` fixtures.
    """
    got = gdf.groupby('y', group_keys=True).sum()
    expected = pdf.groupby('y', group_keys=True).sum()
    assert_eq(expected, got)


def test_groupby_getitem_styles():
pdf = pd.DataFrame({'x': [1, 3, 1], 'y': [1, 2, 3]})
gdf = cudf.from_pandas(pdf)
Expand Down

0 comments on commit 3e92606

Please sign in to comment.