Skip to content

Commit

Permalink
Apply suggestions from code review
Browse files (browse the repository at this point in the history)
  • Loading branch information
AlenkaF committed Aug 16, 2021
1 parent 441aada commit 954c4bb
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 9 deletions.
17 changes: 12 additions & 5 deletions packages/vaex-core/vaex/dataframe_protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,9 @@ def __init__(self, column : vaex.expression.Expression, metadata : dict = {}) ->
self._col = column

# Store the info about category
self.is_cat = metadata["vaex.cetagories_bool"][self._col.expression] #is column categorical
self.is_cat = metadata["vaex.cetagories_bool"][self._col.expression] # is column categorical
if metadata["vaex.cetagories"]:
self.labels = metadata["vaex.cetagories"][self._col.expression] #list of categories/labels
self.labels = metadata["vaex.cetagories"][self._col.expression] # list of categories/labels
else:
self.labels = metadata["vaex.cetagories"]

Expand Down Expand Up @@ -266,7 +266,10 @@ def dtype(self) -> Tuple[enum.IntEnum, int, str, str]:
bool_c = True # internal, categorical must stay categorical

dtype = self._col.dtype


if dtype.is_encoded:
dtype = dtype.index_type

return self._dtype_from_vaexdtype(dtype, bool_c)

def _dtype_from_vaexdtype(self, dtype, bool_c) -> Tuple[enum.IntEnum, int, str, str]:
Expand Down Expand Up @@ -405,7 +408,11 @@ def get_data_buffer(self) -> Tuple[_VaexBuffer, Any]: # Any is for self.dtype t
codes = self._col.values # values are already codes for the labels
buffer = _VaexBuffer(codes)
bool_c = False # If it is external (call from_dataframe) _dtype_from_vaexdtype must give data dtype
dtype = self._dtype_from_vaexdtype(self._col.dtype, bool_c)
data_dtype = self._col.dtype
# If column is arrow dictionary we have to get the type of the indices
if data_dtype.is_encoded:
data_dtype = data_dtype.index_type
dtype = self._dtype_from_vaexdtype(data_dtype, bool_c)
else:
raise NotImplementedError(f"Data type {self._col.dtype} not handled yet")

Expand Down Expand Up @@ -457,7 +464,7 @@ def metadata(self) -> Dict[str, Any]:
return {"vaex.cetagories_bool": is_category, "vaex.cetagories": labels}

def num_columns(self) -> int:
return len(self._df.columns)
return len(self._df.get_column_names())

def num_rows(self) -> int:
return len(self._df)
Expand Down
10 changes: 6 additions & 4 deletions tests/dataframe_protocol_test.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
from common import *
import vaex
import numpy as np
import pyarrow as pa
from vaex.dataframe_protocol import _from_dataframe_to_vaex, _DtypeKind

def test_float_only():
df = vaex.from_arrays(x=np.array([1.5, 2.5, 3.5]), y=np.array([9.2, 10.5, 11.8]))

def test_float_only(df_factory):
df = df_factory(x=[1.5, 2.5, 3.5], y=[9.2, 10.5, 11.8])
df2 = _from_dataframe_to_vaex(df.__dataframe__())
assert df2.x.tolist() == df.x.tolist()
assert df2.y.tolist() == df.y.tolist()

def test_mixed_intfloat():
df = vaex.from_arrays(x=np.array([1, 2, 0]), y=np.array([9.2, 10.5, 11.8]))
def test_mixed_intfloat(df_factory):
df = df_factory(x=[1, 2, 0], y=[9.2, 10.5, 11.8])
df2 = _from_dataframe_to_vaex(df.__dataframe__())
assert df2.x.tolist() == df.x.tolist()
assert df2.y.tolist() == df.y.tolist()
Expand Down

0 comments on commit 954c4bb

Please sign in to comment.