Skip to content

Commit

Permalink
fix: Fix caching from generating row numbers in partial ordering mode
Browse files Browse the repository at this point in the history
  • Loading branch information
TrevorBergeron authored Aug 5, 2024
1 parent 5317327 commit 52b7786
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 3 deletions.
2 changes: 1 addition & 1 deletion bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,7 @@ def head(self, n: int = 5) -> Series:
def tail(self, n: int = 5) -> Series:
    """Return the last ``n`` elements of the series.

    Args:
        n: Number of trailing elements to select. ``0`` selects nothing.
            A negative value is forwarded to the slice unchanged, so
            ``-k`` selects everything except the first ``k`` elements.

    Returns:
        The result of slicing ``self.iloc``, typed as a ``Series``.
    """
    # ``-0 == 0``, so ``self.iloc[-0:]`` would select *every* row rather
    # than none. Use an explicitly empty slice for that case.
    window = slice(0, 0) if n == 0 else slice(-n, None)
    # The cast target is quoted: it is only meaningful to type checkers and
    # does not need to be resolved at runtime.
    return typing.cast("Series", self.iloc[window])

def peek(self, n: int = 5, *, force: bool = True) -> pandas.DataFrame:
def peek(self, n: int = 5, *, force: bool = True) -> pandas.Series:
"""
Preview n arbitrary elements from the series without guarantees about row selection or ordering.
Expand Down
4 changes: 3 additions & 1 deletion bigframes/session/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1997,8 +1997,10 @@ def _cache_with_session_awareness(self, array_value: core.ArrayValue) -> None:
)
if len(cluster_cols) > 0:
self._cache_with_cluster_cols(core.ArrayValue(target), cluster_cols)
else:
elif self._strictly_ordered:
self._cache_with_offsets(core.ArrayValue(target))
else:
self._cache_with_cluster_cols(core.ArrayValue(target), [])

def _simplify_with_caching(self, array_value: core.ArrayValue):
"""Attempts to handle the complexity by caching duplicated subtrees and breaking the query into pieces."""
Expand Down
15 changes: 14 additions & 1 deletion tests/system/small/test_unordered.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@

import bigframes.exceptions
import bigframes.pandas as bpd
from tests.system.utils import assert_pandas_df_equal, skip_legacy_pandas
from tests.system.utils import (
assert_pandas_df_equal,
assert_series_equal,
skip_legacy_pandas,
)


def test_unordered_mode_sql_no_hash(unordered_session):
Expand Down Expand Up @@ -51,6 +55,15 @@ def test_unordered_mode_cache_aggregate(unordered_session):
assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)


def test_unordered_mode_series_peek(unordered_session):
    """Peeking at a grouped sum in an unordered session matches pandas."""
    local_series = pd.Series([1, 2, 3, 4, 5, 6], dtype=pd.Int64Dtype())
    remote_series = bpd.Series(local_series, session=unordered_session)

    expected_sums = local_series.groupby(local_series % 4).sum()
    remote_sums = remote_series.groupby(remote_series % 4).sum()
    peeked = remote_sums.peek(2)

    # peek() makes no guarantee about which rows come back, so compare only
    # against the expected values for the index labels actually returned.
    expected_subset = expected_sums.reindex(peeked.index)
    assert_series_equal(peeked, expected_subset)


def test_unordered_mode_single_aggregate(unordered_session):
pd_df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, dtype=pd.Int64Dtype())
bf_df = bpd.DataFrame(pd_df, session=unordered_session)
Expand Down

0 comments on commit 52b7786

Please sign in to comment.