
build: Use Pandas 2.0 forward compatible API (#582)
jjerphan authored Jul 12, 2023
1 parent 754a827 commit 68ded9f
Showing 6 changed files with 47 additions and 20 deletions.
@@ -53,7 +53,14 @@ def __init__(self, wrapped: pd.DataFrame, *, with_timezone_attr: bool, timezone_

def __getitem__(self, item):
if isinstance(item, slice):
-open_ended = slice(item.start + timedelta(microseconds=1), item.stop - timedelta(microseconds=1), item.step)
+# Comparing datetimes with timezone to datetimes without timezone has been deprecated in Pandas 1.2.0
+# (see https://github.com/pandas-dev/pandas/pull/36148/) and is no longer supported in Pandas 2.0
+# (see https://github.com/pandas-dev/pandas/pull/49492/).
+# We explicitly remove the timezone from the start and stop of the slice to be able to use the
+# index of the wrapped DataFrame.
+start_wo_tz = item.start.replace(tzinfo=None) + timedelta(microseconds=1)
+stop_wo_tz = item.stop.replace(tzinfo=None) - timedelta(microseconds=1)
+open_ended = slice(start_wo_tz, stop_wo_tz, item.step)
return CustomTimeseries(
self.wrapped[open_ended],
with_timezone_attr=self.with_timezone_attr,
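Why this patch works: stripping tzinfo restores a naive-to-naive comparison against the wrapped DataFrame's index. A minimal standalone sketch of the failure mode and the workaround (not part of this commit; the frame and bounds below are made up):

    import pandas as pd
    from datetime import datetime, timedelta, timezone

    # tz-naive index, like the wrapped DataFrame's
    df = pd.DataFrame({"col": [1, 2, 3]}, index=pd.date_range("2023-01-01", periods=3, freq="D"))

    # tz-aware slice bounds, as a caller might supply
    start = datetime(2023, 1, 1, tzinfo=timezone.utc)
    stop = datetime(2023, 1, 3, tzinfo=timezone.utc)

    # df.loc[start:stop] is expected to raise under Pandas 2.0 (tz-aware bounds
    # cannot be compared with a tz-naive DatetimeIndex).

    # Dropping tzinfo first, as the patch does, works on both 1.x and 2.0:
    start_wo_tz = start.replace(tzinfo=None) + timedelta(microseconds=1)
    stop_wo_tz = stop.replace(tzinfo=None) - timedelta(microseconds=1)
    print(df.loc[start_wo_tz:stop_wo_tz])  # open-ended slice: only 2023-01-02 remains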
40 changes: 30 additions & 10 deletions python/tests/unit/arcticdb/test_column_stats.py
@@ -19,7 +19,11 @@ def generate_symbol(lib, sym):
lib.write(sym, df0)
lib.append(sym, df1)
expected_column_stats = lib.read_index(sym)
-expected_column_stats.drop(expected_column_stats.columns.difference(["start_index", "end_index"]), 1, inplace=True)
+expected_column_stats.drop(
+    expected_column_stats.columns.difference(["start_index", "end_index"]),
+    axis=1,
+    inplace=True,
+)
expected_column_stats = expected_column_stats.iloc[[0, 1]]
expected_column_stats["v1.0_MIN(col_1)"] = [df0["col_1"].min(), df1["col_1"].min()]
expected_column_stats["v1.0_MAX(col_1)"] = [df0["col_1"].max(), df1["col_1"].max()]
@@ -41,7 +45,7 @@ def test_column_stats_basic_flow(lmdb_version_store_tiny_segment):
expected_column_stats = generate_symbol(lib, sym)
expected_column_stats.drop(
expected_column_stats.columns.difference(["start_index", "end_index", "v1.0_MIN(col_1)", "v1.0_MAX(col_1)"]),
-1,
+axis=1,
inplace=True,
)

@@ -74,7 +78,11 @@ def test_column_stats_infinity(lmdb_version_store_tiny_segment):
lib.append(sym, df1)
lib.append(sym, df2)
expected_column_stats = lib.read_index(sym)
-expected_column_stats.drop(expected_column_stats.columns.difference(["start_index", "end_index"]), 1, inplace=True)
+expected_column_stats.drop(
+    expected_column_stats.columns.difference(["start_index", "end_index"]),
+    axis=1,
+    inplace=True,
+)
expected_column_stats = expected_column_stats.iloc[[0, 1, 2]]
expected_column_stats["v1.0_MIN(col_1)"] = [df0["col_1"].min(), df1["col_1"].min(), df2["col_1"].min()]
expected_column_stats["v1.0_MAX(col_1)"] = [df0["col_1"].max(), df1["col_1"].max(), df2["col_1"].max()]
Expand All @@ -94,7 +102,7 @@ def test_column_stats_as_of(lmdb_version_store_tiny_segment):
expected_column_stats = expected_column_stats.iloc[[0]]
expected_column_stats.drop(
expected_column_stats.columns.difference(["start_index", "end_index", "v1.0_MIN(col_1)", "v1.0_MAX(col_1)"]),
-1,
+axis=1,
inplace=True,
)

@@ -150,7 +158,7 @@ def test_column_stats_multiple_indexes_different_columns(lmdb_version_store_tiny

expected_column_stats.drop(
expected_column_stats.columns.difference(["start_index", "end_index", "v1.0_MIN(col_1)", "v1.0_MAX(col_1)"]),
-1,
+axis=1,
inplace=True,
)
column_stats = lib.read_column_stats(sym)
@@ -251,7 +259,7 @@ def test_column_stats_multiple_creates(lmdb_version_store_tiny_segment):
expected_column_stats = base_expected_column_stats.copy()
expected_column_stats.drop(
expected_column_stats.columns.difference(["start_index", "end_index", "v1.0_MIN(col_1)", "v1.0_MAX(col_1)"]),
-1,
+axis=1,
inplace=True,
)
column_stats = lib.read_column_stats(sym)
@@ -287,10 +295,14 @@ def test_column_stats_duplicated_primary_index(lmdb_version_store_tiny_segment):
lib = lmdb_version_store_tiny_segment
sym = "test_column_stats_duplicated_primary_index"

-total_df = df0.append(df1)
+total_df = pd.concat((df0, df1))
lib.write(sym, total_df)
expected_column_stats = lib.read_index(sym)
-expected_column_stats.drop(expected_column_stats.columns.difference(["start_index", "end_index"]), 1, inplace=True)
+expected_column_stats.drop(
+    expected_column_stats.columns.difference(["start_index", "end_index"]),
+    axis=1,
+    inplace=True,
+)
expected_column_stats = expected_column_stats.iloc[[0, 1]]
expected_column_stats["v1.0_MIN(col_1)"] = [df0["col_1"].min(), df1["col_1"].min()]
expected_column_stats["v1.0_MAX(col_1)"] = [df0["col_1"].max(), df1["col_1"].max()]
@@ -324,7 +336,11 @@ def test_column_stats_dynamic_schema_missing_data(lmdb_version_store_tiny_segmen
df = lib.read(sym).data

expected_column_stats = lib.read_index(sym)
-expected_column_stats.drop(expected_column_stats.columns.difference(["start_index", "end_index"]), 1, inplace=True)
+expected_column_stats.drop(
+    expected_column_stats.columns.difference(["start_index", "end_index"]),
+    axis=1,
+    inplace=True,
+)
expected_column_stats = expected_column_stats.iloc[[0, 1, 2, 3, 4]]
expected_column_stats["v1.0_MIN(col_1)"] = [
df0["col_1"].min(),
@@ -395,7 +411,11 @@ def test_column_stats_dynamic_schema_types_changing(lmdb_version_store_tiny_segm
lib.append(sym, df1)

expected_column_stats = lib.read_index(sym)
-expected_column_stats.drop(expected_column_stats.columns.difference(["start_index", "end_index"]), 1, inplace=True)
+expected_column_stats.drop(
+    expected_column_stats.columns.difference(["start_index", "end_index"]),
+    axis=1,
+    inplace=True,
+)
expected_column_stats = expected_column_stats.iloc[[0, 1]]
expected_column_stats["v1.0_MIN(int_widening)"] = [df0["int_widening"].min(), df1["int_widening"].min()]
expected_column_stats["v1.0_MAX(int_widening)"] = [df0["int_widening"].max(), df1["int_widening"].max()]
@@ -328,7 +328,7 @@ def test_aggregation_grouping_column_missing_from_row_group(lmdb_version_store_d
{"to_sum": [3, 4]},
index=np.arange(2, 4),
)
-expected = df0.append(df1).groupby("grouping_column").agg({"to_sum": "sum"})
+expected = pd.concat((df0, df1)).groupby("grouping_column").agg({"to_sum": "sum"})

symbol = "test_aggregation_grouping_column_missing_from_row_group"
lib.write(symbol, df0)
6 changes: 3 additions & 3 deletions python/tests/unit/arcticdb/version_store/test_empty_writes.py
@@ -23,15 +23,15 @@ def test_write_no_rows(lmdb_version_store, sym):
assert_frame_equal(lmdb_version_store.read(sym).data, df)

df2 = pd.DataFrame([[1.3, 6, "test"]], columns=column_names, index=[pd.Timestamp(0)])
-df2 = df.append(df2)
+df2 = pd.concat((df, df2))
# coercing not needed
lmdb_version_store.append(sym, df2, dynamic_strings=True)
assert_frame_equal(lmdb_version_store.read(sym).data, df2)

df3 = pd.DataFrame(
[[3.3, 8, None], [2.3, 10, "test2"]], columns=column_names, index=[pd.Timestamp(1), pd.Timestamp(2)]
)
-df2 = df2.append(df3)
+df2 = pd.concat((df2, df3))
# coercing not needed
lmdb_version_store.append(sym, df3, dynamic_strings=True)
assert_frame_equal(lmdb_version_store.read(sym).data, df2)
@@ -100,7 +100,7 @@ def test_write_no_rows_and_columns(lmdb_version_store_dynamic_schema, sym):
columns=column_names + ["d"],
index=[pd.Timestamp(3), pd.Timestamp(4)],
)
-df5 = df2.append(df4)
+df5 = pd.concat((df2, df4))
lmdb_version_store_dynamic_schema.append(sym, df4, dynamic_strings=True)
assert_frame_equal(lmdb_version_store_dynamic_schema.read(sym).data, df5)

4 changes: 2 additions & 2 deletions python/tests/unit/arcticdb/version_store/test_parallel.py
@@ -108,7 +108,7 @@ def test_sort_merge_write(lmdb_version_store):
new_df = pd.DataFrame(data=vals, index=index)

dataframes.append(new_df)
-df = df.append(new_df)
+df = pd.concat((df, new_df))
dt = dt + datetime.timedelta(days=1)

random.shuffle(dataframes)
@@ -139,7 +139,7 @@ def test_sort_merge_append(lmdb_version_store_dynamic_schema):
vals = {c: random_floats(num_rows_per_day) for c in cols}
new_df = pd.DataFrame(data=vals, index=index)
dataframes.append(new_df)
-df = df.append(new_df)
+df = pd.concat((df, new_df))
dt = dt + datetime.timedelta(days=1)

half_way = len(dataframes) / 2
@@ -55,18 +55,18 @@ def test_project_column_types_changing_and_missing(lmdb_version_store_dynamic_sc
# uint8
df = pd.DataFrame({"col_to_project": np.arange(2, dtype=np.uint8), "data_col": [2, 3]}, index=np.arange(2, 4))
lib.append(symbol, df)
-expected = expected.append(df)
+expected = pd.concat((expected, df))
# Missing
df = pd.DataFrame({"data_col": [4, 5]}, index=np.arange(4, 6))
lib.append(symbol, df)
-expected = expected.append(df)
+expected = pd.concat((expected, df))
# int16
df = pd.DataFrame(
{"col_to_project": np.arange(200, 202, dtype=np.int16), "data_col": [6, 7]}, index=np.arange(6, 8)
)
lib.append(symbol, df)

-expected = expected.append(df)
+expected = pd.concat((expected, df))
expected["projected_col"] = expected["col_to_project"] * 2
q = QueryBuilder()
q = q.apply("projected_col", q["col_to_project"] * 2)
