diff --git a/python/tests/integration/arcticdb/version_store/test_update_with_date_range.py b/python/tests/integration/arcticdb/version_store/test_update_with_date_range.py
index 73e25aaf63..e220fc6177 100644
--- a/python/tests/integration/arcticdb/version_store/test_update_with_date_range.py
+++ b/python/tests/integration/arcticdb/version_store/test_update_with_date_range.py
@@ -53,7 +53,14 @@ def __init__(self, wrapped: pd.DataFrame, *, with_timezone_attr: bool, timezone_
     def __getitem__(self, item):
         if isinstance(item, slice):
-            open_ended = slice(item.start + timedelta(microseconds=1), item.stop - timedelta(microseconds=1), item.step)
+            # Comparing datetimes with timezone to datetimes without timezone has been deprecated in Pandas 1.2.0
+            # (see https://github.com/pandas-dev/pandas/pull/36148/) and is no longer supported in Pandas 2.0
+            # (see https://github.com/pandas-dev/pandas/pull/49492/).
+            # We explicitly remove the timezone from the start and stop of the slice to be able to use the
+            # index of the wrapped DataFrame.
+            start_wo_tz = item.start.replace(tzinfo=None) + timedelta(microseconds=1)
+            stop_wo_tz = item.stop.replace(tzinfo=None) - timedelta(microseconds=1)
+            open_ended = slice(start_wo_tz, stop_wo_tz, item.step)
             return CustomTimeseries(
                 self.wrapped[open_ended],
                 with_timezone_attr=self.with_timezone_attr,
                 timezone_=self.timezone_,
diff --git a/python/tests/unit/arcticdb/test_column_stats.py b/python/tests/unit/arcticdb/test_column_stats.py
index 50c47122ca..8db891c2b7 100644
--- a/python/tests/unit/arcticdb/test_column_stats.py
+++ b/python/tests/unit/arcticdb/test_column_stats.py
@@ -19,7 +19,11 @@ def generate_symbol(lib, sym):
     lib.write(sym, df0)
     lib.append(sym, df1)
     expected_column_stats = lib.read_index(sym)
-    expected_column_stats.drop(expected_column_stats.columns.difference(["start_index", "end_index"]), 1, inplace=True)
+    expected_column_stats.drop(
+        expected_column_stats.columns.difference(["start_index", "end_index"]),
+        axis=1,
+        inplace=True,
+    )
     expected_column_stats = expected_column_stats.iloc[[0, 1]]
     expected_column_stats["v1.0_MIN(col_1)"] = [df0["col_1"].min(), df1["col_1"].min()]
     expected_column_stats["v1.0_MAX(col_1)"] = [df0["col_1"].max(), df1["col_1"].max()]
@@ -41,7 +45,7 @@ def test_column_stats_basic_flow(lmdb_version_store_tiny_segment):
     expected_column_stats = generate_symbol(lib, sym)
     expected_column_stats.drop(
         expected_column_stats.columns.difference(["start_index", "end_index", "v1.0_MIN(col_1)", "v1.0_MAX(col_1)"]),
-        1,
+        axis=1,
         inplace=True,
     )
 
@@ -74,7 +78,11 @@ def test_column_stats_infinity(lmdb_version_store_tiny_segment):
     lib.append(sym, df1)
     lib.append(sym, df2)
     expected_column_stats = lib.read_index(sym)
-    expected_column_stats.drop(expected_column_stats.columns.difference(["start_index", "end_index"]), 1, inplace=True)
+    expected_column_stats.drop(
+        expected_column_stats.columns.difference(["start_index", "end_index"]),
+        axis=1,
+        inplace=True,
+    )
     expected_column_stats = expected_column_stats.iloc[[0, 1, 2]]
     expected_column_stats["v1.0_MIN(col_1)"] = [df0["col_1"].min(), df1["col_1"].min(), df2["col_1"].min()]
     expected_column_stats["v1.0_MAX(col_1)"] = [df0["col_1"].max(), df1["col_1"].max(), df2["col_1"].max()]
@@ -94,7 +102,7 @@ def test_column_stats_as_of(lmdb_version_store_tiny_segment):
     expected_column_stats = expected_column_stats.iloc[[0]]
     expected_column_stats.drop(
         expected_column_stats.columns.difference(["start_index", "end_index", "v1.0_MIN(col_1)", "v1.0_MAX(col_1)"]),
-        1,
+        axis=1,
         inplace=True,
     )
 
@@ -150,7 +158,7 @@ def test_column_stats_multiple_indexes_different_columns(lmdb_version_store_tiny_segment):
 
     expected_column_stats.drop(
         expected_column_stats.columns.difference(["start_index", "end_index", "v1.0_MIN(col_1)", "v1.0_MAX(col_1)"]),
-        1,
+        axis=1,
         inplace=True,
     )
     column_stats = lib.read_column_stats(sym)
@@ -251,7 +259,7 @@ def test_column_stats_multiple_creates(lmdb_version_store_tiny_segment):
     expected_column_stats = base_expected_column_stats.copy()
     expected_column_stats.drop(
         expected_column_stats.columns.difference(["start_index", "end_index", "v1.0_MIN(col_1)", "v1.0_MAX(col_1)"]),
-        1,
+        axis=1,
         inplace=True,
     )
     column_stats = lib.read_column_stats(sym)
@@ -287,10 +295,14 @@ def test_column_stats_duplicated_primary_index(lmdb_version_store_tiny_segment):
 
     lib = lmdb_version_store_tiny_segment
     sym = "test_column_stats_duplicated_primary_index"
-    total_df = df0.append(df1)
+    total_df = pd.concat((df0, df1))
     lib.write(sym, total_df)
     expected_column_stats = lib.read_index(sym)
-    expected_column_stats.drop(expected_column_stats.columns.difference(["start_index", "end_index"]), 1, inplace=True)
+    expected_column_stats.drop(
+        expected_column_stats.columns.difference(["start_index", "end_index"]),
+        axis=1,
+        inplace=True,
+    )
     expected_column_stats = expected_column_stats.iloc[[0, 1]]
     expected_column_stats["v1.0_MIN(col_1)"] = [df0["col_1"].min(), df1["col_1"].min()]
     expected_column_stats["v1.0_MAX(col_1)"] = [df0["col_1"].max(), df1["col_1"].max()]
@@ -324,7 +336,11 @@ def test_column_stats_dynamic_schema_missing_data(lmdb_version_store_tiny_segment):
     df = lib.read(sym).data
 
     expected_column_stats = lib.read_index(sym)
-    expected_column_stats.drop(expected_column_stats.columns.difference(["start_index", "end_index"]), 1, inplace=True)
+    expected_column_stats.drop(
+        expected_column_stats.columns.difference(["start_index", "end_index"]),
+        axis=1,
+        inplace=True,
+    )
     expected_column_stats = expected_column_stats.iloc[[0, 1, 2, 3, 4]]
     expected_column_stats["v1.0_MIN(col_1)"] = [
         df0["col_1"].min(),
@@ -395,7 +411,11 @@ def test_column_stats_dynamic_schema_types_changing(lmdb_version_store_tiny_segment):
     lib.append(sym, df1)
 
     expected_column_stats = lib.read_index(sym)
-    expected_column_stats.drop(expected_column_stats.columns.difference(["start_index", "end_index"]), 1, inplace=True)
+    expected_column_stats.drop(
+        expected_column_stats.columns.difference(["start_index", "end_index"]),
+        axis=1,
+        inplace=True,
+    )
     expected_column_stats = expected_column_stats.iloc[[0, 1]]
     expected_column_stats["v1.0_MIN(int_widening)"] = [df0["int_widening"].min(), df1["int_widening"].min()]
     expected_column_stats["v1.0_MAX(int_widening)"] = [df0["int_widening"].max(), df1["int_widening"].max()]
diff --git a/python/tests/unit/arcticdb/version_store/test_aggregation_dynamic.py b/python/tests/unit/arcticdb/version_store/test_aggregation_dynamic.py
index aa20037241..ffb09a2a0b 100644
--- a/python/tests/unit/arcticdb/version_store/test_aggregation_dynamic.py
+++ b/python/tests/unit/arcticdb/version_store/test_aggregation_dynamic.py
@@ -328,7 +328,7 @@ def test_aggregation_grouping_column_missing_from_row_group(lmdb_version_store_dynamic_schema):
         {"to_sum": [3, 4]},
         index=np.arange(2, 4),
     )
-    expected = df0.append(df1).groupby("grouping_column").agg({"to_sum": "sum"})
+    expected = pd.concat((df0, df1)).groupby("grouping_column").agg({"to_sum": "sum"})
 
     symbol = "test_aggregation_grouping_column_missing_from_row_group"
     lib.write(symbol, df0)
diff --git a/python/tests/unit/arcticdb/version_store/test_empty_writes.py b/python/tests/unit/arcticdb/version_store/test_empty_writes.py
index 9d4a78002f..070d8de8a9 100644
--- a/python/tests/unit/arcticdb/version_store/test_empty_writes.py
+++ b/python/tests/unit/arcticdb/version_store/test_empty_writes.py
@@ -23,7 +23,7 @@ def test_write_no_rows(lmdb_version_store, sym):
     assert_frame_equal(lmdb_version_store.read(sym).data, df)
 
     df2 = pd.DataFrame([[1.3, 6, "test"]], columns=column_names, index=[pd.Timestamp(0)])
-    df2 = df.append(df2)
+    df2 = pd.concat((df, df2))
     # coercing not needed
     lmdb_version_store.append(sym, df2, dynamic_strings=True)
     assert_frame_equal(lmdb_version_store.read(sym).data, df2)
@@ -31,7 +31,7 @@ def test_write_no_rows(lmdb_version_store, sym):
     df3 = pd.DataFrame(
         [[3.3, 8, None], [2.3, 10, "test2"]], columns=column_names, index=[pd.Timestamp(1), pd.Timestamp(2)]
     )
-    df2 = df2.append(df3)
+    df2 = pd.concat((df2, df3))
     # coercing not needed
     lmdb_version_store.append(sym, df3, dynamic_strings=True)
     assert_frame_equal(lmdb_version_store.read(sym).data, df2)
@@ -100,7 +100,7 @@ def test_write_no_rows_and_columns(lmdb_version_store_dynamic_schema, sym):
         columns=column_names + ["d"],
         index=[pd.Timestamp(3), pd.Timestamp(4)],
     )
-    df5 = df2.append(df4)
+    df5 = pd.concat((df2, df4))
 
     lmdb_version_store_dynamic_schema.append(sym, df4, dynamic_strings=True)
     assert_frame_equal(lmdb_version_store_dynamic_schema.read(sym).data, df5)
diff --git a/python/tests/unit/arcticdb/version_store/test_parallel.py b/python/tests/unit/arcticdb/version_store/test_parallel.py
index 9118d48a84..54ca1e3025 100644
--- a/python/tests/unit/arcticdb/version_store/test_parallel.py
+++ b/python/tests/unit/arcticdb/version_store/test_parallel.py
@@ -108,7 +108,7 @@ def test_sort_merge_write(lmdb_version_store):
 
         new_df = pd.DataFrame(data=vals, index=index)
         dataframes.append(new_df)
-        df = df.append(new_df)
+        df = pd.concat((df, new_df))
        dt = dt + datetime.timedelta(days=1)
 
     random.shuffle(dataframes)
@@ -139,7 +139,7 @@ def test_sort_merge_append(lmdb_version_store_dynamic_schema):
         vals = {c: random_floats(num_rows_per_day) for c in cols}
        new_df = pd.DataFrame(data=vals, index=index)
         dataframes.append(new_df)
-        df = df.append(new_df)
+        df = pd.concat((df, new_df))
         dt = dt + datetime.timedelta(days=1)
 
     half_way = len(dataframes) / 2
diff --git a/python/tests/unit/arcticdb/version_store/test_projection_dynamic.py b/python/tests/unit/arcticdb/version_store/test_projection_dynamic.py
index 97736c1c07..1491893211 100644
--- a/python/tests/unit/arcticdb/version_store/test_projection_dynamic.py
+++ b/python/tests/unit/arcticdb/version_store/test_projection_dynamic.py
@@ -55,18 +55,18 @@ def test_project_column_types_changing_and_missing(lmdb_version_store_dynamic_schema):
     # uint8
     df = pd.DataFrame({"col_to_project": np.arange(2, dtype=np.uint8), "data_col": [2, 3]}, index=np.arange(2, 4))
     lib.append(symbol, df)
-    expected = expected.append(df)
+    expected = pd.concat((expected, df))
     # Missing
     df = pd.DataFrame({"data_col": [4, 5]}, index=np.arange(4, 6))
     lib.append(symbol, df)
-    expected = expected.append(df)
+    expected = pd.concat((expected, df))
     # int16
     df = pd.DataFrame(
         {"col_to_project": np.arange(200, 202, dtype=np.int16), "data_col": [6, 7]}, index=np.arange(6, 8)
     )
     lib.append(symbol, df)
-    expected = expected.append(df)
+    expected = pd.concat((expected, df))
 
     expected["projected_col"] = expected["col_to_project"] * 2
     q = QueryBuilder()
     q = q.apply("projected_col", q["col_to_project"] * 2)
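For reference, the three Pandas 2.0 incompatibilities this patch works around can each be reproduced in isolation. A minimal sketch follows; the frames and values in it are illustrative, not taken from the test suite:

import pandas as pd

df0 = pd.DataFrame({"col_1": [1.0, 2.0]}, index=pd.date_range("2023-01-01", periods=2))
df1 = pd.DataFrame({"col_1": [3.0, 4.0]}, index=pd.date_range("2023-01-03", periods=2))

# 1. DataFrame.append was deprecated in Pandas 1.4 and removed in 2.0;
#    `df0.append(df1)` raises AttributeError there, so concatenate instead:
total = pd.concat((df0, df1))

# 2. Passing `axis` positionally to DataFrame.drop was deprecated during the
#    1.x series and is rejected in 2.0, hence the `1` -> `axis=1` changes:
total.drop(total.columns.difference(["col_1"]), axis=1, inplace=True)

# 3. Comparing tz-aware with tz-naive datetimes was deprecated in 1.2.0
#    (pandas-dev/pandas#36148) and raises TypeError in 2.0
#    (pandas-dev/pandas#49492); strip the tzinfo before slicing a naive index:
start = pd.Timestamp("2023-01-01", tz="UTC")
sliced = total[start.replace(tzinfo=None):]

All three replacement forms also run unchanged on Pandas 1.x, which is why the patch can target both major versions with a single code path.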