From e712c65bbd4ff35432ad7b588ea92130f1489e09 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Tue, 12 Nov 2024 21:57:46 +0000 Subject: [PATCH 1/4] fix: unordered mode too many labels issue. --- bigframes/session/executor.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bigframes/session/executor.py b/bigframes/session/executor.py index 170f0ac086..a84861878a 100644 --- a/bigframes/session/executor.py +++ b/bigframes/session/executor.py @@ -379,11 +379,12 @@ def _run_execute_query( job_config.maximum_bytes_billed = ( bigframes.options.compute.maximum_bytes_billed ) - # Note: add_labels is global scope which may have unexpected effects - bq_io.add_labels(job_config, api_name=api_name) if not self.strictly_ordered: job_config.labels["bigframes-mode"] = "unordered" + + # Note: add_labels is global scope which may have unexpected effects + bq_io.add_labels(job_config, api_name=api_name) try: query_job = self.bqclient.query(sql, job_config=job_config) return ( From 882f70558d6adcbc417a081169690b04c2247c66 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Tue, 12 Nov 2024 22:23:59 +0000 Subject: [PATCH 2/4] update test --- tests/system/small/test_series.py | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index b906f452b7..407b1cf535 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -2766,22 +2766,17 @@ def test_series_case_when(scalars_dfs_maybe_ordered): bf_series = scalars_df["int64_col"] pd_series = scalars_pandas_df["int64_col"] - # TODO(tswast): pandas case_when appears to assume True when a value is - # null. I suspect this should be considered a bug in pandas. - bf_result = bf_series.case_when( - [ - ((bf_series > 100).fillna(True), bf_series - 1), - ((bf_series > 0).fillna(True), pd.NA), - ((bf_series < -100).fillna(True), -1000), - ] - ).to_pandas() - pd_result = pd_series.case_when( - [ - (pd_series > 100, pd_series - 1), - (pd_series > 0, pd.NA), - (pd_series < -100, -1000), - ] - ) + bf_conditions = [ + ((bf_series > (-100 + i * 5)).fillna(True), i) for i in range(149, 0, -1) + ] + [((bf_series <= -100).fillna(True), 0)] + + pd_conditions = [((pd_series > (-100 + i * 5)), i) for i in range(149, 0, -1)] + [ + (pd_series <= -100, 0) + ] + + bf_result = bf_series.case_when(bf_conditions).to_pandas() + pd_result = pd_series.case_when(pd_conditions) + pd.testing.assert_series_equal( bf_result, pd_result.astype(pd.Int64Dtype()), From 6f39d680b01ef6097b5469876aa767a0af91a11e Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Tue, 12 Nov 2024 22:41:04 +0000 Subject: [PATCH 3/4] update test --- tests/system/small/test_series.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 407b1cf535..d019f55918 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -2766,13 +2766,19 @@ def test_series_case_when(scalars_dfs_maybe_ordered): bf_series = scalars_df["int64_col"] pd_series = scalars_pandas_df["int64_col"] - bf_conditions = [ - ((bf_series > (-100 + i * 5)).fillna(True), i) for i in range(149, 0, -1) - ] + [((bf_series <= -100).fillna(True), 0)] + # TODO(tswast): pandas case_when appears to assume True when a value is + # null. I suspect this should be considered a bug in pandas. + bf_conditions = ( + [((bf_series > 645).fillna(True), bf_series - 1)] + + [((bf_series > (-100 + i * 5)).fillna(True), i) for i in range(148, 0, -1)] + + [((bf_series <= -100).fillna(True), pd.NA)] + ) - pd_conditions = [((pd_series > (-100 + i * 5)), i) for i in range(149, 0, -1)] + [ - (pd_series <= -100, 0) - ] + pd_conditions = ( + [((pd_series > 645), pd_series - 1)] + + [((pd_series > (-100 + i * 5)), i) for i in range(148, 0, -1)] + + [(pd_series <= -100, pd.NA)] + ) bf_result = bf_series.case_when(bf_conditions).to_pandas() pd_result = pd_series.case_when(pd_conditions) From ebc6d051596229a7d344154862dda49510a340ab Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Wed, 13 Nov 2024 00:13:11 +0000 Subject: [PATCH 4/4] Update test --- tests/system/small/test_series.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index d019f55918..5bb20e2714 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -2768,6 +2768,8 @@ def test_series_case_when(scalars_dfs_maybe_ordered): # TODO(tswast): pandas case_when appears to assume True when a value is # null. I suspect this should be considered a bug in pandas. + + # Generate 150 conditions to test case_when with a large number of conditions bf_conditions = ( [((bf_series > 645).fillna(True), bf_series - 1)] + [((bf_series > (-100 + i * 5)).fillna(True), i) for i in range(148, 0, -1)] @@ -2780,6 +2782,8 @@ def test_series_case_when(scalars_dfs_maybe_ordered): + [(pd_series <= -100, pd.NA)] ) + assert len(bf_conditions) == 150 + bf_result = bf_series.case_when(bf_conditions).to_pandas() pd_result = pd_series.case_when(pd_conditions)