diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index 656c62ef19..2bc612bdbe 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -629,6 +629,24 @@ def _get_snapshot_sql_and_primary_key(
         job_config.labels["bigframes-api"] = api_name
         if use_cache and table_ref in self._df_snapshot.keys():
             snapshot_timestamp = self._df_snapshot[table_ref]
+
+            # Cache hit could be unexpected. See internal issue 329545805.
+            # Raise a warning with more information about how to avoid the
+            # problems with the cache.
+            warnings.warn(
+                f"Reading cached table from {snapshot_timestamp} to avoid "
+                "incompatibilities with previous reads of this table. To read "
+                "the latest version, set `use_cache=False` or close the "
+                "current session with Session.close() or "
+                "bigframes.pandas.close_session().",
+                # There are many layers before we get to (possibly) the user's code:
+                # pandas.read_gbq_table
+                # -> with_default_session
+                # -> Session.read_gbq_table
+                # -> _read_gbq_table
+                # -> _get_snapshot_sql_and_primary_key
+                stacklevel=6,
+            )
         else:
             snapshot_timestamp = list(
                 self.bqclient.query(
diff --git a/tests/unit/resources.py b/tests/unit/resources.py
index 967e42548f..b57cd85360 100644
--- a/tests/unit/resources.py
+++ b/tests/unit/resources.py
@@ -31,6 +31,9 @@
 """Utilities for creating test resources."""
 
 
+TEST_SCHEMA = (google.cloud.bigquery.SchemaField("col", "INTEGER"),)
+
+
 def create_bigquery_session(
     bqclient: Optional[mock.Mock] = None,
     session_id: str = "abcxyz",
@@ -44,6 +47,13 @@ def create_bigquery_session(
         bqclient = mock.create_autospec(google.cloud.bigquery.Client, instance=True)
         bqclient.project = "test-project"
 
+        # Mock the location.
+        table = mock.create_autospec(google.cloud.bigquery.Table, instance=True)
+        table._properties = {}
+        type(table).location = mock.PropertyMock(return_value="test-region")
+        type(table).schema = mock.PropertyMock(return_value=TEST_SCHEMA)
+        bqclient.get_table.return_value = table
+
     if anonymous_dataset is None:
         anonymous_dataset = google.cloud.bigquery.DatasetReference(
             "test-project",
@@ -61,6 +71,8 @@ def query_mock(query, *args, **kwargs):
 
         if query.startswith("SELECT CURRENT_TIMESTAMP()"):
             query_job.result = mock.MagicMock(return_value=[[datetime.datetime.now()]])
+        else:
+            type(query_job).schema = mock.PropertyMock(return_value=TEST_SCHEMA)
 
         return query_job
 
diff --git a/tests/unit/session/test_session.py b/tests/unit/session/test_session.py
index b474c9f63e..3e2b28c200 100644
--- a/tests/unit/session/test_session.py
+++ b/tests/unit/session/test_session.py
@@ -12,10 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import datetime
 import os
+import re
 from unittest import mock
 
 import google.api_core.exceptions
+import google.cloud.bigquery
 import pytest
 
 import bigframes
@@ -31,6 +34,23 @@ def test_read_gbq_missing_parts(missing_parts_table_id):
         session.read_gbq(missing_parts_table_id)
 
 
+def test_read_gbq_cached_table():
+    """Reading a table with a stored snapshot time warns and reuses that time."""
+    session = resources.create_bigquery_session()
+    table_ref = google.cloud.bigquery.TableReference(
+        google.cloud.bigquery.DatasetReference("my-project", "my_dataset"),
+        "my_table",
+    )
+    session._df_snapshot[table_ref] = datetime.datetime(
+        1999, 1, 2, 3, 4, 5, 678901, tzinfo=datetime.timezone.utc
+    )
+
+    with pytest.warns(UserWarning, match=re.escape("use_cache=False")):
+        df = session.read_gbq("my-project.my_dataset.my_table")
+
+    assert "1999-01-02T03:04:05.678901" in df.sql
+
+
 @pytest.mark.parametrize(
     "not_found_table_id",
     [("unknown.dataset.table"), ("project.unknown.table"), ("project.dataset.unknown")],