diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 8ccc1af..637c5a2 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -22,7 +22,7 @@ In order to add a feature:
   documentation.
 
 - The feature must work fully on the following CPython versions:
-  3.7, 3.8, 3.11 and 3.12 on both UNIX and Windows.
+  3.7, 3.8, 3.9, 3.11 and 3.12 on both UNIX and Windows.
 
 - The feature must not add unnecessary dependencies (where
   "unnecessary" is of course subjective, but new dependencies should
@@ -148,7 +148,7 @@ Running System Tests
 
    .. note::
 
-      System tests are only configured to run under Python 3.8, 3.11 and 3.12.
+      System tests are only configured to run under Python 3.8, 3.9, 3.11 and 3.12.
       For expediency, we do not run them in older versions of Python 3.
 
    This alone will not run the tests. You'll need to change some local
@@ -223,11 +223,13 @@ We support:
 
 - `Python 3.7`_
 - `Python 3.8`_
+- `Python 3.9`_
 - `Python 3.11`_
 - `Python 3.12`_
 
 .. _Python 3.7: https://docs.python.org/3.7/
 .. _Python 3.8: https://docs.python.org/3.8/
+.. _Python 3.9: https://docs.python.org/3.9/
 .. _Python 3.11: https://docs.python.org/3.11/
 .. _Python 3.12: https://docs.python.org/3.12/
 
diff --git a/bigquery_magics/bigquery.py b/bigquery_magics/bigquery.py
index 736745e..4cd0c5e 100644
--- a/bigquery_magics/bigquery.py
+++ b/bigquery_magics/bigquery.py
@@ -114,6 +114,11 @@
 except ImportError:
     bigquery_storage = None
 
+try:
+    import bigframes.pandas as bpd
+except ImportError:
+    bpd = None
+
 USER_AGENT = f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.version.__version__}"
 
 context = bigquery_magics.config.context
@@ -255,6 +260,7 @@ def _create_dataset_if_necessary(client, dataset_id):
     help=(
         "Sets query to be a dry run to estimate costs. "
         "Defaults to executing the query instead of dry run if this argument is not used."
+        "Does not work with engine 'bigframes'. "
     ),
 )
 @magic_arguments.argument(
@@ -319,6 +325,7 @@ def _create_dataset_if_necessary(client, dataset_id):
         "amount of time for the query to finish. By default, this "
         "information will be displayed as the query runs, but will be "
         "cleared after the query is finished."
+        "This flag is ignored when the engine is 'bigframes'."
     ),
 )
 @magic_arguments.argument(
@@ -350,6 +357,7 @@ def _create_dataset_if_necessary(client, dataset_id):
     help=(
         "Set the location to execute query."
         "Defaults to location set in query setting in console."
+        "This flag is ignored when the engine is 'bigframes'."
     ),
 )
 def _cell_magic(line, query):
@@ -376,18 +384,10 @@ def _cell_magic(line, query):
         return
 
     query = _validate_and_resolve_query(query, args)
-    bq_client, bqstorage_client = _create_clients(args)
+    if context.engine == "bigframes":
+        return _query_with_bigframes(query, params, args)
 
-    try:
-        return _make_bq_query(
-            query,
-            args=args,
-            params=params,
-            bq_client=bq_client,
-            bqstorage_client=bqstorage_client,
-        )
-    finally:
-        _close_transports(bq_client, bqstorage_client)
+    return _query_with_pandas(query, params, args)
 
 
 def _parse_magic_args(line: str) -> Tuple[List[Any], Any]:
@@ -444,6 +444,45 @@ def _split_args_line(line: str) -> Tuple[str, str]:
     return params_option_value, rest_of_args
 
 
+def _query_with_bigframes(query: str, params: List[Any], args: Any):
+    if args.dry_run:
+        raise ValueError("Dry run is not supported by bigframes engine.")
+
+    if bpd is None:
+        raise ValueError("Bigframes package is not installed.")
+
+    bpd.options.bigquery.project = context.project
+    bpd.options.bigquery.credentials = context.credentials
+
+    max_results = int(args.max_results) if args.max_results else None
+
+    result = bpd.read_gbq_query(
+        query,
+        max_results=max_results,
+        configuration=_create_job_config(args, params).to_api_repr(),
+    )
+
+    if args.destination_var:
+        get_ipython().push({args.destination_var: result})
+    else:
+        return result
+
+
+def _query_with_pandas(query: str, params: List[Any], args: Any):
+    bq_client, bqstorage_client = _create_clients(args)
+
+    try:
+        return _make_bq_query(
+            query,
+            args=args,
+            params=params,
+            bq_client=bq_client,
+            bqstorage_client=bqstorage_client,
+        )
+    finally:
+        _close_transports(bq_client, bqstorage_client)
+
+
 def _create_clients(args: Any) -> Tuple[bigquery.Client, Any]:
     bigquery_client_options = copy.deepcopy(context.bigquery_client_options)
     if args.bigquery_api_endpoint:
diff --git a/bigquery_magics/config.py b/bigquery_magics/config.py
index 0cffb44..8e5f6ec 100644
--- a/bigquery_magics/config.py
+++ b/bigquery_magics/config.py
@@ -33,8 +33,6 @@ class Context(object):
     and can be found at ``bigquery_magics.context``.
     """
 
-    _credentials = None
-    _project = None
     _connection = None
 
     default_query_job_config = bigquery.QueryJobConfig()
@@ -103,6 +101,8 @@ class Context(object):
         >>> bigquery_magics.context.progress_bar_type = "tqdm_notebook"
     """
 
+    _credentials = None
+
     @property
     def credentials(self):
         """google.auth.credentials.Credentials: Credentials to use for queries
@@ -138,6 +138,8 @@ def credentials(self):
     def credentials(self, value):
         self._credentials = value
 
+    _project = None
+
     @property
     def project(self):
         """str: Default project to use for queries performed through IPython
@@ -163,5 +165,28 @@ def project(self):
     def project(self, value):
         self._project = value
 
+    _engine = "pandas"
+
+    @property
+    def engine(self) -> str:
+        """Engine to run the query. Can be either "pandas" or "bigframes".
+
+        If using "pandas", the query result is stored in a pandas DataFrame.
+        If using "bigframes", the query result is stored in a bigframes DataFrame instead.
+
+        Example:
+            Manually setting the context engine:
+
+            >>> import bigquery_magics
+            >>> bigquery_magics.context.engine = 'bigframes'
+        """
+        return self._engine
+
+    @engine.setter
+    def engine(self, value):
+        if value != "pandas" and value != "bigframes":
+            raise ValueError("engine must be either 'pandas' or 'bigframes'")
+        self._engine = value
+
 
 context = Context()
diff --git a/noxfile.py b/noxfile.py
index 22877d5..a448407 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -34,7 +34,7 @@
 
 DEFAULT_PYTHON_VERSION = "3.8"
 
-UNIT_TEST_PYTHON_VERSIONS: List[str] = ["3.7", "3.8", "3.11", "3.12"]
+UNIT_TEST_PYTHON_VERSIONS: List[str] = ["3.7", "3.8", "3.9", "3.11", "3.12"]
 UNIT_TEST_STANDARD_DEPENDENCIES = [
     "mock",
     "asyncmock",
@@ -57,17 +57,20 @@
     ],
     "3.9": [
         "bqstorage",
+        "bigframes",
     ],
     "3.10": [
         "bqstorage",
+        "bigframes",
     ],
     "3.11": [],
     "3.12": [
         "bqstorage",
+        "bigframes",
     ],
 }
 
-SYSTEM_TEST_PYTHON_VERSIONS: List[str] = ["3.8", "3.11", "3.12"]
+SYSTEM_TEST_PYTHON_VERSIONS: List[str] = ["3.8", "3.9", "3.11", "3.12"]
 SYSTEM_TEST_STANDARD_DEPENDENCIES: List[str] = [
     "mock",
     "pytest",
@@ -86,13 +89,16 @@
     ],
     "3.9": [
         "bqstorage",
+        "bigframes",
     ],
     "3.10": [
        "bqstorage",
+        "bigframes",
     ],
     "3.11": [],
     "3.12": [
         "bqstorage",
+        "bigframes",
     ],
 }
 
diff --git a/owlbot.py b/owlbot.py
index a05ee95..ee6146c 100644
--- a/owlbot.py
+++ b/owlbot.py
@@ -28,19 +28,20 @@
 # Add templated files
 # ----------------------------------------------------------------------------
 
-extras = ["bqstorage"]
+extras_storage = ["bqstorage"]
+extras_bf = ["bqstorage", "bigframes"]
 extras_by_python = {
-    "3.7": extras,
-    "3.8": extras,
-    "3.9": extras,
-    "3.10": extras,
+    "3.7": extras_storage,
+    "3.8": extras_storage,
+    "3.9": extras_bf,
+    "3.10": extras_bf,
     # Use a middle version of Python to test when no extras are installed.
     "3.11": [],
-    "3.12": extras,
+    "3.12": extras_bf,
 }
 templated_files = common.py_library(
-    unit_test_python_versions=["3.7", "3.8", "3.11", "3.12"],
-    system_test_python_versions=["3.8", "3.11", "3.12"],
+    unit_test_python_versions=["3.7", "3.8", "3.9", "3.11", "3.12"],
+    system_test_python_versions=["3.8", "3.9", "3.11", "3.12"],
     cov_level=100,
     unit_test_extras_by_python=extras_by_python,
     unit_test_external_dependencies=["google-cloud-testutils"],
diff --git a/setup.py b/setup.py
index 5e4ad45..2b67aa6 100644
--- a/setup.py
+++ b/setup.py
@@ -54,6 +54,7 @@
         "grpcio >= 1.47.0, < 2.0dev",
         "grpcio >= 1.49.1, < 2.0dev; python_version>='3.11'",
     ],
+    "bigframes": ["bigframes >= 1.17.0"],
 }
 
 all_extras = []
diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt
index 66cf6ee..653bc82 100644
--- a/testing/constraints-3.9.txt
+++ b/testing/constraints-3.9.txt
@@ -4,4 +4,5 @@
 
 # We try to test across major versions of our dependencies.
 # This is the last pandas 2.0.x release.
 pandas==2.0.3
+bigframes==1.17.0
diff --git a/tests/unit/test_bigquery.py b/tests/unit/test_bigquery.py
index 721bb48..7079bea 100644
--- a/tests/unit/test_bigquery.py
+++ b/tests/unit/test_bigquery.py
@@ -43,6 +43,11 @@
 except ImportError:
     bigquery_storage = None
 
+try:
+    import bigframes.pandas as bpd
+except ImportError:
+    bpd = None
+
 
 def make_connection(*args):
     # TODO(tswast): Remove this in favor of a mock google.cloud.bigquery.Client
@@ -121,6 +126,11 @@ def mock_credentials(monkeypatch):
     monkeypatch.setattr(bigquery_magics.context, "_credentials", credentials)
 
 
+@pytest.fixture
+def bigframes_engine(monkeypatch):
+    monkeypatch.setattr(bigquery_magics.context, "engine", "bigframes")
+
+
 PROJECT_ID = "its-a-project-eh"
 JOB_ID = "some-random-id"
 JOB_REFERENCE_RESOURCE = {"projectId": PROJECT_ID, "jobId": JOB_ID}
@@ -1884,3 +1894,129 @@ def test_bigquery_magic_with_location():
 
     client_options_used = run_query_mock.call_args_list[0][0][0]
     assert client_options_used.location == "us-east1"
+
+
+@pytest.mark.usefixtures("ipython_interactive", "mock_credentials", "bigframes_engine")
+def test_big_query_magic_bigframes():
+    if bpd is None:
+        pytest.skip("BigFrames not installed")
+
+    ip = IPython.get_ipython()
+    ip.extension_manager.load_extension("bigquery_magics")
+    sql = "SELECT 0 AS something"
+    expected_configuration = {
+        "query": {"queryParameters": [], "useLegacySql": False},
+        "dryRun": False,
+    }
+    bf_patch = mock.patch("bigframes.pandas.read_gbq_query", autospec=True)
+
+    with bf_patch as bf_mock:
+        ip.run_cell_magic("bigquery", "", sql)
+
+    bf_mock.assert_called_once_with(
+        sql, max_results=None, configuration=expected_configuration
+    )
+    assert bpd.options.bigquery.credentials is bigquery_magics.context.credentials
+    assert bpd.options.bigquery.project == bigquery_magics.context.project
+
+
+@pytest.mark.usefixtures("ipython_interactive", "mock_credentials", "bigframes_engine")
+def test_big_query_magic_bigframes__bigframes_is_not_installed__should_raise_error():
+    if bpd is not None:
+        pytest.skip("BigFrames is installed")
+
+    ip = IPython.get_ipython()
+    ip.extension_manager.load_extension("bigquery_magics")
+    sql = "SELECT 0 AS something"
+
+    with pytest.raises(ValueError, match="Bigframes package is not installed."):
+        ip.run_cell_magic("bigquery", "", sql)
+
+
+@pytest.mark.usefixtures("ipython_interactive", "mock_credentials", "bigframes_engine")
+def test_big_query_magic_bigframes_with_params():
+    if bpd is None:
+        pytest.skip("BigFrames not installed")
+
+    ip = IPython.get_ipython()
+    ip.extension_manager.load_extension("bigquery_magics")
+    sql = "SELECT 0 AS @p"
+    expected_configuration = {
+        "query": {
+            "queryParameters": [
+                {
+                    "name": "p",
+                    "parameterType": {"type": "STRING"},
+                    "parameterValue": {"value": "num"},
+                },
+            ],
+            "useLegacySql": False,
+            "parameterMode": "NAMED",
+        },
+        "dryRun": False,
+    }
+    bf_patch = mock.patch("bigframes.pandas.read_gbq_query", autospec=True)
+
+    with bf_patch as bf_mock:
+        ip.run_cell_magic("bigquery", '--params {"p":"num"}', sql)
+
+    bf_mock.assert_called_once_with(
+        sql, max_results=None, configuration=expected_configuration
+    )
+
+
+@pytest.mark.usefixtures("ipython_interactive", "mock_credentials", "bigframes_engine")
+def test_big_query_magic_bigframes_with_max_results():
+    if bpd is None:
+        pytest.skip("BigFrames not installed")
+
+    ip = IPython.get_ipython()
+    ip.extension_manager.load_extension("bigquery_magics")
+    sql = "SELECT 0 AS something"
+    expected_configuration = {
+        "query": {"queryParameters": [], "useLegacySql": False},
+        "dryRun": False,
+    }
+    bf_patch = mock.patch("bigframes.pandas.read_gbq_query", autospec=True)
+
+    with bf_patch as bf_mock:
+        ip.run_cell_magic("bigquery", "--max_results 10", sql)
+
+    bf_mock.assert_called_once_with(
+        sql, max_results=10, configuration=expected_configuration
+    )
+
+
+@pytest.mark.usefixtures("ipython_interactive", "mock_credentials", "bigframes_engine")
+def test_big_query_magic_bigframes_with_destination_var(ipython_ns_cleanup):
+    if bpd is None:
+        pytest.skip("BigFrames not installed")
+
+    ip = IPython.get_ipython()
+    ip.extension_manager.load_extension("bigquery_magics")
+    sql = "SELECT 0 AS something"
+
+    bf_patch = mock.patch("bigframes.pandas.read_gbq_query", autospec=True)
+    ipython_ns_cleanup.append((ip, "df"))
+
+    with bf_patch as bf_mock:
+        ip.run_cell_magic("bigquery", "df", sql)
+
+    assert "df" in ip.user_ns
+    df = ip.user_ns["df"]
+    assert df is bf_mock.return_value
+
+
+@pytest.mark.usefixtures("ipython_interactive", "mock_credentials", "bigframes_engine")
+def test_big_query_magic_bigframes_with_dry_run__should_fail():
+    if bpd is None:
+        pytest.skip("BigFrames not installed")
+
+    ip = IPython.get_ipython()
+    ip.extension_manager.load_extension("bigquery_magics")
+    sql = "SELECT 0 AS @p"
+
+    bf_patch = mock.patch("bigframes.pandas.read_gbq_query", autospec=True)
+
+    with bf_patch, pytest.raises(ValueError):
+        ip.run_cell_magic("bigquery", "--dry_run", sql)
diff --git a/tests/unit/test_context.py b/tests/unit/test_context.py
index d69fe41..2909f2c 100644
--- a/tests/unit/test_context.py
+++ b/tests/unit/test_context.py
@@ -16,6 +16,7 @@
 
 import google.auth.credentials
 import pydata_google_auth
+import pytest
 
 import bigquery_magics
 
@@ -64,3 +65,15 @@ def test_context_credentials_and_project_can_be_set_explicitly():
     assert bigquery_magics.context.credentials is credentials_mock
     # default should not be called if credentials & project are explicitly set
     assert default_mock.call_count == 0
+
+
+@pytest.mark.parametrize("engine", ["pandas", "bigframes"])
+def test_context_set_engine(engine):
+    bigquery_magics.context.engine = engine
+
+    assert bigquery_magics.context.engine == engine
+
+
+def test_context_set_invalid_engine():
+    with pytest.raises(ValueError):
+        bigquery_magics.context.engine = "whatever"
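
Below is a minimal usage sketch of the engine option introduced in this change. It is illustrative only and not part of the patch; it assumes the new ``bigframes`` extra from setup.py is installed and that the magic runs inside an IPython/Jupyter session with working Google Cloud credentials.

# Sketch: opting a notebook session into the new bigframes engine.
# Assumes something like `pip install bigquery-magics[bigframes]`, per the
# "bigframes" extra added in setup.py above.
import bigquery_magics

# Any value other than "pandas" or "bigframes" raises ValueError
# (see the new Context.engine setter in bigquery_magics/config.py).
bigquery_magics.context.engine = "bigframes"

# In IPython/Jupyter, %%bigquery cells are then routed through
# bigframes.pandas.read_gbq_query, e.g.:
#
#     %load_ext bigquery_magics
#
#     %%bigquery df
#     SELECT 17 AS answer
#
# and `df` is a bigframes DataFrame rather than a pandas DataFrame. Per the
# help-text changes above, --dry_run is rejected by this engine, and
# --progress_bar_type and --location are ignored.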