Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for BigQuery DataFrames. Set context.engine to 'bigframes' to support query results larger than 10 GB #58

Merged
merged 18 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions CONTRIBUTING.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ In order to add a feature:
documentation.

- The feature must work fully on the following CPython versions:
3.7, 3.8, 3.11 and 3.12 on both UNIX and Windows.
3.7, 3.8, 3.9, 3.11 and 3.12 on both UNIX and Windows.

- The feature must not add unnecessary dependencies (where
"unnecessary" is of course subjective, but new dependencies should
Expand Down Expand Up @@ -148,7 +148,7 @@ Running System Tests

.. note::

System tests are only configured to run under Python 3.8, 3.11 and 3.12.
System tests are only configured to run under Python 3.8, 3.9, 3.11 and 3.12.
For expediency, we do not run them in older versions of Python 3.

This alone will not run the tests. You'll need to change some local
Expand Down Expand Up @@ -223,11 +223,13 @@ We support:

- `Python 3.7`_
- `Python 3.8`_
- `Python 3.9`_
- `Python 3.11`_
- `Python 3.12`_

.. _Python 3.7: https://docs.python.org/3.7/
.. _Python 3.8: https://docs.python.org/3.8/
.. _Python 3.9: https://docs.python.org/3.9/
.. _Python 3.11: https://docs.python.org/3.11/
.. _Python 3.12: https://docs.python.org/3.12/

Expand Down
61 changes: 50 additions & 11 deletions bigquery_magics/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,11 @@
except ImportError:
bigquery_storage = None

try:
import bigframes.pandas as bpd
except ImportError:
bpd = None

USER_AGENT = f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.version.__version__}"
context = bigquery_magics.config.context

Expand Down Expand Up @@ -255,6 +260,7 @@ def _create_dataset_if_necessary(client, dataset_id):
help=(
"Sets query to be a dry run to estimate costs. "
"Defaults to executing the query instead of dry run if this argument is not used."
"Does not work with engine 'bigframes'. "
),
)
@magic_arguments.argument(
Expand Down Expand Up @@ -319,6 +325,7 @@ def _create_dataset_if_necessary(client, dataset_id):
"amount of time for the query to finish. By default, this "
"information will be displayed as the query runs, but will be "
"cleared after the query is finished."
"This flag is ignored when the engine is 'bigframes'."
),
)
@magic_arguments.argument(
Expand Down Expand Up @@ -350,6 +357,7 @@ def _create_dataset_if_necessary(client, dataset_id):
help=(
"Set the location to execute query."
"Defaults to location set in query setting in console."
"This flag is ignored when the engine is 'bigframes'."
),
)
def _cell_magic(line, query):
Expand All @@ -376,18 +384,10 @@ def _cell_magic(line, query):
return
query = _validate_and_resolve_query(query, args)

bq_client, bqstorage_client = _create_clients(args)
if context.engine == "bigframes":
return _query_with_bigframes(query, params, args)

try:
return _make_bq_query(
query,
args=args,
params=params,
bq_client=bq_client,
bqstorage_client=bqstorage_client,
)
finally:
_close_transports(bq_client, bqstorage_client)
return _query_with_pandas(query, params, args)


def _parse_magic_args(line: str) -> Tuple[List[Any], Any]:
Expand Down Expand Up @@ -444,6 +444,45 @@ def _split_args_line(line: str) -> Tuple[str, str]:
return params_option_value, rest_of_args


def _query_with_bigframes(query: str, params: List[Any], args: Any):
    """Run ``query`` through BigQuery DataFrames (``bigframes.pandas``).

    Args:
        query: Query text handed to ``bpd.read_gbq_query``.
        params: Query parameters, forwarded to ``_create_job_config``.
        args: Parsed magic arguments. ``dry_run``, ``max_results`` and
            ``destination_var`` are read directly in this function.

    Returns:
        Whatever ``bpd.read_gbq_query`` returns, or ``None`` when
        ``args.destination_var`` is set (the result is then pushed to the
        IPython shell under that name instead of being returned).

    Raises:
        ValueError: If a dry run was requested, or if ``bigframes`` could
            not be imported.
    """
    if args.dry_run:
        raise ValueError("Dry run is not supported by bigframes engine.")
    if bpd is None:
        raise ValueError("Bigframes package is not installed.")

    # NOTE(review): a reviewer flagged that these two assignments might fail
    # if the bigframes session has already started -- which may or may not be
    # the desired behavior. The alternative raised was to create an explicit
    # bigframes Session object, but then the resulting DataFrame would not be
    # compatible with other DataFrames in the notebook. TODO confirm.
    bpd.options.bigquery.project = context.project
    bpd.options.bigquery.credentials = context.credentials

    # A missing/empty max_results means "no limit".
    row_limit = None
    if args.max_results:
        row_limit = int(args.max_results)

    job_configuration = _create_job_config(args, params).to_api_repr()
    dataframe = bpd.read_gbq_query(
        query,
        max_results=row_limit,
        configuration=job_configuration,
    )

    if not args.destination_var:
        return dataframe
    get_ipython().push({args.destination_var: dataframe})


def _query_with_pandas(query: str, params: List[Any], args: Any):
    """Run ``query`` via ``_make_bq_query`` using freshly created clients.

    Args:
        query: Query text passed through to ``_make_bq_query``.
        params: Query parameters passed through to ``_make_bq_query``.
        args: Parsed magic arguments; also used to build the clients.

    Returns:
        Whatever ``_make_bq_query`` returns.
    """
    client, storage_client = _create_clients(args)
    query_kwargs = dict(
        args=args,
        params=params,
        bq_client=client,
        bqstorage_client=storage_client,
    )

    try:
        return _make_bq_query(query, **query_kwargs)
    finally:
        # Runs on success and on failure so the transports are always closed.
        _close_transports(client, storage_client)


def _create_clients(args: Any) -> Tuple[bigquery.Client, Any]:
bigquery_client_options = copy.deepcopy(context.bigquery_client_options)
if args.bigquery_api_endpoint:
Expand Down
29 changes: 27 additions & 2 deletions bigquery_magics/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ class Context(object):
and can be found at ``bigquery_magics.context``.
"""

_credentials = None
_project = None
_connection = None

default_query_job_config = bigquery.QueryJobConfig()
Expand Down Expand Up @@ -103,6 +101,8 @@ class Context(object):
>>> bigquery_magics.context.progress_bar_type = "tqdm_notebook"
"""

_credentials = None

@property
def credentials(self):
"""google.auth.credentials.Credentials: Credentials to use for queries
Expand Down Expand Up @@ -138,6 +138,8 @@ def credentials(self):
def credentials(self, value):
self._credentials = value

_project = None

@property
def project(self):
"""str: Default project to use for queries performed through IPython
Expand All @@ -163,5 +165,28 @@ def project(self):
def project(self, value):
self._project = value

_engine = "pandas"

@property
def engine(self) -> str:
    """str: Engine used to run the query, either "pandas" or "bigframes".

    If using "pandas", the query result will be stored in a pandas
    DataFrame. If using "bigframes", the query result will be stored in a
    bigframes DataFrame instead.

    Assigning any other value raises ``ValueError``.

    Example:
        Manually setting the context engine:

        >>> import bigquery_magics
        >>> bigquery_magics.context.engine = 'bigframes'
    """
    return self._engine

@engine.setter
def engine(self, value):
    # Membership test against a tuple (not a set) so that unhashable values
    # are rejected with the same ValueError instead of a TypeError.
    if value not in ("pandas", "bigframes"):
        raise ValueError("engine must be either 'pandas' or 'bigframes'")
    self._engine = value


context = Context()
10 changes: 8 additions & 2 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

DEFAULT_PYTHON_VERSION = "3.8"

UNIT_TEST_PYTHON_VERSIONS: List[str] = ["3.7", "3.8", "3.11", "3.12"]
UNIT_TEST_PYTHON_VERSIONS: List[str] = ["3.7", "3.8", "3.9", "3.11", "3.12"]
UNIT_TEST_STANDARD_DEPENDENCIES = [
"mock",
"asyncmock",
Expand All @@ -57,17 +57,20 @@
],
"3.9": [
"bqstorage",
"bigframes",
],
"3.10": [
"bqstorage",
"bigframes",
],
"3.11": [],
"3.12": [
"bqstorage",
"bigframes",
],
}

SYSTEM_TEST_PYTHON_VERSIONS: List[str] = ["3.8", "3.11", "3.12"]
SYSTEM_TEST_PYTHON_VERSIONS: List[str] = ["3.8", "3.9", "3.11", "3.12"]
SYSTEM_TEST_STANDARD_DEPENDENCIES: List[str] = [
"mock",
"pytest",
Expand All @@ -86,13 +89,16 @@
],
"3.9": [
"bqstorage",
"bigframes",
],
"3.10": [
"bqstorage",
"bigframes",
],
"3.11": [],
"3.12": [
"bqstorage",
"bigframes",
],
}

Expand Down
17 changes: 9 additions & 8 deletions owlbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,20 @@
# Add templated files
# ----------------------------------------------------------------------------

extras = ["bqstorage"]
extras_storage = ["bqstorage"]
extras_bf = ["bqstorage", "bigframes"]
extras_by_python = {
"3.7": extras,
"3.8": extras,
"3.9": extras,
"3.10": extras,
"3.7": extras_storage,
"3.8": extras_storage,
"3.9": extras_bf,
"3.10": extras_bf,
# Use a middle version of Python to test when no extras are installed.
"3.11": [],
"3.12": extras,
"3.12": extras_bf,
}
templated_files = common.py_library(
unit_test_python_versions=["3.7", "3.8", "3.11", "3.12"],
system_test_python_versions=["3.8", "3.11", "3.12"],
unit_test_python_versions=["3.7", "3.8", "3.9", "3.11", "3.12"],
system_test_python_versions=["3.8", "3.9", "3.11", "3.12"],
cov_level=100,
unit_test_extras_by_python=extras_by_python,
unit_test_external_dependencies=["google-cloud-testutils"],
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
"grpcio >= 1.47.0, < 2.0dev",
"grpcio >= 1.49.1, < 2.0dev; python_version>='3.11'",
],
"bigframes": ["bigframes >= 1.17.0"],
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pip will always install the latest version of bigframes. Let's make sure we test our compatibility with the minimum supported bigframes by adding bigframes==1.17.0 to https://github.com/googleapis/python-bigquery-magics/blob/main/testing/constraints-3.9.txt

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

}

all_extras = []
Expand Down
1 change: 1 addition & 0 deletions testing/constraints-3.9.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
# We try to test across major versions of our dependencies.
# This is the last pandas 2.0.x release.
pandas==2.0.3
bigframes==1.17.0

Loading