Skip to content

Commit

Permalink
feat: add support for BigQuery DataFrames. Set context.engine to 'b…
Browse files Browse the repository at this point in the history
…igframes' to support query results larger than 10 GB (#58)

* define 'engine' field in the context

* use bigframes API when the context.engine says so

* remove unnecessary deps

* relax pip deps

* undo pip deps

* make bigframes an optional dependency

* fix format

* use 'is' for None checks

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* parametrize bigframes installation

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* define unit_bf with bf installation to make owlbot happy

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* parametrize bf installation and disable owlbot enforcement on the noxfile

* change owlbot file to test optional bf deps

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
  • Loading branch information
sycai and gcf-owl-bot[bot] committed Sep 20, 2024
1 parent b0b7bec commit 90ba05f
Show file tree
Hide file tree
Showing 9 changed files with 249 additions and 25 deletions.
6 changes: 4 additions & 2 deletions CONTRIBUTING.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ In order to add a feature:
documentation.

- The feature must work fully on the following CPython versions:
3.7, 3.8, 3.11 and 3.12 on both UNIX and Windows.
3.7, 3.8, 3.9, 3.11 and 3.12 on both UNIX and Windows.

- The feature must not add unnecessary dependencies (where
"unnecessary" is of course subjective, but new dependencies should
Expand Down Expand Up @@ -148,7 +148,7 @@ Running System Tests

.. note::

System tests are only configured to run under Python 3.8, 3.11 and 3.12.
System tests are only configured to run under Python 3.8, 3.9, 3.11 and 3.12.
For expediency, we do not run them in older versions of Python 3.

This alone will not run the tests. You'll need to change some local
Expand Down Expand Up @@ -223,11 +223,13 @@ We support:

- `Python 3.7`_
- `Python 3.8`_
- `Python 3.9`_
- `Python 3.11`_
- `Python 3.12`_

.. _Python 3.7: https://docs.python.org/3.7/
.. _Python 3.8: https://docs.python.org/3.8/
.. _Python 3.9: https://docs.python.org/3.9/
.. _Python 3.11: https://docs.python.org/3.11/
.. _Python 3.12: https://docs.python.org/3.12/

Expand Down
61 changes: 50 additions & 11 deletions bigquery_magics/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,11 @@
except ImportError:
bigquery_storage = None

try:
import bigframes.pandas as bpd
except ImportError:
bpd = None

USER_AGENT = f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.version.__version__}"
context = bigquery_magics.config.context

Expand Down Expand Up @@ -255,6 +260,7 @@ def _create_dataset_if_necessary(client, dataset_id):
help=(
"Sets query to be a dry run to estimate costs. "
"Defaults to executing the query instead of dry run if this argument is not used."
"Does not work with engine 'bigframes'. "
),
)
@magic_arguments.argument(
Expand Down Expand Up @@ -319,6 +325,7 @@ def _create_dataset_if_necessary(client, dataset_id):
"amount of time for the query to finish. By default, this "
"information will be displayed as the query runs, but will be "
"cleared after the query is finished."
"This flag is ignored when the engine is 'bigframes'."
),
)
@magic_arguments.argument(
Expand Down Expand Up @@ -350,6 +357,7 @@ def _create_dataset_if_necessary(client, dataset_id):
help=(
"Set the location to execute query."
"Defaults to location set in query setting in console."
"This flag is ignored when the engine is 'bigframes'."
),
)
def _cell_magic(line, query):
Expand All @@ -376,18 +384,10 @@ def _cell_magic(line, query):
return
query = _validate_and_resolve_query(query, args)

bq_client, bqstorage_client = _create_clients(args)
if context.engine == "bigframes":
return _query_with_bigframes(query, params, args)

try:
return _make_bq_query(
query,
args=args,
params=params,
bq_client=bq_client,
bqstorage_client=bqstorage_client,
)
finally:
_close_transports(bq_client, bqstorage_client)
return _query_with_pandas(query, params, args)


def _parse_magic_args(line: str) -> Tuple[List[Any], Any]:
Expand Down Expand Up @@ -444,6 +444,45 @@ def _split_args_line(line: str) -> Tuple[str, str]:
return params_option_value, rest_of_args


def _query_with_bigframes(query: str, params: List[Any], args: Any):
    """Run ``query`` with the BigQuery DataFrames (bigframes) engine.

    The bigframes session options are pointed at the project and credentials
    held by the magics ``context`` before the query is issued.

    Args:
        query: The SQL text to execute.
        params: Query parameters, forwarded to ``_create_job_config``.
        args: Parsed magic arguments. This function reads ``dry_run``,
            ``max_results`` and ``destination_var``; the whole object is
            also forwarded to ``_create_job_config``.

    Returns:
        The object returned by ``bpd.read_gbq_query`` when no destination
        variable was requested; otherwise ``None`` (the result is pushed into
        the IPython session under ``args.destination_var`` instead).

    Raises:
        ValueError: If a dry run was requested, or if the optional
            ``bigframes`` import failed and ``bpd`` is ``None``.
    """
    # Reject unsupported usage before touching any bigframes state.
    if args.dry_run:
        raise ValueError("Dry run is not supported by bigframes engine.")

    if bpd is None:
        raise ValueError("Bigframes package is not installed.")

    bigquery_options = bpd.options.bigquery
    bigquery_options.project = context.project
    bigquery_options.credentials = context.credentials

    # A missing/empty --max_results means "no limit".
    max_results = None
    if args.max_results:
        max_results = int(args.max_results)

    job_configuration = _create_job_config(args, params).to_api_repr()
    dataframe = bpd.read_gbq_query(
        query,
        max_results=max_results,
        configuration=job_configuration,
    )

    if not args.destination_var:
        return dataframe

    # Store under the requested name instead of returning the result.
    get_ipython().push({args.destination_var: dataframe})


def _query_with_pandas(query: str, params: List[Any], args: Any):
    """Run ``query`` through the BigQuery client path.

    Clients are created from ``args`` via ``_create_clients`` and are always
    handed to ``_close_transports`` afterwards, whether or not the query
    succeeds.

    Args:
        query: The SQL text to execute.
        params: Query parameters, forwarded to ``_make_bq_query``.
        args: Parsed magic arguments, forwarded to ``_create_clients`` and
            ``_make_bq_query``.

    Returns:
        Whatever ``_make_bq_query`` returns.
    """
    client, storage_client = _create_clients(args)

    try:
        outcome = _make_bq_query(
            query,
            args=args,
            params=params,
            bq_client=client,
            bqstorage_client=storage_client,
        )
    finally:
        # Runs on both the success and the failure path.
        _close_transports(client, storage_client)

    return outcome


def _create_clients(args: Any) -> Tuple[bigquery.Client, Any]:
bigquery_client_options = copy.deepcopy(context.bigquery_client_options)
if args.bigquery_api_endpoint:
Expand Down
29 changes: 27 additions & 2 deletions bigquery_magics/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ class Context(object):
and can be found at ``bigquery_magics.context``.
"""

_credentials = None
_project = None
_connection = None

default_query_job_config = bigquery.QueryJobConfig()
Expand Down Expand Up @@ -103,6 +101,8 @@ class Context(object):
>>> bigquery_magics.context.progress_bar_type = "tqdm_notebook"
"""

_credentials = None

@property
def credentials(self):
"""google.auth.credentials.Credentials: Credentials to use for queries
Expand Down Expand Up @@ -138,6 +138,8 @@ def credentials(self):
def credentials(self, value):
self._credentials = value

_project = None

@property
def project(self):
"""str: Default project to use for queries performed through IPython
Expand All @@ -163,5 +165,28 @@ def project(self):
def project(self, value):
self._project = value

_engine = "pandas"

@property
def engine(self) -> str:
    """str: Engine used to run the query: either ``"pandas"`` or
    ``"bigframes"``.

    With ``"pandas"``, the query result is stored in a pandas DataFrame.
    With ``"bigframes"``, the query result is stored in a bigframes
    DataFrame instead.

    Example:
        Manually setting the context engine:

        >>> import bigquery_magics
        >>> bigquery_magics.context.engine = 'bigframes'
    """
    return self._engine

@engine.setter
def engine(self, value):
    # Only the two known engines are accepted; anything else (including
    # different casing or None) is rejected and the stored value is kept.
    if value not in ("pandas", "bigframes"):
        raise ValueError("engine must be either 'pandas' or 'bigframes'")
    self._engine = value


context = Context()
10 changes: 8 additions & 2 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

DEFAULT_PYTHON_VERSION = "3.8"

UNIT_TEST_PYTHON_VERSIONS: List[str] = ["3.7", "3.8", "3.11", "3.12"]
UNIT_TEST_PYTHON_VERSIONS: List[str] = ["3.7", "3.8", "3.9", "3.11", "3.12"]
UNIT_TEST_STANDARD_DEPENDENCIES = [
"mock",
"asyncmock",
Expand All @@ -57,17 +57,20 @@
],
"3.9": [
"bqstorage",
"bigframes",
],
"3.10": [
"bqstorage",
"bigframes",
],
"3.11": [],
"3.12": [
"bqstorage",
"bigframes",
],
}

SYSTEM_TEST_PYTHON_VERSIONS: List[str] = ["3.8", "3.11", "3.12"]
SYSTEM_TEST_PYTHON_VERSIONS: List[str] = ["3.8", "3.9", "3.11", "3.12"]
SYSTEM_TEST_STANDARD_DEPENDENCIES: List[str] = [
"mock",
"pytest",
Expand All @@ -86,13 +89,16 @@
],
"3.9": [
"bqstorage",
"bigframes",
],
"3.10": [
"bqstorage",
"bigframes",
],
"3.11": [],
"3.12": [
"bqstorage",
"bigframes",
],
}

Expand Down
17 changes: 9 additions & 8 deletions owlbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,20 @@
# Add templated files
# ----------------------------------------------------------------------------

extras = ["bqstorage"]
extras_storage = ["bqstorage"]
extras_bf = ["bqstorage", "bigframes"]
extras_by_python = {
"3.7": extras,
"3.8": extras,
"3.9": extras,
"3.10": extras,
"3.7": extras_storage,
"3.8": extras_storage,
"3.9": extras_bf,
"3.10": extras_bf,
# Use a middle version of Python to test when no extras are installed.
"3.11": [],
"3.12": extras,
"3.12": extras_bf,
}
templated_files = common.py_library(
unit_test_python_versions=["3.7", "3.8", "3.11", "3.12"],
system_test_python_versions=["3.8", "3.11", "3.12"],
unit_test_python_versions=["3.7", "3.8", "3.9", "3.11", "3.12"],
system_test_python_versions=["3.8", "3.9", "3.11", "3.12"],
cov_level=100,
unit_test_extras_by_python=extras_by_python,
unit_test_external_dependencies=["google-cloud-testutils"],
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
"grpcio >= 1.47.0, < 2.0dev",
"grpcio >= 1.49.1, < 2.0dev; python_version>='3.11'",
],
"bigframes": ["bigframes >= 1.17.0"],
}

all_extras = []
Expand Down
1 change: 1 addition & 0 deletions testing/constraints-3.9.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
# We try to test across major versions of our dependencies.
# This is the last pandas 2.0.x release.
pandas==2.0.3
bigframes==1.17.0

Loading

0 comments on commit 90ba05f

Please sign in to comment.