diff --git a/.kokoro/continuous/bigquery_storage.cfg b/.kokoro/continuous/bigquery_storage.cfg new file mode 100644 index 000000000000..7f72bfed0587 --- /dev/null +++ b/.kokoro/continuous/bigquery_storage.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Tell the trampoline which build file to use. +env_vars: { + key: "PACKAGE" + value: "bigquery_storage" +} diff --git a/.kokoro/presubmit/bigquery_storage.cfg b/.kokoro/presubmit/bigquery_storage.cfg new file mode 100644 index 000000000000..7f72bfed0587 --- /dev/null +++ b/.kokoro/presubmit/bigquery_storage.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Tell the trampoline which build file to use. +env_vars: { + key: "PACKAGE" + value: "bigquery_storage" +} diff --git a/bigquery_storage/.coveragerc b/bigquery_storage/.coveragerc new file mode 100644 index 000000000000..8aba24fd6a78 --- /dev/null +++ b/bigquery_storage/.coveragerc @@ -0,0 +1,16 @@ +[run] +branch = True + +[report] +fail_under = 100 +show_missing = True +exclude_lines = + # Re-enable the standard pragma + pragma: NO COVER + # Ignore debug-only repr + def __repr__ + # Ignore abstract methods + raise NotImplementedError +omit = + */gapic/*.py + */proto/*.py \ No newline at end of file diff --git a/bigquery_storage/.gitignore b/bigquery_storage/.gitignore new file mode 100644 index 000000000000..9e3a5f25770c --- /dev/null +++ b/bigquery_storage/.gitignore @@ -0,0 +1 @@ +docs/_build \ No newline at end of file diff --git a/bigquery_storage/LICENSE b/bigquery_storage/LICENSE new file mode 100644 index 000000000000..a8ee855de2aa --- /dev/null +++ b/bigquery_storage/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/bigquery_storage/MANIFEST.in b/bigquery_storage/MANIFEST.in new file mode 100644 index 000000000000..9cbf175afe6b --- /dev/null +++ b/bigquery_storage/MANIFEST.in @@ -0,0 +1,5 @@ +include README.rst LICENSE +recursive-include google *.json *.proto +recursive-include tests * +global-exclude *.py[co] +global-exclude __pycache__ diff --git a/bigquery_storage/README.rst b/bigquery_storage/README.rst new file mode 100644 index 000000000000..22c9292c1023 --- /dev/null +++ b/bigquery_storage/README.rst @@ -0,0 +1,75 @@ +Python Client for BigQuery Storage API (`Alpha`_) +================================================= + +`BigQuery Storage API`_: + +- `Client Library Documentation`_ +- `Product Documentation`_ + +.. _Alpha: https://github.com/GoogleCloudPlatform/google-cloud-python/blob/master/README.rst +.. _BigQuery Storage API: https://cloud.google.com/bigquery +.. 
_Client Library Documentation: https://googlecloudplatform.github.io/google-cloud-python/stable/bigquery_storage/index.html +.. _Product Documentation: https://cloud.google.com/bigquery + +Quick Start +----------- + +In order to use this library, you first need to go through the following steps: + +1. `Select or create a Cloud Platform project.`_ +2. `Enable billing for your project.`_ +3. `Enable the BigQuery Storage API.`_ +4. `Setup Authentication.`_ + +.. _Select or create a Cloud Platform project.: https://console.cloud.google.com/project +.. _Enable billing for your project.: https://cloud.google.com/billing/docs/how-to/modify-project#enable_billing_for_a_project +.. _Enable the BigQuery Storage API.: https://cloud.google.com/bigquerystorage +.. _Setup Authentication.: https://googlecloudplatform.github.io/google-cloud-python/stable/core/auth.html + +Installation +~~~~~~~~~~~~ + +Install this library in a `virtualenv`_ using pip. `virtualenv`_ is a tool to +create isolated Python environments. The basic problem it addresses is one of +dependencies and versions, and indirectly permissions. + +With `virtualenv`_, it's possible to install this library without needing system +install permissions, and without clashing with the installed system +dependencies. + +.. _`virtualenv`: https://virtualenv.pypa.io/en/latest/ + + +Mac/Linux +^^^^^^^^^ + +.. code-block:: console + + pip install virtualenv + virtualenv + source /bin/activate + /bin/pip install google-cloud-bigquery-storage + + +Windows +^^^^^^^ + +.. code-block:: console + + pip install virtualenv + virtualenv + \Scripts\activate + \Scripts\pip.exe install google-cloud-bigquery-storage + +Next Steps +~~~~~~~~~~ + +- Read the `Client Library Documentation`_ for BigQuery Storage API + API to see other available methods on the client. +- Read the `BigQuery Storage API Product documentation`_ to learn + more about the product and see How-to Guides. +- View this `repository’s main README`_ to see the full list of Cloud + APIs that we cover. + +.. _BigQuery Storage API Product documentation: https://cloud.google.com/bigquery +.. _repository’s main README: https://github.com/GoogleCloudPlatform/google-cloud-python/blob/master/README.rst \ No newline at end of file diff --git a/bigquery_storage/docs/conf.py b/bigquery_storage/docs/conf.py new file mode 100644 index 000000000000..50efafe7cfbd --- /dev/null +++ b/bigquery_storage/docs/conf.py @@ -0,0 +1,313 @@ +# -*- coding: utf-8 -*- +# +# google-cloud-bigquerystorage documentation build configuration file +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os +import shlex + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +sys.path.insert(0, os.path.abspath('..')) + +__version__ = '0.1.0' + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. 
+extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.intersphinx', + 'sphinx.ext.coverage', + 'sphinx.ext.napoleon', + 'sphinx.ext.viewcode', +] + +# autodoc/autosummary flags +autoclass_content = 'both' +autodoc_default_flags = ['members'] +autosummary_generate = True + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'google-cloud-bigquerystorage' +copyright = u'2017, Google' +author = u'Google APIs' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The full version, including alpha/beta/rc tags. +release = __version__ +# The short X.Y version. +version = '.'.join(release.split('.')[0:2]) + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = True + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'sphinx_rtd_theme' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. 
+#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +# html_static_path = [] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Language to be used for generating the HTML full-text search index. +# Sphinx supports the following languages: +# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' +# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' +#html_search_language = 'en' + +# A dictionary with options for the search language support, empty by default. +# Now only 'ja' uses this config value +#html_search_options = {'type': 'default'} + +# The name of a javascript file (relative to the configuration directory) that +# implements a search results scorer. If empty, the default will be used. +#html_search_scorer = 'scorer.js' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'google-cloud-bigquerystorage-doc' + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + #'preamble': '', + + # Latex figure (float) alignment + #'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). 
+latex_documents = [ + (master_doc, 'google-cloud-bigquerystorage.tex', + u'google-cloud-bigquerystorage Documentation', author, 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [(master_doc, 'google-cloud-bigquerystorage', + u'google-cloud-bigquerystorage Documentation', [author], 1)] + +# If true, show URL addresses after external links. +#man_show_urls = False + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'google-cloud-bigquerystorage', + u'google-cloud-bigquerystorage Documentation', author, + 'google-cloud-bigquerystorage', + 'GAPIC library for the {metadata.shortName} v1beta1 service', 'APIs'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = { + 'python': ('http://python.readthedocs.org/en/latest/', None), + 'gax': ('https://gax-python.readthedocs.org/en/latest/', None), + 'fastavro': ('https://fastavro.readthedocs.io/en/stable/', None), + 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), +} + +# Napoleon settings +napoleon_google_docstring = True +napoleon_numpy_docstring = True +napoleon_include_private_with_doc = False +napoleon_include_special_with_doc = True +napoleon_use_admonition_for_examples = False +napoleon_use_admonition_for_notes = False +napoleon_use_admonition_for_references = False +napoleon_use_ivar = False +napoleon_use_param = True +napoleon_use_rtype = True diff --git a/bigquery_storage/docs/gapic/v1beta1/api.rst b/bigquery_storage/docs/gapic/v1beta1/api.rst new file mode 100644 index 000000000000..d4df98557e15 --- /dev/null +++ b/bigquery_storage/docs/gapic/v1beta1/api.rst @@ -0,0 +1,6 @@ +Client for BigQuery Storage API +=============================== + +.. automodule:: google.cloud.bigquery_storage_v1beta1 + :members: + :inherited-members: \ No newline at end of file diff --git a/bigquery_storage/docs/gapic/v1beta1/reader.rst b/bigquery_storage/docs/gapic/v1beta1/reader.rst new file mode 100644 index 000000000000..5b6af828f53e --- /dev/null +++ b/bigquery_storage/docs/gapic/v1beta1/reader.rst @@ -0,0 +1,6 @@ +Reader for BigQuery Storage API +=============================== + +.. 
automodule:: google.cloud.bigquery_storage_v1beta1.reader + :members: + :inherited-members: diff --git a/bigquery_storage/docs/gapic/v1beta1/types.rst b/bigquery_storage/docs/gapic/v1beta1/types.rst new file mode 100644 index 000000000000..a36210a64e52 --- /dev/null +++ b/bigquery_storage/docs/gapic/v1beta1/types.rst @@ -0,0 +1,5 @@ +Types for BigQuery Storage API Client +===================================== + +.. automodule:: google.cloud.bigquery_storage_v1beta1.types + :members: \ No newline at end of file diff --git a/bigquery_storage/docs/index.rst b/bigquery_storage/docs/index.rst new file mode 100644 index 000000000000..1232e5865ae5 --- /dev/null +++ b/bigquery_storage/docs/index.rst @@ -0,0 +1,10 @@ +.. include:: ../../bigquery_storage/README.rst + +API Reference +------------- +.. toctree:: + :maxdepth: 2 + + gapic/v1beta1/api + gapic/v1beta1/reader + gapic/v1beta1/types diff --git a/bigquery_storage/google/__init__.py b/bigquery_storage/google/__init__.py new file mode 100644 index 000000000000..f65701dd143f --- /dev/null +++ b/bigquery_storage/google/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + import pkg_resources + pkg_resources.declare_namespace(__name__) +except ImportError: + import pkgutil + __path__ = pkgutil.extend_path(__path__, __name__) diff --git a/bigquery_storage/google/cloud/__init__.py b/bigquery_storage/google/cloud/__init__.py new file mode 100644 index 000000000000..f65701dd143f --- /dev/null +++ b/bigquery_storage/google/cloud/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + import pkg_resources + pkg_resources.declare_namespace(__name__) +except ImportError: + import pkgutil + __path__ = pkgutil.extend_path(__path__, __name__) diff --git a/bigquery_storage/google/cloud/bigquery_storage.py b/bigquery_storage/google/cloud/bigquery_storage.py new file mode 100644 index 000000000000..7270e2885319 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +from google.cloud.bigquery_storage_v1beta1 import BigQueryStorageClient +from google.cloud.bigquery_storage_v1beta1 import enums +from google.cloud.bigquery_storage_v1beta1 import types + +__all__ = ( + 'enums', + 'types', + 'BigQueryStorageClient', +) diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/__init__.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/__init__.py new file mode 100644 index 000000000000..b71f61049f68 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/__init__.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +import pkg_resources +__version__ = pkg_resources.get_distribution('google-cloud-bigquery-storage').version # noqa + +from google.cloud.bigquery_storage_v1beta1 import types +from google.cloud.bigquery_storage_v1beta1 import client +from google.cloud.bigquery_storage_v1beta1.gapic import enums + + +class BigQueryStorageClient(client.BigQueryStorageClient): + __doc__ = client.BigQueryStorageClient.__doc__ + enums = enums + + +__all__ = ( + # google.cloud.bigquery_storage_v1beta1 + '__version__', + 'types', + + # google.cloud.bigquery_storage_v1beta1.client + 'BigQueryStorageClient', + + # google.cloud.bigquery_storage_v1beta1.gapic + 'enums', +) diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/client.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/client.py new file mode 100644 index 000000000000..75a34c37346e --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/client.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Parent client for calling the Cloud BigQuery Storage API. + +This is the base from which all interactions with the API occur. 
+""" + +from __future__ import absolute_import + +import google.api_core.gapic_v1.method + +from google.cloud.bigquery_storage_v1beta1 import reader +from google.cloud.bigquery_storage_v1beta1.gapic import big_query_storage_client # noqa + + +_SCOPES = ( + 'https://www.googleapis.com/auth/bigquery', + 'https://www.googleapis.com/auth/cloud-platform', +) + + +class BigQueryStorageClient(big_query_storage_client.BigQueryStorageClient): + """Client for interacting with BigQuery Storage API. + + The BigQuery storage API can be used to read data stored in BigQuery. + """ + + def read_rows(self, + read_position, + retry=google.api_core.gapic_v1.method.DEFAULT, + timeout=google.api_core.gapic_v1.method.DEFAULT, + metadata=None): + """ + Reads rows from the table in the format prescribed by the read + session. Each response contains one or more table rows, up to a + maximum of 10 MiB per response; read requests which attempt to read + individual rows larger than this will fail. + + Each request also returns a set of stream statistics reflecting the + estimated total number of rows in the read stream. This number is + computed based on the total table size and the number of active + streams in the read session, and may change as other streams continue + to read data. + + Example: + >>> from google.cloud import bigquery_storage_v1beta1 + >>> + >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() + >>> + >>> # TODO: Initialize ``table_reference``: + >>> table_reference = { + ... 'project_id': 'your-data-project-id', + ... 'dataset_id': 'your_dataset_id', + ... 'table_id': 'your_table_id', + ... } + >>> + >>> # TODO: Initialize `parent`: + >>> parent = 'projects/your-billing-project-id' + >>> + >>> session = client.create_read_session(table_reference, parent) + >>> read_position = bigquery_storage_v1beta1.types.StreamPosition( + ... stream=session.streams[0], # TODO: Read the other streams. + ... ) + >>> + >>> for element in client.read_rows(read_position): + ... # process element + ... pass + + Args: + read_position (Union[ \ + dict, \ + ~google.cloud.bigquery_storage_v1beta1.types.StreamPosition \ + ]): + Required. Identifier of the position in the stream to start + reading from. The offset requested must be less than the last + row read from ReadRows. Requesting a larger offset is + undefined. If a dict is provided, it must be of the same form + as the protobuf message + :class:`~google.cloud.bigquery_storage_v1beta1.types.StreamPosition` + retry (Optional[google.api_core.retry.Retry]): A retry object used + to retry requests. If ``None`` is specified, requests will not + be retried. + timeout (Optional[float]): The amount of time, in seconds, to wait + for the request to complete. Note that if ``retry`` is + specified, the timeout applies to each individual attempt. + metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata + that is provided to the method. + + Returns: + ~google.cloud.bigquery_storage_v1beta1.reader.ReadRowsStream: + An iterable of + :class:`~google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse`. + + Raises: + google.api_core.exceptions.GoogleAPICallError: If the request + failed for any reason. + google.api_core.exceptions.RetryError: If the request failed due + to a retryable error and retry attempts failed. + ValueError: If the parameters are invalid. 
+ """ + gapic_client = super(BigQueryStorageClient, self) + stream = gapic_client.read_rows( + read_position, + retry=retry, + timeout=timeout, + metadata=metadata, + ) + return reader.ReadRowsStream( + stream, + gapic_client, + read_position, + { + 'retry': retry, + 'timeout': timeout, + 'metadata': metadata, + }, + ) diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/__init__.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py new file mode 100644 index 000000000000..5acd74320b5f --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py @@ -0,0 +1,610 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Accesses the google.cloud.bigquery.storage.v1beta1 BigQueryStorage API.""" + +import pkg_resources +import warnings + +from google.oauth2 import service_account +import google.api_core.gapic_v1.client_info +import google.api_core.gapic_v1.config +import google.api_core.gapic_v1.method +import google.api_core.path_template +import google.api_core.gapic_v1.routing_header +import google.api_core.grpc_helpers +import grpc + +from google.cloud.bigquery_storage_v1beta1.gapic import big_query_storage_client_config +from google.cloud.bigquery_storage_v1beta1.gapic import enums +from google.cloud.bigquery_storage_v1beta1.gapic.transports import big_query_storage_grpc_transport +from google.cloud.bigquery_storage_v1beta1.proto import read_options_pb2 +from google.cloud.bigquery_storage_v1beta1.proto import storage_pb2 +from google.cloud.bigquery_storage_v1beta1.proto import storage_pb2_grpc +from google.cloud.bigquery_storage_v1beta1.proto import table_reference_pb2 +from google.protobuf import empty_pb2 + +_GAPIC_LIBRARY_VERSION = pkg_resources.get_distribution( + 'google-cloud-bigquery-storage', ).version + + +class BigQueryStorageClient(object): + """ + BigQuery storage API. + + The BigQuery storage API can be used to read data stored in BigQuery. + """ + + SERVICE_ADDRESS = 'bigquerystorage.googleapis.com:443' + """The default address of the service.""" + + # The name of the interface for this client. This is the key used to + # find the method configuration in the client_config dictionary. + _INTERFACE_NAME = 'google.cloud.bigquery.storage.v1beta1.BigQueryStorage' + + @classmethod + def from_service_account_file(cls, filename, *args, **kwargs): + """Creates an instance of this client using the provided credentials + file. + + Args: + filename (str): The path to the service account private key json + file. + args: Additional arguments to pass to the constructor. + kwargs: Additional arguments to pass to the constructor. + + Returns: + BigQueryStorageClient: The constructed client. 
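+
+        Example:
+            >>> # NOTE: the key file path below is only a placeholder.
+            >>> client = BigQueryStorageClient.from_service_account_file(
+            ...     'service_account.json')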
+ """ + credentials = service_account.Credentials.from_service_account_file( + filename) + kwargs['credentials'] = credentials + return cls(*args, **kwargs) + + from_service_account_json = from_service_account_file + + def __init__(self, + transport=None, + channel=None, + credentials=None, + client_config=big_query_storage_client_config.config, + client_info=None): + """Constructor. + + Args: + transport (Union[~.BigQueryStorageGrpcTransport, + Callable[[~.Credentials, type], ~.BigQueryStorageGrpcTransport]): A transport + instance, responsible for actually making the API calls. + The default transport uses the gRPC protocol. + This argument may also be a callable which returns a + transport instance. Callables will be sent the credentials + as the first argument and the default transport class as + the second argument. + channel (grpc.Channel): DEPRECATED. A ``Channel`` instance + through which to make calls. This argument is mutually exclusive + with ``credentials``; providing both will raise an exception. + credentials (google.auth.credentials.Credentials): The + authorization credentials to attach to requests. These + credentials identify this application to the service. If none + are specified, the client will attempt to ascertain the + credentials from the environment. + This argument is mutually exclusive with providing a + transport instance to ``transport``; doing so will raise + an exception. + client_config (dict): DEPRECATED. A dictionary of call options for + each method. If not specified, the default configuration is used. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing + your own client library. + """ + # Raise deprecation warnings for things we want to go away. + if client_config: + warnings.warn('The `client_config` argument is deprecated.', + PendingDeprecationWarning) + if channel: + warnings.warn( + 'The `channel` argument is deprecated; use ' + '`transport` instead.', PendingDeprecationWarning) + + # Instantiate the transport. + # The transport is responsible for handling serialization and + # deserialization and actually sending data to the service. + if transport: # pragma: no cover + if callable(transport): + self.transport = transport( + credentials=credentials, + default_class=big_query_storage_grpc_transport. + BigQueryStorageGrpcTransport, + ) + else: + if credentials: + raise ValueError( + 'Received both a transport instance and ' + 'credentials; these are mutually exclusive.') + self.transport = transport + else: + self.transport = big_query_storage_grpc_transport.BigQueryStorageGrpcTransport( + address=self.SERVICE_ADDRESS, + channel=channel, + credentials=credentials, + ) + + if client_info is None: + client_info = google.api_core.gapic_v1.client_info.ClientInfo( + gapic_version=_GAPIC_LIBRARY_VERSION, ) + else: + client_info.gapic_version = _GAPIC_LIBRARY_VERSION + self._client_info = client_info + + # Parse out the default settings for retry and timeout for each RPC + # from the client configuration. + # (Ordinarily, these are the defaults specified in the `*_config.py` + # file next to this one.) + self._method_configs = google.api_core.gapic_v1.config.parse_method_configs( + client_config['interfaces'][self._INTERFACE_NAME], ) + + # Save a dictionary of cached API call functions. 
+ # These are the actual callables which invoke the proper + # transport methods, wrapped with `wrap_method` to add retry, + # timeout, and the like. + self._inner_api_calls = {} + + # Service calls + def create_read_session(self, + table_reference, + parent, + table_modifiers=None, + requested_streams=None, + read_options=None, + format_=None, + retry=google.api_core.gapic_v1.method.DEFAULT, + timeout=google.api_core.gapic_v1.method.DEFAULT, + metadata=None): + """ + Creates a new read session. A read session divides the contents of a + BigQuery table into one or more streams, which can then be used to read + data from the table. The read session also specifies properties of the + data to be read, such as a list of columns or a push-down filter describing + the rows to be returned. + + A particular row can be read by at most one stream. When the caller has + reached the end of each stream in the session, then all the data in the + table has been read. + + Read sessions automatically expire 24 hours after they are created and do + not require manual clean-up by the caller. + + Example: + >>> from google.cloud import bigquery_storage_v1beta1 + >>> + >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() + >>> + >>> # TODO: Initialize `table_reference`: + >>> table_reference = {} + >>> + >>> # TODO: Initialize `parent`: + >>> parent = '' + >>> + >>> response = client.create_read_session(table_reference, parent) + + Args: + table_reference (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.TableReference]): Required. Reference to the table to read. + + If a dict is provided, it must be of the same form as the protobuf + message :class:`~google.cloud.bigquery_storage_v1beta1.types.TableReference` + parent (str): Required. String of the form "projects/your-project-id" indicating the + project this ReadSession is associated with. This is the project that will + be billed for usage. + table_modifiers (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.TableModifiers]): Optional. Any modifiers to the Table (e.g. snapshot timestamp). + + If a dict is provided, it must be of the same form as the protobuf + message :class:`~google.cloud.bigquery_storage_v1beta1.types.TableModifiers` + requested_streams (int): Optional. Initial number of streams. If unset or 0, we will + provide a value of streams so as to produce reasonable throughput. Must be + non-negative. The number of streams may be lower than the requested number, + depending on the amount parallelism that is reasonable for the table and + the maximum amount of parallelism allowed by the system. + + Streams must be read starting from offset 0. + read_options (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.TableReadOptions]): Optional. Read options for this session (e.g. column selection, filters). + + If a dict is provided, it must be of the same form as the protobuf + message :class:`~google.cloud.bigquery_storage_v1beta1.types.TableReadOptions` + format_ (~google.cloud.bigquery_storage_v1beta1.types.DataFormat): Data output format. Currently default to Avro. + retry (Optional[google.api_core.retry.Retry]): A retry object used + to retry requests. If ``None`` is specified, requests will not + be retried. + timeout (Optional[float]): The amount of time, in seconds, to wait + for the request to complete. Note that if ``retry`` is + specified, the timeout applies to each individual attempt. + metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata + that is provided to the method. 
+ + Returns: + A :class:`~google.cloud.bigquery_storage_v1beta1.types.ReadSession` instance. + + Raises: + google.api_core.exceptions.GoogleAPICallError: If the request + failed for any reason. + google.api_core.exceptions.RetryError: If the request failed due + to a retryable error and retry attempts failed. + ValueError: If the parameters are invalid. + """ + # Wrap the transport method to add retry and timeout logic. + if 'create_read_session' not in self._inner_api_calls: + self._inner_api_calls[ + 'create_read_session'] = google.api_core.gapic_v1.method.wrap_method( + self.transport.create_read_session, + default_retry=self._method_configs['CreateReadSession']. + retry, + default_timeout=self._method_configs['CreateReadSession']. + timeout, + client_info=self._client_info, + ) + + request = storage_pb2.CreateReadSessionRequest( + table_reference=table_reference, + parent=parent, + table_modifiers=table_modifiers, + requested_streams=requested_streams, + read_options=read_options, + format=format_, + ) + if metadata is None: + metadata = [] + metadata = list(metadata) + try: + routing_header = [('table_reference.project_id', + table_reference.project_id), + ('table_reference.dataset_id', + table_reference.dataset_id)] + except AttributeError: + pass + else: + routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( # pragma: no cover + routing_header) + metadata.append(routing_metadata) # pragma: no cover + + return self._inner_api_calls['create_read_session']( + request, retry=retry, timeout=timeout, metadata=metadata) + + def read_rows(self, + read_position, + retry=google.api_core.gapic_v1.method.DEFAULT, + timeout=google.api_core.gapic_v1.method.DEFAULT, + metadata=None): + """ + Reads rows from the table in the format prescribed by the read session. + Each response contains one or more table rows, up to a maximum of 10 MiB + per response; read requests which attempt to read individual rows larger + than this will fail. + + Each request also returns a set of stream statistics reflecting the + estimated total number of rows in the read stream. This number is computed + based on the total table size and the number of active streams in the read + session, and may change as other streams continue to read data. + + Example: + >>> from google.cloud import bigquery_storage_v1beta1 + >>> + >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() + >>> + >>> # TODO: Initialize `read_position`: + >>> read_position = {} + >>> + >>> for element in client.read_rows(read_position): + ... # process element + ... pass + + Args: + read_position (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.StreamPosition]): Required. Identifier of the position in the stream to start reading from. + The offset requested must be less than the last row read from ReadRows. + Requesting a larger offset is undefined. + + If a dict is provided, it must be of the same form as the protobuf + message :class:`~google.cloud.bigquery_storage_v1beta1.types.StreamPosition` + retry (Optional[google.api_core.retry.Retry]): A retry object used + to retry requests. If ``None`` is specified, requests will not + be retried. + timeout (Optional[float]): The amount of time, in seconds, to wait + for the request to complete. Note that if ``retry`` is + specified, the timeout applies to each individual attempt. + metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata + that is provided to the method. + + Returns: + Iterable[~google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse]. 
+ + Raises: + google.api_core.exceptions.GoogleAPICallError: If the request + failed for any reason. + google.api_core.exceptions.RetryError: If the request failed due + to a retryable error and retry attempts failed. + ValueError: If the parameters are invalid. + """ + # Wrap the transport method to add retry and timeout logic. + if 'read_rows' not in self._inner_api_calls: + self._inner_api_calls[ + 'read_rows'] = google.api_core.gapic_v1.method.wrap_method( + self.transport.read_rows, + default_retry=self._method_configs['ReadRows'].retry, + default_timeout=self._method_configs['ReadRows'].timeout, + client_info=self._client_info, + ) + + request = storage_pb2.ReadRowsRequest(read_position=read_position, ) + if metadata is None: + metadata = [] + metadata = list(metadata) + try: + routing_header = [('read_position.stream.name', + read_position.stream.name)] + except AttributeError: + pass + else: + routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( # pragma: no cover + routing_header) + metadata.append(routing_metadata) # pragma: no cover + + return self._inner_api_calls['read_rows']( + request, retry=retry, timeout=timeout, metadata=metadata) + + def batch_create_read_session_streams( + self, + session, + requested_streams, + retry=google.api_core.gapic_v1.method.DEFAULT, + timeout=google.api_core.gapic_v1.method.DEFAULT, + metadata=None): + """ + Creates additional streams for a ReadSession. This API can be used to + dynamically adjust the parallelism of a batch processing task upwards by + adding additional workers. + + Example: + >>> from google.cloud import bigquery_storage_v1beta1 + >>> + >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() + >>> + >>> # TODO: Initialize `session`: + >>> session = {} + >>> + >>> # TODO: Initialize `requested_streams`: + >>> requested_streams = 0 + >>> + >>> response = client.batch_create_read_session_streams(session, requested_streams) + + Args: + session (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.ReadSession]): Required. Must be a non-expired session obtained from a call to + CreateReadSession. Only the name field needs to be set. + + If a dict is provided, it must be of the same form as the protobuf + message :class:`~google.cloud.bigquery_storage_v1beta1.types.ReadSession` + requested_streams (int): Required. Number of new streams requested. Must be positive. + Number of added streams may be less than this, see CreateReadSessionRequest + for more information. + retry (Optional[google.api_core.retry.Retry]): A retry object used + to retry requests. If ``None`` is specified, requests will not + be retried. + timeout (Optional[float]): The amount of time, in seconds, to wait + for the request to complete. Note that if ``retry`` is + specified, the timeout applies to each individual attempt. + metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata + that is provided to the method. + + Returns: + A :class:`~google.cloud.bigquery_storage_v1beta1.types.BatchCreateReadSessionStreamsResponse` instance. + + Raises: + google.api_core.exceptions.GoogleAPICallError: If the request + failed for any reason. + google.api_core.exceptions.RetryError: If the request failed due + to a retryable error and retry attempts failed. + ValueError: If the parameters are invalid. + """ + # Wrap the transport method to add retry and timeout logic. 
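+        # The wrapped callable is cached in ``self._inner_api_calls`` so the
+        # default retry, timeout, and client-info metadata from the client
+        # config are attached only once and then reused on later calls.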
+ if 'batch_create_read_session_streams' not in self._inner_api_calls: + self._inner_api_calls[ + 'batch_create_read_session_streams'] = google.api_core.gapic_v1.method.wrap_method( + self.transport.batch_create_read_session_streams, + default_retry=self. + _method_configs['BatchCreateReadSessionStreams'].retry, + default_timeout=self. + _method_configs['BatchCreateReadSessionStreams'].timeout, + client_info=self._client_info, + ) + + request = storage_pb2.BatchCreateReadSessionStreamsRequest( + session=session, + requested_streams=requested_streams, + ) + if metadata is None: + metadata = [] + metadata = list(metadata) + try: + routing_header = [('session.name', session.name)] + except AttributeError: + pass + else: + routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( # pragma: no cover + routing_header) + metadata.append(routing_metadata) # pragma: no cover + + return self._inner_api_calls['batch_create_read_session_streams']( + request, retry=retry, timeout=timeout, metadata=metadata) + + def finalize_stream(self, + stream, + retry=google.api_core.gapic_v1.method.DEFAULT, + timeout=google.api_core.gapic_v1.method.DEFAULT, + metadata=None): + """ + Triggers the graceful termination of a single stream in a ReadSession. This + API can be used to dynamically adjust the parallelism of a batch processing + task downwards without losing data. + + This API does not delete the stream -- it remains visible in the + ReadSession, and any data processed by the stream is not released to other + streams. However, no additional data will be assigned to the stream once + this call completes. Callers must continue reading data on the stream until + the end of the stream is reached so that data which has already been + assigned to the stream will be processed. + + This method will return an error if there are no other live streams + in the Session, or if SplitReadStream() has been called on the given + Stream. + + Example: + >>> from google.cloud import bigquery_storage_v1beta1 + >>> + >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() + >>> + >>> # TODO: Initialize `stream`: + >>> stream = {} + >>> + >>> client.finalize_stream(stream) + + Args: + stream (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.Stream]): Stream to finalize. + + If a dict is provided, it must be of the same form as the protobuf + message :class:`~google.cloud.bigquery_storage_v1beta1.types.Stream` + retry (Optional[google.api_core.retry.Retry]): A retry object used + to retry requests. If ``None`` is specified, requests will not + be retried. + timeout (Optional[float]): The amount of time, in seconds, to wait + for the request to complete. Note that if ``retry`` is + specified, the timeout applies to each individual attempt. + metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata + that is provided to the method. + + Raises: + google.api_core.exceptions.GoogleAPICallError: If the request + failed for any reason. + google.api_core.exceptions.RetryError: If the request failed due + to a retryable error and retry attempts failed. + ValueError: If the parameters are invalid. + """ + # Wrap the transport method to add retry and timeout logic. + if 'finalize_stream' not in self._inner_api_calls: + self._inner_api_calls[ + 'finalize_stream'] = google.api_core.gapic_v1.method.wrap_method( + self.transport.finalize_stream, + default_retry=self._method_configs['FinalizeStream'].retry, + default_timeout=self._method_configs['FinalizeStream']. 
+ timeout, + client_info=self._client_info, + ) + + request = storage_pb2.FinalizeStreamRequest(stream=stream, ) + if metadata is None: + metadata = [] + metadata = list(metadata) + try: + routing_header = [('stream.name', stream.name)] + except AttributeError: + pass + else: + routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( # pragma: no cover + routing_header) + metadata.append(routing_metadata) # pragma: no cover + + self._inner_api_calls['finalize_stream']( + request, retry=retry, timeout=timeout, metadata=metadata) + + def split_read_stream(self, + original_stream, + retry=google.api_core.gapic_v1.method.DEFAULT, + timeout=google.api_core.gapic_v1.method.DEFAULT, + metadata=None): + """ + Splits a given read stream into two Streams. These streams are referred + to as the primary and the residual of the split. The original stream can + still be read from in the same manner as before. Both of the returned + streams can also be read from, and the total rows return by both child + streams will be the same as the rows read from the original stream. + + Moreover, the two child streams will be allocated back to back in the + original Stream. Concretely, it is guaranteed that for streams Original, + Primary, and Residual, that Original[0-j] = Primary[0-j] and + Original[j-n] = Residual[0-m] once the streams have been read to + completion. + + This method is guaranteed to be idempotent. + + Example: + >>> from google.cloud import bigquery_storage_v1beta1 + >>> + >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() + >>> + >>> # TODO: Initialize `original_stream`: + >>> original_stream = {} + >>> + >>> response = client.split_read_stream(original_stream) + + Args: + original_stream (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.Stream]): Stream to split. + + If a dict is provided, it must be of the same form as the protobuf + message :class:`~google.cloud.bigquery_storage_v1beta1.types.Stream` + retry (Optional[google.api_core.retry.Retry]): A retry object used + to retry requests. If ``None`` is specified, requests will not + be retried. + timeout (Optional[float]): The amount of time, in seconds, to wait + for the request to complete. Note that if ``retry`` is + specified, the timeout applies to each individual attempt. + metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata + that is provided to the method. + + Returns: + A :class:`~google.cloud.bigquery_storage_v1beta1.types.SplitReadStreamResponse` instance. + + Raises: + google.api_core.exceptions.GoogleAPICallError: If the request + failed for any reason. + google.api_core.exceptions.RetryError: If the request failed due + to a retryable error and retry attempts failed. + ValueError: If the parameters are invalid. + """ + # Wrap the transport method to add retry and timeout logic. + if 'split_read_stream' not in self._inner_api_calls: + self._inner_api_calls[ + 'split_read_stream'] = google.api_core.gapic_v1.method.wrap_method( + self.transport.split_read_stream, + default_retry=self._method_configs['SplitReadStream']. + retry, + default_timeout=self._method_configs['SplitReadStream']. 
+ timeout, + client_info=self._client_info, + ) + + request = storage_pb2.SplitReadStreamRequest( + original_stream=original_stream, ) + if metadata is None: + metadata = [] + metadata = list(metadata) + try: + routing_header = [('original_stream.name', original_stream.name)] + except AttributeError: + pass + else: + routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( # pragma: no cover + routing_header) + metadata.append(routing_metadata) # pragma: no cover + + return self._inner_api_calls['split_read_stream']( + request, retry=retry, timeout=timeout, metadata=metadata) diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client_config.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client_config.py new file mode 100644 index 000000000000..d6357097836f --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client_config.py @@ -0,0 +1,48 @@ +config = { + "interfaces": { + "google.cloud.bigquery.storage.v1beta1.BigQueryStorage": { + "retry_codes": { + "idempotent": ["DEADLINE_EXCEEDED", "UNAVAILABLE"], + "non_idempotent": [] + }, + "retry_params": { + "default": { + "initial_retry_delay_millis": 100, + "retry_delay_multiplier": 1.3, + "max_retry_delay_millis": 60000, + "initial_rpc_timeout_millis": 20000, + "rpc_timeout_multiplier": 1.0, + "max_rpc_timeout_millis": 20000, + "total_timeout_millis": 600000 + } + }, + "methods": { + "CreateReadSession": { + "timeout_millis": 60000, + "retry_codes_name": "idempotent", + "retry_params_name": "default" + }, + "ReadRows": { + "timeout_millis": 86400000, + "retry_codes_name": "idempotent", + "retry_params_name": "default" + }, + "BatchCreateReadSessionStreams": { + "timeout_millis": 60000, + "retry_codes_name": "idempotent", + "retry_params_name": "default" + }, + "FinalizeStream": { + "timeout_millis": 60000, + "retry_codes_name": "idempotent", + "retry_params_name": "default" + }, + "SplitReadStream": { + "timeout_millis": 60000, + "retry_codes_name": "idempotent", + "retry_params_name": "default" + } + } + } + } +} diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/enums.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/enums.py new file mode 100644 index 000000000000..fc6e52d2e6fa --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/enums.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Wrappers for protocol buffer enum types.""" + +import enum + + +class DataFormat(enum.IntEnum): + """ + Data format for input or output data. + + Attributes: + DATA_FORMAT_UNSPECIFIED (int): Data format is unspecified. + AVRO (int): Avro is a standard open source row based file format. + See https://avro.apache.org/ for more details. 
+ """ + DATA_FORMAT_UNSPECIFIED = 0 + AVRO = 1 diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/transports/__init__.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/transports/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py new file mode 100644 index 000000000000..e5d4483b157e --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py @@ -0,0 +1,208 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import google.api_core.grpc_helpers + +from google.cloud.bigquery_storage_v1beta1.proto import storage_pb2_grpc + + +class BigQueryStorageGrpcTransport(object): + """gRPC transport class providing stubs for + google.cloud.bigquery.storage.v1beta1 BigQueryStorage API. + + The transport provides access to the raw gRPC stubs, + which can be used to take advantage of advanced + features of gRPC. + """ + # The scopes needed to make gRPC calls to all of the methods defined + # in this service. + _OAUTH_SCOPES = ( + 'https://www.googleapis.com/auth/bigquery', + 'https://www.googleapis.com/auth/cloud-platform', + ) + + def __init__(self, + channel=None, + credentials=None, + address='bigquerystorage.googleapis.com:443'): + """Instantiate the transport class. + + Args: + channel (grpc.Channel): A ``Channel`` instance through + which to make calls. This argument is mutually exclusive + with ``credentials``; providing both will raise an exception. + credentials (google.auth.credentials.Credentials): The + authorization credentials to attach to requests. These + credentials identify this application to the service. If none + are specified, the client will attempt to ascertain the + credentials from the environment. + address (str): The address where the service is hosted. + """ + # If both `channel` and `credentials` are specified, raise an + # exception (channels come with credentials baked in already). + if channel is not None and credentials is not None: # pragma: no cover + raise ValueError( + 'The `channel` and `credentials` arguments are mutually ' + 'exclusive.', ) + + # Create the channel. + if channel is None: # pragma: no cover + channel = self.create_channel( + address=address, + credentials=credentials, + ) + + # gRPC uses objects called "stubs" that are bound to the + # channel and provide a basic method for each RPC. + self._stubs = { + 'big_query_storage_stub': + storage_pb2_grpc.BigQueryStorageStub(channel), + } + + @classmethod + def create_channel(cls, + address='bigquerystorage.googleapis.com:443', + credentials=None): + """Create and return a gRPC channel object. + + Args: + address (str): The host for the channel to use. 
+ credentials (~.Credentials): The + authorization credentials to attach to requests. These + credentials identify this application to the service. If + none are specified, the client will attempt to ascertain + the credentials from the environment. + + Returns: + grpc.Channel: A gRPC channel object. + """ + return google.api_core.grpc_helpers.create_channel( # pragma: no cover + address, + credentials=credentials, + scopes=cls._OAUTH_SCOPES, + ) + + @property + def create_read_session(self): + """Return the gRPC stub for {$apiMethod.name}. + + Creates a new read session. A read session divides the contents of a + BigQuery table into one or more streams, which can then be used to read + data from the table. The read session also specifies properties of the + data to be read, such as a list of columns or a push-down filter describing + the rows to be returned. + + A particular row can be read by at most one stream. When the caller has + reached the end of each stream in the session, then all the data in the + table has been read. + + Read sessions automatically expire 24 hours after they are created and do + not require manual clean-up by the caller. + + Returns: + Callable: A callable which accepts the appropriate + deserialized request object and returns a + deserialized response object. + """ + return self._stubs['big_query_storage_stub'].CreateReadSession + + @property + def read_rows(self): + """Return the gRPC stub for {$apiMethod.name}. + + Reads rows from the table in the format prescribed by the read session. + Each response contains one or more table rows, up to a maximum of 10 MiB + per response; read requests which attempt to read individual rows larger + than this will fail. + + Each request also returns a set of stream statistics reflecting the + estimated total number of rows in the read stream. This number is computed + based on the total table size and the number of active streams in the read + session, and may change as other streams continue to read data. + + Returns: + Callable: A callable which accepts the appropriate + deserialized request object and returns a + deserialized response object. + """ + return self._stubs['big_query_storage_stub'].ReadRows + + @property + def batch_create_read_session_streams(self): + """Return the gRPC stub for {$apiMethod.name}. + + Creates additional streams for a ReadSession. This API can be used to + dynamically adjust the parallelism of a batch processing task upwards by + adding additional workers. + + Returns: + Callable: A callable which accepts the appropriate + deserialized request object and returns a + deserialized response object. + """ + return self._stubs[ + 'big_query_storage_stub'].BatchCreateReadSessionStreams + + @property + def finalize_stream(self): + """Return the gRPC stub for {$apiMethod.name}. + + Triggers the graceful termination of a single stream in a ReadSession. This + API can be used to dynamically adjust the parallelism of a batch processing + task downwards without losing data. + + This API does not delete the stream -- it remains visible in the + ReadSession, and any data processed by the stream is not released to other + streams. However, no additional data will be assigned to the stream once + this call completes. Callers must continue reading data on the stream until + the end of the stream is reached so that data which has already been + assigned to the stream will be processed. 
+ + This method will return an error if there are no other live streams + in the Session, or if SplitReadStream() has been called on the given + Stream. + + Returns: + Callable: A callable which accepts the appropriate + deserialized request object and returns a + deserialized response object. + """ + return self._stubs['big_query_storage_stub'].FinalizeStream + + @property + def split_read_stream(self): + """Return the gRPC stub for {$apiMethod.name}. + + Splits a given read stream into two Streams. These streams are referred + to as the primary and the residual of the split. The original stream can + still be read from in the same manner as before. Both of the returned + streams can also be read from, and the total rows return by both child + streams will be the same as the rows read from the original stream. + + Moreover, the two child streams will be allocated back to back in the + original Stream. Concretely, it is guaranteed that for streams Original, + Primary, and Residual, that Original[0-j] = Primary[0-j] and + Original[j-n] = Residual[0-m] once the streams have been read to + completion. + + This method is guaranteed to be idempotent. + + Returns: + Callable: A callable which accepts the appropriate + deserialized request object and returns a + deserialized response object. + """ + return self._stubs['big_query_storage_stub'].SplitReadStream diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/__init__.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/avro_pb2.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/avro_pb2.py new file mode 100644 index 000000000000..10a029a9cf14 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/avro_pb2.py @@ -0,0 +1,136 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! 
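+# NOTE (editorial comment, not compiler output): this module defines the
+# ``AvroSchema`` and ``AvroRows`` messages carried by ``ReadRowsResponse``.
+# A hedged sketch of decoding one block of rows, where ``session`` and
+# ``response`` are assumed results of ``CreateReadSession`` / ``ReadRows``
+# and ``fastavro`` is used purely for illustration (it is not a dependency
+# declared by this package):
+#
+#   >>> import io, json, fastavro
+#   >>> schema = json.loads(session.avro_schema.schema)
+#   >>> block = io.BytesIO(response.avro_rows.serialized_binary_rows)
+#   >>> rows = [fastavro.schemaless_reader(block, schema)
+#   ...         for _ in range(response.avro_rows.row_count)]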
+# source: google/cloud/bigquery/storage_v1beta1/proto/avro.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='google/cloud/bigquery/storage_v1beta1/proto/avro.proto', + package='google.cloud.bigquery.storage.v1beta1', + syntax='proto3', + serialized_pb=_b('\n6google/cloud/bigquery/storage_v1beta1/proto/avro.proto\x12%google.cloud.bigquery.storage.v1beta1\"\x1c\n\nAvroSchema\x12\x0e\n\x06schema\x18\x01 \x01(\t\"=\n\x08\x41vroRows\x12\x1e\n\x16serialized_binary_rows\x18\x01 \x01(\x0c\x12\x11\n\trow_count\x18\x02 \x01(\x03\x42\x84\x01\n)com.google.cloud.bigquery.storage.v1beta1B\tAvroProtoZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storageb\x06proto3') +) + + + + +_AVROSCHEMA = _descriptor.Descriptor( + name='AvroSchema', + full_name='google.cloud.bigquery.storage.v1beta1.AvroSchema', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='schema', full_name='google.cloud.bigquery.storage.v1beta1.AvroSchema.schema', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=97, + serialized_end=125, +) + + +_AVROROWS = _descriptor.Descriptor( + name='AvroRows', + full_name='google.cloud.bigquery.storage.v1beta1.AvroRows', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='serialized_binary_rows', full_name='google.cloud.bigquery.storage.v1beta1.AvroRows.serialized_binary_rows', index=0, + number=1, type=12, cpp_type=9, label=1, + has_default_value=False, default_value=_b(""), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='row_count', full_name='google.cloud.bigquery.storage.v1beta1.AvroRows.row_count', index=1, + number=2, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=127, + serialized_end=188, +) + +DESCRIPTOR.message_types_by_name['AvroSchema'] = _AVROSCHEMA +DESCRIPTOR.message_types_by_name['AvroRows'] = _AVROROWS +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +AvroSchema = _reflection.GeneratedProtocolMessageType('AvroSchema', (_message.Message,), dict( + DESCRIPTOR = _AVROSCHEMA, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.avro_pb2' + , + __doc__ = """Avro schema. 
+ + + Attributes: + schema: + Json serialized schema, as described at + https://avro.apache.org/docs/1.8.1/spec.html + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.AvroSchema) + )) +_sym_db.RegisterMessage(AvroSchema) + +AvroRows = _reflection.GeneratedProtocolMessageType('AvroRows', (_message.Message,), dict( + DESCRIPTOR = _AVROROWS, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.avro_pb2' + , + __doc__ = """Avro rows. + + + Attributes: + serialized_binary_rows: + Binary serialized rows in a block. + row_count: + The count of rows in the returning block. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.AvroRows) + )) +_sym_db.RegisterMessage(AvroRows) + + +DESCRIPTOR.has_options = True +DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\n)com.google.cloud.bigquery.storage.v1beta1B\tAvroProtoZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage')) +# @@protoc_insertion_point(module_scope) diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/avro_pb2_grpc.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/avro_pb2_grpc.py new file mode 100644 index 000000000000..a89435267cb2 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/avro_pb2_grpc.py @@ -0,0 +1,3 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +import grpc + diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/read_options_pb2.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/read_options_pb2.py new file mode 100644 index 000000000000..7a01b7593c51 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/read_options_pb2.py @@ -0,0 +1,96 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! 
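+# NOTE (editorial comment, not compiler output): ``TableReadOptions`` carries
+# the column projection and row filter passed to ``CreateReadSession``. A
+# minimal sketch, assuming the package re-exports this message through a
+# ``types`` module as other GAPIC libraries do:
+#
+#   >>> from google.cloud.bigquery_storage_v1beta1 import types
+#   >>> read_options = types.TableReadOptions(
+#   ...     selected_fields=['name', 'number'],
+#   ...     row_restriction="state = 'WA'")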
+# source: google/cloud/bigquery/storage_v1beta1/proto/read_options.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='google/cloud/bigquery/storage_v1beta1/proto/read_options.proto', + package='google.cloud.bigquery.storage.v1beta1', + syntax='proto3', + serialized_pb=_b('\n>google/cloud/bigquery/storage_v1beta1/proto/read_options.proto\x12%google.cloud.bigquery.storage.v1beta1\"D\n\x10TableReadOptions\x12\x17\n\x0fselected_fields\x18\x01 \x03(\t\x12\x17\n\x0frow_restriction\x18\x02 \x01(\tBy\n)com.google.cloud.bigquery.storage.v1beta1ZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storageb\x06proto3') +) + + + + +_TABLEREADOPTIONS = _descriptor.Descriptor( + name='TableReadOptions', + full_name='google.cloud.bigquery.storage.v1beta1.TableReadOptions', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='selected_fields', full_name='google.cloud.bigquery.storage.v1beta1.TableReadOptions.selected_fields', index=0, + number=1, type=9, cpp_type=9, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='row_restriction', full_name='google.cloud.bigquery.storage.v1beta1.TableReadOptions.row_restriction', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=105, + serialized_end=173, +) + +DESCRIPTOR.message_types_by_name['TableReadOptions'] = _TABLEREADOPTIONS +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +TableReadOptions = _reflection.GeneratedProtocolMessageType('TableReadOptions', (_message.Message,), dict( + DESCRIPTOR = _TABLEREADOPTIONS, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.read_options_pb2' + , + __doc__ = """Options dictating how we read a table. + + + Attributes: + selected_fields: + Optional. Names of the fields in the table that should be + read. If empty, all fields will be read. If the specified + field is a nested field, all the sub-fields in the field will + be selected. The output field order is unrelated to the order + of fields in selected\_fields. + row_restriction: + Optional. SQL text filtering statement, similar to a WHERE + clause in a query. Currently, we support combinations of + predicates that are a comparison between a column and a + constant value in SQL statement. Aggregates are not supported. 
+ Example: "a > DATE '2014-9-27' AND (b > 5 and C LIKE 'date')" + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.TableReadOptions) + )) +_sym_db.RegisterMessage(TableReadOptions) + + +DESCRIPTOR.has_options = True +DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\n)com.google.cloud.bigquery.storage.v1beta1ZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage')) +# @@protoc_insertion_point(module_scope) diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/read_options_pb2_grpc.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/read_options_pb2_grpc.py new file mode 100644 index 000000000000..a89435267cb2 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/read_options_pb2_grpc.py @@ -0,0 +1,3 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +import grpc + diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2.py new file mode 100644 index 000000000000..4a11ee41c446 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2.py @@ -0,0 +1,950 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: google/cloud/bigquery/storage_v1beta1/proto/storage.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf.internal import enum_type_wrapper +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.cloud.bigquery_storage_v1beta1.proto import avro_pb2 as google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_avro__pb2 +from google.cloud.bigquery_storage_v1beta1.proto import read_options_pb2 as google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_read__options__pb2 +from google.cloud.bigquery_storage_v1beta1.proto import table_reference_pb2 as google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_table__reference__pb2 +from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 +from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='google/cloud/bigquery/storage_v1beta1/proto/storage.proto', + package='google.cloud.bigquery.storage.v1beta1', + syntax='proto3', + serialized_pb=_b('\n9google/cloud/bigquery/storage_v1beta1/proto/storage.proto\x12%google.cloud.bigquery.storage.v1beta1\x1a\x36google/cloud/bigquery/storage_v1beta1/proto/avro.proto\x1a>google/cloud/bigquery/storage_v1beta1/proto/read_options.proto\x1a\x41google/cloud/bigquery/storage_v1beta1/proto/table_reference.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\")\n\x06Stream\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x11\n\trow_count\x18\x02 \x01(\x03\"_\n\x0eStreamPosition\x12=\n\x06stream\x18\x01 \x01(\x0b\x32-.google.cloud.bigquery.storage.v1beta1.Stream\x12\x0e\n\x06offset\x18\x02 \x01(\x03\"\x80\x03\n\x0bReadSession\x12\x0c\n\x04name\x18\x01 \x01(\t\x12/\n\x0b\x65xpire_time\x18\x02 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12H\n\x0b\x61vro_schema\x18\x05 
\x01(\x0b\x32\x31.google.cloud.bigquery.storage.v1beta1.AvroSchemaH\x00\x12>\n\x07streams\x18\x04 \x03(\x0b\x32-.google.cloud.bigquery.storage.v1beta1.Stream\x12N\n\x0ftable_reference\x18\x07 \x01(\x0b\x32\x35.google.cloud.bigquery.storage.v1beta1.TableReference\x12N\n\x0ftable_modifiers\x18\x08 \x01(\x0b\x32\x35.google.cloud.bigquery.storage.v1beta1.TableModifiersB\x08\n\x06schema\"\xf7\x02\n\x18\x43reateReadSessionRequest\x12N\n\x0ftable_reference\x18\x01 \x01(\x0b\x32\x35.google.cloud.bigquery.storage.v1beta1.TableReference\x12\x0e\n\x06parent\x18\x06 \x01(\t\x12N\n\x0ftable_modifiers\x18\x02 \x01(\x0b\x32\x35.google.cloud.bigquery.storage.v1beta1.TableModifiers\x12\x19\n\x11requested_streams\x18\x03 \x01(\x05\x12M\n\x0cread_options\x18\x04 \x01(\x0b\x32\x37.google.cloud.bigquery.storage.v1beta1.TableReadOptions\x12\x41\n\x06\x66ormat\x18\x05 \x01(\x0e\x32\x31.google.cloud.bigquery.storage.v1beta1.DataFormat\"_\n\x0fReadRowsRequest\x12L\n\rread_position\x18\x01 \x01(\x0b\x32\x35.google.cloud.bigquery.storage.v1beta1.StreamPosition\"+\n\x0cStreamStatus\x12\x1b\n\x13\x65stimated_row_count\x18\x01 \x01(\x03\"*\n\x0eThrottleStatus\x12\x18\n\x10throttle_percent\x18\x01 \x01(\x05\"\xf5\x01\n\x10ReadRowsResponse\x12\x44\n\tavro_rows\x18\x03 \x01(\x0b\x32/.google.cloud.bigquery.storage.v1beta1.AvroRowsH\x00\x12\x43\n\x06status\x18\x02 \x01(\x0b\x32\x33.google.cloud.bigquery.storage.v1beta1.StreamStatus\x12N\n\x0fthrottle_status\x18\x05 \x01(\x0b\x32\x35.google.cloud.bigquery.storage.v1beta1.ThrottleStatusB\x06\n\x04rows\"\x86\x01\n$BatchCreateReadSessionStreamsRequest\x12\x43\n\x07session\x18\x01 \x01(\x0b\x32\x32.google.cloud.bigquery.storage.v1beta1.ReadSession\x12\x19\n\x11requested_streams\x18\x02 \x01(\x05\"g\n%BatchCreateReadSessionStreamsResponse\x12>\n\x07streams\x18\x01 \x03(\x0b\x32-.google.cloud.bigquery.storage.v1beta1.Stream\"V\n\x15\x46inalizeStreamRequest\x12=\n\x06stream\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.storage.v1beta1.Stream\"`\n\x16SplitReadStreamRequest\x12\x46\n\x0foriginal_stream\x18\x01 \x01(\x0b\x32-.google.cloud.bigquery.storage.v1beta1.Stream\"\xa9\x01\n\x17SplitReadStreamResponse\x12\x45\n\x0eprimary_stream\x18\x01 \x01(\x0b\x32-.google.cloud.bigquery.storage.v1beta1.Stream\x12G\n\x10remainder_stream\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.storage.v1beta1.Stream*3\n\nDataFormat\x12\x1b\n\x17\x44\x41TA_FORMAT_UNSPECIFIED\x10\x00\x12\x08\n\x04\x41VRO\x10\x01\x32\xdd\x05\n\x0f\x42igQueryStorage\x12\x8a\x01\n\x11\x43reateReadSession\x12?.google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest\x1a\x32.google.cloud.bigquery.storage.v1beta1.ReadSession\"\x00\x12\x7f\n\x08ReadRows\x12\x36.google.cloud.bigquery.storage.v1beta1.ReadRowsRequest\x1a\x37.google.cloud.bigquery.storage.v1beta1.ReadRowsResponse\"\x00\x30\x01\x12\xbc\x01\n\x1d\x42\x61tchCreateReadSessionStreams\x12K.google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsRequest\x1aL.google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsResponse\"\x00\x12h\n\x0e\x46inalizeStream\x12<.google.cloud.bigquery.storage.v1beta1.FinalizeStreamRequest\x1a\x16.google.protobuf.Empty\"\x00\x12\x92\x01\n\x0fSplitReadStream\x12=.google.cloud.bigquery.storage.v1beta1.SplitReadStreamRequest\x1a>.google.cloud.bigquery.storage.v1beta1.SplitReadStreamResponse\"\x00\x42y\n)com.google.cloud.bigquery.storage.v1beta1ZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storageb\x06proto3') + , + 
dependencies=[google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_avro__pb2.DESCRIPTOR,google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_read__options__pb2.DESCRIPTOR,google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_table__reference__pb2.DESCRIPTOR,google_dot_protobuf_dot_empty__pb2.DESCRIPTOR,google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR,]) + +_DATAFORMAT = _descriptor.EnumDescriptor( + name='DataFormat', + full_name='google.cloud.bigquery.storage.v1beta1.DataFormat', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name='DATA_FORMAT_UNSPECIFIED', index=0, number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='AVRO', index=1, number=1, + options=None, + type=None), + ], + containing_type=None, + options=None, + serialized_start=2288, + serialized_end=2339, +) +_sym_db.RegisterEnumDescriptor(_DATAFORMAT) + +DataFormat = enum_type_wrapper.EnumTypeWrapper(_DATAFORMAT) +DATA_FORMAT_UNSPECIFIED = 0 +AVRO = 1 + + + +_STREAM = _descriptor.Descriptor( + name='Stream', + full_name='google.cloud.bigquery.storage.v1beta1.Stream', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='name', full_name='google.cloud.bigquery.storage.v1beta1.Stream.name', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='row_count', full_name='google.cloud.bigquery.storage.v1beta1.Stream.row_count', index=1, + number=2, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=349, + serialized_end=390, +) + + +_STREAMPOSITION = _descriptor.Descriptor( + name='StreamPosition', + full_name='google.cloud.bigquery.storage.v1beta1.StreamPosition', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='stream', full_name='google.cloud.bigquery.storage.v1beta1.StreamPosition.stream', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='offset', full_name='google.cloud.bigquery.storage.v1beta1.StreamPosition.offset', index=1, + number=2, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=392, + serialized_end=487, +) + + +_READSESSION = _descriptor.Descriptor( + name='ReadSession', + full_name='google.cloud.bigquery.storage.v1beta1.ReadSession', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='name', 
full_name='google.cloud.bigquery.storage.v1beta1.ReadSession.name', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='expire_time', full_name='google.cloud.bigquery.storage.v1beta1.ReadSession.expire_time', index=1, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='avro_schema', full_name='google.cloud.bigquery.storage.v1beta1.ReadSession.avro_schema', index=2, + number=5, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='streams', full_name='google.cloud.bigquery.storage.v1beta1.ReadSession.streams', index=3, + number=4, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='table_reference', full_name='google.cloud.bigquery.storage.v1beta1.ReadSession.table_reference', index=4, + number=7, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='table_modifiers', full_name='google.cloud.bigquery.storage.v1beta1.ReadSession.table_modifiers', index=5, + number=8, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + _descriptor.OneofDescriptor( + name='schema', full_name='google.cloud.bigquery.storage.v1beta1.ReadSession.schema', + index=0, containing_type=None, fields=[]), + ], + serialized_start=490, + serialized_end=874, +) + + +_CREATEREADSESSIONREQUEST = _descriptor.Descriptor( + name='CreateReadSessionRequest', + full_name='google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='table_reference', full_name='google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest.table_reference', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='parent', full_name='google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest.parent', index=1, + number=6, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + 
_descriptor.FieldDescriptor( + name='table_modifiers', full_name='google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest.table_modifiers', index=2, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='requested_streams', full_name='google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest.requested_streams', index=3, + number=3, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='read_options', full_name='google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest.read_options', index=4, + number=4, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='format', full_name='google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest.format', index=5, + number=5, type=14, cpp_type=8, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=877, + serialized_end=1252, +) + + +_READROWSREQUEST = _descriptor.Descriptor( + name='ReadRowsRequest', + full_name='google.cloud.bigquery.storage.v1beta1.ReadRowsRequest', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='read_position', full_name='google.cloud.bigquery.storage.v1beta1.ReadRowsRequest.read_position', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1254, + serialized_end=1349, +) + + +_STREAMSTATUS = _descriptor.Descriptor( + name='StreamStatus', + full_name='google.cloud.bigquery.storage.v1beta1.StreamStatus', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='estimated_row_count', full_name='google.cloud.bigquery.storage.v1beta1.StreamStatus.estimated_row_count', index=0, + number=1, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1351, + serialized_end=1394, +) + + +_THROTTLESTATUS = _descriptor.Descriptor( + name='ThrottleStatus', + full_name='google.cloud.bigquery.storage.v1beta1.ThrottleStatus', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + 
name='throttle_percent', full_name='google.cloud.bigquery.storage.v1beta1.ThrottleStatus.throttle_percent', index=0, + number=1, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1396, + serialized_end=1438, +) + + +_READROWSRESPONSE = _descriptor.Descriptor( + name='ReadRowsResponse', + full_name='google.cloud.bigquery.storage.v1beta1.ReadRowsResponse', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='avro_rows', full_name='google.cloud.bigquery.storage.v1beta1.ReadRowsResponse.avro_rows', index=0, + number=3, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='status', full_name='google.cloud.bigquery.storage.v1beta1.ReadRowsResponse.status', index=1, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='throttle_status', full_name='google.cloud.bigquery.storage.v1beta1.ReadRowsResponse.throttle_status', index=2, + number=5, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + _descriptor.OneofDescriptor( + name='rows', full_name='google.cloud.bigquery.storage.v1beta1.ReadRowsResponse.rows', + index=0, containing_type=None, fields=[]), + ], + serialized_start=1441, + serialized_end=1686, +) + + +_BATCHCREATEREADSESSIONSTREAMSREQUEST = _descriptor.Descriptor( + name='BatchCreateReadSessionStreamsRequest', + full_name='google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsRequest', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='session', full_name='google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsRequest.session', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='requested_streams', full_name='google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsRequest.requested_streams', index=1, + number=2, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1689, + serialized_end=1823, +) + + +_BATCHCREATEREADSESSIONSTREAMSRESPONSE 
= _descriptor.Descriptor( + name='BatchCreateReadSessionStreamsResponse', + full_name='google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsResponse', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='streams', full_name='google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsResponse.streams', index=0, + number=1, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1825, + serialized_end=1928, +) + + +_FINALIZESTREAMREQUEST = _descriptor.Descriptor( + name='FinalizeStreamRequest', + full_name='google.cloud.bigquery.storage.v1beta1.FinalizeStreamRequest', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='stream', full_name='google.cloud.bigquery.storage.v1beta1.FinalizeStreamRequest.stream', index=0, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1930, + serialized_end=2016, +) + + +_SPLITREADSTREAMREQUEST = _descriptor.Descriptor( + name='SplitReadStreamRequest', + full_name='google.cloud.bigquery.storage.v1beta1.SplitReadStreamRequest', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='original_stream', full_name='google.cloud.bigquery.storage.v1beta1.SplitReadStreamRequest.original_stream', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=2018, + serialized_end=2114, +) + + +_SPLITREADSTREAMRESPONSE = _descriptor.Descriptor( + name='SplitReadStreamResponse', + full_name='google.cloud.bigquery.storage.v1beta1.SplitReadStreamResponse', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='primary_stream', full_name='google.cloud.bigquery.storage.v1beta1.SplitReadStreamResponse.primary_stream', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='remainder_stream', full_name='google.cloud.bigquery.storage.v1beta1.SplitReadStreamResponse.remainder_stream', index=1, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + 
is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=2117, + serialized_end=2286, +) + +_STREAMPOSITION.fields_by_name['stream'].message_type = _STREAM +_READSESSION.fields_by_name['expire_time'].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP +_READSESSION.fields_by_name['avro_schema'].message_type = google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_avro__pb2._AVROSCHEMA +_READSESSION.fields_by_name['streams'].message_type = _STREAM +_READSESSION.fields_by_name['table_reference'].message_type = google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_table__reference__pb2._TABLEREFERENCE +_READSESSION.fields_by_name['table_modifiers'].message_type = google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_table__reference__pb2._TABLEMODIFIERS +_READSESSION.oneofs_by_name['schema'].fields.append( + _READSESSION.fields_by_name['avro_schema']) +_READSESSION.fields_by_name['avro_schema'].containing_oneof = _READSESSION.oneofs_by_name['schema'] +_CREATEREADSESSIONREQUEST.fields_by_name['table_reference'].message_type = google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_table__reference__pb2._TABLEREFERENCE +_CREATEREADSESSIONREQUEST.fields_by_name['table_modifiers'].message_type = google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_table__reference__pb2._TABLEMODIFIERS +_CREATEREADSESSIONREQUEST.fields_by_name['read_options'].message_type = google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_read__options__pb2._TABLEREADOPTIONS +_CREATEREADSESSIONREQUEST.fields_by_name['format'].enum_type = _DATAFORMAT +_READROWSREQUEST.fields_by_name['read_position'].message_type = _STREAMPOSITION +_READROWSRESPONSE.fields_by_name['avro_rows'].message_type = google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_avro__pb2._AVROROWS +_READROWSRESPONSE.fields_by_name['status'].message_type = _STREAMSTATUS +_READROWSRESPONSE.fields_by_name['throttle_status'].message_type = _THROTTLESTATUS +_READROWSRESPONSE.oneofs_by_name['rows'].fields.append( + _READROWSRESPONSE.fields_by_name['avro_rows']) +_READROWSRESPONSE.fields_by_name['avro_rows'].containing_oneof = _READROWSRESPONSE.oneofs_by_name['rows'] +_BATCHCREATEREADSESSIONSTREAMSREQUEST.fields_by_name['session'].message_type = _READSESSION +_BATCHCREATEREADSESSIONSTREAMSRESPONSE.fields_by_name['streams'].message_type = _STREAM +_FINALIZESTREAMREQUEST.fields_by_name['stream'].message_type = _STREAM +_SPLITREADSTREAMREQUEST.fields_by_name['original_stream'].message_type = _STREAM +_SPLITREADSTREAMRESPONSE.fields_by_name['primary_stream'].message_type = _STREAM +_SPLITREADSTREAMRESPONSE.fields_by_name['remainder_stream'].message_type = _STREAM +DESCRIPTOR.message_types_by_name['Stream'] = _STREAM +DESCRIPTOR.message_types_by_name['StreamPosition'] = _STREAMPOSITION +DESCRIPTOR.message_types_by_name['ReadSession'] = _READSESSION +DESCRIPTOR.message_types_by_name['CreateReadSessionRequest'] = _CREATEREADSESSIONREQUEST +DESCRIPTOR.message_types_by_name['ReadRowsRequest'] = _READROWSREQUEST +DESCRIPTOR.message_types_by_name['StreamStatus'] = _STREAMSTATUS +DESCRIPTOR.message_types_by_name['ThrottleStatus'] = _THROTTLESTATUS +DESCRIPTOR.message_types_by_name['ReadRowsResponse'] = _READROWSRESPONSE +DESCRIPTOR.message_types_by_name['BatchCreateReadSessionStreamsRequest'] = _BATCHCREATEREADSESSIONSTREAMSREQUEST +DESCRIPTOR.message_types_by_name['BatchCreateReadSessionStreamsResponse'] = _BATCHCREATEREADSESSIONSTREAMSRESPONSE 
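+# NOTE (editorial comment, not compiler output): registering each descriptor
+# in ``message_types_by_name`` is what lets the ``_reflection`` calls below
+# materialize concrete message classes at import time, so requests can be
+# built directly, e.g. (the stream name here is a placeholder, not a real
+# resource path):
+#
+#   >>> from google.cloud.bigquery_storage_v1beta1.proto import storage_pb2
+#   >>> position = storage_pb2.StreamPosition(
+#   ...     stream=storage_pb2.Stream(name='projects/p/streams/s'), offset=0)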
+DESCRIPTOR.message_types_by_name['FinalizeStreamRequest'] = _FINALIZESTREAMREQUEST +DESCRIPTOR.message_types_by_name['SplitReadStreamRequest'] = _SPLITREADSTREAMREQUEST +DESCRIPTOR.message_types_by_name['SplitReadStreamResponse'] = _SPLITREADSTREAMRESPONSE +DESCRIPTOR.enum_types_by_name['DataFormat'] = _DATAFORMAT +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +Stream = _reflection.GeneratedProtocolMessageType('Stream', (_message.Message,), dict( + DESCRIPTOR = _STREAM, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Information about a single data stream within a read session. + + + Attributes: + name: + Name of the stream. In the form + ``/projects/{project_id}/stream/{stream_id}`` + row_count: + Rows in the stream. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.Stream) + )) +_sym_db.RegisterMessage(Stream) + +StreamPosition = _reflection.GeneratedProtocolMessageType('StreamPosition', (_message.Message,), dict( + DESCRIPTOR = _STREAMPOSITION, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Expresses a point within a given stream using an offset position. + + + Attributes: + stream: + Identifier for a given Stream. + offset: + Position in the stream. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.StreamPosition) + )) +_sym_db.RegisterMessage(StreamPosition) + +ReadSession = _reflection.GeneratedProtocolMessageType('ReadSession', (_message.Message,), dict( + DESCRIPTOR = _READSESSION, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Information returned from a ``CreateReadSession`` request. + + + Attributes: + name: + Unique identifier for the session. In the form + ``projects/{project_id}/sessions/{session_id}`` + expire_time: + Time at which the session becomes invalid. After this time, + subsequent requests to read this Session will return errors. + schema: + The schema for the read. If read\_options.selected\_fields is + set, the schema may be different from the table schema as it + will only contain the selected fields. + avro_schema: + Avro schema. + streams: + Streams associated with this session. + table_reference: + Table that this ReadSession is reading from. + table_modifiers: + Any modifiers which are applied when reading from the + specified table. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.ReadSession) + )) +_sym_db.RegisterMessage(ReadSession) + +CreateReadSessionRequest = _reflection.GeneratedProtocolMessageType('CreateReadSessionRequest', (_message.Message,), dict( + DESCRIPTOR = _CREATEREADSESSIONREQUEST, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Creates a new read session, which may include additional options such as + requested parallelism, projection filters and constraints. + + + Attributes: + table_reference: + Required. Reference to the table to read. + parent: + Required. String of the form "projects/your-project-id" + indicating the project this ReadSession is associated with. + This is the project that will be billed for usage. + table_modifiers: + Optional. Any modifiers to the Table (e.g. snapshot + timestamp). + requested_streams: + Optional. Initial number of streams. If unset or 0, we will + provide a value of streams so as to produce reasonable + throughput. Must be non-negative. 
The number of streams may be + lower than the requested number, depending on the amount + parallelism that is reasonable for the table and the maximum + amount of parallelism allowed by the system. Streams must be + read starting from offset 0. + read_options: + Optional. Read options for this session (e.g. column + selection, filters). + format: + Data output format. Currently default to Avro. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest) + )) +_sym_db.RegisterMessage(CreateReadSessionRequest) + +ReadRowsRequest = _reflection.GeneratedProtocolMessageType('ReadRowsRequest', (_message.Message,), dict( + DESCRIPTOR = _READROWSREQUEST, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Requesting row data via ``ReadRows`` must provide Stream position + information. + + + Attributes: + read_position: + Required. Identifier of the position in the stream to start + reading from. The offset requested must be less than the last + row read from ReadRows. Requesting a larger offset is + undefined. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.ReadRowsRequest) + )) +_sym_db.RegisterMessage(ReadRowsRequest) + +StreamStatus = _reflection.GeneratedProtocolMessageType('StreamStatus', (_message.Message,), dict( + DESCRIPTOR = _STREAMSTATUS, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Progress information for a given Stream. + + + Attributes: + estimated_row_count: + Number of estimated rows in the current stream. May change + over time as different readers in the stream progress at rates + which are relatively fast or slow. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.StreamStatus) + )) +_sym_db.RegisterMessage(StreamStatus) + +ThrottleStatus = _reflection.GeneratedProtocolMessageType('ThrottleStatus', (_message.Message,), dict( + DESCRIPTOR = _THROTTLESTATUS, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Information on if the current connection is being throttled. + + + Attributes: + throttle_percent: + How much this connection is being throttled. 0 is no + throttling, 100 is completely throttled. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.ThrottleStatus) + )) +_sym_db.RegisterMessage(ThrottleStatus) + +ReadRowsResponse = _reflection.GeneratedProtocolMessageType('ReadRowsResponse', (_message.Message,), dict( + DESCRIPTOR = _READROWSRESPONSE, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Response from calling ``ReadRows`` may include row data, progress and + throttling information. + + + Attributes: + rows: + Row data is returned in format specified during session + creation. + avro_rows: + Serialized row data in AVRO format. + status: + Estimated stream statistics. + throttle_status: + Throttling status. If unset, the latest response still + describes the current throttling status. 
+ """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.ReadRowsResponse) + )) +_sym_db.RegisterMessage(ReadRowsResponse) + +BatchCreateReadSessionStreamsRequest = _reflection.GeneratedProtocolMessageType('BatchCreateReadSessionStreamsRequest', (_message.Message,), dict( + DESCRIPTOR = _BATCHCREATEREADSESSIONSTREAMSREQUEST, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Information needed to request additional streams for an established read + session. + + + Attributes: + session: + Required. Must be a non-expired session obtained from a call + to CreateReadSession. Only the name field needs to be set. + requested_streams: + Required. Number of new streams requested. Must be positive. + Number of added streams may be less than this, see + CreateReadSessionRequest for more information. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsRequest) + )) +_sym_db.RegisterMessage(BatchCreateReadSessionStreamsRequest) + +BatchCreateReadSessionStreamsResponse = _reflection.GeneratedProtocolMessageType('BatchCreateReadSessionStreamsResponse', (_message.Message,), dict( + DESCRIPTOR = _BATCHCREATEREADSESSIONSTREAMSRESPONSE, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """The response from ``BatchCreateReadSessionStreams`` returns the stream + identifiers for the newly created streams. + + + Attributes: + streams: + Newly added streams. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsResponse) + )) +_sym_db.RegisterMessage(BatchCreateReadSessionStreamsResponse) + +FinalizeStreamRequest = _reflection.GeneratedProtocolMessageType('FinalizeStreamRequest', (_message.Message,), dict( + DESCRIPTOR = _FINALIZESTREAMREQUEST, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Request information for invoking ``FinalizeStream``. + + + Attributes: + stream: + Stream to finalize. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.FinalizeStreamRequest) + )) +_sym_db.RegisterMessage(FinalizeStreamRequest) + +SplitReadStreamRequest = _reflection.GeneratedProtocolMessageType('SplitReadStreamRequest', (_message.Message,), dict( + DESCRIPTOR = _SPLITREADSTREAMREQUEST, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Request information for ``SplitReadStream``. + + + Attributes: + original_stream: + Stream to split. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.SplitReadStreamRequest) + )) +_sym_db.RegisterMessage(SplitReadStreamRequest) + +SplitReadStreamResponse = _reflection.GeneratedProtocolMessageType('SplitReadStreamResponse', (_message.Message,), dict( + DESCRIPTOR = _SPLITREADSTREAMRESPONSE, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Response from ``SplitReadStream``. + + + Attributes: + primary_stream: + Primary stream. Will contain the beginning portion of + \|original\_stream\|. + remainder_stream: + Remainder stream. Will contain the tail of + \|original\_stream\|. 
+ """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.SplitReadStreamResponse) + )) +_sym_db.RegisterMessage(SplitReadStreamResponse) + + +DESCRIPTOR.has_options = True +DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\n)com.google.cloud.bigquery.storage.v1beta1ZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage')) + +_BIGQUERYSTORAGE = _descriptor.ServiceDescriptor( + name='BigQueryStorage', + full_name='google.cloud.bigquery.storage.v1beta1.BigQueryStorage', + file=DESCRIPTOR, + index=0, + options=None, + serialized_start=2342, + serialized_end=3075, + methods=[ + _descriptor.MethodDescriptor( + name='CreateReadSession', + full_name='google.cloud.bigquery.storage.v1beta1.BigQueryStorage.CreateReadSession', + index=0, + containing_service=None, + input_type=_CREATEREADSESSIONREQUEST, + output_type=_READSESSION, + options=None, + ), + _descriptor.MethodDescriptor( + name='ReadRows', + full_name='google.cloud.bigquery.storage.v1beta1.BigQueryStorage.ReadRows', + index=1, + containing_service=None, + input_type=_READROWSREQUEST, + output_type=_READROWSRESPONSE, + options=None, + ), + _descriptor.MethodDescriptor( + name='BatchCreateReadSessionStreams', + full_name='google.cloud.bigquery.storage.v1beta1.BigQueryStorage.BatchCreateReadSessionStreams', + index=2, + containing_service=None, + input_type=_BATCHCREATEREADSESSIONSTREAMSREQUEST, + output_type=_BATCHCREATEREADSESSIONSTREAMSRESPONSE, + options=None, + ), + _descriptor.MethodDescriptor( + name='FinalizeStream', + full_name='google.cloud.bigquery.storage.v1beta1.BigQueryStorage.FinalizeStream', + index=3, + containing_service=None, + input_type=_FINALIZESTREAMREQUEST, + output_type=google_dot_protobuf_dot_empty__pb2._EMPTY, + options=None, + ), + _descriptor.MethodDescriptor( + name='SplitReadStream', + full_name='google.cloud.bigquery.storage.v1beta1.BigQueryStorage.SplitReadStream', + index=4, + containing_service=None, + input_type=_SPLITREADSTREAMREQUEST, + output_type=_SPLITREADSTREAMRESPONSE, + options=None, + ), +]) +_sym_db.RegisterServiceDescriptor(_BIGQUERYSTORAGE) + +DESCRIPTOR.services_by_name['BigQueryStorage'] = _BIGQUERYSTORAGE + +# @@protoc_insertion_point(module_scope) diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2_grpc.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2_grpc.py new file mode 100644 index 000000000000..c619db1a5dc3 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2_grpc.py @@ -0,0 +1,165 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +import grpc + +from google.cloud.bigquery_storage_v1beta1.proto import storage_pb2 as google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2 +from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 + + +class BigQueryStorageStub(object): + """BigQuery storage API. + + The BigQuery storage API can be used to read data stored in BigQuery. + """ + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. 
+ """ + self.CreateReadSession = channel.unary_unary( + '/google.cloud.bigquery.storage.v1beta1.BigQueryStorage/CreateReadSession', + request_serializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.CreateReadSessionRequest.SerializeToString, + response_deserializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.ReadSession.FromString, + ) + self.ReadRows = channel.unary_stream( + '/google.cloud.bigquery.storage.v1beta1.BigQueryStorage/ReadRows', + request_serializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.ReadRowsRequest.SerializeToString, + response_deserializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.ReadRowsResponse.FromString, + ) + self.BatchCreateReadSessionStreams = channel.unary_unary( + '/google.cloud.bigquery.storage.v1beta1.BigQueryStorage/BatchCreateReadSessionStreams', + request_serializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.BatchCreateReadSessionStreamsRequest.SerializeToString, + response_deserializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.BatchCreateReadSessionStreamsResponse.FromString, + ) + self.FinalizeStream = channel.unary_unary( + '/google.cloud.bigquery.storage.v1beta1.BigQueryStorage/FinalizeStream', + request_serializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.FinalizeStreamRequest.SerializeToString, + response_deserializer=google_dot_protobuf_dot_empty__pb2.Empty.FromString, + ) + self.SplitReadStream = channel.unary_unary( + '/google.cloud.bigquery.storage.v1beta1.BigQueryStorage/SplitReadStream', + request_serializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.SplitReadStreamRequest.SerializeToString, + response_deserializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.SplitReadStreamResponse.FromString, + ) + + +class BigQueryStorageServicer(object): + """BigQuery storage API. + + The BigQuery storage API can be used to read data stored in BigQuery. + """ + + def CreateReadSession(self, request, context): + """Creates a new read session. A read session divides the contents of a + BigQuery table into one or more streams, which can then be used to read + data from the table. The read session also specifies properties of the + data to be read, such as a list of columns or a push-down filter describing + the rows to be returned. + + A particular row can be read by at most one stream. When the caller has + reached the end of each stream in the session, then all the data in the + table has been read. + + Read sessions automatically expire 24 hours after they are created and do + not require manual clean-up by the caller. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def ReadRows(self, request, context): + """Reads rows from the table in the format prescribed by the read session. + Each response contains one or more table rows, up to a maximum of 10 MiB + per response; read requests which attempt to read individual rows larger + than this will fail. + + Each request also returns a set of stream statistics reflecting the + estimated total number of rows in the read stream. 
This number is computed + based on the total table size and the number of active streams in the read + session, and may change as other streams continue to read data. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def BatchCreateReadSessionStreams(self, request, context): + """Creates additional streams for a ReadSession. This API can be used to + dynamically adjust the parallelism of a batch processing task upwards by + adding additional workers. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def FinalizeStream(self, request, context): + """Triggers the graceful termination of a single stream in a ReadSession. This + API can be used to dynamically adjust the parallelism of a batch processing + task downwards without losing data. + + This API does not delete the stream -- it remains visible in the + ReadSession, and any data processed by the stream is not released to other + streams. However, no additional data will be assigned to the stream once + this call completes. Callers must continue reading data on the stream until + the end of the stream is reached so that data which has already been + assigned to the stream will be processed. + + This method will return an error if there are no other live streams + in the Session, or if SplitReadStream() has been called on the given + Stream. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def SplitReadStream(self, request, context): + """Splits a given read stream into two Streams. These streams are referred to + as the primary and the residual of the split. The original stream can still + be read from in the same manner as before. Both of the returned streams can + also be read from, and the total rows return by both child streams will be + the same as the rows read from the original stream. + + Moreover, the two child streams will be allocated back to back in the + original Stream. Concretely, it is guaranteed that for streams Original, + Primary, and Residual, that Original[0-j] = Primary[0-j] and + Original[j-n] = Residual[0-m] once the streams have been read to + completion. + + This method is guaranteed to be idempotent. 
+ """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + +def add_BigQueryStorageServicer_to_server(servicer, server): + rpc_method_handlers = { + 'CreateReadSession': grpc.unary_unary_rpc_method_handler( + servicer.CreateReadSession, + request_deserializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.CreateReadSessionRequest.FromString, + response_serializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.ReadSession.SerializeToString, + ), + 'ReadRows': grpc.unary_stream_rpc_method_handler( + servicer.ReadRows, + request_deserializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.ReadRowsRequest.FromString, + response_serializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.ReadRowsResponse.SerializeToString, + ), + 'BatchCreateReadSessionStreams': grpc.unary_unary_rpc_method_handler( + servicer.BatchCreateReadSessionStreams, + request_deserializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.BatchCreateReadSessionStreamsRequest.FromString, + response_serializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.BatchCreateReadSessionStreamsResponse.SerializeToString, + ), + 'FinalizeStream': grpc.unary_unary_rpc_method_handler( + servicer.FinalizeStream, + request_deserializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.FinalizeStreamRequest.FromString, + response_serializer=google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, + ), + 'SplitReadStream': grpc.unary_unary_rpc_method_handler( + servicer.SplitReadStream, + request_deserializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.SplitReadStreamRequest.FromString, + response_serializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.SplitReadStreamResponse.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + 'google.cloud.bigquery.storage.v1beta1.BigQueryStorage', rpc_method_handlers) + server.add_generic_rpc_handlers((generic_handler,)) diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/table_reference_pb2.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/table_reference_pb2.py new file mode 100644 index 000000000000..24e71dcaff54 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/table_reference_pb2.py @@ -0,0 +1,149 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: google/cloud/bigquery/storage_v1beta1/proto/table_reference.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='google/cloud/bigquery/storage_v1beta1/proto/table_reference.proto', + package='google.cloud.bigquery.storage.v1beta1', + syntax='proto3', + serialized_pb=_b('\nAgoogle/cloud/bigquery/storage_v1beta1/proto/table_reference.proto\x12%google.cloud.bigquery.storage.v1beta1\x1a\x1fgoogle/protobuf/timestamp.proto\"J\n\x0eTableReference\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08table_id\x18\x03 \x01(\t\"C\n\x0eTableModifiers\x12\x31\n\rsnapshot_time\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.TimestampB\x8e\x01\n)com.google.cloud.bigquery.storage.v1beta1B\x13TableReferenceProtoZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storageb\x06proto3') + , + dependencies=[google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR,]) + + + + +_TABLEREFERENCE = _descriptor.Descriptor( + name='TableReference', + full_name='google.cloud.bigquery.storage.v1beta1.TableReference', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='project_id', full_name='google.cloud.bigquery.storage.v1beta1.TableReference.project_id', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='dataset_id', full_name='google.cloud.bigquery.storage.v1beta1.TableReference.dataset_id', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='table_id', full_name='google.cloud.bigquery.storage.v1beta1.TableReference.table_id', index=2, + number=3, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=141, + serialized_end=215, +) + + +_TABLEMODIFIERS = _descriptor.Descriptor( + name='TableModifiers', + full_name='google.cloud.bigquery.storage.v1beta1.TableModifiers', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='snapshot_time', full_name='google.cloud.bigquery.storage.v1beta1.TableModifiers.snapshot_time', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + 
options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=217, + serialized_end=284, +) + +_TABLEMODIFIERS.fields_by_name['snapshot_time'].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP +DESCRIPTOR.message_types_by_name['TableReference'] = _TABLEREFERENCE +DESCRIPTOR.message_types_by_name['TableModifiers'] = _TABLEMODIFIERS +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +TableReference = _reflection.GeneratedProtocolMessageType('TableReference', (_message.Message,), dict( + DESCRIPTOR = _TABLEREFERENCE, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.table_reference_pb2' + , + __doc__ = """Table reference that includes just the 3 strings needed to identify a + table. + + + Attributes: + project_id: + The assigned project ID of the project. + dataset_id: + The ID of the dataset in the above project. + table_id: + The ID of the table in the above dataset. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.TableReference) + )) +_sym_db.RegisterMessage(TableReference) + +TableModifiers = _reflection.GeneratedProtocolMessageType('TableModifiers', (_message.Message,), dict( + DESCRIPTOR = _TABLEMODIFIERS, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.table_reference_pb2' + , + __doc__ = """All fields in this message optional. + + + Attributes: + snapshot_time: + The snapshot time of the table. If not set, interpreted as + now. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.TableModifiers) + )) +_sym_db.RegisterMessage(TableModifiers) + + +DESCRIPTOR.has_options = True +DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\n)com.google.cloud.bigquery.storage.v1beta1B\023TableReferenceProtoZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage')) +# @@protoc_insertion_point(module_scope) diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/table_reference_pb2_grpc.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/table_reference_pb2_grpc.py new file mode 100644 index 000000000000..a89435267cb2 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/table_reference_pb2_grpc.py @@ -0,0 +1,3 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +import grpc + diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/reader.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/reader.py new file mode 100644 index 000000000000..da8909f5bfe2 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/reader.py @@ -0,0 +1,265 @@ +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
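The hand-written reader module added below wraps the raw ``ReadRows`` response iterator and transparently reconnects when the stream hits a transient gRPC error, using the count of rows already consumed to resume at the correct offset. A rough, standalone sketch of that resumption pattern follows (the helper name and its arguments are illustrative stand-ins, not part of the library)::

    import google.api_core.exceptions

    _RESUMABLE = (
        google.api_core.exceptions.DeadlineExceeded,
        google.api_core.exceptions.ServiceUnavailable,
    )


    def resumable_blocks(reconnect, position):
        """Yield row blocks, resuming after transient stream failures.

        ``reconnect`` is any callable that accepts a stream position and
        returns a fresh iterable of ReadRowsResponse-like blocks;
        ``position`` is an object with a mutable ``offset`` attribute.
        """
        stream = reconnect(position)
        while True:
            try:
                for block in stream:
                    # Advance past rows already consumed so a reconnect
                    # resumes where the previous stream left off.
                    position.offset += block.avro_rows.row_count
                    yield block
                return  # Reached the end of the stream.
            except _RESUMABLE:
                stream = reconnect(position)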
+ +from __future__ import absolute_import + +import itertools +import json + +try: + import fastavro +except ImportError: # pragma: NO COVER + fastavro = None +import google.api_core.exceptions +try: + import pandas +except ImportError: # pragma: NO COVER + pandas = None +import six + +from google.cloud.bigquery_storage_v1beta1 import types + + +_STREAM_RESUMPTION_EXCEPTIONS = ( + google.api_core.exceptions.DeadlineExceeded, + google.api_core.exceptions.ServiceUnavailable, +) +_FASTAVRO_REQUIRED = "fastavro is required to parse Avro blocks" + + +class ReadRowsStream(object): + """A stream of results from a read rows request. + + This stream is an iterable of + :class:`~google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse`. + Iterate over it to fetch all row blocks. + + If the fastavro library is installed, use the + :func:`~google.cloud.bigquery_storage_v1beta1.reader.ReadRowsStream.rows()` + method to parse all blocks into a stream of row dictionaries. + + If the pandas and fastavro libraries are installed, use the + :func:`~google.cloud.bigquery_storage_v1beta1.reader.ReadRowsStream.to_dataframe()` + method to parse all blocks into a :class:`pandas.DataFrame`. + """ + + def __init__( + self, + wrapped, + client, + read_position, + read_rows_kwargs, + ): + """Construct a ReadRowsStream. + + Args: + wrapped (Iterable[ \ + ~google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse \ + ]): + The ReadRows stream to read. + client ( \ + ~google.cloud.bigquery_storage_v1beta1.gapic. \ + big_query_storage_client.BigQueryStorageClient \ + ): + A GAPIC client used to reconnect to a ReadRows stream. This + must be the GAPIC client to avoid a circular dependency on + this class. + read_position (Union[ \ + dict, \ + ~google.cloud.bigquery_storage_v1beta1.types.StreamPosition \ + ]): + Required. Identifier of the position in the stream to start + reading from. The offset requested must be less than the last + row read from ReadRows. Requesting a larger offset is + undefined. If a dict is provided, it must be of the same form + as the protobuf message + :class:`~google.cloud.bigquery_storage_v1beta1.types.StreamPosition` + read_rows_kwargs (dict): + Keyword arguments to use when reconnecting to a ReadRows + stream. + + Returns: + Iterable[ \ + ~google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse \ + ]: + A sequence of row blocks. + """ + + # Make a copy of the read position so that we can update it without + # mutating the original input. + self._position = _copy_stream_position(read_position) + self._client = client + self._wrapped = wrapped + self._read_rows_kwargs = read_rows_kwargs + + def __iter__(self): + """An iterable of blocks. + + Returns: + Iterable[ \ + ~google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse \ + ]: + A sequence of row blocks. + """ + + # Infinite loop to reconnect on reconnectable errors while processing + # the row stream. + while True: + try: + for block in self._wrapped: + rowcount = block.avro_rows.row_count + self._position.offset += rowcount + yield block + + return # Made it through the whole stream. + except _STREAM_RESUMPTION_EXCEPTIONS: + # Transient error, so reconnect to the stream. + pass + + self._reconnect() + + def _reconnect(self): + """Reconnect to the ReadRows stream using the most recent offset.""" + self._wrapped = self._client.read_rows( + _copy_stream_position(self._position), **self._read_rows_kwargs) + + def rows(self, read_session): + """Iterate over all rows in the stream. 
+ + This method requires the fastavro library in order to parse row + blocks. + + .. warning:: + DATETIME columns are not supported. They are currently parsed as + strings in the fastavro library. + + Args: + read_session ( \ + ~google.cloud.bigquery_storage_v1beta1.types.ReadSession \ + ): + The read session associated with this read rows stream. This + contains the schema, which is required to parse the data + blocks. + + Returns: + Iterable[Mapping]: + A sequence of rows, represented as dictionaries. + """ + if fastavro is None: + raise ImportError(_FASTAVRO_REQUIRED) + + avro_schema = _avro_schema(read_session) + blocks = (_avro_rows(block, avro_schema) for block in self) + return itertools.chain.from_iterable(blocks) + + def to_dataframe(self, read_session): + """Create a :class:`pandas.DataFrame` of all rows in the stream. + + This method requires the pandas libary to create a data frame and the + fastavro library to parse row blocks. + + .. warning:: + DATETIME columns are not supported. They are currently parsed as + strings in the fastavro library. + + Args: + read_session ( \ + ~google.cloud.bigquery_storage_v1beta1.types.ReadSession \ + ): + The read session associated with this read rows stream. This + contains the schema, which is required to parse the data + blocks. + + Returns: + pandas.DataFrame: + A data frame of all rows in the stream. + """ + if fastavro is None: + raise ImportError(_FASTAVRO_REQUIRED) + if pandas is None: + raise ImportError("pandas is required to create a DataFrame") + + avro_schema = _avro_schema(read_session) + frames = [] + for block in self: + dataframe = pandas.DataFrame(list(_avro_rows(block, avro_schema))) + frames.append(dataframe) + return pandas.concat(frames) + + +def _avro_schema(read_session): + """Extract and parse Avro schema from a read session. + + Args: + read_session ( \ + ~google.cloud.bigquery_storage_v1beta1.types.ReadSession \ + ): + The read session associated with this read rows stream. This + contains the schema, which is required to parse the data + blocks. + + Returns: + A parsed Avro schema, using :func:`fastavro.schema.parse_schema`. + """ + json_schema = json.loads(read_session.avro_schema.schema) + return fastavro.parse_schema(json_schema) + + +def _avro_rows(block, avro_schema): + """Parse all rows in a stream block. + + Args: + read_session ( \ + ~google.cloud.bigquery_storage_v1beta1.types.ReadSession \ + ): + The read session associated with this read rows stream. This + contains the schema, which is required to parse the data + blocks. + + Returns: + Iterable[Mapping]: + A sequence of rows, represented as dictionaries. + """ + blockio = six.BytesIO(block.avro_rows.serialized_binary_rows) + while True: + # Loop in a while loop because schemaless_reader can only read + # a single record. + try: + # TODO: Parse DATETIME into datetime.datetime (no timezone), + # instead of as a string. + yield fastavro.schemaless_reader(blockio, avro_schema) + except StopIteration: + break # Finished with block + + +def _copy_stream_position(position): + """Copy a StreamPosition. + + Args: + position (Union[ \ + dict, \ + ~google.cloud.bigquery_storage_v1beta1.types.StreamPosition \ + ]): + StreamPostion (or dictionary in StreamPosition format) to copy. + + Returns: + ~google.cloud.bigquery_storage_v1beta1.types.StreamPosition: + A copy of the input StreamPostion. 
+ """ + if isinstance(position, types.StreamPosition): + output = types.StreamPosition() + output.CopyFrom(position) + return output + + return types.StreamPosition(**position) diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/types.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/types.py new file mode 100644 index 000000000000..9b0d557fe728 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/types.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import sys + +from google.api_core.protobuf_helpers import get_messages + +from google.cloud.bigquery_storage_v1beta1.proto import avro_pb2 +from google.cloud.bigquery_storage_v1beta1.proto import read_options_pb2 +from google.cloud.bigquery_storage_v1beta1.proto import storage_pb2 +from google.cloud.bigquery_storage_v1beta1.proto import table_reference_pb2 +from google.protobuf import empty_pb2 +from google.protobuf import timestamp_pb2 + +_shared_modules = [ + empty_pb2, + timestamp_pb2, +] + +_local_modules = [ + avro_pb2, + read_options_pb2, + storage_pb2, + table_reference_pb2, +] + +names = [] + +for module in _shared_modules: + for name, message in get_messages(module).items(): + setattr(sys.modules[__name__], name, message) + names.append(name) +for module in _local_modules: + for name, message in get_messages(module).items(): + message.__module__ = 'google.cloud.bigquery_storage_v1beta1.types' + setattr(sys.modules[__name__], name, message) + names.append(name) + +__all__ = tuple(sorted(names)) diff --git a/bigquery_storage/noxfile.py b/bigquery_storage/noxfile.py new file mode 100644 index 000000000000..d363f1ad5c30 --- /dev/null +++ b/bigquery_storage/noxfile.py @@ -0,0 +1,133 @@ +# Copyright 2018, Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +import os +import shutil + +import nox + + +LOCAL_DEPS = ( + os.path.join('..', 'api_core'), +) + + +def default(session): + """Default unit test session. + + This is intended to be run **without** an interpreter set, so + that the current ``python`` (on the ``PATH``) or the version of + Python corresponding to the ``nox`` binary on the ``PATH`` can + run the tests. + """ + # Install all test dependencies, then install this package in-place. 
+ session.install('mock', 'pytest', 'pytest-cov') + for local_dep in LOCAL_DEPS: + session.install('-e', local_dep) + session.install('-e', '.[pandas,fastavro]') + + # Run py.test against the unit tests. + session.run( + 'py.test', + '--quiet', + '--cov=google.cloud.bigquery_storage', + '--cov=google.cloud.bigquery_storage_v1beta1', + '--cov=tests.unit', + '--cov-append', + '--cov-config=.coveragerc', + '--cov-report=', + os.path.join('tests', 'unit'), + *session.posargs + ) + + +@nox.session(python=['2.7', '3.5', '3.6', '3.7']) +def unit(session): + """Run the unit test suite.""" + default(session) + + +@nox.session(python='3.6') +def lint(session): + """Run linters. + Returns a failure if the linters find linting errors or sufficiently + serious code quality issues. + """ + + session.install('flake8', *LOCAL_DEPS) + session.install('-e', '.') + session.run( + 'flake8', os.path.join('google', 'cloud', 'bigquery_storage_v1beta1')) + session.run('flake8', 'tests') + + +@nox.session(python='3.6') +def lint_setup_py(session): + """Verify that setup.py is valid (including RST check).""" + session.install('docutils', 'pygments') + session.run('python', 'setup.py', 'check', '--restructuredtext', + '--strict') + + +@nox.session(python='3.6') +def cover(session): + """Run the final coverage report. + This outputs the coverage report aggregating coverage from the unit + test runs (not system test runs), and then erases coverage data. + """ + session.install('coverage', 'pytest-cov') + session.run('coverage', 'report', '--show-missing', '--fail-under=100') + session.run('coverage', 'erase') + + +@nox.session(python=['2.7', '3.6']) +def system(session): + """Run the system test suite.""" + + # Sanity check: Only run system tests if the environment variable is set. + if not os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', ''): + session.skip('Credentials must be set via environment variable.') + + # Install all test dependencies, then install this package into the + # virtualenv's dist-packages. + session.install('pytest') + session.install('-e', os.path.join('..', 'test_utils')) + for local_dep in LOCAL_DEPS: + session.install('-e', local_dep) + session.install('-e', '.[pandas,fastavro]') + + # Run py.test against the system tests. 
+ session.run('py.test', '--quiet', 'tests/system/') + + +@nox.session(python='3.6') +def docs(session): + """Build the docs.""" + + session.install('sphinx', 'sphinx_rtd_theme') + session.install('-e', '.[pandas,fastavro]') + + shutil.rmtree(os.path.join('docs', '_build'), ignore_errors=True) + session.run( + 'sphinx-build', + '-W', # warnings as errors + '-T', # show full traceback on exception + '-N', # no colors + '-b', 'html', + '-d', os.path.join('docs', '_build', 'doctrees', ''), + os.path.join('docs', ''), + os.path.join('docs', '_build', 'html', ''), + ) diff --git a/bigquery_storage/setup.cfg b/bigquery_storage/setup.cfg new file mode 100644 index 000000000000..17f660661b30 --- /dev/null +++ b/bigquery_storage/setup.cfg @@ -0,0 +1,10 @@ +[bdist_wheel] +universal = 1 + +[flake8] +exclude = + *_pb2.py + *_pb2_grpc.py + google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py + google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py + tests/unit/gapic/v1beta1/test_big_query_storage_client_v1beta1.py diff --git a/bigquery_storage/setup.py b/bigquery_storage/setup.py new file mode 100644 index 000000000000..f8019f14a579 --- /dev/null +++ b/bigquery_storage/setup.py @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import io +import os + +import setuptools + +name = 'google-cloud-bigquery-storage' +description = 'BigQuery Storage API API client library' +version = '0.1.0' +release_status = '3 - Alpha' +dependencies = [ + 'google-api-core[grpc] >= 1.5.1, < 2.0.0dev', + 'enum34; python_version < "3.4"', +] +extras = { + 'pandas': 'pandas>=0.17.1', + 'fastavro': 'fastavro>=0.21.2', +} + +package_root = os.path.abspath(os.path.dirname(__file__)) + +readme_filename = os.path.join(package_root, 'README.rst') +with io.open(readme_filename, encoding='utf-8') as readme_file: + readme = readme_file.read() + +packages = [ + package for package in setuptools.find_packages() + if package.startswith('google') +] + +namespaces = ['google'] +if 'google.cloud' in packages: + namespaces.append('google.cloud') + +setuptools.setup( + name=name, + version=version, + description=description, + long_description=readme, + author='Google LLC', + author_email='googleapis-packages@google.com', + license='Apache 2.0', + url='https://github.com/GoogleCloudPlatform/google-cloud-python', + classifiers=[ + release_status, + 'Intended Audience :: Developers', + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Operating System :: OS Independent', + 'Topic :: Internet', + ], + platforms='Posix; MacOS X; Windows', + packages=packages, + namespace_packages=namespaces, + install_requires=dependencies, + extras_require=extras, + include_package_data=True, + zip_safe=False, +) diff --git a/bigquery_storage/synth.py b/bigquery_storage/synth.py new file mode 100644 index 000000000000..59f2067b9fdd --- /dev/null +++ b/bigquery_storage/synth.py @@ -0,0 +1,113 @@ +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""This script is used to synthesize generated parts of this library.""" + +import synthtool as s +from synthtool import gcp + +gapic = gcp.GAPICGenerator() + +version = 'v1' + +library = gapic.py_library( + 'bigquery-datatransfer', + version, + config_path='/google/cloud/bigquery/storage/' + 'artman_bigquerystorage_v1beta1.yaml', + artman_output_name='bigquerystorage-v1beta1' +) + +s.move( + library, + excludes=[ + 'docs/conf.py', + 'docs/index.rst', + 'google/cloud/bigquery_storage_v1beta1/__init__.py', + 'README.rst', + 'nox*.py', + 'setup.py', + 'setup.cfg', + ], +) + +s.replace( + ['google/cloud/bigquery_storage_v1beta1/proto/storage_pb2.py', + 'google/cloud/bigquery_storage_v1beta1/proto/storage_pb2_grpc.py'], + 'from google.cloud.bigquery.storage_v1beta1.proto', + 'from google.cloud.bigquery_storage_v1beta1.proto', +) + +s.replace( + 'google/cloud/bigquery_storage_v1beta1/gapic/' + 'big_query_storage_client.py', + 'google-cloud-bigquerystorage', + 'google-cloud-bigquery-storage') + +s.replace( + 'google/cloud/bigquery_storage_v1beta1/gapic/' + 'big_query_storage_client.py', + 'import google.api_core.gapic_v1.method\n', + '\g<0>import google.api_core.path_template\n' +) + +s.replace( + ['tests/unit/gapic/v1beta1/test_big_query_storage_client_v1beta1.py'], + 'from google.cloud import bigquery_storage_v1beta1', + 'from google.cloud.bigquery_storage_v1beta1.gapic import big_query_storage_client # noqa', +) + +s.replace( + ['tests/unit/gapic/v1beta1/test_big_query_storage_client_v1beta1.py'], + 'bigquery_storage_v1beta1.BigQueryStorageClient', + 'big_query_storage_client.BigQueryStorageClient', +) + +# START: Ignore lint and coverage +s.replace( + ['google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py'], + 'if transport:', + 'if transport: # pragma: no cover', +) + +s.replace( + ['google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py'], + r'to_grpc_metadata\(\n', + 'to_grpc_metadata( # pragma: no cover\n', +) + +s.replace( + ['google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py'], + r'metadata.append\(routing_metadata\)', + 'metadata.append(routing_metadata) # pragma: no cover', +) + +s.replace( + ['google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py'], + 'if channel is not None and credentials is not None:', + 'if channel is not None and credentials is not None: # pragma: no cover', +) + +s.replace( + ['google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py'], + 'if channel is None:', + 'if channel is None: # pragma: no cover', +) + +s.replace( + ['google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py'], + r'google.api_core.grpc_helpers.create_channel\(', + 'google.api_core.grpc_helpers.create_channel( # pragma: no cover', +) +# END: Ignore lint and coverage diff --git a/bigquery_storage/tests/system/test_system.py b/bigquery_storage/tests/system/test_system.py new file mode 100644 index 000000000000..03e1064e066a --- /dev/null +++ b/bigquery_storage/tests/system/test_system.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""System tests for reading rows from tables.""" + +import os + +import pytest + +from google.cloud import bigquery_storage_v1beta1 + + +@pytest.fixture() +def project_id(): + return os.environ['PROJECT_ID'] + + +@pytest.fixture() +def client(): + return bigquery_storage_v1beta1.BigQueryStorageClient() + + +@pytest.fixture() +def table_reference(): + table_ref = bigquery_storage_v1beta1.types.TableReference() + table_ref.project_id = 'bigquery-public-data' + table_ref.dataset_id = 'usa_names' + table_ref.table_id = 'usa_1910_2013' + return table_ref + + +@pytest.fixture() +def small_table_reference(): + table_ref = bigquery_storage_v1beta1.types.TableReference() + table_ref.project_id = 'bigquery-public-data' + table_ref.dataset_id = 'utility_us' + table_ref.table_id = 'country_code_iso' + return table_ref + + +def test_read_rows_full_table(client, project_id, small_table_reference): + session = client.create_read_session( + small_table_reference, + 'projects/{}'.format(project_id), + requested_streams=1, + ) + + stream_pos = bigquery_storage_v1beta1.types.StreamPosition( + stream=session.streams[0]) + blocks = list(client.read_rows(stream_pos)) + + assert len(blocks) > 0 + block = blocks[0] + assert block.status.estimated_row_count > 0 + assert len(block.avro_rows.serialized_binary_rows) > 0 + + +def test_read_rows_to_dataframe(client, project_id): + table_ref = bigquery_storage_v1beta1.types.TableReference() + table_ref.project_id = 'bigquery-public-data' + table_ref.dataset_id = 'new_york_citibike' + table_ref.table_id = 'citibike_stations' + session = client.create_read_session( + table_ref, + 'projects/{}'.format(project_id), + requested_streams=1, + ) + stream_pos = bigquery_storage_v1beta1.types.StreamPosition( + stream=session.streams[0]) + + frame = client.read_rows(stream_pos).to_dataframe(session) + + # Station ID is a required field (no nulls), so the datatype should always + # be integer. + assert frame.station_id.dtype.name == 'int64' + assert frame['name'].str.startswith('Central Park').any() + + +def test_split_read_stream(client, project_id, table_reference): + session = client.create_read_session( + table_reference, + parent='projects/{}'.format(project_id), + ) + + split = client.split_read_stream(session.streams[0]) + + assert split.primary_stream is not None + assert split.remainder_stream is not None + assert split.primary_stream != split.remainder_stream diff --git a/bigquery_storage/tests/unit/gapic/v1beta1/test_big_query_storage_client_v1beta1.py b/bigquery_storage/tests/unit/gapic/v1beta1/test_big_query_storage_client_v1beta1.py new file mode 100644 index 000000000000..699517f480c0 --- /dev/null +++ b/bigquery_storage/tests/unit/gapic/v1beta1/test_big_query_storage_client_v1beta1.py @@ -0,0 +1,244 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Unit tests.""" + +import pytest + +from google.cloud.bigquery_storage_v1beta1.gapic import big_query_storage_client # noqa +from google.cloud.bigquery_storage_v1beta1.proto import storage_pb2 +from google.cloud.bigquery_storage_v1beta1.proto import table_reference_pb2 +from google.protobuf import empty_pb2 + + +class MultiCallableStub(object): + """Stub for the grpc.UnaryUnaryMultiCallable interface.""" + + def __init__(self, method, channel_stub): + self.method = method + self.channel_stub = channel_stub + + def __call__(self, request, timeout=None, metadata=None, credentials=None): + self.channel_stub.requests.append((self.method, request)) + + response = None + if self.channel_stub.responses: + response = self.channel_stub.responses.pop() + + if isinstance(response, Exception): + raise response + + if response: + return response + + +class ChannelStub(object): + """Stub for the grpc.Channel interface.""" + + def __init__(self, responses=[]): + self.responses = responses + self.requests = [] + + def unary_unary(self, + method, + request_serializer=None, + response_deserializer=None): + return MultiCallableStub(method, self) + + def unary_stream(self, + method, + request_serializer=None, + response_deserializer=None): + return MultiCallableStub(method, self) + + +class CustomException(Exception): + pass + + +class TestBigQueryStorageClient(object): + def test_create_read_session(self): + # Setup Expected Response + name = 'name3373707' + expected_response = {'name': name} + expected_response = storage_pb2.ReadSession(**expected_response) + + # Mock the API response + channel = ChannelStub(responses=[expected_response]) + client = big_query_storage_client.BigQueryStorageClient( + channel=channel) + + # Setup Request + table_reference = {} + parent = 'parent-995424086' + + response = client.create_read_session(table_reference, parent) + assert expected_response == response + + assert len(channel.requests) == 1 + expected_request = storage_pb2.CreateReadSessionRequest( + table_reference=table_reference, parent=parent) + actual_request = channel.requests[0][1] + assert expected_request == actual_request + + def test_create_read_session_exception(self): + # Mock the API response + channel = ChannelStub(responses=[CustomException()]) + client = big_query_storage_client.BigQueryStorageClient( + channel=channel) + + # Setup request + table_reference = {} + parent = 'parent-995424086' + + with pytest.raises(CustomException): + client.create_read_session(table_reference, parent) + + def test_read_rows(self): + # Setup Expected Response + expected_response = {} + expected_response = storage_pb2.ReadRowsResponse(**expected_response) + + # Mock the API response + channel = ChannelStub(responses=[iter([expected_response])]) + client = big_query_storage_client.BigQueryStorageClient( + channel=channel) + + # Setup Request + read_position = {} + + response = client.read_rows(read_position) + resources = list(response) + assert len(resources) == 1 + assert expected_response == resources[0] + + assert len(channel.requests) == 1 + expected_request = storage_pb2.ReadRowsRequest( + 
read_position=read_position) + actual_request = channel.requests[0][1] + assert expected_request == actual_request + + def test_read_rows_exception(self): + # Mock the API response + channel = ChannelStub(responses=[CustomException()]) + client = big_query_storage_client.BigQueryStorageClient( + channel=channel) + + # Setup request + read_position = {} + + with pytest.raises(CustomException): + client.read_rows(read_position) + + def test_batch_create_read_session_streams(self): + # Setup Expected Response + expected_response = {} + expected_response = storage_pb2.BatchCreateReadSessionStreamsResponse( + **expected_response) + + # Mock the API response + channel = ChannelStub(responses=[expected_response]) + client = big_query_storage_client.BigQueryStorageClient( + channel=channel) + + # Setup Request + session = {} + requested_streams = 1017221410 + + response = client.batch_create_read_session_streams( + session, requested_streams) + assert expected_response == response + + assert len(channel.requests) == 1 + expected_request = storage_pb2.BatchCreateReadSessionStreamsRequest( + session=session, requested_streams=requested_streams) + actual_request = channel.requests[0][1] + assert expected_request == actual_request + + def test_batch_create_read_session_streams_exception(self): + # Mock the API response + channel = ChannelStub(responses=[CustomException()]) + client = big_query_storage_client.BigQueryStorageClient( + channel=channel) + + # Setup request + session = {} + requested_streams = 1017221410 + + with pytest.raises(CustomException): + client.batch_create_read_session_streams(session, + requested_streams) + + def test_finalize_stream(self): + channel = ChannelStub() + client = big_query_storage_client.BigQueryStorageClient( + channel=channel) + + # Setup Request + stream = {} + + client.finalize_stream(stream) + + assert len(channel.requests) == 1 + expected_request = storage_pb2.FinalizeStreamRequest(stream=stream) + actual_request = channel.requests[0][1] + assert expected_request == actual_request + + def test_finalize_stream_exception(self): + # Mock the API response + channel = ChannelStub(responses=[CustomException()]) + client = big_query_storage_client.BigQueryStorageClient( + channel=channel) + + # Setup request + stream = {} + + with pytest.raises(CustomException): + client.finalize_stream(stream) + + def test_split_read_stream(self): + # Setup Expected Response + expected_response = {} + expected_response = storage_pb2.SplitReadStreamResponse( + **expected_response) + + # Mock the API response + channel = ChannelStub(responses=[expected_response]) + client = big_query_storage_client.BigQueryStorageClient( + channel=channel) + + # Setup Request + original_stream = {} + + response = client.split_read_stream(original_stream) + assert expected_response == response + + assert len(channel.requests) == 1 + expected_request = storage_pb2.SplitReadStreamRequest( + original_stream=original_stream) + actual_request = channel.requests[0][1] + assert expected_request == actual_request + + def test_split_read_stream_exception(self): + # Mock the API response + channel = ChannelStub(responses=[CustomException()]) + client = big_query_storage_client.BigQueryStorageClient( + channel=channel) + + # Setup request + original_stream = {} + + with pytest.raises(CustomException): + client.split_read_stream(original_stream) diff --git a/bigquery_storage/tests/unit/test_client.py b/bigquery_storage/tests/unit/test_client.py new file mode 100644 index 000000000000..e671b9a3a92a --- /dev/null 
+++ b/bigquery_storage/tests/unit/test_client.py @@ -0,0 +1,97 @@ +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.api_core.gapic_v1 import client_info +import mock +import pytest + +from google.cloud.bigquery_storage_v1beta1 import types + + +PROJECT = 'my-project' +SERVICE_ACCOUNT_PROJECT = 'project-from-credentials' + + +@pytest.fixture() +def mock_transport(monkeypatch): + from google.cloud.bigquery_storage_v1beta1.gapic.transports import ( + big_query_storage_grpc_transport + ) + transport = mock.create_autospec( + big_query_storage_grpc_transport.BigQueryStorageGrpcTransport, + ) + return transport + + +@pytest.fixture() +def client_under_test(mock_transport): + from google.cloud.bigquery_storage_v1beta1 import client + + # The mock is detected as a callable. By creating a real callable here, the + # mock can still be used to verify RPCs. + def transport_callable(credentials=None, default_class=None): + return mock_transport + + return client.BigQueryStorageClient( + transport=transport_callable, + ) + + +def test_constructor_w_client_info(mock_transport): + from google.cloud.bigquery_storage_v1beta1 import client + + def transport_callable(credentials=None, default_class=None): + return mock_transport + + client_under_test = client.BigQueryStorageClient( + transport=transport_callable, + client_info=client_info.ClientInfo( + client_library_version='test-client-version', + ), + ) + + user_agent = client_under_test._client_info.to_user_agent() + assert 'test-client-version' in user_agent + + +def test_create_read_session(mock_transport, client_under_test): + table_reference = types.TableReference( + project_id='data-project-id', + dataset_id='dataset_id', + table_id='table_id', + ) + + client_under_test.create_read_session( + table_reference, + 'projects/other-project', + ) + + expected_request = types.CreateReadSessionRequest( + table_reference=table_reference, + parent='projects/other-project', + ) + mock_transport.create_read_session.assert_called_once_with( + expected_request, metadata=mock.ANY, timeout=mock.ANY) + + +def test_read_rows(mock_transport, client_under_test): + stream_position = types.StreamPosition() + + client_under_test.read_rows(stream_position) + + expected_request = types.ReadRowsRequest( + read_position=stream_position, + ) + mock_transport.create_read_session.read_rows( + expected_request, metadata=mock.ANY, timeout=mock.ANY) diff --git a/bigquery_storage/tests/unit/test_reader.py b/bigquery_storage/tests/unit/test_reader.py new file mode 100644 index 000000000000..489e81fec176 --- /dev/null +++ b/bigquery_storage/tests/unit/test_reader.py @@ -0,0 +1,360 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+import decimal
+import itertools
+import json
+
+import fastavro
+import mock
+import pandas
+import pandas.testing
+import pytest
+import pytz
+import six
+
+import google.api_core.exceptions
+from google.cloud import bigquery_storage_v1beta1
+
+
+PROJECT = "my-project"
+BQ_TO_AVRO_TYPES = {
+    "int64": "long",
+    "float64": "double",
+    "bool": "boolean",
+    "numeric": {
+        "type": "bytes",
+        "logicalType": "decimal",
+        "precision": 38,
+        "scale": 9,
+    },
+    "string": "string",
+    "bytes": "bytes",
+    "date": {"type": "int", "logicalType": "date"},
+    "datetime": {"type": "string", "sqlType": "DATETIME"},
+    "time": {"type": "long", "logicalType": "time-micros"},
+    "timestamp": {"type": "long", "logicalType": "timestamp-micros"},
+}
+SCALAR_COLUMNS = [
+    {"name": "int_col", "type": "int64"},
+    {"name": "float_col", "type": "float64"},
+    {"name": "num_col", "type": "numeric"},
+    {"name": "bool_col", "type": "bool"},
+    {"name": "str_col", "type": "string"},
+    {"name": "bytes_col", "type": "bytes"},
+    {"name": "date_col", "type": "date"},
+    {"name": "time_col", "type": "time"},
+    {"name": "ts_col", "type": "timestamp"},
+]
+SCALAR_BLOCKS = [
+    [
+        {
+            "int_col": 123,
+            "float_col": 3.14,
+            "num_col": decimal.Decimal("9.99"),
+            "bool_col": True,
+            "str_col": "hello world",
+            "bytes_col": b"ascii bytes",
+            "date_col": datetime.date(1998, 9, 4),
+            "time_col": datetime.time(12, 0),
+            "ts_col": datetime.datetime(2000, 1, 1, 5, 0, tzinfo=pytz.utc),
+        },
+        {
+            "int_col": 456,
+            "float_col": 2.72,
+            "num_col": decimal.Decimal("0.99"),
+            "bool_col": False,
+            "str_col": "hallo welt",
+            "bytes_col": b"\xbb\xee\xff",
+            "date_col": datetime.date(1995, 3, 2),
+            "time_col": datetime.time(13, 37),
+            "ts_col": datetime.datetime(1965, 4, 3, 2, 1, tzinfo=pytz.utc),
+        },
+    ],
+    [
+        {
+            "int_col": 789,
+            "float_col": 1.23,
+            "num_col": decimal.Decimal("5.67"),
+            "bool_col": True,
+            "str_col": u"こんにちは世界",
+            "bytes_col": b"\x54\x69\x6d",
+            "date_col": datetime.date(1970, 1, 1),
+            "time_col": datetime.time(16, 20),
+            "ts_col": datetime.datetime(
+                1991, 8, 25, 20, 57, 8, tzinfo=pytz.utc
+            ),
+        }
+    ],
+]
+
+
+@pytest.fixture()
+def mut():
+    from google.cloud.bigquery_storage_v1beta1 import reader
+
+    return reader
+
+
+@pytest.fixture()
+def class_under_test(mut):
+    return mut.ReadRowsStream
+
+
+@pytest.fixture()
+def mock_client():
+    from google.cloud.bigquery_storage_v1beta1.gapic import (
+        big_query_storage_client,
+    )
+
+    return mock.create_autospec(big_query_storage_client.BigQueryStorageClient)
+
+
+def _bq_to_avro_blocks(bq_blocks, avro_schema_json):
+    avro_schema = fastavro.parse_schema(avro_schema_json)
+    avro_blocks = []
+    for block in bq_blocks:
+        blockio = six.BytesIO()
+        for row in block:
+            fastavro.schemaless_writer(blockio, avro_schema, row)
+
+        response = bigquery_storage_v1beta1.types.ReadRowsResponse()
+        response.avro_rows.row_count = len(block)
+        response.avro_rows.serialized_binary_rows = blockio.getvalue()
+        avro_blocks.append(response)
+    return avro_blocks
+
+
+def _avro_blocks_w_deadline(avro_blocks):
+    for block in avro_blocks:
+        yield block
+    raise google.api_core.exceptions.DeadlineExceeded('test: please reconnect')
+
+
+def _generate_read_session(avro_schema_json):
+    schema = json.dumps(avro_schema_json)
+    return bigquery_storage_v1beta1.types.ReadSession(
+        avro_schema={"schema": schema},
+    )
+
+
+def _bq_to_avro_schema(bq_columns):
+    fields = []
+    avro_schema = {"type": "record", "name": "__root__", "fields": fields}
+
+    for column in bq_columns:
+        doc = column.get("description")
+        name = column["name"]
+        type_ = BQ_TO_AVRO_TYPES[column["type"]]
+        mode = column.get("mode", "nullable").lower()
+
+        if mode == "nullable":
+            type_ = ["null", type_]
+
+        fields.append(
+            {
+                "name": name,
+                "type": type_,
+                "doc": doc,
+            }
+        )
+
+    return avro_schema
+
+
+def _get_avro_bytes(rows, avro_schema):
+    avro_file = six.BytesIO()
+    for row in rows:
+        fastavro.schemaless_writer(avro_file, avro_schema, row)
+    return avro_file.getvalue()
+
+
+def test_rows_raises_import_error(
+        mut, class_under_test, mock_client, monkeypatch):
+    monkeypatch.setattr(mut, 'fastavro', None)
+    reader = class_under_test(
+        [],
+        mock_client,
+        bigquery_storage_v1beta1.types.StreamPosition(),
+        {},
+    )
+    read_session = bigquery_storage_v1beta1.types.ReadSession()
+
+    with pytest.raises(ImportError):
+        reader.rows(read_session)
+
+
+def test_rows_w_empty_stream(class_under_test, mock_client):
+    bq_columns = [
+        {"name": "int_col", "type": "int64"},
+    ]
+    avro_schema = _bq_to_avro_schema(bq_columns)
+    read_session = _generate_read_session(avro_schema)
+    reader = class_under_test(
+        [],
+        mock_client,
+        bigquery_storage_v1beta1.types.StreamPosition(),
+        {},
+    )
+
+    got = tuple(reader.rows(read_session))
+    assert got == ()
+
+
+def test_rows_w_scalars(class_under_test, mock_client):
+    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
+    read_session = _generate_read_session(avro_schema)
+    avro_blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema)
+
+    reader = class_under_test(
+        avro_blocks,
+        mock_client,
+        bigquery_storage_v1beta1.types.StreamPosition(),
+        {},
+    )
+    got = tuple(reader.rows(read_session))
+
+    expected = tuple(itertools.chain.from_iterable(SCALAR_BLOCKS))
+    assert got == expected
+
+
+def test_rows_w_reconnect(class_under_test, mock_client):
+    bq_columns = [
+        {"name": "int_col", "type": "int64"},
+    ]
+    avro_schema = _bq_to_avro_schema(bq_columns)
+    read_session = _generate_read_session(avro_schema)
+    bq_blocks_1 = [
+        [{"int_col": 123}, {"int_col": 234}],
+        [{"int_col": 345}, {"int_col": 456}],
+    ]
+    avro_blocks_1 = _avro_blocks_w_deadline(
+        _bq_to_avro_blocks(bq_blocks_1, avro_schema),
+    )
+    bq_blocks_2 = [
+        [{"int_col": 567}, {"int_col": 789}],
+        [{"int_col": 890}],
+    ]
+    avro_blocks_2 = _bq_to_avro_blocks(bq_blocks_2, avro_schema)
+    mock_client.read_rows.return_value = avro_blocks_2
+    stream_position = bigquery_storage_v1beta1.types.StreamPosition(
+        stream={'name': 'test'},
+    )
+
+    reader = class_under_test(
+        avro_blocks_1,
+        mock_client,
+        stream_position,
+        {'metadata': {'test-key': 'test-value'}},
+    )
+    got = tuple(reader.rows(read_session))
+
+    expected = tuple(itertools.chain(
+        itertools.chain.from_iterable(bq_blocks_1),
+        itertools.chain.from_iterable(bq_blocks_2),
+    ))
+
+    assert got == expected
+    mock_client.read_rows.assert_called_once_with(
+        bigquery_storage_v1beta1.types.StreamPosition(
+            stream={'name': 'test'},
+            offset=4,
+        ),
+        metadata={'test-key': 'test-value'},
+    )
+
+
+def test_to_dataframe_no_pandas_raises_import_error(
+        mut, class_under_test, mock_client, monkeypatch):
+    monkeypatch.setattr(mut, 'pandas', None)
+    reader = class_under_test(
+        [],
+        mock_client,
+        bigquery_storage_v1beta1.types.StreamPosition(),
+        {},
+    )
+    read_session = bigquery_storage_v1beta1.types.ReadSession()
+
+    with pytest.raises(ImportError):
+        reader.to_dataframe(read_session)
+
+
+def test_to_dataframe_no_fastavro_raises_import_error(
+        mut, class_under_test, mock_client, monkeypatch):
+    monkeypatch.setattr(mut, 'fastavro', None)
+    reader = class_under_test(
+        [],
+        mock_client,
+        bigquery_storage_v1beta1.types.StreamPosition(),
+        {},
+    )
+    read_session = bigquery_storage_v1beta1.types.ReadSession()
+
+    with pytest.raises(ImportError):
+        reader.to_dataframe(read_session)
+
+
+def test_to_dataframe_w_scalars(class_under_test, mock_client):
+    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
+    read_session = _generate_read_session(avro_schema)
+    avro_blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema)
+
+    reader = class_under_test(
+        avro_blocks,
+        mock_client,
+        bigquery_storage_v1beta1.types.StreamPosition(),
+        {},
+    )
+    got = reader.to_dataframe(read_session)
+
+    expected = pandas.DataFrame(
+        list(itertools.chain.from_iterable(SCALAR_BLOCKS)),
+    )
+    # fastavro provides its own UTC definition, so
+    # compare the timestamp columns separately.
+    got_ts = got['ts_col']
+    got = got.drop(columns=['ts_col'])
+    expected_ts = expected['ts_col']
+    expected = expected.drop(columns=['ts_col'])
+
+    pandas.testing.assert_frame_equal(
+        got.reset_index(drop=True),  # reset_index to ignore row labels
+        expected.reset_index(drop=True),
+    )
+    pandas.testing.assert_series_equal(
+        got_ts.reset_index(drop=True),
+        expected_ts.reset_index(drop=True),
+        check_dtype=False,  # fastavro's UTC means different dtype
+        check_datetimelike_compat=True,
+    )
+
+
+def test_copy_stream_position(mut):
+    read_position = bigquery_storage_v1beta1.types.StreamPosition(
+        stream={"name": "test"}, offset=41
+    )
+    got = mut._copy_stream_position(read_position)
+    assert got == read_position
+    got.offset = 42
+    assert read_position.offset == 41
+
+
+def test_copy_stream_position_w_dict(mut):
+    read_position = {"stream": {"name": "test"}, "offset": 42}
+    got = mut._copy_stream_position(read_position)
+    assert got.stream.name == "test"
+    assert got.offset == 42
diff --git a/docs/conf.py b/docs/conf.py
index a396990cc621..63a5e4a840a2 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -321,7 +321,8 @@
     'google-gax': ('https://gax-python.readthedocs.io/en/latest/', None),
     'grpc': ('https://grpc.io/grpc/python/', None),
     'requests': ('http://docs.python-requests.org/en/master/', None),
-    'pandas': ('http://pandas.pydata.org/pandas-docs/stable/', None),
+    'fastavro': ('https://fastavro.readthedocs.io/en/stable/', None),
+    'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None),
     'python': ('https://docs.python.org/3', None),
 }
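For reviewers who want to see how the surface exercised by these unit tests fits together, here is a minimal usage sketch (illustrative only, not part of the patch). The calls `client.BigQueryStorageClient`, `create_read_session`, `read_rows`, `ReadRowsStream.rows`, and `ReadRowsStream.to_dataframe` are taken directly from the tests above; the default-credential construction, the `session.streams[0]` access, and the assumption that `read_rows` returns the `ReadRowsStream` wrapper tested in test_reader.py are not verified by these tests.

# Illustrative sketch only; assumptions noted in the paragraph above.
from google.cloud.bigquery_storage_v1beta1 import client, types

bqstorage_client = client.BigQueryStorageClient()  # assumes default credentials

table_reference = types.TableReference(
    project_id='data-project-id',
    dataset_id='dataset_id',
    table_id='table_id',
)
# The parent project is the one billed for the read session.
session = bqstorage_client.create_read_session(
    table_reference, 'projects/other-project')

# Assumption: the returned session lists the streams to read; take the first.
position = types.StreamPosition(stream=session.streams[0])
reader = bqstorage_client.read_rows(position)

# rows() yields one dict per row, decoded from Avro blocks via fastavro, and
# (per test_rows_w_reconnect) resumes at the correct offset after a
# DeadlineExceeded error.
for row in reader.rows(session):
    print(row)

# Alternatively, materialize the whole stream as a pandas DataFrame:
# dataframe = reader.to_dataframe(session)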