Skip to content

Commit

Permalink
Only load the fields that are needed from the Google API for the BigQuery schema.
Browse files Browse the repository at this point in the history
  • Loading branch information
jezdez committed Apr 3, 2019
1 parent 1b142b3 commit de471f1
Showing 1 changed file with 41 additions and 15 deletions.
56 changes: 41 additions & 15 deletions redash/query_runner/big_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,26 +264,52 @@ def get_schema(self, get_stats=False):

service = self._get_bigquery_service()
project_id = self._get_project_id()
datasets = service.datasets().list(projectId=project_id).execute()
# get a list of Big Query datasets
datasets_request = service.datasets().list(
projectId=project_id,
fields="datasets/datasetReference/datasetId,nextPageToken",
)
datasets = []
while datasets_request:
# request datasets
datasets_response = datasets_request.execute()
# store results
datasets.extend(datasets_response.get('datasets', []))
# try loading next page
datasets_request = service.datasets().list_next(
datasets_request,
datasets_response,
)

schema = []
for dataset in datasets.get('datasets', []):
# load all tables for all datasets
for dataset in datasets:
dataset_id = dataset['datasetReference']['datasetId']
tables = service.tables().list(projectId=project_id, datasetId=dataset_id).execute()
while True:
for table in tables.get('tables', []):
table_data = service.tables().get(projectId=project_id,
datasetId=dataset_id,
tableId=table['tableReference']['tableId']).execute()
tables_request = service.tables().list(
projectId=project_id,
datasetId=dataset_id,
fields="tables/tableReference/tableId,nextPageToken",
)
while tables_request:
# request tables with fields above
tables_response = tables_request.execute()
for table in tables_response.get('tables', []):
# load schema for given table
table_data = service.tables().get(
projectId=project_id,
datasetId=dataset_id,
tableId=table['tableReference']['tableId'],
fields="id,schema",
).execute()
# build schema data with given table data
table_schema = self._get_columns_schema(table_data)
schema.append(table_schema)

next_token = tables.get('nextPageToken', None)
if next_token is None:
break

tables = service.tables().list(projectId=project_id,
datasetId=dataset_id,
pageToken=next_token).execute()
# try loading next page of results
tables_request = service.tables().list_next(
tables_request,
tables_response,
)

return schema

Expand Down

0 comments on commit de471f1

Please sign in to comment.