Skip to content

Commit

Permalink
adds to_dataframe() to QueryJob
Browse files Browse the repository at this point in the history
  • Loading branch information
alixhami committed Nov 7, 2017
1 parent 1ed56fd commit edecb93
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 0 deletions.
9 changes: 9 additions & 0 deletions bigquery/google/cloud/bigquery/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -1949,6 +1949,15 @@ def result(self, timeout=None, retry=DEFAULT_RETRY):
return self._client.list_rows(dest_table, selected_fields=schema,
retry=retry)

def to_dataframe(self):
    """Build a pandas DataFrame from this query job's result rows.

    The rows are obtained by calling :meth:`result`; the column labels
    are the field names of the returned iterator's schema, in schema
    order.

    ``pandas`` is imported inside the method, so it is only needed when
    this method is actually called.

    :rtype: :class:`pandas.DataFrame`
    :returns: one DataFrame row per result row, one column per schema
              field.
    :raises ImportError: if ``pandas`` cannot be imported.
    """
    import pandas

    row_iterator = self.result()
    # Read the schema before consuming the iterator, as the rows
    # themselves are only turned into plain value sequences below.
    field_names = [schema_field.name for schema_field in row_iterator.schema]
    records = [record.values() for record in row_iterator]

    return pandas.DataFrame(records, columns=field_names)


class QueryPlanEntryStep(object):
"""Map a single step in a query plan entry.
Expand Down
17 changes: 17 additions & 0 deletions bigquery/tests/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -1235,6 +1235,23 @@ def test_query_future(self):
row_tuples = [r.values() for r in iterator]
self.assertEqual(row_tuples, [(1,)])

def test_query_to_dataframe(self):
    """Run a real query and check the shape of the resulting DataFrame.

    Queries the public Shakespeare sample table for ten rows with two
    aliased columns, then verifies the type, column labels and row
    count of what ``to_dataframe()`` returns.
    """
    import pandas

    # The continuation lines of the literal are kept flush-left so the
    # SQL text is exactly the text sent to the service.
    sql = """
SELECT corpus AS title, COUNT(*) AS unique_words
FROM `bigquery-public-data.samples.shakespeare`
GROUP BY title
ORDER BY unique_words DESC
LIMIT 10"""

    job = Config.CLIENT.query(sql)
    frame = job.to_dataframe()

    self.assertIsInstance(frame, pandas.DataFrame)
    self.assertEqual(list(frame), ['title', 'unique_words'])
    self.assertEqual(len(frame), 10)

def test_query_table_def(self):
gs_url = self._write_csv_to_storage(
'bq_external_test' + unique_resource_id(), 'person_ages.csv',
Expand Down
64 changes: 64 additions & 0 deletions bigquery/tests/unit/test_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -2720,6 +2720,70 @@ def test_reload_w_alternate_client(self):
self.assertEqual(req['path'], PATH)
self._verifyResourceProperties(job, RESOURCE)

def test_to_dataframe(self):
    """``to_dataframe()`` turns a canned four-row response into a frame.

    The fake connection is primed with four responses, in order: the
    begun job resource, the query results, the finished job resource,
    and the query results again.
    """
    import pandas

    begun = self._makeResource()

    # (name, age) pairs; ages are given as strings in the canned rows.
    people = [
        ('Phred Phlyntstone', '32'),
        ('Bharney Rhubble', '33'),
        ('Wylma Phlyntstone', '29'),
        ('Bhettye Rhubble', '27'),
    ]
    results = {
        'jobComplete': True,
        'jobReference': {
            'projectId': self.PROJECT,
            'jobId': self.JOB_ID,
        },
        'schema': {
            'fields': [
                {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
                {'name': 'age', 'type': 'INTEGER', 'mode': 'NULLABLE'},
            ],
        },
        'rows': [
            {'f': [{'v': person}, {'v': years}]}
            for person, years in people
        ],
    }

    finished = copy.deepcopy(begun)
    finished['status'] = {'state': 'DONE'}

    responses = (begun, results, finished, results)
    conn = _Connection(*responses)
    client = _make_client(project=self.PROJECT, connection=conn)
    query_job = self._make_one(self.JOB_ID, self.QUERY, client)

    frame = query_job.to_dataframe()

    self.assertIsInstance(frame, pandas.DataFrame)
    self.assertEqual(len(frame), 4)
    self.assertEqual(list(frame), ['name', 'age'])

def test_to_dataframe_w_empty_results(self):
    """``to_dataframe()`` on a response with a schema but no rows.

    The canned query response has no ``'rows'`` key at all; the
    resulting frame must be empty yet still carry the schema's column
    labels.
    """
    import pandas

    begun = self._makeResource()

    schema_fields = [
        {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'age', 'type': 'INTEGER', 'mode': 'NULLABLE'},
    ]
    results = {
        'jobComplete': True,
        'jobReference': {
            'projectId': self.PROJECT,
            'jobId': self.JOB_ID,
        },
        'schema': {
            'fields': schema_fields,
        },
    }

    finished = copy.deepcopy(begun)
    finished['status'] = {'state': 'DONE'}

    responses = (begun, results, finished, results)
    conn = _Connection(*responses)
    client = _make_client(project=self.PROJECT, connection=conn)
    query_job = self._make_one(self.JOB_ID, self.QUERY, client)

    frame = query_job.to_dataframe()

    self.assertIsInstance(frame, pandas.DataFrame)
    self.assertEqual(len(frame), 0)
    self.assertEqual(list(frame), ['name', 'age'])


class TestQueryPlanEntryStep(unittest.TestCase, _Base):
KIND = 'KIND'
Expand Down

0 comments on commit edecb93

Please sign in to comment.