From 1b5195ed3e73b06cbb4505102a1d25bd3151a5cb Mon Sep 17 00:00:00 2001 From: Kasia Hinkson <52927664+KasiaHinkson@users.noreply.github.com> Date: Tue, 2 Apr 2024 17:29:43 -0500 Subject: [PATCH 1/5] Change dict to json --- parsons/google/google_bigquery.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsons/google/google_bigquery.py b/parsons/google/google_bigquery.py index c020ca684e..e500d2a6be 100644 --- a/parsons/google/google_bigquery.py +++ b/parsons/google/google_bigquery.py @@ -30,7 +30,7 @@ "datetime": "DATETIME", "date": "DATE", "time": "TIME", - "dict": "RECORD", + "dict": "JSON", "NoneType": "STRING", "UUID": "STRING", "timestamp": "TIMESTAMP", From 792f15786dd567a3b334c4af2b4ff1e2efa77299 Mon Sep 17 00:00:00 2001 From: Kasia Hinkson <52927664+KasiaHinkson@users.noreply.github.com> Date: Wed, 3 Apr 2024 08:18:39 -0500 Subject: [PATCH 2/5] convert dict to json string in parsons table --- parsons/google/google_bigquery.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/parsons/google/google_bigquery.py b/parsons/google/google_bigquery.py index e500d2a6be..330f5c491d 100644 --- a/parsons/google/google_bigquery.py +++ b/parsons/google/google_bigquery.py @@ -8,6 +8,7 @@ import google import petl +import json from google.cloud import bigquery, exceptions from google.cloud.bigquery import dbapi from google.cloud.bigquery.job import LoadJobConfig @@ -1167,6 +1168,11 @@ def _get_job_config_schema( ) # if load is coming from a Parsons table, use that to generate schema if parsons_table: + # BQ can handle JSON but struggles with dictionaries that don't have + # defined schemas, so we need to convert dicts to JSON + parsons_table.convert_table( + (lambda v: json.dumps(v) if isinstance(v, dict) else v) + ) return self._generate_schema_from_parsons_table(parsons_table) return None From ae9ebdd59a733ce41ad3122e1491f4d8d84f7699 Mon Sep 17 00:00:00 2001 From: Kasia Hinkson <52927664+KasiaHinkson@users.noreply.github.com> Date: Thu, 4 Apr 2024 08:55:32 -0500 Subject: [PATCH 3/5] remove type change --- parsons/google/google_bigquery.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/parsons/google/google_bigquery.py b/parsons/google/google_bigquery.py index 330f5c491d..e500d2a6be 100644 --- a/parsons/google/google_bigquery.py +++ b/parsons/google/google_bigquery.py @@ -8,7 +8,6 @@ import google import petl -import json from google.cloud import bigquery, exceptions from google.cloud.bigquery import dbapi from google.cloud.bigquery.job import LoadJobConfig @@ -1168,11 +1167,6 @@ def _get_job_config_schema( ) # if load is coming from a Parsons table, use that to generate schema if parsons_table: - # BQ can handle JSON but struggles with dictionaries that don't have - # defined schemas, so we need to convert dicts to JSON - parsons_table.convert_table( - (lambda v: json.dumps(v) if isinstance(v, dict) else v) - ) return self._generate_schema_from_parsons_table(parsons_table) return None From 532bd5a347b1b65c2f7a56450479c3f8d21727c0 Mon Sep 17 00:00:00 2001 From: Kasia Hinkson <52927664+KasiaHinkson@users.noreply.github.com> Date: Wed, 29 May 2024 08:43:10 -0500 Subject: [PATCH 4/5] remove dict since it needs to be a JSON string --- parsons/google/google_bigquery.py | 1 - 1 file changed, 1 deletion(-) diff --git a/parsons/google/google_bigquery.py b/parsons/google/google_bigquery.py index 6405e62f54..6957ec0ec7 100644 --- a/parsons/google/google_bigquery.py +++ b/parsons/google/google_bigquery.py @@ -30,7 +30,6 @@ "datetime": "DATETIME", "date": "DATE", "time": "TIME", - "dict": "JSON", "NoneType": "STRING", "UUID": "STRING", "timestamp": "TIMESTAMP", From d9a1faa29a3d9abea48df76a3de3c7a60935cb36 Mon Sep 17 00:00:00 2001 From: Austin Weisgrau <62900254+austinweisgrau@users.noreply.github.com> Date: Thu, 11 Jul 2024 11:51:21 -0700 Subject: [PATCH 5/5] Remove support for dict type, add helpful exception message (#1068) --- parsons/google/google_bigquery.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/parsons/google/google_bigquery.py b/parsons/google/google_bigquery.py index 35c6cc591e..c21eac9ca2 100644 --- a/parsons/google/google_bigquery.py +++ b/parsons/google/google_bigquery.py @@ -1199,7 +1199,14 @@ def _generate_schema_from_parsons_table(self, tbl): if isinstance(value, datetime.datetime) and value.tzinfo: best_type = "timestamp" - field_type = self._bigquery_type(best_type) + try: + field_type = self._bigquery_type(best_type) + except KeyError as e: + raise KeyError( + "Column type not supported for load to BigQuery. " + "Consider converting to another type. " + f"[type={best_type}]" + ) from e field = bigquery.schema.SchemaField(stat["name"], field_type) fields.append(field) return fields