Added parquet_options to google_bigquery_job resource (#7873) (#14497)
Signed-off-by: Modular Magician <magic-modules@google.com>
modular-magician authored May 4, 2023
1 parent c0a1a57 commit dd8ea1a
Showing 4 changed files with 255 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .changelog/7873.txt
@@ -0,0 +1,3 @@
```release-note:enhancement
bigquery: Added `load.parquet_options` field to `google_bigquery_job`
```
90 changes: 90 additions & 0 deletions google/resource_bigquery_job.go
@@ -480,6 +480,30 @@ empty string is present for all data types except for STRING and BYTE. For STRIN
an empty value.`,
	Default: "",
},
"parquet_options": {
Type: schema.TypeList,
Optional: true,
ForceNew: true,
Description: `Parquet Options for load and make external tables.`,
MaxItems: 1,
Elem: &schema.Resource{
Schema: map[string]*schema.Schema{
"enable_list_inference": {
Type: schema.TypeBool,
Optional: true,
ForceNew: true,
Description: `If sourceFormat is set to PARQUET, indicates whether to use schema inference specifically for Parquet LIST logical type.`,
AtLeastOneOf: []string{"load.0.parquet_options.0.enum_as_string", "load.0.parquet_options.0.enable_list_inference"},
},
"enum_as_string": {
Type: schema.TypeBool,
Optional: true,
ForceNew: true,
Description: `If sourceFormat is set to PARQUET, indicates whether to infer Parquet ENUM logical type as STRING instead of BYTES by default.`,
},
},
},
},
"projection_fields": {
	Type:     schema.TypeList,
	Optional: true,
@@ -1473,6 +1497,8 @@ func flattenBigQueryJobConfigurationLoad(v interface{}, d *schema.ResourceData,
		flattenBigQueryJobConfigurationLoadTimePartitioning(original["timePartitioning"], d, config)
	transformed["destination_encryption_configuration"] =
		flattenBigQueryJobConfigurationLoadDestinationEncryptionConfiguration(original["destinationEncryptionConfiguration"], d, config)
	transformed["parquet_options"] =
		flattenBigQueryJobConfigurationLoadParquetOptions(original["parquetOptions"], d, config)
	return []interface{}{transformed}
}
func flattenBigQueryJobConfigurationLoadSourceUris(v interface{}, d *schema.ResourceData, config *transport_tpg.Config) interface{} {
@@ -1643,6 +1669,29 @@ func flattenBigQueryJobConfigurationLoadDestinationEncryptionConfiguration(v int

}

func flattenBigQueryJobConfigurationLoadParquetOptions(v interface{}, d *schema.ResourceData, config *transport_tpg.Config) interface{} {
	if v == nil {
		return nil
	}
	original := v.(map[string]interface{})
	if len(original) == 0 {
		return nil
	}
	transformed := make(map[string]interface{})
	transformed["enum_as_string"] =
		flattenBigQueryJobConfigurationLoadParquetOptionsEnumAsString(original["enumAsString"], d, config)
	transformed["enable_list_inference"] =
		flattenBigQueryJobConfigurationLoadParquetOptionsEnableListInference(original["enableListInference"], d, config)
	return []interface{}{transformed}
}

func flattenBigQueryJobConfigurationLoadParquetOptionsEnumAsString(v interface{}, d *schema.ResourceData, config *transport_tpg.Config) interface{} {
	return v
}

func flattenBigQueryJobConfigurationLoadParquetOptionsEnableListInference(v interface{}, d *schema.ResourceData, config *transport_tpg.Config) interface{} {
	return v
}

func flattenBigQueryJobConfigurationCopy(v interface{}, d *schema.ResourceData, config *transport_tpg.Config) interface{} {
	if v == nil {
		return nil
@@ -2527,6 +2576,13 @@ func expandBigQueryJobConfigurationLoad(v interface{}, d TerraformResourceData,
transformed["destinationEncryptionConfiguration"] = transformedDestinationEncryptionConfiguration
}

transformedParquetOptions, err := expandBigQueryJobConfigurationLoadParquetOptions(original["parquet_options"], d, config)
if err != nil {
return nil, err
} else if val := reflect.ValueOf(transformedParquetOptions); val.IsValid() && !isEmptyValue(val) {
transformed["parquetOptions"] = transformedParquetOptions
}

return transformed, nil
}

@@ -2710,6 +2766,40 @@ func expandBigQueryJobConfigurationLoadDestinationEncryptionConfigurationKmsKeyV
	return v, nil
}

func expandBigQueryJobConfigurationLoadParquetOptions(v interface{}, d TerraformResourceData, config *transport_tpg.Config) (interface{}, error) {
	l := v.([]interface{})
	if len(l) == 0 || l[0] == nil {
		return nil, nil
	}
	raw := l[0]
	original := raw.(map[string]interface{})
	transformed := make(map[string]interface{})

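	// isEmptyValue treats boolean false as empty, so a field explicitly set to
	// false is omitted from the request body in the checks below.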
	transformedEnumAsString, err := expandBigQueryJobConfigurationLoadParquetOptionsEnumAsString(original["enum_as_string"], d, config)
	if err != nil {
		return nil, err
	} else if val := reflect.ValueOf(transformedEnumAsString); val.IsValid() && !isEmptyValue(val) {
		transformed["enumAsString"] = transformedEnumAsString
	}

	transformedEnableListInference, err := expandBigQueryJobConfigurationLoadParquetOptionsEnableListInference(original["enable_list_inference"], d, config)
	if err != nil {
		return nil, err
	} else if val := reflect.ValueOf(transformedEnableListInference); val.IsValid() && !isEmptyValue(val) {
		transformed["enableListInference"] = transformedEnableListInference
	}

	return transformed, nil
}

func expandBigQueryJobConfigurationLoadParquetOptionsEnumAsString(v interface{}, d TerraformResourceData, config *transport_tpg.Config) (interface{}, error) {
	return v, nil
}

func expandBigQueryJobConfigurationLoadParquetOptionsEnableListInference(v interface{}, d TerraformResourceData, config *transport_tpg.Config) (interface{}, error) {
	return v, nil
}

func expandBigQueryJobConfigurationCopy(v interface{}, d TerraformResourceData, config *transport_tpg.Config) (interface{}, error) {
	l := v.([]interface{})
	if len(l) == 0 || l[0] == nil {
83 changes: 83 additions & 0 deletions google/resource_bigquery_job_generated_test.go
@@ -311,6 +311,89 @@ resource "google_bigquery_job" "job" {
`, context)
}

func TestAccBigQueryJob_bigqueryJobLoadParquetExample(t *testing.T) {
	t.Parallel()

	context := map[string]interface{}{
		"random_suffix": RandString(t, 10),
	}

	VcrTest(t, resource.TestCase{
		PreCheck:                 func() { acctest.AccTestPreCheck(t) },
		ProtoV5ProviderFactories: ProtoV5ProviderFactories(t),
		Steps: []resource.TestStep{
			{
				Config: testAccBigQueryJob_bigqueryJobLoadParquetExample(context),
			},
			{
				ResourceName:      "google_bigquery_job.job",
				ImportState:       true,
				ImportStateVerify: true,
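				// etag and status.0.state change server-side once the job runs,
				// so they are excluded from import verification.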
				ImportStateVerifyIgnore: []string{"etag", "status.0.state"},
			},
		},
	})
}

func testAccBigQueryJob_bigqueryJobLoadParquetExample(context map[string]interface{}) string {
	return Nprintf(`
resource "google_storage_bucket" "test" {
  name                        = "tf_test_job_load%{random_suffix}_bucket"
  location                    = "US"
  uniform_bucket_level_access = true
}
resource "google_storage_bucket_object" "test" {
  name   = "tf_test_job_load%{random_suffix}_bucket_object"
  source = "./test-fixtures/bigquerytable/test.parquet.gzip"
  bucket = google_storage_bucket.test.name
}
resource "google_bigquery_dataset" "test" {
  dataset_id    = "tf_test_job_load%{random_suffix}_dataset"
  friendly_name = "test"
  description   = "This is a test description"
  location      = "US"
}
resource "google_bigquery_table" "test" {
  deletion_protection = false
  table_id            = "tf_test_job_load%{random_suffix}_table"
  dataset_id          = google_bigquery_dataset.test.dataset_id
}
resource "google_bigquery_job" "job" {
  job_id = "tf_test_job_load%{random_suffix}"
  labels = {
    "my_job" = "load"
  }
  load {
    source_uris = [
      "gs://${google_storage_bucket_object.test.bucket}/${google_storage_bucket_object.test.name}"
    ]
    destination_table {
      project_id = google_bigquery_table.test.project
      dataset_id = google_bigquery_table.test.dataset_id
      table_id   = google_bigquery_table.test.table_id
    }
    schema_update_options = ["ALLOW_FIELD_RELAXATION", "ALLOW_FIELD_ADDITION"]
    write_disposition     = "WRITE_APPEND"
    source_format         = "PARQUET"
    autodetect            = true
    parquet_options {
      enum_as_string        = true
      enable_list_inference = true
    }
  }
}
`, context)
}

func TestAccBigQueryJob_bigqueryJobLoadTableReferenceExample(t *testing.T) {
t.Parallel()

79 changes: 79 additions & 0 deletions website/docs/r/bigquery_job.html.markdown
@@ -236,6 +236,70 @@ resource "google_bigquery_job" "job" {
  depends_on = ["google_storage_bucket_object.object"]
}
```
<div class = "oics-button" style="float: right; margin: 0 0 -15px">
<a href="https://console.cloud.google.com/cloudshell/open?cloudshell_git_repo=https%3A%2F%2Fgithub.com%2Fterraform-google-modules%2Fdocs-examples.git&cloudshell_working_dir=bigquery_job_load_parquet&cloudshell_image=gcr.io%2Fgraphite-cloud-shell-images%2Fterraform%3Alatest&open_in_editor=main.tf&cloudshell_print=.%2Fmotd&cloudshell_tutorial=.%2Ftutorial.md" target="_blank">
<img alt="Open in Cloud Shell" src="//gstatic.com/cloudssh/images/open-btn.svg" style="max-height: 44px; margin: 32px auto; max-width: 100%;">
</a>
</div>
## Example Usage - Bigquery Job Load Parquet


```hcl
resource "google_storage_bucket" "test" {
name = "job_load_bucket"
location = "US"
uniform_bucket_level_access = true
}
resource "google_storage_bucket_object" "test" {
name = "job_load_bucket_object"
source = "./test-fixtures/bigquerytable/test.parquet.gzip"
bucket = google_storage_bucket.test.name
}
resource "google_bigquery_dataset" "test" {
dataset_id = "job_load_dataset"
friendly_name = "test"
description = "This is a test description"
location = "US"
}
resource "google_bigquery_table" "test" {
deletion_protection = false
table_id = "job_load_table"
dataset_id = google_bigquery_dataset.test.dataset_id
}
resource "google_bigquery_job" "job" {
job_id = "job_load"
labels = {
"my_job" ="load"
}
load {
source_uris = [
"gs://${google_storage_bucket_object.test.bucket}/${google_storage_bucket_object.test.name}"
]
destination_table {
project_id = google_bigquery_table.test.project
dataset_id = google_bigquery_table.test.dataset_id
table_id = google_bigquery_table.test.table_id
}
schema_update_options = ["ALLOW_FIELD_RELAXATION", "ALLOW_FIELD_ADDITION"]
write_disposition = "WRITE_APPEND"
source_format = "PARQUET"
autodetect = true
parquet_options {
enum_as_string = true
enable_list_inference = true
}
}
}
```
## Example Usage - Bigquery Job Copy


@@ -757,6 +821,11 @@ The following arguments are supported:
Custom encryption configuration (e.g., Cloud KMS keys)
Structure is [documented below](#nested_destination_encryption_configuration).

* `parquet_options` -
(Optional)
Parquet Options for load and make external tables.
Structure is [documented below](#nested_parquet_options).


<a name="nested_destination_table"></a>The `destination_table` block supports:

@@ -801,6 +870,16 @@ The following arguments are supported:
(Output)
Describes the Cloud KMS encryption key version used to protect destination BigQuery table.

<a name="nested_parquet_options"></a>The `parquet_options` block supports:

* `enum_as_string` -
(Optional)
If sourceFormat is set to PARQUET, indicates whether to infer Parquet ENUM logical type as STRING instead of BYTES by default.

* `enable_list_inference` -
(Optional)
If sourceFormat is set to PARQUET, indicates whether to use schema inference specifically for Parquet LIST logical type.

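As a quick reference, here is a minimal sketch of how the block nests under `load`; the `true` values are illustrative, not defaults:

```hcl
load {
  # ... other load arguments ...
  source_format = "PARQUET"

  parquet_options {
    enum_as_string        = true
    enable_list_inference = true
  }
}
```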
<a name="nested_copy"></a>The `copy` block supports:

* `source_tables` -
