From c05bc7a0b791fe6f1a7534aaa5b0c72e0e1bd984 Mon Sep 17 00:00:00 2001
From: bradmiro <bmiro@google.com>
Date: Mon, 18 Sep 2023 15:54:26 -0400
Subject: [PATCH 01/20] changing dataplex tables to managed by default,
 removing manual 'upgrade' logic

---
 dataplex.tf                 | 10 +++--
 src/yaml/project-setup.yaml | 84 -------------------------------------
 2 files changed, 6 insertions(+), 88 deletions(-)

diff --git a/dataplex.tf b/dataplex.tf
index 958eb3e..69e375d 100644
--- a/dataplex.tf
+++ b/dataplex.tf
@@ -114,8 +114,9 @@ resource "google_dataplex_asset" "gcp_primary_textocr" {
   }
 
   resource_spec {
-    name = "projects/${module.project-services.project_id}/buckets/${google_storage_bucket.textocr_images_bucket.name}"
-    type = "STORAGE_BUCKET"
+    name             = "projects/${module.project-services.project_id}/buckets/${google_storage_bucket.textocr_images_bucket.name}"
+    type             = "STORAGE_BUCKET"
+    read_access_mode = "MANAGED"
   }
 
   project    = module.project-services.project_id
@@ -136,8 +137,9 @@ resource "google_dataplex_asset" "gcp_primary_ga4_obfuscated_sample_ecommerce" {
   }
 
   resource_spec {
-    name = "projects/${module.project-services.project_id}/buckets/${google_storage_bucket.ga4_images_bucket.name}"
-    type = "STORAGE_BUCKET"
+    name             = "projects/${module.project-services.project_id}/buckets/${google_storage_bucket.ga4_images_bucket.name}"
+    type             = "STORAGE_BUCKET"
+    read_access_mode = "MANAGED"
   }
 
   project    = module.project-services.project_id
diff --git a/src/yaml/project-setup.yaml b/src/yaml/project-setup.yaml
index 935cd2d..7bb702e 100644
--- a/src/yaml/project-setup.yaml
+++ b/src/yaml/project-setup.yaml
@@ -33,9 +33,6 @@ main:
                 - dataproc_service_account_name: ${dataproc_service_account}
                 - provisioner_bucket_name: ${provisioner_bucket}
                 - warehouse_bucket_name: ${warehouse_bucket}
-        - sub_upgrade_dataplex_assets:
-            call: upgrade_dataplex_assets
-            result: upgrade_dataplex_assets_output
         # TODO: change this to poll for BigQuery table creation
         - sub_wait_for_dataplex_discovery:
             call: sys.sleep
@@ -56,87 +53,6 @@ main:
             call: create_taxonomy
             result: create_taxonomy_output
 
-# Subworkflow to upgrade all Dataplex Assets to Managed
-# Subworkflow gets all lakes, then all zones within each lake, then all assets within each zone and upgrades
-upgrade_dataplex_assets:
-    steps:
-        - init:
-            assign:
-                - project_id: $${sys.get_env("GOOGLE_CLOUD_PROJECT_ID")}
-                - location: $${sys.get_env("GOOGLE_CLOUD_LOCATION")}
-                - zones: []
-        - get_lakes:
-            call: http.get
-            args:
-                url: $${"https://dataplex.googleapis.com/v1/projects/"+project_id+"/locations/"+location+"/lakes"}
-                auth:
-                    type: OAuth2
-            result: Response
-        - assign_lakes:
-            assign:
-                - response_lakes: $${Response.body.lakes}
-        - get_zones:
-            for:
-                value: lake
-                index: i
-                in: $${response_lakes}
-                steps:
-                  - get_zones_in_lake:
-                      call: http.get
-                      args:
-                          url: $${"https://dataplex.googleapis.com/v1/"+lake.name+"/zones"}
-                          auth:
-                              type: OAuth2
-                      result: Response
-                  - assign_zones:
-                      assign:
-                          - response_zones: $${Response.body.zones}
-
-                  - save_zones:
-                        for:
-                          value: zone
-                          index: j
-                          in: $${response_zones}
-                          steps:
-                              - save_to_list:
-                                  assign:
-                                    - zones: $${list.concat(zones, zone)}
-        - get_and_upgrade_all_assets:
-            for:
-                value: zone
-                index: i
-                in: $${zones}
-                steps:
-                  - get_assets_in_zone:
-                      call: http.get
-                      args:
-                          url: $${"https://dataplex.googleapis.com/v1/"+zone.name+"/assets"}
-                          auth:
-                              type: OAuth2
-                      result: Response
-                  - check_for_assets:
-                      switch:
-                        - condition: $${not("assets" in Response.body)}
-                          next: continue
-                  - assign_assets:
-                      assign:
-                          - response_assets: $${Response.body.assets}
-                  - upgrade_all_assets:
-                        for:
-                            value: asset
-                            index: j
-                            in: $${response_assets}
-                            steps:
-                              - upgrade_asset:
-                                  call: http.patch
-                                  args:
-                                      url: $${"https://dataplex.googleapis.com/v1/"+asset.name+"?updateMask=resourceSpec.readAccessMode"}
-                                      auth:
-                                          type: OAuth2
-                                      body:
-                                          resourceSpec:
-                                              readAccessMode: "MANAGED"
-
 # Subworkflow to create BigQuery views
 create_tables:
     steps:

From 308f2dc6b178eeb53b735844722ce0603fabc5f9 Mon Sep 17 00:00:00 2001
From: bradmiro <bmiro@google.com>
Date: Mon, 18 Sep 2023 16:14:26 -0400
Subject: [PATCH 02/20] scrubbing unnecessary explicit 'depends_on'

---
 dataplex.tf  |  6 +++---
 dataproc.tf  | 16 ----------------
 workflows.tf |  2 --
 3 files changed, 3 insertions(+), 21 deletions(-)

diff --git a/dataplex.tf b/dataplex.tf
index 69e375d..6e3e121 100644
--- a/dataplex.tf
+++ b/dataplex.tf
@@ -160,13 +160,13 @@ resource "google_dataplex_asset" "gcp_primary_tables" {
   }
 
   resource_spec {
-    name = "projects/${module.project-services.project_id}/buckets/${google_storage_bucket.tables_bucket.name}"
-    type = "STORAGE_BUCKET"
+    name             = "projects/${module.project-services.project_id}/buckets/${google_storage_bucket.tables_bucket.name}"
+    type             = "STORAGE_BUCKET"
+    read_access_mode = "MANAGED"
   }
 
   project    = module.project-services.project_id
   depends_on = [time_sleep.wait_after_all_resources, google_project_iam_member.dataplex_bucket_access]
-
 }
 
 
diff --git a/dataproc.tf b/dataproc.tf
index 91609b0..d51e62b 100644
--- a/dataproc.tf
+++ b/dataproc.tf
@@ -31,10 +31,6 @@ resource "google_compute_subnetwork" "subnet" {
   region                   = var.region
   network                  = google_compute_network.default_network.id
   private_ip_google_access = true
-
-  depends_on = [
-    google_compute_network.default_network,
-  ]
 }
 
 # Firewall rule for dataproc cluster
@@ -83,10 +79,6 @@ resource "google_project_iam_member" "dataproc_sa_roles" {
   project = module.project-services.project_id
   role    = each.key
   member  = "serviceAccount:${google_service_account.dataproc_service_account.email}"
-
-  depends_on = [
-    google_service_account.dataproc_service_account
-  ]
 }
 
 # # Create a BigQuery connection
@@ -103,10 +95,6 @@ resource "google_project_iam_member" "bq_connection_iam_object_viewer" {
   project = module.project-services.project_id
   role    = "roles/storage.objectViewer"
   member  = "serviceAccount:${google_bigquery_connection.ds_connection.cloud_resource[0].service_account_id}"
-
-  depends_on = [
-    google_bigquery_connection.ds_connection
-  ]
 }
 
 # # Grant IAM access to the BigQuery Connection account for BigLake Metastore
@@ -114,10 +102,6 @@ resource "google_project_iam_member" "bq_connection_iam_biglake" {
   project = module.project-services.project_id
   role    = "roles/biglake.admin"
   member  = "serviceAccount:${google_bigquery_connection.ds_connection.cloud_resource[0].service_account_id}"
-
-  depends_on = [
-    google_bigquery_connection.ds_connection
-  ]
 }
 
 # # Create a BigQuery external table.
diff --git a/workflows.tf b/workflows.tf
index ec50d71..75316fd 100644
--- a/workflows.tf
+++ b/workflows.tf
@@ -120,7 +120,6 @@ data "http" "call_workflows_copy_data" {
     Accept = "application/json"
   Authorization = "Bearer ${data.google_client_config.current.access_token}" }
   depends_on = [
-    google_workflows_workflow.copy_data,
     google_storage_bucket.textocr_images_bucket,
     google_storage_bucket.ga4_images_bucket,
     google_storage_bucket.tables_bucket
@@ -135,7 +134,6 @@ data "http" "call_workflows_project_setup" {
     Accept = "application/json"
   Authorization = "Bearer ${data.google_client_config.current.access_token}" }
   depends_on = [
-    google_workflows_workflow.project_setup,
     google_dataplex_asset.gcp_primary_textocr,
     google_dataplex_asset.gcp_primary_ga4_obfuscated_sample_ecommerce,
     google_dataplex_asset.gcp_primary_tables

From 752e109b86377e16fcb525d8338fd63b5c101bb6 Mon Sep 17 00:00:00 2001
From: bradmiro <bmiro@google.com>
Date: Mon, 18 Sep 2023 16:39:32 -0400
Subject: [PATCH 03/20] remove creating bq external table

---
 dataplex.tf |  15 ++++----
 dataproc.tf | 100 +---------------------------------------------------
 2 files changed, 8 insertions(+), 107 deletions(-)

diff --git a/dataplex.tf b/dataplex.tf
index 6e3e121..db72c2e 100644
--- a/dataplex.tf
+++ b/dataplex.tf
@@ -20,6 +20,13 @@ resource "google_project_service_identity" "dataplex_sa" {
   service  = "dataplex.googleapis.com"
 }
 
+#give dataplex access to biglake bucket
+resource "google_project_iam_member" "dataplex_bucket_access" {
+  project = module.project-services.project_id
+  role    = "roles/dataplex.serviceAgent"
+  member  = "serviceAccount:${google_project_service_identity.dataplex_sa.email}"
+}
+
 resource "google_dataplex_lake" "gcp_primary" {
   location     = var.region
   name         = "gcp-primary-lake"
@@ -168,11 +175,3 @@ resource "google_dataplex_asset" "gcp_primary_tables" {
   project    = module.project-services.project_id
   depends_on = [time_sleep.wait_after_all_resources, google_project_iam_member.dataplex_bucket_access]
 }
-
-
-#give dataplex access to biglake bucket
-resource "google_project_iam_member" "dataplex_bucket_access" {
-  project = module.project-services.project_id
-  role    = "roles/dataplex.serviceAgent"
-  member  = "serviceAccount:${google_project_service_identity.dataplex_sa.email}"
-}
diff --git a/dataproc.tf b/dataproc.tf
index d51e62b..323eed7 100644
--- a/dataproc.tf
+++ b/dataproc.tf
@@ -102,102 +102,4 @@ resource "google_project_iam_member" "bq_connection_iam_biglake" {
   project = module.project-services.project_id
   role    = "roles/biglake.admin"
   member  = "serviceAccount:${google_bigquery_connection.ds_connection.cloud_resource[0].service_account_id}"
-}
-
-# # Create a BigQuery external table.
-resource "google_bigquery_table" "tbl_thelook_events" {
-  dataset_id          = google_bigquery_dataset.gcp_lakehouse_ds.dataset_id
-  table_id            = "gcp_tbl_events"
-  project             = module.project-services.project_id
-  deletion_protection = var.deletion_protection
-
-  external_data_configuration {
-    autodetect    = true
-    connection_id = google_bigquery_connection.ds_connection.name #TODO: Change other solutions to remove hardcoded reference
-    source_format = "PARQUET"
-    source_uris   = ["gs://${var.public_data_bucket}/thelook_ecommerce/events-*.Parquet"]
-
-  }
-
-  schema = <<EOF
-[
-  {
-    "name": "id",
-    "type": "INTEGER",
-    "mode": "NULLABLE",
-    "description": ""
-  },
-  {
-    "name": "user_id",
-    "type": "INTEGER",
-    "mode": "NULLABLE",
-    "description": ""
-  },
-  {
-    "name": "sequence_number",
-    "type": "INTEGER",
-    "mode": "NULLABLE",
-    "description": ""
-  },
-  {
-    "name": "session_id",
-    "type": "STRING",
-    "mode": "NULLABLE",
-    "description": ""
-  },
-  {
-    "name": "created_at",
-    "type": "TIMESTAMP",
-    "mode": "NULLABLE",
-    "description": ""
-  },
-  {
-    "name": "ip_address",
-    "type": "STRING",
-    "mode": "NULLABLE",
-    "description": ""
-  },
-  {
-    "name": "city",
-    "type": "STRING",
-    "mode": "NULLABLE",
-    "description": ""
-  },
-  {
-    "name": "postal_code",
-    "type": "STRING",
-    "mode": "NULLABLE",
-    "description": ""
-  },
-  {
-    "name": "browser",
-    "type": "STRING",
-    "mode": "NULLABLE",
-    "description": ""
-  },
-  {
-    "name": "traffic_source",
-    "type": "STRING",
-    "mode": "NULLABLE",
-    "description": ""
-  },
-  {
-    "name": "uri",
-    "type": "STRING",
-    "mode": "NULLABLE",
-    "description": ""
-  },
-  {
-    "name": "event_type",
-    "type": "STRING",
-    "mode": "NULLABLE",
-    "description": ""
-  }
-]
-EOF
-
-  depends_on = [
-    google_bigquery_connection.ds_connection,
-    google_storage_bucket.raw_bucket
-  ]
-}
+}
\ No newline at end of file

From c4e2b7afd2fba10b321bf77487769d3a13b9fb60 Mon Sep 17 00:00:00 2001
From: bradmiro <bmiro@google.com>
Date: Mon, 18 Sep 2023 16:47:20 -0400
Subject: [PATCH 04/20] adding options for stage and nonstage table prefix

---
 src/bigquery.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/bigquery.py b/src/bigquery.py
index 3ad9e09..bade034 100644
--- a/src/bigquery.py
+++ b/src/bigquery.py
@@ -44,9 +44,14 @@
 
 
 # Load data from BigQuery.
-events = spark.read.format("bigquery") \
-    .option("table", "gcp_primary_staging.stage_thelook_ecommerce_events") \
-    .load()
+try:
+    events = spark.read.format("bigquery") \
+        .option("table", "gcp_primary_staging.thelook_ecommerce_events") \
+        .load()
+except:
+    events = spark.read.format("bigquery") \
+        .option("table", "gcp_primary_staging.stage_thelook_ecommerce_events") \
+        .load()
 events.createOrReplaceTempView("events")
 
 # Create Iceberg Table if not exists

From 642073fcd4d1e25bee787b34499ed4631524cc1b Mon Sep 17 00:00:00 2001
From: bradmiro <bmiro@google.com>
Date: Thu, 21 Sep 2023 18:34:14 -0400
Subject: [PATCH 05/20] remove wait_after_all_resources and move dependencies
 to more appropriate spots

---
 dataplex.tf  |  6 +++---
 dataproc.tf  |  4 ++++
 main.tf      | 18 ------------------
 workflows.tf | 23 +++++++++++++++++++++--
 4 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/dataplex.tf b/dataplex.tf
index db72c2e..07c41df 100644
--- a/dataplex.tf
+++ b/dataplex.tf
@@ -127,7 +127,7 @@ resource "google_dataplex_asset" "gcp_primary_textocr" {
   }
 
   project    = module.project-services.project_id
-  depends_on = [time_sleep.wait_after_all_resources, google_project_iam_member.dataplex_bucket_access]
+  depends_on = [time_sleep.wait_after_copy_data, google_project_iam_member.dataplex_bucket_access]
 
 }
 
@@ -150,7 +150,7 @@ resource "google_dataplex_asset" "gcp_primary_ga4_obfuscated_sample_ecommerce" {
   }
 
   project    = module.project-services.project_id
-  depends_on = [time_sleep.wait_after_all_resources, google_project_iam_member.dataplex_bucket_access]
+  depends_on = [time_sleep.wait_after_copy_data, google_project_iam_member.dataplex_bucket_access]
 
 }
 
@@ -173,5 +173,5 @@ resource "google_dataplex_asset" "gcp_primary_tables" {
   }
 
   project    = module.project-services.project_id
-  depends_on = [time_sleep.wait_after_all_resources, google_project_iam_member.dataplex_bucket_access]
+  depends_on = [time_sleep.wait_after_copy_data, google_project_iam_member.dataplex_bucket_access]
 }
diff --git a/dataproc.tf b/dataproc.tf
index 323eed7..8075d04 100644
--- a/dataproc.tf
+++ b/dataproc.tf
@@ -102,4 +102,8 @@ resource "google_project_iam_member" "bq_connection_iam_biglake" {
   project = module.project-services.project_id
   role    = "roles/biglake.admin"
   member  = "serviceAccount:${google_bigquery_connection.ds_connection.cloud_resource[0].service_account_id}"
+}
+
+resource "google_dataproc_cluster" "phs" {
+
 }
\ No newline at end of file
diff --git a/main.tf b/main.tf
index 26cc876..01d2d81 100644
--- a/main.tf
+++ b/main.tf
@@ -161,21 +161,3 @@ resource "google_storage_bucket" "dataplex_bucket" {
   uniform_bucket_level_access = true
   force_destroy               = var.force_destroy
 }
-
-# Resources are dependent on one another. We will ensure the following set of resources are created before proceeding.
-resource "time_sleep" "wait_after_all_resources" {
-  create_duration = "120s"
-  depends_on = [
-    module.project-services,
-    google_storage_bucket.provisioning_bucket,
-    google_bigquery_dataset.gcp_lakehouse_ds,
-    google_bigquery_connection.gcp_lakehouse_connection,
-    google_project_iam_member.connectionPermissionGrant,
-    google_workflows_workflow.project_setup,
-    google_dataplex_zone.gcp_primary_raw,
-    google_dataplex_zone.gcp_primary_staging,
-    google_dataplex_zone.gcp_primary_curated_bi,
-    data.google_storage_project_service_account.gcs_account,
-    data.http.call_workflows_copy_data
-  ]
-}
diff --git a/workflows.tf b/workflows.tf
index 75316fd..910f9a0 100644
--- a/workflows.tf
+++ b/workflows.tf
@@ -126,7 +126,15 @@ data "http" "call_workflows_copy_data" {
   ]
 }
 
-# # execute the other project setup workflow
+resource "time_sleep" "wait_after_copy_data" {
+  create_duration = "30s"
+  depends_on = [
+    data.google_storage_project_service_account.gcs_account,
+    data.http.call_workflows_copy_data
+  ]
+}
+
+# execute the other project setup workflow
 data "http" "call_workflows_project_setup" {
   url    = "https://workflowexecutions.googleapis.com/v1/projects/${module.project-services.project_id}/locations/${var.region}/workflows/${google_workflows_workflow.project_setup.name}/executions"
   method = "POST"
@@ -134,9 +142,20 @@ data "http" "call_workflows_project_setup" {
     Accept = "application/json"
   Authorization = "Bearer ${data.google_client_config.current.access_token}" }
   depends_on = [
+    google_storage_bucket.temp_bucket,
+    google_storage_bucket.provisioning_bucket,
+    google_storage_bucket.warehouse_bucket,
+    google_storage_bucket.dataproc_service_account,
+    google_bigquery_dataset.gcp_lakehouse_ds,
+    google_bigquery_connection.gcp_lakehouse_connection,
     google_dataplex_asset.gcp_primary_textocr,
     google_dataplex_asset.gcp_primary_ga4_obfuscated_sample_ecommerce,
-    google_dataplex_asset.gcp_primary_tables
+    google_dataplex_asset.gcp_primary_tables,
+    google_project_iam_member.connectionPermissionGrant,
+    google_project_iam_member.dataproc_sa_roles,
+
+    google_project_iam_member.connectionPermissionGrant,
+    time_sleep.wait_after_copy_data
   ]
 }
 

From e83e8d9396604ae3cf2e37cc669c7be0dd2825f5 Mon Sep 17 00:00:00 2001
From: bradmiro <bmiro@google.com>
Date: Fri, 22 Sep 2023 15:46:34 -0400
Subject: [PATCH 06/20] remove empty dataproc cluster stub, reorder project_setup depends_on

---
 dataproc.tf  |  4 ----
 workflows.tf | 13 ++++++-------
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/dataproc.tf b/dataproc.tf
index 8075d04..fd86315 100644
--- a/dataproc.tf
+++ b/dataproc.tf
@@ -103,7 +103,3 @@ resource "google_project_iam_member" "bq_connection_iam_biglake" {
   role    = "roles/biglake.admin"
   member  = "serviceAccount:${google_bigquery_connection.ds_connection.cloud_resource[0].service_account_id}"
 }
-
-resource "google_dataproc_cluster" "phs" {
-
-}
\ No newline at end of file
diff --git a/workflows.tf b/workflows.tf
index 910f9a0..3b25bf9 100644
--- a/workflows.tf
+++ b/workflows.tf
@@ -142,19 +142,18 @@ data "http" "call_workflows_project_setup" {
     Accept = "application/json"
   Authorization = "Bearer ${data.google_client_config.current.access_token}" }
   depends_on = [
-    google_storage_bucket.temp_bucket,
-    google_storage_bucket.provisioning_bucket,
-    google_storage_bucket.warehouse_bucket,
-    google_storage_bucket.dataproc_service_account,
     google_bigquery_dataset.gcp_lakehouse_ds,
     google_bigquery_connection.gcp_lakehouse_connection,
-    google_dataplex_asset.gcp_primary_textocr,
     google_dataplex_asset.gcp_primary_ga4_obfuscated_sample_ecommerce,
     google_dataplex_asset.gcp_primary_tables,
+    google_dataplex_asset.gcp_primary_textocr,
     google_project_iam_member.connectionPermissionGrant,
-    google_project_iam_member.dataproc_sa_roles,
-
     google_project_iam_member.connectionPermissionGrant,
+    google_project_iam_member.dataproc_sa_roles,
+    google_storage_bucket.temp_bucket,
+    google_storage_bucket.provisioning_bucket,
+    google_storage_bucket.warehouse_bucket,
+    google_storage_bucket.dataproc_service_account,
     time_sleep.wait_after_copy_data
   ]
 }

From 869d5aeabc9e9904839f0bef3845260270462414 Mon Sep 17 00:00:00 2001
From: bradmiro <bmiro@google.com>
Date: Tue, 26 Sep 2023 11:42:31 -0400
Subject: [PATCH 07/20] updating google provider version constraint

---
 versions.tf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/versions.tf b/versions.tf
index d9e3288..95afc51 100644
--- a/versions.tf
+++ b/versions.tf
@@ -18,7 +18,7 @@ terraform {
   required_providers {
     google = {
       source  = "hashicorp/google"
-      version = "<= 4.69.0, != 4.65.0, != 4.65.1"
+      version = ">= 4.83.0, <= 4.89.0"
     }
     google-beta = {
       source  = "hashicorp/google-beta"

From c14106da8c789d7b93abe3161077a38a38b1e455 Mon Sep 17 00:00:00 2001
From: bradmiro <bmiro@google.com>
Date: Tue, 26 Sep 2023 12:23:21 -0400
Subject: [PATCH 08/20] removing temporary bucket

---
 src/bigquery.py | 4 ++--
 workflows.tf    | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/bigquery.py b/src/bigquery.py
index bade034..69d81d3 100644
--- a/src/bigquery.py
+++ b/src/bigquery.py
@@ -25,14 +25,14 @@
 
 catalog = os.getenv("lakehouse_catalog", "lakehouse_catalog")
 database = os.getenv("lakehouse_db", "lakehouse_db")
-bucket = os.getenv("temp_bucket", "gcp-lakehouse-provisioner-8a68acad")
+# bucket = os.getenv("temp_bucket", "gcp-lakehouse-provisioner-8a68acad")
 bq_dataset = os.getenv("bq_dataset", "gcp_lakehouse_ds")
 bq_connection = os.getenv("bq_gcs_connection",
                           "us-central1.gcp_gcs_connection")
 
 # Use the Cloud Storage bucket for temporary BigQuery export data
 # used by the connector.
-spark.conf.set("temporaryGcsBucket", bucket)
+# spark.conf.set("temporaryGcsBucket", bucket)
 
 # Delete the BigLake Catalog if it currently exists to ensure proper setup.
 spark.sql(f"DROP NAMESPACE IF EXISTS {catalog} CASCADE;")
diff --git a/workflows.tf b/workflows.tf
index 3b25bf9..a3c9948 100644
--- a/workflows.tf
+++ b/workflows.tf
@@ -150,7 +150,7 @@ data "http" "call_workflows_project_setup" {
     google_project_iam_member.connectionPermissionGrant,
     google_project_iam_member.connectionPermissionGrant,
     google_project_iam_member.dataproc_sa_roles,
-    google_storage_bucket.temp_bucket,
+    # google_storage_bucket.temp_bucket,
     google_storage_bucket.provisioning_bucket,
     google_storage_bucket.warehouse_bucket,
     google_storage_bucket.dataproc_service_account,

From 0865d5879fa0787e06b0afb51a50be8e07f571c2 Mon Sep 17 00:00:00 2001
From: bradmiro <bmiro@google.com>
Date: Tue, 26 Sep 2023 12:28:45 -0400
Subject: [PATCH 09/20] lint fixes

---
 workflows.tf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflows.tf b/workflows.tf
index a3c9948..e121169 100644
--- a/workflows.tf
+++ b/workflows.tf
@@ -150,10 +150,10 @@ data "http" "call_workflows_project_setup" {
     google_project_iam_member.connectionPermissionGrant,
     google_project_iam_member.connectionPermissionGrant,
     google_project_iam_member.dataproc_sa_roles,
+    google_service_account.dataproc_service_account,
     # google_storage_bucket.temp_bucket,
     google_storage_bucket.provisioning_bucket,
     google_storage_bucket.warehouse_bucket,
-    google_storage_bucket.dataproc_service_account,
     time_sleep.wait_after_copy_data
   ]
 }

From 23b335cfa13e832ce1095517c7dd1bb95645a971 Mon Sep 17 00:00:00 2001
From: bradmiro <bmiro@google.com>
Date: Tue, 26 Sep 2023 15:18:24 -0400
Subject: [PATCH 10/20] lint cleanup

---
 src/bigquery.py |  4 +++-
 variables.tf    | 12 ------------
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/src/bigquery.py b/src/bigquery.py
index 69d81d3..62d33c1 100644
--- a/src/bigquery.py
+++ b/src/bigquery.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 
 """BigQuery I/O with BigLake Iceberg PySpark example."""
+from py4j.protocol import Py4JJavaError
+
 from pyspark.sql import SparkSession
 import os
 
@@ -48,7 +50,7 @@
     events = spark.read.format("bigquery") \
         .option("table", "gcp_primary_staging.thelook_ecommerce_events") \
         .load()
-except:
+except Py4JJavaError:
     events = spark.read.format("bigquery") \
         .option("table", "gcp_primary_staging.stage_thelook_ecommerce_events") \
         .load()
diff --git a/variables.tf b/variables.tf
index d48efd1..21b8177 100644
--- a/variables.tf
+++ b/variables.tf
@@ -48,20 +48,8 @@ variable "force_destroy" {
   default     = false
 }
 
-variable "deletion_protection" {
-  type        = string
-  description = "Whether or not to protect GCS resources from deletion when solution is modified or changed."
-  default     = true
-}
-
 variable "use_case_short" {
   type        = string
   description = "Short name for use case"
   default     = "lakehouse"
 }
-
-variable "public_data_bucket" {
-  type        = string
-  description = "Public Data bucket for access"
-  default     = "data-analytics-demos"
-}

From af117fd893c0a31e5afd6553c11fce456dc38c73 Mon Sep 17 00:00:00 2001
From: bradmiro <bmiro@google.com>
Date: Tue, 26 Sep 2023 15:42:55 -0400
Subject: [PATCH 11/20] remove 'deletion_protection' from example

---
 examples/analytics_lakehouse/main.tf | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/examples/analytics_lakehouse/main.tf b/examples/analytics_lakehouse/main.tf
index bd65a98..057e705 100644
--- a/examples/analytics_lakehouse/main.tf
+++ b/examples/analytics_lakehouse/main.tf
@@ -17,9 +17,8 @@
 module "analytics_lakehouse" {
   source = "../.."
 
-  project_id          = var.project_id
-  region              = "us-central1"
-  deletion_protection = false
-  force_destroy       = true
+  project_id    = var.project_id
+  region        = "us-central1"
+  force_destroy = true
 
 }

From 9c9b1ec49c9d75c464fb2de45ef19bea19893edd Mon Sep 17 00:00:00 2001
From: bradmiro <bmiro@google.com>
Date: Wed, 4 Oct 2023 12:24:33 -0400
Subject: [PATCH 12/20] resolving merge conflict

---
 examples/analytics_lakehouse/main.tf | 7 ++++---
 src/bigquery.py                      | 3 ++-
 variables.tf                         | 6 ++++++
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/examples/analytics_lakehouse/main.tf b/examples/analytics_lakehouse/main.tf
index 057e705..bd65a98 100644
--- a/examples/analytics_lakehouse/main.tf
+++ b/examples/analytics_lakehouse/main.tf
@@ -17,8 +17,9 @@
 module "analytics_lakehouse" {
   source = "../.."
 
-  project_id    = var.project_id
-  region        = "us-central1"
-  force_destroy = true
+  project_id          = var.project_id
+  region              = "us-central1"
+  deletion_protection = false
+  force_destroy       = true
 
 }
diff --git a/src/bigquery.py b/src/bigquery.py
index 62d33c1..11b62d4 100644
--- a/src/bigquery.py
+++ b/src/bigquery.py
@@ -52,7 +52,8 @@
         .load()
 except Py4JJavaError:
     events = spark.read.format("bigquery") \
-        .option("table", "gcp_primary_staging.stage_thelook_ecommerce_events") \
+        .option("table",
+                "gcp_primary_staging.stage_thelook_ecommerce_events") \
         .load()
 events.createOrReplaceTempView("events")
 
diff --git a/variables.tf b/variables.tf
index 21b8177..169f26e 100644
--- a/variables.tf
+++ b/variables.tf
@@ -48,6 +48,12 @@ variable "force_destroy" {
   default     = false
 }
 
+variable "deletion_protection" {
+  type        = string
+  description = "Whether or not to protect GCS resources from deletion when solution is modified or changed."
+  default     = true
+}
+
 variable "use_case_short" {
   type        = string
   description = "Short name for use case"

From 28ec8c713f2991a3e66b4e42866dbffa0d47a1e3 Mon Sep 17 00:00:00 2001
From: bradmiro <bmiro@google.com>
Date: Wed, 27 Sep 2023 17:20:11 -0400
Subject: [PATCH 13/20] removing deletion_protection, fixing docstrings, adding
 public_data_bucket to workflow

---
 README.md               |  1 -
 bigquery.tf             |  2 --
 src/yaml/copy-data.yaml |  2 +-
 variables.tf            | 14 +++++++-------
 versions.tf             |  2 +-
 workflows.tf            |  1 +
 6 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index e23acde..8168ee6 100644
--- a/README.md
+++ b/README.md
@@ -41,7 +41,6 @@ Functional examples are included in the
 
 | Name | Description | Type | Default | Required |
 |------|-------------|------|---------|:--------:|
-| deletion\_protection | Whether or not to protect GCS resources from deletion when solution is modified or changed. | `string` | `true` | no |
 | enable\_apis | Whether or not to enable underlying apis in this solution. . | `string` | `true` | no |
 | force\_destroy | Whether or not to protect BigQuery resources from deletion when solution is modified or changed. | `string` | `false` | no |
 | labels | A map of labels to apply to contained resources. | `map(string)` | <pre>{<br>  "analytics-lakehouse": true<br>}</pre> | no |
diff --git a/bigquery.tf b/bigquery.tf
index cd2a34d..dd86d63 100644
--- a/bigquery.tf
+++ b/bigquery.tf
@@ -26,8 +26,6 @@ resource "google_bigquery_dataset" "gcp_lakehouse_ds" {
   delete_contents_on_destroy = var.force_destroy
 }
 
-
-
 # # Create a BigQuery connection
 resource "google_bigquery_connection" "gcp_lakehouse_connection" {
   project       = module.project-services.project_id
diff --git a/src/yaml/copy-data.yaml b/src/yaml/copy-data.yaml
index 9901d7e..75dd0a0 100644
--- a/src/yaml/copy-data.yaml
+++ b/src/yaml/copy-data.yaml
@@ -18,7 +18,7 @@ main:
         - init:
             # Define local variables from terraform env variables
             assign:
-                - source_bucket_name: "data-analytics-demos"
+                - source_bucket_name: ${public_data_bucket}
                 - dest_ga4_images_bucket_name: ${ga4_images_bucket}
                 - dest_textocr_images_bucket_name: ${textocr_images_bucket}
                 - dest_tables_bucket_name: ${tables_bucket}
diff --git a/variables.tf b/variables.tf
index 169f26e..f8fa017 100644
--- a/variables.tf
+++ b/variables.tf
@@ -43,15 +43,9 @@ variable "enable_apis" {
 }
 
 variable "force_destroy" {
-  type        = string
-  description = "Whether or not to protect BigQuery resources from deletion when solution is modified or changed."
-  default     = false
-}
-
-variable "deletion_protection" {
   type        = string
   description = "Whether or not to protect GCS resources from deletion when solution is modified or changed."
-  default     = true
+  default     = false
 }
 
 variable "use_case_short" {
@@ -59,3 +53,9 @@ variable "use_case_short" {
   description = "Short name for use case"
   default     = "lakehouse"
 }
+
+variable "public_data_bucket" {
+  type        = string
+  description = "Public Data bucket for access"
+  default     = "data-analytics-demos"
+}
diff --git a/versions.tf b/versions.tf
index 95afc51..9a48b03 100644
--- a/versions.tf
+++ b/versions.tf
@@ -41,7 +41,7 @@ terraform {
       version = ">= 3.2.1"
     }
   }
-  required_version = ">= 0.13"
+  required_version = ">= 0.13, <= 1.2.3"
 
   provider_meta "google" {
     module_name = "blueprints/terraform/terraform-google-analytics-lakehouse/v0.3.0"
diff --git a/workflows.tf b/workflows.tf
index e121169..424307b 100644
--- a/workflows.tf
+++ b/workflows.tf
@@ -98,6 +98,7 @@ resource "google_workflows_workflow" "project_setup" {
     provisioner_bucket       = google_storage_bucket.provisioning_bucket.name,
     warehouse_bucket         = google_storage_bucket.warehouse_bucket.name,
     temp_bucket              = google_storage_bucket.warehouse_bucket.name,
+    public_data_bucket       = var.public_data_bucket
   })
 
   depends_on = [

From b60add1b5da15a4a779ed1b5ac86b6e790a71ccd Mon Sep 17 00:00:00 2001
From: bradmiro <bmiro@google.com>
Date: Wed, 27 Sep 2023 17:27:33 -0400
Subject: [PATCH 14/20] revert version pinning

---
 versions.tf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/versions.tf b/versions.tf
index 9a48b03..95afc51 100644
--- a/versions.tf
+++ b/versions.tf
@@ -41,7 +41,7 @@ terraform {
       version = ">= 3.2.1"
     }
   }
-  required_version = ">= 0.13, <= 1.2.3"
+  required_version = ">= 0.13"
 
   provider_meta "google" {
     module_name = "blueprints/terraform/terraform-google-analytics-lakehouse/v0.3.0"

From b2131cce0b959e2d579dcb627e83e26e923baaad Mon Sep 17 00:00:00 2001
From: bradmiro <bmiro@google.com>
Date: Wed, 27 Sep 2023 17:42:55 -0400
Subject: [PATCH 15/20] remove deletion_protection from example

---
 examples/analytics_lakehouse/main.tf | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/examples/analytics_lakehouse/main.tf b/examples/analytics_lakehouse/main.tf
index bd65a98..057e705 100644
--- a/examples/analytics_lakehouse/main.tf
+++ b/examples/analytics_lakehouse/main.tf
@@ -17,9 +17,8 @@
 module "analytics_lakehouse" {
   source = "../.."
 
-  project_id          = var.project_id
-  region              = "us-central1"
-  deletion_protection = false
-  force_destroy       = true
+  project_id    = var.project_id
+  region        = "us-central1"
+  force_destroy = true
 
 }

From a89b7b687f578d6d8cd3f6188dd0d183f21431c4 Mon Sep 17 00:00:00 2001
From: bradmiro <bmiro@google.com>
Date: Wed, 27 Sep 2023 17:50:31 -0400
Subject: [PATCH 16/20] move public_data_bucket template var to copy_data workflow

---
 workflows.tf | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/workflows.tf b/workflows.tf
index 424307b..eca9444 100644
--- a/workflows.tf
+++ b/workflows.tf
@@ -66,6 +66,7 @@ resource "google_workflows_workflow" "copy_data" {
   description     = "Copies data and performs project setup"
   service_account = google_service_account.workflows_sa.email
   source_contents = templatefile("${path.module}/src/yaml/copy-data.yaml", {
+    public_data_bucket    = var.public_data_bucket,
     textocr_images_bucket = google_storage_bucket.textocr_images_bucket.name,
     ga4_images_bucket     = google_storage_bucket.ga4_images_bucket.name,
     tables_bucket         = google_storage_bucket.tables_bucket.name,
@@ -97,8 +98,7 @@ resource "google_workflows_workflow" "project_setup" {
     dataproc_service_account = google_service_account.dataproc_service_account.email,
     provisioner_bucket       = google_storage_bucket.provisioning_bucket.name,
     warehouse_bucket         = google_storage_bucket.warehouse_bucket.name,
-    temp_bucket              = google_storage_bucket.warehouse_bucket.name,
-    public_data_bucket       = var.public_data_bucket
+    temp_bucket              = google_storage_bucket.warehouse_bucket.name
   })
 
   depends_on = [
@@ -169,4 +169,4 @@ resource "time_sleep" "wait_after_all_workflows" {
   depends_on = [
     data.http.call_workflows_project_setup,
   ]
-}
+}
\ No newline at end of file

From 1909f2cb2321aba48c5ba6769bd838287a1d35b1 Mon Sep 17 00:00:00 2001
From: bradmiro <bmiro@google.com>
Date: Wed, 27 Sep 2023 18:08:50 -0400
Subject: [PATCH 17/20] lint fix

---
 workflows.tf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflows.tf b/workflows.tf
index eca9444..f56f0eb 100644
--- a/workflows.tf
+++ b/workflows.tf
@@ -169,4 +169,4 @@ resource "time_sleep" "wait_after_all_workflows" {
   depends_on = [
     data.http.call_workflows_project_setup,
   ]
-}
\ No newline at end of file
+}

From cbc5ee1441912713ad5ebafb4783fa9345873cb7 Mon Sep 17 00:00:00 2001
From: bradmiro <bmiro@google.com>
Date: Wed, 27 Sep 2023 18:16:14 -0400
Subject: [PATCH 18/20] adding docs updates

---
 README.md     |   2 +-
 metadata.yaml | 115 ++++++++++++++++++++++++--------------------------
 2 files changed, 57 insertions(+), 60 deletions(-)

diff --git a/README.md b/README.md
index 8168ee6..22d23d3 100644
--- a/README.md
+++ b/README.md
@@ -42,7 +42,7 @@ Functional examples are included in the
 | Name | Description | Type | Default | Required |
 |------|-------------|------|---------|:--------:|
 | enable\_apis | Whether or not to enable underlying apis in this solution. . | `string` | `true` | no |
-| force\_destroy | Whether or not to protect BigQuery resources from deletion when solution is modified or changed. | `string` | `false` | no |
+| force\_destroy | Whether or not to protect GCS resources from deletion when solution is modified or changed. | `string` | `false` | no |
 | labels | A map of labels to apply to contained resources. | `map(string)` | <pre>{<br>  "analytics-lakehouse": true<br>}</pre> | no |
 | project\_id | Google Cloud Project ID | `string` | n/a | yes |
 | public\_data\_bucket | Public Data bucket for access | `string` | `"data-analytics-demos"` | no |
diff --git a/metadata.yaml b/metadata.yaml
index fc57108..4b7f003 100644
--- a/metadata.yaml
+++ b/metadata.yaml
@@ -24,74 +24,71 @@ spec:
     source:
       repo: https://github.com/GoogleCloudPlatform/terraform-google-analytics-lakehouse.git
       sourceType: git
-    version: 0.1.0
+    version: 0.3.0
     actuationTool:
       flavor: Terraform
-      version: '>= 0.13'
+      version: ">= 0.13"
     description: {}
   content:
     documentation:
-    - title: Create an Analytics Lakehouse
-      url: https://cloud.google.com/architecture/big-data-analytics/analytics-lakehouse
+      - title: Create an Analytics Lakehouse
+        url: https://cloud.google.com/architecture/big-data-analytics/analytics-lakehouse
     examples:
-    - name: analytics_lakehouse
-      location: examples/analytics_lakehouse
+      - name: analytics_lakehouse
+        location: examples/analytics_lakehouse
   interfaces:
     variables:
-    - name: deletion_protection
-      description: Whether or not to protect GCS resources from deletion when solution is modified or changed.
-      varType: string
-      defaultValue: true
-    - name: enable_apis
-      description: Whether or not to enable underlying apis in this solution. .
-      varType: string
-      defaultValue: true
-    - name: force_destroy
-      description: Whether or not to protect BigQuery resources from deletion when solution is modified or changed.
-      varType: string
-      defaultValue: false
-    - name: labels
-      description: A map of labels to apply to contained resources.
-      varType: map(string)
-      defaultValue:
-        analytics-lakehouse: true
-    - name: project_id
-      description: Google Cloud Project ID
-      varType: string
-      required: true
-    - name: public_data_bucket
-      description: Public Data bucket for access
-      varType: string
-      defaultValue: data-analytics-demos
-    - name: region
-      description: Google Cloud Region
-      varType: string
-      defaultValue: us-central1
-    - name: use_case_short
-      description: Short name for use case
-      varType: string
-      defaultValue: lakehouse
+      - name: enable_apis
+        description: Whether or not to enable underlying apis in this solution. .
+        varType: string
+        defaultValue: true
+      - name: force_destroy
+        description: Whether or not to protect GCS resources from deletion when solution is modified or changed.
+        varType: string
+        defaultValue: false
+      - name: labels
+        description: A map of labels to apply to contained resources.
+        varType: map(string)
+        defaultValue:
+          analytics-lakehouse: true
+      - name: project_id
+        description: Google Cloud Project ID
+        varType: string
+        defaultValue: null
+        required: true
+      - name: public_data_bucket
+        description: Public Data bucket for access
+        varType: string
+        defaultValue: data-analytics-demos
+      - name: region
+        description: Google Cloud Region
+        varType: string
+        defaultValue: us-central1
+      - name: use_case_short
+        description: Short name for use case
+        varType: string
+        defaultValue: lakehouse
     outputs:
-    - name: bigquery_editor_url
-      description: The URL to launch the BigQuery editor
-    - name: lakehouse_colab_url
-      description: The URL to launch the in-console tutorial for the Analytics Lakehouse solution
-    - name: lookerstudio_report_url
-      description: The URL to create a new Looker Studio report displays a sample dashboard for data analysis
-    - name: neos_tutorial_url
-      description: The URL to launch the in-console tutorial for the Analytics Lakehouse solution
-    - name: workflow_return_project_setup
-      description: Output of the project setup workflow
+      - name: bigquery_editor_url
+        description: The URL to launch the BigQuery editor
+      - name: lakehouse_colab_url
+        description: The URL to launch the in-console tutorial for the Analytics Lakehouse solution
+      - name: lookerstudio_report_url
+        description: The URL to create a new Looker Studio report that displays a sample dashboard for data analysis
+      - name: neos_tutorial_url
+        description: The URL to launch the in-console tutorial for the Analytics Lakehouse solution
+      - name: workflow_return_project_setup
+        description: Output of the project setup workflow
   requirements:
     roles:
-    - level: Project
-      roles:
-      - roles/owner
+      - level: Project
+        roles:
+          - roles/owner
     services:
-    - cloudkms.googleapis.com
-    - cloudresourcemanager.googleapis.com
-    - bigquery.googleapis.com
-    - bigquerystorage.googleapis.com
-    - bigqueryconnection.googleapis.com
-    - serviceusage.googleapis.com
-    - iam.googleapis.com
+      - cloudkms.googleapis.com
+      - cloudresourcemanager.googleapis.com
+      - bigquery.googleapis.com
+      - bigquerystorage.googleapis.com
+      - bigqueryconnection.googleapis.com
+      - serviceusage.googleapis.com
+      - iam.googleapis.com

From f8771b7ecf19520e9e5763f546b662be40b997fe Mon Sep 17 00:00:00 2001
From: bradmiro <bmiro@google.com>
Date: Wed, 27 Sep 2023 22:26:01 -0400
Subject: [PATCH 19/20] remove stage_ references

---
 src/bigquery.py            | 14 +++-----------
 src/sql/view_ecommerce.sql | 10 +++++-----
 2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/src/bigquery.py b/src/bigquery.py
index 11b62d4..1069923 100644
--- a/src/bigquery.py
+++ b/src/bigquery.py
@@ -14,8 +14,6 @@
 # limitations under the License.
 
 """BigQuery I/O with BigLake Iceberg PySpark example."""
-from py4j.protocol import Py4JJavaError
-
 from pyspark.sql import SparkSession
 import os
 
@@ -46,15 +44,9 @@
 
 
 # Load data from BigQuery.
-try:
-    events = spark.read.format("bigquery") \
-        .option("table", "gcp_primary_staging.thelook_ecommerce_events") \
-        .load()
-except Py4JJavaError:
-    events = spark.read.format("bigquery") \
-        .option("table",
-                "gcp_primary_staging.stage_thelook_ecommerce_events") \
-        .load()
+events = spark.read.format("bigquery") \
+    .option("table", "gcp_primary_staging.thelook_ecommerce_events") \
+    .load()
 events.createOrReplaceTempView("events")
 
 # Create Iceberg Table if not exists
diff --git a/src/sql/view_ecommerce.sql b/src/sql/view_ecommerce.sql
index a83dfba..fa104fb 100644
--- a/src/sql/view_ecommerce.sql
+++ b/src/sql/view_ecommerce.sql
@@ -51,21 +51,21 @@ SELECT
   u.longitude user_long,
   u.traffic_source user_traffic_source
 FROM
-  gcp_primary_staging.stage_thelook_ecommerce_orders o
+  gcp_primary_staging.thelook_ecommerce_orders o
 INNER JOIN
-  gcp_primary_staging.stage_thelook_ecommerce_order_items i
+  gcp_primary_staging.thelook_ecommerce_order_items i
 ON
   o.order_id = i.order_id
 INNER JOIN
-  `gcp_primary_staging.stage_thelook_ecommerce_products` p
+  `gcp_primary_staging.thelook_ecommerce_products` p
 ON
   i.product_id = p.id
 INNER JOIN
-  `gcp_primary_staging.stage_thelook_ecommerce_distribution_centers` d
+  `gcp_primary_staging.thelook_ecommerce_distribution_centers` d
 ON
   p.distribution_center_id = d.id
 INNER JOIN
-  `gcp_primary_staging.stage_thelook_ecommerce_users` u
+  `gcp_primary_staging.thelook_ecommerce_users` u
 ON
   o.user_id = u.id
 ;

From b893a5828128b987d5a72fd989ea1cd2e9f3ec58 Mon Sep 17 00:00:00 2001
From: bradmiro <bmiro@google.com>
Date: Wed, 4 Oct 2023 12:25:50 -0400
Subject: [PATCH 20/20] add comment to dataplex bucket and group it with other buckets

---
 main.tf | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/main.tf b/main.tf
index 01d2d81..f703699 100644
--- a/main.tf
+++ b/main.tf
@@ -144,6 +144,15 @@ resource "google_storage_bucket" "tables_bucket" {
   force_destroy               = var.force_destroy
 }
 
+# Bucket used to store BI data in Dataplex
+resource "google_storage_bucket" "dataplex_bucket" {
+  name                        = "gcp-${var.use_case_short}-dataplex-${random_id.id.hex}"
+  project                     = module.project-services.project_id
+  location                    = var.region
+  uniform_bucket_level_access = true
+  force_destroy               = var.force_destroy
+}
+
 resource "google_storage_bucket_object" "pyspark_file" {
   bucket = google_storage_bucket.provisioning_bucket.name
   name   = "bigquery.py"
@@ -153,11 +162,3 @@ resource "google_storage_bucket_object" "pyspark_file" {
     google_storage_bucket.provisioning_bucket
   ]
 }
-
-resource "google_storage_bucket" "dataplex_bucket" {
-  name                        = "gcp-${var.use_case_short}-dataplex-${random_id.id.hex}"
-  project                     = module.project-services.project_id
-  location                    = var.region
-  uniform_bucket_level_access = true
-  force_destroy               = var.force_destroy
-}