fix: Update Terraform versioning, improve dependency tree, remove unused table, add Managed Tables to Dataplex Assets #72

Merged · 20 commits · Oct 4, 2023
3 changes: 1 addition & 2 deletions README.md
@@ -41,9 +41,8 @@ Functional examples are included in the

| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| deletion\_protection | Whether or not to protect GCS resources from deletion when solution is modified or changed. | `string` | `true` | no |
| enable\_apis | Whether or not to enable underlying APIs in this solution. | `string` | `true` | no |
| force\_destroy | Whether or not to protect BigQuery resources from deletion when solution is modified or changed. | `string` | `false` | no |
| force\_destroy | Whether or not to protect GCS resources from deletion when solution is modified or changed. | `string` | `false` | no |
| labels | A map of labels to apply to contained resources. | `map(string)` | <pre>{<br> "analytics-lakehouse": true<br>}</pre> | no |
| project\_id | Google Cloud Project ID | `string` | n/a | yes |
| public\_data\_bucket | Public Data bucket for access | `string` | `"data-analytics-demos"` | no |
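For orientation, a minimal call of the module using the inputs documented above might look like the following sketch (the values are placeholders, and the relative source path mirrors the bundled example; the updated examples/analytics_lakehouse/main.tf later in this diff follows the same pattern):

module "analytics_lakehouse" {
  source = "../.."                # relative path as in the bundled example; a registry source also works

  project_id    = "my-project-id" # placeholder project ID
  region        = "us-central1"
  force_destroy = true            # allow bucket and dataset contents to be deleted on teardown
}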
2 changes: 0 additions & 2 deletions bigquery.tf
@@ -26,8 +26,6 @@ resource "google_bigquery_dataset" "gcp_lakehouse_ds" {
delete_contents_on_destroy = var.force_destroy
}



# # Create a BigQuery connection
resource "google_bigquery_connection" "gcp_lakehouse_connection" {
project = module.project-services.project_id
37 changes: 19 additions & 18 deletions dataplex.tf
@@ -20,6 +20,13 @@ resource "google_project_service_identity" "dataplex_sa" {
service = "dataplex.googleapis.com"
}

#give dataplex access to biglake bucket
resource "google_project_iam_member" "dataplex_bucket_access" {
project = module.project-services.project_id
role = "roles/dataplex.serviceAgent"
member = "serviceAccount:${google_project_service_identity.dataplex_sa.email}"
}

resource "google_dataplex_lake" "gcp_primary" {
location = var.region
name = "gcp-primary-lake"
@@ -114,12 +121,13 @@ resource "google_dataplex_asset" "gcp_primary_textocr" {
}

resource_spec {
name = "projects/${module.project-services.project_id}/buckets/${google_storage_bucket.textocr_images_bucket.name}"
type = "STORAGE_BUCKET"
name = "projects/${module.project-services.project_id}/buckets/${google_storage_bucket.textocr_images_bucket.name}"
type = "STORAGE_BUCKET"
read_access_mode = "MANAGED"
}

project = module.project-services.project_id
depends_on = [time_sleep.wait_after_all_resources, google_project_iam_member.dataplex_bucket_access]
depends_on = [time_sleep.wait_after_copy_data, google_project_iam_member.dataplex_bucket_access]

}

@@ -136,12 +144,13 @@ resource "google_dataplex_asset" "gcp_primary_ga4_obfuscated_sample_ecommerce" {
}

resource_spec {
name = "projects/${module.project-services.project_id}/buckets/${google_storage_bucket.ga4_images_bucket.name}"
type = "STORAGE_BUCKET"
name = "projects/${module.project-services.project_id}/buckets/${google_storage_bucket.ga4_images_bucket.name}"
type = "STORAGE_BUCKET"
read_access_mode = "MANAGED"
}

project = module.project-services.project_id
depends_on = [time_sleep.wait_after_all_resources, google_project_iam_member.dataplex_bucket_access]
depends_on = [time_sleep.wait_after_copy_data, google_project_iam_member.dataplex_bucket_access]

}

@@ -158,19 +167,11 @@ resource "google_dataplex_asset" "gcp_primary_tables" {
}

resource_spec {
name = "projects/${module.project-services.project_id}/buckets/${google_storage_bucket.tables_bucket.name}"
type = "STORAGE_BUCKET"
name = "projects/${module.project-services.project_id}/buckets/${google_storage_bucket.tables_bucket.name}"
type = "STORAGE_BUCKET"
read_access_mode = "MANAGED"
}

project = module.project-services.project_id
depends_on = [time_sleep.wait_after_all_resources, google_project_iam_member.dataplex_bucket_access]

}


#give dataplex access to biglake bucket
resource "google_project_iam_member" "dataplex_bucket_access" {
project = module.project-services.project_id
role = "roles/dataplex.serviceAgent"
member = "serviceAccount:${google_project_service_identity.dataplex_sa.email}"
depends_on = [time_sleep.wait_after_copy_data, google_project_iam_member.dataplex_bucket_access]
}
114 changes: 0 additions & 114 deletions dataproc.tf
@@ -31,10 +31,6 @@ resource "google_compute_subnetwork" "subnet" {
region = var.region
network = google_compute_network.default_network.id
private_ip_google_access = true

depends_on = [
google_compute_network.default_network,
]
}

# Firewall rule for dataproc cluster
@@ -83,10 +79,6 @@ resource "google_project_iam_member" "dataproc_sa_roles" {
project = module.project-services.project_id
role = each.key
member = "serviceAccount:${google_service_account.dataproc_service_account.email}"

depends_on = [
google_service_account.dataproc_service_account
]
}

# # Create a BigQuery connection
@@ -103,117 +95,11 @@ resource "google_project_iam_member" "bq_connection_iam_object_viewer" {
project = module.project-services.project_id
role = "roles/storage.objectViewer"
member = "serviceAccount:${google_bigquery_connection.ds_connection.cloud_resource[0].service_account_id}"

depends_on = [
google_bigquery_connection.ds_connection
]
}

# # Grant IAM access to the BigQuery Connection account for BigLake Metastore
resource "google_project_iam_member" "bq_connection_iam_biglake" {
project = module.project-services.project_id
role = "roles/biglake.admin"
member = "serviceAccount:${google_bigquery_connection.ds_connection.cloud_resource[0].service_account_id}"

depends_on = [
google_bigquery_connection.ds_connection
]
}

# # Create a BigQuery external table.
resource "google_bigquery_table" "tbl_thelook_events" {
dataset_id = google_bigquery_dataset.gcp_lakehouse_ds.dataset_id
table_id = "gcp_tbl_events"
project = module.project-services.project_id
deletion_protection = var.deletion_protection

external_data_configuration {
autodetect = true
connection_id = google_bigquery_connection.ds_connection.name #TODO: Change other solutions to remove hardcoded reference
source_format = "PARQUET"
source_uris = ["gs://${var.public_data_bucket}/thelook_ecommerce/events-*.Parquet"]

}

schema = <<EOF
[
{
"name": "id",
"type": "INTEGER",
"mode": "NULLABLE",
"description": ""
},
{
"name": "user_id",
"type": "INTEGER",
"mode": "NULLABLE",
"description": ""
},
{
"name": "sequence_number",
"type": "INTEGER",
"mode": "NULLABLE",
"description": ""
},
{
"name": "session_id",
"type": "STRING",
"mode": "NULLABLE",
"description": ""
},
{
"name": "created_at",
"type": "TIMESTAMP",
"mode": "NULLABLE",
"description": ""
},
{
"name": "ip_address",
"type": "STRING",
"mode": "NULLABLE",
"description": ""
},
{
"name": "city",
"type": "STRING",
"mode": "NULLABLE",
"description": ""
},
{
"name": "postal_code",
"type": "STRING",
"mode": "NULLABLE",
"description": ""
},
{
"name": "browser",
"type": "STRING",
"mode": "NULLABLE",
"description": ""
},
{
"name": "traffic_source",
"type": "STRING",
"mode": "NULLABLE",
"description": ""
},
{
"name": "uri",
"type": "STRING",
"mode": "NULLABLE",
"description": ""
},
{
"name": "event_type",
"type": "STRING",
"mode": "NULLABLE",
"description": ""
}
]
EOF

depends_on = [
google_bigquery_connection.ds_connection,
google_storage_bucket.raw_bucket
]
}
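The explicit depends_on blocks deleted above were redundant: Terraform already infers an implicit dependency whenever one resource's argument references another resource's attribute. A minimal illustration with hypothetical names (not part of this module):

resource "google_service_account" "example_sa" {
  account_id = "example-sa"
}

resource "google_project_iam_member" "example_binding" {
  project = "my-project-id" # placeholder
  role    = "roles/viewer"

  # Interpolating the service account's email already makes this resource
  # wait for google_service_account.example_sa, so no explicit depends_on is needed.
  member = "serviceAccount:${google_service_account.example_sa.email}"
}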
7 changes: 3 additions & 4 deletions examples/analytics_lakehouse/main.tf
@@ -17,9 +17,8 @@
module "analytics_lakehouse" {
source = "../.."

project_id = var.project_id
region = "us-central1"
deletion_protection = false
force_destroy = true
project_id = var.project_id
region = "us-central1"
force_destroy = true

}
31 changes: 7 additions & 24 deletions main.tf
@@ -144,16 +144,7 @@ resource "google_storage_bucket" "tables_bucket" {
force_destroy = var.force_destroy
}

resource "google_storage_bucket_object" "pyspark_file" {
bucket = google_storage_bucket.provisioning_bucket.name
name = "bigquery.py"
source = "${path.module}/src/bigquery.py"

depends_on = [
google_storage_bucket.provisioning_bucket
]
}

# Bucket used to store BI data in Dataplex
resource "google_storage_bucket" "dataplex_bucket" {
name = "gcp-${var.use_case_short}-dataplex-${random_id.id.hex}"
project = module.project-services.project_id
@@ -162,20 +153,12 @@ resource "google_storage_bucket" "dataplex_bucket" {
force_destroy = var.force_destroy
}

# Resources are dependent on one another. We will ensure the following set of resources are created before proceeding.
resource "time_sleep" "wait_after_all_resources" {
create_duration = "120s"
resource "google_storage_bucket_object" "pyspark_file" {
bucket = google_storage_bucket.provisioning_bucket.name
name = "bigquery.py"
source = "${path.module}/src/bigquery.py"

depends_on = [
module.project-services,
google_storage_bucket.provisioning_bucket,
google_bigquery_dataset.gcp_lakehouse_ds,
google_bigquery_connection.gcp_lakehouse_connection,
google_project_iam_member.connectionPermissionGrant,
google_workflows_workflow.project_setup,
google_dataplex_zone.gcp_primary_raw,
google_dataplex_zone.gcp_primary_staging,
google_dataplex_zone.gcp_primary_curated_bi,
data.google_storage_project_service_account.gcs_account,
data.http.call_workflows_copy_data
google_storage_bucket.provisioning_bucket
]
}
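The Dataplex assets in dataplex.tf now wait on time_sleep.wait_after_copy_data rather than the broad wait_after_all_resources gate removed above. That time_sleep resource is defined elsewhere in main.tf and is not shown in this diff; judging from the removed block and the data.http.call_workflows_copy_data reference, it presumably looks roughly like the sketch below (the duration and dependency list are assumptions):

resource "time_sleep" "wait_after_copy_data" {
  create_duration = "120s" # assumed; the removed gate used the same duration

  depends_on = [
    data.http.call_workflows_copy_data
  ]
}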