From 7e1eb841c75a50f4ec4a94cb21b697ee52b6dd5c Mon Sep 17 00:00:00 2001 From: Nick Predey Date: Tue, 20 Apr 2021 15:22:46 -0500 Subject: [PATCH 1/8] initial commit for network creation --- main.tf | 26 +++++++++--------- monitoring.tf | 8 +++--- network.tf | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++ pipeline.tf | 32 +++++++++++----------- variables.tf | 38 +++++++++++++++----------- 5 files changed, 129 insertions(+), 49 deletions(-) create mode 100644 network.tf diff --git a/main.tf b/main.tf index 28e87b5..f87256f 100644 --- a/main.tf +++ b/main.tf @@ -30,13 +30,13 @@ locals { dataflow_temporary_gcs_bucket_name = "${var.project}-${var.dataflow_job_name}-${random_id.bucket_suffix.hex}" dataflow_temporary_gcs_bucket_path = "tmp/" - project_log_sink_name = "${var.dataflow_job_name}-project-log-sink" + project_log_sink_name = "${var.dataflow_job_name}-project-log-sink" organization_log_sink_name = "${var.dataflow_job_name}-organization-log-sink" - dataflow_input_topic_name = "${var.dataflow_job_name}-input-topic" - dataflow_input_subscription_name = "${var.dataflow_job_name}-input-subscription" + dataflow_input_topic_name = "${var.dataflow_job_name}-input-topic" + dataflow_input_subscription_name = "${var.dataflow_job_name}-input-subscription" dataflow_output_deadletter_topic_name = "${var.dataflow_job_name}-deadletter-topic" - dataflow_output_deadletter_sub_name = "${var.dataflow_job_name}-deadletter-subscription" + dataflow_output_deadletter_sub_name = "${var.dataflow_job_name}-deadletter-subscription" dataflow_replay_job_name = "${var.dataflow_job_name}-replay" @@ -54,7 +54,7 @@ resource "google_pubsub_subscription" "dataflow_input_pubsub_subscription" { # messages retained for 7 days (max) message_retention_duration = "604800s" - ack_deadline_seconds = 30 + ack_deadline_seconds = 30 # subscription never expires expiration_policy { @@ -63,9 +63,9 @@ resource "google_pubsub_subscription" "dataflow_input_pubsub_subscription" { } resource "google_logging_project_sink" "project_log_sink" { - name = local.project_log_sink_name + name = local.project_log_sink_name destination = "pubsub.googleapis.com/projects/${var.project}/topics/${google_pubsub_topic.dataflow_input_pubsub_topic.name}" - filter = var.log_filter + filter = var.log_filter unique_writer_identity = true } @@ -81,25 +81,25 @@ resource "google_logging_project_sink" "project_log_sink" { resource "google_pubsub_topic_iam_binding" "pubsub_iam_binding" { project = google_pubsub_topic.dataflow_input_pubsub_topic.project - topic = google_pubsub_topic.dataflow_input_pubsub_topic.name - role = "roles/pubsub.publisher" + topic = google_pubsub_topic.dataflow_input_pubsub_topic.name + role = "roles/pubsub.publisher" members = [ google_logging_project_sink.project_log_sink.writer_identity, ] } output "dataflow_job_id" { - value = google_dataflow_job.dataflow_job.job_id + value = google_dataflow_job.dataflow_job.job_id } output "dataflow_input_topic" { - value = google_pubsub_topic.dataflow_input_pubsub_topic.name + value = google_pubsub_topic.dataflow_input_pubsub_topic.name } output "dataflow_output_deadletter_subscription" { - value = google_pubsub_subscription.dataflow_deadletter_pubsub_sub.name + value = google_pubsub_subscription.dataflow_deadletter_pubsub_sub.name } output "dataflow_log_export_dashboard" { - value = var.workspace != "" ? google_monitoring_dashboard.splunk-export-pipeline-dashboard[0].id : "" + value = var.workspace != "" ? 
google_monitoring_dashboard.splunk-export-pipeline-dashboard[0].id : "" } \ No newline at end of file diff --git a/monitoring.tf b/monitoring.tf index 0647ac2..f3e1091 100644 --- a/monitoring.tf +++ b/monitoring.tf @@ -13,18 +13,18 @@ # limitations under the License. resource "google_monitoring_group" "splunk-export-pipeline-group" { - count = var.workspace != "" ? 1 : 0 + count = var.workspace != "" ? 1 : 0 display_name = "Splunk Log Export Group" - project = var.workspace + project = var.workspace filter = "resource.metadata.name=starts_with(\"${var.dataflow_job_name}\")" } resource "google_monitoring_dashboard" "splunk-export-pipeline-dashboard" { - count = var.workspace != "" ? 1 : 0 + count = var.workspace != "" ? 1 : 0 - project = var.workspace + project = var.workspace dashboard_json = < 1 worker can communicate over internal IPs. +resource "google_compute_firewall" "connect_dataflow_workers" { + count = var.create_network == true ? 1 : 0 + + name = "dataflow-internal-ip-fwr" + network = google_compute_network.splunk_export.id + + allow { + protocol = "tcp" + ports = ["12345-12346"] + } + + source_tags = ["dataflow"] + target_tags = ["dataflow"] +} diff --git a/pipeline.tf b/pipeline.tf index 476965c..c7584bb 100644 --- a/pipeline.tf +++ b/pipeline.tf @@ -31,34 +31,34 @@ resource "google_pubsub_subscription" "dataflow_deadletter_pubsub_sub" { } resource "google_storage_bucket" "dataflow_job_temp_bucket" { - name = local.dataflow_temporary_gcs_bucket_name - location = var.region + name = local.dataflow_temporary_gcs_bucket_name + location = var.region storage_class = "REGIONAL" } resource "google_storage_bucket_object" "dataflow_job_temp_object" { - name = local.dataflow_temporary_gcs_bucket_path + name = local.dataflow_temporary_gcs_bucket_path content = "Placeholder for Dataflow to write temporary files" - bucket = google_storage_bucket.dataflow_job_temp_bucket.name + bucket = google_storage_bucket.dataflow_job_temp_bucket.name } resource "google_dataflow_job" "dataflow_job" { - name = var.dataflow_job_name + name = var.dataflow_job_name template_gcs_path = var.dataflow_template_path temp_gcs_location = "gs://${local.dataflow_temporary_gcs_bucket_name}/${local.dataflow_temporary_gcs_bucket_path}" - machine_type = var.dataflow_job_machine_type - max_workers = var.dataflow_job_machine_count + machine_type = var.dataflow_job_machine_type + max_workers = var.dataflow_job_machine_count parameters = { - inputSubscription = google_pubsub_subscription.dataflow_input_pubsub_subscription.id - outputDeadletterTopic = google_pubsub_topic.dataflow_deadletter_pubsub_topic.id - url = var.splunk_hec_url - token = var.splunk_hec_token - parallelism = var.dataflow_job_parallelism - batchCount = var.dataflow_job_batch_count - includePubsubMessage = local.dataflow_job_include_pubsub_message + inputSubscription = google_pubsub_subscription.dataflow_input_pubsub_subscription.id + outputDeadletterTopic = google_pubsub_topic.dataflow_deadletter_pubsub_topic.id + url = var.splunk_hec_url + token = var.splunk_hec_token + parallelism = var.dataflow_job_parallelism + batchCount = var.dataflow_job_batch_count + includePubsubMessage = local.dataflow_job_include_pubsub_message disableCertificateValidation = var.dataflow_job_disable_certificate_validation } - region = var.region - network = var.network + region = var.region + network = var.create_netowork == true ? 
google_compute_network.splunk_export.id : var.network ip_configuration = "WORKER_IP_PRIVATE" } diff --git a/variables.tf b/variables.tf index d5900cc..65f0760 100644 --- a/variables.tf +++ b/variables.tf @@ -22,18 +22,24 @@ variable "region" { variable "zone" { description = "Zone to deploy into" - default = "" + default = "" } variable "network" { description = "Network to deploy into" } +variable "create_network" { + description = "Boolean value if a new network needs to be created." + default = false + type = bool +} + # Dashboard parameters variable "workspace" { description = "Cloud Monitoring Workspace to create dashboard under. This assumes Workspace is already created and project provided is already added to it. If parameter is empty, no dashboard will be created" - default = "" + default = "" } # Log sink details @@ -46,26 +52,26 @@ variable "log_filter" { variable "splunk_hec_url" { description = "Splunk HEC URL to write data to. Example: https://[MY_SPLUNK_IP_OR_FQDN]:8088" - + validation { - condition = can(regex("https?://.*(:[0-9]+)?", var.splunk_hec_url)) + condition = can(regex("https?://.*(:[0-9]+)?", var.splunk_hec_url)) error_message = "Splunk HEC url must of the form ://: ." } } variable "splunk_hec_token" { description = "Splunk HEC token" - sensitive = true + sensitive = true } # Dataflow job parameters variable "dataflow_template_path" { description = "Dataflow template path. Defaults to latest version of Google-hosted Pub/Sub to Splunk template" - default = "gs://dataflow-templates/latest/Cloud_PubSub_to_Splunk" + default = "gs://dataflow-templates/latest/Cloud_PubSub_to_Splunk" validation { - condition = can(regex("gs://.+", var.dataflow_template_path)) + condition = can(regex("gs://.+", var.dataflow_template_path)) error_message = "Splunk Dataflow template path must be a GCS object path gs:/// ." } } @@ -76,29 +82,29 @@ variable "dataflow_job_name" { variable "dataflow_job_machine_type" { description = "Dataflow job worker machine type" - default = "n1-standard-4" + default = "n1-standard-4" } variable "dataflow_job_machine_count" { description = "Dataflow job max worker count. Defaults to 2." - type = number - default = 2 + type = number + default = 2 } variable "dataflow_job_parallelism" { description = "Maximum parallel requests to Splunk. Defaults to 8." - type = number - default = 8 + type = number + default = 8 } variable "dataflow_job_batch_count" { description = "Batch count of messages in single request to Splunk. Defaults to 50." - type = number - default = 50 + type = number + default = 50 } variable "dataflow_job_disable_certificate_validation" { description = "Disable SSL certificate validation (default: false)" - type = bool - default = false + type = bool + default = false } \ No newline at end of file From 83c25c53d9410818fafbf9ac550fe21ac42f7f36 Mon Sep 17 00:00:00 2001 From: Nick Predey Date: Tue, 27 Apr 2021 12:29:05 -0500 Subject: [PATCH 2/8] adding source for dataflow firewall rule --- network.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/network.tf b/network.tf index f73b01a..bac716c 100644 --- a/network.tf +++ b/network.tf @@ -58,6 +58,7 @@ resource "google_compute_router_nat" "dataflow_nat" { } # Creating firewall rule so that dataflow jobs with > 1 worker can communicate over internal IPs. +# Source: https://cloud.google.com/dataflow/docs/guides/routes-firewall#firewall_rules_required_by resource "google_compute_firewall" "connect_dataflow_workers" { count = var.create_network == true ? 
1 : 0 From 74334ec7175e783b79f079dd2f63aad3c9ac2dd1 Mon Sep 17 00:00:00 2001 From: Nick Predey Date: Tue, 27 Apr 2021 12:33:54 -0500 Subject: [PATCH 3/8] making subnet logging optional --- network.tf | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/network.tf b/network.tf index bac716c..a1d992b 100644 --- a/network.tf +++ b/network.tf @@ -13,11 +13,13 @@ resource "google_compute_subnetwork" "splunk_subnet" { region = var.region network = google_compute_network.splunk_export.id private_ip_google_access = true - log_config { - aggregation_interval = "INTERVAL_15_MIN" - flow_sampling = 0.1 - metadata = "INCLUDE_ALL_METADATA" - } + +# Optional configuration to log network traffic at the subnet level +# log_config { +# aggregation_interval = "INTERVAL_15_MIN" +# flow_sampling = 0.1 +# metadata = "INCLUDE_ALL_METADATA" +# } } From 3d45278462d7934685d2eb81b0429cc5074ca6f8 Mon Sep 17 00:00:00 2001 From: Nick Predey Date: Wed, 28 Apr 2021 17:26:06 -0500 Subject: [PATCH 4/8] adding default primary cidr range w/ corresponding variable and changing min_ports to 128 --- network.tf | 14 +++++++------- variables.tf | 6 ++++++ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/network.tf b/network.tf index a1d992b..4539582 100644 --- a/network.tf +++ b/network.tf @@ -14,12 +14,12 @@ resource "google_compute_subnetwork" "splunk_subnet" { network = google_compute_network.splunk_export.id private_ip_google_access = true -# Optional configuration to log network traffic at the subnet level -# log_config { -# aggregation_interval = "INTERVAL_15_MIN" -# flow_sampling = 0.1 -# metadata = "INCLUDE_ALL_METADATA" -# } + # Optional configuration to log network traffic at the subnet level + # log_config { + # aggregation_interval = "INTERVAL_15_MIN" + # flow_sampling = 0.1 + # metadata = "INCLUDE_ALL_METADATA" + # } } @@ -47,7 +47,7 @@ resource "google_compute_router_nat" "dataflow_nat" { nat_ip_allocate_option = "MANUAL_ONLY" source_subnetwork_ip_ranges_to_nat = "LIST_OF_SUBNETWORKS" nat_ips = google_compute_address.dataflow_nat_ip_address.*.self_link - min_ports_per_vm = 1024 + min_ports_per_vm = 128 subnetwork { name = google_compute_subnetwork.splunk_subnet.id source_ip_ranges_to_nat = ["PRIMARY_IP_RANGE"] diff --git a/variables.tf b/variables.tf index 65f0760..d8ec721 100644 --- a/variables.tf +++ b/variables.tf @@ -35,6 +35,12 @@ variable "create_network" { type = bool } +variable "primary_subnet_cidr" { + type = string + description = "The CIDR Range of the primary subnet" + default = "10.128.0.0/20" +} + # Dashboard parameters variable "workspace" { From 9239153dd44b45084a6df19a6e5a8ef677a3c851 Mon Sep 17 00:00:00 2001 From: Nick Predey Date: Thu, 29 Apr 2021 09:53:14 -0500 Subject: [PATCH 5/8] removing whitespace --- main.tf | 26 +++++++++++++------------- monitoring.tf | 8 ++++---- pipeline.tf | 26 +++++++++++++------------- variables.tf | 29 ++++++++++++++--------------- 4 files changed, 44 insertions(+), 45 deletions(-) diff --git a/main.tf b/main.tf index f87256f..35143e1 100644 --- a/main.tf +++ b/main.tf @@ -30,13 +30,13 @@ locals { dataflow_temporary_gcs_bucket_name = "${var.project}-${var.dataflow_job_name}-${random_id.bucket_suffix.hex}" dataflow_temporary_gcs_bucket_path = "tmp/" - project_log_sink_name = "${var.dataflow_job_name}-project-log-sink" + project_log_sink_name = "${var.dataflow_job_name}-project-log-sink" organization_log_sink_name = "${var.dataflow_job_name}-organization-log-sink" - dataflow_input_topic_name = 
"${var.dataflow_job_name}-input-topic" - dataflow_input_subscription_name = "${var.dataflow_job_name}-input-subscription" + dataflow_input_topic_name = "${var.dataflow_job_name}-input-topic" + dataflow_input_subscription_name = "${var.dataflow_job_name}-input-subscription" dataflow_output_deadletter_topic_name = "${var.dataflow_job_name}-deadletter-topic" - dataflow_output_deadletter_sub_name = "${var.dataflow_job_name}-deadletter-subscription" + dataflow_output_deadletter_sub_name = "${var.dataflow_job_name}-deadletter-subscription" dataflow_replay_job_name = "${var.dataflow_job_name}-replay" @@ -54,7 +54,7 @@ resource "google_pubsub_subscription" "dataflow_input_pubsub_subscription" { # messages retained for 7 days (max) message_retention_duration = "604800s" - ack_deadline_seconds = 30 + ack_deadline_seconds = 30 # subscription never expires expiration_policy { @@ -63,9 +63,9 @@ resource "google_pubsub_subscription" "dataflow_input_pubsub_subscription" { } resource "google_logging_project_sink" "project_log_sink" { - name = local.project_log_sink_name + name = local.project_log_sink_name destination = "pubsub.googleapis.com/projects/${var.project}/topics/${google_pubsub_topic.dataflow_input_pubsub_topic.name}" - filter = var.log_filter + filter = var.log_filter unique_writer_identity = true } @@ -81,25 +81,25 @@ resource "google_logging_project_sink" "project_log_sink" { resource "google_pubsub_topic_iam_binding" "pubsub_iam_binding" { project = google_pubsub_topic.dataflow_input_pubsub_topic.project - topic = google_pubsub_topic.dataflow_input_pubsub_topic.name - role = "roles/pubsub.publisher" + topic = google_pubsub_topic.dataflow_input_pubsub_topic.name + role = "roles/pubsub.publisher" members = [ google_logging_project_sink.project_log_sink.writer_identity, ] } output "dataflow_job_id" { - value = google_dataflow_job.dataflow_job.job_id + value = google_dataflow_job.dataflow_job.job_id } output "dataflow_input_topic" { - value = google_pubsub_topic.dataflow_input_pubsub_topic.name + value = google_pubsub_topic.dataflow_input_pubsub_topic.name } output "dataflow_output_deadletter_subscription" { - value = google_pubsub_subscription.dataflow_deadletter_pubsub_sub.name + value = google_pubsub_subscription.dataflow_deadletter_pubsub_sub.name } output "dataflow_log_export_dashboard" { - value = var.workspace != "" ? google_monitoring_dashboard.splunk-export-pipeline-dashboard[0].id : "" + value = var.workspace != "" ? google_monitoring_dashboard.splunk-export-pipeline-dashboard[0].id : "" } \ No newline at end of file diff --git a/monitoring.tf b/monitoring.tf index f3e1091..0647ac2 100644 --- a/monitoring.tf +++ b/monitoring.tf @@ -13,18 +13,18 @@ # limitations under the License. resource "google_monitoring_group" "splunk-export-pipeline-group" { - count = var.workspace != "" ? 1 : 0 + count = var.workspace != "" ? 1 : 0 display_name = "Splunk Log Export Group" - project = var.workspace + project = var.workspace filter = "resource.metadata.name=starts_with(\"${var.dataflow_job_name}\")" } resource "google_monitoring_dashboard" "splunk-export-pipeline-dashboard" { - count = var.workspace != "" ? 1 : 0 + count = var.workspace != "" ? 
1 : 0 - project = var.workspace + project = var.workspace dashboard_json = < Date: Thu, 29 Apr 2021 09:55:45 -0500 Subject: [PATCH 6/8] trimming whitespace pt 2 --- main.tf | 2 +- pipeline.tf | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/main.tf b/main.tf index 35143e1..28e87b5 100644 --- a/main.tf +++ b/main.tf @@ -82,7 +82,7 @@ resource "google_logging_project_sink" "project_log_sink" { resource "google_pubsub_topic_iam_binding" "pubsub_iam_binding" { project = google_pubsub_topic.dataflow_input_pubsub_topic.project topic = google_pubsub_topic.dataflow_input_pubsub_topic.name - role = "roles/pubsub.publisher" + role = "roles/pubsub.publisher" members = [ google_logging_project_sink.project_log_sink.writer_identity, ] diff --git a/pipeline.tf b/pipeline.tf index d1fe050..183d78e 100644 --- a/pipeline.tf +++ b/pipeline.tf @@ -39,20 +39,20 @@ resource "google_storage_bucket" "dataflow_job_temp_bucket" { resource "google_storage_bucket_object" "dataflow_job_temp_object" { name = local.dataflow_temporary_gcs_bucket_path content = "Placeholder for Dataflow to write temporary files" - bucket = google_storage_bucket.dataflow_job_temp_bucket.name + bucket = google_storage_bucket.dataflow_job_temp_bucket.name } resource "google_dataflow_job" "dataflow_job" { name = var.dataflow_job_name template_gcs_path = var.dataflow_template_path temp_gcs_location = "gs://${local.dataflow_temporary_gcs_bucket_name}/${local.dataflow_temporary_gcs_bucket_path}" - machine_type = var.dataflow_job_machine_type - max_workers = var.dataflow_job_machine_count + machine_type = var.dataflow_job_machine_type + max_workers = var.dataflow_job_machine_count parameters = { inputSubscription = google_pubsub_subscription.dataflow_input_pubsub_subscription.id outputDeadletterTopic = google_pubsub_topic.dataflow_deadletter_pubsub_topic.id - url = var.splunk_hec_url - token = var.splunk_hec_token + url = var.splunk_hec_url + token = var.splunk_hec_token parallelism = var.dataflow_job_parallelism batchCount = var.dataflow_job_batch_count includePubsubMessage = local.dataflow_job_include_pubsub_message From a9fd71905432f0584b2c936f61f312dc22f32aec Mon Sep 17 00:00:00 2001 From: Nick Predey Date: Thu, 29 Apr 2021 09:57:11 -0500 Subject: [PATCH 7/8] trimming whitespace pt 3 --- variables.tf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/variables.tf b/variables.tf index e9fc292..b307c89 100644 --- a/variables.tf +++ b/variables.tf @@ -58,6 +58,7 @@ variable "log_filter" { variable "splunk_hec_url" { description = "Splunk HEC URL to write data to. Example: https://[MY_SPLUNK_IP_OR_FQDN]:8088" + validation { condition = can(regex("https?://.*(:[0-9]+)?", var.splunk_hec_url)) error_message = "Splunk HEC url must of the form ://: ." 
@@ -66,7 +67,7 @@ variable "splunk_hec_url" { variable "splunk_hec_token" { description = "Splunk HEC token" - sensitive = true + sensitive = true } # Dataflow job parameters From 5eac5d6c66fe2927b014b8b5c76bf04905fbf3be Mon Sep 17 00:00:00 2001 From: Nick Predey Date: Fri, 30 Apr 2021 15:17:03 -0500 Subject: [PATCH 8/8] changed typo, added vars to readme and tfvars file --- README.md | 2 ++ pipeline.tf | 2 +- variables.yaml | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fd52433..808b83a 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,8 @@ project | The project to deploy to, if not set the default provider project is u region | Region to deploy into (for regional resources) zone | Zone to deploy into (for zonal resources) network | Network to deploy into +create\_network | Boolean value if a new network needs to be created. +primary\_subnet\_cidr | The CIDR Range of the primary subnet workspace | (Optional) Workspace to create Monitoring dashboard in. This assumes Workspace is already created and project is already added to it. If not specified, no dashboard will be created log_filter | Log filter to use when exporting logs splunk_hec_url | Splunk HEC URL to stream data to, e.g. https://[MY_SPLUNK_IP_OR_FQDN]:8088 diff --git a/pipeline.tf b/pipeline.tf index 183d78e..48367ad 100644 --- a/pipeline.tf +++ b/pipeline.tf @@ -59,6 +59,6 @@ resource "google_dataflow_job" "dataflow_job" { disableCertificateValidation = var.dataflow_job_disable_certificate_validation } region = var.region - network = var.create_netowork == true ? google_compute_network.splunk_export.id : var.network + network = var.create_network == true ? google_compute_network.splunk_export.id : var.network ip_configuration = "WORKER_IP_PRIVATE" } diff --git a/variables.yaml b/variables.yaml index 988d45e..73cfb1d 100644 --- a/variables.yaml +++ b/variables.yaml @@ -16,6 +16,8 @@ project = "" region = "" zone = "" network = "" +create_network = false +primary_subnet_cidr = "10.128.0.0/20" # Log sink details log_filter = ""
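
For reference, a minimal terraform.tfvars sketch showing how a deployment might use the create_network and primary_subnet_cidr variables introduced by this patch series. All concrete values below (project, region, job name, Splunk HEC endpoint and token, log filter) are placeholders, not values taken from the patches; the CIDR simply repeats the default declared in variables.tf.

# Illustrative terraform.tfvars (values are placeholders, assuming a fresh deployment
# where the module creates its own VPC, subnet, Cloud NAT, and Dataflow firewall rule)
project             = "my-gcp-project"                     # placeholder project ID
region              = "us-central1"                        # placeholder region
zone                = "us-central1-a"                      # placeholder zone
network             = ""                                   # unused for the Dataflow job when create_network = true
create_network      = true                                 # let the module create the splunk_export network
primary_subnet_cidr = "10.128.0.0/20"                      # default range from variables.tf
dataflow_job_name   = "splunk-export-pipeline"             # placeholder job name
splunk_hec_url      = "https://splunk.example.com:8088"    # placeholder HEC endpoint
splunk_hec_token    = "00000000-0000-0000-0000-000000000000"  # placeholder token
log_filter          = "severity>=WARNING"                  # example log sink filter

With create_network = true, the job's network argument in pipeline.tf resolves to the newly created google_compute_network.splunk_export rather than var.network, and workers keep private IPs (ip_configuration = "WORKER_IP_PRIVATE"), reaching Splunk through the Cloud NAT defined in network.tf.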