diff --git a/README.md b/README.md
index 8b3ef8c..3080c96 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@
 dataflow_job_batch_count | (Optional) Batch count of messages in single request
 dataflow_job_disable_certificate_validation | (Optional) Boolean to disable SSL certificate validation (default false)
 dataflow_job_udf_gcs_path | (Optional) GCS path for JavaScript file (default No UDF used)
 dataflow_job_udf_function_name | (Optional) Name of JavaScript function to be called (default No UDF used)
-dataflow_template_path | (Optional) Dataflow template GCS path (default 'gs://dataflow-templates/latest/Cloud_PubSub_to_Splunk'). Override this for version pinning.
+dataflow_template_version | (Optional) Dataflow template release version (default 'latest'). Override this for version pinning, e.g. '2021-08-02-00_RC00'. Specify the version only; the template GCS path is derived automatically: 'gs://dataflow-templates/`version`/Cloud_PubSub_to_Splunk'
 
 ### Getting Started
@@ -67,6 +67,11 @@
 $ terraform output dataflow_log_export_dashboad
 
 2. Visit newly created Monitoring Dashboard in Cloud Console by replacing dashboard_id in the following URL: https://console.cloud.google.com/monitoring/dashboards/builder/{dashboard_id}
 
+#### Deploy replay pipeline
+
+In the `replay.tf` file, uncomment the code under `splunk_dataflow_replay`, then run `terraform plan` followed by `terraform apply`.
+
+Once the replay pipeline is no longer needed (the number of messages in the PubSub deadletter topic is back to 0), comment out `splunk_dataflow_replay` and follow the `plan` and `apply` sequence above.
 
 ### Cleanup
@@ -79,7 +84,7 @@
 $ terraform destroy
 
 * Support KMS-encrypted HEC token
 * Expose logging level knob
-* Create replay pipeline
+* ~~Create replay pipeline~~
 * ~~Create secure network for self-contained setup if existing network is not provided~~
 * ~~Add Cloud Monitoring dashboard~~
diff --git a/main.tf b/main.tf
index 81e6531..36fbb80 100644
--- a/main.tf
+++ b/main.tf
@@ -21,25 +21,39 @@ provider "google" {
   region = var.region
 }
 
+# Generate new random hex to be used for bucket name
 resource "random_id" "bucket_suffix" {
   byte_length = 4
 }
 
+# Generate new random id each time we switch to a new template version id,
+# to be used for pipeline job name to force job replacement vs in-place update
+resource "random_id" "dataflow_job_instance" {
+  byte_length = 2
+  keepers = {
+    dataflow_template_version = var.dataflow_template_version
+  }
+}
+
 locals {
   dataflow_temporary_gcs_bucket_name = "${var.project}-${var.dataflow_job_name}-${random_id.bucket_suffix.hex}"
   dataflow_temporary_gcs_bucket_path = "tmp/"
 
+  dataflow_splunk_template_gcs_path = "gs://dataflow-templates/${var.dataflow_template_version}/Cloud_PubSub_to_Splunk"
+  dataflow_pubsub_template_gcs_path = "gs://dataflow-templates/${var.dataflow_template_version}/Cloud_PubSub_to_Cloud_PubSub"
+
   subnet_name = coalesce(var.subnet, "${var.network}-${var.region}")
 
   project_log_sink_name = "${var.dataflow_job_name}-project-log-sink"
   organization_log_sink_name = "${var.dataflow_job_name}-organization-log-sink"
 
+  dataflow_main_job_name = "${var.dataflow_job_name}-main-${random_id.dataflow_job_instance.hex}"
+  dataflow_replay_job_name = "${var.dataflow_job_name}-replay-${random_id.dataflow_job_instance.hex}"
+
   dataflow_input_topic_name = "${var.dataflow_job_name}-input-topic"
   dataflow_input_subscription_name = "${var.dataflow_job_name}-input-subscription"
   dataflow_output_deadletter_topic_name = "${var.dataflow_job_name}-deadletter-topic"
   dataflow_output_deadletter_sub_name = "${var.dataflow_job_name}-deadletter-subscription"
-  dataflow_replay_job_name = "${var.dataflow_job_name}-replay"
-
   # dataflow job parameters (not externalized for this project)
   dataflow_job_include_pubsub_message = true
 }
diff --git a/monitoring.tf b/monitoring.tf
index 0647ac2..ec689b6 100644
--- a/monitoring.tf
+++ b/monitoring.tf
@@ -54,7 +54,7 @@ resource "google_monitoring_dashboard" "splunk-export-pipeline-dashboard" {
       "scorecard": {
         "timeSeriesQuery": {
           "timeSeriesFilter": {
-            "filter": "metric.type=\"dataflow.googleapis.com/job/elements_produced_count\" resource.type=\"dataflow_job\" resource.label.\"job_name\"=\"${var.dataflow_job_name}\" metric.label.\"ptransform\"=\"WriteToSplunk/Create KV pairs/Inject Keys\"",
+            "filter": "metric.type=\"dataflow.googleapis.com/job/elements_produced_count\" resource.type=\"dataflow_job\" resource.label.\"job_name\"=\"${local.dataflow_main_job_name}\" metric.label.\"ptransform\"=\"WriteToSplunk/Create KV pairs/Inject Keys\"",
             "aggregation": {
               "alignmentPeriod": "3600s",
              "perSeriesAligner": "ALIGN_SUM",
@@ -136,7 +136,7 @@ resource "google_monitoring_dashboard" "splunk-export-pipeline-dashboard" {
       {
         "timeSeriesQuery": {
           "timeSeriesFilter": {
-            "filter": "metric.type=\"dataflow.googleapis.com/job/elements_produced_count\" resource.type=\"dataflow_job\" resource.label.\"job_name\"=\"${var.dataflow_job_name}\" metric.label.\"pcollection\"=\"WriteToSplunk/Create KV pairs/Inject Keys.out0\"",
+            "filter": "metric.type=\"dataflow.googleapis.com/job/elements_produced_count\" resource.type=\"dataflow_job\" resource.label.\"job_name\"=\"${local.dataflow_main_job_name}\" metric.label.\"pcollection\"=\"WriteToSplunk/Create KV pairs/Inject Keys.out0\"",
             "aggregation": {
               "alignmentPeriod": "60s",
              "perSeriesAligner": "ALIGN_RATE"
@@ -196,7 +196,7 @@ resource "google_monitoring_dashboard" "splunk-export-pipeline-dashboard" {
       "scorecard": {
         "timeSeriesQuery": {
           "timeSeriesFilter": {
-            "filter": "metric.type=\"custom.googleapis.com/dataflow/outbound-successful-events\" resource.type=\"dataflow_job\" resource.label.\"job_name\"=\"${var.dataflow_job_name}\"",
+            "filter": "metric.type=\"custom.googleapis.com/dataflow/outbound-successful-events\" resource.type=\"dataflow_job\" resource.label.\"job_name\"=\"${local.dataflow_main_job_name}\"",
            "aggregation": {
              "alignmentPeriod": "60s",
              "perSeriesAligner": "ALIGN_MAX",
@@ -216,7 +216,7 @@ resource "google_monitoring_dashboard" "splunk-export-pipeline-dashboard" {
       {
         "timeSeriesQuery": {
           "timeSeriesFilter": {
-            "filter": "metric.type=\"custom.googleapis.com/dataflow/outbound-failed-events\" resource.type=\"dataflow_job\" resource.label.\"job_name\"=\"${var.dataflow_job_name}\"",
+            "filter": "metric.type=\"custom.googleapis.com/dataflow/outbound-failed-events\" resource.type=\"dataflow_job\" resource.label.\"job_name\"=\"${local.dataflow_main_job_name}\"",
            "aggregation": {
              "alignmentPeriod": "60s",
              "perSeriesAligner": "ALIGN_MEAN",
@@ -230,7 +230,7 @@ resource "google_monitoring_dashboard" "splunk-export-pipeline-dashboard" {
       {
         "timeSeriesQuery": {
           "timeSeriesFilter": {
-            "filter": "metric.type=\"custom.googleapis.com/dataflow/total-failed-messages\" resource.type=\"dataflow_job\" resource.label.\"job_name\"=\"${var.dataflow_job_name}\"",
+            "filter": "metric.type=\"custom.googleapis.com/dataflow/total-failed-messages\" resource.type=\"dataflow_job\" resource.label.\"job_name\"=\"${local.dataflow_main_job_name}\"",
            "aggregation": {
              "alignmentPeriod": "60s",
"ALIGN_MEAN", @@ -460,7 +460,7 @@ resource "google_monitoring_dashboard" "splunk-export-pipeline-dashboard" { { "timeSeriesQuery": { "timeSeriesFilter": { - "filter": "metric.type=\"dataflow.googleapis.com/job/current_num_vcpus\" resource.type=\"dataflow_job\" resource.label.\"job_name\"=\"${var.dataflow_job_name}\"", + "filter": "metric.type=\"dataflow.googleapis.com/job/current_num_vcpus\" resource.type=\"dataflow_job\" resource.label.\"job_name\"=\"${local.dataflow_main_job_name}\"", "aggregation": { "alignmentPeriod": "60s", "perSeriesAligner": "ALIGN_MEAN", @@ -526,7 +526,7 @@ resource "google_monitoring_dashboard" "splunk-export-pipeline-dashboard" { { "timeSeriesQuery": { "timeSeriesFilter": { - "filter": "metric.type=\"dataflow.googleapis.com/job/system_lag\" resource.type=\"dataflow_job\" resource.label.\"job_name\"=\"${var.dataflow_job_name}\"", + "filter": "metric.type=\"dataflow.googleapis.com/job/system_lag\" resource.type=\"dataflow_job\" resource.label.\"job_name\"=\"${local.dataflow_main_job_name}\"", "aggregation": { "alignmentPeriod": "60s", "perSeriesAligner": "ALIGN_MEAN", diff --git a/pipeline.tf b/pipeline.tf index 6b83ff7..f0c63e0 100644 --- a/pipeline.tf +++ b/pipeline.tf @@ -42,16 +42,9 @@ resource "google_storage_bucket_object" "dataflow_job_temp_object" { bucket = google_storage_bucket.dataflow_job_temp_bucket.name } -resource "random_id" "dataflow_job_instance" { - byte_length = 2 - keepers = { - template_gcs_path = var.dataflow_template_path - } -} - resource "google_dataflow_job" "dataflow_job" { - name = "${var.dataflow_job_name}-${random_id.dataflow_job_instance.hex}" - template_gcs_path = random_id.dataflow_job_instance.keepers.template_gcs_path + name = local.dataflow_main_job_name + template_gcs_path = local.dataflow_splunk_template_gcs_path temp_gcs_location = "gs://${local.dataflow_temporary_gcs_bucket_name}/${local.dataflow_temporary_gcs_bucket_path}" machine_type = var.dataflow_job_machine_type max_workers = var.dataflow_job_machine_count diff --git a/replay.tf b/replay.tf new file mode 100644 index 0000000..8618698 --- /dev/null +++ b/replay.tf @@ -0,0 +1,44 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +/* +The replay job should stay commented out while the main export pipeline is initially deployed. +When the replay job needs to be run, simply uncomment the module and deploy the replay pipeline. +From the CLI, this may look like `terraform apply -target="google_dataflow_job.splunk_dataflow_replay"` +After the deadletter Pub/Sub topic has no more messages, comment out the module and run a regular terraform deployment (ex. terraform apply). Terraform will automatically destroy the replay job. 
+
+`terraform apply -target` usage documentation is here: https://www.terraform.io/docs/cli/commands/apply.html
+*/
+
+/*
+resource "google_dataflow_job" "splunk_dataflow_replay" {
+  name = local.dataflow_replay_job_name
+  template_gcs_path = local.dataflow_pubsub_template_gcs_path
+  temp_gcs_location = "gs://${local.dataflow_temporary_gcs_bucket_name}/${local.dataflow_temporary_gcs_bucket_path}"
+  machine_type = var.dataflow_job_machine_type
+  max_workers = var.dataflow_job_machine_count
+  parameters = {
+    inputSubscription = google_pubsub_subscription.dataflow_deadletter_pubsub_sub.id
+    outputTopic = google_pubsub_topic.dataflow_input_pubsub_topic.id
+  }
+  region = var.region
+  network = var.network
+  subnetwork = "regions/${var.region}/subnetworks/${local.subnet_name}"
+  ip_configuration = "WORKER_IP_PRIVATE"
+
+  depends_on = [
+    google_compute_subnetwork.splunk_subnet
+  ]
+}
+*/
\ No newline at end of file
diff --git a/variables.tf b/variables.tf
index f24fbe6..9c52b8f 100644
--- a/variables.tf
+++ b/variables.tf
@@ -72,14 +72,10 @@ variable "splunk_hec_token" {
 
 # Dataflow job parameters
 
-variable "dataflow_template_path" {
-  description = "Dataflow template path. Defaults to latest version of Google-hosted Pub/Sub to Splunk template"
-  default = "gs://dataflow-templates/latest/Cloud_PubSub_to_Splunk"
-
-  validation {
-    condition = can(regex("gs://.+", var.dataflow_template_path))
-    error_message = "Splunk Dataflow template path must be a GCS object path gs:/// ."
-  }
+variable "dataflow_template_version" {
+  type = string
+  description = "Dataflow template release version (default 'latest'). Used for both the main Pub/Sub-to-Splunk job and the Pub/Sub-to-Pub/Sub replay job."
+  default = "latest"
 }
 
 variable "dataflow_job_name" {
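
Note on version pinning (a minimal sketch, not part of the diff itself): the new `dataflow_template_version` variable can be overridden at apply time, for example with `-var`. The release label shown is the example cited in the README change above; any published template release label works the same way, and changing it rotates `random_id.dataflow_job_instance`, forcing a job replacement rather than an in-place update.

```sh
# Pin the Dataflow template to a specific release instead of 'latest'.
# main.tf derives the template GCS paths from this value:
#   gs://dataflow-templates/<version>/Cloud_PubSub_to_Splunk
#   gs://dataflow-templates/<version>/Cloud_PubSub_to_Cloud_PubSub
terraform apply -var="dataflow_template_version=2021-08-02-00_RC00"
```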
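The replay runbook from `replay.tf` and the README addition, condensed into commands (a sketch; the resource address matches the commented-out `splunk_dataflow_replay` resource above):

```sh
# 1. Uncomment the splunk_dataflow_replay resource in replay.tf,
#    then deploy only the replay job:
terraform plan -target="google_dataflow_job.splunk_dataflow_replay"
terraform apply -target="google_dataflow_job.splunk_dataflow_replay"

# 2. Once the deadletter Pub/Sub subscription has drained to 0 messages,
#    comment the resource back out and run a regular deployment;
#    Terraform then destroys the replay job automatically:
terraform plan
terraform apply
```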