Creation of Replay Pipeline for Splunk export #11

Merged
5 changes: 5 additions & 0 deletions README.md
@@ -67,6 +67,11 @@ $ terraform output dataflow_log_export_dashboad

2. Visit the newly created Monitoring Dashboard in the Cloud Console by replacing `dashboard_id` in the following URL: https://console.cloud.google.com/monitoring/dashboards/builder/{dashboard_id}

#### Deploy replay pipeline

In the `replay.tf` file, uncomment the code under `splunk_dataflow_replay` and follow the sequence of `terraform plan` and `terraform apply`.

Once the replay pipeline is no longer needed (the number of messages in the Pub/Sub deadletter topic is 0), comment out `splunk_dataflow_replay` and follow the `plan` and `apply` sequence above.
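A minimal sketch of that sequence from the root of this repo (assuming no flags or variable overrides beyond the ones you already use):

```shell
# After uncommenting splunk_dataflow_replay in replay.tf:
$ terraform plan
$ terraform apply

# Once the deadletter topic is drained, comment splunk_dataflow_replay back out, then:
$ terraform plan
$ terraform apply   # Terraform now plans the destroy of the replay job
```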

### Cleanup

3 changes: 2 additions & 1 deletion main.tf
@@ -28,6 +28,7 @@ resource "random_id" "bucket_suffix" {
locals {
dataflow_temporary_gcs_bucket_name = "${var.project}-${var.dataflow_job_name}-${random_id.bucket_suffix.hex}"
dataflow_temporary_gcs_bucket_path = "tmp/"
dataflow_template_path = "gs://dataflow-templates/${var.dataflow_template_version}/Cloud_PubSub_to_Splunk"

subnet_name = coalesce(var.subnet, "${var.network}-${var.region}")
project_log_sink_name = "${var.dataflow_job_name}-project-log-sink"
@@ -39,7 +40,7 @@ locals {
dataflow_output_deadletter_sub_name = "${var.dataflow_job_name}-deadletter-subscription"

dataflow_replay_job_name = "${var.dataflow_job_name}-replay"

dataflow_deadletter_template_gcs_path = "gs://dataflow-templates/${var.dataflow_template_version}/Cloud_PubSub_to_Cloud_PubSub"
# dataflow job parameters (not externalized for this project)
dataflow_job_include_pubsub_message = true
}
2 changes: 1 addition & 1 deletion pipeline.tf
@@ -45,7 +45,7 @@ resource "google_storage_bucket_object" "dataflow_job_temp_object" {
resource "random_id" "dataflow_job_instance" {
byte_length = 2
keepers = {
template_gcs_path = var.dataflow_template_path
template_gcs_path = local.dataflow_template_path
}
}

39 changes: 39 additions & 0 deletions replay.tf
@@ -0,0 +1,39 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

/*
The replay job should stay commented out while the main export pipeline is initially deployed.
When the replay job needs to be run, simply uncomment the resource and deploy the replay pipeline.
From the CLI, this may look like `terraform apply -target="google_dataflow_job.splunk_dataflow_replay"`.
After the deadletter Pub/Sub topic has no more messages, comment out the resource and run a regular Terraform deployment (e.g. `terraform apply`). Terraform will automatically destroy the replay job.

`terraform apply -target` usage documentation: https://www.terraform.io/docs/cli/commands/apply.html
*/

resource "google_dataflow_job" "splunk_dataflow_replay" {
name = local.dataflow_replay_job_name
template_gcs_path = local.dataflow_deadletter_template_gcs_path
temp_gcs_location = "gs://${local.dataflow_temporary_gcs_bucket_name}/${local.dataflow_temporary_gcs_bucket_path}"
machine_type = var.dataflow_job_machine_type
Member
The machine type and count specified are specific to Pub/Sub to Splunk template sizing. I wonder if we should leave those out for the Pub/Sub to Pub/Sub template and rely on the defaults, since it's an ephemeral pipeline? I'm fine either way.

Collaborator (Author)
I remember when we did this with Tempus: if the machine type for the replay pipeline was too small, it took quite some time (depending on the number of logs) to burn down the backlog with the Pub/Sub to Pub/Sub template. Perhaps we can just rely on the default, but give the opportunity for customization?

Member
SGTM

max_workers = var.dataflow_job_machine_count
parameters = {
inputSubscription = google_pubsub_subscription.dataflow_deadletter_pubsub_sub.id
outputTopic = google_pubsub_topic.dataflow_input_pubsub_topic.id
}
region = var.region
network = var.network
subnetwork = "regions/${var.region}/subnetworks/${local.subnet_name}"
ip_configuration = "WORKER_IP_PRIVATE"
# service_account_email = ""
}
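A minimal sketch of the targeted-apply workflow described in the comment at the top of `replay.tf` (the resource address is taken from this file; `-target` is standard Terraform CLI usage):

```shell
# Deploy only the replay job after uncommenting the resource above
$ terraform apply -target="google_dataflow_job.splunk_dataflow_replay"

# Once the deadletter subscription is drained, comment the resource back out
# and run a regular apply; Terraform will destroy the replay job
$ terraform apply
```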
6 changes: 6 additions & 0 deletions variables.tf
@@ -72,6 +72,12 @@ variable "splunk_hec_token" {

# Dataflow job parameters

variable "dataflow_template_version" {
type = string
description = "Dataflow template version for the replay job."
Member
I think this version variable applies to both templates/jobs. Just a minor update to the description (and the associated README table of params).

default = "latest"
}

variable "dataflow_template_path" {
Member
@npredey Should we remove this input variable, now that you have a similarly-named local variable?

description = "Dataflow template path. Defaults to latest version of Google-hosted Pub/Sub to Splunk template"
default = "gs://dataflow-templates/latest/Cloud_PubSub_to_Splunk"