diff --git a/cloud-infrastructure/terraform/environments/production/aws/data/main.tf b/cloud-infrastructure/terraform/environments/production/aws/data/main.tf
index abacf4db5e..2f9761f0b1 100644
--- a/cloud-infrastructure/terraform/environments/production/aws/data/main.tf
+++ b/cloud-infrastructure/terraform/environments/production/aws/data/main.tf
@@ -52,7 +52,7 @@ data "external" "hm_local_tracker_sink_plugin" {
   program = ["bash", "files/amazon-msk/${var.environment}-tracker-kafka/plugins/build.sh"]
   query = {
     kafka_plugin_name                              = local.tracker_kafka_sink_plugin_name
-    snowflake_kafka_connector_version              = "2.2.2" # https://github.com/snowflakedb/snowflake-kafka-connector/releases
+    snowflake_kafka_connector_version              = "2.2.2" # https://mvnrepository.com/artifact/com.snowflake/snowflake-kafka-connector
     bc_fips_version                                = "1.0.2.5" # https://mvnrepository.com/artifact/org.bouncycastle/bc-fips
     bcpkix_fips_version                            = "1.0.7" # https://mvnrepository.com/artifact/org.bouncycastle/bcpkix-fips
     confluent_kafka_connect_avro_converter_version = "7.6.1" # https://www.confluent.io/hub/confluentinc/kafka-connect-avro-converter
diff --git a/cloud-infrastructure/terraform/environments/production/aws/general/main.tf b/cloud-infrastructure/terraform/environments/production/aws/general/main.tf
index efc0e913ad..64847a0747 100644
--- a/cloud-infrastructure/terraform/environments/production/aws/general/main.tf
+++ b/cloud-infrastructure/terraform/environments/production/aws/general/main.tf
@@ -408,6 +408,7 @@ module "hm_glue_crawler_motor_data" {
   aws_glue_crawler_name         = "hm-delta-lake-crawler-iot"
   aws_glue_crawler_delta_tables = ["s3://hm-production-bucket/delta-tables/motor_data/"]
   aws_glue_database             = "production_hm_delta_db"
+  schedule                      = "cron(40 9 * * ? *)" # Every day at 9:40 UTC https://crontab.cronhub.io/
   iam_role_arn                  = "arn:aws:iam::272394222652:role/service-role/AWSGlueServiceRole-hm"
   environment                   = var.environment
   team                          = var.team
diff --git a/cloud-infrastructure/terraform/modules/aws/hm_aws_glue_crawler/main.tf b/cloud-infrastructure/terraform/modules/aws/hm_aws_glue_crawler/main.tf
index 79c9995b89..fdf027bf32 100644
--- a/cloud-infrastructure/terraform/modules/aws/hm_aws_glue_crawler/main.tf
+++ b/cloud-infrastructure/terraform/modules/aws/hm_aws_glue_crawler/main.tf
@@ -12,13 +12,18 @@ resource "aws_glue_crawler" "hm_aws_glue_crawler" {
   name          = var.aws_glue_crawler_name
   role          = var.iam_role_arn
   database_name = var.aws_glue_database
+  schedule      = var.schedule
   delta_target {
     delta_tables              = var.aws_glue_crawler_delta_tables
-    create_native_delta_table = false
+    create_native_delta_table = true
     write_manifest            = false
   }
   schema_change_policy {
     delete_behavior = "LOG"
+    update_behavior = "LOG"
+  }
+  lineage_configuration {
+    crawler_lineage_settings = "ENABLE"
   }
   configuration = jsonencode(
     {
@@ -34,8 +39,8 @@ resource "aws_glue_crawler" "hm_aws_glue_crawler" {
     }
   )
   tags = {
-    Environment  = var.environment
-    Team         = var.team
-    ResourceName = var.aws_glue_crawler_name
+    Environment = var.environment
+    Team        = var.team
+    Name        = var.aws_glue_crawler_name
   }
 }
diff --git a/cloud-infrastructure/terraform/modules/aws/hm_aws_glue_crawler/variables.tf b/cloud-infrastructure/terraform/modules/aws/hm_aws_glue_crawler/variables.tf
index 73a2441cda..97a0971a5c 100644
--- a/cloud-infrastructure/terraform/modules/aws/hm_aws_glue_crawler/variables.tf
+++ b/cloud-infrastructure/terraform/modules/aws/hm_aws_glue_crawler/variables.tf
@@ -7,6 +7,11 @@ variable "aws_glue_crawler_delta_tables" {
 variable "aws_glue_database" {
   type = string
 }
+variable "schedule" {
+  # Optional AWS cron expression, e.g. "cron(40 9 * * ? *)"; null leaves the crawler without a schedule (run on demand).
+  type    = string
+  default = null
+}
 variable "iam_role_arn" {
   type = string
 }