Skip to content

Commit

Permalink
Draft for put landing page; identified TODOs (#34)
Browse files Browse the repository at this point in the history
* Draft for put landing page; identified TODOs
Issue: #25

* Completed tf surgery; Identify all TODOs in golang (#35)

* Complete tf surgery; Identify all TODOs in golang
For #25

* fix compile error; progress in metadata cronjob add query

* Ready to test (#36)

* Ready to test

* Fix db field first char not lowercase
Tracked by #25 (comment)

* Fix permission of db index, S3 pull
Tracked by #25 (comment)

* All tests complete
Tracked by #25 (comment)
  • Loading branch information
rivernews authored Sep 29, 2022
1 parent eb61eef commit 0e1ee8f
Show file tree
Hide file tree
Showing 23 changed files with 646 additions and 329 deletions.
9 changes: 5 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
**credential**
**/builds/**

lambda_golang/landing
lambda_golang/stories
lambda_golang/landing_metadata
lambda_golang/story
lambda_golang/*
!lambda_golang/go.mod
!lambda_golang/go.sum
!lambda_golang/*/
!lambda_golang/*/**
venv

# Binaries for programs and plugins
Expand Down
5 changes: 3 additions & 2 deletions cloud_environments/terraform.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,10 @@ set +o allexport
if (
cd $GOLANG_SRC_DIR && \
go build ./cmd/landing && \
go build ./cmd/landing_metadata && \
go build ./cmd/landing_metadata_cronjob && \
go build ./cmd/stories && \
go build ./cmd/story && \
go build ./cmd/stories_finalizer && \
cd $PYTHON_SRC_DIR && python -m compileall layer src
); then
cd $DEPLOY_DIR
Expand All @@ -37,7 +38,7 @@ if (
# https://github.com/terraform-aws-modules/terraform-aws-step-functions/issues/20
# terraform "$@" \
# -target=module.main.module.scraper_lambda \
# -target=module.main.module.landing_parse_metadata_lambda
# -target=module.main.module.landing_metadata_cronjob_lambda

terraform "$@"
else
Expand Down
19 changes: 16 additions & 3 deletions cloud_module/dynamodb/table.tf
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
resource "aws_ssm_parameter" "media_table" {
name = "/app/media-literacy/table"
type = "String"
value = aws_dynamodb_table.media_table.arn
value = "${aws_dynamodb_table.media_table.arn},${aws_dynamodb_table.media_table.id}"
}

// https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/dynamodb_table#attributes-reference
resource "aws_dynamodb_table" "media_table" {
name = "Mediatable"
name = "${title(replace("${var.project_alias}_${var.environment_name}", "-", "_"))}"
billing_mode = "PROVISIONED"
read_capacity = 20
write_capacity = 20
Expand All @@ -23,8 +23,12 @@ resource "aws_dynamodb_table" "media_table" {
type = "S"
}

attribute {
name = "s3Key"
type = "S"
}

// other fields
// S3 key
// docType = {landing | story | landingMetadata | ...}
// events

Expand Down Expand Up @@ -58,6 +62,15 @@ resource "aws_dynamodb_table" "media_table" {
non_key_attributes = ["s3Key"]
}

global_secondary_index {
name = "s3KeyIndex"
hash_key = "s3Key"
range_key = "createdAt"
write_capacity = 10
read_capacity = 10
projection_type = "KEYS_ONLY"
}

tags = {
Project = local.project_name
Environment = var.environment_name
Expand Down
4 changes: 4 additions & 0 deletions cloud_module/pipeline/global_ssm.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,8 @@ data aws_ssm_parameter media_table {
# Values unpacked from comma-separated SSM parameter strings.
locals {
# The newssite_economy parameter is split on ","; the third token (index 2) is used as the site alias.
newssite_economy_tokens = split(",", data.aws_ssm_parameter.newssite_economy.value)
newssite_economy_alias = local.newssite_economy_tokens[2]

# The media_table parameter is split on ","; token 0 is treated as the table ARN and token 1 as the table id.
# NOTE(review): presumably matches the "<arn>,<id>" value written by the dynamodb module's SSM parameter — confirm.
_media_table_tokens = split(",", data.aws_ssm_parameter.media_table.value)
media_table_arn = local._media_table_tokens[0]
media_table_id = local._media_table_tokens[1]
}
10 changes: 9 additions & 1 deletion cloud_module/pipeline/lambda.tf
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ module "step_function" {
module "scraper_lambda" {
source = "terraform-aws-modules/lambda/aws"
create_function = true
function_name = "${local.project_name}-scraper-lambda"
function_name = "${local.project_name}-landing-lambda"
description = "Lambda function for scraping"
handler = "landing"
runtime = "go1.x"
Expand All @@ -82,6 +82,13 @@ module "scraper_lambda" {

attach_policy_statements = true
policy_statements = {
allow_db_query = {
effect = "Allow",
actions = [
"dynamodb:PutItem"
],
resources = [local.media_table_arn]
}
s3_archive_bucket = {
effect = "Allow",
actions = [
Expand All @@ -98,6 +105,7 @@ module "scraper_lambda" {
S3_ARCHIVE_BUCKET = data.aws_s3_bucket.archive.id

NEWSSITE_ECONOMY = data.aws_ssm_parameter.newssite_economy.value
DYNAMODB_TABLE_ID = local.media_table_id
}

tags = {
Expand Down
38 changes: 0 additions & 38 deletions cloud_module/pipeline/landing_s3_trigger.tf

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,35 +1,31 @@
module "stories_queue" {
source = "terraform-aws-modules/sqs/aws"
version = ">= 2.0, < 3.0"

# SQS queue attributes: https://docs.aws.amazon.com/AWSSimpleQueueService/latest/APIReference/API_CreateQueue.html

# FIFO queue should append suffix .fifo
name = "${local.project_name}-stories-queue"

delay_seconds = 0

# so we can use per-message delay
fifo_queue = false

# FIFO queue only
# content_based_deduplication = true

visibility_timeout_seconds = 3600
resource "aws_s3_bucket_notification" "bucket_notification" {
bucket = data.aws_s3_bucket.archive.id

lambda_function {
lambda_function_arn = module.landing_metadata_s3_trigger_lambda.lambda_function_arn
events = ["s3:ObjectCreated:*"]
filter_prefix = "${local.newssite_economy_alias}/"
filter_suffix = "/metadata.json"
}

# enable long polling
receive_wait_time_seconds = 10
depends_on = [
aws_lambda_permission.allow_bucket_trigger_by_landing_metadata
]
}

tags = {
Project = local.project_name
}
resource "aws_lambda_permission" "allow_bucket_trigger_by_landing_metadata" {
statement_id = "AllowExecutionFromS3Bucket"
action = "lambda:InvokeFunction"
function_name = module.landing_metadata_s3_trigger_lambda.lambda_function_arn
principal = "s3.amazonaws.com"
source_arn = data.aws_s3_bucket.archive.arn
}

module "stories_queue_consumer_lambda" {
module "landing_metadata_s3_trigger_lambda" {
source = "terraform-aws-modules/lambda/aws"

create_function = true
function_name = "${local.project_name}-fetch-stories"
function_name = "${local.project_name}-stories-lambda"
description = "Fetch ${local.project_name} stories; triggered by metadata.json creation"
handler = "stories"
runtime = "go1.x"
Expand Down Expand Up @@ -62,30 +58,20 @@ module "stories_queue_consumer_lambda" {
}
EOF

# event source mapping for long polling
event_source_mapping = {
sqs = {
event_source_arn = module.stories_queue.this_sqs_queue_arn
batch_size = 1
}
}
allowed_triggers = {
sqs = {
principal = "sqs.amazonaws.com"
source_arn = module.stories_queue.this_sqs_queue_arn
}
}
attach_policy_statements = true
policy_statements = {
pull_sqs = {
allow_db_put = {
effect = "Allow",
actions = ["sqs:ReceiveMessage", "sqs:DeleteMessage", "sqs:GetQueueAttributes"],
resources = [module.stories_queue.this_sqs_queue_arn]
actions = [
"dynamodb:UpdateItem",
],
resources = [
local.media_table_arn,
]
}
s3_archive_bucket = {
effect = "Allow",
actions = [
"s3:PutObject",
"s3:GetObject"
],
resources = [
Expand All @@ -107,8 +93,10 @@ EOF
SLACK_WEBHOOK_URL = var.slack_post_webhook_url
LOGLEVEL = "DEBUG"
ENV = local.environment
DEBUG = "true"

S3_ARCHIVE_BUCKET = data.aws_s3_bucket.archive.id
DYNAMODB_TABLE_ID = local.media_table_id
SFN_ARN = module.batch_stories_sfn.state_machine_arn
}

Expand Down
87 changes: 87 additions & 0 deletions cloud_module/pipeline/scheduler.tf
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,90 @@ data "aws_iam_policy_document" "scheduler" {
}
}
}


# Hourly schedule rule for the landing-metadata job.
# Only created when var.environment_name is the empty string (count = 1);
# for any other environment name no rule is created (count = 0).
resource "aws_cloudwatch_event_rule" "landing_metadata_scheduler" {
  count = var.environment_name == "" ? 1 : 0

  name = "${local.project_name}-schedule-start-metadata-for-landing"

  # Schedule expression syntax:
  # https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html
  # When the rate value is exactly 1 the unit must be singular ("hour");
  # "rate(1 hours)" is rejected by EventBridge as an invalid ScheduleExpression.
  schedule_expression = "rate(1 hour)"
  description         = "Every hour to give courtesy to the website"
}

# Points the landing-metadata schedule rule at the cronjob lambda.
# Uses the same count condition as the rule it references, so the target
# exists exactly when the rule does.
# NOTE(review): EventBridge needs permission to invoke the lambda; no
# aws_lambda_permission / allowed_triggers for events.amazonaws.com is visible
# in this part of the file — confirm it is granted elsewhere.
resource "aws_cloudwatch_event_target" "landing_metadata_scheduler_event_target" {
  count = var.environment_name == "" ? 1 : 0

  # The rule is a counted resource, so address its single instance by index.
  rule      = aws_cloudwatch_event_rule.landing_metadata_scheduler[0].name
  target_id = "${local.project_name}-schedule-start-metadata-for-landing-event-target"
  arn       = module.landing_metadata_cronjob_lambda.lambda_function_arn
}

# Go lambda built from ./cmd/landing_metadata_cronjob.
# Per its description it queries landing pages in the DB and computes/archives
# their metadata; the IAM statements below grant DynamoDB query/update and
# S3 read/write/list access for that purpose.
module "landing_metadata_cronjob_lambda" {
  source = "terraform-aws-modules/lambda/aws"

  create_function = true
  function_name   = "${local.project_name}-landing-metadata-cronjob-lambda"
  description     = "Query landing pages in db; compute & archive their metadata"
  handler         = "landing_metadata_cronjob"
  runtime         = "go1.x"
  publish         = true

  timeout                           = 900
  cloudwatch_logs_retention_in_days = 7

  # Build the binary inside the Go source tree, then zip only that binary.
  source_path = [
    {
      path     = "${var.repo_dir}/lambda_golang/"
      commands = [
        "${local.go_build_flags} go build ./cmd/landing_metadata_cronjob",
        ":zip",
      ]
      patterns = ["landing_metadata_cronjob"]
    },
  ]

  attach_policy_statements = true
  policy_statements = {
    # Query/update on the media table itself and on its metadataIndex index.
    allow_db_query = {
      effect = "Allow"
      actions = [
        "dynamodb:Query",
        "dynamodb:UpdateItem",
      ]
      resources = [
        local.media_table_arn,
        "${local.media_table_arn}/index/metadataIndex",
      ]
    }

    # Object-level read/write on every key in the archive bucket.
    s3_archive_bucket = {
      effect = "Allow"
      actions = [
        "s3:GetObject",
        "s3:PutObject",
      ]
      resources = [
        "${data.aws_s3_bucket.archive.arn}/*",
      ]
    }

    # Bucket-level ListBucket so a missing object yields 404 rather than 403.
    # https://stackoverflow.com/a/19808954/9814131
    s3_archive_bucket_check_404 = {
      effect = "Allow"
      actions = [
        "s3:ListBucket",
      ]
      resources = [
        data.aws_s3_bucket.archive.arn,
      ]
    }
  }

  environment_variables = {
    SLACK_WEBHOOK_URL = var.slack_post_webhook_url
    LOG_LEVEL         = "DEBUG"
    DEBUG             = "true"
    S3_ARCHIVE_BUCKET = data.aws_s3_bucket.archive.id
    DYNAMODB_TABLE_ID = local.media_table_id
  }

  tags = {
    Project = local.project_name
  }
}
6 changes: 6 additions & 0 deletions cloud_module/pipeline/sfn_def/batch_stories_def.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"Parameters": {
"story.$": "$$.Map.Item.Value",
"newsSiteAlias.$": "$.newsSiteAlias",
"landingPageUuid.$": "$.landingPageUuid",
"landingPageTimeStamp.$": "$.landingPageTimeStamp"
},
"Iterator": {
Expand All @@ -32,6 +33,11 @@
}
}
},
"Next": "Stories-Finalizer"
},
"Stories-Finalizer": {
"Type":"Task",
"Resource": "${STORIES_FINALIZER_LAMBDA_ARN}",
"End": true
}
}
Expand Down
Loading

0 comments on commit 0e1ee8f

Please sign in to comment.