Updates related to the MCP Prod deployment and code / document improvements #139

Merged 14 commits on Feb 13, 2025
6 changes: 4 additions & 2 deletions .github/workflows/secrets-detection.yaml
@@ -45,7 +45,7 @@ jobs:

# find the secrets in the repository
detect-secrets scan --disable-plugin AbsolutePathDetectorExperimental --baseline .secrets.new \
--exclude-files '\.secrets..*' \
--exclude-files '\.secrets\..*' \
--exclude-files '\.git.*' \
--exclude-files '\.pre-commit-config\.yaml' \
--exclude-files '\.mypy_cache' \
@@ -55,7 +55,9 @@
--exclude-files 'venv' \
--exclude-files 'dist' \
--exclude-files 'build' \
--exclude-files '.*\.egg-info'
--exclude-files '.*\.egg-info' \
--exclude-files '.*\.tfstate' \
--exclude-files '.*\.tfvars'

# if there is any difference between the known and newly detected secrets, break the build
# Function to compare secrets without listing them
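
The body of that comparison function is elided from this hunk. A minimal sketch of how such a check could work, assuming `jq` is available on the runner (the `compare_secrets` name and the exact fields used are illustrative, not the repository's actual implementation):

```shell
# Illustrative only: compare the hashed secrets recorded in the known baseline
# against the fresh scan, without printing the secrets themselves.
compare_secrets() {
  diff \
    <(jq -r '.results | to_entries[] | "\(.key):\(.value[].hashed_secret)"' .secrets.baseline | sort -u) \
    <(jq -r '.results | to_entries[] | "\(.key):\(.value[].hashed_secret)"' .secrets.new | sort -u) \
    >/dev/null || { echo "New secrets detected; failing the build." >&2; return 1; }
}
```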
84 changes: 9 additions & 75 deletions .secrets.baseline
@@ -125,7 +125,7 @@
{
"path": "detect_secrets.filters.regex.should_exclude_file",
"pattern": [
"\\.secrets..*",
"\\.secrets\\..*",
"\\.git.*",
"\\.pre-commit-config\\.yaml",
"\\.mypy_cache",
@@ -135,7 +135,9 @@
"venv",
"dist",
"build",
".*\\.egg-info"
".*\\.egg-info",
".*\\.tfstate",
".*\\.tfvars"
]
}
],
@@ -156,31 +158,31 @@
"filename": "terraform/README.md",
"hashed_secret": "f2d4e04179e44fa7386b985ac3c7ee4d95dfd65d",
"is_verified": false,
"line_number": 109,
"line_number": 116,
"is_secret": false
},
{
"type": "AWS Sensitive Information (Experimental Plugin)",
"filename": "terraform/README.md",
"hashed_secret": "659a4d010b74afeddbcb9c4e8eae01f4390eeacc",
"is_verified": false,
"line_number": 110,
"line_number": 117,
"is_secret": false
},
{
"type": "AWS Sensitive Information (Experimental Plugin)",
"filename": "terraform/README.md",
"hashed_secret": "bd3b85b91cb8cf6cfc6a4adc7a2505714939505b",
"is_verified": false,
"line_number": 110,
"line_number": 117,
"is_secret": false
},
{
"type": "Secret Keyword",
"filename": "terraform/README.md",
"hashed_secret": "a356cb3f3d1c9797cf59daf5b22fc0c7434d8dc7",
"is_verified": false,
"line_number": 114,
"line_number": 121,
"is_secret": false
}
],
@@ -229,75 +231,7 @@
"line_number": 295,
"is_secret": false
}
],
"terraform/variables/terraform.tfvars.dev": [
{
"type": "AWS Sensitive Information (Experimental Plugin)",
"filename": "terraform/variables/terraform.tfvars.dev",
"hashed_secret": "f2d4e04179e44fa7386b985ac3c7ee4d95dfd65d",
"is_verified": false,
"line_number": 3,
"is_secret": false
},
{
"type": "AWS Sensitive Information (Experimental Plugin)",
"filename": "terraform/variables/terraform.tfvars.dev",
"hashed_secret": "226201cd08f00a589068e569d01716d0ad488ae4",
"is_verified": false,
"line_number": 4,
"is_secret": false
},
{
"type": "AWS Sensitive Information (Experimental Plugin)",
"filename": "terraform/variables/terraform.tfvars.dev",
"hashed_secret": "4592cff3a9944664c9c182333782a5d551ec2516",
"is_verified": false,
"line_number": 4,
"is_secret": false
},
{
"type": "Secret Keyword",
"filename": "terraform/variables/terraform.tfvars.dev",
"hashed_secret": "b293afb11f1f9b32461ab510aacb65a27ccb6111",
"is_verified": false,
"line_number": 9,
"is_secret": false
}
],
"terraform/variables/terraform.tfvars.test": [
{
"type": "AWS Sensitive Information (Experimental Plugin)",
"filename": "terraform/variables/terraform.tfvars.test",
"hashed_secret": "f2d4e04179e44fa7386b985ac3c7ee4d95dfd65d",
"is_verified": false,
"line_number": 3,
"is_secret": false
},
{
"type": "AWS Sensitive Information (Experimental Plugin)",
"filename": "terraform/variables/terraform.tfvars.test",
"hashed_secret": "226201cd08f00a589068e569d01716d0ad488ae4",
"is_verified": false,
"line_number": 4,
"is_secret": false
},
{
"type": "AWS Sensitive Information (Experimental Plugin)",
"filename": "terraform/variables/terraform.tfvars.test",
"hashed_secret": "4592cff3a9944664c9c182333782a5d551ec2516",
"is_verified": false,
"line_number": 4,
"is_secret": false
},
{
"type": "Secret Keyword",
"filename": "terraform/variables/terraform.tfvars.test",
"hashed_secret": "b293afb11f1f9b32461ab510aacb65a27ccb6111",
"is_verified": false,
"line_number": 9,
"is_secret": false
}
]
},
"generated_at": "2025-02-04T19:12:34Z"
"generated_at": "2025-02-13T09:10:40Z"
}
42 changes: 42 additions & 0 deletions README.md
@@ -48,3 +48,45 @@ User documentation is managed with Sphinx, which is also installed in your Pytho
sphinx-build -b html docs/source docs/build/html

Publish the pages on gh-pages branch


## Secret Detection

The following commands can be used to detect secrets in the code.

1) Set up a Python virtual environment.

```shell
mkdir ~/Tools
python3 -m venv ~/Tools/detect-secrets
source ~/Tools/detect-secrets/bin/activate
pip install git+https://github.com/NASA-AMMOS/slim-detect-secrets.git@exp
```

2) Execute the following command in the Nucleus root directory to scan the code for secrets.

```shell
detect-secrets scan --disable-plugin AbsolutePathDetectorExperimental \
--exclude-files '\.secrets\..*' \
--exclude-files '\.git.*' \
--exclude-files '\.pre-commit-config\.yaml' \
--exclude-files '\.mypy_cache' \
--exclude-files '\.pytest_cache' \
--exclude-files '\.tox' \
--exclude-files '\.venv' \
--exclude-files 'venv' \
--exclude-files 'dist' \
--exclude-files 'build' \
--exclude-files '.*\.egg-info' \
--exclude-files '.*\.tfstate' \
--exclude-files '.*\.tfvars' \
> .secrets.baseline
```

3) Execute the following command in the Nucleus root directory to audit the potential secrets that were detected.

```shell
detect-secrets audit .secrets.baseline
```

The scan in step 2 creates a `.secrets.baseline` file in the Nucleus root directory, and the audit in step 3 records your decisions in it. Commit and push that file in order to pass the secret-detection checks in GitHub during a pull request.
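
Before committing, it can be useful to confirm that every flagged entry has actually been audited. A small optional check, assuming `jq` is installed (this command is a suggestion, not part of the official workflow):

```shell
# List baseline entries that have not yet been audited (no is_secret decision).
jq -r '.results | to_entries[] | select(any(.value[]; has("is_secret") | not)) | .key' .secrets.baseline
```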
77 changes: 49 additions & 28 deletions terraform/README.md
@@ -18,29 +18,32 @@ Therefore, as a result of the Terraform scripts in this directory following thin

## Prerequisites to Deploy Nucleus Baseline System

1. An AWS Account with permissions to deploy following AWS services
1. Some of the libraries used in the ECS containers of PDS Nucleus are platform-specific. Therefore, please execute the deployment
from an Amazon Linux EC2 instance with a 64-bit (x86) architecture.

2. An AWS Account with permissions to deploy the following AWS services
- Amazon Managed Workflows for Apache Airflow (MWAA)
- AWS Security Groups
- AWS S3 Bucket with relevant bucket policies
- ECS Cluster and ECS Tasks
- EFS File System
- ECR

2. Ability to get AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN for the AWS account
3. Ability to get AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN for the AWS account

3. Terraform is installed in local environment (This was tested with Terraform v1.5.7. Any higher version should also work)
4. Terraform is installed in the local environment (this was tested with Terraform v1.5.7; any higher version should also work)
 - Instructions to install Terraform are available at https://developer.hashicorp.com/terraform/tutorials/aws-get-started/install-cli

4. A VPC and one or more subnets should be available on AWS (obtain the VPC ID and subnet IDs from AWS console or from the AWS
5. A VPC and one or more subnets should be available on AWS (obtain the VPC ID and subnet IDs from AWS console or from the AWS
system admin team of your AWS account)

5. Docker service is installed and running (Instructions to install Docker: https://docs.docker.com/engine/install/)
6. Docker service is installed and running (Instructions to install Docker: https://docs.docker.com/engine/install/)

6. PDS Registry (OpenSearch) is accessible from the AWS account which is used to deploy PDS Nucleus)
7. PDS Registry (OpenSearch) is accessible from the AWS account which is used to deploy PDS Nucleus

7. A Cognito User Pool to manage Nucleus users
8. A Cognito User Pool to manage Nucleus users

8. A certificate to be used for the ALB Listener facing Airflow UI
9. A certificate to be used for the ALB Listener facing Airflow UI


## Steps to Deploy the PDS Nucleus Baseline System
@@ -81,6 +84,9 @@ Note: Examples of `terraform.tfvars` files are available at `terraform/variable
- pds_node_names = List of PDS Node names to be supported (E.g.: ["PDS_SBN", "PDS_IMG", "PDS_EN"]). The following node name format should be used.
- (PDS_ATM, PDS_ENG, PDS_GEO, PDS_IMG, PDS_NAIF, PDS_RMS, PDS_SBN, PSA, JAXA, ROSCOSMOS)
- Please check https://nasa-pds.github.io/registry/user/harvest_job_configuration.html for PDS Node name descriptions.

- pds_nucleus_opensearch_url : OpenSearch URL to be used with the Harvest tool
- pds_nucleus_opensearch_registry_names : List of Node-specific OpenSearch registry names (E.g.: ["pds-nucleus-sbn-registry", "pds-nucleus-img-registry"])
- pds_nucleus_opensearch_urls : List of Node specific OpenSearch URLs (E.g.: ["https://abcdef.us-west-2.aoss.amazonaws.com", "https://opqrst.us-west-2.aoss.amazonaws.com"])
- pds_nucleus_opensearch_credential_relative_url : Opensearch Credential URL (E.g.: "http://<IP ADDRESS>/AWS_CONTAINER_CREDENTIALS_RELATIVE_URI")
- pds_nucleus_harvest_replace_prefix_with_list : List of harvest replace with strings (E.g.: ["s3://pds-sbn-nucleus-staging","s3://pds-img-nucleus-staging"])
Expand Down Expand Up @@ -121,7 +127,8 @@ aws_secretmanager_key_arn = "arn:aws:kms:us-west-2:12345678:key/12345-12
# Please check https://nasa-pds.github.io/registry/user/harvest_job_configuration.html for PDS Node name descriptions.

pds_node_names = ["PDS_SBN", "PDS_IMG"]
pds_nucleus_opensearch_urls = ["https://abcdef.us-west-2.aoss.amazonaws.com", "https://opqrst.us-west-2.aoss.amazonaws.com"]
pds_nucleus_opensearch_url = "https://abcdef.us-west-2.aoss.amazonaws.com"
pds_nucleus_opensearch_registry_names = ["pds-nucleus-sbn-registry", "pds-nucleus-img-registry"]
pds_nucleus_opensearch_credential_relative_url = "http://<IP ADDRESS>/AWS_CONTAINER_CREDENTIALS_RELATIVE_URI"
pds_nucleus_harvest_replace_prefix_with_list = ["s3://pds-sbn-nucleus-staging", "s3://pds-img-nucleus-staging"]

@@ -171,40 +178,54 @@ terraform apply

8. Wait for `terraform apply` command to be completed. If it fails due to expiration of AWS credentials, please provide a new set of AWS credentials and execute `terraform apply` again.

9. Login to the AWS Console with your AWS Account.
9. Note the `pds_nucleus_airflow_ui_url` printed as an output at the end of the `terraform apply` command results.

10. Make sure that the correct AWS Region is selected and search for "Managed Apache Airflow".
Example:

11. Visit the "Managed Apache Airflow" (Amazon MWAA) page and check the list of environments.
```shell
Outputs:

12. Find the relevant Amazon MWAA environment (Default name: PDS-Nucleus-Airflow-Env) and click on
Open Airflow UI link to open the Airflow UI.
pds_nucleus_airflow_ui_url = "https://pds-nucleus-12345678.us-west-2.elb.amazonaws.com:4443/aws_mwaa/aws-console-sso"
```

13. The DAGs can be added to the Airflow by uploading Airflow DAG files to the DAG folder of S3 bucket
configured as `mwaa_dag_s3_bucket_name` in the `terraform.tfvars` file.
10. Login to the AWS Console with your AWS Account.

14. Go to the AWS Secret manager (https://us-west-2.console.aws.amazon.com/secretsmanager/listsecrets?region=us-west-2) and locate the secrets in the following format.
- pds/nucleus/opensearch/creds/<PDS NODE NAME>/user
- pds/nucleus/opensearch/creds/<PDS NODE NAME>/password

E.g.:
- pds/nucleus/opensearch/creds/PDS_IMG/user
- pds/nucleus/opensearch/creds/PDS_SBN/user
- pds/nucleus/opensearch/creds/PDS_IMG/password
- pds/nucleus/opensearch/creds/PDS_SBN/password
11. Make sure that the correct AWS Region is selected and search for "Managed Apache Airflow".

15. Obtain the Opensearch username and password for each PDS Node and update the above secrets with relevant usernames and passwords.
- To update a secret, click on a secret -> Retrieve secret value -> Edit -> Save
12. Visit the "Managed Apache Airflow" (Amazon MWAA) page and check the list of environments.

13. Find the relevant Amazon MWAA environment (Default name: PDS-Nucleus-Airflow-Env) and click on
Open Airflow UI link to open the Airflow UI.

15. Use the PDS Data Upload Manager (DUM) tool to upload files to pds_nucleus_staging_bucket.
14. The DAGs can be added to Airflow by uploading Airflow DAG files to the DAG folder of the S3 bucket
configured as `mwaa_dag_s3_bucket_name` in the `terraform.tfvars` file.
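
For example, assuming a DAG file named `my_dag.py` and the bucket configured as `mwaa_dag_s3_bucket_name` (both names illustrative here), the upload could be done with the AWS CLI:

```shell
# Upload a DAG file to the MWAA DAG folder (dags/ is the MWAA default prefix).
aws s3 cp my_dag.py s3://<mwaa_dag_s3_bucket_name>/dags/
```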


## Steps to Access Nucleus Airflow UI With Cognito Credentials

Only some users have direct access to AWS, and those users can access the Airflow UI as explained in steps 9 to 12
in the above section. However, there is another way to access the Airflow UI, using a Cognito account, as follows.

### Approach 1: Using the Web Based Login

1. Make sure you have a Cognito user created in the Cognito user pool with the required role (Cognito group). The PDS Engineering Node team can
help with this.

2. Access the `pds_nucleus_airflow_ui_url` obtained in step 9 of the section above.

Example:

```shell
Outputs:

pds_nucleus_airflow_ui_url = "https://pds-nucleus-12345678.us-west-2.elb.amazonaws.com:4443/aws_mwaa/aws-console-sso"
```

3. Use the Cognito username and password to login.


### Approach 2: Using a Web Token

1. Make sure you have a Cognito user created in the Cognito user pool with the required role (Cognito group). The PDS Engineering Node team can
help with this.
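
The remaining steps of this approach are elided from this diff. For general reference, a web login token for an MWAA environment can be created with the AWS CLI, assuming the caller's credentials allow `airflow:CreateWebLoginToken` (the environment name shown is this deployment's default, and this command may differ from the token flow this PR actually implements):

```shell
aws mwaa create-web-login-token --name PDS-Nucleus-Airflow-Env
```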

7 changes: 6 additions & 1 deletion terraform/main.tf
@@ -102,7 +102,8 @@ module "product-copy-completion-checker" {
pds_nucleus_cold_archive_bucket_name_postfix = var.pds_nucleus_cold_archive_bucket_name_postfix

pds_node_names = var.pds_node_names
pds_nucleus_opensearch_urls = var.pds_nucleus_opensearch_urls
pds_nucleus_opensearch_url = var.pds_nucleus_opensearch_url
pds_nucleus_opensearch_registry_names = var.pds_nucleus_opensearch_registry_names
pds_nucleus_opensearch_credential_relative_url = var.pds_nucleus_opensearch_credential_relative_url
pds_nucleus_harvest_replace_prefix_with_list = var.pds_nucleus_harvest_replace_prefix_with_list

@@ -142,3 +143,7 @@ module "cognito-auth" {
aws_elb_account_id_for_the_region = var.aws_elb_account_id_for_the_region
}

# Output the ALB URL for Airflow UI
output "pds_nucleus_airflow_ui_url" {
value = nonsensitive(module.cognito-auth.pds_nucleus_airflow_ui_url)
}
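
Because the URL is exposed as a root-level Terraform output, it can be read back at any time without re-running the full apply:

```shell
# Print the Airflow UI URL from the saved Terraform state.
terraform output pds_nucleus_airflow_ui_url
```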
4 changes: 4 additions & 0 deletions terraform/terraform-modules/cognito-auth/cognito-auth.tf
@@ -443,3 +443,7 @@ resource "aws_cognito_user_group" "pds_nucleus_viewer_cognito_user_group" {
precedence = 65
role_arn = aws_iam_role.pds_nucleus_viewer_role.arn
}

output "pds_nucleus_airflow_ui_url" {
value = "https://${aws_lb.pds_nucleus_auth_alb.dns_name}:${var.auth_alb_listener_port}/aws_mwaa/aws-console-sso"
}
@@ -136,6 +136,9 @@ def login(headers, query_params=None, user_claims=None,iam_role_arn=None):
except Exception as error:
logger.error(str(error))

if not redirect:
redirect = close(headers, "Login Failed. Please check your Cognito user groups", status_code=401)

return redirect

def get_mwaa_client(role_arn, user):
1 change: 0 additions & 1 deletion terraform/terraform-modules/cognito-auth/variables.tf
@@ -43,7 +43,6 @@ variable "auth_alb_name" {
variable "auth_alb_listener_port" {
description = "Auth ALB Listener Port"
type = string
sensitive = true
}

variable "auth_alb_listener_certificate_arn" {
@@ -5,13 +5,13 @@ aws ecr get-login-password --region us-west-2 | docker login --username AWS --pa

# Deploy pds-nucleus-config-init ECR image
cd ./terraform-modules/ecs-ecr/docker/config-init
docker build -t pds-nucleus-config-init .
docker build --platform linux/amd64 -t pds-nucleus-config-init .
docker tag pds-nucleus-config-init:latest "${pds_nucleus_aws_account_id}".dkr.ecr.us-west-2.amazonaws.com/pds-nucleus-config-init:latest
docker push "${pds_nucleus_aws_account_id}".dkr.ecr.us-west-2.amazonaws.com/pds-nucleus-config-init:latest

# Deploy pds-nucleus-s3-to-efs-copy ECR image
cd ../s3-to-efs-copy
docker build -t pds-nucleus-s3-to-efs-copy .
docker build --platform linux/amd64 -t pds-nucleus-s3-to-efs-copy .
docker tag pds-nucleus-s3-to-efs-copy:latest "${pds_nucleus_aws_account_id}".dkr.ecr.us-west-2.amazonaws.com/pds-nucleus-s3-to-efs-copy:latest
docker push "${pds_nucleus_aws_account_id}".dkr.ecr.us-west-2.amazonaws.com/pds-nucleus-s3-to-efs-copy:latest
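
A quick way to verify that the pushed images were built for the intended platform (an optional check, not part of the original script):

```shell
docker image inspect pds-nucleus-config-init:latest --format '{{.Os}}/{{.Architecture}}'
# expected output: linux/amd64
```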
