diff --git a/.devcontainer/devcontainer-lock.json b/.devcontainer/devcontainer-lock.json new file mode 100644 index 0000000..78b0ca3 --- /dev/null +++ b/.devcontainer/devcontainer-lock.json @@ -0,0 +1,19 @@ +{ + "features": { + "ghcr.io/devcontainers/features/docker-in-docker:2": { + "version": "2.10.1", + "resolved": "ghcr.io/devcontainers/features/docker-in-docker@sha256:440bdb81cf8af43f3e922450d33db9775c1097340557a5b7b6fe705bc758c5ef", + "integrity": "sha256:440bdb81cf8af43f3e922450d33db9775c1097340557a5b7b6fe705bc758c5ef" + }, + "ghcr.io/devcontainers/features/python:1": { + "version": "1.4.1", + "resolved": "ghcr.io/devcontainers/features/python@sha256:d7e393af2440444dddb3c275cf7f90c899a24f8e853e4d6315e1be3be7e1d49f", + "integrity": "sha256:d7e393af2440444dddb3c275cf7f90c899a24f8e853e4d6315e1be3be7e1d49f" + }, + "ghcr.io/ministryofjustice/devcontainer-feature/aws:0": { + "version": "0.0.2", + "resolved": "ghcr.io/ministryofjustice/devcontainer-feature/aws@sha256:db720f840ce5015117b1b1e7649dc59b8ac6b34a8786f07ab727dd081140737f", + "integrity": "sha256:db720f840ce5015117b1b1e7649dc59b8ac6b34a8786f07ab727dd081140737f" + } + } +} \ No newline at end of file diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..7712cc5 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,28 @@ +{ + "name": "analytical-platform-ingestion-scan", + "image": "ghcr.io/ministryofjustice/devcontainer-base:latest", + "features": { + "ghcr.io/devcontainers/features/docker-in-docker:2": {}, + "ghcr.io/devcontainers/features/python:1": { + "version": "3.12", + "installTools": false + }, + "ghcr.io/ministryofjustice/devcontainer-feature/aws:0": {} + }, + "postCreateCommand": "bash .devcontainer/post-create.sh", + "customizations": { + "vscode": { + "extensions": [ + "EditorConfig.EditorConfig", + "GitHub.vscode-github-actions", + "GitHub.vscode-pull-request-github", + "ms-python.python", + "ms-python.pylint", + 
"ms-python.black-formatter", + "ms-python.isort", + "ms-python.flake8", + "ms-python.autopep8" + ] + } + } +} diff --git a/.devcontainer/post-create.sh b/.devcontainer/post-create.sh new file mode 100755 index 0000000..e2a26e1 --- /dev/null +++ b/.devcontainer/post-create.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +# Upgrade Pip +pip install --upgrade pip + +# Install dependencies +pip install --requirement requirements-dev.txt diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..76975a3 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,23 @@ +root = true + +[*] +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true + +[*.json] +indent_style = space +indent_size = 2 + +[{*.sh,bootstrap}] +indent_style = space +indent_size = 2 + +[{*.yml,*.yaml}] +indent_style = space +indent_size = 2 + +# This file is autogenerated +[.devcontainer/devcontainer-lock.json] +end_of_line = unset +insert_final_newline = unset diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..aa079ec --- /dev/null +++ b/.flake8 @@ -0,0 +1,2 @@ +[flake8] +max-line-length=120 diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index ac066e6..1138e83 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,3 +1 @@ -# Add a team or username to this file -# Example: -# * @ministryofjustice/operations-engineering +* @ministryofjustice/analytical-platform diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 15fe7f0..e70af8c 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,37 +1,20 @@ --- -# To get started with Dependabot version updates, you'll need to specify which -# package ecosystems to update and where the package manifests are located. 
-# Please see the documentation for all configuration options: -# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file - version: 2 updates: - - package-ecosystem: "bundler" - directory: "/" - schedule: - interval: "daily" - - package-ecosystem: "terraform" - directory: "/terraform" - schedule: - interval: "daily" - package-ecosystem: "github-actions" directory: "/" schedule: interval: "daily" - - package-ecosystem: "pip" + - package-ecosystem: "devcontainers" directory: "/" schedule: interval: "daily" - - package-ecosystem: "npm" - directory: "/" - schedule: - interval: "daily" - - package-ecosystem: "gomod" + - package-ecosystem: "docker" directory: "/" schedule: interval: "daily" - - package-ecosystem: "docker" + - package-ecosystem: "pip" directory: "/" schedule: interval: "daily" diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml new file mode 100644 index 0000000..f082648 --- /dev/null +++ b/.github/workflows/build-and-test.yml @@ -0,0 +1,26 @@ +--- +name: Build and Test + +on: + pull_request: + branches: + - main + +permissions: {} + +jobs: + build-and-test: + name: Build and Test + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout + id: checkout + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Build and Test + id: build_and_test + shell: bash + run: | + bash scripts/build-and-test.sh diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index 1d2830c..637b9f1 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -3,6 +3,8 @@ name: Dependency Review on: pull_request: + branches: + - main types: - edited - opened @@ -22,7 +24,8 @@ jobs: id: checkout uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - name: Dependency Review + - name: Dependency review + id: 
dependency_review
         uses: actions/dependency-review-action@9129d7d40b8c12c1ed0f60400d00c92d437adcce # v4.1.3
         with:
           fail-on-severity: critical
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..274caec
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,62 @@
+---
+name: Release
+
+on:
+  push:
+    tags:
+      - "*"
+
+permissions: {}
+
+jobs:
+  release:
+    name: Release
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      id-token: write
+      packages: write
+    steps:
+      - name: Checkout
+        id: checkout
+        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2
+
+      - name: Install cosign
+        id: install_cosign
+        uses: sigstore/cosign-installer@e1523de7571e31dbe865fd2e80c5c7c23ae71eb4 # v3.4.0
+
+      - name: Configure AWS Credentials
+        id: configure_aws_credentials
+        uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
+        with:
+          aws-region: eu-west-2
+          role-to-assume: arn:aws:iam::730335344807:role/modernisation-platform-oidc-cicd
+
+      - name: Login to Amazon ECR
+        id: login_ecr
+        uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076
+        with:
+          registries: 374269020027
+
+      - name: Build and Push
+        id: build_and_push
+        uses: docker/build-push-action@af5a7ed5ba88268d5278f7203fb52cd833f66d6e # v5.2.0
+        with:
+          push: true
+          tags: 374269020027.dkr.ecr.eu-west-2.amazonaws.com/analytical-platform-scan-ecr-repo:${{ github.ref_name }}
+
+      - name: Sign
+        id: sign
+        shell: bash
+        run: |
+          cosign sign --yes 374269020027.dkr.ecr.eu-west-2.amazonaws.com/analytical-platform-scan-ecr-repo@${{ steps.build_and_push.outputs.digest }}
+
+      # cosign checks the signing certificate against this identity, so it must
+      # name the release workflow of this repository (the one that signs above).
+      - name: Verify
+        id: verify
+        run: |
+          cosign verify \
+            --certificate-oidc-issuer=https://token.actions.githubusercontent.com \
+            --certificate-identity=https://github.com/ministryofjustice/analytical-platform-ingestion-scan/.github/workflows/release.yml@refs/tags/${{ github.ref_name }} \
+            
374269020027.dkr.ecr.eu-west-2.amazonaws.com/analytical-platform-scan-ecr-repo@${{ steps.build_and_push.outputs.digest }} diff --git a/.github/workflows/scan-image.yml b/.github/workflows/scan-image.yml new file mode 100644 index 0000000..8f501d9 --- /dev/null +++ b/.github/workflows/scan-image.yml @@ -0,0 +1,57 @@ +--- +name: Scan Image + +on: + pull_request: + branches: + - main + +permissions: {} + +jobs: + scan-image: + name: Scan Image + runs-on: ubuntu-latest + permissions: + contents: read + security-events: write + steps: + - name: Checkout + id: checkout + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Build Image + id: build_image + uses: docker/build-push-action@4a13e500e55cf31b7a5d59a38ab2040ab0f42f56 # v5.1.0 + with: + push: false + load: true + tags: ingestion-scan + + - name: Scan Image + id: scan_image + uses: aquasecurity/trivy-action@062f2592684a31eb3aa050cc61e7ca1451cecd3d # v0.18.0 + with: + image-ref: ingestion-scan + exit-code: 1 + format: sarif + output: trivy-results.sarif + severity: CRITICAL + limit-severities-for-sarif: true + + - name: Scan Image (On SARIF Scan Failure) + if: failure() && steps.scan_image.outcome == 'failure' + id: scan_image_on_failure + uses: aquasecurity/trivy-action@062f2592684a31eb3aa050cc61e7ca1451cecd3d # v0.18.0 + with: + image-ref: ingestion-scan + exit-code: 1 + format: table + severity: CRITICAL + + - name: Upload SARIF + if: always() + id: upload_sarif + uses: github/codeql-action/upload-sarif@cdcdbb579706841c47f7063dda365e292e5cad7a # v2.2.7 + with: + sarif_file: trivy-results.sarif diff --git a/.github/workflows/super-linter.yml b/.github/workflows/super-linter.yml new file mode 100644 index 0000000..f68378c --- /dev/null +++ b/.github/workflows/super-linter.yml @@ -0,0 +1,36 @@ +--- +name: Super-Linter + +on: + pull_request: + branches: + - main + types: + - edited + - opened + - reopened + - synchronize + +permissions: {} + +jobs: + super-linter: + name: Super-Linter 
+ runs-on: ubuntu-latest + permissions: + contents: read + statuses: write + steps: + - name: Checkout + id: checkout + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + + - name: Run Super-Linter + id: super_linter + uses: super-linter/super-linter/slim@e0fc164bba85f4b58c6cd17ba1dfd435d01e8a06 # v6.3.0 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + LINTER_RULES_PATH: / + PYTHON_PYLINT_CONFIG_FILE: pyproject.toml diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..d4cfdcc --- /dev/null +++ b/Dockerfile @@ -0,0 +1,24 @@ +#checkov:skip=CKV_DOCKER_2: HEALTHCHECK not required - AWS Lambda does not support HEALTHCHECK +#checkov:skip=CKV_DOCKER_3: USER not required - A non-root user is used by AWS Lambda +FROM public.ecr.aws/lambda/python:3.12@sha256:1d922f123370801843aad18d0911759c55402af4d0dddb601181df4ed42b2ce2 + +LABEL org.opencontainers.image.vendor="Ministry of Justice" \ + org.opencontainers.image.authors="Analytical Platform (analytical-platform@digital.justice.gov.uk)" \ + org.opencontainers.image.title="Ingestion Scan" \ + org.opencontainers.image.description="Ingestion scan image for Analytical Platform" \ + org.opencontainers.image.url="https://github.com/ministryofjustice/analytical-platform" + +RUN microdnf update \ + && microdnf install --assumeyes \ + clamav-0.103.9-1.amzn2023.0.2.x86_64 \ + clamav-update-0.103.9-1.amzn2023.0.2.x86_64 \ + clamd-0.103.9-1.amzn2023.0.2.x86_64 \ + tar-2:1.34-1.amzn2023.0.4.x86_64 \ + && microdnf clean all + +COPY --chown=nobody:nobody --chmod=0755 src/var/task/ ${LAMBDA_TASK_ROOT} + +RUN python -m pip install --no-cache-dir --upgrade pip==24.0 \ + && python -m pip install --no-cache-dir --requirement requirements.txt + +CMD ["handler.handler"] diff --git a/README.md b/README.md index b6878d8..481b555 100644 --- a/README.md +++ b/README.md @@ -1,66 +1,60 @@ -# Ministry of Justice Template Repository +# Analytical Platform Ingestion Scan -[![repo 
standards badge](https://img.shields.io/endpoint?labelColor=231f20&color=005ea5&style=for-the-badge&label=MoJ%20Compliant&url=https%3A%2F%2Foperations-engineering-reports.cloud-platform.service.justice.gov.uk%2Fapi%2Fv1%2Fcompliant_public_repositories%2Fendpoint%2Ftemplate-repository&logo=)](https://operations-engineering-reports.cloud-platform.service.justice.gov.uk/public-report/template-repository) +[![repo standards badge](https://img.shields.io/endpoint?labelColor=231f20&color=005ea5&style=for-the-badge&label=MoJ%20Compliant&url=https%3A%2F%2Foperations-engineering-reports.cloud-platform.service.justice.gov.uk%2Fapi%2Fv1%2Fcompliant_public_repositories%2Fendpoint%2Fanalytical-platform-ingestion-scan&logo=)](https://operations-engineering-reports.cloud-platform.service.justice.gov.uk/public-report/analytical-platform-ingestion-scan) -This template repository equips you with the default initial files required for a Ministry of Justice GitHub repository. +This image is used in the Analytical Platform Ingestion service. It is deployed as an AWS Lambda function within the `analytical-platform-ingestion` account and is called as part of the AWS Transfer Family Server workflows. -## Included Files +## Features -The repository comes with the following preset files: +The image comes with two key features - Syncing and Scanning. -- LICENSE -- .gitignore -- CODEOWNERS -- dependabot.yml -- GitHub Actions example files -- Ministry of Justice Compliance Badge (public repositories only) +### Syncing -## Setup Instructions +- `definition_upload` pulls latest ClamAV definitions, archives them, and pushes to Amazon S3. -Once you've created your repository using this template, ensure the following steps: +- `definition_download` downloads archived ClamAV definitions from Amazon S3, and unpacks them. -### Update README +### Scanning -Edit this README.md file to document your project accurately. Take the time to create a clear, engaging, and informative README.md file. 
Include information like what your project does, how to install and run it, how to contribute, and any other pertinent details. +- `scan` pulls the ingested object from the landing bucket and scans it with ClamAV. If clean, it is moved to the processed bucket for upstream sync; if infected, it is moved to the quarantine bucket and a message is sent to SNS. -### Update repository description +## Running Locally -After you've created your repository, GitHub provides a brief description field that appears on the top of your repository's main page. This is a summary that gives visitors quick insight into the project. Using this field to provide a succinct overview of your repository is highly recommended. +### Build -This description and your README.md will be one of the first things people see when they visit your repository. It's a good place to make a strong, concise first impression. Remember, this is often visible in search results on GitHub and search engines, so it's also an opportunity to help people discover your project. +```bash +docker build --platform linux/amd64 --file Dockerfile --tag analytical-platform.service.justice.gov.uk/ingestion-scan:local . +``` -### Grant Team Permissions +### Run -Assign permissions to the appropriate Ministry of Justice teams. Ensure at least one team is granted Admin permissions. Whenever possible, assign permissions to teams rather than individual users. +```bash +docker run -it --rm \ + --platform linux/amd64 \ + --hostname ingestion-scan \ + --name analytical-platform-ingestion-scan \ + analytical-platform.service.justice.gov.uk/ingestion-scan:local +``` -### Read about the GitHub repository standards +## Versions -Familiarise yourself with the Ministry of Justice GitHub Repository Standards. These standards ensure consistency, maintainability, and best practices across all our repositories. +### Lambda Image -You can find the standards [here](https://operations-engineering.service.justice.gov.uk/documentation/services/repository-standards.html). 
+Generally Dependabot does this, but the following command will return the digest: -Please read and understand these standards thoroughly and enable them when you feel comfortable. +```bash +docker pull --platform linux/amd64 public.ecr.aws/lambda/python:3.12 +docker image inspect --format='{{index .RepoDigests 0}}' public.ecr.aws/lambda/python:3.12 +``` -### Modify the GitHub Standards Badge +### DNF Packages -Once you've ensured that all the [GitHub Repository Standards](https://operations-engineering.service.justice.gov.uk/documentation/services/repository-standards.html) have been applied to your repository, it's time to update the Ministry of Justice (MoJ) Compliance Badge located in the README file. +To find latest DNF package versions, you can run the following: -The badge demonstrates that your repository is compliant with MoJ's standards. Please follow these [instructions](https://operations-engineering.service.justice.gov.uk/documentation/runbooks/services/add-repo-badge.html) to modify the badge URL to reflect the status of your repository correctly. +```bash +docker run -it --rm --platform linux/amd64 --entrypoint /bin/bash public.ecr.aws/lambda/python:3.12 -**Please note** the badge will not function correctly if your repository is internal or private. In this case, you may remove the badge from your README. +microdnf update -### Manage Outside Collaborators - -To add an Outside Collaborator to the repository, follow the guidelines detailed [here](https://github.com/ministryofjustice/github-collaborators). - -### Update CODEOWNERS - -(Optional) Modify the CODEOWNERS file to specify the teams or users authorized to approve pull requests. 
- -### Configure Dependabot - -Adapt the dependabot.yml file to match your project's [dependency manager](https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#package-ecosystem) and to enable [automated pull requests for package updates](https://docs.github.com/en/code-security/supply-chain-security). - -### Dependency Review - -If your repository is private with no GitHub Advanced Security license, remove the `.github/workflows/dependency-review.yml` file. +microdnf repoquery ${PACKAGE} # for example clamav, clamav-update or clamd +``` diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..f8ad128 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,7 @@ +[tool.pylint.'MESSAGES CONTROL'] +max-line-length = 120 +disable = """ + import-error, + missing-function-docstring, + missing-module-docstring +""" diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..330b727 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,7 @@ +-r src/var/task/requirements.txt +autopep8==2.0.4 +black==24.2.0 +flake8==7.0.0 +isort==5.13.2 +mypy==1.9.0 +pylint==3.1.0 diff --git a/scripts/build-and-test.sh b/scripts/build-and-test.sh new file mode 100755 index 0000000..e778903 --- /dev/null +++ b/scripts/build-and-test.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +IMAGE_TAG="analytical-platform.service.justice.gov.uk/ingestion-scan:local" +CONTAINER_STRUCTURE_TEST_IMAGE="gcr.io/gcp-runtimes/container-structure-test:latest" + +if [[ "${REMOTE_CONTAINERS}" ]] && [[ "$(uname -m)" == "aarch64" ]]; then + echo "(⚠) Looks like you're running in a dev container on Apple Silicon." + echo "(⚠) This script builds linux/amd64 images which might take a long time or even fail." + export PLATFORM_FLAG="--platform linux/amd64" +elif [[ "$(uname)" == "Darwin" ]] && [[ "$(uname -m)" == "arm64" ]]; then + echo "(⚠) Looks like you're running on Apple Silicon." 
+  echo "(⚠) This script builds linux/amd64 images which might take a long time or even fail."
+  export PLATFORM_FLAG="--platform linux/amd64"
+else
+  export PLATFORM_FLAG=""
+fi
+
+# shellcheck disable=SC2086
+# special case for PLATFORM_FLAG as it can't parse double quotes
+docker build ${PLATFORM_FLAG} --file Dockerfile --tag "${IMAGE_TAG}" .
+
+echo "Running container structure test for [ ${IMAGE_TAG} ]"
+
+# shellcheck disable=SC2086
+# special case for PLATFORM_FLAG as it can't parse double quotes
+docker run --rm ${PLATFORM_FLAG} \
+  --volume /var/run/docker.sock:/var/run/docker.sock \
+  --volume "${PWD}:/workspace" \
+  --workdir /workspace \
+  "${CONTAINER_STRUCTURE_TEST_IMAGE}" \
+  test --image "${IMAGE_TAG}" --config "/workspace/test/container-structure-test.yml"
diff --git a/src/var/task/freshclam.conf b/src/var/task/freshclam.conf
new file mode 100644
index 0000000..a927a80
--- /dev/null
+++ b/src/var/task/freshclam.conf
@@ -0,0 +1,4 @@
+CompressLocalDatabase yes
+DatabaseDirectory /tmp/clamav/database
+DatabaseMirror database.clamav.net
+ScriptedUpdates no
diff --git a/src/var/task/handler.py b/src/var/task/handler.py
new file mode 100644
index 0000000..b17902c
--- /dev/null
+++ b/src/var/task/handler.py
@@ -0,0 +1,297 @@
+import json
+import os
+import shlex
+import subprocess
+from datetime import datetime
+
+import boto3
+import botocore.exceptions
+
+s3_client = boto3.client("s3")
+sns_client = boto3.client("sns")
+# Refreshed by handler() on every invocation; a warm Lambda container would
+# otherwise keep reporting the time at which the module was first imported.
+scan_time = datetime.now().isoformat()
+
+
+def run_command(command):
+    """Run a command line through the shell and capture its result.
+
+    Args:
+        command: Command line to execute. Because it is interpreted by a shell,
+            callers must quote any untrusted value they interpolate.
+
+    Returns:
+        Tuple of (return code, stdout, stderr), both streams decoded as UTF-8.
+    """
+    result = subprocess.run(  # pylint: disable=subprocess-run-check
+        command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+    )
+    return (
+        result.returncode,
+        result.stdout.decode("utf-8"),
+        result.stderr.decode("utf-8"),
+    )
+
+
+def definition_upload():
+    """Pull the latest ClamAV definitions, archive them and upload them to S3.
+
+    Raises:
+        ValueError: If CLAMAV_DEFINITON_BUCKET_NAME is not set.
+        RuntimeError: If the definitions cannot be archived.
+        botocore.exceptions.ClientError: Re-raised after logging so the caller
+            does not report success.
+    """
+    try:
+        # Create the directory to store the definitions
+        os.makedirs("/tmp/clamav/database", exist_ok=True)
+
+        # Pull the latest definitions
+        user_id = run_command("id --user")[1].strip()
+        clamav_config = os.environ.get("LAMBDA_TASK_ROOT", "") + "/freshclam.conf"
+        exit_code, _, stderr = run_command(
+            f'freshclam --no-warnings --user {user_id} --config-file="{clamav_config}"'
+        )
+        if exit_code != 0:
+            print(f"freshclam exited with code {exit_code}: {stderr}")
+
+        # Archive the definitions
+        exit_code, _, stderr = run_command(
+            "tar --create --gzip --verbose --file=/tmp/clamav/clamav.tar.gz -C /tmp/clamav/database ."
+        )
+        if exit_code != 0:
+            raise RuntimeError(f"Failed to archive ClamAV definitions: {stderr}")
+
+        # Upload the definitions to S3
+        bucket_name = os.environ.get("CLAMAV_DEFINITON_BUCKET_NAME")
+        if not bucket_name:
+            raise ValueError(
+                "CLAMAV_DEFINITON_BUCKET_NAME environment variable not set."
+            )
+        s3_client.upload_file("/tmp/clamav/clamav.tar.gz", bucket_name, "clamav.tar.gz")
+    except botocore.exceptions.ClientError as e:
+        print(f"Failed to upload ClamAV definitions: {e}")
+        raise
+
+
+def definition_download():
+    """Download the archived ClamAV definitions from S3 and unpack them.
+
+    Raises:
+        ValueError: If CLAMAV_DEFINITON_BUCKET_NAME is not set.
+        RuntimeError: If the archive cannot be extracted.
+        botocore.exceptions.ClientError: Re-raised after logging so a scan is
+            never attempted without definitions.
+    """
+    try:
+        # Create the directory to store the definitions
+        os.makedirs("/tmp/clamav/database", exist_ok=True)
+
+        # Download the definitions from S3
+        bucket_name = os.environ.get("CLAMAV_DEFINITON_BUCKET_NAME")
+        if not bucket_name:
+            raise ValueError(
+                "CLAMAV_DEFINITON_BUCKET_NAME environment variable not set."
+            )
+        s3_client.download_file(
+            bucket_name, "clamav.tar.gz", "/tmp/clamav/clamav.tar.gz"
+        )
+        print("Successfully downloaded ClamAV definitions from S3.")
+
+        # Extract the definitions
+        exit_code, _, stderr = run_command(
+            "tar --extract --gzip --verbose --file=/tmp/clamav/clamav.tar.gz -C /tmp/clamav/database"
+        )
+        if exit_code != 0:
+            raise RuntimeError(f"Failed to extract ClamAV definitions: {stderr}")
+        print("Successfully extracted ClamAV definitions.")
+    except botocore.exceptions.ClientError as e:
+        print(f"Failed to download or extract ClamAV definitions: {e}")
+        raise
+
+
+def scan(event):
+    """Scan the object referenced by the event and route it by the result.
+
+    Args:
+        event: Invocation payload; the object key is read from
+            event["Records"][0]["s3"]["object"]["key"].
+
+    Raises:
+        ValueError: If LANDING_BUCKET_NAME is not set.
+        RuntimeError: If clamscan could not complete the scan.
+    """
+    # NOTE(review): S3 event notifications URL-encode object keys; confirm
+    # whether the key needs decoding before it is used below.
+    object_key = event["Records"][0]["s3"]["object"]["key"]
+    object_name = object_key.split("/")[-1]
+    scan_path = f"/tmp/clamav/scan/{object_name}"
+
+    # Create the directory for running the scan
+    os.makedirs("/tmp/clamav/scan", exist_ok=True)
+
+    # Download the file to scan
+    landing_bucket_name = os.environ.get("LANDING_BUCKET_NAME")
+    if not landing_bucket_name:
+        raise ValueError("LANDING_BUCKET_NAME environment variable not set.")
+
+    try:
+        s3_client.download_file(landing_bucket_name, object_key, scan_path)
+
+        # The object name is caller-controlled and the command runs through a
+        # shell, so the path must be quoted.
+        exit_code, stdout, stderr = run_command(
+            f"clamscan --database=/tmp/clamav/database {shlex.quote(scan_path)}"
+        )
+    finally:
+        # /tmp survives warm invocations; always remove the local copy
+        if os.path.isfile(scan_path):
+            os.remove(scan_path)
+
+    print(stdout)
+    # clamscan exit codes: 0 = no virus found, 1 = virus found, 2 = error
+    if exit_code == 0:
+        print("Scan result: Clean")
+        move_to_processed(object_key)
+    elif exit_code == 1:
+        print("Scan result: Infected")
+        move_to_quarantine(object_key)
+    else:
+        raise RuntimeError(f"clamscan failed with exit code {exit_code}: {stderr}")
+
+
+def move_to_processed(object_key):
+    """Move a clean object to the processed bucket and tag it with the result.
+
+    Args:
+        object_key: Key of the object in the landing bucket.
+
+    Raises:
+        ValueError: If PROCESSED_BUCKET_NAME is not set.
+    """
+    try:
+        processed_bucket_name = os.environ.get("PROCESSED_BUCKET_NAME")
+        if not processed_bucket_name:
+            raise ValueError("PROCESSED_BUCKET_NAME environment variable not set.")
+        # Move the file to the processed bucket
+        copy_source = {"Bucket": os.environ["LANDING_BUCKET_NAME"], "Key": object_key}
+        s3_client.copy_object(
+            Bucket=processed_bucket_name, CopySource=copy_source, Key=object_key
+        )
+        s3_client.delete_object(
+            Bucket=os.environ["LANDING_BUCKET_NAME"], Key=object_key
+        )
+
+        # Tag the file with the scan result
+        s3_client.put_object_tagging(
+            Bucket=processed_bucket_name,
+            Key=object_key,
+            Tagging={
+                "TagSet": [
+                    {"Key": "scan-result", "Value": "clean"},
+                    {"Key": "scan-time", "Value": scan_time},
+                ]
+            },
+        )
+        print("File moved to processed and tagged")
+    except botocore.exceptions.ClientError as e:
+        print(f"Failed to move file to processed: {e}")
+
+
+def move_to_quarantine(object_key):
+    """Move an infected object to the quarantine bucket, tag it and notify SNS.
+
+    Args:
+        object_key: Key of the object in the landing bucket.
+
+    Raises:
+        ValueError: If QUARANTINE_BUCKET_NAME or SNS_TOPIC_ARN is not set.
+    """
+    try:
+        quarantine_bucket_name = os.environ.get("QUARANTINE_BUCKET_NAME")
+        if not quarantine_bucket_name:
+            raise ValueError("QUARANTINE_BUCKET_NAME environment variable not set.")
+        # Move the file to the quarantine bucket
+        copy_source = {"Bucket": os.environ["LANDING_BUCKET_NAME"], "Key": object_key}
+        s3_client.copy_object(
+            Bucket=quarantine_bucket_name, CopySource=copy_source, Key=object_key
+        )
+        s3_client.delete_object(
+            Bucket=os.environ["LANDING_BUCKET_NAME"], Key=object_key
+        )
+
+        # Tag the file with the scan result
+        s3_client.put_object_tagging(
+            Bucket=quarantine_bucket_name,
+            Key=object_key,
+            Tagging={
+                "TagSet": [
+                    {"Key": "scan-result", "Value": "infected"},
+                    {"Key": "scan-time", "Value": scan_time},
+                ]
+            },
+        )
+        print("File moved to quarantine and tagged")
+    except botocore.exceptions.ClientError as e:
+        print(f"Failed to move file to quarantine: {e}")
+
+    try:
+        send_sns_message(message=f"quarantined,{object_key},{scan_time}")
+    except botocore.exceptions.ClientError as e:
+        print(f"Failed to send SNS message: {e}")
+
+
+def send_sns_message(message):
+    """Publish a message to the topic named by SNS_TOPIC_ARN.
+
+    Args:
+        message: Message body to publish.
+
+    Raises:
+        ValueError: If SNS_TOPIC_ARN is not set.
+    """
+    topic_arn = os.environ.get("SNS_TOPIC_ARN")
+    if not topic_arn:
+        raise ValueError("SNS_TOPIC_ARN environment variable not set.")
+    sns_client.publish(TopicArn=topic_arn, Message=message)
+
+
+def handler(event, context):  # pylint: disable=unused-argument
+    """Lambda entry point; runs the operation selected by the MODE variable.
+
+    Args:
+        event: Invocation payload, passed to scan() in "scan" mode.
+        context: Lambda context object (unused).
+
+    Returns:
+        Dict with a statusCode (200, 400 or 500) and a JSON-encoded body.
+    """
+    global scan_time  # pylint: disable=global-statement
+    scan_time = datetime.now().isoformat()
+    print("Received event:", event)
+    try:
+        mode = os.environ.get("MODE")
+        if mode == "definition-upload":
+            definition_upload()
+        elif mode == "scan":
+            definition_download()
+            scan(event)
+        else:
+            raise ValueError(f"Invalid mode: {mode}")
+    except ValueError as e:
+        print(f"Configuration Error: {e}")
+        return {"statusCode": 400, "body": json.dumps({"message": str(e)})}
+    except botocore.exceptions.ClientError as e:
+        print(f"AWS Client Error: {e}")
+        return {"statusCode": 500, "body": json.dumps({"message": "AWS service error"})}
+    except Exception as e:  # pylint: disable=broad-except
+        print(f"Unexpected Error: {type(e).__name__}, {e}")
+        return {
+            "statusCode": 500,
+            "body": json.dumps({"message": "An unexpected error occurred"}),
+        }
+    return {
+        "statusCode": 200,
+        "body": json.dumps({"message": "Operation completed successfully"}),
+    }
diff --git a/src/var/task/requirements.txt b/src/var/task/requirements.txt
new file mode 100644
index 0000000..aa60a70
--- /dev/null
+++ b/src/var/task/requirements.txt
@@ -0,0 +1,2 @@
+boto3==1.34.59
+botocore==1.34.59
diff --git a/test/container-structure-test.yml b/test/container-structure-test.yml
new file mode 100644
index 0000000..982c367
--- /dev/null
+++ b/test/container-structure-test.yml
@@ -0,0 +1,33 @@
+---
+schemaVersion: 2.0.0
+
+commandTests:
+  - name: "clamscan"
+    command: "clamscan"
+    args: ["--version"]
+    expectedOutput: ["ClamAV 0.103.9"]
+
+  - name: "freshclam"
+    command: "freshclam"
+    args: ["--version"]
+    expectedOutput: ["ClamAV 0.103.9"]
+
+  - name: "tar"
+    command: "tar"
+    args: ["--version"]
+    expectedOutput: ["1.34"]
+
+  - name: "pip"
+    command: "pip"
+    args: ["--version"]
+    expectedOutput: ["pip 24.0"]
+
+fileExistenceTests:
+  - name: "freshclam.conf"
+    path: "/var/task/freshclam.conf"
+
+  - name: "handler.py"
+    path: "/var/task/handler.py"
+
+  - name: "requirements.txt"
+    path: "/var/task/requirements.txt"