diff --git a/.github/workflows/manual-build.yml b/.github/workflows/manual-build.yml index dbbf15e..d5f964c 100644 --- a/.github/workflows/manual-build.yml +++ b/.github/workflows/manual-build.yml @@ -9,7 +9,7 @@ on: default: 'linux/amd64,linux/arm64/v8' jobs: build-push: - uses: kbase/.github/.github/workflows/reusable_build-push.yml@develop + uses: kbase/.github/.github/workflows/reusable_build-push.yml@main with: name: '${{ github.event.repository.name }}-develop' tags: br-${{ github.ref_name }} diff --git a/.github/workflows/pr_build.yml b/.github/workflows/pr_build.yaml similarity index 83% rename from .github/workflows/pr_build.yml rename to .github/workflows/pr_build.yaml index e826f2b..e7d658d 100644 --- a/.github/workflows/pr_build.yml +++ b/.github/workflows/pr_build.yaml @@ -14,35 +14,35 @@ on: jobs: build-develop-open: if: github.base_ref == 'develop' && github.event.pull_request.merged == false - uses: kbase/.github/.github/workflows/reusable_build.yml@develop + uses: kbase/.github/.github/workflows/reusable_build.yml@main with: - platforms: "linux/amd64,linux/arm64/v8" + platforms: 'linux/amd64,linux/arm64/v8' secrets: inherit build-develop-merge: if: github.base_ref == 'develop' && github.event.pull_request.merged == true - uses: kbase/.github/.github/workflows/reusable_build-push.yml@develop + uses: kbase/.github/.github/workflows/reusable_build-push.yml@main with: name: '${{ github.event.repository.name }}-develop' tags: pr-${{ github.event.number }},latest - platforms: "linux/amd64,linux/arm64/v8" + platforms: 'linux/amd64,linux/arm64/v8' secrets: inherit build-main-open: if: (github.base_ref == 'main' || github.base_ref == 'master') && github.event.pull_request.merged == false - uses: kbase/.github/.github/workflows/reusable_build-push.yml@develop + uses: kbase/.github/.github/workflows/reusable_build-push.yml@main with: name: '${{ github.event.repository.name }}' tags: pr-${{ github.event.number }} - platforms: "linux/amd64,linux/arm64/v8" + 
platforms: 'linux/amd64,linux/arm64/v8' secrets: inherit build-main-merge: if: (github.base_ref == 'main' || github.base_ref == 'master') && github.event.pull_request.merged == true - uses: kbase/.github/.github/workflows/reusable_build-push.yml@develop + uses: kbase/.github/.github/workflows/reusable_build-push.yml@main with: name: '${{ github.event.repository.name }}' tags: pr-${{ github.event.number }},latest-rc - platforms: "linux/amd64,linux/arm64/v8" + platforms: 'linux/amd64,linux/arm64/v8' secrets: inherit trivy-scans: if: (github.base_ref == 'develop' || github.base_ref == 'main' || github.base_ref == 'master' ) && github.event.pull_request.merged == false - uses: kbase/.github/.github/workflows/reusable_trivy-scans.yml@develop + uses: kbase/.github/.github/workflows/reusable_trivy-scans.yml@main secrets: inherit diff --git a/.github/workflows/release-main.yml b/.github/workflows/release-main.yml index be252a3..4769f02 100644 --- a/.github/workflows/release-main.yml +++ b/.github/workflows/release-main.yml @@ -8,19 +8,19 @@ on: types: [ published ] jobs: check-source-branch: - uses: kbase/.github/.github/workflows/reusable_validate-branch.yml@develop + uses: kbase/.github/.github/workflows/reusable_validate-branch.yml@main with: build_branch: '${{ github.event.release.target_commitish }}' validate-release-tag: needs: check-source-branch - uses: kbase/.github/.github/workflows/reusable_validate-release-tag.yml@develop + uses: kbase/.github/.github/workflows/reusable_validate-release-tag.yml@main with: release_tag: '${{ github.event.release.tag_name }}' build-push: needs: validate-release-tag - uses: kbase/.github/.github/workflows/reusable_build-push.yml@develop + uses: kbase/.github/.github/workflows/reusable_build-push.yml@main with: name: '${{ github.event.repository.name }}' tags: '${{ github.event.release.tag_name }},latest' - platforms: "linux/amd64,linux/arm64/v8" + platforms: 'linux/amd64,linux/arm64/v8' secrets: inherit diff --git a/app/README.md 
b/app/README.md index 1018cda..c7dc47b 100644 --- a/app/README.md +++ b/app/README.md @@ -1,45 +1,39 @@ -## Zip2Cloud - -A robust zip & upload utility for sending archives to any destination supported by [rclone](https://rclone.org/). - -### Features - -- Only uploads _completed_ archives -- Compares local & remote files with md5 sums -- Only deletes local files once they have been successfully uploaded -- Allows keeping an arbitrary amount of zipped & unzipped backups locally for faster restore - - Script only zips & uploads files that are missing from the remote location - -[//]: # (- Allows mixing backup files with other data) - -[//]: # ( - Only zips folders under the `$DUMP_BASE` directory with a date-based name e.g. `2024-04-01`) - -[//]: # (- Notifies on completion or error via Slack) - -### Operation of `zip2cloud` - -1. Cleans up old zip files and backup dumps based on the retention period set in the environment variables. -2. Retrieves the list of remote backups and their MD5 checksums from the remote S3 bucket. -3. Checks database dumps for completion by looking for a `dump_complete.txt` file in the dump's top-level directory. -4. Compresses new database dumps that do not have a corresponding MD5 file in the remote S3 bucket. -5. Compares the MD5 checksums of local and remote files. - 1. If a local file does not have a matching MD5 checksum in the remote S3 bucket, it is added to the upload list. -6. If there's an MD5 mismatch between a local and a remote file, the script increments the filename of the local file and adds it to the upload list. -7. Finally, it syncs all the files in the upload list to the remote S3 bucket using rclone. 
- -### Variables - -| Variable | Description | Default | -|-----------------|---------------------------------------------------|---------| -| `BUCKET` | The bucket to store the backups | | -| `BUCKET_PATH` | The path within the bucket to store the backups | | -| `DUMP_BASE` | The base directory for backup dumps | `/dump` | -| `DUMP_RETENTION`| The number of days to keep uncompressed backups locally | | -| `REMOTE` | The remote location to sync backups to | | -| `SECRET` | The encryption key for 7zip | | -| `SLACK_CHANNEL` | The slack channel to send notifications to | | -| `SLACK_WEBHOOK` | The webhook URL for slack notifications | | -| `ZIP_BASE` | The base filename, minus date, for the compressed backups | | -| `ZIP_DIR` | The directory to store all compressed backups | `/zip` | -| `ZIP_RETENTION` | The number of days to keep compressed backups locally | | \ No newline at end of file +# zip2cloud Application + +The `zip2cloud` application is a shell script that manages backup dumps, compresses them into zip files, compares them with existing backups in remote storage, and uploads any new or updated backups to the remote storage. 
+ ## Environment Variables + +The script uses the following environment variables: + +| Variable | Description | Default | +|--------------------|-----------------------------------------------------------------------|----------------------------------| +| `COMPRESSION_LEVEL`| Compression level for 7z files | 0 | +| `DELETE_DUMP` | Optionally deletes exports under `$DUMP_BASE` when done compressing | _Unused_ | +| `DUMP_BASE` | Base directory for dumps | `/dump/full_backup` | +| `DUMP_RETENTION` | Retention policy for dumps | 3 | +| `REMOTE` | Remote storage details | `remote:${BUCKET}/${BUCKETPATH}` | +| `SECRET` | Encryption key for 7z files | | +| `SLACK_CHANNEL` | Slack channel for notifications | _Unused_ | +| `SLACK_WEBHOOK` | Slack webhook for notifications | _Unused_ | +| `ZIP_BASE` | Base name for zip files | `backup_full` | +| `ZIP_DIR` | Directory for zip files | `/zip` | +| `ZIP_RETENTION` | Retention policy for zip files | 4 | + +## Workflow + +The script performs the following steps: + +1. **Cleanup**: Removes old zip files and backup dumps based on the retention policies set in the environment variables. +2. **Checksumming**: Retrieves a list of remote backups and downloads the MD5 checksums for each remote backup into a temporary directory. It then compares the checksums of local zip files against the remote MD5 checksums, adding any files that don't match to an upload list. +3. **Create Upload List**: Verifies and updates the list of files to upload. For each file in the upload list, it compares the local and remote MD5 checksums. If there's a mismatch, it increments the filename and adds it to the final upload list. This incrementing process continues until it finds a filename that doesn't conflict with existing files in the remote storage. +4. **Upload**: Uploads the files in the final upload list to the remote storage using the `rclone` command. This step runs only when the `ENABLE_UPLOAD` environment variable is `true` (the default); set it to any other value to skip the upload. 
+ ## Dockerfile + +The Dockerfile for this application is based on the `alpine:latest` image and includes the necessary binaries and files for the `zip2cloud` script. The Dockerfile uses a multi-stage build process to keep the final image size small. + +## GitHub Actions + +The application uses GitHub Actions for continuous integration. The workflows are defined in the `.github/workflows/` directory and include steps for building, tagging, and pushing Docker images, as well as scanning for vulnerabilities with Trivy. diff --git a/app/create-test-dumps.sh b/app/create-test-dumps.sh new file mode 100755 index 0000000..0f3e2a4 --- /dev/null +++ b/app/create-test-dumps.sh @@ -0,0 +1,28 @@ +#!/bin/sh + +# Variables +DUMP_BASE=${DUMP_BASE:-/dump/full_backup} + +# Function to generate random string +generate_random_string() { + cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 10 | head -n 1 +} + +# Create directories and files +for i in $(seq 1 5); do + # Generate a date string for directory name via epoch arithmetic ("N days ago" is a GNU-only date extension; "-d @SECONDS" is also accepted by BusyBox date) + dir_date=$(date -d "@$(( $(date +%s) - i * 86400 ))" +%Y-%m-%d) + dir_path="${DUMP_BASE}/${dir_date}" + # Create directory if it doesn't exist + if [ ! -d "$dir_path" ]; then + mkdir -p "$dir_path" + fi + # Create files with random content + for j in $(seq 1 5); do + file_path="${dir_path}/file_${j}.txt" + # Only create file if it doesn't exist + if [ ! 
-f "$file_path" ]; then + echo "$(generate_random_string)" > "$file_path" + fi + done +done \ No newline at end of file diff --git a/app/zip2cloud b/app/zip2cloud index babf892..dd3fe4b 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -5,6 +5,7 @@ COMPRESSION_LEVEL=${COMPRESSION_LEVEL:-0} # Set to 0 if the db dumps are already DELETE_DUMP=${DELETE_DUMP:-''} DUMP_BASE=${DUMP_BASE:-/dump/full_backup} DUMP_RETENTION=${DUMP_RETENTION:-3} +ENABLE_UPLOAD=${ENABLE_UPLOAD:-true} REMOTE=${REMOTE:-remote:${BUCKET}/${BUCKETPATH}} SECRET=${SECRET:-`cat /run/secrets/encryption_key`} SLACK_CHANNEL=${SLACK_CHANNEL:-''} @@ -32,6 +33,9 @@ find ${DUMP_BASE} -type d -name "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" -pr ### End Cleanup +### Checksumming + +#### Get Checksums # Get list of remote backups remote_files=$(rclone ls remote:${BUCKET}/${BUCKETPATH} | grep 7z | awk '{print $2}' | rev | cut -d. -f2- | rev) @@ -41,6 +45,7 @@ for file in $remote_files; do rclone md5sum remote:${BUCKET}/${BUCKETPATH}/$file.7z | awk '{print $1}' > ${ZIP_DIR}/tmp_md5/$file.md5 done +#### Verify Local Zips Vs Checksums # Get all exports from DUMP_BASE for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do @@ -67,6 +72,7 @@ done uploads="" cd ${ZIP_DIR} || exit for file in ${ZIP_DIR}/*.7z; do + echo "Comparing $file with all remote md5s" # Get the base name of the file without extension base_name=$(basename "$file" .7z) local_md5=$(md5sum "$file" | awk '{print $1}') @@ -75,8 +81,10 @@ for file in ${ZIP_DIR}/*.7z; do match_found=0 for remote_md5_file in ${ZIP_DIR}/tmp_md5/*.md5; do remote_md5=$(cat "$remote_md5_file") + echo "Comparing $file with $remote_md5_file" if [ "$local_md5" = "$remote_md5" ]; then match_found=1 + echo "$file checksum matches $remote_md5_file, skipping" break fi done @@ -88,41 +96,57 @@ done echo "Current uploads candidates are: $uploads" +### End Checksumming + +### Create Upload List + ## Verify & update list of files to upload final_uploads="" cd ${ZIP_DIR} || exit for file in 
${uploads}; do # Get the base name of the file without extension base_name=$(basename "$file" .7z) - # Compare local and remote md5 - remote_md5=$(cat "${ZIP_DIR}/tmp_md5/${base_name}.md5") - local_md5=$(cat "${ZIP_DIR}/${base_name}.md5") - if [ "$local_md5" != "$remote_md5" ]; then - echo "MD5 mismatch for file $file. Incrementing filename and adding to uploads list." - # Extract the last character of the base name - last_char=${base_name: -1} - # Check if the last character is a letter - if [[ $last_char =~ [a-y] ]]; then - # If it's a letter, increment it - next_char=$(echo "$last_char" | tr "a-y" "b-z") - new_base_name=${base_name%?}$next_char - elif [[ $last_char == 'z' ]]; then - # If it's 'z', replace it with 'a' and append 'a' - new_base_name=${base_name%?}aa - else - # If it's not a letter, append 'a' - new_base_name=${base_name}a - fi - # Rename the file - mv "$file" "${ZIP_DIR}/${new_base_name}.7z" - # Add the renamed file to the uploads list - final_uploads="$final_uploads ${ZIP_DIR}/${new_base_name}.7z" - fi + # Compare local and remote md5 + remote_md5=$(cat "${ZIP_DIR}/tmp_md5/${base_name}.md5") + local_md5=$(cat "${ZIP_DIR}/${base_name}.md5") + if [ "$local_md5" != "$remote_md5" ]; then + echo "MD5 mismatch for file $file. Incrementing filename and adding to uploads list." 
+ # Extract the last character of the base name + last_char=${base_name: -1} + # Advance the suffix until no remote md5 exists for base_name; every rollover lengthens the suffix so a name is never probed twice + while [[ -f "${ZIP_DIR}/tmp_md5/${base_name}.md5" ]]; do + if [[ $last_char =~ [a-y] ]]; then + # If it's a letter, increment it + next_char=$(echo "$last_char" | tr "a-y" "b-z") + base_name=${base_name%?}$next_char + elif [[ $last_char == 'z' ]]; then + # If it's 'z', carry into the preceding letter ('az' -> 'ba'); with no letter left to carry into, grow the suffix ('z' -> 'aa', 'zz' -> 'aaa') instead of wrapping back to an already-used name + second_last_char=${base_name: -2:1} + if [[ $second_last_char =~ [a-y] ]]; then + next_char=$(echo "$second_last_char" | tr "a-y" "b-z") + base_name=${base_name%??}$next_char"a" + elif [[ $second_last_char == 'z' ]]; then + base_name=${base_name%??}"aaa" + else + base_name=${base_name%?}"aa" + fi + else + # If it's not a letter, append 'a' + base_name=${base_name}a + fi + last_char=${base_name: -1} + done + # Rename the file + mv "$file" "${ZIP_DIR}/${base_name}.7z" + # Add the renamed file to the uploads list + final_uploads="$final_uploads ${ZIP_DIR}/${base_name}.7z" + fi done - echo "Final uploads: $final_uploads" +### End Create Upload List +### Upload # Before running rclone #for file in "${uploads[@]}"; do for file in ${final_uploads}; do @@ -135,8 +159,10 @@ done ## Sync All Resulting Files (in list!) -cd ${ZIP_DIR} || exit -for file in ${final_uploads}; do - echo "RClone-ing ${file} to GCP ${REMOTE}" - /bin/rclone sync -v "$file" ${REMOTE}/ -done +if [ "$ENABLE_UPLOAD" = true ]; then + for file in ${final_uploads}; do + echo "RClone-ing ${file} to GCP ${REMOTE}" + /usr/bin/rclone sync -v "$file" ${REMOTE}/ + done +fi +### End Upload