Merge pull request #190 from dup05/pipeline_performance
Automated Performance testing framework
Showing 10 changed files with 713 additions and 0 deletions.
@@ -0,0 +1,35 @@
name: "Cleanup resources"
description: "Cleanup BQ and Dataflow job"
inputs:
  job_id:
    description: "JobId"
    required: false
  project_id:
    description: "project_id"
    required: true
  dataset:
    description: "dataset"
    required: true
  input_gcs_bucket:
    description: "Bucket with run time created files"
    required: true
  job_type:
    description: "Batch/Streaming"
    required: true

runs:
  using: "composite"
  steps:
    - name: Cleanup BQ Tables
      shell: bash
      run: bq rm -r -f -d ${{inputs.project_id}}:${{inputs.dataset}}

    - name: Cleanup GCS files
      if: always() && inputs.job_type == 'streaming'
      shell: bash
      run: gcloud storage rm gs://${{inputs.input_gcs_bucket}}/*

    - name: Delete pub/sub notification config
      if: always() && inputs.job_type == 'streaming'
      shell: bash
      run: gsutil notification delete gs://${{inputs.input_gcs_bucket}}
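
For reference, this composite action is invoked by path from the performance-testing workflow later in this commit. A minimal call step might look like the sketch below; the literal values are illustrative (drawn from the defaults elsewhere in this PR), and job_id is omitted because it is the only optional input:

    - name: Cleanup test resources
      if: always()
      uses: ./.github/workflows/cleanup
      with:
        project_id: dlp-dataflow-load-test        # illustrative values
        dataset: dataset_1a2b3c4d_Batch1_csv      # hypothetical dataset name following the workflow's naming pattern
        input_gcs_bucket: input_dlp_load_test_2
        job_type: batch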
@@ -0,0 +1,71 @@
[
  {
    "name": "Streaming1_csv",
    "type": "streaming",
    "file_type": "CSV",
    "file_size": "100MB/min",
    "gcs_file_path": "gs://input_load_test_streaming_job/*.csv",
    "deid_template": "projects/dlp-dataflow-load-test/deidentifyTemplates/dlp-demo-deid-latest-1706594483019",
    "raw_file_pattern": "gs://input_dlp_load_test_2/largecsv100MB.csv"
  },
  {
    "name": "Streaming2_csv",
    "type": "streaming",
    "file_type": "CSV",
    "file_size": "500MB/min",
    "gcs_file_path": "gs://input_load_test_streaming_job/*.csv",
    "deid_template": "projects/dlp-dataflow-load-test/deidentifyTemplates/dlp-demo-deid-latest-1706594483019",
    "raw_file_pattern": "gs://input_dlp_load_test_2/largecsv500MB.csv"
  },
  {
    "name": "Batch1_csv",
    "type": "batch",
    "file_type": "CSV",
    "file_size": "500MB",
    "gcs_file_path": "gs://input_dlp_load_test_2/largecsv500MB.csv",
    "deid_template": "projects/dlp-dataflow-load-test/deidentifyTemplates/dlp-demo-deid-latest-1706594483019"
  },
  {
    "name": "Batch2_csv",
    "type": "batch",
    "file_type": "CSV",
    "file_size": "1GB",
    "gcs_file_path": "gs://input_dlp_load_test_2/largecsv1GB.csv",
    "deid_template": "projects/dlp-dataflow-load-test/deidentifyTemplates/dlp-demo-deid-latest-1706594483019"
  },
  {
    "name": "Batch3_csv",
    "type": "batch",
    "file_type": "CSV",
    "file_size": "2GB",
    "gcs_file_path": "gs://input_dlp_load_test_2/largecsv2GB.csv",
    "deid_template": "projects/dlp-dataflow-load-test/deidentifyTemplates/dlp-demo-deid-latest-1706594483019"
  },
  {
    "name": "Batch1_avro",
    "type": "batch",
    "file_type": "AVRO",
    "file_size": "500MB",
    "gcs_file_path": "gs://input_dlp_load_test_2/largeavro500MB.avro",
    "deid_template": "projects/dlp-dataflow-load-test/deidentifyTemplates/dlp-demo-deid-latest-1706594483019"
  },
  {
    "name": "Batch2_avro",
    "type": "batch",
    "file_type": "AVRO",
    "file_size": "750MB",
    "gcs_file_path": "gs://input_dlp_load_test_2/largeavro750MB.avro",
    "deid_template": "projects/dlp-dataflow-load-test/deidentifyTemplates/dlp-demo-deid-latest-1706594483019"
  },
  {
    "name": "Batch3_avro",
    "type": "batch",
    "file_type": "AVRO",
    "file_size": "1500MB",
    "gcs_file_path": "gs://input_dlp_load_test_2/largeavro1500MB.avro",
    "deid_template": "projects/dlp-dataflow-load-test/deidentifyTemplates/dlp-demo-deid-latest-1706594483019"
  }
]
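
This JSON is the default test matrix: the pre-processing job in the performance-testing workflow reads it with jq and expands it into a strategy matrix. A trimmed-down array in the same shape can also be pasted into the workflow's test_config_json dispatch input to run a subset of scenarios; for example, this one-entry list (copied from the batch defaults above) would exercise only the 500MB CSV test:

[
  {
    "name": "Batch1_csv",
    "type": "batch",
    "file_type": "CSV",
    "file_size": "500MB",
    "gcs_file_path": "gs://input_dlp_load_test_2/largecsv500MB.csv",
    "deid_template": "projects/dlp-dataflow-load-test/deidentifyTemplates/dlp-demo-deid-latest-1706594483019"
  }
]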
@@ -0,0 +1,48 @@
name: "Execute copy workflow"
description: "Copies files from raw bucket to specified input bucket"

inputs:
  raw_bucket:
    description: "GCS Raw bucket name"
    required: true
  raw_file_pattern:
    description: "File name pattern"
    required: true
  input_gcs_bucket:
    description: "GCS bucket name"
    required: true
  job_id:
    description: "Job ID"
    required: true
  workflow_name:
    description: "Workflow name"
    required: true
  region:
    description: "Region"
    required: true

runs:
  using: "composite"
  steps:
    - name: Execute the workflow
      shell: bash
      run: |
        raw_file_pattern=$(echo "${{inputs.raw_file_pattern}}" | awk -F "/" '{print $NF}')
        raw_bucket=$(echo "${{inputs.raw_file_pattern}}" | awk -F "/" '{print $3}')
        not_finished=true
        num_executions=1
        while [ $num_executions -le 10 ];
        do
          echo "Executing workflow: $num_executions"
          gcloud workflows run ${{inputs.workflow_name}} \
            --call-log-level=log-errors-only \
            --data="{\"input_bucket\": \"${{inputs.input_gcs_bucket}}\",\"raw_bucket\": \"$raw_bucket\",\"source_file\": \"$raw_file_pattern\"}"
          num_executions=$((num_executions+1))
          sleep 60s
        done
    - name: Drain the pipeline
      shell: bash
      run: |
        gcloud dataflow jobs drain ${{inputs.job_id}} --region ${{inputs.region}}
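
In the performance-testing workflow this action only runs for streaming scenarios, after the Dataflow job has been submitted. Note that the first step re-derives the bucket name from raw_file_pattern, so the raw_bucket input is effectively unused by the shell script. A sketch of the calling step, with illustrative literal values taken from the streaming defaults in this PR:

    - name: Execute copy files workflow
      if: always() && matrix.type == 'streaming'
      uses: ./.github/workflows/execute-copy-workflow
      with:
        raw_bucket: input_dlp_load_test_2              # illustrative values
        raw_file_pattern: gs://input_dlp_load_test_2/largecsv100MB.csv
        input_gcs_bucket: input_load_test_streaming_job
        job_id: ${{ steps.submit-dataflow-job.outputs.job_id }}
        workflow_name: generate_files_workflow
        region: us-central1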
@@ -0,0 +1,28 @@
name: "Fetch job metrics"
description: "Fetch the metrics of Dataflow job"
inputs:
  job_id:
    description: "JobId"
    required: true
  project_id:
    description: "gcp project id"
    required: true
  test_uuid:
    description: "test uuid"
    required: true
  test_name:
    description: "Test name"
    required: true
  test_details:
    description: "Test configuration details"
    required: true

runs:
  using: "composite"
  steps:
    - name: Execute script
      shell: bash
      run: python3 .github/workflows/scripts/fetchJobMetrics.py ${{inputs.project_id}} ${{inputs.job_id}} ${{inputs.test_uuid}} ${{inputs.test_name}} '${{inputs.test_details}}'
@@ -0,0 +1,144 @@
name: Performance testing

on:
  workflow_dispatch:
    inputs:
      test_config_json:
        description: test configs
        type: string

env:
  PROJECT_ID: "dlp-dataflow-load-test"
  REGION: "us-central1"
  INSPECT_TEMPLATE: "projects/dlp-dataflow-load-test/inspectTemplates/dlp-demo-inspect-latest-1706594483019"
  DEID_TEMPLATE: "projects/dlp-dataflow-load-test/deidentifyTemplates/dlp-demo-deid-latest-1706594483019"
  PUB_SUB_TOPIC: "projects/dlp-dataflow-load-test/topics/load_test_pub_sub_topic"
  CLOUD_WORKFLOW: "generate_files_workflow"

jobs:
  generate-uuid:
    runs-on: ubuntu-latest
    timeout-minutes: 5
    outputs:
      uuid: ${{ steps.gen-uuid.outputs.uuid }}
    steps:
      - name: Generate UUID for workflow
        id: gen-uuid
        run: |
          new_uuid=$(uuidgen)
          modified_uuid=$(echo "$new_uuid" | cut -c1-8 )
          echo "uuid=$modified_uuid" >> "$GITHUB_OUTPUT"

  pre-processing:
    needs: generate-uuid
    runs-on: [self-hosted, load-testing]
    timeout-minutes: 5
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v2

      - name: Read test details
        id: set-matrix
        run: |
          matrix=$(jq -c . < .github/workflows/configs/tests_config.json)
          if [[ "${{github.event.inputs.test_config_json}}" != "" ]]; then
            matrix=$(echo '${{github.event.inputs.test_config_json}}' | jq .)
          fi
          echo "matrix={\"include\":$(echo $matrix)}" >> $GITHUB_OUTPUT
          echo $matrix

  run-test:
    needs:
      - generate-uuid
      - pre-processing
    runs-on: [self-hosted, load-testing]
    continue-on-error: true
    strategy:
      max-parallel: 1
      matrix: ${{ fromJSON(needs.pre-processing.outputs.matrix) }}
    steps:
      - name: Set job parameters
        id: set-job-params
        run: |
          uuid=${{needs.generate-uuid.outputs.uuid}}
          test_name=$(echo "${{matrix.name}}" | tr '_' '-')
          echo "job_name=load-test-${{needs.generate-uuid.outputs.uuid}}-test-$test_name" >> $GITHUB_OUTPUT
          echo "dataset=dataset_${{needs.generate-uuid.outputs.uuid}}_${{matrix.name}}" >> $GITHUB_OUTPUT
          echo "Test details: ${{matrix.name}}"
          echo "job_name=load-test-${{needs.generate-uuid.outputs.uuid}}-test-$test_name"
          echo "dataset=dataset_${{needs.generate-uuid.outputs.uuid}}_${{matrix.name}}"
          input_gcs_bucket=$(echo "${{ matrix.gcs_file_path }}" | awk -F "/" '{print $3}')
          echo "input_gcs_bucket=$input_gcs_bucket" >> $GITHUB_OUTPUT

      - name: Submit dataflow job
        id: submit-dataflow-job
        uses: ./.github/workflows/submit-dataflow-job
        with:
          project_id: ${{env.PROJECT_ID}}
          input_gcs_bucket: ${{ steps.set-job-params.outputs.input_gcs_bucket }}
          gcs_file_path: ${{ matrix.gcs_file_path }}
          dataset: ${{ steps.set-job-params.outputs.dataset }}
          inspect_template: ${{env.INSPECT_TEMPLATE}}
          deid_template: ${{ matrix.deid_template }}
          job_name: ${{steps.set-job-params.outputs.job_name}}
          job_type: ${{ matrix.type }}
          gcs_notification_topic: ${{env.PUB_SUB_TOPIC}}

      - name: Execute copy files workflow for streaming jobs
        id: copy-files
        if: always() && matrix.type == 'streaming'
        uses: ./.github/workflows/execute-copy-workflow
        with:
          raw_bucket: ${{ matrix.source_file_bucket }}
          raw_file_pattern: ${{ matrix.raw_file_pattern }}
          input_gcs_bucket: ${{ steps.set-job-params.outputs.input_gcs_bucket }}
          job_id: ${{steps.submit-dataflow-job.outputs.job_id}}
          workflow_name: ${{env.CLOUD_WORKFLOW}}
          region: ${{env.REGION}}

      - name: Poll till job finishes
        uses: ./.github/workflows/poll-job
        with:
          job_id: ${{steps.submit-dataflow-job.outputs.job_id}}
          region: ${{env.REGION}}

      - name: Fetch metrics
        uses: ./.github/workflows/fetch-metrics
        with:
          job_id: ${{steps.submit-dataflow-job.outputs.job_id}}
          project_id: ${{env.PROJECT_ID}}
          test_uuid: ${{needs.generate-uuid.outputs.uuid}}
          test_name: ${{ matrix.name }}
          test_details: ${{ toJSON(matrix) }}

      - name: Cleanup
        if: always()
        uses: ./.github/workflows/cleanup
        with:
          project_id: ${{env.PROJECT_ID}}
          job_id: ${{steps.submit-dataflow-job.outputs.job_id}}
          dataset: ${{steps.set-job-params.outputs.dataset}}
          input_gcs_bucket: ${{ steps.set-job-params.outputs.input_gcs_bucket }}
          job_type: ${{ matrix.type }}

  publish-test-results:
    needs:
      - generate-uuid
      - pre-processing
      - run-test
    runs-on: [self-hosted, load-testing]
    steps:
      - uses: actions/checkout@v2

      - name: Execute publishMetrics script
        run: |
          python3 .github/workflows/scripts/publishTestReport.py ${{env.PROJECT_ID}} ${{ needs.generate-uuid.outputs.uuid }}
@@ -0,0 +1,39 @@
name: "Poll for job"
description: "Poll till job completes"
inputs:
  job_id:
    description: "JobId"
    required: true
  region:
    description: "Region"
    required: true

runs:
  using: "composite"
  steps:
    - name: Poll
      shell: bash
      run: |
        not_finished=true
        while $not_finished; do
          echo "Polling for job status"
          status=$(gcloud dataflow jobs show ${{inputs.job_id}} --region ${{inputs.region}} | grep "state:" | awk '{print $2}')
          echo "Job status: $status"
          if [[ "$status" == "Done" ]]; then
            echo "BATCH JOB PASSED";
            not_finished=false;
          elif [[ "$status" == "Drained" ]]; then
            echo "STREAMING JOB PASSED";
            not_finished=false;
          elif [[ "$status" == "Failed" ]]; then
            echo "JOB FAILED";
            not_finished=false;
          elif [[ "$status" == "Cancelled" ]]; then
            echo "JOB CANCELLED";
            not_finished=false;
          else
            sleep 60s
          fi
        done
        sleep 150s
        echo "Job with id ${{inputs.job_id}} $status"