[single node perf] Recalibrate and improve regression perf test

Recalibrate for RG change. Update limits to be based on min_ratio / max_ratio of many runs. Update module working set to 100.
aptos-labs · Oct 9, 2024 · 955648c · 955648c
1 parent cbb4431
commit 955648c
Show file tree

Hide file tree

Showing 4 changed files with 194 additions and 109 deletions.
diff --git a/.github/workflows/execution-performance.yaml b/.github/workflows/execution-performance.yaml
@@ -23,6 +23,6 @@ jobs:
       GIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
       RUNNER_NAME: executor-benchmark-runner
        # Run all tests only on the scheduled cadence, or explicitly requested
-      IS_FULL_RUN: ${{ github.event_name == 'schedule' || contains(github.event.pull_request.labels.*.name, 'CICD:run-execution-performance-full-test') }}
+      FLOW: ${{ (github.event_name == 'schedule' || contains(github.event.pull_request.labels.*.name, 'CICD:run-execution-performance-full-test')) && 'CONTINUOUS' || 'LAND_BLOCKING' }}
       # Ignore target determination if on the scheduled cadence, or explicitly requested
       IGNORE_TARGET_DETERMINATION: ${{ github.event_name == 'schedule' || contains(github.event.pull_request.labels.*.name, 'CICD:run-execution-performance-test') || contains(github.event.pull_request.labels.*.name, 'CICD:run-execution-performance-full-test') }}
diff --git a/.github/workflows/workflow-run-execution-performance.yaml b/.github/workflows/workflow-run-execution-performance.yaml
@@ -12,16 +12,20 @@ on:
         required: false
         default: executor-benchmark-runner 
         type: string
-      IS_FULL_RUN:
+      FLOW:
         required: false
-        default: false
-        type: boolean
-        description: Run complete version of the tests
+        default: CONTINUOUS
+        type: string
+        description: Which set of tests to run.
       IGNORE_TARGET_DETERMINATION:
         required: false
         default: false
         type: boolean
         description: Ignore target determination and run the tests
+      SOURCE:
+        required: false
+        default: CI
+        type: string
   # This allows the workflow to be triggered manually from the Github UI or CLI
   # NOTE: because the "number" type is not supported, we default to 720 minute timeout
   workflow_dispatch:
@@ -36,18 +40,34 @@ on:
         type: choice
         options:
         - executor-benchmark-runner
-        description: The name of the runner to use for the test.
-      IS_FULL_RUN:
+        - benchmark-t2d-32
+        - benchmark-t2d-60
+        - benchmark-c3d-30
+        - benchmark-n4-32
+        - benchmark-c4-32
+        description: The name of the runner to use for the test. (which decides machine specs)
+      FLOW:
         required: false
-        default: false
-        type: boolean
-        description: Run complete version of the tests
+        default: LAND_BLOCKING
+        options:
+        - LAND_BLOCKING
+        - CONTINUOUS
+        - MAINNET
+        - MAINNET_LARGE_DB
+        type: choice
+        description: Which set of tests to run. MAINNET/MAINNET_LARGE_DB are for performance validation of mainnet nodes.
       IGNORE_TARGET_DETERMINATION:
         required: false
-        default: false
+        default: true
         type: boolean
         description: Ignore target determination and run the tests
-
+      SOURCE:
+        required: false
+        default: ADHOC
+        options:
+        - ADHOC
+        type: choice
+        description: Test source (always adhoc from here)
 jobs:
   # This job determines which tests to run
   test-target-determinator:
@@ -63,7 +83,7 @@ jobs:
   # Run single node execution performance tests
   single-node-performance:
     needs: test-target-determinator
-    timeout-minutes: 60
+    timeout-minutes: 120
     runs-on: ${{ inputs.RUNNER_NAME }}
     steps:
       - uses: actions/checkout@v4
@@ -78,13 +98,8 @@ jobs:
 
       - name: Run single node execution benchmark in performance build mode
         shell: bash
-        run: TABULATE_INSTALL=lib-only pip install tabulate && testsuite/single_node_performance.py
-        if: ${{ !inputs.IS_FULL_RUN && (inputs.IGNORE_TARGET_DETERMINATION || needs.test-target-determinator.outputs.run_execution_performance_test == 'true') }}
-
-      - name: Run full version of the single node execution benchmark in performance build mode
-        shell: bash
-        run: TABULATE_INSTALL=lib-only pip install tabulate && FLOW=CONTINUOUS testsuite/single_node_performance.py
-        if: ${{ inputs.IS_FULL_RUN && (inputs.IGNORE_TARGET_DETERMINATION || needs.test-target-determinator.outputs.run_execution_performance_test == 'true') }}
+        run: TABULATE_INSTALL=lib-only pip install tabulate && FLOW=${{ inputs.FLOW }} SOURCE=${{ inputs.SOURCE }} testsuite/single_node_performance.py
+        if: ${{ (inputs.IGNORE_TARGET_DETERMINATION || needs.test-target-determinator.outputs.run_execution_performance_test == 'true') }}
 
       - run: echo "Skipping single node execution performance! Unrelated changes detected."
         if: ${{ !inputs.IGNORE_TARGET_DETERMINATION && needs.test-target-determinator.outputs.run_execution_performance_test != 'true' }}
diff --git a/testsuite/forge_test.py b/testsuite/forge_test.py
@@ -64,7 +64,8 @@
 
 
 class HasAssertMultiLineEqual(Protocol):
-    def assertMultiLineEqual(self, first: str, second: str, msg: Any = ...) -> None: ...
+    def assertMultiLineEqual(self, first: str, second: str, msg: Any = ...) -> None:
+        ...
 
 
 def get_cwd() -> Path: