Skip to content

Commit

Permalink
Script that recalibrates performance benchmarks (#15446)
Browse files Browse the repository at this point in the history
## Description
Move performance benchmark values into TSV files, and add a script that updates them.

## How Has This Been Tested?
Used the script to update values based on stable runs over the holidays.
  • Loading branch information
igor-aptos authored Dec 3, 2024
1 parent b011781 commit 4815045
Show file tree
Hide file tree
Showing 7 changed files with 325 additions and 96 deletions.
26 changes: 26 additions & 0 deletions aptos-move/e2e-benchmark/data/calibration_values.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
Loop { loop_count: Some(100000), loop_type: NoOp } 60 0.960 1.119 42122.6
Loop { loop_count: Some(10000), loop_type: Arithmetic } 60 0.956 1.074 26240.7
CreateObjects { num_objects: 10, object_payload_size: 0 } 60 0.938 1.168 156.6
CreateObjects { num_objects: 10, object_payload_size: 10240 } 60 0.924 1.086 9713.2
CreateObjects { num_objects: 100, object_payload_size: 0 } 60 0.922 1.275 1577.0
CreateObjects { num_objects: 100, object_payload_size: 10240 } 60 0.935 1.070 11728.7
InitializeVectorPicture { length: 128 } 60 0.926 1.069 169.4
VectorPicture { length: 128 } 60 0.908 1.075 50.2
VectorPictureRead { length: 128 } 60 0.919 1.059 48.0
InitializeVectorPicture { length: 30720 } 60 0.939 1.127 28404.4
VectorPicture { length: 30720 } 60 0.936 1.095 6935.6
VectorPictureRead { length: 30720 } 60 0.939 1.093 6948.2
SmartTablePicture { length: 30720, num_points_per_txn: 200 } 60 0.947 1.080 43673.3
SmartTablePicture { length: 1048576, num_points_per_txn: 300 } 60 0.947 1.111 74145.8
ResourceGroupsSenderWriteTag { string_length: 1024 } 60 0.918 1.075 15.8
ResourceGroupsSenderMultiChange { string_length: 1024 } 60 0.909 1.169 32.9
TokenV1MintAndTransferFT 60 0.953 1.069 384.6
TokenV1MintAndTransferNFTSequential 60 0.938 1.064 600.3
TokenV2AmbassadorMint { numbered: true } 60 0.951 1.057 516.6
LiquidityPoolSwap { is_stable: true } 60 0.961 1.139 582.6
LiquidityPoolSwap { is_stable: false } 60 0.929 1.099 563.0
CoinInitAndMint 60 0.928 1.130 205.0
FungibleAssetMint 60 0.930 1.098 235.8
IncGlobalMilestoneAggV2 { milestone_every: 1 } 60 0.914 1.051 33.5
IncGlobalMilestoneAggV2 { milestone_every: 2 } 60 0.914 1.105 19.0
EmitEvents { count: 1000 } 60 0.937 1.158 8818.7
36 changes: 5 additions & 31 deletions aptos-move/e2e-benchmark/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use aptos_transaction_generator_lib::{
use aptos_types::{account_address::AccountAddress, transaction::TransactionPayload};
use rand::{rngs::StdRng, SeedableRng};
use serde_json::json;
use std::{collections::HashMap, process::exit};
use std::{collections::HashMap, fs, process::exit};

// bump after a bigger test or perf change, so you can easily distinguish runs
// that are on top of this commit
Expand Down Expand Up @@ -85,42 +85,16 @@ const ALLOWED_REGRESSION: f64 = 0.15;
const ALLOWED_IMPROVEMENT: f64 = 0.15;
const ABSOLUTE_BUFFER_US: f64 = 2.0;

const CALIBRATION_VALUES: &str = "
Loop { loop_count: Some(100000), loop_type: NoOp } 60 0.955 1.074 41893.7
Loop { loop_count: Some(10000), loop_type: Arithmetic } 60 0.965 1.078 25915.0
CreateObjects { num_objects: 10, object_payload_size: 0 } 60 0.924 1.082 158.1
CreateObjects { num_objects: 10, object_payload_size: 10240 } 60 0.951 1.118 9356.2
CreateObjects { num_objects: 100, object_payload_size: 0 } 60 0.926 1.082 1574.2
CreateObjects { num_objects: 100, object_payload_size: 10240 } 60 0.952 1.092 11541.9
InitializeVectorPicture { length: 128 } 10 0.965 1.038 163.3
VectorPicture { length: 128 } 10 0.938 1.060 48.8
VectorPictureRead { length: 128 } 10 0.977 1.077 46.4
InitializeVectorPicture { length: 30720 } 60 0.948 1.123 27893.4
VectorPicture { length: 30720 } 60 0.931 1.125 6923.1
VectorPictureRead { length: 30720 } 60 0.934 1.102 6923.1
SmartTablePicture { length: 30720, num_points_per_txn: 200 } 60 0.952 1.109 43594.7
SmartTablePicture { length: 1048576, num_points_per_txn: 300 } 60 0.957 1.120 73865.4
ResourceGroupsSenderWriteTag { string_length: 1024 } 60 0.934 1.134 15.0
ResourceGroupsSenderMultiChange { string_length: 1024 } 60 0.929 1.122 32.3
TokenV1MintAndTransferFT 60 0.958 1.093 385.2
TokenV1MintAndTransferNFTSequential 60 0.973 1.139 588.1
TokenV2AmbassadorMint { numbered: true } 60 0.960 1.141 512.5
LiquidityPoolSwap { is_stable: true } 60 0.961 1.103 590.3
LiquidityPoolSwap { is_stable: false } 60 0.954 1.134 552.2
CoinInitAndMint 10 0.975 1.043 199.6
FungibleAssetMint 10 0.954 1.038 236.3
IncGlobalMilestoneAggV2 { milestone_every: 1 } 10 0.960 1.047 32.9
IncGlobalMilestoneAggV2 { milestone_every: 2 } 10 0.971 1.066 18.1
EmitEvents { count: 1000 } 10 0.969 1.052 8615.5
";

/// One parsed calibration entry; values of this type are stored in the map
/// returned by `get_parsed_calibration_values`, keyed by a `String`.
struct CalibrationInfo {
// count: usize,
/// Expected execution time for the entry. Presumably microseconds, going by
/// the field name — NOTE(review): confirm against the parsing code.
expected_time_micros: f64,
}

fn get_parsed_calibration_values() -> HashMap<String, CalibrationInfo> {
CALIBRATION_VALUES
let calibration_values =
fs::read_to_string("aptos-move/e2e-benchmark/data/calibration_values.tsv")
.expect("Unable to read file");
calibration_values
.trim()
.split('\n')
.map(|line| {
Expand Down
9 changes: 6 additions & 3 deletions testsuite/forge.py
Original file line number Diff line number Diff line change
Expand Up @@ -619,16 +619,19 @@ def format_pre_comment(context: ForgeContext) -> str:
context.forge_namespace,
)

return textwrap.dedent(
f"""
return (
textwrap.dedent(
f"""
### Forge is running suite `{context.forge_test_suite}` on {get_testsuite_images(context)}
* [Grafana dashboard (auto-refresh)]({dashboard_link})
* [Humio Logs]({humio_logs_link})
* [Axiom Logs]({axiom_logs_link})
* [Validator CPU Profile]({validator_cpu_profile_link})
* [Fullnode CPU Profile]({fullnode_cpu_profile_link})
"""
).lstrip() + format_github_info(context)
).lstrip()
+ format_github_info(context)
)


def format_comment(context: ForgeContext, result: ForgeResult) -> str:
Expand Down
31 changes: 21 additions & 10 deletions testsuite/replay-verify/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

REPLAY_CONCURRENCY_LEVEL = 1


class Network(Enum):
TESTNET = 1
MAINNET = 2
Expand Down Expand Up @@ -241,6 +242,7 @@ def get_pod_status(self):
def get_humio_log_link(self):
return construct_humio_url(self.label, self.name, self.start_time, time.time())


class ReplayConfig:
def __init__(self, network):
if network == Network.TESTNET:
Expand All @@ -253,9 +255,10 @@ def __init__(self, network):
self.concurrent_replayer = 18
self.pvc_number = 8
self.min_range_size = 10_000
self.range_size = 2_000_000
self.range_size = 2_000_000
self.timeout_secs = 400


class TaskStats:
def __init__(self, name):
self.name = name
Expand Down Expand Up @@ -308,7 +311,7 @@ def __init__(
self.image = image
self.pvcs = []
self.config = replay_config

def __str__(self):
return f"""ReplayScheduler:
id: {self.id}
Expand Down Expand Up @@ -360,7 +363,11 @@ def create_pvc_from_snapshot(self):
else MAINNET_SNAPSHOT_NAME
)
pvcs = create_pvcs_from_snapshot(
self.id, snapshot_name, self.namespace, self.config.pvc_number, self.get_label()
self.id,
snapshot_name,
self.namespace,
self.config.pvc_number,
self.get_label(),
)
assert len(pvcs) == self.config.pvc_number, "failed to create all pvcs"
self.pvcs = pvcs
Expand Down Expand Up @@ -504,12 +511,16 @@ def get_image(image_tag=None):
shell = forge.LocalShell()
git = forge.Git(shell)
image_name = "tools"
default_latest_image = forge.find_recent_images(
shell,
git,
1,
image_name=image_name,
)[0] if image_tag is None else image_tag
default_latest_image = (
forge.find_recent_images(
shell,
git,
1,
image_name=image_name,
)[0]
if image_tag is None
else image_tag
)
full_image = f"{forge.GAR_REPO_NAME}/{image_name}:{default_latest_image}"
return full_image

Expand Down Expand Up @@ -546,7 +557,7 @@ def print_logs(failed_workpod_logs, txn_mismatch_logs):
range_size=range_size,
image=image,
replay_config=config,
network= network,
network=network,
namespace=args.namespace,
)
logger.info(f"scheduler: {scheduler}")
Expand Down
69 changes: 17 additions & 52 deletions testsuite/single_node_performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,52 +166,9 @@ class RunGroupConfig:
CALIBRATION_SEPARATOR = " "

# transaction_type module_working_set_size executor_type count min_ratio max_ratio median
CALIBRATION = """
no-op 1 VM 6 0.938 1.019 38925.3
no-op 1000 VM 6 0.943 1.019 36444.6
apt-fa-transfer 1 VM 6 0.927 1.018 26954.7
apt-fa-transfer 1 NativeVM 6 0.927 1.018 35259.7
account-generation 1 VM 6 0.96 1.02 20606.2
account-generation 1 NativeVM 6 0.96 1.02 28216.2
account-resource32-b 1 VM 6 0.94 1.026 34260.4
modify-global-resource 1 VM 6 0.993 1.021 2260.5
modify-global-resource 100 VM 6 0.982 1.02 33129.7
publish-package 1 VM 6 0.983 1.012 1672.6
mix_publish_transfer 1 VM 6 0.972 1.044 20832.8
batch100-transfer 1 VM 6 0.953 1.024 645.1
batch100-transfer 1 NativeVM 6 0.953 1.024 1437.0
vector-picture30k 1 VM 6 0.992 1.039 103.6
vector-picture30k 100 VM 6 0.913 1.015 1831.5
smart-table-picture30-k-with200-change 1 VM 6 0.976 1.034 16.1
smart-table-picture30-k-with200-change 100 VM 6 0.985 1.018 212.9
modify-global-resource-agg-v2 1 VM 6 0.976 1.035 33992.5
modify-global-flag-agg-v2 1 VM 6 0.986 1.016 4224
modify-global-bounded-agg-v2 1 VM 6 0.964 1.047 7661.6
modify-global-milestone-agg-v2 1 VM 6 0.973 1.017 25187.1
resource-groups-global-write-tag1-kb 1 VM 6 0.989 1.03 9215.7
resource-groups-global-write-and-read-tag1-kb 1 VM 6 0.982 1.018 5538.3
resource-groups-sender-write-tag1-kb 1 VM 6 0.985 1.059 20084.2
resource-groups-sender-multi-change1-kb 1 VM 6 0.968 1.034 16400.4
token-v1ft-mint-and-transfer 1 VM 6 0.987 1.022 1156.3
token-v1ft-mint-and-transfer 100 VM 6 0.964 1.024 17842.6
token-v1nft-mint-and-transfer-sequential 1 VM 6 0.984 1.017 735.7
token-v1nft-mint-and-transfer-sequential 100 VM 6 0.966 1.017 12819.7
coin-init-and-mint 1 VM 6 0.95 1.024 26906.4
coin-init-and-mint 100 VM 6 0.985 1.022 22312.6
fungible-asset-mint 1 VM 6 0.955 1.013 23001.6
fungible-asset-mint 100 VM 6 0.955 1.015 19973.5
no-op5-signers 1 VM 6 0.934 1.016 38708.6
token-v2-ambassador-mint 1 VM 6 0.975 1.008 15179.3
token-v2-ambassador-mint 100 VM 6 0.985 1.007 15150.8
liquidity-pool-swap 1 VM 6 0.987 1.018 805.5
liquidity-pool-swap 100 VM 6 0.993 1.02 11156.3
liquidity-pool-swap-stable 1 VM 6 0.985 1.017 778.7
liquidity-pool-swap-stable 100 VM 6 0.982 1.009 11056.6
deserialize-u256 1 VM 6 0.968 1.026 36444.6
no-op-fee-payer 1 VM 6 0.994 1.026 2046
no-op-fee-payer 100 VM 6 0.96 1.014 32866.5
simple-script 1 VM 6 0.941 1.012 38206.1
"""
with open('testsuite/single_node_performance_values.tsv', 'r') as file:
CALIBRATION = file.read()


# when adding a new test, add estimated expected_tps to it, as well as waived=True.
# And then after a day or two - add calibration result for it to testsuite/single_node_performance_values.tsv, removing expected_tps/waived fields.
Expand All @@ -232,7 +189,7 @@ class RunGroupConfig:
RunGroupConfig(key=RunGroupKey("mix_publish_transfer"), key_extra=RunGroupKeyExtra(
transaction_type_override="publish-package apt-fa-transfer",
transaction_weights_override="1 100",
), included_in=LAND_BLOCKING_AND_C, waived=True),
), included_in=LAND_BLOCKING_AND_C),
RunGroupConfig(key=RunGroupKey("batch100-transfer"), included_in=LAND_BLOCKING_AND_C),
RunGroupConfig(key=RunGroupKey("batch100-transfer", executor_type="NativeVM"), included_in=Flow.CONTINUOUS),

Expand Down Expand Up @@ -294,7 +251,7 @@ class RunGroupConfig:
# fee payer sequentializes transactions today. in these tests module publisher is the fee payer, so larger number of modules tests throughput with multiple fee payers
RunGroupConfig(key=RunGroupKey("no-op-fee-payer"), included_in=LAND_BLOCKING_AND_C),
RunGroupConfig(key=RunGroupKey("no-op-fee-payer", module_working_set_size=DEFAULT_MODULE_WORKING_SET_SIZE), included_in=Flow.CONTINUOUS),
RunGroupConfig(key=RunGroupKey("simple-script"), included_in=LAND_BLOCKING_AND_C, waived=True),
RunGroupConfig(key=RunGroupKey("simple-script"), included_in=LAND_BLOCKING_AND_C),

RunGroupConfig(expected_tps=50000, key=RunGroupKey("coin_transfer_connected_components", executor_type="sharded"), key_extra=RunGroupKeyExtra(sharding_traffic_flags="--connected-tx-grps 5000", transaction_type_override=""), included_in=Flow.REPRESENTATIVE, waived=True),
RunGroupConfig(expected_tps=50000, key=RunGroupKey("coin_transfer_hotspot", executor_type="sharded"), key_extra=RunGroupKeyExtra(sharding_traffic_flags="--hotspot-probability 0.8", transaction_type_override=""), included_in=Flow.REPRESENTATIVE, waived=True),
Expand Down Expand Up @@ -1067,16 +1024,24 @@ def print_table(
"""If you expect your PR to change the performance, you need to recalibrate the values.
To do so, you should run the test on your branch 6 times
(https://github.com/aptos-labs/aptos-core/actions/workflows/workflow-run-execution-performance.yaml ; remember to select CONTINUOUS).
Then go to Humio calibration link (https://gist.github.com/igor-aptos/7b12ca28de03894cddda8e415f37889e),
update it to your branch, and export values as CSV, and then open and copy values inside
testsuite/single_node_performance.py testsuite), and add Blockchain oncall as the reviewer.
Then run the script locally `./testsuite/single_node_performance_calibration.py --branch=YOUR_BRANCH` to update calibration values
and add Blockchain oncall as the reviewer.
"""
)
exit(1)

if move_e2e_benchmark_failed:
print(
"Move e2e benchmark failed, failing the job. See logs at the beginning for more details."
"""
Move e2e benchmark failed, failing the job. See logs at the beginning for more details.
If you expect your PR to change the performance, you need to recalibrate the values.
To do so, you should run the test on your branch 6 times
(https://github.com/aptos-labs/aptos-core/actions/workflows/workflow-run-execution-performance.yaml ; remember to select CONTINUOUS,
and don't select to skip move-only e2e tests).
Then run the script locally `./testsuite/single_node_performance_calibration.py --branch=YOUR_BRANCH --move-e2e` to update calibration values
and add Blockchain oncall as the reviewer.
"""
)
exit(1)

Expand Down
Loading

0 comments on commit 4815045

Please sign in to comment.