Skip to content

Commit

Permalink
Update Wasm benchmarks (#2957)
Browse files Browse the repository at this point in the history
In #2941 we found out
that the new Wasmi (register) is very effective at optimizing away
certain benchmark bytecode constructs in a way that created an unfair
advantage over Wasmi (stack), which rendered our former benchmarks
ineffective at properly measuring the performance impact.

This PR adjusts both affected benchmarks to fix the stated problems.
Affected are
- `instr_i64const` -> `instr_i64add`: Renamed since it now measures the
performance impact of the Wasm `i64.add` instruction with locals as
inputs and outputs. This makes it impossible for Wasmi (register) to
aggressively optimize away the entire function body (as it previously
did) but still provides a way for Wasmi (register) to shine with its
register based execution model.
- `call_with_code_per_byte`: Now uses `local.get` instead of `i32.const`
for the `if` condition, which prevents Wasmi (register) from aggressively
optimizing away whole parts of the `if` and thereby gaining an unfair advantage.

cc @athei

---------

Co-authored-by: command-bot <>
Co-authored-by: Alexander Theißen <alex.theissen@me.com>
Co-authored-by: Ignacio Palacios <ignacio.palacios.santos@gmail.com>
  • Loading branch information
3 people authored Jan 19, 2024
1 parent 320b528 commit e02c520
Show file tree
Hide file tree
Showing 4 changed files with 697 additions and 657 deletions.
46 changes: 31 additions & 15 deletions substrate/frame/contracts/src/benchmarking/code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ use sp_std::{borrow::ToOwned, prelude::*};
use wasm_instrument::parity_wasm::{
builder,
elements::{
self, BlockType, CustomSection, External, FuncBody, Instruction, Instructions, Module,
Section, ValueType,
self, BlockType, CustomSection, External, FuncBody, Instruction, Instructions, Local,
Module, Section, ValueType,
},
};

Expand Down Expand Up @@ -281,17 +281,21 @@ impl<T: Config> WasmModule<T> {
/// instrumentation runtime by nesting blocks as deeply as possible given the byte budget.
/// `code_location`: Whether to place the code into `deploy` or `call`.
pub fn sized(target_bytes: u32, code_location: Location) -> Self {
use self::elements::Instruction::{End, I32Const, If, Return};
use self::elements::Instruction::{End, GetLocal, If, Return};
// Base size of a contract is 63 bytes and each expansion adds 6 bytes.
// We do one expansion less to account for the code section and function body
// size fields inside the binary wasm module representation which are leb128 encoded
// and therefore grow in size when the contract grows. We are not allowed to overshoot
// because of the maximum code size that is enforced by `instantiate_with_code`.
let expansions = (target_bytes.saturating_sub(63) / 6).saturating_sub(1);
const EXPANSION: [Instruction; 4] = [I32Const(0), If(BlockType::NoResult), Return, End];
const EXPANSION: [Instruction; 4] = [GetLocal(0), If(BlockType::NoResult), Return, End];
let mut module =
ModuleDefinition { memory: Some(ImportedMemory::max::<T>()), ..Default::default() };
let body = Some(body::repeated(expansions, &EXPANSION));
let body = Some(body::repeated_with_locals(
&[Local::new(1, ValueType::I32)],
expansions,
&EXPANSION,
));
match code_location {
Location::Call => module.call_body = body,
Location::Deploy => module.deploy_body = body,
Expand Down Expand Up @@ -373,15 +377,21 @@ pub mod body {
/// Insert a I32Const with incrementing value for each insertion.
/// (start_at, increment_by)
Counter(u32, u32),
/// Insert the specified amount of I64Const with a random value.
RandomI64Repeated(usize),
}

/// Wrap `instructions` into a function body that declares no locals.
///
/// The instruction list is used exactly as given; nothing (in particular no
/// trailing `End`) is appended here.
pub fn plain(instructions: Vec<Instruction>) -> FuncBody {
    let no_locals = Vec::new();
    let code = Instructions::new(instructions);
    FuncBody::new(no_locals, code)
}

pub fn repeated(repetitions: u32, instructions: &[Instruction]) -> FuncBody {
repeated_with_locals(&[], repetitions, instructions)
}

pub fn repeated_with_locals(
locals: &[Local],
repetitions: u32,
instructions: &[Instruction],
) -> FuncBody {
let instructions = Instructions::new(
instructions
.iter()
Expand All @@ -391,15 +401,23 @@ pub mod body {
.chain(sp_std::iter::once(Instruction::End))
.collect(),
);
FuncBody::new(Vec::new(), instructions)
FuncBody::new(locals.to_vec(), instructions)
}

pub fn repeated_dyn(repetitions: u32, mut instructions: Vec<DynInstr>) -> FuncBody {
use rand::{distributions::Standard, prelude::*};

// We do not need to be secure here.
let mut rng = rand_pcg::Pcg32::seed_from_u64(8446744073709551615);
pub fn repeated_with_locals_using<const N: usize>(
locals: &[Local],
repetitions: u32,
mut f: impl FnMut() -> [Instruction; N],
) -> FuncBody {
let mut instructions = Vec::new();
for _ in 0..repetitions {
instructions.extend(f());
}
instructions.push(Instruction::End);
FuncBody::new(locals.to_vec(), Instructions::new(instructions))
}

pub fn repeated_dyn(repetitions: u32, mut instructions: Vec<DynInstr>) -> FuncBody {
// We need to iterate over indices because we cannot cycle over mutable references
let body = (0..instructions.len())
.cycle()
Expand All @@ -411,8 +429,6 @@ pub mod body {
*offset += *increment_by;
vec![Instruction::I32Const(current as i32)]
},
DynInstr::RandomI64Repeated(num) =>
(&mut rng).sample_iter(Standard).take(*num).map(Instruction::I64Const).collect(),
})
.chain(sp_std::iter::once(Instruction::End))
.collect();
Expand Down
46 changes: 36 additions & 10 deletions substrate/frame/contracts/src/benchmarking/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ use pallet_balances;
use pallet_contracts_uapi::CallFlags;
use sp_runtime::traits::{Bounded, Hash};
use sp_std::prelude::*;
use wasm_instrument::parity_wasm::elements::{BlockType, Instruction, ValueType};
use wasm_instrument::parity_wasm::elements::{BlockType, Instruction, Local, ValueType};

/// How many runs we do per API benchmark.
///
Expand Down Expand Up @@ -2582,19 +2582,45 @@ benchmarks! {
let origin = RawOrigin::Signed(instance.caller.clone());
}: call(origin, instance.addr, 0u32.into(), Weight::MAX, None, vec![])

// We make the assumption that pushing a constant and dropping a value takes roughly
// the same amount of time. We call this weight `w_base`.
// The weight that would result from the respective benchmark we call: `w_bench`.
// We load `i64` values from random linear memory locations and store the loaded
// values back into yet another random linear memory location.
// The random addresses are uniformly distributed across the entire span of the linear memory.
// We do this to enforce random memory accesses which are particularly expensive.
//
// w_base = w_i{32,64}const = w_drop = w_bench / 2
// The combination of this computation is our weight base `w_base`.
#[pov_mode = Ignored]
instr_i64const {
instr_i64_load_store {
let r in 0 .. INSTR_BENCHMARK_RUNS;

use rand::prelude::*;

// We do not need to be secure here. Fixed seed allows for deterministic results.
let mut rng = rand_pcg::Pcg32::seed_from_u64(8446744073709551615);

let memory = ImportedMemory::max::<T>();
let bytes_per_page = 65536;
let bytes_per_memory = memory.max_pages * bytes_per_page;
let mut sbox = Sandbox::from(&WasmModule::<T>::from(ModuleDefinition {
call_body: Some(body::repeated_dyn(r, vec![
RandomI64Repeated(1),
Regular(Instruction::Drop),
])),
memory: Some(memory),
call_body: Some(body::repeated_with_locals_using(
&[Local::new(1, ValueType::I64)],
r,
|| {
// Instruction sequence to load a `i64` from linear memory
// at a random memory location and store it back into another
// location of the linear memory.
let c0: i32 = rng.gen_range(0..bytes_per_memory as i32);
let c1: i32 = rng.gen_range(0..bytes_per_memory as i32);
[
Instruction::I32Const(c0), // address for `i64.load8_s`
Instruction::I64Load8S(0, 0),
Instruction::SetLocal(0), // temporarily store value loaded in `i64.load8_s`
Instruction::I32Const(c1), // address for `i64.store8`
Instruction::GetLocal(0), // value to be stored in `i64.store8`
Instruction::I64Store8(0, 0),
]
}
)),
.. Default::default()
}));
}: {
Expand Down
22 changes: 6 additions & 16 deletions substrate/frame/contracts/src/schedule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -358,25 +358,12 @@ macro_rules! cost_args {
}
}

macro_rules! cost_instr_no_params {
($name:ident) => {
cost_args!($name, 1).ref_time() as u32
};
}

// Shorthand for `cost_args!` with a fixed second argument of `1`.
//
// `$name` is forwarded unchanged as the first argument of `cost_args!`;
// the expansion is exactly `cost_args!($name, 1)`.
macro_rules! cost {
($name:ident) => {
cost_args!($name, 1)
};
}

macro_rules! cost_instr {
($name:ident, $num_params:expr) => {
cost_instr_no_params!($name)
.saturating_sub((cost_instr_no_params!(instr_i64const) / 2).saturating_mul($num_params))
};
}

impl Default for Limits {
fn default() -> Self {
Self {
Expand All @@ -396,10 +383,13 @@ impl Default for Limits {
}

impl<T: Config> Default for InstructionWeights<T> {
/// We price both `i64.const` and `drop` as `instr_i64const / 2`. The reason
/// for that is that we cannot benchmark either of them on its own.
/// We execute 6 different instructions therefore we have to divide the actual
/// computed gas costs by 6 to have a rough estimate as to how expensive each
/// single executed instruction is going to be.
fn default() -> Self {
Self { base: cost_instr!(instr_i64const, 1), _phantom: PhantomData }
let instr_cost = cost!(instr_i64_load_store).ref_time() as u32;
let base = instr_cost / 6;
Self { base, _phantom: PhantomData }
}
}

Expand Down
Loading

0 comments on commit e02c520

Please sign in to comment.