Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PVF: Add worker check during tests and benches #1771

Merged
merged 16 commits into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .cargo/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,14 @@ rustflags = [
"-Aclippy::stable_sort_primitive", # prefer stable sort
"-Aclippy::extra-unused-type-parameters", # stylistic
]

[env]
# The version of the node.
#
# This is the version that is used for versioning this node binary.
# By default the `minor` version is bumped in every release. `Major` or `patch` releases are only
# expected in very rare cases.
#
# The worker binaries associated to the node binary should ensure that they are using the same
# version as the main node that started them.
POLKADOT_NODE_VERSION = "1.1.0"
mrcnski marked this conversation as resolved.
Show resolved Hide resolved
4 changes: 3 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 0 additions & 10 deletions polkadot/cli/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,6 @@
use clap::Parser;
use std::path::PathBuf;

/// The version of the node.
///
/// This is the version that is used for versioning this node binary.
/// By default the `minor` version is bumped in every release. `Major` or `patch` releases are only
/// expected in very rare cases.
///
/// The worker binaries associated to the node binary should ensure that they are using the same
/// version as the main node that started them.
pub const NODE_VERSION: &'static str = "1.1.0";

#[allow(missing_docs)]
#[derive(Debug, Parser)]
pub enum Subcommand {
Expand Down
11 changes: 7 additions & 4 deletions polkadot/cli/src/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.

use crate::cli::{Cli, Subcommand, NODE_VERSION};
use crate::cli::{Cli, Subcommand};
use frame_benchmarking_cli::{BenchmarkCmd, ExtrinsicFactory, SUBSTRATE_REFERENCE_HARDWARE};
use futures::future::TryFutureExt;
use log::{info, warn};
Expand Down Expand Up @@ -50,7 +50,7 @@ impl SubstrateCli for Cli {

fn impl_version() -> String {
let commit_hash = env!("SUBSTRATE_CLI_COMMIT_HASH");
format!("{NODE_VERSION}-{commit_hash}")
format!("{}-{commit_hash}", env!("POLKADOT_NODE_VERSION"))
}

fn description() -> String {
Expand Down Expand Up @@ -242,8 +242,11 @@ where
None
};

let node_version =
if cli.run.disable_worker_version_check { None } else { Some(NODE_VERSION.to_string()) };
let node_version = if cli.run.disable_worker_version_check {
None
} else {
Some(env!("POLKADOT_NODE_VERSION").to_string())
};

runner.run_node_until_exit(move |config| async move {
let hwbench = (!cli.run.no_hardware_benchmarks)
Expand Down
13 changes: 9 additions & 4 deletions polkadot/node/core/pvf/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ cfg-if = "1.0"
futures = "0.3.21"
futures-timer = "3.0.2"
gum = { package = "tracing-gum", path = "../../gum" }
is_executable = "1.0.1"
libc = "0.2.139"
pin-project = "1.0.9"
rand = "0.8.5"
Expand All @@ -34,19 +35,23 @@ sp-maybe-compressed-blob = { path = "../../../../substrate/primitives/maybe-comp
polkadot-node-core-pvf-prepare-worker = { path = "prepare-worker", optional = true }
polkadot-node-core-pvf-execute-worker = { path = "execute-worker", optional = true }

[build-dependencies]
substrate-build-script-utils = { path = "../../../../substrate/utils/build-script-utils" }

[dev-dependencies]
assert_matches = "1.4.0"
criterion = { version = "0.4.0", default-features = false, features = ["cargo_bench_support", "async_tokio"] }
hex-literal = "0.4.1"
polkadot-node-core-pvf-common = { path = "common", features = ["test-utils"] }
# For the puppet worker, depend on ourselves with the test-utils feature.
# For benches and integration tests, depend on ourselves with the test-utils
# feature.
polkadot-node-core-pvf = { path = ".", features = ["test-utils"] }
rococo-runtime = { path = "../../../runtime/rococo" }

adder = { package = "test-parachain-adder", path = "../../../parachain/test-parachains/adder" }
halt = { package = "test-parachain-halt", path = "../../../parachain/test-parachains/halt" }

[[bench]]
name = "host_prepare_rococo_runtime"
harness = false

[features]
ci-only-tests = []
jemalloc-allocator = [ "polkadot-node-core-pvf-common/jemalloc-allocator" ]
Expand Down
124 changes: 124 additions & 0 deletions polkadot/node/core/pvf/benches/host_prepare_rococo_runtime.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Polkadot.

// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.

use criterion::{criterion_group, criterion_main, BatchSize, Criterion, SamplingMode};
use polkadot_node_core_pvf::{
start, testing, Config, Metrics, PrepareError, PrepareJobKind, PrepareStats, PvfPrepData,
ValidationHost,
};
use polkadot_primitives::ExecutorParams;
use std::time::Duration;
use tokio::{runtime::Handle, sync::Mutex};

const TEST_PREPARATION_TIMEOUT: Duration = Duration::from_secs(30);

struct TestHost {
host: Mutex<ValidationHost>,
}

impl TestHost {
fn new_with_config<F>(handle: &Handle, f: F) -> Self
where
F: FnOnce(&mut Config),
{
let (prepare_worker_path, execute_worker_path) = testing::get_and_check_worker_paths();

let cache_dir = tempfile::tempdir().unwrap();
let mut config = Config::new(
cache_dir.path().to_owned(),
None,
prepare_worker_path,
execute_worker_path,
);
f(&mut config);
let (host, task) = start(config, Metrics::default());
let _ = handle.spawn(task);
Self { host: Mutex::new(host) }
}

async fn precheck_pvf(
&self,
code: &[u8],
executor_params: ExecutorParams,
) -> Result<PrepareStats, PrepareError> {
let (result_tx, result_rx) = futures::channel::oneshot::channel();

let code = sp_maybe_compressed_blob::decompress(code, 16 * 1024 * 1024)
.expect("Compression works");

self.host
.lock()
.await
.precheck_pvf(
PvfPrepData::from_code(
code.into(),
executor_params,
TEST_PREPARATION_TIMEOUT,
PrepareJobKind::Prechecking,
),
result_tx,
)
.await
.unwrap();
result_rx.await.unwrap()
}
}

fn host_prepare_rococo_runtime(c: &mut Criterion) {
polkadot_node_core_pvf_common::sp_tracing::try_init_simple();

let rt = tokio::runtime::Runtime::new().unwrap();

let blob = rococo_runtime::WASM_BINARY.unwrap();
let pvf = match sp_maybe_compressed_blob::decompress(&blob, 64 * 1024 * 1024) {
Ok(code) => PvfPrepData::from_code(
code.into_owned(),
ExecutorParams::default(),
Duration::from_secs(360),
PrepareJobKind::Compilation,
),
Err(e) => {
panic!("Cannot decompress blob: {:?}", e);
},
};

let mut group = c.benchmark_group("rococo");
group.sampling_mode(SamplingMode::Flat);
group.sample_size(20);
group.measurement_time(Duration::from_secs(240));
group.bench_function("host: prepare Rococo runtime", |b| {
b.to_async(&rt).iter_batched(
|| {
(
TestHost::new_with_config(rt.handle(), |cfg| {
cfg.prepare_workers_hard_max_num = 1;
}),
pvf.clone().code(),
)
},
|(host, pvf_code)| async move {
// `PvfPrepData` is designed to be cheap to clone, so cloning shouldn't affect the
// benchmark accuracy
let _stats = host.precheck_pvf(&pvf_code, Default::default()).await.unwrap();
},
BatchSize::SmallInput,
)
});
group.finish();
}

criterion_group!(preparation, host_prepare_rococo_runtime);
criterion_main!(preparation);
17 changes: 0 additions & 17 deletions polkadot/node/core/pvf/bin/puppet_worker.rs

This file was deleted.

1 change: 0 additions & 1 deletion polkadot/node/core/pvf/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,5 @@ tempfile = "3.3.0"

[features]
# This feature is used to export test code to other crates without putting it in the production build.
# Also used for building the puppet worker.
test-utils = []
jemalloc-allocator = []
12 changes: 11 additions & 1 deletion polkadot/node/core/pvf/common/src/worker/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,14 @@ use tokio::{io, runtime::Runtime};
/// spawning the desired worker.
#[macro_export]
macro_rules! decl_worker_main {
($expected_command:expr, $entrypoint:expr, $worker_version:expr $(,)*) => {
($expected_command:expr, $entrypoint:expr, $worker_version:expr, $worker_version_hash:expr $(,)*) => {
fn get_full_version() -> String {
format!("{}-{}", $worker_version, $worker_version_hash)
}

fn print_help(expected_command: &str) {
println!("{} {}", expected_command, $worker_version);
println!("commit: {}", $worker_version_hash);
println!();
println!("PVF worker that is called by polkadot.");
}
Expand Down Expand Up @@ -67,6 +72,11 @@ macro_rules! decl_worker_main {
println!("{}", $worker_version);
return
},
// Useful for debugging. --version is used for version checks.
"--full-version" => {
println!("{}", get_full_version());
return
},

"--check-can-enable-landlock" => {
#[cfg(target_os = "linux")]
Expand Down
1 change: 1 addition & 0 deletions polkadot/node/core/pvf/src/artifacts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ impl ArtifactPathId {
}
}

#[derive(Debug)]
pub enum ArtifactState {
/// The artifact is ready to be used by the executor.
///
Expand Down
10 changes: 5 additions & 5 deletions polkadot/node/core/pvf/src/host.rs
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,8 @@ async fn handle_to_host(
/// This tries to prepare the PVF by compiling the WASM blob within a timeout set in
/// `PvfPrepData`.
///
/// If the prepare job failed previously, we may retry it under certain conditions.
/// We don't retry artifacts that previously failed preparation. We don't expect multiple
/// pre-checking requests.
async fn handle_precheck_pvf(
artifacts: &mut Artifacts,
prepare_queue: &mut mpsc::Sender<prepare::ToQueue>,
Expand All @@ -464,8 +465,7 @@ async fn handle_precheck_pvf(
ArtifactState::Preparing { waiting_for_response, num_failures: _ } =>
waiting_for_response.push(result_sender),
ArtifactState::FailedToProcess { error, .. } => {
// Do not retry failed preparation if another pre-check request comes in. We do not
// retry pre-checking, anyway.
// Do not retry an artifact that previously failed preparation.
let _ = result_sender.send(PrepareResult::Err(error.clone()));
},
}
Expand Down Expand Up @@ -764,7 +764,7 @@ async fn handle_prepare_done(
let last_time_failed = SystemTime::now();
let num_failures = *num_failures + 1;

gum::warn!(
gum::error!(
target: LOG_TARGET,
?artifact_id,
time_failed = ?last_time_failed,
Expand Down Expand Up @@ -846,7 +846,7 @@ async fn sweeper_task(mut sweeper_rx: mpsc::Receiver<PathBuf>) {
gum::trace!(
target: LOG_TARGET,
?result,
"Sweeping the artifact file {}",
"Sweeped the artifact file {}",
condemned.display(),
);
},
Expand Down
13 changes: 13 additions & 0 deletions polkadot/node/core/pvf/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,5 +114,18 @@ pub use polkadot_node_core_pvf_common::{
SecurityStatus,
};

use std::{path::Path, process::Command};

/// The log target for this crate.
pub const LOG_TARGET: &str = "parachain::pvf";

/// Utility to get the version of a worker, used for version checks.
///
/// The worker's existence at the given path must be checked separately.
pub fn get_worker_version(worker_path: &Path) -> std::io::Result<String> {
let worker_version = Command::new(worker_path).args(["--version"]).output()?.stdout;
Ok(std::str::from_utf8(&worker_version)
.expect("version is printed as a string; qed")
.trim()
.to_string())
}
Loading