From e847e5e06ae6e82e3c1984266f744a46b2f048e1 Mon Sep 17 00:00:00 2001 From: "Marcin S." Date: Tue, 9 Jan 2024 19:25:27 +0100 Subject: [PATCH 1/5] PVF: Remove artifact persistence across restarts --- Cargo.lock | 2 +- polkadot/node/core/pvf/Cargo.toml | 1 + polkadot/node/core/pvf/common/Cargo.toml | 3 - polkadot/node/core/pvf/common/build.rs | 19 -- polkadot/node/core/pvf/common/src/lib.rs | 2 - polkadot/node/core/pvf/src/artifacts.rs | 314 +----------------- polkadot/node/core/pvf/src/host.rs | 7 +- .../core/pvf/src/prepare/worker_interface.rs | 13 +- .../utils/build-script-utils/src/version.rs | 31 -- 9 files changed, 22 insertions(+), 370 deletions(-) delete mode 100644 polkadot/node/core/pvf/common/build.rs diff --git a/Cargo.lock b/Cargo.lock index 5fba3e620370..86e26d0316ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12675,6 +12675,7 @@ dependencies = [ "criterion 0.4.0", "futures", "futures-timer", + "hex", "hex-literal", "is_executable", "libc", @@ -12751,7 +12752,6 @@ dependencies = [ "sp-externalities 0.19.0", "sp-io", "sp-tracing 10.0.0", - "substrate-build-script-utils", "tempfile", "thiserror", "tracing-gum", diff --git a/polkadot/node/core/pvf/Cargo.toml b/polkadot/node/core/pvf/Cargo.toml index 2642377b6e62..968857ec0de4 100644 --- a/polkadot/node/core/pvf/Cargo.toml +++ b/polkadot/node/core/pvf/Cargo.toml @@ -16,6 +16,7 @@ cfg-if = "1.0" futures = "0.3.21" futures-timer = "3.0.2" gum = { package = "tracing-gum", path = "../../gum" } +hex = { version = "0.4.3", default-features = false } is_executable = "1.0.1" libc = "0.2.139" pin-project = "1.0.9" diff --git a/polkadot/node/core/pvf/common/Cargo.toml b/polkadot/node/core/pvf/common/Cargo.toml index c5c09300e8af..974965be5935 100644 --- a/polkadot/node/core/pvf/common/Cargo.toml +++ b/polkadot/node/core/pvf/common/Cargo.toml @@ -39,9 +39,6 @@ seccompiler = "0.4.0" assert_matches = "1.4.0" tempfile = "3.3.0" -[build-dependencies] -substrate-build-script-utils = { path = "../../../../../substrate/utils/build-script-utils" } - [features] # This feature is used to export test code to other crates without putting it in the production build. test-utils = [] diff --git a/polkadot/node/core/pvf/common/build.rs b/polkadot/node/core/pvf/common/build.rs deleted file mode 100644 index 5531ad411da8..000000000000 --- a/polkadot/node/core/pvf/common/build.rs +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (C) Parity Technologies (UK) Ltd. -// This file is part of Polkadot. - -// Polkadot is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. - -// Polkadot is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. - -// You should have received a copy of the GNU General Public License -// along with Polkadot. If not, see . - -fn main() { - substrate_build_script_utils::generate_wasmtime_version(); -} diff --git a/polkadot/node/core/pvf/common/src/lib.rs b/polkadot/node/core/pvf/common/src/lib.rs index abebd06f71a4..af4a7526e553 100644 --- a/polkadot/node/core/pvf/common/src/lib.rs +++ b/polkadot/node/core/pvf/common/src/lib.rs @@ -31,8 +31,6 @@ pub use sp_tracing; const LOG_TARGET: &str = "parachain::pvf-common"; -pub const RUNTIME_VERSION: &str = env!("SUBSTRATE_WASMTIME_VERSION"); - use parity_scale_codec::{Decode, Encode}; use std::{ io::{self, Read, Write}, diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 17ce5b443e33..5b31316545fe 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -18,8 +18,7 @@ //! //! # Lifecycle of an artifact //! -//! 1. During node start-up, we will check the cached artifacts, if any. The stale and corrupted -//! ones are pruned. The valid ones are registered in the [`Artifacts`] table. +//! 1. During node start-up, we prune all the cached artifacts, if any. //! //! 2. In order to be executed, a PVF should be prepared first. This means that artifacts should //! have an [`ArtifactState::Prepared`] entry for that artifact in the table. If not, the @@ -55,30 +54,17 @@ //! older by a predefined parameter. This process is run very rarely (say, once a day). Once the //! artifact is expired it is removed from disk eagerly atomically. -use crate::{host::PrecheckResultSender, LOG_TARGET}; +use crate::host::PrecheckResultSender; use always_assert::always; -use polkadot_core_primitives::Hash; -use polkadot_node_core_pvf_common::{ - error::PrepareError, prepare::PrepareStats, pvf::PvfPrepData, RUNTIME_VERSION, -}; -use polkadot_node_primitives::NODE_VERSION; +use polkadot_node_core_pvf_common::{error::PrepareError, prepare::PrepareStats, pvf::PvfPrepData}; use polkadot_parachain_primitives::primitives::ValidationCodeHash; use polkadot_primitives::ExecutorParamsHash; use std::{ collections::HashMap, - io, path::{Path, PathBuf}, - str::FromStr as _, time::{Duration, SystemTime}, }; -const RUNTIME_PREFIX: &str = "wasmtime_v"; -const NODE_PREFIX: &str = "polkadot_v"; - -fn artifact_prefix() -> String { - format!("{}{}_{}{}", RUNTIME_PREFIX, RUNTIME_VERSION, NODE_PREFIX, NODE_VERSION) -} - /// Identifier of an artifact. Encodes a code hash of the PVF and a hash of executor parameter set. #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct ArtifactId { @@ -96,35 +82,6 @@ impl ArtifactId { pub fn from_pvf_prep_data(pvf: &PvfPrepData) -> Self { Self::new(pvf.code_hash(), pvf.executor_params().hash()) } - - /// Returns the canonical path to the concluded artifact. - pub(crate) fn path(&self, cache_path: &Path, checksum: &str) -> PathBuf { - let file_name = format!( - "{}_{:#x}_{:#x}_0x{}", - artifact_prefix(), - self.code_hash, - self.executor_params_hash, - checksum - ); - cache_path.join(file_name) - } - - /// Tries to recover the artifact id from the given file name. - /// Return `None` if the given file name is invalid. - /// VALID_NAME := _ _ _ - fn from_file_name(file_name: &str) -> Option { - let file_name = file_name.strip_prefix(&artifact_prefix())?.strip_prefix('_')?; - let parts: Vec<&str> = file_name.split('_').collect(); - - if let [code_hash, param_hash, _checksum] = parts[..] { - let code_hash = Hash::from_str(code_hash).ok()?.into(); - let executor_params_hash = - ExecutorParamsHash::from_hash(Hash::from_str(param_hash).ok()?); - return Some(Self { code_hash, executor_params_hash }) - } - - None - } } /// A bundle of the artifact ID and the path. @@ -193,121 +150,18 @@ impl Artifacts { Self { inner: HashMap::new() } } - #[cfg(test)] - pub(crate) fn len(&self) -> usize { - self.inner.len() - } - /// Create an empty table and populate it with valid artifacts as [`ArtifactState::Prepared`], /// if any. The existing caches will be checked by their file name to determine whether they are /// valid, e.g., matching the current node version. The ones deemed invalid will be pruned. /// /// Create the cache directory on-disk if it doesn't exist. - pub async fn new_and_prune(cache_path: &Path) -> Self { - let mut artifacts = Self { inner: HashMap::new() }; - let _ = artifacts.insert_and_prune(cache_path).await.map_err(|err| { - gum::error!( - target: LOG_TARGET, - "could not initialize artifacts cache: {err}", - ) - }); - artifacts - } - - async fn insert_and_prune(&mut self, cache_path: &Path) -> Result<(), String> { - async fn is_corrupted(path: &Path) -> bool { - let checksum = match tokio::fs::read(path).await { - Ok(bytes) => blake3::hash(&bytes), - Err(err) => { - // just remove the file if we cannot read it - gum::warn!( - target: LOG_TARGET, - ?err, - "unable to read artifact {:?} when checking integrity, removing...", - path, - ); - return true - }, - }; - - if let Some(file_name) = path.file_name() { - if let Some(file_name) = file_name.to_str() { - return !file_name.ends_with(checksum.to_hex().as_str()) - } - } - true - } - - // Insert the entry into the artifacts table if it is valid. - // Otherwise, prune it. - async fn insert_or_prune( - artifacts: &mut Artifacts, - entry: &tokio::fs::DirEntry, - cache_path: &Path, - ) -> Result<(), String> { - let file_type = entry.file_type().await; - let file_name = entry.file_name(); - - match file_type { - Ok(file_type) => - if !file_type.is_file() { - return Ok(()) - }, - Err(err) => return Err(format!("unable to get file type for {file_name:?}: {err}")), - } - - if let Some(file_name) = file_name.to_str() { - let id = ArtifactId::from_file_name(file_name); - let path = cache_path.join(file_name); - - if id.is_none() || is_corrupted(&path).await { - let _ = tokio::fs::remove_file(&path).await; - return Err(format!("invalid artifact {path:?}, file deleted")) - } - - let id = id.expect("checked is_none() above; qed"); - gum::debug!( - target: LOG_TARGET, - "reusing existing {:?} for node version v{}", - &path, - NODE_VERSION, - ); - artifacts.insert_prepared(id, path, SystemTime::now(), Default::default()); - - Ok(()) - } else { - Err(format!("non-Unicode file name {file_name:?} found in {cache_path:?}")) - } - } - + pub async fn new(cache_path: &Path) -> Self { // Make sure that the cache path directory and all its parents are created. - if let Err(err) = tokio::fs::create_dir_all(cache_path).await { - if err.kind() != io::ErrorKind::AlreadyExists { - return Err(format!("failed to create dir {cache_path:?}: {err}")) - } - } - - let mut dir = tokio::fs::read_dir(cache_path) - .await - .map_err(|err| format!("failed to read dir {cache_path:?}: {err}"))?; + // First delete the entire cache. Nodes are long-running so this should populate shortly. + let _ = tokio::fs::remove_dir_all(cache_path).await; + let _ = tokio::fs::create_dir_all(cache_path).await; - loop { - match dir.next_entry().await { - Ok(Some(entry)) => - if let Err(err) = insert_or_prune(self, &entry, cache_path).await { - gum::warn!( - target: LOG_TARGET, - ?cache_path, - "could not insert entry {:?} into the artifact cache: {}", - entry, - err, - ) - }, - Ok(None) => return Ok(()), - Err(err) => - return Err(format!("error processing artifacts in {cache_path:?}: {err}")), - } - } + Self { inner: HashMap::new() } } /// Returns the state of the given artifact by its ID. @@ -335,6 +189,7 @@ impl Artifacts { /// /// This function should only be used to build the artifact table at startup with valid /// artifact caches. + #[cfg(test)] pub(crate) fn insert_prepared( &mut self, artifact_id: ArtifactId, @@ -373,154 +228,3 @@ impl Artifacts { to_remove } } - -#[cfg(test)] -mod tests { - use super::{artifact_prefix as prefix, ArtifactId, Artifacts, NODE_VERSION, RUNTIME_VERSION}; - use polkadot_primitives::ExecutorParamsHash; - use rand::Rng; - use sp_core::H256; - use std::{ - fs, - io::Write, - path::{Path, PathBuf}, - str::FromStr, - }; - - fn rand_hash(len: usize) -> String { - let mut rng = rand::thread_rng(); - let hex: Vec<_> = "0123456789abcdef".chars().collect(); - (0..len).map(|_| hex[rng.gen_range(0..hex.len())]).collect() - } - - fn file_name(code_hash: &str, param_hash: &str, checksum: &str) -> String { - format!("{}_0x{}_0x{}_0x{}", prefix(), code_hash, param_hash, checksum) - } - - fn create_artifact( - dir: impl AsRef, - prefix: &str, - code_hash: impl AsRef, - params_hash: impl AsRef, - ) -> (PathBuf, String) { - fn artifact_path_without_checksum( - dir: impl AsRef, - prefix: &str, - code_hash: impl AsRef, - params_hash: impl AsRef, - ) -> PathBuf { - let mut path = dir.as_ref().to_path_buf(); - let file_name = - format!("{}_0x{}_0x{}", prefix, code_hash.as_ref(), params_hash.as_ref(),); - path.push(file_name); - path - } - - let (code_hash, params_hash) = (code_hash.as_ref(), params_hash.as_ref()); - let path = artifact_path_without_checksum(dir, prefix, code_hash, params_hash); - let mut file = fs::File::create(&path).unwrap(); - - let content = format!("{}{}", code_hash, params_hash).into_bytes(); - file.write_all(&content).unwrap(); - let checksum = blake3::hash(&content).to_hex().to_string(); - - (path, checksum) - } - - fn create_rand_artifact(dir: impl AsRef, prefix: &str) -> (PathBuf, String) { - create_artifact(dir, prefix, rand_hash(64), rand_hash(64)) - } - - fn concluded_path(path: impl AsRef, checksum: &str) -> PathBuf { - let path = path.as_ref(); - let mut file_name = path.file_name().unwrap().to_os_string(); - file_name.push("_0x"); - file_name.push(checksum); - path.with_file_name(file_name) - } - - #[test] - fn artifact_prefix() { - assert_eq!(prefix(), format!("wasmtime_v{}_polkadot_v{}", RUNTIME_VERSION, NODE_VERSION)); - } - - #[test] - fn from_file_name() { - assert!(ArtifactId::from_file_name("").is_none()); - assert!(ArtifactId::from_file_name("junk").is_none()); - - let file_name = file_name( - "0022800000000000000000000000000000000000000000000000000000000000", - "0033900000000000000000000000000000000000000000000000000000000000", - "00000000000000000000000000000000", - ); - - assert_eq!( - ArtifactId::from_file_name(&file_name), - Some(ArtifactId::new( - hex_literal::hex![ - "0022800000000000000000000000000000000000000000000000000000000000" - ] - .into(), - ExecutorParamsHash::from_hash(sp_core::H256(hex_literal::hex![ - "0033900000000000000000000000000000000000000000000000000000000000" - ])), - )), - ); - } - - #[test] - fn path() { - let dir = Path::new("/test"); - let code_hash = "1234567890123456789012345678901234567890123456789012345678901234"; - let params_hash = "4321098765432109876543210987654321098765432109876543210987654321"; - let checksum = "34567890123456789012345678901234"; - let file_name = file_name(code_hash, params_hash, checksum); - - let code_hash = H256::from_str(code_hash).unwrap(); - let params_hash = H256::from_str(params_hash).unwrap(); - let path = ArtifactId::new(code_hash.into(), ExecutorParamsHash::from_hash(params_hash)) - .path(dir, checksum); - - assert_eq!(path.to_str().unwrap(), format!("/test/{}", file_name)); - } - - #[tokio::test] - async fn remove_stale_cache_on_startup() { - let cache_dir = tempfile::Builder::new().prefix("test-cache-").tempdir().unwrap(); - - // invalid prefix - create_rand_artifact(&cache_dir, ""); - create_rand_artifact(&cache_dir, "wasmtime_polkadot_v"); - create_rand_artifact(&cache_dir, "wasmtime_v8.0.0_polkadot_v1.0.0"); - - let prefix = prefix(); - - // no checksum - create_rand_artifact(&cache_dir, &prefix); - - // invalid hashes - let (path, checksum) = create_artifact(&cache_dir, &prefix, "000", "000001"); - let new_path = concluded_path(&path, &checksum); - fs::rename(&path, &new_path).unwrap(); - - // checksum tampered - let (path, checksum) = create_rand_artifact(&cache_dir, &prefix); - let new_path = concluded_path(&path, checksum.chars().rev().collect::().as_str()); - fs::rename(&path, &new_path).unwrap(); - - // valid - let (path, checksum) = create_rand_artifact(&cache_dir, &prefix); - let new_path = concluded_path(&path, &checksum); - fs::rename(&path, &new_path).unwrap(); - - assert_eq!(fs::read_dir(&cache_dir).unwrap().count(), 7); - - let artifacts = Artifacts::new_and_prune(cache_dir.path()).await; - - assert_eq!(fs::read_dir(&cache_dir).unwrap().count(), 1); - assert_eq!(artifacts.len(), 1); - - fs::remove_dir_all(cache_dir).unwrap(); - } -} diff --git a/polkadot/node/core/pvf/src/host.rs b/polkadot/node/core/pvf/src/host.rs index d17a4d918e00..424fd29917fa 100644 --- a/polkadot/node/core/pvf/src/host.rs +++ b/polkadot/node/core/pvf/src/host.rs @@ -218,7 +218,7 @@ pub async fn start( gum::debug!(target: LOG_TARGET, ?config, "starting PVF validation host"); // Make sure the cache is initialized before doing anything else. - let artifacts = Artifacts::new_and_prune(&config.cache_path).await; + let artifacts = Artifacts::new(&config.cache_path).await; // Run checks for supported security features once per host startup. If some checks fail, warn // if Secure Validator Mode is disabled and return an error otherwise. @@ -918,9 +918,8 @@ pub(crate) mod tests { fn artifact_path(discriminator: u32) -> PathBuf { let pvf = PvfPrepData::from_discriminator(discriminator); let checksum = blake3::hash(pvf.code().as_bytes_ref()); - artifact_id(discriminator) - .path(&PathBuf::from(std::env::temp_dir()), checksum.to_hex().as_str()) - .to_owned() + let file_name = format!("test_{}_0x{}", discriminator, checksum); + std::env::temp_dir().join(file_name) } struct Builder { diff --git a/polkadot/node/core/pvf/src/prepare/worker_interface.rs b/polkadot/node/core/pvf/src/prepare/worker_interface.rs index 984a87ce5c9b..91c397542ab7 100644 --- a/polkadot/node/core/pvf/src/prepare/worker_interface.rs +++ b/polkadot/node/core/pvf/src/prepare/worker_interface.rs @@ -17,7 +17,6 @@ //! Host interface to the prepare worker. use crate::{ - artifacts::ArtifactId, metrics::Metrics, worker_interface::{ clear_worker_dir_path, framed_recv, framed_send, spawn_with_program_path, IdleWorker, @@ -165,7 +164,6 @@ pub async fn start_work( prepare_worker_result, pid, tmp_artifact_file, - &pvf, &cache_path, preparation_timeout, ) @@ -205,7 +203,6 @@ async fn handle_response( result: PrepareWorkerResult, worker_pid: u32, tmp_file: PathBuf, - pvf: &PvfPrepData, cache_path: &Path, preparation_timeout: Duration, ) -> Outcome { @@ -232,8 +229,14 @@ async fn handle_response( return Outcome::TimedOut } - let artifact_id = ArtifactId::from_pvf_prep_data(pvf); - let artifact_path = artifact_id.path(cache_path, &checksum); + let unique_id = { + use rand::RngCore; + let mut bytes = [0u8; 64]; + rand::thread_rng().fill_bytes(&mut bytes); + hex::encode(&bytes) + }; + let file_name = format!("{}_0x{}", unique_id, checksum); + let artifact_path = cache_path.join(file_name); gum::debug!( target: LOG_TARGET, diff --git a/substrate/utils/build-script-utils/src/version.rs b/substrate/utils/build-script-utils/src/version.rs index d85c78d2c997..cc6b4319ecae 100644 --- a/substrate/utils/build-script-utils/src/version.rs +++ b/substrate/utils/build-script-utils/src/version.rs @@ -59,34 +59,3 @@ fn get_version(impl_commit: &str) -> String { impl_commit ) } - -/// Generate `SUBSTRATE_WASMTIME_VERSION` -pub fn generate_wasmtime_version() { - generate_dependency_version("wasmtime", "SUBSTRATE_WASMTIME_VERSION"); -} - -fn generate_dependency_version(dep: &str, env_var: &str) { - // we only care about the root - match std::process::Command::new("cargo") - .args(["tree", "--depth=0", "--locked", "--package", dep]) - .output() - { - Ok(output) if output.status.success() => { - let version = String::from_utf8_lossy(&output.stdout); - - // vX.X.X - if let Some(ver) = version.strip_prefix(&format!("{} v", dep)) { - println!("cargo:rustc-env={}={}", env_var, ver); - } else { - println!("cargo:warning=Unexpected result {}", version); - } - }, - - // command errors out when it could not find the given dependency - // or when having multiple versions of it - Ok(output) => - println!("cargo:warning=`cargo tree` {}", String::from_utf8_lossy(&output.stderr)), - - Err(err) => println!("cargo:warning=Could not run `cargo tree`: {}", err), - } -} From 87bfade71db926ab6458c6b2cea2cb86fb79f1d4 Mon Sep 17 00:00:00 2001 From: "Marcin S." Date: Tue, 9 Jan 2024 19:43:03 +0100 Subject: [PATCH 2/5] Remove checksum from file name --- .../core/pvf/src/prepare/worker_interface.rs | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/polkadot/node/core/pvf/src/prepare/worker_interface.rs b/polkadot/node/core/pvf/src/prepare/worker_interface.rs index 91c397542ab7..6fd06f42ce68 100644 --- a/polkadot/node/core/pvf/src/prepare/worker_interface.rs +++ b/polkadot/node/core/pvf/src/prepare/worker_interface.rs @@ -206,15 +206,19 @@ async fn handle_response( cache_path: &Path, preparation_timeout: Duration, ) -> Outcome { - let PrepareWorkerSuccess { checksum, stats: PrepareStats { cpu_time_elapsed, memory_stats } } = - match result.clone() { - Ok(result) => result, - // Timed out on the child. This should already be logged by the child. - Err(PrepareError::TimedOut) => return Outcome::TimedOut, - Err(PrepareError::JobDied { err, job_pid }) => return Outcome::JobDied { err, job_pid }, - Err(PrepareError::OutOfMemory) => return Outcome::OutOfMemory, - Err(err) => return Outcome::Concluded { worker, result: Err(err) }, - }; + // TODO: Add `checksum` to `ArtifactPathId`. See: + // https://github.com/paritytech/polkadot-sdk/issues/2399 + let PrepareWorkerSuccess { + checksum: _, + stats: PrepareStats { cpu_time_elapsed, memory_stats }, + } = match result.clone() { + Ok(result) => result, + // Timed out on the child. This should already be logged by the child. + Err(PrepareError::TimedOut) => return Outcome::TimedOut, + Err(PrepareError::JobDied { err, job_pid }) => return Outcome::JobDied { err, job_pid }, + Err(PrepareError::OutOfMemory) => return Outcome::OutOfMemory, + Err(err) => return Outcome::Concluded { worker, result: Err(err) }, + }; if cpu_time_elapsed > preparation_timeout { // The job didn't complete within the timeout. @@ -229,13 +233,16 @@ async fn handle_response( return Outcome::TimedOut } - let unique_id = { + // The file name should uniquely identify the artifact even across restarts. In case the cache + // for some reason is not cleared correctly, we cannot + // accidentally execute an artifact compiled under a different wasmtime version, host + // environment, etc. + let file_name = { use rand::RngCore; let mut bytes = [0u8; 64]; rand::thread_rng().fill_bytes(&mut bytes); hex::encode(&bytes) }; - let file_name = format!("{}_0x{}", unique_id, checksum); let artifact_path = cache_path.join(file_name); gum::debug!( From bdb03828e3bba7eb2cbf8d67a1ac518eebd603ca Mon Sep 17 00:00:00 2001 From: "Marcin S." Date: Wed, 10 Jan 2024 12:23:47 +0100 Subject: [PATCH 3/5] Don't nuke entire cache dir on startup --- Cargo.lock | 2 +- polkadot/node/core/pvf/Cargo.toml | 2 +- polkadot/node/core/pvf/src/artifacts.rs | 85 +++++++++++++++++-- polkadot/node/core/pvf/src/host.rs | 20 ++--- .../core/pvf/src/prepare/worker_interface.rs | 9 +- .../node/core/pvf/src/worker_interface.rs | 4 +- polkadot/node/core/pvf/tests/it/main.rs | 18 ++++ 7 files changed, 110 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 86e26d0316ff..176749c99098 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12669,13 +12669,13 @@ name = "polkadot-node-core-pvf" version = "1.0.0" dependencies = [ "always-assert", + "array-bytes 6.1.0", "assert_matches", "blake3", "cfg-if", "criterion 0.4.0", "futures", "futures-timer", - "hex", "hex-literal", "is_executable", "libc", diff --git a/polkadot/node/core/pvf/Cargo.toml b/polkadot/node/core/pvf/Cargo.toml index 968857ec0de4..be35dc47b8a8 100644 --- a/polkadot/node/core/pvf/Cargo.toml +++ b/polkadot/node/core/pvf/Cargo.toml @@ -11,12 +11,12 @@ workspace = true [dependencies] always-assert = "0.1" +array-bytes = "6.1" blake3 = "1.5" cfg-if = "1.0" futures = "0.3.21" futures-timer = "3.0.2" gum = { package = "tracing-gum", path = "../../gum" } -hex = { version = "0.4.3", default-features = false } is_executable = "1.0.1" libc = "0.2.139" pin-project = "1.0.9" diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 5b31316545fe..647d097eedff 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -54,17 +54,37 @@ //! older by a predefined parameter. This process is run very rarely (say, once a day). Once the //! artifact is expired it is removed from disk eagerly atomically. -use crate::host::PrecheckResultSender; +use crate::{host::PrecheckResultSender, worker_interface::WORKER_DIR_PREFIX}; use always_assert::always; use polkadot_node_core_pvf_common::{error::PrepareError, prepare::PrepareStats, pvf::PvfPrepData}; use polkadot_parachain_primitives::primitives::ValidationCodeHash; use polkadot_primitives::ExecutorParamsHash; use std::{ collections::HashMap, + fs, path::{Path, PathBuf}, time::{Duration, SystemTime}, }; +/// The extension to use for cached artifacts. +const ARTIFACT_EXTENSION: &str = "pvf"; + +/// The prefix that artifacts used to start with under the old naming scheme. +const ARTIFACT_OLD_PREFIX: &str = "wasmtime_"; + +pub fn generate_artifact_path(cache_path: &Path) -> PathBuf { + let file_name = { + use array_bytes::Hex; + use rand::RngCore; + let mut bytes = [0u8; 64]; + rand::thread_rng().fill_bytes(&mut bytes); + bytes.hex("0x") + }; + let mut artifact_path = cache_path.join(file_name); + artifact_path.set_extension(ARTIFACT_EXTENSION); + artifact_path +} + /// Identifier of an artifact. Encodes a code hash of the PVF and a hash of executor parameter set. #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct ArtifactId { @@ -150,17 +170,35 @@ impl Artifacts { Self { inner: HashMap::new() } } - /// Create an empty table and populate it with valid artifacts as [`ArtifactState::Prepared`], - /// if any. The existing caches will be checked by their file name to determine whether they are - /// valid, e.g., matching the current node version. The ones deemed invalid will be pruned. - /// - /// Create the cache directory on-disk if it doesn't exist. + #[cfg(test)] + fn len(&self) -> usize { + self.inner.len() + } + + /// Create an empty table and the cache directory on-disk if it doesn't exist. pub async fn new(cache_path: &Path) -> Self { // Make sure that the cache path directory and all its parents are created. - // First delete the entire cache. Nodes are long-running so this should populate shortly. - let _ = tokio::fs::remove_dir_all(cache_path).await; let _ = tokio::fs::create_dir_all(cache_path).await; + // Delete any leftover artifacts and worker dirs from previous runs. We don't delete the + // entire cache directory in case the user made a mistake and set it to e.g. their home + // directory. This is a best-effort to do clean-up, so ignore any errors. + if let Ok(paths) = fs::read_dir(cache_path) { + for path in paths.map(|res| res.map(|e| e.path())).flatten() { + let file_name = match path.file_name().map(|f| f.to_str()).flatten() { + Some(f) => f, + None => continue, + }; + if path.is_dir() && file_name.starts_with(WORKER_DIR_PREFIX) { + let _ = fs::remove_dir_all(path); + } else if path.extension().map_or(false, |ext| ext == ARTIFACT_EXTENSION) || + file_name.starts_with(ARTIFACT_OLD_PREFIX) + { + let _ = fs::remove_file(path); + } + } + } + Self { inner: HashMap::new() } } @@ -228,3 +266,34 @@ impl Artifacts { to_remove } } + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn cache_cleared_on_startup() { + let tempdir = tempfile::tempdir().unwrap(); + let cache_path = tempdir.path(); + + // These should be cleared. + fs::write(cache_path.join("abcd.pvf"), "test").unwrap(); + fs::write(cache_path.join("wasmtime_..."), "test").unwrap(); + fs::create_dir(cache_path.join("worker-dir-prepare-test")).unwrap(); + + // These should not be touched. + fs::write(cache_path.join("abcd.pvfartifact"), "test").unwrap(); + fs::write(cache_path.join("polkadot_..."), "test").unwrap(); + fs::create_dir(cache_path.join("worker-prepare-test")).unwrap(); + + let artifacts = Artifacts::new(cache_path).await; + + let entries: Vec = fs::read_dir(&cache_path) + .unwrap() + .map(|entry| entry.unwrap().file_name().into_string().unwrap()) + .collect(); + assert_eq!(entries.len(), 3); + assert!(entries.contains(&String::from("abcd.pvfartifact"))); + assert_eq!(artifacts.len(), 0); + } +} diff --git a/polkadot/node/core/pvf/src/host.rs b/polkadot/node/core/pvf/src/host.rs index 424fd29917fa..21e13453edf3 100644 --- a/polkadot/node/core/pvf/src/host.rs +++ b/polkadot/node/core/pvf/src/host.rs @@ -884,14 +884,13 @@ fn pulse_every(interval: std::time::Duration) -> impl futures::Stream #[cfg(test)] pub(crate) mod tests { use super::*; - use crate::PossiblyInvalidError; + use crate::{artifacts::generate_artifact_path, PossiblyInvalidError}; use assert_matches::assert_matches; use futures::future::BoxFuture; use polkadot_node_core_pvf_common::{ error::PrepareError, prepare::{PrepareStats, PrepareSuccess}, }; - use sp_core::hexdisplay::AsBytesRef; const TEST_EXECUTION_TIMEOUT: Duration = Duration::from_secs(3); pub(crate) const TEST_PREPARATION_TIMEOUT: Duration = Duration::from_secs(30); @@ -915,13 +914,6 @@ pub(crate) mod tests { ArtifactId::from_pvf_prep_data(&PvfPrepData::from_discriminator(discriminator)) } - fn artifact_path(discriminator: u32) -> PathBuf { - let pvf = PvfPrepData::from_discriminator(discriminator); - let checksum = blake3::hash(pvf.code().as_bytes_ref()); - let file_name = format!("test_{}_0x{}", discriminator, checksum); - std::env::temp_dir().join(file_name) - } - struct Builder { cleanup_pulse_interval: Duration, artifact_ttl: Duration, @@ -1109,19 +1101,23 @@ pub(crate) mod tests { #[tokio::test] async fn pruning() { let mock_now = SystemTime::now() - Duration::from_millis(1000); + let tempdir = tempfile::tempdir().unwrap(); + let cache_path = tempdir.path(); let mut builder = Builder::default(); builder.cleanup_pulse_interval = Duration::from_millis(100); builder.artifact_ttl = Duration::from_millis(500); + let path1 = generate_artifact_path(cache_path); + let path2 = generate_artifact_path(cache_path); builder.artifacts.insert_prepared( artifact_id(1), - artifact_path(1), + path1.clone(), mock_now, PrepareStats::default(), ); builder.artifacts.insert_prepared( artifact_id(2), - artifact_path(2), + path2.clone(), mock_now, PrepareStats::default(), ); @@ -1134,7 +1130,7 @@ pub(crate) mod tests { run_until( &mut test.run, async { - assert_eq!(to_sweeper_rx.next().await.unwrap(), artifact_path(2)); + assert_eq!(to_sweeper_rx.next().await.unwrap(), path2); } .boxed(), ) diff --git a/polkadot/node/core/pvf/src/prepare/worker_interface.rs b/polkadot/node/core/pvf/src/prepare/worker_interface.rs index 6fd06f42ce68..45e31a5f453f 100644 --- a/polkadot/node/core/pvf/src/prepare/worker_interface.rs +++ b/polkadot/node/core/pvf/src/prepare/worker_interface.rs @@ -17,6 +17,7 @@ //! Host interface to the prepare worker. use crate::{ + artifacts::generate_artifact_path, metrics::Metrics, worker_interface::{ clear_worker_dir_path, framed_recv, framed_send, spawn_with_program_path, IdleWorker, @@ -237,13 +238,7 @@ async fn handle_response( // for some reason is not cleared correctly, we cannot // accidentally execute an artifact compiled under a different wasmtime version, host // environment, etc. - let file_name = { - use rand::RngCore; - let mut bytes = [0u8; 64]; - rand::thread_rng().fill_bytes(&mut bytes); - hex::encode(&bytes) - }; - let artifact_path = cache_path.join(file_name); + let artifact_path = generate_artifact_path(cache_path); gum::debug!( target: LOG_TARGET, diff --git a/polkadot/node/core/pvf/src/worker_interface.rs b/polkadot/node/core/pvf/src/worker_interface.rs index 456c20bd27b2..ad9f0294c094 100644 --- a/polkadot/node/core/pvf/src/worker_interface.rs +++ b/polkadot/node/core/pvf/src/worker_interface.rs @@ -384,10 +384,12 @@ pub struct WorkerDir { tempdir: tempfile::TempDir, } +pub const WORKER_DIR_PREFIX: &str = "worker-dir"; + impl WorkerDir { /// Creates a new, empty worker dir with a random name in the given cache dir. pub async fn new(debug_id: &'static str, cache_dir: &Path) -> Result { - let prefix = format!("worker-dir-{}-", debug_id); + let prefix = format!("{WORKER_DIR_PREFIX}-{debug_id}-"); let tempdir = tempfile::Builder::new() .prefix(&prefix) .tempdir_in(cache_dir) diff --git a/polkadot/node/core/pvf/tests/it/main.rs b/polkadot/node/core/pvf/tests/it/main.rs index 09f975b706d2..e65b0602013a 100644 --- a/polkadot/node/core/pvf/tests/it/main.rs +++ b/polkadot/node/core/pvf/tests/it/main.rs @@ -358,6 +358,24 @@ async fn deleting_prepared_artifact_does_not_dispute() { } } +#[tokio::test] +async fn cache_cleared_on_startup() { + let cache_dir = { + let host = TestHost::new().await; + + let _stats = + host.precheck_pvf(halt::wasm_binary_unwrap(), Default::default()).await.unwrap(); + host.cache_dir.path().to_owned() + }; + + // Start a new host, previous artifact should be cleared. + let _host = TestHost::new_with_config(|cfg| { + cfg.cache_path = cache_dir.clone(); + }) + .await; + assert_eq!(std::fs::read_dir(&cache_dir).unwrap().count(), 0); +} + // This test checks if the adder parachain runtime can be prepared with 10Mb preparation memory // limit enforced. At the moment of writing, the limit if far enough to prepare the PVF. If it // starts failing, either Wasmtime version has changed, or the PVF code itself has changed, and From 9ccd3dd9e26c57c6d0859119b96ad04ddce981dc Mon Sep 17 00:00:00 2001 From: "Marcin S." Date: Wed, 10 Jan 2024 12:40:58 +0100 Subject: [PATCH 4/5] Fix clippy complaints --- polkadot/node/core/pvf/src/artifacts.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 647d097eedff..ae01e72980e0 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -184,8 +184,8 @@ impl Artifacts { // entire cache directory in case the user made a mistake and set it to e.g. their home // directory. This is a best-effort to do clean-up, so ignore any errors. if let Ok(paths) = fs::read_dir(cache_path) { - for path in paths.map(|res| res.map(|e| e.path())).flatten() { - let file_name = match path.file_name().map(|f| f.to_str()).flatten() { + for path in paths.flat_map(|res| res.map(|e| e.path())) { + let file_name = match path.file_name().and_then(|f| f.to_str()) { Some(f) => f, None => continue, }; From e7435cdec6077be32100b59db3109691d6c0442d Mon Sep 17 00:00:00 2001 From: "Marcin S." Date: Wed, 10 Jan 2024 14:37:30 +0100 Subject: [PATCH 5/5] Address feedback; fix test --- polkadot/node/core/pvf/src/artifacts.rs | 24 +++++++++++------------- polkadot/node/core/pvf/tests/it/main.rs | 13 +++++++------ 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index ae01e72980e0..78dfe71adadd 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -183,19 +183,15 @@ impl Artifacts { // Delete any leftover artifacts and worker dirs from previous runs. We don't delete the // entire cache directory in case the user made a mistake and set it to e.g. their home // directory. This is a best-effort to do clean-up, so ignore any errors. - if let Ok(paths) = fs::read_dir(cache_path) { - for path in paths.flat_map(|res| res.map(|e| e.path())) { - let file_name = match path.file_name().and_then(|f| f.to_str()) { - Some(f) => f, - None => continue, - }; - if path.is_dir() && file_name.starts_with(WORKER_DIR_PREFIX) { - let _ = fs::remove_dir_all(path); - } else if path.extension().map_or(false, |ext| ext == ARTIFACT_EXTENSION) || - file_name.starts_with(ARTIFACT_OLD_PREFIX) - { - let _ = fs::remove_file(path); - } + for entry in fs::read_dir(cache_path).into_iter().flatten().flatten() { + let path = entry.path(); + let Some(file_name) = path.file_name().and_then(|f| f.to_str()) else { continue }; + if path.is_dir() && file_name.starts_with(WORKER_DIR_PREFIX) { + let _ = fs::remove_dir_all(path); + } else if path.extension().map_or(false, |ext| ext == ARTIFACT_EXTENSION) || + file_name.starts_with(ARTIFACT_OLD_PREFIX) + { + let _ = fs::remove_file(path); } } @@ -294,6 +290,8 @@ mod tests { .collect(); assert_eq!(entries.len(), 3); assert!(entries.contains(&String::from("abcd.pvfartifact"))); + assert!(entries.contains(&String::from("polkadot_..."))); + assert!(entries.contains(&String::from("worker-prepare-test"))); assert_eq!(artifacts.len(), 0); } } diff --git a/polkadot/node/core/pvf/tests/it/main.rs b/polkadot/node/core/pvf/tests/it/main.rs index e65b0602013a..15b341dc094c 100644 --- a/polkadot/node/core/pvf/tests/it/main.rs +++ b/polkadot/node/core/pvf/tests/it/main.rs @@ -360,13 +360,14 @@ async fn deleting_prepared_artifact_does_not_dispute() { #[tokio::test] async fn cache_cleared_on_startup() { - let cache_dir = { - let host = TestHost::new().await; + // Don't drop this host, it owns the `TempDir` which gets cleared on drop. + let host = TestHost::new().await; - let _stats = - host.precheck_pvf(halt::wasm_binary_unwrap(), Default::default()).await.unwrap(); - host.cache_dir.path().to_owned() - }; + let _stats = host.precheck_pvf(halt::wasm_binary_unwrap(), Default::default()).await.unwrap(); + + // The cache dir should contain one artifact and one worker dir. + let cache_dir = host.cache_dir.path().to_owned(); + assert_eq!(std::fs::read_dir(&cache_dir).unwrap().count(), 2); // Start a new host, previous artifact should be cleared. let _host = TestHost::new_with_config(|cfg| {