diff --git a/builder/src/core/blob.rs b/builder/src/core/blob.rs index b579963b4d7..ff0cdb5b277 100644 --- a/builder/src/core/blob.rs +++ b/builder/src/core/blob.rs @@ -16,7 +16,7 @@ use sha2::digest::Digest; use super::layout::BlobLayout; use super::node::Node; use crate::core::context::Artifact; -use crate::{BlobContext, BlobManager, BuildContext, ConversionType, Feature, Tree}; +use crate::{BlobContext, BlobManager, BuildContext, ConversionType, Feature}; /// Generator for RAFS data blob. pub(crate) struct Blob {} @@ -25,15 +25,13 @@ impl Blob { /// Dump blob file and generate chunks pub(crate) fn dump( ctx: &BuildContext, - tree: &Tree, blob_mgr: &mut BlobManager, blob_writer: &mut dyn Artifact, ) -> Result<()> { match ctx.conversion_type { ConversionType::DirectoryToRafs => { let mut chunk_data_buf = vec![0u8; RAFS_MAX_CHUNK_SIZE as usize]; - let (inodes, prefetch_entries) = - BlobLayout::layout_blob_simple(&ctx.prefetch, tree)?; + let (inodes, prefetch_entries) = BlobLayout::layout_blob_simple(&ctx.prefetch)?; for (idx, node) in inodes.iter().enumerate() { let mut node = node.lock().unwrap(); let size = node diff --git a/builder/src/core/bootstrap.rs b/builder/src/core/bootstrap.rs index 8999879820e..22805bd3c03 100644 --- a/builder/src/core/bootstrap.rs +++ b/builder/src/core/bootstrap.rs @@ -37,8 +37,7 @@ impl Bootstrap { assert_eq!(index, RAFS_V5_ROOT_INODE); root_node.index = index; root_node.inode.set_ino(index); - ctx.prefetch - .insert_if_need(&self.tree.node, root_node.deref()); + ctx.prefetch.insert(&self.tree.node, root_node.deref()); bootstrap_ctx.inode_map.insert( ( root_node.layer_idx, @@ -160,7 +159,7 @@ impl Bootstrap { if !child_node.is_dir() && ctx.fs_version.is_v6() { child_node.v6_set_offset(bootstrap_ctx, v6_hardlink_offset, block_size)?; } - ctx.prefetch.insert_if_need(&child.node, child_node.deref()); + ctx.prefetch.insert(&child.node, child_node.deref()); if child_node.is_dir() { dirs.push(child); } diff --git 
a/builder/src/core/layout.rs b/builder/src/core/layout.rs index 50bea8c8452..9a3ef83ddbe 100644 --- a/builder/src/core/layout.rs +++ b/builder/src/core/layout.rs @@ -6,37 +6,26 @@ use anyhow::Result; use std::ops::Deref; use super::node::Node; -use crate::{Overlay, Prefetch, Tree, TreeNode}; +use crate::{Overlay, Prefetch, TreeNode}; #[derive(Clone)] pub struct BlobLayout {} impl BlobLayout { - pub fn layout_blob_simple(prefetch: &Prefetch, tree: &Tree) -> Result<(Vec<TreeNode>, usize)> { - let mut inodes = Vec::with_capacity(10000); - - // Put all prefetch inodes at the head - // NOTE: Don't try to sort readahead files by their sizes, thus to keep files - // belonging to the same directory arranged in adjacent in blob file. Together with - // BFS style collecting descendants inodes, it will have a higher merging possibility. - // Later, we might write chunks of data one by one according to inode number order. - let prefetches = prefetch.get_file_nodes(); - for n in prefetches { - let node = n.lock().unwrap(); - if Self::should_dump_node(node.deref()) { - inodes.push(n.clone()); - } - } + pub fn layout_blob_simple(prefetch: &Prefetch) -> Result<(Vec<TreeNode>, usize)> { + let (pre, non_pre) = prefetch.get_file_nodes(); + let mut inodes: Vec<TreeNode> = pre + .into_iter() + .filter(|x| Self::should_dump_node(x.lock().unwrap().deref())) + .collect(); + let mut non_prefetch_inodes: Vec<TreeNode> = non_pre + .into_iter() + .filter(|x| Self::should_dump_node(x.lock().unwrap().deref())) + .collect(); + let prefetch_entries = inodes.len(); - tree.walk_bfs(true, &mut |n| -> Result<()> { - let node = n.lock_node(); - // Ignore lower layer node when dump blob - if !prefetch.contains(node.deref()) && Self::should_dump_node(node.deref()) { - inodes.push(n.node.clone()); - } - Ok(()) - })?; + inodes.append(&mut non_prefetch_inodes); Ok((inodes, prefetch_entries)) } @@ -46,3 +35,28 @@ impl BlobLayout { node.overlay == Overlay::UpperAddition || node.overlay == Overlay::UpperModification } } + +#[cfg(test)] +mod tests { 
+ use super::*; + use crate::{core::node::NodeInfo, Tree}; + use nydus_rafs::metadata::{inode::InodeWrapper, RafsVersion}; + + #[test] + fn test_layout_blob_simple() { + let mut inode = InodeWrapper::new(RafsVersion::V6); + inode.set_mode(0o755 | libc::S_IFREG as u32); + inode.set_size(1); + let mut node1 = Node::new(inode.clone(), NodeInfo::default(), 1); + node1.overlay = Overlay::UpperAddition; + + let tree = Tree::new(node1); + + let mut prefetch = Prefetch::default(); + prefetch.insert(&tree.node, tree.node.lock().unwrap().deref()); + + let (inodes, prefetch_entries) = BlobLayout::layout_blob_simple(&prefetch).unwrap(); + assert_eq!(inodes.len(), 1); + assert_eq!(prefetch_entries, 0); + } +} diff --git a/builder/src/core/prefetch.rs b/builder/src/core/prefetch.rs index 5bc4603c9d9..b5695e05686 100644 --- a/builder/src/core/prefetch.rs +++ b/builder/src/core/prefetch.rs @@ -3,7 +3,6 @@ // // SPDX-License-Identifier: Apache-2.0 -use std::collections::BTreeMap; use std::path::PathBuf; use std::str::FromStr; @@ -73,7 +72,7 @@ fn get_patterns() -> Result>> { fn generate_patterns(input: Vec) -> Result>> { let mut patterns = IndexMap::new(); - for (idx, file) in input.iter().enumerate() { + for file in &input { let file_trimmed: PathBuf = file.trim().into(); // Sanity check for the list format. if !file_trimmed.is_absolute() { @@ -84,13 +83,21 @@ fn generate_patterns(input: Vec) -> Result>, // File list to help optimizing layout of data blobs. - // Files from this list may be put at the head of data blob for better prefetch performance. - files: BTreeMap, + // Files from this list may be put at the head of data blob for better prefetch performance, + // The index of matched prefetch pattern is stored in `usize`, + // which will help to sort the prefetch files in the final layout. + // It only stores regular files. 
+ files_prefetch: Vec<(TreeNode, usize)>, + + // It stores all non-prefetch files that are not stored in `files_prefetch`, + // including regular files, dirs, symlinks, etc., + // with the same order of BFS traversal of file tree. + files_non_prefetch: Vec<TreeNode>, } impl Prefetch { @@ -131,50 +146,63 @@ impl Prefetch { policy, disabled: false, patterns, - files: BTreeMap::new(), + files_prefetch: Vec::with_capacity(10000), + files_non_prefetch: Vec::with_capacity(10000), }) } - /// Insert node into the prefetch list if it matches prefetch rules. - pub fn insert_if_need(&mut self, obj: &TreeNode, node: &Node) { + /// Insert node into the prefetch Vector if it matches prefetch rules, + /// while recording the index of matched prefetch pattern, + /// or insert it into non-prefetch Vector. + pub fn insert(&mut self, obj: &TreeNode, node: &Node) { // Newly created root inode of this rafs has zero size if self.policy == PrefetchPolicy::None || self.disabled || (node.inode.is_reg() && node.inode.size() == 0) { + self.files_non_prefetch.push(obj.clone()); return; } - let path = node.target(); - for (f, v) in self.patterns.iter_mut() { - // As path is canonicalized, it should be reliable. - if path == f { - if self.policy == PrefetchPolicy::Fs { + let mut path = node.target().clone(); + let mut exact_match = true; + loop { + if let Some((idx, _, v)) = self.patterns.get_full_mut(&path) { + if exact_match { *v = Some(obj.clone()); } if node.is_reg() { - self.files.insert(path.clone(), obj.clone()); + self.files_prefetch.push((obj.clone(), idx)); + } else { + self.files_non_prefetch.push(obj.clone()); } - } else if path.starts_with(f) && node.is_reg() { - self.files.insert(path.clone(), obj.clone()); + return; } + // If no exact match, try to match parent dir until root. + if !path.pop() { + self.files_non_prefetch.push(obj.clone()); + return; + } + exact_match = false; } } - /// Check whether the node is in the prefetch list. 
- pub fn contains(&self, node: &Node) -> bool { - self.files.contains_key(node.target()) - } + /// Get node Vector of files in the prefetch list and non-prefetch list. + /// The order of prefetch files is the same as the order of prefetch patterns. + /// The order of non-prefetch files is the same as the order of BFS traversal of file tree. + pub fn get_file_nodes(&self) -> (Vec<TreeNode>, Vec<TreeNode>) { + let mut p_files = self.files_prefetch.clone(); + p_files.sort_by_key(|k| k.1); - /// Get node index array of files in the prefetch list. - pub fn get_file_nodes(&self) -> Vec<TreeNode> { - self.files.values().cloned().collect() + let p_files = p_files.into_iter().map(|(s, _)| s).collect(); + + (p_files, self.files_non_prefetch.clone()) } - /// Get number of prefetch rules. + /// Get the number of `valid` prefetch rules. pub fn fs_prefetch_rule_count(&self) -> u32 { if self.policy == PrefetchPolicy::Fs { - self.patterns.values().len() as u32 + self.patterns.values().filter(|v| v.is_some()).count() as u32 } else { 0 } @@ -231,13 +259,18 @@ impl Prefetch { /// Reset to initialization state. 
pub fn clear(&mut self) { self.disabled = false; - self.files.clear(); + self.patterns.clear(); + self.files_prefetch.clear(); + self.files_non_prefetch.clear(); } } #[cfg(test)] mod tests { use super::*; + use crate::core::node::NodeInfo; + use nydus_rafs::metadata::{inode::InodeWrapper, RafsVersion}; + use std::sync::Mutex; #[test] fn test_generate_pattern() { @@ -273,4 +306,86 @@ mod tests { PrefetchPolicy::from_str("").unwrap_err(); PrefetchPolicy::from_str("invalid").unwrap_err(); } + + #[test] + fn test_prefetch() { + let input = vec![ + "/a/b".to_string(), + "/f".to_string(), + "/h/i".to_string(), + "/k".to_string(), + ]; + let patterns = generate_patterns(input).unwrap(); + let mut prefetch = Prefetch { + policy: PrefetchPolicy::Fs, + disabled: false, + patterns, + files_prefetch: Vec::with_capacity(10), + files_non_prefetch: Vec::with_capacity(10), + }; + let mut inode = InodeWrapper::new(RafsVersion::V6); + inode.set_mode(0o755 | libc::S_IFREG as u32); + inode.set_size(1); + + let info = NodeInfo::default(); + + let mut info1 = info.clone(); + info1.target = PathBuf::from("/f"); + let node1 = Node::new(inode.clone(), info1, 1); + let node1 = TreeNode::new(Mutex::from(node1)); + prefetch.insert(&node1, &node1.lock().unwrap()); + + let inode2 = inode.clone(); + let mut info2 = info.clone(); + info2.target = PathBuf::from("/a/b"); + let node2 = Node::new(inode2, info2, 1); + let node2 = TreeNode::new(Mutex::from(node2)); + prefetch.insert(&node2, &node2.lock().unwrap()); + + let inode3 = inode.clone(); + let mut info3 = info.clone(); + info3.target = PathBuf::from("/h/i/j"); + let node3 = Node::new(inode3, info3, 1); + let node3 = TreeNode::new(Mutex::from(node3)); + prefetch.insert(&node3, &node3.lock().unwrap()); + + let inode4 = inode.clone(); + let mut info4 = info.clone(); + info4.target = PathBuf::from("/z"); + let node4 = Node::new(inode4, info4, 1); + let node4 = TreeNode::new(Mutex::from(node4)); + prefetch.insert(&node4, &node4.lock().unwrap()); + 
+ let inode5 = inode.clone(); + inode.set_mode(0o755 | libc::S_IFDIR as u32); + inode.set_size(0); + let mut info5 = info; + info5.target = PathBuf::from("/a/b/d"); + let node5 = Node::new(inode5, info5, 1); + let node5 = TreeNode::new(Mutex::from(node5)); + prefetch.insert(&node5, &node5.lock().unwrap()); + + // node1, node2 + assert_eq!(prefetch.fs_prefetch_rule_count(), 2); + + let (pre, non_pre) = prefetch.get_file_nodes(); + assert_eq!(pre.len(), 4); + assert_eq!(non_pre.len(), 1); + let pre_str: Vec = pre + .iter() + .map(|n| n.lock().unwrap().target().to_str().unwrap().to_owned()) + .collect(); + assert_eq!(pre_str, vec!["/a/b", "/a/b/d", "/f", "/h/i/j"]); + let non_pre_str: Vec = non_pre + .iter() + .map(|n| n.lock().unwrap().target().to_str().unwrap().to_owned()) + .collect(); + assert_eq!(non_pre_str, vec!["/z"]); + + prefetch.clear(); + assert_eq!(prefetch.fs_prefetch_rule_count(), 0); + let (pre, non_pre) = prefetch.get_file_nodes(); + assert_eq!(pre.len(), 0); + assert_eq!(non_pre.len(), 0); + } } diff --git a/builder/src/directory.rs b/builder/src/directory.rs index 7395cde9785..f934f5111ac 100644 --- a/builder/src/directory.rs +++ b/builder/src/directory.rs @@ -148,7 +148,7 @@ impl Builder for DirectoryBuilder { // Dump blob file timing_tracer!( - { Blob::dump(ctx, &bootstrap.tree, blob_mgr, blob_writer.as_mut(),) }, + { Blob::dump(ctx, blob_mgr, blob_writer.as_mut()) }, "dump_blob" )?; diff --git a/builder/src/stargz.rs b/builder/src/stargz.rs index 3681b8d105c..d84a2d214a8 100644 --- a/builder/src/stargz.rs +++ b/builder/src/stargz.rs @@ -860,7 +860,7 @@ impl Builder for StargzBuilder { // Dump blob file timing_tracer!( - { Blob::dump(ctx, &bootstrap.tree, blob_mgr, blob_writer.as_mut()) }, + { Blob::dump(ctx, blob_mgr, blob_writer.as_mut()) }, "dump_blob" )?; diff --git a/builder/src/tarball.rs b/builder/src/tarball.rs index 47d054cdc62..edc996ac553 100644 --- a/builder/src/tarball.rs +++ b/builder/src/tarball.rs @@ -615,7 +615,7 @@ impl Builder 
for TarballBuilder { // Dump blob file timing_tracer!( - { Blob::dump(ctx, &bootstrap.tree, blob_mgr, blob_writer.as_mut()) }, + { Blob::dump(ctx, blob_mgr, blob_writer.as_mut()) }, "dump_blob" )?;