Skip to content

Commit

Permalink
storage: refine the way to implement BlobIoChunk
Browse files Browse the repository at this point in the history
Backport the new implementation of BlobIoChunk from master into v2.1.

Fixes: #1198

Signed-off-by: Jiang Liu <gerry@linux.alibaba.com>
  • Loading branch information
jiangliu committed Apr 26, 2023
1 parent a70fdcc commit a86915d
Show file tree
Hide file tree
Showing 13 changed files with 137 additions and 110 deletions.
7 changes: 4 additions & 3 deletions rafs/src/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,7 @@ impl Rafs {
// - prefetch list passed in by user
// - or file prefetch list in metadata
let inodes = prefetch_files.map(|files| Self::convert_file_list(&files, &sb));
let res = sb.prefetch_files(&mut reader, root_ino, inodes, &|desc| {
let res = sb.prefetch_files(&device, &mut reader, root_ino, inodes, &|desc| {
if desc.bi_size > 0 {
device.prefetch(&[desc], &[]).unwrap_or_else(|e| {
warn!("Prefetch error, {:?}", e);
Expand Down Expand Up @@ -603,7 +603,7 @@ impl Rafs {
}
} else {
let root = vec![root_ino];
let res = sb.prefetch_files(&mut reader, root_ino, Some(root), &|desc| {
let res = sb.prefetch_files(&device, &mut reader, root_ino, Some(root), &|desc| {
if desc.bi_size > 0 {
device.prefetch(&[desc], &[]).unwrap_or_else(|e| {
warn!("Prefetch error, {:?}", e);
Expand Down Expand Up @@ -761,7 +761,7 @@ impl FileSystem for Rafs {

let real_size = cmp::min(size as u64, inode_size - offset);
let mut result = 0;
let mut descs = inode.alloc_bio_vecs(offset, real_size as usize, true)?;
let mut descs = inode.alloc_bio_vecs(&self.device, offset, real_size as usize, true)?;
debug_assert!(!descs.is_empty() && !descs[0].bi_vec.is_empty());

// Try to amplify user io for Rafs v5, to improve performance.
Expand All @@ -775,6 +775,7 @@ impl FileSystem for Rafs {
if actual_size < self.amplify_io as u64 {
let window_size = self.amplify_io as u64 - actual_size;
self.sb.amplify_io(
&self.device,
self.amplify_io,
&mut descs,
&inode,
Expand Down
19 changes: 13 additions & 6 deletions rafs/src/metadata/cached_v5.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use fuse_backend_rs::api::filesystem::Entry;
use nydus_utils::digest::Algorithm;
use nydus_utils::{digest::RafsDigest, ByteSize};
use storage::device::v5::BlobV5ChunkInfo;
use storage::device::{BlobChunkFlags, BlobChunkInfo, BlobInfo};
use storage::device::{BlobChunkFlags, BlobChunkInfo, BlobDevice, BlobInfo};

use crate::metadata::layout::v5::{
rafsv5_alloc_bio_vecs, rafsv5_validate_digest, RafsV5BlobTable, RafsV5ChunkInfo, RafsV5Inode,
Expand Down Expand Up @@ -598,7 +598,13 @@ impl RafsInode for CachedInodeV5 {
Ok(0)
}

fn alloc_bio_vecs(&self, offset: u64, size: usize, user_io: bool) -> Result<Vec<BlobIoVec>> {
/// Allocate blob io vectors to read file data in range [offset, offset + size).
///
/// This implementation simply forwards `offset`, `size` and `user_io` to
/// `rafsv5_alloc_bio_vecs`. The `_device` argument is not used here; it is
/// accepted only so the method matches the `RafsInode::alloc_bio_vecs` signature.
fn alloc_bio_vecs(
&self,
_device: &BlobDevice,
offset: u64,
size: usize,
user_io: bool,
) -> Result<Vec<BlobIoVec>> {
rafsv5_alloc_bio_vecs(self, offset, size, user_io)
}

Expand Down Expand Up @@ -774,7 +780,7 @@ mod cached_tests {
use std::sync::Arc;

use nydus_utils::ByteSize;
use storage::device::BlobFeatures;
use storage::device::{BlobDevice, BlobFeatures};

use crate::metadata::cached_v5::{CachedInodeV5, CachedSuperBlockV5};
use crate::metadata::layout::v5::{
Expand Down Expand Up @@ -952,15 +958,16 @@ mod cached_tests {
);
let mut cached_inode = CachedInodeV5::new(blob_table, meta.clone());
cached_inode.load(&meta, &mut reader).unwrap();
let descs = cached_inode.alloc_bio_vecs(0, 100, true).unwrap();
let device = BlobDevice::default();
let descs = cached_inode.alloc_bio_vecs(&device, 0, 100, true).unwrap();
let desc1 = &descs[0];
assert_eq!(desc1.bi_size, 100);
assert_eq!(desc1.bi_vec.len(), 1);
assert_eq!(desc1.bi_vec[0].offset, 0);
assert_eq!(desc1.bi_vec[0].blob.blob_id(), "123333");

let descs = cached_inode
.alloc_bio_vecs(1024 * 1024 - 100, 200, true)
.alloc_bio_vecs(&device, 1024 * 1024 - 100, 200, true)
.unwrap();
let desc2 = &descs[0];
assert_eq!(desc2.bi_size, 200);
Expand All @@ -971,7 +978,7 @@ mod cached_tests {
assert_eq!(desc2.bi_vec[1].size, 100);

let descs = cached_inode
.alloc_bio_vecs(1024 * 1024 + 8192, 1024 * 1024 * 4, true)
.alloc_bio_vecs(&device, 1024 * 1024 + 8192, 1024 * 1024 * 4, true)
.unwrap();
let desc3 = &descs[0];
assert_eq!(desc3.bi_size, 1024 * 1024 * 2);
Expand Down
10 changes: 8 additions & 2 deletions rafs/src/metadata/direct_v5.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use std::sync::Arc;
use arc_swap::{ArcSwap, Guard};
use nydus_utils::digest::{Algorithm, RafsDigest};
use storage::device::v5::BlobV5ChunkInfo;
use storage::device::{BlobChunkFlags, BlobChunkInfo, BlobInfo, BlobIoVec};
use storage::device::{BlobChunkFlags, BlobChunkInfo, BlobDevice, BlobInfo, BlobIoVec};
use storage::utils::readahead;

use crate::metadata::layout::v5::{
Expand Down Expand Up @@ -834,7 +834,13 @@ impl RafsInode for OndiskInodeWrapper {
Ok(0)
}

fn alloc_bio_vecs(&self, offset: u64, size: usize, user_io: bool) -> Result<Vec<BlobIoVec>> {
/// Allocate blob io vectors to read file data in range [offset, offset + size).
///
/// This implementation simply forwards `offset`, `size` and `user_io` to
/// `rafsv5_alloc_bio_vecs`. The `_device` argument is not used here; it is
/// accepted only so the method matches the `RafsInode::alloc_bio_vecs` signature.
fn alloc_bio_vecs(
&self,
_device: &BlobDevice,
offset: u64,
size: usize,
user_io: bool,
) -> Result<Vec<BlobIoVec>> {
rafsv5_alloc_bio_vecs(self, offset, size, user_io)
}

Expand Down
59 changes: 43 additions & 16 deletions rafs/src/metadata/direct_v6.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,7 @@ use nydus_utils::{
div_round_up, round_up,
};
use storage::device::{
v5::BlobV5ChunkInfo, BlobChunkFlags, BlobChunkInfo, BlobInfo, BlobIoChunk, BlobIoDesc,
BlobIoVec,
v5::BlobV5ChunkInfo, BlobChunkFlags, BlobChunkInfo, BlobDevice, BlobInfo, BlobIoDesc, BlobIoVec,
};
use storage::utils::readahead;

Expand Down Expand Up @@ -570,23 +569,33 @@ impl OndiskInodeWrapper {

fn make_chunk_io(
&self,
state: &Guard<Arc<DirectMappingState>>,
device: &BlobDevice,
chunk_addr: &RafsV6InodeChunkAddr,
content_offset: u32,
content_len: u32,
user_io: bool,
) -> BlobIoDesc {
let state = self.mapping.state.load();
let blob_table = &state.blob_table.entries;

// As ondisk blobs table contains bootstrap as the first blob device
// while `blob_table` doesn't, it is subtracted 1.
let blob_index = chunk_addr.blob_index() - 1;
) -> Option<BlobIoDesc> {
let blob_index = chunk_addr.blob_index();
let blob_index = if blob_index == 0 {
u32::MAX
} else {
blob_index as u32 - 1
};
let chunk_index = chunk_addr.blob_comp_index();
let io_chunk = BlobIoChunk::Address(blob_index as u32, chunk_index);

let blob = blob_table[blob_index as usize].clone();

BlobIoDesc::new(blob, io_chunk, content_offset, content_len, user_io)
match state.blob_table.get(blob_index) {
Err(e) => {
warn!(
"failed to get blob with index {} for chunk address {:?}, {}",
blob_index, chunk_addr, e
);
None
}
Ok(blob) => device
.create_io_chunk(blob.blob_index(), chunk_index)
.map(|v| BlobIoDesc::new(blob, v, content_offset, content_len, user_io)),
}
}

fn chunk_size(&self) -> u32 {
Expand Down Expand Up @@ -1238,7 +1247,14 @@ impl RafsInode for OndiskInodeWrapper {
Ok(0)
}

fn alloc_bio_vecs(&self, offset: u64, size: usize, user_io: bool) -> Result<Vec<BlobIoVec>> {
fn alloc_bio_vecs(
&self,
device: &BlobDevice,
offset: u64,
size: usize,
user_io: bool,
) -> Result<Vec<BlobIoVec>> {
let state = self.mapping.state.load();
let chunk_size = self.chunk_size();
let head_chunk_index = offset / chunk_size as u64;

Expand All @@ -1258,7 +1274,16 @@ impl RafsInode for OndiskInodeWrapper {

// Safe to unwrap because chunks is not empty to reach here.
let first_chunk_addr = chunks.first().unwrap();
let desc = self.make_chunk_io(first_chunk_addr, content_offset, content_len, user_io);
let desc = self
.make_chunk_io(
&state,
device,
first_chunk_addr,
content_offset,
content_len,
user_io,
)
.ok_or_else(|| einval!("failed to get chunk information"))?;

let mut descs = BlobIoVec::new();
descs.bi_vec.push(desc);
Expand All @@ -1269,7 +1294,9 @@ impl RafsInode for OndiskInodeWrapper {
// Handle the rest of the chunks, since they all share the same content offset = 0.
for c in chunks.iter().skip(1) {
content_len = std::cmp::min(chunk_size, left);
let desc = self.make_chunk_io(c, 0, content_len, user_io);
let desc = self
.make_chunk_io(&state, device, c, 0, content_len, user_io)
.ok_or_else(|| einval!("failed to get chunk information"))?;

if desc.blob.blob_index() != descs.bi_vec[0].blob.blob_index() {
trace!(
Expand Down
2 changes: 1 addition & 1 deletion rafs/src/metadata/layout/v5.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1319,7 +1319,7 @@ fn add_chunk_to_bio_desc(

let bio = BlobIoDesc::new(
blob,
BlobIoChunk::Base(io_chunk),
BlobIoChunk(io_chunk),
chunk_start as u32,
(chunk_end - chunk_start) as u32,
user_io,
Expand Down
8 changes: 5 additions & 3 deletions rafs/src/metadata/md_v5.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ impl RafsSuper {

pub(crate) fn prefetch_data_v5<F>(
&self,
device: &BlobDevice,
r: &mut RafsIoReader,
root_ino: Inode,
fetcher: F,
Expand Down Expand Up @@ -123,7 +124,7 @@ impl RafsSuper {
found_root_inode = true;
}
debug!("hint prefetch inode {}", ino);
self.prefetch_data(ino as u64, &mut state, &mut hardlinks, &fetcher)
self.prefetch_data(ino as u64, device, &mut state, &mut hardlinks, &fetcher)
.map_err(|e| RafsError::Prefetch(e.to_string()))?;
}
for (_id, mut desc) in state.drain() {
Expand Down Expand Up @@ -172,6 +173,7 @@ impl RafsSuper {
// expect that those chunks are likely to be continuous with user IO's chunks.
pub(crate) fn amplify_io(
&self,
device: &BlobDevice,
max_size: u32,
descs: &mut [BlobIoVec],
inode: &Arc<dyn RafsInode>,
Expand All @@ -190,7 +192,7 @@ impl RafsSuper {
if window_base < inode_size {
let size = inode_size - window_base;
let sz = std::cmp::min(size, window_size);
let amplified_io_vec = inode.alloc_bio_vecs(window_base, sz as usize, false)?;
let amplified_io_vec = inode.alloc_bio_vecs(device, window_base, sz as usize, false)?;
debug_assert!(!amplified_io_vec.is_empty() && !amplified_io_vec[0].bi_vec.is_empty());
// caller should ensure that `window_base` won't overlap last chunk of user IO.
Self::merge_chunks_io(last_desc, &amplified_io_vec);
Expand All @@ -216,7 +218,7 @@ impl RafsSuper {
}

let sz = std::cmp::min(window_size, next_size);
let amplified_io_vec = ni.alloc_bio_vecs(0, sz as usize, false)?;
let amplified_io_vec = ni.alloc_bio_vecs(device, 0, sz as usize, false)?;
debug_assert!(
!amplified_io_vec.is_empty() && !amplified_io_vec[0].bi_vec.is_empty()
);
Expand Down
3 changes: 2 additions & 1 deletion rafs/src/metadata/md_v6.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ impl RafsSuper {

pub(crate) fn prefetch_data_v6<F>(
&self,
device: &BlobDevice,
r: &mut RafsIoReader,
root_ino: Inode,
fetcher: F,
Expand Down Expand Up @@ -139,7 +140,7 @@ impl RafsSuper {
found_root_inode = true;
}
debug!("hint prefetch inode {}", ino);
self.prefetch_data(ino as u64, &mut state, &mut hardlinks, &fetcher)
self.prefetch_data(ino as u64, device, &mut state, &mut hardlinks, &fetcher)
.map_err(|e| RafsError::Prefetch(e.to_string()))?;
}
// The left chunks whose size is smaller than 4MB will be fetched here.
Expand Down
25 changes: 17 additions & 8 deletions rafs/src/metadata/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use fuse_backend_rs::api::filesystem::Entry;
use nydus_utils::compress;
use nydus_utils::digest::{self, RafsDigest};
use serde::Serialize;
use storage::device::{BlobChunkInfo, BlobInfo, BlobIoMerge, BlobIoVec};
use storage::device::{BlobChunkInfo, BlobDevice, BlobInfo, BlobIoMerge, BlobIoVec};

use self::layout::v5::RafsV5PrefetchTable;
use self::layout::v6::RafsV6PrefetchTable;
Expand Down Expand Up @@ -211,7 +211,13 @@ pub trait RafsInode: Any {
) -> Result<usize>;

/// Allocate blob io vectors to read file data in range [offset, offset + size).
fn alloc_bio_vecs(&self, offset: u64, size: usize, user_io: bool) -> Result<Vec<BlobIoVec>>;
fn alloc_bio_vecs(
&self,
device: &BlobDevice,
offset: u64,
size: usize,
user_io: bool,
) -> Result<Vec<BlobIoVec>>;

fn as_any(&self) -> &dyn Any;

Expand Down Expand Up @@ -651,6 +657,7 @@ impl RafsSuper {
/// Returning `Ok(true)` means the root inode was found while prefetching, so all files should be prefetched.
pub fn prefetch_files(
&self,
device: &BlobDevice,
r: &mut RafsIoReader,
root_ino: Inode,
files: Option<Vec<Inode>>,
Expand All @@ -662,7 +669,7 @@ impl RafsSuper {
let mut hardlinks: HashSet<u64> = HashSet::new();
let mut state = BlobIoMerge::default();
for f_ino in files {
self.prefetch_data(f_ino, &mut state, &mut hardlinks, fetcher)
self.prefetch_data(f_ino, device, &mut state, &mut hardlinks, fetcher)
.map_err(|e| RafsError::Prefetch(e.to_string()))?;
}
for (_id, mut desc) in state.drain() {
Expand All @@ -671,9 +678,9 @@ impl RafsSuper {
// Flush the pending prefetch requests.
Ok(false)
} else if self.meta.is_v5() {
self.prefetch_data_v5(r, root_ino, fetcher)
self.prefetch_data_v5(device, r, root_ino, fetcher)
} else if self.meta.is_v6() {
self.prefetch_data_v6(r, root_ino, fetcher)
self.prefetch_data_v6(device, r, root_ino, fetcher)
} else {
Err(RafsError::Prefetch(
"Unknown filesystem version, prefetch disabled".to_string(),
Expand All @@ -684,6 +691,7 @@ impl RafsSuper {
#[inline]
fn prefetch_inode<F>(
inode: &Arc<dyn RafsInode>,
device: &BlobDevice,
state: &mut BlobIoMerge,
hardlinks: &mut HashSet<u64>,
prefetcher: F,
Expand All @@ -700,7 +708,7 @@ impl RafsSuper {
}
}

let descs = inode.alloc_bio_vecs(0, inode.size() as usize, false)?;
let descs = inode.alloc_bio_vecs(device, 0, inode.size() as usize, false)?;
for desc in descs {
state.append(desc);
prefetcher(state);
Expand All @@ -712,6 +720,7 @@ impl RafsSuper {
fn prefetch_data<F>(
&self,
ino: u64,
device: &BlobDevice,
state: &mut BlobIoMerge,
hardlinks: &mut HashSet<u64>,
fetcher: F,
Expand Down Expand Up @@ -741,15 +750,15 @@ impl RafsSuper {
let mut descendants = Vec::new();
let _ = inode.collect_descendants_inodes(&mut descendants)?;
for i in descendants.iter() {
Self::prefetch_inode(i, state, hardlinks, try_prefetch)?;
Self::prefetch_inode(i, device, state, hardlinks, try_prefetch)?;
}
} else if !inode.is_empty_size() && inode.is_reg() {
// An empty regular file is also packed into the nydus image,
// in which case it has a size of zero.
// Moreover, for rafs v5, a symlink has a size of zero but a non-zero
// symlink size. For rafs v6, the symlink size is also represented by i_size.
// So we have to restrict the condition here.
Self::prefetch_inode(&inode, state, hardlinks, try_prefetch)?;
Self::prefetch_inode(&inode, device, state, hardlinks, try_prefetch)?;
}

Ok(())
Expand Down
Loading

0 comments on commit a86915d

Please sign in to comment.