Skip to content

Commit

Permalink
virtual_file: distinguish first and subsequent open() syscalls
Browse files Browse the repository at this point in the history
This helps with identifying thrashing.

While reading the code, I also found a case where we waste
work in a cache pressure situation: #6065

refs neondatabase/cloud#8351
  • Loading branch information
problame committed Dec 7, 2023
1 parent 6f7d248 commit db0a351
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 3 deletions.
3 changes: 2 additions & 1 deletion pageserver/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,7 @@ const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
)]
pub(crate) enum StorageIoOperation {
Open,
OpenAfterReplace,
Close,
CloseByReplace,
Read,
Expand All @@ -787,6 +788,7 @@ impl StorageIoOperation {
pub fn as_str(&self) -> &'static str {
match self {
StorageIoOperation::Open => "open",
StorageIoOperation::OpenAfterReplace => "open-after-replace",
StorageIoOperation::Close => "close",
StorageIoOperation::CloseByReplace => "close-by-replace",
StorageIoOperation::Read => "read",
Expand Down Expand Up @@ -840,7 +842,6 @@ pub(crate) static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
)
.expect("failed to define a metric")
});

#[derive(Debug)]
struct GlobalAndPerTimelineHistogram {
global: Histogram,
Expand Down
16 changes: 14 additions & 2 deletions pageserver/src/virtual_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,9 @@ impl VirtualFile {
}
let (handle, mut slot_guard) = get_open_files().find_victim_slot().await;

// NB: there is also StorageIoOperation::OpenAfterReplace which is for the case
// where our caller doesn't get to use the returned VirtualFile before its
// slot gets re-used by someone else.
let file = observe_duration!(StorageIoOperation::Open, open_options.open(path))?;

// Strip all options other than read and write.
Expand All @@ -333,6 +336,9 @@ impl VirtualFile {
timeline_id,
};

// TODO: Under pressure, it's likely the slot will get re-used and
// the underlying file closed before they get around to using it.
// => https://github.com/neondatabase/neon/issues/6065
slot_guard.file.replace(file);

Ok(vfile)
Expand Down Expand Up @@ -441,8 +447,14 @@ impl VirtualFile {
// now locked in write-mode. Find a free slot to put it in.
let (handle, mut slot_guard) = open_files.find_victim_slot().await;

// Open the physical file
let file = observe_duration!(StorageIoOperation::Open, self.open_options.open(&self.path))?;
// Re-open the physical file.
// NB: we use StorageIoOperation::OpenAferReplace for this to distinguish this
// case from StorageIoOperation::Open. This helps with identifying thrashing
// of the virtual file descriptor cache.
let file = observe_duration!(
StorageIoOperation::OpenAfterReplace,
self.open_options.open(&self.path)
)?;

// Store the File in the slot and update the handle in the VirtualFile
// to point to it.
Expand Down

0 comments on commit db0a351

Please sign in to comment.