Skip to content

Commit

Permalink
virtual_file: distinguish first and subsequent open() syscalls
Browse files Browse the repository at this point in the history
This helps with identifying thrashing.

I don't love the name, but, there is already "close-by-replace".

While reading the code, I also found a case where we waste
work in a cache pressure situation: #6065

refs neondatabase/cloud#8351
  • Loading branch information
problame committed Dec 7, 2023
1 parent 880663f commit 7a7356e
Showing 2 changed files with 13 additions and 3 deletions.
3 changes: 2 additions & 1 deletion pageserver/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -774,6 +774,7 @@ const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
)]
pub(crate) enum StorageIoOperation {
Open,
OpenAfterReplace,
Close,
CloseByReplace,
Read,
@@ -787,6 +788,7 @@ impl StorageIoOperation {
pub fn as_str(&self) -> &'static str {
match self {
StorageIoOperation::Open => "open",
StorageIoOperation::OpenAfterReplace => "open-after-replace",
StorageIoOperation::Close => "close",
StorageIoOperation::CloseByReplace => "close-by-replace",
StorageIoOperation::Read => "read",
@@ -840,7 +842,6 @@ pub(crate) static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
)
.expect("failed to define a metric")
});

#[derive(Debug)]
struct GlobalAndPerTimelineHistogram {
global: Histogram,
13 changes: 11 additions & 2 deletions pageserver/src/virtual_file.rs
Original file line number Diff line number Diff line change
@@ -288,6 +288,9 @@ impl VirtualFile {
}
let (handle, mut slot_guard) = get_open_files().find_victim_slot();

// NB: there is also StorageIoOperation::OpenAfterReplace which is for the case
// where our caller doesn't get to use the returned VirtualFile before its
// slot gets re-used by someone else.
let file = STORAGE_IO_TIME_METRIC
.get(StorageIoOperation::Open)
.observe_closure_duration(|| open_options.open(path))?;
@@ -311,6 +314,9 @@ impl VirtualFile {
timeline_id,
};

// TODO: Under pressure, it's likely the slot will get re-used and
// the underlying file closed before they get around to using it.
// => https://github.com/neondatabase/neon/issues/6065
slot_guard.file.replace(file);

Ok(vfile)
@@ -421,9 +427,12 @@ impl VirtualFile {
// now locked in write-mode. Find a free slot to put it in.
let (handle, mut slot_guard) = open_files.find_victim_slot();

// Open the physical file
// Re-open the physical file.
// NB: we use StorageIoOperation::OpenAferReplace for this to distinguish this
// case from StorageIoOperation::Open. This helps with identifying thrashing
// of the virtual file descriptor cache.
let file = STORAGE_IO_TIME_METRIC
.get(StorageIoOperation::Open)
.get(StorageIoOperation::OpenAfterReplace)
.observe_closure_duration(|| self.open_options.open(&self.path))?;

// Perform the requested operation on it

0 comments on commit 7a7356e

Please sign in to comment.