From 9836104819ed0b81c70b7dfa38888b53f659b36b Mon Sep 17 00:00:00 2001 From: James Bornholt Date: Mon, 26 Feb 2024 17:47:07 -0600 Subject: [PATCH] Revert "Fix issue preventing reads after flush on a file handle (#751)" This reverts commit dd901f33cdd8483f1988bd692f99cb66799c39ac. --- mountpoint-s3/CHANGELOG.md | 135 -------------- mountpoint-s3/src/fs.rs | 157 +++++++++++++--- mountpoint-s3/tests/fs.rs | 48 ++++- mountpoint-s3/tests/fuse_tests/read_test.rs | 50 +----- mountpoint-s3/tests/fuse_tests/write_test.rs | 178 ++++++++++--------- mountpoint-s3/tests/reftests/harness.rs | 28 ++- 6 files changed, 284 insertions(+), 312 deletions(-) delete mode 100644 mountpoint-s3/CHANGELOG.md diff --git a/mountpoint-s3/CHANGELOG.md b/mountpoint-s3/CHANGELOG.md deleted file mode 100644 index 707abe35b..000000000 --- a/mountpoint-s3/CHANGELOG.md +++ /dev/null @@ -1,135 +0,0 @@ -## Unreleased - -## v1.4.1 (February 16, 2024) - -### Other changes -* Fix an issue where read file handles could be closed too early, leading to bad file descriptor errors on subsequent reads. As a consequence of this fix, opening an existing file to overwrite it **immediately** after closing a read file handle may occasionally fail with an "Operation not permitted" error. In such cases, Mountpoint logs will also report that the file is "not writable while being read". ([#751](https://github.com/awslabs/mountpoint-s3/pull/751)) -* File handles are no longer initialized lazily. Lazy initialization was introduced in version 1.4.0 but is reverted in this change. If upgrading from 1.4.0, you may see errors that were previously deferred until read/write now raised at open time. ([#751](https://github.com/awslabs/mountpoint-s3/pull/751)) - -## v1.4.0 (January 26, 2024) - -### New features -* Allow file overwrites when mounting with `--allow-overwrite` option. The upload will start as soon as Mountpoint receives `write` request and cannot be aborted. Once it is started the file is guaranteed to be overwritten. ([#487](https://github.com/awslabs/mountpoint-s3/pull/487)) - -### Breaking changes -* No breaking changes. - -### Other changes -* Update default network throughput values for newer EC2 instance types. ([#702](https://github.com/awslabs/mountpoint-s3/pull/702)) -* Improve error logging for various unsupported operations. ([#699](https://github.com/awslabs/mountpoint-s3/pull/699)) -* Fix a race condition where calling `mknod` and `forget` under the same directory could cause Mountpoint to hang indefinitely. ([#711](https://github.com/awslabs/mountpoint-s3/pull/711)) - -## v1.3.2 (January 11, 2024) - -### Breaking changes -* No breaking changes. - -### Other changes -* Log messages now include file names and S3 keys more consistently. - ([#665](https://github.com/awslabs/mountpoint-s3/pull/665)) -* Successful mount message is now output to stdout for both foreground and background mode. - ([#668](https://github.com/awslabs/mountpoint-s3/pull/668)) -* Added new metric tracking contiguous reads. - This new metric may be used to help understand how much data is being read successfully using prefetching - before needing to discard prefetched progress when seeking around the file. - ([#629](https://github.com/awslabs/mountpoint-s3/pull/629)) -* Fix a race condition where FUSE `read` operations may have completed and subsequently sent back to the Kernel - while locks were still being held on a file handle. 
- If a FUSE `release` operation was executed while the file handle was still held by `read`, - this would result in the file handle never being deallocated. - ([#691](https://github.com/awslabs/mountpoint-s3/pull/691)) - -## v1.3.1 (November 30, 2023) - -### Breaking changes -* No breaking changes. - -### Other changes -* Fix an issue where Mountpoint could crash on launch when overriding the default part size with values that are not multiples of 1024. ([#649](https://github.com/awslabs/mountpoint-s3/pull/649)) - -## v1.3.0 (November 28, 2023) - -### New features -* Mountpoint now supports resolving S3 Express One Zone endpoints and the new SigV4-Express signing algorithm will be used for S3 Express One Zone buckets. Note that `readdir` results on these buckets will not be ordered because ListObjectsV2 is unordered on S3 Express. ([#642](https://github.com/awslabs/mountpoint-s3/pull/642)) - -### Breaking changes -* No breaking changes. - -### Other changes -* New Mountpoint cache directories will be created with owner access only permission. Additionally, the cache directory will be removed entirely at mount time rather than just the contents. ([#637](https://github.com/awslabs/mountpoint-s3/pull/637)) - -## v1.2.0 (November 22, 2023) - -### New features -* Introduced optional caching of object metadata and content, in order to allow reduced cost and improved performance for repeated reads to the same files. To get started, see the [caching section of the configuration documentation](https://github.com/awslabs/mountpoint-s3/blob/main/doc/CONFIGURATION.md#caching-configuration). ([#622](https://github.com/awslabs/mountpoint-s3/pull/622)) - -### Breaking changes -* No breaking changes. - -## v1.1.1 (November 14, 2023) - -### Breaking changes -* No breaking changes. - -### Other changes -* Some applications that read directory entries out of order (for example, [PHP](https://github.com/awslabs/mountpoint-s3/issues/477)) will now work correctly. ([#581](https://github.com/awslabs/mountpoint-s3/pull/581)) -* Fixed a bug that caused file creation to fail if a file with the same name had previously been created with Mountpoint and then deleted remotely. ([#584](https://github.com/awslabs/mountpoint-s3/pull/584)) -* Fixed an issue where Mountpoint could time out or hang on launch if IMDS was not available. ([#601](https://github.com/awslabs/mountpoint-s3/pull/601)) - -## v1.1.0 (October 23, 2023) - -### Breaking changes -* Mountpoint will now complete file uploads at `close` time, and `close` will return an error if the upload was not successful. Before this change, `close` did not wait for the upload to complete, which could cause confusing results for applications that try to read a file they just wrote. ([#526](https://github.com/awslabs/mountpoint-s3/pull/526)) - -### Other changes -* Fixed a bug that caused poor performance for sequential reads in some cases ([#488](https://github.com/awslabs/mountpoint-s3/pull/488)). A workaround we have previously shared for this issue (setting the `--max-threads` argument to `1`) is no longer necessary with this fix. ([#556](https://github.com/awslabs/mountpoint-s3/pull/556)) -* Introduced the `--user-agent-prefix ` CLI argument to optionally allow specifying an additional prefix for the HTTP User-Agent header sent with all S3 requests. ([#548](https://github.com/awslabs/mountpoint-s3/pull/548)) - -## v1.0.2 (September 22, 2023) - -### Breaking changes -* No breaking changes. 
- -### Other changes -* New Mountpoint releases are built on CentOS 7 instead of Amazon Linux 2. This lowers the minimum requirement to run Mountpoint to glibc 2.17 or newer. ([#517](https://github.com/awslabs/mountpoint-s3/pull/517)) -* Fixed a bug where writing to a file for longer than five minutes will result in a panic. ([#513](https://github.com/awslabs/mountpoint-s3/pull/513)) -* Updated the prefetcher to cancel discarded tasks and free up some unused resources on random read workloads. ([#505](https://github.com/awslabs/mountpoint-s3/pull/505)) -* Fixed an issue with internal resource cleanup which could lead to Mountpoint hanging after a high number of file uploads. ([#529](https://github.com/awslabs/mountpoint-s3/pull/529)) - -## v1.0.1 (August 31, 2023) - -### Breaking changes -* The permissions CLI flags `--allow-other` and `--allow-root` are now mutually exclusive. `--allow-other` implies `--allow-root`, and so should be used if you want the effect of both flags. ([#475](https://github.com/awslabs/mountpoint-s3/pull/475)) - -### Other changes -* Added new metrics for object writes, IO sizes, file handles, and directory operations. The existing `fuse.bytes_read` metric has been renamed to `fuse.total_bytes` and is now keyed by operation (`read`/`write`). ([#461](https://github.com/awslabs/mountpoint-s3/pull/461)) -* When running in background mode (the default), Mountpoint now correctly closes standard input and output once mounting succeeds. This should fix issues with scripts that try to fork Mountpoint as a background process, which may previously have hung. ([#489](https://github.com/awslabs/mountpoint-s3/pull/489)) -* Mountpoint can now read objects in the S3 Glacier Flexible Retrieval and S3 Glacier Deep Archive storage classes if they have been restored. Mountpoint cannot issue restore requests, but if you issue a restore request separately, the restored objects will be readable. ([#467](https://github.com/awslabs/mountpoint-s3/pull/467)) - -## v1.0.0 (August 8, 2023) - -### Breaking changes - -* Logging to disk is now disabled by default. - Logs will no longer be written to `$HOME/.mountpoint-s3/` and should be configured using `--log-directory `. -* Bucket options of `--virtual-addressing` is now removed and `--path-addressing` is changed to `--force-path-style`. - If `--force-path-style` is not provided, addressing style of endpoint will be resolved automatically. -* The `--thread-count` option has been removed and replaced with `--max-threads` which sets the maximum - number of threads the FUSE daemon will dynamically spawn to handle requests. - -### Other changes - -* New bucket options of `--transfer-acceleration` and `--dual-stack` have been added. -* ARN is now also supported as to mount the corresponding resource using Mountpoint. - -## v0.4.1 (August 4, 2023) - -An interim release of the Mountpoint alpha. - -## v0.4.0 (August 2, 2023) - -An interim release of the Mountpoint alpha. - -## v0.3.0 (June 30, 2023) - -Initial change log entry. 
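The fs.rs changes below restore the lazy file-handle initialization that #751 had removed: `open` only records a `Created` state, and the handle is committed to `Read` or `Write` by the first actual I/O call. The following is a minimal sketch of that state machine, using simplified stand-in types (the real variants carry a `LookedUp` inode, a prefetch request, an upload state, and per-handle metrics); it assumes the `libc` crate as a dependency, matching the flag checks in the real code:

```rust
// Sketch of the lazy handle state machine this revert restores.
// Unit-like Read/Write variants stand in for prefetch/upload state.
enum FileHandleState {
    /// Created by `open`; read/write type not yet determined.
    Created { flags: i32 },
    /// Committed to reading by the first `read`.
    Read,
    /// Committed to writing by the first `write`.
    Write,
    /// Torn down by `flush` on the read path.
    Closed,
}

impl FileHandleState {
    /// What `fs::read` does with the handle: lazily promote `Created`.
    fn on_read(&mut self) -> Result<(), i32> {
        match self {
            FileHandleState::Created { flags } => {
                let flags = *flags; // copy out so we can reassign *self below
                // A write-only handle can never be promoted to a read handle.
                if flags & libc::O_WRONLY != 0 {
                    return Err(libc::EBADF);
                }
                *self = FileHandleState::Read;
                Ok(())
            }
            FileHandleState::Read => Ok(()),
            // Write handles and already-closed handles reject reads.
            FileHandleState::Write | FileHandleState::Closed => Err(libc::EBADF),
        }
    }

    /// What `fs::write` does with the handle: lazily promote `Created`.
    fn on_write(&mut self) -> Result<(), i32> {
        match self {
            FileHandleState::Created { flags } => {
                let flags = *flags;
                // A read-only handle can never be promoted to a write handle.
                if flags & libc::O_ACCMODE == libc::O_RDONLY {
                    return Err(libc::EBADF);
                }
                *self = FileHandleState::Write;
                Ok(())
            }
            FileHandleState::Write => Ok(()),
            FileHandleState::Read | FileHandleState::Closed => Err(libc::EBADF),
        }
    }
}

fn main() {
    let mut handle = FileHandleState::Created { flags: libc::O_RDONLY };
    assert!(handle.on_read().is_ok());
    assert!(handle.on_write().is_err()); // now committed as a read handle
}
```

A `Created` handle that sees `fsync` or `release` before any I/O is the awkward case, handled specially in the diff below: a new or truncated file is committed as a write handle (producing an empty upload), while an existing, non-truncated file is left undecided.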
diff --git a/mountpoint-s3/src/fs.rs b/mountpoint-s3/src/fs.rs
index abee43eb7..f357e45ec 100644
--- a/mountpoint-s3/src/fs.rs
+++ b/mountpoint-s3/src/fs.rs
@@ -66,10 +66,17 @@ where
     Client: ObjectClient + Send + Sync + 'static,
     Prefetcher: Prefetch,
 {
+    /// A state where the file handle is created but the type is not yet determined
+    Created { lookup: LookedUp, flags: i32, pid: u32 },
     /// The file handle has been assigned as a read handle
-    Read(Prefetcher::PrefetchResult<Client>),
+    Read {
+        request: Prefetcher::PrefetchResult<Client>,
+        pid: u32,
+    },
     /// The file handle has been assigned as a write handle
     Write(UploadState<Client>),
+    /// The file handle is already closed, currently only used to indicate that the read is finished
+    Closed,
 }
 
 impl<Client, Prefetcher> std::fmt::Debug for FileHandleState<Client, Prefetcher>
@@ -79,8 +86,15 @@ where
 {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            FileHandleState::Read(_) => f.debug_struct("Read").finish(),
+            FileHandleState::Created { lookup, flags, pid } => f
+                .debug_struct("Created")
+                .field("lookup", lookup)
+                .field("flags", flags)
+                .field("pid", pid)
+                .finish(),
+            FileHandleState::Read { request: _, pid } => f.debug_struct("Read").field("pid", pid).finish(),
             FileHandleState::Write(arg0) => f.debug_tuple("Write").field(arg0).finish(),
+            FileHandleState::Closed => f.debug_struct("Closed").finish(),
         }
     }
 }
@@ -90,6 +104,11 @@ where
     Client: ObjectClient + Send + Sync,
     Prefetcher: Prefetch,
 {
+    async fn new(lookup: LookedUp, flags: i32, pid: u32) -> Self {
+        metrics::increment_gauge!("fs.current_handles", 1.0, "type" => "unassigned");
+        FileHandleState::Created { lookup, flags, pid }
+    }
+
     async fn new_write_handle(
         lookup: &LookedUp,
         ino: InodeNo,
@@ -97,6 +116,10 @@ where
         pid: u32,
         fs: &S3Filesystem<Client, Prefetcher>,
     ) -> Result<FileHandleState<Client, Prefetcher>, Error> {
+        if flags & libc::O_ACCMODE == libc::O_RDONLY {
+            return Err(err!(libc::EBADF, "file handle is not open for writes"));
+        }
+
         let is_truncate = flags & libc::O_TRUNC != 0;
         let handle = fs
             .superblock
@@ -123,8 +146,13 @@ where
 
     async fn new_read_handle(
         lookup: &LookedUp,
+        flags: i32,
+        pid: u32,
         fs: &S3Filesystem<Client, Prefetcher>,
     ) -> Result<FileHandleState<Client, Prefetcher>, Error> {
+        if flags & libc::O_WRONLY != 0 {
+            return Err(err!(libc::EBADF, "file handle is not open for reads",));
+        }
         if !lookup.stat.is_readable {
             return Err(err!(
                 libc::EACCES,
@@ -141,7 +169,7 @@ where
         let request = fs
             .prefetcher
             .prefetch(fs.client.clone(), &fs.bucket, &full_key, object_size, etag.clone());
-        let handle = FileHandleState::Read(request);
+        let handle = FileHandleState::Read { request, pid };
         metrics::increment_gauge!("fs.current_handles", 1.0, "type" => "read");
         Ok(handle)
     }
@@ -698,29 +726,10 @@ where
             return Err(err!(libc::EINVAL, "O_SYNC and O_DSYNC are not supported"));
         }
 
-        let state = if flags & libc::O_RDWR != 0 {
-            let is_truncate = flags & libc::O_TRUNC != 0;
-            if !remote_file || (self.config.allow_overwrite && is_truncate) {
-                // If the file is new or opened in truncate mode, we know it must be a write handle.
-                debug!("fs:open choosing write handle for O_RDWR");
-                FileHandleState::new_write_handle(&lookup, lookup.inode.ino(), flags, pid, self).await?
-            } else {
-                // Otherwise, it must be a read handle.
-                debug!("fs:open choosing read handle for O_RDWR");
-                FileHandleState::new_read_handle(&lookup, self).await?
-            }
-        } else if flags & libc::O_WRONLY != 0 {
-            FileHandleState::new_write_handle(&lookup, lookup.inode.ino(), flags, pid, self).await?
-        } else {
-            FileHandleState::new_read_handle(&lookup, self).await?
-        };
-
+        // All file handles will be lazily initialized on first read/write.
+        let state = FileHandleState::new(lookup, flags, pid).await.into();
         let fh = self.next_handle();
-        let handle = FileHandle {
-            inode,
-            full_key,
-            state: AsyncMutex::new(state),
-        };
+        let handle = FileHandle { inode, full_key, state };
         debug!(fh, ino, "new file handle created");
         self.file_handles.write().await.insert(fh, Arc::new(handle));
 
@@ -757,8 +766,19 @@ where
         logging::record_name(handle.inode.name());
         let mut state = handle.state.lock().await;
         let request = match &mut *state {
-            FileHandleState::Read(request) => request,
+            FileHandleState::Created { lookup, flags, pid, .. } => {
+                metrics::decrement_gauge!("fs.current_handles", 1.0, "type" => "unassigned");
+
+                *state = FileHandleState::new_read_handle(lookup, *flags, *pid, self).await?;
+                if let FileHandleState::Read { request, .. } = &mut *state {
+                    request
+                } else {
+                    unreachable!("handle type should always be assigned above");
+                }
+            }
+            FileHandleState::Read { request, .. } => request,
             FileHandleState::Write(_) => return Err(err!(libc::EBADF, "file handle is not open for reads")),
+            FileHandleState::Closed => return Err(err!(libc::EBADF, "file handle is already closed")),
         };
 
         match request.read(offset as u64, size as usize).await {
@@ -850,8 +870,18 @@ where
         let len = {
             let mut state = handle.state.lock().await;
             let request = match &mut *state {
+                FileHandleState::Created { lookup, flags, pid } => {
+                    *state = FileHandleState::new_write_handle(lookup, ino, *flags, *pid, self).await?;
+                    metrics::decrement_gauge!("fs.current_handles", 1.0, "type" => "unassigned");
+                    if let FileHandleState::Write(request) = &mut *state {
+                        request
+                    } else {
+                        unreachable!("handle type should always be assigned above");
+                    }
+                }
                 FileHandleState::Read { .. } => return Err(err!(libc::EBADF, "file handle is not open for writes")),
                 FileHandleState::Write(request) => request,
+                FileHandleState::Closed => return Err(err!(libc::EBADF, "file handle is already closed")),
             };
 
             request.write(offset, data, &handle.full_key).await?
@@ -1067,8 +1097,32 @@ where
         logging::record_name(file_handle.inode.name());
         let mut state = file_handle.state.lock().await;
         let request = match &mut *state {
+            FileHandleState::Created { lookup, flags, pid } => {
+                // This happens when users call fsync without any read() or write() requests;
+                // since we don't know what type of handle it would be, we need to consider what
+                // to do next for both cases.
+                // * if the file is new or opened in truncate mode, we know it must be a write
+                //   handle so we can start an upload and complete it immediately, resulting in
+                //   an empty file.
+                // * if the file already exists and it is not opened in truncate mode, we still
+                //   can't be sure of its type so we will do nothing and just return Ok.
+                let is_new_file = !lookup.inode.is_remote()?;
+                let is_truncate = *flags & libc::O_TRUNC != 0;
+                if is_new_file || is_truncate {
+                    *state = FileHandleState::new_write_handle(lookup, lookup.inode.ino(), *flags, *pid, self).await?;
+                    metrics::decrement_gauge!("fs.current_handles", 1.0, "type" => "unassigned");
+                    if let FileHandleState::Write(request) = &mut *state {
+                        request
+                    } else {
+                        unreachable!("handle type should always be assigned above");
+                    }
+                } else {
+                    return Ok(());
+                }
+            }
             FileHandleState::Read { .. } => return Ok(()),
             FileHandleState::Write(request) => request,
+            FileHandleState::Closed => return Ok(()),
         };
         self.complete_upload(request, &file_handle.full_key, false, None).await
     }
@@ -1087,6 +1141,10 @@ where
         // process. In many cases, the child will then immediately close (flush) the duplicated
         // file descriptors. We will not complete the upload if we can detect that the process
         // invoking flush is different from the one that originally opened the file.
+        //
+        // The same applies to the read path: we want to stop the prefetcher and decrease the
+        // reader count as soon as the user closes a file descriptor, so that we don't block
+        // other operations like overwriting the file.
         let file_handle = {
             let file_handles = self.file_handles.read().await;
             match file_handles.get(&fh) {
@@ -1097,11 +1155,30 @@ where
         logging::record_name(file_handle.inode.name());
         let mut state = file_handle.state.lock().await;
         match &mut *state {
-            FileHandleState::Read { .. } => Ok(()),
+            FileHandleState::Created { .. } => Ok(()),
+            FileHandleState::Read { pid: open_pid, .. } => {
+                if !are_from_same_process(*open_pid, pid) {
+                    trace!(
+                        file_handle.full_key,
+                        pid,
+                        open_pid,
+                        "not stopping prefetch because current pid differs from pid at open"
+                    );
+                    return Ok(());
+                }
+                // TODO make sure we cancel the inflight PrefetchingGetRequest. is just dropping enough?
+                file_handle.inode.finish_reading()?;
+
+                // Mark the file handle state as closed so we only update the reader count once
+                *state = FileHandleState::Closed;
+                metrics::decrement_gauge!("fs.current_handles", 1.0, "type" => "read");
+                Ok(())
+            }
             FileHandleState::Write(request) => {
                 self.complete_upload(request, &file_handle.full_key, true, Some(pid))
                     .await
             }
+            FileHandleState::Closed => Ok(()),
         }
     }
 
@@ -1133,7 +1210,30 @@ where
             }
         };
 
-        let request = match file_handle.state.into_inner() {
+        let mut state = file_handle.state.into_inner();
+        let request = match state {
+            FileHandleState::Created { lookup, flags, pid } => {
+                metrics::decrement_gauge!("fs.current_handles", 1.0, "type" => "unassigned");
+                // This happens when release is called before any read() or write();
+                // since we don't know what type of handle it would be, we need to consider
+                // what to do next for both cases.
+                // * if the file is new or opened in truncate mode, we know it must be a write
+                //   handle so we can start an upload from here.
+                // * if the file already exists and it is not opened in truncate mode, we still
+                //   can't be sure of its type so we will just drop it.
+                let is_new_file = !lookup.inode.is_remote()?;
+                let is_truncate = flags & libc::O_TRUNC != 0;
+                if is_new_file || is_truncate {
+                    state = FileHandleState::new_write_handle(&lookup, lookup.inode.ino(), flags, pid, self).await?;
+                    if let FileHandleState::Write(request) = state {
+                        request
+                    } else {
+                        unreachable!("handle type should always be assigned above");
+                    }
+                } else {
+                    return Ok(());
+                }
+            }
             FileHandleState::Read { .. } => {
                 // TODO make sure we cancel the inflight PrefetchingGetRequest. is just dropping enough?
                metrics::decrement_gauge!("fs.current_handles", 1.0, "type" => "read");
@@ -1141,6 +1241,7 @@ where
                return Ok(());
            }
            FileHandleState::Write(request) => request,
+            FileHandleState::Closed => return Ok(()),
        };
 
        let result = request.complete_if_in_progress(&file_handle.full_key).await;
diff --git a/mountpoint-s3/tests/fs.rs b/mountpoint-s3/tests/fs.rs
index 5551cfda4..f63ca294d 100644
--- a/mountpoint-s3/tests/fs.rs
+++ b/mountpoint-s3/tests/fs.rs
@@ -611,9 +611,14 @@ async fn test_sequential_write(write_size: usize) {
     let file_ino = dentry.attr.ino;
 
     // First let's check that we can't write it again
-    let result = fs
+    let fh = fs
         .open(file_ino, libc::S_IFREG as i32 | libc::O_WRONLY, 0)
         .await
+        .unwrap()
+        .fh;
+    let result = fs
+        .write(file_ino, fh, offset, &[0xaa; 27], 0, 0, None)
+        .await
         .expect_err("file should not be overwritable")
         .to_errno();
     assert_eq!(result, libc::EPERM);
@@ -695,14 +700,23 @@ async fn test_duplicate_write_fails() {
     assert_eq!(dentry.attr.size, 0);
     let file_ino = dentry.attr.ino;
 
-    let _opened = fs
+    let opened = fs
         .open(file_ino, libc::S_IFREG as i32 | libc::O_WRONLY, 0)
         .await
         .unwrap();
+    _ = fs
+        .write(file_ino, opened.fh, 0, &[0xaa; 27], 0, 0, None)
+        .await
+        .expect("first write should succeed");
 
-    let err = fs
+    let opened = fs
         .open(file_ino, libc::S_IFREG as i32 | libc::O_WRONLY, 0)
         .await
+        .expect("open should succeed");
+    // Should not be allowed to write the file a second time
+    let err = fs
+        .write(file_ino, opened.fh, 0, &[0xaa; 27], 0, 0, None)
+        .await
         .expect_err("should not be able to write twice")
         .to_errno();
     assert_eq!(err, libc::EPERM);
@@ -1234,12 +1248,20 @@
         let getattr = fs.getattr(entry.ino).await.unwrap();
         assert_eq!(flexible_retrieval, getattr.attr.perm == 0);
 
-        let open = fs.open(entry.ino, libc::O_RDONLY, 0).await;
+        let open = fs
+            .open(entry.ino, libc::O_RDONLY, 0)
+            .await
+            .expect("open should succeed");
+        let read_result = fs.read(entry.ino, open.fh, 0, 4096, 0, None).await;
         if flexible_retrieval {
-            let err = open.expect_err("can't open flexible retrieval objects");
+            let err = read_result.expect_err("can't read flexible retrieval objects");
             assert_eq!(err.to_errno(), libc::EACCES);
         } else {
-            let open = open.expect("instant retrieval files are readable");
+            assert_eq!(
+                &read_result.unwrap()[..],
+                b"hello world",
+                "instant retrieval files are readable"
+            );
             fs.release(entry.ino, open.fh, 0, None, true).await.unwrap();
         }
     }
@@ -1265,12 +1287,20 @@
         let getattr = fs.getattr(lookup.attr.ino).await.unwrap();
         assert_eq!(flexible_retrieval, getattr.attr.perm == 0);
 
-        let open = fs.open(lookup.attr.ino, libc::O_RDONLY, 0).await;
+        let open = fs
+            .open(lookup.attr.ino, libc::O_RDONLY, 0)
+            .await
+            .expect("open should succeed");
+        let read_result = fs.read(lookup.attr.ino, open.fh, 0, 4096, 0, None).await;
         if flexible_retrieval {
-            let err = open.expect_err("can't open flexible retrieval objects");
+            let err = read_result.expect_err("can't read flexible retrieval objects");
             assert_eq!(err.to_errno(), libc::EACCES);
         } else {
-            let open = open.expect("instant retrieval files are readable");
+            assert_eq!(
+                &read_result.unwrap()[..],
+                b"hello world",
+                "instant retrieval files are readable"
+            );
             fs.release(lookup.attr.ino, open.fh, 0, None, true).await.unwrap();
         }
     }
diff --git a/mountpoint-s3/tests/fuse_tests/read_test.rs b/mountpoint-s3/tests/fuse_tests/read_test.rs
index 538344852..135ea45fa 100644
--- a/mountpoint-s3/tests/fuse_tests/read_test.rs
+++ b/mountpoint-s3/tests/fuse_tests/read_test.rs
@@ -1,5 +1,5 @@
 use std::fs::{read_dir, File};
-use std::io::{Read, Seek, SeekFrom, Write};
+use std::io::{Read as _, Seek, SeekFrom, Write};
 #[cfg(not(feature = "s3express_tests"))]
 use std::os::unix::prelude::PermissionsExt;
 use std::path::Path;
@@ -273,11 +273,10 @@ where
     assert_eq!(dir_entry_names, vec!["hello.txt"]);
 
     // Overwrite the test file and verify that we can't read from a file opened in O_WRONLY
-    let mut write_fh = File::options().write(true).truncate(true).open(&file_path).unwrap();
+    let mut write_fh = File::options().write(true).open(&file_path).unwrap();
     let mut contents = String::new();
     let err = write_fh.read_to_string(&mut contents).expect_err("read should fail");
     assert_eq!(err.raw_os_error(), Some(libc::EBADF));
-    write_fh.sync_all().unwrap();
     drop(write_fh);
 
     // We shouldn't be able to read from a file mid-write in O_RDWR
@@ -294,8 +293,8 @@ where
     assert_eq!(err.raw_os_error(), Some(libc::EBADF));
 
     // Read should also fail from different file handle
-    let err =
-        open_for_read(&file_path, true).expect_err("opening for read should fail with pending write handles open");
+    let mut read_fh = open_for_read(&file_path, true).unwrap();
+    let err = read_fh.read_to_string(&mut contents).expect_err("read should fail");
     assert_eq!(err.raw_os_error(), Some(libc::EPERM));
 }
 
@@ -310,44 +309,3 @@ fn read_errors_test_s3() {
 fn read_errors_test_mock(prefix: &str) {
     read_errors_test(fuse::mock_session::new, prefix);
 }
-
-fn read_after_flush_test<F>(creator_fn: F)
-where
-    F: FnOnce(&str, TestSessionConfig) -> (TempDir, BackgroundSession, TestClientBox),
-{
-    const KEY: &str = "data.bin";
-    let (mount_point, _session, mut test_client) = creator_fn("read_after_flush_test", Default::default());
-
-    let mut rng = ChaChaRng::seed_from_u64(0x87654321);
-    let mut two_mib_body = vec![0; 2 * 1024 * 1024];
-    rng.fill_bytes(&mut two_mib_body);
-    test_client.put_object(KEY, &two_mib_body).unwrap();
-
-    let path = mount_point.path().join(KEY);
-    let mut f = open_for_read(path, true).unwrap();
-
-    let mut content = vec![0; 128];
-    f.read_exact(&mut content).unwrap();
-
-    let mut f_dup = f.try_clone().unwrap();
-
-    // Close the file. Triggers a flush on the file handle.
-    drop(f);
-
-    // Read using the duplicated instance (same underlying handle).
-    // Seek to the end of the file to avoid relying on the kernel cache.
-    let pos = f_dup.seek(SeekFrom::End(-(content.len() as i64))).unwrap() as usize;
-    f_dup.read_exact(&mut content).unwrap();
-    assert_eq!(content, two_mib_body[pos..]);
-}
-
-#[cfg(feature = "s3_tests")]
-#[test]
-fn read_after_flush_test_s3() {
-    read_after_flush_test(fuse::s3_session::new);
-}
-
-#[test]
-fn read_after_flush_test_mock() {
-    read_after_flush_test(fuse::mock_session::new);
-}
diff --git a/mountpoint-s3/tests/fuse_tests/write_test.rs b/mountpoint-s3/tests/fuse_tests/write_test.rs
index 3f23c6c3c..5711fc484 100644
--- a/mountpoint-s3/tests/fuse_tests/write_test.rs
+++ b/mountpoint-s3/tests/fuse_tests/write_test.rs
@@ -89,6 +89,17 @@ where
     let read_dir_iter = read_dir(&subdir).unwrap();
     let dir_entry_names = read_dir_to_entry_names(read_dir_iter);
     assert_eq!(dir_entry_names, vec!["hello.txt", "new.txt"]);
+
+    if append {
+        // We can't append existing files
+        let err = open_for_write(&path, append, write_only).expect_err("can't append existing file");
+        assert_eq!(err.kind(), ErrorKind::InvalidInput);
+    } else {
+        // We shouldn't be allowed to write the file again
+        let mut f = open_for_write(&path, append, write_only).expect("should be able to open the file");
+        let err = f.write(b"hello world").expect_err("can't write existing file");
+        assert_eq!(err.kind(), ErrorKind::PermissionDenied);
+    }
 }
 
 #[cfg(feature = "s3_tests")]
@@ -125,7 +136,8 @@ where
     // Existing files should not be writable even in O_APPEND
     let err = open_for_write(&path, true, true).expect_err("can't append existing file");
     assert_eq!(err.kind(), ErrorKind::InvalidInput);
-    let err = open_for_write(&path, false, true).expect_err("can't open existing file for write");
+    let mut f = open_for_write(&path, false, true).expect("should be able to open the file");
+    let err = f.write(b"hello world").expect_err("can't write existing file");
     assert_eq!(err.kind(), ErrorKind::PermissionDenied);
 
     // New files can't be opened with O_SYNC
@@ -156,18 +168,11 @@ where
     // For default config, existing files can be opened O_RDWR but only reading should work on them
     let mut file = File::options().read(true).write(true).create(true).open(&path).unwrap();
     assert!(file.read(&mut [0u8; 1]).is_ok());
+
+    let mut file = File::options().read(true).write(true).create(true).open(&path).unwrap();
     let err = file
         .write(b"hello world")
         .expect_err("write to an existing file should fail");
-    assert_eq!(err.raw_os_error(), Some(libc::EBADF));
-
-    // For default config, existing files cannot be opened with O_TRUNC
-    let err = File::options()
-        .read(true)
-        .write(true)
-        .truncate(true)
-        .open(&path)
-        .expect_err("existing file cannot be opened with O_TRUNC");
     assert_eq!(err.raw_os_error(), Some(libc::EPERM));
 }
 
@@ -753,14 +758,14 @@ where
     assert_eq!(err.raw_os_error(), Some(libc::EBADF));
 
     let mut options = File::options();
-    let err = options
-        .write(true)
-        .truncate(true)
-        .open(&path)
-        .expect_err("opening a file for write while it is being read should fail");
+    let mut write_fh = options.write(true).open(&path).unwrap();
+    let err = write_fh
+        .write(b"hello world")
+        .expect_err("writing to a file that is being read should fail");
     assert_eq!(err.raw_os_error(), Some(libc::EPERM));
 
     drop(fh);
+    drop(write_fh);
 }
 
 #[cfg(feature = "s3_tests")]
@@ -797,25 +802,17 @@ where
     let _subdir = mount_point.path().join("dir");
     let path = mount_point.path().join("dir/hello.txt");
 
-    // Open should fail without truncate flag
+    // Writes should fail when opening the file without the truncate flag
     let mut options = File::options();
     if !write_only {
-        let mut read_fh = options
-            .read(true)
-            .write(true)
-            .open(path)
-            .expect("using RW should open for read");
-        let err = read_fh
-            .write(b"hello world")
-            .expect_err("writing to a file opened for read should fail");
-        assert_eq!(err.raw_os_error(), Some(libc::EBADF));
-    } else {
-        let err = options
-            .write(true)
-            .open(path)
-            .expect_err("overwriting a file opened without truncate flag should fail");
-        assert_eq!(err.raw_os_error(), Some(libc::EPERM));
+        options.read(true);
     }
+    let mut write_fh = options.write(true).open(path).unwrap();
+    let err = write_fh
+        .write(b"hello world")
+        .expect_err("overwriting a file opened without truncate flag should fail");
+    assert_eq!(err.raw_os_error(), Some(libc::EPERM));
+    drop(write_fh);
 }
 
 #[cfg(feature = "s3_tests")]
 #[test_case(true; "write_only")]
 #[test_case(false; "read_write")]
 fn overwrite_fail_on_write_without_truncate_test_s3(write_only: bool) {
     overwrite_fail_on_write_without_truncate_test(
         fuse::s3_session::new,
         "overwrite_fail_on_write_without_truncate_test",
         write_only,
     );
 }
 
@@ -839,7 +836,7 @@ fn overwrite_fail_on_write_without_truncate_test_mock(write_only: bool) {
     );
 }
 
-fn overwrite_truncate_test<F>(creator_fn: F, prefix: &str, write_only: bool)
+fn overwrite_no_truncate_test<F>(creator_fn: F, prefix: &str, write_only: bool)
 where
     F: FnOnce(&str, TestSessionConfig) -> (TempDir, BackgroundSession, TestClientBox),
 {
@@ -859,39 +856,36 @@ where
     let _subdir = mount_point.path().join("dir");
     let path = mount_point.path().join("dir/hello.txt");
 
-    // File should be empty when opened with O_TRUNC even without any write
+    // Open the file in non-truncate mode and do nothing
     let mut options = File::options();
     if !write_only {
         options.read(true);
     }
-    let write_fh = options
-        .write(true)
-        .truncate(true)
-        .open(&path)
-        .expect("open should succeed");
+    let write_fh = options.write(true).open(&path).expect("open should succeed");
     drop(write_fh);
 
+    // File content should not be changed
     let mut options = File::options();
-    let mut read_fh = options.read(true).open(&path).unwrap();
+    let mut fh = options.read(true).write(true).open(&path).unwrap();
     let mut hello_contents = String::new();
-    read_fh.read_to_string(&mut hello_contents).unwrap();
-    assert!(hello_contents.is_empty());
+    fh.read_to_string(&mut hello_contents).unwrap();
+    assert_eq!(hello_contents, "hello world");
 }
 
 #[cfg(feature = "s3_tests")]
 #[test_case(true; "write_only")]
 #[test_case(false; "read_write")]
-fn overwrite_truncate_test_s3(write_only: bool) {
-    overwrite_truncate_test(fuse::s3_session::new, "overwrite_truncate_test", write_only);
+fn overwrite_no_truncate_test_s3(write_only: bool) {
+    overwrite_no_truncate_test(fuse::s3_session::new, "overwrite_no_truncate_test", write_only);
 }
 
 #[test_case(true; "write_only")]
 #[test_case(false; "read_write")]
-fn overwrite_truncate_test_mock(write_only: bool) {
-    overwrite_truncate_test(fuse::mock_session::new, "overwrite_truncate_test", write_only);
+fn overwrite_no_truncate_test_mock(write_only: bool) {
+    overwrite_no_truncate_test(fuse::mock_session::new, "overwrite_no_truncate_test", write_only);
 }
 
-fn overwrite_after_read_test<F>(creator_fn: F, prefix: &str)
+fn overwrite_truncate_test<F>(creator_fn: F, prefix: &str, write_only: bool)
 where
     F: FnOnce(&str, TestSessionConfig) -> (TempDir, BackgroundSession, TestClientBox),
 {
@@ -911,42 +905,44 @@ where
     let _subdir = mount_point.path().join("dir");
     let path = mount_point.path().join("dir/hello.txt");
 
-    // Read first
-    let mut read_fh = File::options().read(true).open(&path).unwrap();
-    let mut hello_contents = String::new();
-    read_fh.read_to_string(&mut hello_contents).unwrap();
-    assert_eq!(hello_contents, "hello world");
-    drop(read_fh);
-
-    // Try to open the same file for write (overwrite)
-    let write_fh = File::options()
+    // File should be empty when opened with O_TRUNC even without any write
+    let mut options = File::options();
+    if !write_only {
+        options.read(true);
+    }
+    let write_fh = options
         .write(true)
         .truncate(true)
         .open(&path)
         .expect("open should succeed");
     drop(write_fh);
+
+    let mut options = File::options();
+    let mut read_fh = options.read(true).open(&path).unwrap();
+    let mut hello_contents = String::new();
+    read_fh.read_to_string(&mut hello_contents).unwrap();
+    assert!(hello_contents.is_empty());
 }
 
 #[cfg(feature = "s3_tests")]
-#[test]
-#[ignore = "due to a race condition on release of a read handle, overwrite after read may occasionally fail on open"]
-fn overwrite_after_read_test_s3() {
-    overwrite_after_read_test(fuse::s3_session::new, "overwrite_after_read_test");
+#[test_case(true; "write_only")]
+#[test_case(false; "read_write")]
+fn overwrite_truncate_test_s3(write_only: bool) {
+    overwrite_truncate_test(fuse::s3_session::new, "overwrite_truncate_test", write_only);
 }
 
-#[test_case(""; "no prefix")]
-#[test_case("overwrite_after_read_test"; "prefix")]
-#[ignore = "due to a race condition on release of a read handle, overwrite after read may occasionally fail on open"]
-fn overwrite_after_read_test_mock(prefix: &str) {
-    overwrite_after_read_test(fuse::mock_session::new, prefix);
+#[test_case(true; "write_only")]
+#[test_case(false; "read_write")]
+fn overwrite_truncate_test_mock(write_only: bool) {
+    overwrite_truncate_test(fuse::mock_session::new, "overwrite_truncate_test", write_only);
 }
 
-fn write_handle_no_update_existing_empty_file<F>(creator_fn: F, prefix: &str, allow_overwrite: bool)
+fn read_overwrite_read_test<F>(creator_fn: F, prefix: &str)
 where
     F: FnOnce(&str, TestSessionConfig) -> (TempDir, BackgroundSession, TestClientBox),
 {
     let filesystem_config = S3FilesystemConfig {
-        allow_overwrite,
+        allow_overwrite: true,
         ..Default::default()
     };
     let test_config = TestSessionConfig {
@@ -956,37 +952,45 @@ where
     let (mount_point, _session, mut test_client) = creator_fn(prefix, test_config);
 
     // Make sure there's an existing directory and a file
-    test_client.put_object("dir/hello.txt", b"").unwrap();
+    test_client.put_object("dir/hello.txt", b"hello world").unwrap();
     let _subdir = mount_point.path().join("dir");
     let path = mount_point.path().join("dir/hello.txt");
 
-    // Open the file in non-truncate mode and do nothing
-    File::options()
+    // Read first
+    let mut options = File::options();
+    let mut read_fh = options.read(true).open(&path).unwrap();
+    let mut hello_contents = String::new();
+    read_fh.read_to_string(&mut hello_contents).unwrap();
+    assert_eq!(hello_contents, "hello world");
+    drop(read_fh);
+
+    // File should be empty when opened with O_WRONLY and O_TRUNC even without any write
+    let mut options = File::options();
+    let write_fh = options
         .write(true)
-        .open(path)
-        .expect_err("write-only open should not succeed without O_TRUNC");
+        .truncate(true)
+        .open(&path)
+        .expect("open should succeed");
+    drop(write_fh);
+
+    let mut options = File::options();
+    let mut read_fh = options.read(true).open(&path).unwrap();
+    let mut hello_contents = String::new();
+    read_fh.read_to_string(&mut hello_contents).unwrap();
+    assert!(hello_contents.is_empty());
 }
 
 #[cfg(feature = "s3_tests")]
-#[test_case(true; "allow overwrite")]
-#[test_case(false; "disallow overwrite")]
-fn write_handle_no_update_existing_empty_file_s3(allow_overwrite: bool) {
-    write_handle_no_update_existing_empty_file(
-        fuse::s3_session::new,
-        "write_handle_no_update_existing_empty_file",
-        allow_overwrite,
-    );
+#[test]
+fn read_overwrite_read_test_s3() {
+    read_overwrite_read_test(fuse::s3_session::new, "read_overwrite_read_test");
}
 
-#[test_case(true; "allow overwrite")]
-#[test_case(false; "disallow overwrite")]
-fn write_handle_no_update_existing_empty_file_mock(allow_overwrite: bool) {
-    write_handle_no_update_existing_empty_file(
-        fuse::mock_session::new,
-        "write_handle_no_update_existing_empty_file",
-        allow_overwrite,
-    );
+#[test_case(""; "no prefix")]
+#[test_case("read_overwrite_read_test"; "prefix")]
+fn read_overwrite_read_test_mock(prefix: &str) {
+    read_overwrite_read_test(fuse::mock_session::new, prefix);
 }
 
 // This test checks that a write can be performed when IAM session policy enforces the usage of the specific SSE type and a KMS key ID
diff --git a/mountpoint-s3/tests/reftests/harness.rs b/mountpoint-s3/tests/reftests/harness.rs
index 4725c3815..8906b1e94 100644
--- a/mountpoint-s3/tests/reftests/harness.rs
+++ b/mountpoint-s3/tests/reftests/harness.rs
@@ -347,12 +347,20 @@ impl Harness {
             return;
         };
 
-        let open = self.fs.open(inflight_write.inode, libc::O_WRONLY, 0).await;
-        if inflight_write.file_handle.is_some() {
-            // Shouldn't be able to reopen a file that's already open for writing
-            assert!(matches!(open, Err(e) if e.to_errno() == libc::EPERM));
+        let open = self
+            .fs
+            .open(inflight_write.inode, libc::O_WRONLY, 0)
+            .await
+            .expect("open should succeed");
+        if inflight_write.written > 0 {
+            // Shouldn't be able to write to a file that is being written
+            let write = self
+                .fs
+                .write(inflight_write.inode, open.fh, 0, &[0; 8], 0, 0, None)
+                .await
+                .expect_err("write from another file handle should fail");
+            assert_eq!(write.to_errno(), libc::EPERM);
         } else {
-            let open = open.expect("open should succeed");
             let inflight_write = self.inflight_writes.get_mut(index).unwrap();
             inflight_write.file_handle = Some(open.fh);
         }
@@ -702,8 +710,14 @@ impl Harness {
     /// readable (open should fail).
     async fn check_local_file(&self, inode: InodeNo) {
         let _stat = self.fs.getattr(inode).await.expect("stat should succeed");
-        let open = self.fs.open(inode, libc::O_RDONLY, 0).await;
-        assert!(matches!(open, Err(e) if e.to_errno() == libc::EPERM));
+        let open = self
+            .fs
+            .open(inode, libc::O_RDONLY, 0)
+            .await
+            .expect("open should succeed");
+        let read_result = self.fs.read(inode, open.fh, 0, 4096, 0, None).await;
+        let error = read_result.expect_err("read should fail");
+        assert_eq!(error.to_errno(), libc::EPERM);
     }
 }
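For illustration, the user-visible contract these test changes encode, sketched under the assumption of a hypothetical mount at `/mnt/bucket` (the path and file name are stand-ins, not part of the patch): a write-only `open` of an existing object without `O_TRUNC` now succeeds, and the rejection is deferred to the first `write`.

```rust
use std::fs::OpenOptions;
use std::io::{ErrorKind, Write};

fn main() -> std::io::Result<()> {
    // Hypothetical path to an existing object under a Mountpoint mount.
    let path = "/mnt/bucket/dir/hello.txt";

    // With lazily initialized handles, open(O_WRONLY) without O_TRUNC
    // succeeds even though the object cannot be overwritten in place...
    let mut f = OpenOptions::new().write(true).open(path)?;

    // ...and the EPERM ("Operation not permitted") surfaces on the first
    // write instead, which is what the updated tests assert.
    match f.write(b"hello world") {
        Err(e) if e.kind() == ErrorKind::PermissionDenied => {
            eprintln!("write rejected as expected: {e}");
            Ok(())
        }
        other => panic!("expected EPERM, got {other:?}"),
    }
}
```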