Skip to content

Commit

Permalink
pythongh-120754: Reduce system calls in full-file FileIO.readall() ca…
Browse files Browse the repository at this point in the history
…se (python#120755)

This reduces the system call count of a simple program[0] that reads all
the `.rst` files in Doc by over 10% (5706 -> 4734 system calls on my
linux system, 5813 -> 4875 on my macOS)

This reduces the number of `fstat()` calls always and seek calls most
the time. Stat was always called twice, once at open (to error early on
directories), and a second time to get the size of the file to be able
to read the whole file in one read. Now the size is cached with the
first call.

The code keeps an optimization that if the user had previously read a
lot of data, the current position is subtracted from the number of bytes
to read. That is somewhat expensive so only do it on larger files,
otherwise just try and read the extra bytes and resize the PyBytes as
needeed.

I built a little test program to validate the behavior + assumptions
around relative costs and then ran it under `strace` to get a log of the
system calls. Full samples below[1].

After the changes, this is everything in one `filename.read_text()`:

```python3
openat(AT_FDCWD, "cpython/Doc/howto/clinic.rst", O_RDONLY|O_CLOEXEC) = 3`
fstat(3, {st_mode=S_IFREG|0644, st_size=343, ...}) = 0`
ioctl(3, TCGETS, 0x7ffdfac04b40)        = -1 ENOTTY (Inappropriate ioctl for device)
lseek(3, 0, SEEK_CUR)                   = 0
read(3, ":orphan:\n\n.. This page is retain"..., 344) = 343
read(3, "", 1)                          = 0
close(3)                                = 0
```

This does make some tradeoffs
1. If the file size changes between open() and readall(), this will
still get all the data but might have more read calls.
2. I experimented with avoiding the stat + cached result for small files
in general, but on my dev workstation at least that tended to reduce
performance compared to using the fstat().

[0]

```python3
from pathlib import Path

nlines = []
for filename in Path("cpython/Doc").glob("**/*.rst"):
    nlines.append(len(filename.read_text()))
```

[1]
Before small file:

```
openat(AT_FDCWD, "cpython/Doc/howto/clinic.rst", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=343, ...}) = 0
ioctl(3, TCGETS, 0x7ffe52525930)        = -1 ENOTTY (Inappropriate ioctl for device)
lseek(3, 0, SEEK_CUR)                   = 0
lseek(3, 0, SEEK_CUR)                   = 0
fstat(3, {st_mode=S_IFREG|0644, st_size=343, ...}) = 0
read(3, ":orphan:\n\n.. This page is retain"..., 344) = 343
read(3, "", 1)                          = 0
close(3)                                = 0
```

After small file:

```
openat(AT_FDCWD, "cpython/Doc/howto/clinic.rst", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=343, ...}) = 0
ioctl(3, TCGETS, 0x7ffdfac04b40)        = -1 ENOTTY (Inappropriate ioctl for device)
lseek(3, 0, SEEK_CUR)                   = 0
read(3, ":orphan:\n\n.. This page is retain"..., 344) = 343
read(3, "", 1)                          = 0
close(3)                                = 0
```

Before large file:

```
openat(AT_FDCWD, "cpython/Doc/c-api/typeobj.rst", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=133104, ...}) = 0
ioctl(3, TCGETS, 0x7ffe52525930)        = -1 ENOTTY (Inappropriate ioctl for device)
lseek(3, 0, SEEK_CUR)                   = 0
lseek(3, 0, SEEK_CUR)                   = 0
fstat(3, {st_mode=S_IFREG|0644, st_size=133104, ...}) = 0
read(3, ".. highlight:: c\n\n.. _type-struc"..., 133105) = 133104
read(3, "", 1)                          = 0
close(3)                                = 0
```

After large file:

```
openat(AT_FDCWD, "cpython/Doc/c-api/typeobj.rst", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=133104, ...}) = 0
ioctl(3, TCGETS, 0x7ffdfac04b40)        = -1 ENOTTY (Inappropriate ioctl for device)
lseek(3, 0, SEEK_CUR)                   = 0
lseek(3, 0, SEEK_CUR)                   = 0
read(3, ".. highlight:: c\n\n.. _type-struc"..., 133105) = 133104
read(3, "", 1)                          = 0
close(3)                                = 0
```

Co-authored-by: Shantanu <12621235+hauntsaninja@users.noreply.github.com>
Co-authored-by: Erlend E. Aasland <erlend.aasland@protonmail.com>
Co-authored-by: Victor Stinner <vstinner@python.org>
  • Loading branch information
4 people authored Jul 4, 2024
1 parent 9728ead commit 2f5f19e
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 33 deletions.
22 changes: 14 additions & 8 deletions Lib/_pyio.py
Original file line number Diff line number Diff line change
Expand Up @@ -1577,6 +1577,7 @@ def __init__(self, file, mode='r', closefd=True, opener=None):
self._blksize = getattr(fdfstat, 'st_blksize', 0)
if self._blksize <= 1:
self._blksize = DEFAULT_BUFFER_SIZE
self._estimated_size = fdfstat.st_size

if _setmode:
# don't translate newlines (\r\n <=> \n)
Expand Down Expand Up @@ -1654,14 +1655,18 @@ def readall(self):
"""
self._checkClosed()
self._checkReadable()
bufsize = DEFAULT_BUFFER_SIZE
try:
pos = os.lseek(self._fd, 0, SEEK_CUR)
end = os.fstat(self._fd).st_size
if end >= pos:
bufsize = end - pos + 1
except OSError:
pass
if self._estimated_size <= 0:
bufsize = DEFAULT_BUFFER_SIZE
else:
bufsize = self._estimated_size + 1

if self._estimated_size > 65536:
try:
pos = os.lseek(self._fd, 0, SEEK_CUR)
if self._estimated_size >= pos:
bufsize = self._estimated_size - pos + 1
except OSError:
pass

result = bytearray()
while True:
Expand Down Expand Up @@ -1737,6 +1742,7 @@ def truncate(self, size=None):
if size is None:
size = self.tell()
os.ftruncate(self._fd, size)
self._estimated_size = size
return size

def close(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Reduce the number of system calls invoked when reading a whole file (ex. ``open('a.txt').read()``). For a sample program that reads the contents of the 400+ ``.rst`` files in the cpython repository ``Doc`` folder, there is an over 10% reduction in system call count.
70 changes: 45 additions & 25 deletions Modules/_io/fileio.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@
# define SMALLCHUNK BUFSIZ
#endif

/* Size at which a buffer is considered "large" and behavior should change to
avoid excessive memory allocation */
#define LARGE_BUFFER_CUTOFF_SIZE 65536

/*[clinic input]
module _io
Expand All @@ -72,6 +75,7 @@ typedef struct {
unsigned int closefd : 1;
char finalizing;
unsigned int blksize;
Py_off_t estimated_size;
PyObject *weakreflist;
PyObject *dict;
} fileio;
Expand Down Expand Up @@ -196,6 +200,7 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
self->appending = 0;
self->seekable = -1;
self->blksize = 0;
self->estimated_size = -1;
self->closefd = 1;
self->weakreflist = NULL;
}
Expand Down Expand Up @@ -482,6 +487,9 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
if (fdfstat.st_blksize > 1)
self->blksize = fdfstat.st_blksize;
#endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
if (fdfstat.st_size < PY_SSIZE_T_MAX) {
self->estimated_size = (Py_off_t)fdfstat.st_size;
}
}

#if defined(MS_WINDOWS) || defined(__CYGWIN__)
Expand Down Expand Up @@ -684,7 +692,7 @@ new_buffersize(fileio *self, size_t currentsize)
giving us amortized linear-time behavior. For bigger sizes, use a
less-than-double growth factor to avoid excessive allocation. */
assert(currentsize <= PY_SSIZE_T_MAX);
if (currentsize > 65536)
if (currentsize > LARGE_BUFFER_CUTOFF_SIZE)
addend = currentsize >> 3;
else
addend = 256 + currentsize;
Expand All @@ -707,43 +715,56 @@ static PyObject *
_io_FileIO_readall_impl(fileio *self)
/*[clinic end generated code: output=faa0292b213b4022 input=dbdc137f55602834]*/
{
struct _Py_stat_struct status;
Py_off_t pos, end;
PyObject *result;
Py_ssize_t bytes_read = 0;
Py_ssize_t n;
size_t bufsize;
int fstat_result;

if (self->fd < 0)
if (self->fd < 0) {
return err_closed();
}

Py_BEGIN_ALLOW_THREADS
_Py_BEGIN_SUPPRESS_IPH
#ifdef MS_WINDOWS
pos = _lseeki64(self->fd, 0L, SEEK_CUR);
#else
pos = lseek(self->fd, 0L, SEEK_CUR);
#endif
_Py_END_SUPPRESS_IPH
fstat_result = _Py_fstat_noraise(self->fd, &status);
Py_END_ALLOW_THREADS

if (fstat_result == 0)
end = status.st_size;
else
end = (Py_off_t)-1;

if (end > 0 && end >= pos && pos >= 0 && end - pos < PY_SSIZE_T_MAX) {
end = self->estimated_size;
if (end <= 0) {
/* Use a default size and resize as needed. */
bufsize = SMALLCHUNK;
}
else {
/* This is probably a real file, so we try to allocate a
buffer one byte larger than the rest of the file. If the
calculation is right then we should get EOF without having
to enlarge the buffer. */
bufsize = (size_t)(end - pos + 1);
} else {
bufsize = SMALLCHUNK;
if (end > _PY_READ_MAX - 1) {
bufsize = _PY_READ_MAX;
}
else {
bufsize = (size_t)end + 1;
}

/* While a lot of code does open().read() to get the whole contents
of a file it is possible a caller seeks/reads a ways into the file
then calls readall() to get the rest, which would result in allocating
more than required. Guard against that for larger files where we expect
the I/O time to dominate anyways while keeping small files fast. */
if (bufsize > LARGE_BUFFER_CUTOFF_SIZE) {
Py_BEGIN_ALLOW_THREADS
_Py_BEGIN_SUPPRESS_IPH
#ifdef MS_WINDOWS
pos = _lseeki64(self->fd, 0L, SEEK_CUR);
#else
pos = lseek(self->fd, 0L, SEEK_CUR);
#endif
_Py_END_SUPPRESS_IPH
Py_END_ALLOW_THREADS

if (end >= pos && pos >= 0 && (end - pos) < (_PY_READ_MAX - 1)) {
bufsize = (size_t)(end - pos) + 1;
}
}
}


result = PyBytes_FromStringAndSize(NULL, bufsize);
if (result == NULL)
return NULL;
Expand Down Expand Up @@ -783,7 +804,6 @@ _io_FileIO_readall_impl(fileio *self)
return NULL;
}
bytes_read += n;
pos += n;
}

if (PyBytes_GET_SIZE(result) > bytes_read) {
Expand Down

0 comments on commit 2f5f19e

Please sign in to comment.