diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index 3f7485fa78ca..539a309eac7d 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -1774,6 +1774,24 @@ intact. Unlike predictive prefetch, prescient prefetch never issues I/O that ends up not being needed, so it can't hurt performance. . +.It Sy zfs_readdir_dnode_prefetch_limit Ns = Ns Sy 0 Pq ulong +Disable prefetches in readdir for large directories. +When readdir searches a directory, it normally prefetches metadata for +all objects in the directory it checks, even if it's just +looking for a single object. +Setting this to a non-zero value disables that prefetching for directories +whose size is greater than that value. +Disabling it for large directories can greatly lower CPU usage on NFS servers +where directories have a very large number of subdirectories. +Directory size in this case is the size returned from calling +.Xr stat 2 +on the directory (stat.st_size). +On ZFS, this directory size value is approximately the number of files +and subdirectories in the directory. +A reasonable value would be 20000. +A zero value (the default) means no limit on directory metadata prefetching. +This parameter applies only on Linux. +. .It Sy zfs_qat_checksum_disable Ns = Ns Sy 0 Ns | Ns 1 Pq int Disable QAT hardware acceleration for SHA256 checksums. May be unset after the ZFS modules have been loaded to initialize the QAT diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index 1cecad9f7755..efe2371fae35 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -1504,6 +1504,7 @@ zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, cred_t *cr, * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, * we use the offset 2 for the '.zfs' directory. 
*/ +static ulong_t zfs_readdir_dnode_prefetch_limit = 0UL; int zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr) { @@ -1537,6 +1538,9 @@ zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr) os = zfsvfs->z_os; offset = ctx->pos; prefetch = zp->z_zn_prefetch; + if (zfs_readdir_dnode_prefetch_limit && + zp->z_size > zfs_readdir_dnode_prefetch_limit) + prefetch = B_FALSE; /* * Initialize the iterator cursor. @@ -4252,4 +4256,9 @@ EXPORT_SYMBOL(zfs_map); /* CSTYLED */ module_param(zfs_delete_blocks, ulong, 0644); MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async"); + +/* CSTYLED */ +module_param(zfs_readdir_dnode_prefetch_limit, ulong, 0644); +MODULE_PARM_DESC(zfs_readdir_dnode_prefetch_limit, + "No zfs_readdir prefetch if non-zero and size > this"); #endif