Skip to content

Commit

Permalink
kernfs: switch global kernfs_rwsem lock to per-fs lock
Browse files Browse the repository at this point in the history
The kernfs implementation has big lock granularity(kernfs_rwsem) so
every kernfs-based(e.g., sysfs, cgroup) fs are able to compete the
lock. It makes trouble for some cases to wait the global lock
for a long time even though they are totally independent contexts
each other.

A general example is process A goes under direct reclaim with holding
the lock when it accessed the file in sysfs and process B is waiting
the lock with exclusive mode and then process C is waiting the lock
until process B could finish the job after it gets the lock from
process A.

This patch switches the global kernfs_rwsem to per-fs lock, which
put the rwsem into kernfs_root.

Suggested-by: Tejun Heo <tj@kernel.org>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Minchan Kim <minchan@kernel.org>
Link: https://lore.kernel.org/r/20211118230008.2679780-1-minchan@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
  • Loading branch information
minchank authored and gregkh committed Nov 24, 2021
1 parent 1360572 commit 393c371
Show file tree
Hide file tree
Showing 6 changed files with 97 additions and 63 deletions.
110 changes: 65 additions & 45 deletions fs/kernfs/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

#include "kernfs-internal.h"

DECLARE_RWSEM(kernfs_rwsem);
static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */
static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */
static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */
Expand All @@ -26,7 +25,7 @@ static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */

static bool kernfs_active(struct kernfs_node *kn)
{
lockdep_assert_held(&kernfs_rwsem);
lockdep_assert_held(&kernfs_root(kn)->kernfs_rwsem);
return atomic_read(&kn->active) >= 0;
}

Expand Down Expand Up @@ -457,14 +456,15 @@ void kernfs_put_active(struct kernfs_node *kn)
* return after draining is complete.
*/
static void kernfs_drain(struct kernfs_node *kn)
__releases(&kernfs_rwsem) __acquires(&kernfs_rwsem)
__releases(&kernfs_root(kn)->kernfs_rwsem)
__acquires(&kernfs_root(kn)->kernfs_rwsem)
{
struct kernfs_root *root = kernfs_root(kn);

lockdep_assert_held_write(&kernfs_rwsem);
lockdep_assert_held_write(&root->kernfs_rwsem);
WARN_ON_ONCE(kernfs_active(kn));

up_write(&kernfs_rwsem);
up_write(&root->kernfs_rwsem);

if (kernfs_lockdep(kn)) {
rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
Expand All @@ -483,7 +483,7 @@ static void kernfs_drain(struct kernfs_node *kn)

kernfs_drain_open_files(kn);

down_write(&kernfs_rwsem);
down_write(&root->kernfs_rwsem);
}

/**
Expand Down Expand Up @@ -718,11 +718,12 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
int kernfs_add_one(struct kernfs_node *kn)
{
struct kernfs_node *parent = kn->parent;
struct kernfs_root *root = kernfs_root(parent);
struct kernfs_iattrs *ps_iattr;
bool has_ns;
int ret;

down_write(&kernfs_rwsem);
down_write(&root->kernfs_rwsem);

ret = -EINVAL;
has_ns = kernfs_ns_enabled(parent);
Expand Down Expand Up @@ -753,7 +754,7 @@ int kernfs_add_one(struct kernfs_node *kn)
ps_iattr->ia_mtime = ps_iattr->ia_ctime;
}

up_write(&kernfs_rwsem);
up_write(&root->kernfs_rwsem);

/*
* Activate the new node unless CREATE_DEACTIVATED is requested.
Expand All @@ -767,7 +768,7 @@ int kernfs_add_one(struct kernfs_node *kn)
return 0;

out_unlock:
up_write(&kernfs_rwsem);
up_write(&root->kernfs_rwsem);
return ret;
}

Expand All @@ -788,7 +789,7 @@ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
bool has_ns = kernfs_ns_enabled(parent);
unsigned int hash;

lockdep_assert_held(&kernfs_rwsem);
lockdep_assert_held(&kernfs_root(parent)->kernfs_rwsem);

if (has_ns != (bool)ns) {
WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
Expand Down Expand Up @@ -820,7 +821,7 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
size_t len;
char *p, *name;

lockdep_assert_held_read(&kernfs_rwsem);
lockdep_assert_held_read(&kernfs_root(parent)->kernfs_rwsem);

/* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */
spin_lock_irq(&kernfs_rename_lock);
Expand Down Expand Up @@ -859,11 +860,12 @@ struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
const char *name, const void *ns)
{
struct kernfs_node *kn;
struct kernfs_root *root = kernfs_root(parent);

down_read(&kernfs_rwsem);
down_read(&root->kernfs_rwsem);
kn = kernfs_find_ns(parent, name, ns);
kernfs_get(kn);
up_read(&kernfs_rwsem);
up_read(&root->kernfs_rwsem);

return kn;
}
Expand All @@ -883,11 +885,12 @@ struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent,
const char *path, const void *ns)
{
struct kernfs_node *kn;
struct kernfs_root *root = kernfs_root(parent);

down_read(&kernfs_rwsem);
down_read(&root->kernfs_rwsem);
kn = kernfs_walk_ns(parent, path, ns);
kernfs_get(kn);
up_read(&kernfs_rwsem);
up_read(&root->kernfs_rwsem);

return kn;
}
Expand All @@ -912,6 +915,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
return ERR_PTR(-ENOMEM);

idr_init(&root->ino_idr);
init_rwsem(&root->kernfs_rwsem);
INIT_LIST_HEAD(&root->supers);

/*
Expand Down Expand Up @@ -1035,6 +1039,7 @@ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
{
struct kernfs_node *kn;
struct kernfs_root *root;

if (flags & LOOKUP_RCU)
return -ECHILD;
Expand All @@ -1046,18 +1051,19 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
/* If the kernfs parent node has changed discard and
* proceed to ->lookup.
*/
down_read(&kernfs_rwsem);
spin_lock(&dentry->d_lock);
parent = kernfs_dentry_node(dentry->d_parent);
if (parent) {
spin_unlock(&dentry->d_lock);
root = kernfs_root(parent);
down_read(&root->kernfs_rwsem);
if (kernfs_dir_changed(parent, dentry)) {
spin_unlock(&dentry->d_lock);
up_read(&kernfs_rwsem);
up_read(&root->kernfs_rwsem);
return 0;
}
}
spin_unlock(&dentry->d_lock);
up_read(&kernfs_rwsem);
up_read(&root->kernfs_rwsem);
} else
spin_unlock(&dentry->d_lock);

/* The kernfs parent node hasn't changed, leave the
* dentry negative and return success.
Expand All @@ -1066,7 +1072,8 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
}

kn = kernfs_dentry_node(dentry);
down_read(&kernfs_rwsem);
root = kernfs_root(kn);
down_read(&root->kernfs_rwsem);

/* The kernfs node has been deactivated */
if (!kernfs_active(kn))
Expand All @@ -1085,10 +1092,10 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
kernfs_info(dentry->d_sb)->ns != kn->ns)
goto out_bad;

up_read(&kernfs_rwsem);
up_read(&root->kernfs_rwsem);
return 1;
out_bad:
up_read(&kernfs_rwsem);
up_read(&root->kernfs_rwsem);
return 0;
}

Expand All @@ -1102,10 +1109,12 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
{
struct kernfs_node *parent = dir->i_private;
struct kernfs_node *kn;
struct kernfs_root *root;
struct inode *inode = NULL;
const void *ns = NULL;

down_read(&kernfs_rwsem);
root = kernfs_root(parent);
down_read(&root->kernfs_rwsem);
if (kernfs_ns_enabled(parent))
ns = kernfs_info(dir->i_sb)->ns;

Expand All @@ -1116,7 +1125,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
* create a negative.
*/
if (!kernfs_active(kn)) {
up_read(&kernfs_rwsem);
up_read(&root->kernfs_rwsem);
return NULL;
}
inode = kernfs_get_inode(dir->i_sb, kn);
Expand All @@ -1131,7 +1140,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
*/
if (!IS_ERR(inode))
kernfs_set_rev(parent, dentry);
up_read(&kernfs_rwsem);
up_read(&root->kernfs_rwsem);

/* instantiate and hash (possibly negative) dentry */
return d_splice_alias(inode, dentry);
Expand Down Expand Up @@ -1254,7 +1263,7 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
{
struct rb_node *rbn;

lockdep_assert_held_write(&kernfs_rwsem);
lockdep_assert_held_write(&kernfs_root(root)->kernfs_rwsem);

/* if first iteration, visit leftmost descendant which may be root */
if (!pos)
Expand Down Expand Up @@ -1289,8 +1298,9 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
void kernfs_activate(struct kernfs_node *kn)
{
struct kernfs_node *pos;
struct kernfs_root *root = kernfs_root(kn);

down_write(&kernfs_rwsem);
down_write(&root->kernfs_rwsem);

pos = NULL;
while ((pos = kernfs_next_descendant_post(pos, kn))) {
Expand All @@ -1304,14 +1314,14 @@ void kernfs_activate(struct kernfs_node *kn)
pos->flags |= KERNFS_ACTIVATED;
}

up_write(&kernfs_rwsem);
up_write(&root->kernfs_rwsem);
}

static void __kernfs_remove(struct kernfs_node *kn)
{
struct kernfs_node *pos;

lockdep_assert_held_write(&kernfs_rwsem);
lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem);

/*
* Short-circuit if non-root @kn has already finished removal.
Expand Down Expand Up @@ -1381,9 +1391,11 @@ static void __kernfs_remove(struct kernfs_node *kn)
*/
void kernfs_remove(struct kernfs_node *kn)
{
down_write(&kernfs_rwsem);
struct kernfs_root *root = kernfs_root(kn);

down_write(&root->kernfs_rwsem);
__kernfs_remove(kn);
up_write(&kernfs_rwsem);
up_write(&root->kernfs_rwsem);
}

/**
Expand Down Expand Up @@ -1469,8 +1481,9 @@ void kernfs_unbreak_active_protection(struct kernfs_node *kn)
bool kernfs_remove_self(struct kernfs_node *kn)
{
bool ret;
struct kernfs_root *root = kernfs_root(kn);

down_write(&kernfs_rwsem);
down_write(&root->kernfs_rwsem);
kernfs_break_active_protection(kn);

/*
Expand Down Expand Up @@ -1498,9 +1511,9 @@ bool kernfs_remove_self(struct kernfs_node *kn)
atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
break;

up_write(&kernfs_rwsem);
up_write(&root->kernfs_rwsem);
schedule();
down_write(&kernfs_rwsem);
down_write(&root->kernfs_rwsem);
}
finish_wait(waitq, &wait);
WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
Expand All @@ -1513,7 +1526,7 @@ bool kernfs_remove_self(struct kernfs_node *kn)
*/
kernfs_unbreak_active_protection(kn);

up_write(&kernfs_rwsem);
up_write(&root->kernfs_rwsem);
return ret;
}

Expand All @@ -1530,20 +1543,22 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
const void *ns)
{
struct kernfs_node *kn;
struct kernfs_root *root;

if (!parent) {
WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
name);
return -ENOENT;
}

down_write(&kernfs_rwsem);
root = kernfs_root(parent);
down_write(&root->kernfs_rwsem);

kn = kernfs_find_ns(parent, name, ns);
if (kn)
__kernfs_remove(kn);

up_write(&kernfs_rwsem);
up_write(&root->kernfs_rwsem);

if (kn)
return 0;
Expand All @@ -1562,14 +1577,16 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
const char *new_name, const void *new_ns)
{
struct kernfs_node *old_parent;
struct kernfs_root *root;
const char *old_name = NULL;
int error;

/* can't move or rename root */
if (!kn->parent)
return -EINVAL;

down_write(&kernfs_rwsem);
root = kernfs_root(kn);
down_write(&root->kernfs_rwsem);

error = -ENOENT;
if (!kernfs_active(kn) || !kernfs_active(new_parent) ||
Expand Down Expand Up @@ -1623,7 +1640,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,

error = 0;
out:
up_write(&kernfs_rwsem);
up_write(&root->kernfs_rwsem);
return error;
}

Expand Down Expand Up @@ -1694,11 +1711,14 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
struct dentry *dentry = file->f_path.dentry;
struct kernfs_node *parent = kernfs_dentry_node(dentry);
struct kernfs_node *pos = file->private_data;
struct kernfs_root *root;
const void *ns = NULL;

if (!dir_emit_dots(file, ctx))
return 0;
down_read(&kernfs_rwsem);

root = kernfs_root(parent);
down_read(&root->kernfs_rwsem);

if (kernfs_ns_enabled(parent))
ns = kernfs_info(dentry->d_sb)->ns;
Expand All @@ -1715,12 +1735,12 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
file->private_data = pos;
kernfs_get(pos);

up_read(&kernfs_rwsem);
up_read(&root->kernfs_rwsem);
if (!dir_emit(ctx, name, len, ino, type))
return 0;
down_read(&kernfs_rwsem);
down_read(&root->kernfs_rwsem);
}
up_read(&kernfs_rwsem);
up_read(&root->kernfs_rwsem);
file->private_data = NULL;
ctx->pos = INT_MAX;
return 0;
Expand Down
Loading

0 comments on commit 393c371

Please sign in to comment.