Skip to content

Commit

Permalink
vfs: add the concept of vnode state transitions
Browse files Browse the repository at this point in the history
To quote from a comment above vput_final:
<quote>
* XXX Some filesystems pass in an exclusively locked vnode and strongly depend
* on the lock being held all the way until VOP_INACTIVE. This in particular
* happens with UFS which adds half-constructed vnodes to the hash, where they
* can be found by other code.
</quote>

As is there is no mechanism which allows filesystems to denote that a
vnode is fully initialized, consequently problems like the above are
only found the hard way(tm).

Add rudimentary support for state transitions, which in particular allow
to assert the vnode is not legally unlocked until its fate is decided
(either construction finishes or vgone is called to abort it).

The new field lands in a 1-byte hole, thus it does not grow the struct.

Bump __FreeBSD_version to 1400077

Reviewed by:	kib (previous version)
Tested by:	pho
Differential Revision:	https://reviews.freebsd.org/D37759
  • Loading branch information
mjguzik authored and bsdjhb committed Feb 28, 2023
2 parents c7c50f5 + 829f0bc commit 9e9fbe5
Show file tree
Hide file tree
Showing 23 changed files with 121 additions and 4 deletions.
1 change: 1 addition & 0 deletions sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
* Acquire vnode lock before making it available to the world.
*/
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
vn_set_state(vp, VSTATE_CONSTRUCTED);
VN_LOCK_AREC(vp);
if (vp->v_type != VFIFO)
VN_LOCK_ASHARE(vp);
Expand Down
1 change: 1 addition & 0 deletions sys/fs/autofs/autofs_vnops.c
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,7 @@ autofs_node_vn(struct autofs_node *anp, struct mount *mp, int flags,

sx_xunlock(&anp->an_vnode_lock);

vn_set_state(vp, VSTATE_CONSTRUCTED);
*vpp = vp;
return (0);
}
1 change: 1 addition & 0 deletions sys/fs/cd9660/cd9660_vfsops.c
Original file line number Diff line number Diff line change
Expand Up @@ -822,6 +822,7 @@ cd9660_vget_internal(struct mount *mp, cd_ino_t ino, int flags,
* XXX need generation number?
*/

vn_set_state(vp, VSTATE_CONSTRUCTED);
*vpp = vp;
return (0);
}
2 changes: 2 additions & 0 deletions sys/fs/devfs/devfs_vnops.c
Original file line number Diff line number Diff line change
Expand Up @@ -614,13 +614,15 @@ devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
return (error);
}
if (devfs_allocv_drop_refs(0, dmp, de)) {
vgone(vp);
vput(vp);
return (ENOENT);
}
#ifdef MAC
mac_devfs_vnode_associate(mp, de, vp);
#endif
sx_xunlock(&dmp->dm_lock);
vn_set_state(vp, VSTATE_CONSTRUCTED);
*vpp = vp;
return (0);
}
Expand Down
1 change: 1 addition & 0 deletions sys/fs/ext2fs/ext2_alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,7 @@ ext2_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp)
ip->i_birthtime = ts.tv_sec;
ip->i_birthnsec = ts.tv_nsec;

vn_set_state(vp, VSTATE_CONSTRUCTED);
*vpp = vp;

return (0);
Expand Down
1 change: 1 addition & 0 deletions sys/fs/ext2fs/ext2_vfsops.c
Original file line number Diff line number Diff line change
Expand Up @@ -1308,6 +1308,7 @@ ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
* Finish inode initialization.
*/

vn_set_state(vp, VSTATE_CONSTRUCTED);
*vpp = vp;
return (0);
}
Expand Down
1 change: 1 addition & 0 deletions sys/fs/fdescfs/fdesc_vnops.c
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ fdesc_allocvp(fdntype ftype, unsigned fd_fd, int ix, struct mount *mp,
/* If we came here, we can insert it safely. */
LIST_INSERT_HEAD(fc, fd, fd_hash);
mtx_unlock(&fdesc_hashmtx);
vn_set_state(vp, VSTATE_CONSTRUCTED);
*vpp = vp;
return (0);
}
Expand Down
1 change: 1 addition & 0 deletions sys/fs/fuse/fuse_node.c
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ fuse_vnode_alloc(struct mount *mp,
if (data->dataflags & FSESS_ASYNC_READ && vtyp != VFIFO)
VN_LOCK_ASHARE(*vpp);

vn_set_state(*vpp, VSTATE_CONSTRUCTED);
err = vfs_hash_insert(*vpp, fuse_vnode_hash(nodeid), LK_EXCLUSIVE,
td, &vp2, fuse_vnode_cmp, &nodeid);
if (err) {
Expand Down
1 change: 1 addition & 0 deletions sys/fs/mntfs/mntfs_vnops.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ mntfs_allocvp(struct mount *mp, struct vnode *ovp)

VOP_UNLOCK(ovp);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
vn_set_state(vp, VSTATE_CONSTRUCTED);
return (vp);
}

Expand Down
1 change: 1 addition & 0 deletions sys/fs/msdosfs/msdosfs_denode.c
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ deget(struct msdosfsmount *pmp, u_long dirclust, u_long diroffset,
}
} else
nvp->v_type = VREG;
vn_set_state(nvp, VSTATE_CONSTRUCTED);
ldep->de_modrev = init_va_filerev();
*depp = ldep;
return (0);
Expand Down
1 change: 1 addition & 0 deletions sys/fs/nfsclient/nfs_clnode.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ ncl_nget(struct mount *mntp, u_int8_t *fhp, int fhsize, struct nfsnode **npp,
uma_zfree(newnfsnode_zone, np);
return (error);
}
vn_set_state(vp, VSTATE_CONSTRUCTED);
error = vfs_hash_insert(vp, hash, lkflags,
td, &nvp, newnfs_vncmpf, np->n_fhp);
if (error)
Expand Down
1 change: 1 addition & 0 deletions sys/fs/nfsclient/nfs_clport.c
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ nfscl_nget(struct mount *mntp, struct vnode *dvp, struct nfsfh *nfhp,
uma_zfree(newnfsnode_zone, np);
return (error);
}
vn_set_state(vp, VSTATE_CONSTRUCTED);
error = vfs_hash_insert(vp, hash, lkflags,
td, &nvp, newnfs_vncmpf, nfhp);
if (error)
Expand Down
1 change: 1 addition & 0 deletions sys/fs/nullfs/null_subr.c
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
}

null_hashins(mp, xp);
vn_set_state(vp, VSTATE_CONSTRUCTED);
rw_wunlock(&null_hash_lock);
*vpp = vp;

Expand Down
1 change: 1 addition & 0 deletions sys/fs/pseudofs/pseudofs_vncache.c
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ pfs_vncache_alloc(struct mount *mp, struct vnode **vpp,
*vpp = NULLVP;
return (error);
}
vn_set_state(*vpp, VSTATE_CONSTRUCTED);
retry2:
mtx_lock(&pfs_vncache_mutex);
/*
Expand Down
1 change: 1 addition & 0 deletions sys/fs/smbfs/smbfs_node.c
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ smbfs_node_alloc(struct mount *mp, struct vnode *dvp, const char *dirnm,
free(np, M_SMBNODE);
return (error);
}
vn_set_state(vp, VSTATE_CONSTRUCTED);
error = vfs_hash_insert(vp, smbfs_hash(name, nmlen), LK_EXCLUSIVE,
td, &vp2, smbfs_vnode_cmp, &sc);
if (error)
Expand Down
2 changes: 2 additions & 0 deletions sys/fs/tmpfs/tmpfs_subr.c
Original file line number Diff line number Diff line change
Expand Up @@ -1080,6 +1080,8 @@ tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
vgone(vp);
vput(vp);
vp = NULL;
} else {
vn_set_state(vp, VSTATE_CONSTRUCTED);
}

unlock:
Expand Down
1 change: 1 addition & 0 deletions sys/fs/udf/udf_vfsops.c
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,7 @@ udf_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
if (ino == udf_getid(&udfmp->root_icb))
vp->v_vflag |= VV_ROOT;

vn_set_state(vp, VSTATE_CONSTRUCTED);
*vpp = vp;

return (0);
Expand Down
2 changes: 2 additions & 0 deletions sys/fs/unionfs/union_subr.c
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,8 @@ unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
return (ENOENT);
}

vn_set_state(vp, VSTATE_CONSTRUCTED);

if (dvp != NULLVP && vt == VDIR)
*vpp = unionfs_ins_cached_vnode(unp, dvp);
if (*vpp != NULLVP) {
Expand Down
1 change: 1 addition & 0 deletions sys/kern/vfs_lookup.c
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ nameiinit(void *dummy __unused)
UMA_ALIGN_PTR, 0);
vfs_vector_op_register(&crossmp_vnodeops);
getnewvnode("crossmp", NULL, &crossmp_vnodeops, &vp_crossmp);
vp_crossmp->v_state = VSTATE_CONSTRUCTED;
}
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL);

Expand Down
76 changes: 73 additions & 3 deletions sys/kern/vfs_subr.c
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,8 @@ vnode_init(void *mem, int size, int flags)

vp->v_dbatchcpu = NOCPU;

vp->v_state = VSTATE_DEAD;

/*
* Check vhold_recycle_free for an explanation.
*/
Expand Down Expand Up @@ -1796,6 +1798,9 @@ getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops,
vp = vn_alloc(mp);
}
counter_u64_add(vnodes_created, 1);

vn_set_state(vp, VSTATE_UNINITIALIZED);

/*
* Locks are given the generic name "vnode" when created.
* Follow the historic practice of using the filesystem
Expand Down Expand Up @@ -4019,14 +4024,18 @@ vgonel(struct vnode *vp)
/*
* Don't vgonel if we're already doomed.
*/
if (VN_IS_DOOMED(vp))
if (VN_IS_DOOMED(vp)) {
VNPASS(vn_get_state(vp) == VSTATE_DESTROYING || \
vn_get_state(vp) == VSTATE_DEAD, vp);
return;
}
/*
* Paired with freevnode.
*/
vn_seqc_write_begin_locked(vp);
vunlazy_gone(vp);
vn_irflag_set_locked(vp, VIRF_DOOMED);
vn_set_state(vp, VSTATE_DESTROYING);

/*
* Check to see if the vnode is in use. If so, we have to
Expand Down Expand Up @@ -4144,6 +4153,7 @@ vgonel(struct vnode *vp)
vp->v_vnlock = &vp->v_lock;
vp->v_op = &dead_vnodeops;
vp->v_type = VBAD;
vn_set_state(vp, VSTATE_DEAD);
}

/*
Expand All @@ -4164,6 +4174,15 @@ static const char *const vtypename[] = {
_Static_assert(nitems(vtypename) == VLASTTYPE + 1,
"vnode type name not added to vtypename");

static const char *const vstatename[] = {
[VSTATE_UNINITIALIZED] = "VSTATE_UNINITIALIZED",
[VSTATE_CONSTRUCTED] = "VSTATE_CONSTRUCTED",
[VSTATE_DESTROYING] = "VSTATE_DESTROYING",
[VSTATE_DEAD] = "VSTATE_DEAD",
};
_Static_assert(nitems(vstatename) == VLASTSTATE + 1,
"vnode state name not added to vstatename");

_Static_assert((VHOLD_ALL_FLAGS & ~VHOLD_NO_SMR) == 0,
"new hold count flag not added to vn_printf");

Expand All @@ -4180,7 +4199,7 @@ vn_printf(struct vnode *vp, const char *fmt, ...)
vprintf(fmt, ap);
va_end(ap);
printf("%p: ", (void *)vp);
printf("type %s\n", vtypename[vp->v_type]);
printf("type %s state %s\n", vtypename[vp->v_type], vstatename[vp->v_state]);
holdcnt = atomic_load_int(&vp->v_holdcnt);
printf(" usecount %d, writecount %d, refcount %d seqc users %d",
vp->v_usecount, vp->v_writecount, holdcnt & ~VHOLD_ALL_FLAGS,
Expand Down Expand Up @@ -5112,6 +5131,7 @@ vfs_allocate_syncvnode(struct mount *mp)
if (error != 0)
panic("vfs_allocate_syncvnode: insmntque() failed");
vp->v_vflag &= ~VV_FORCEINSMQ;
vn_set_state(vp, VSTATE_CONSTRUCTED);
VOP_UNLOCK(vp);
/*
* Place the vnode onto the syncer worklist. We attempt to
Expand Down Expand Up @@ -5758,8 +5778,10 @@ void
vop_unlock_debugpre(void *ap)
{
struct vop_unlock_args *a = ap;
struct vnode *vp = a->a_vp;

ASSERT_VOP_LOCKED(a->a_vp, "VOP_UNLOCK");
VNPASS(vn_get_state(vp) != VSTATE_UNINITIALIZED, vp);
ASSERT_VOP_LOCKED(vp, "VOP_UNLOCK");
}

void
Expand Down Expand Up @@ -7130,6 +7152,54 @@ vn_irflag_unset(struct vnode *vp, short tounset)
vn_irflag_unset_locked(vp, tounset);
VI_UNLOCK(vp);
}

#ifdef INVARIANTS
void
vn_set_state_validate(struct vnode *vp, enum vstate state)
{

switch (vp->v_state) {
case VSTATE_UNINITIALIZED:
switch (state) {
case VSTATE_CONSTRUCTED:
case VSTATE_DESTROYING:
return;
default:
break;
}
break;
case VSTATE_CONSTRUCTED:
ASSERT_VOP_ELOCKED(vp, __func__);
switch (state) {
case VSTATE_DESTROYING:
return;
default:
break;
}
break;
case VSTATE_DESTROYING:
ASSERT_VOP_ELOCKED(vp, __func__);
switch (state) {
case VSTATE_DEAD:
return;
default:
break;
}
break;
case VSTATE_DEAD:
switch (state) {
case VSTATE_UNINITIALIZED:
return;
default:
break;
}
break;
}

vn_printf(vp, "invalid state transition %d -> %d\n", vp->v_state, state);
panic("invalid state transition %d -> %d\n", vp->v_state, state);
}
#endif
// CHERI CHANGES START
// {
// "updated": 20221205,
Expand Down
2 changes: 1 addition & 1 deletion sys/sys/param.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
* cannot include sys/param.h and should only be updated here.
*/
#undef __FreeBSD_version
#define __FreeBSD_version 1400076
#define __FreeBSD_version 1400077

/*
* __CheriBSD_version numbers describe CheriBSD ABIs.
Expand Down
24 changes: 24 additions & 0 deletions sys/sys/vnode.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD,
VMARKER };
#define VLASTTYPE VMARKER

enum vstate { VSTATE_UNINITIALIZED, VSTATE_CONSTRUCTED, VSTATE_DESTROYING,
VSTATE_DEAD };
#define VLASTSTATE VSTATE_DEAD

enum vgetstate { VGET_NONE, VGET_HOLDCNT, VGET_USECOUNT };
/*
* Each underlying filesystem allocates its own private area and hangs
Expand Down Expand Up @@ -107,6 +111,7 @@ struct vnode {
* owned by the filesystem (XXX: and vgone() ?)
*/
enum vtype v_type:8; /* u vnode type */
enum vstate v_state:8; /* u vnode state */
short v_irflag; /* i frequently read flags */
seqc_t v_seqc; /* i modification count */
uint32_t v_nchash; /* u namecache hash */
Expand Down Expand Up @@ -1139,6 +1144,25 @@ void vn_fsid(struct vnode *vp, struct vattr *va);
int vn_dir_check_exec(struct vnode *vp, struct componentname *cnp);
int vn_lktype_write(struct mount *mp, struct vnode *vp);

#ifdef INVARIANTS
void vn_set_state_validate(struct vnode *vp, enum vstate state);
#endif

static inline void
vn_set_state(struct vnode *vp, enum vstate state)
{
#ifdef INVARIANTS
vn_set_state_validate(vp, state);
#endif
vp->v_state = state;
}

static inline enum vstate
vn_get_state(struct vnode *vp)
{
return (vp->v_state);
}

#define VOP_UNLOCK_FLAGS(vp, flags) ({ \
struct vnode *_vp = (vp); \
int _flags = (flags); \
Expand Down
1 change: 1 addition & 0 deletions sys/ufs/ffs/ffs_vfsops.c
Original file line number Diff line number Diff line change
Expand Up @@ -2026,6 +2026,7 @@ ffs_vgetf(struct mount *mp,
}
#endif

vn_set_state(vp, VSTATE_CONSTRUCTED);
*vpp = vp;
return (0);
}
Expand Down

0 comments on commit 9e9fbe5

Please sign in to comment.