Skip to content

Commit

Permalink
Introduce vnode v_parent and calls
Browse files Browse the repository at this point in the history
Add vnode_parent() and vnode_setparent() to have easier access
to the parent vnode. This is something XNU does internally
in VFS, but the Windows port initially skipped it.
Some care is needed with symlinks, as the parent can change
depending on how it was opened.

Potentially the real parent needs to be stored in zccb, due
to how Windows handles things.

This changes a lot and is likely to be a bit unstable.
Most noticeably, we now actually call zfs_reclaim()
on deleted files, and reclaim the disk space they took up.

Quite a number of vnode iocount leaks were also corrected.

Additional illegal stream names, rename names and characters
are now also checked for. This is still incomplete.

Signed-off-by: Jorgen Lundman <lundman@lundman.net>
  • Loading branch information
lundman committed Jul 5, 2023
1 parent 2409c5a commit 7983980
Show file tree
Hide file tree
Showing 6 changed files with 355 additions and 144 deletions.
15 changes: 10 additions & 5 deletions include/os/windows/spl/sys/vnode.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,18 @@ struct vnode {
kmutex_t v_mutex;

mount_t *v_mount;
struct vnode *v_parent;
void *v_data;
REPARSE_DATA_BUFFER *v_reparse;
SECURITY_DESCRIPTOR *security_descriptor;

uint32_t v_flags;
uint32_t v_iocount; // Short term holds
uint32_t v_usecount; // Long term holds
uint32_t v_type;
uint32_t v_unlink;
REPARSE_DATA_BUFFER *v_reparse;
size_t v_reparse_size;
uint32_t v_unused;
void *v_data;
size_t v_reparse_size;
uint64_t v_id;
uint64_t v_easize;
hrtime_t v_age; // How long since entered DEAD
Expand All @@ -105,7 +108,6 @@ struct vnode {
ERESOURCE resource; // Holder for FileHeader.Resource
ERESOURCE pageio_resource; // Holder for FileHeader.PageIoResource
FILE_LOCK lock;
SECURITY_DESCRIPTOR *security_descriptor;
SHARE_ACCESS share_access;

list_node_t v_list; // vnode_all_list member node.
Expand Down Expand Up @@ -486,12 +488,15 @@ int vnode_recycle(vnode_t *vp);
int vnode_isvroot(vnode_t *vp);
mount_t *vnode_mount(vnode_t *vp);
void vnode_clearfsnode(vnode_t *vp);
void vnode_create(mount_t *, void *v_data, int type, int flags,
void vnode_create(mount_t *, struct vnode *,
void *v_data, int type, int flags,
struct vnode **vpp);
int vnode_ref(vnode_t *vp);
void vnode_rele(vnode_t *vp);
void *vnode_sectionpointer(vnode_t *vp);
void *vnode_security(vnode_t *vp);
vnode_t *vnode_parent(vnode_t *vp);
void vnode_setparent(vnode_t *vp, vnode_t *dvp);
void vnode_setsecurity(vnode_t *vp, void *sd);
void vnode_couplefileobject(vnode_t *vp, FILE_OBJECT *fileobject,
uint64_t size);
Expand Down
175 changes: 128 additions & 47 deletions module/os/windows/spl/spl-vnode.c
Original file line number Diff line number Diff line change
Expand Up @@ -952,6 +952,46 @@ vnode_iocount(vnode_t *vp)
return (vp->v_iocount);
}

/*
 * Return the parent vnode stored in vp->v_parent.
 *
 * The parent is VERIFY'd to be non-NULL, so this must only be called
 * on a vnode that has a parent set. No additional reference or hold
 * is taken on the returned vnode here.
 */
vnode_t *
vnode_parent(vnode_t *vp)
{
	vnode_t *dvp;

	VERIFY3P(vp->v_parent, !=, NULL);

	dvp = vp->v_parent;
	return (dvp);
}

/*
 * Point vp at a new parent vnode.
 *
 * The filesystem normally has no reason to call this; the expected
 * caller is a rename, where the vnode may end up under a different
 * directory.
 *
 * newparent must not be NULL: a vnode can not turn into the root.
 * Should that ever be required, pivot_root() is the thing to
 * implement instead.
 *
 * vp must either already have a parent or be flagged VNODE_MARKROOT.
 * Setting the parent it already has is a no-op.
 */
void
vnode_setparent(vnode_t *vp, vnode_t *newparent)
{
	struct vnode *prev;
	int hold_err;

	VERIFY((vp->v_parent != NULL) || (vp->v_flags & VNODE_MARKROOT));
	VERIFY3P(newparent, !=, NULL);

	prev = vp->v_parent;
	if (prev == newparent)
		return;

	/* Reference the new parent first, then switch vp over to it. */
	vnode_ref(newparent);
	vp->v_parent = newparent;

	/* Nothing to release if there was no previous parent. */
	if (prev == NULL)
		return;

	/*
	 * Release the reference on the previous parent. A temporary
	 * VN_HOLD() is attempted around the vnode_rele(); only when that
	 * hold succeeded is it given back with vnode_put().
	 */
	hold_err = VN_HOLD(prev);
	vnode_rele(prev);
	if (hold_err == 0)
		vnode_put(prev);
}

#ifdef DEBUG_IOCOUNT
int
vnode_getwithref(vnode_t *vp, char *file, int line)
Expand Down Expand Up @@ -1043,8 +1083,6 @@ int
vnode_put(vnode_t *vp)
#endif
{
// KIRQL OldIrql;
int calldrain = 0;
ASSERT(!(vp->v_flags & VNODE_DEAD));
ASSERT(vp->v_iocount > 0);
ASSERT((vp->v_flags & ~VNODE_VALIDBITS) == 0);
Expand All @@ -1063,38 +1101,29 @@ vnode_put(vnode_t *vp)
// Now idle?
mutex_enter(&vp->v_mutex);

if (vp->v_iocount == 0) {

if (vp->v_usecount == 0)
calldrain = 1;

if (vp->v_flags & VNODE_NEEDINACTIVE) {
vp->v_flags &= ~VNODE_NEEDINACTIVE;
mutex_exit(&vp->v_mutex);
zfs_inactive(vp, NULL, NULL);
mutex_enter(&vp->v_mutex);
}
if ((vp->v_usecount == 0) && (vp->v_iocount == 0)) {
// XNU always calls inactive in vnode_put
vp->v_flags &= ~VNODE_NEEDINACTIVE;
mutex_exit(&vp->v_mutex);
zfs_inactive(vp, NULL, NULL);
mutex_enter(&vp->v_mutex);
}

vp->v_flags &= ~VNODE_NEEDINACTIVE;

#if 0
#if 1
// Re-test for idle, as we may have dropped lock for inactive
if ((vp->v_usecount == 0) && (vp->v_iocount == 0)) {
// Was it marked TERM, but we were waiting for last ref
if ((vp->v_flags & VNODE_MARKTERM)) {
KeReleaseSpinLock(&vp->v_spinlock, OldIrql);
vnode_recycle_int(vp, 0); // OldIrql is lost!
if ((vp->v_flags & (VNODE_MARKTERM | VNODE_DEAD)) ==
VNODE_MARKTERM) {
vnode_recycle_int(vp, VNODELOCKED);
return (0);
}
}
#endif
mutex_exit(&vp->v_mutex);

// Temporarily - should perhaps be own thread?
// if (calldrain)
// vnode_drain_delayclose(0);

return (0);
}

Expand All @@ -1104,17 +1133,50 @@ vnode_recycle_int(vnode_t *vp, int flags)
// KIRQL OldIrql;
ASSERT((vp->v_flags & VNODE_DEAD) == 0);

// Already locked calling in...
if (!(flags & VNODELOCKED)) {
mutex_enter(&vp->v_mutex);
}

// Mark it for recycle, if we are not ROOT.
if (!(vp->v_flags&VNODE_MARKROOT)) {
if (vp->v_flags & VNODE_MARKTERM)

if (vp->v_flags & VNODE_MARKTERM) {
dprintf("already marked\n");
vp->v_flags |= VNODE_MARKTERM; // Mark it terminating
dprintf("%s: marking %p VNODE_MARKTERM\n", __func__, vp);
}
} else {
vp->v_flags |= VNODE_MARKTERM; // Mark it terminating
dprintf("%s: marking %p VNODE_MARKTERM\n",
__func__, vp);

// Already locked calling in...
if (!(flags & VNODELOCKED)) {
mutex_enter(&vp->v_mutex);
// Call inactive?
mutex_exit(&vp->v_mutex);
if (vp->v_flags & VNODE_NEEDINACTIVE) {
vp->v_flags &= ~VNODE_NEEDINACTIVE;
zfs_inactive(vp, NULL, NULL);
VERIFY3U(vp->v_iocount, ==, 1);
}

// Call sync? If vnode_write
// zfs_fsync(vp, 0, NULL, NULL);

// Call reclaim and Tell FS to release node.
if (vp->v_data != NULL)
if (zfs_vnop_reclaim(vp))
panic("vnode_recycle: cannot reclaim\n");

// Remove parent hold.
VERIFY((vp->v_parent != NULL) ||
(vp->v_flags & VNODE_MARKROOT));
// hold iocount cos of ASSERT in vnode_rele
if ((vp->v_parent != NULL) &&
(vnode_getwithref(vp->v_parent) == 0)) {
vnode_rele(vp->v_parent);
vnode_put(vp->v_parent);
}
vp->v_parent = NULL;

mutex_enter(&vp->v_mutex);
}
}

// Doublecheck CcMgr is gone (should be if avl is empty)
Expand All @@ -1131,10 +1193,11 @@ vnode_recycle_int(vnode_t *vp, int flags)
#endif

// We will only reclaim idle nodes, and not mountpoints(ROOT)
// lets try letting zfs reclaim, then linger nodes.
if ((flags & FORCECLOSE) ||
((vp->v_usecount == 0) &&
(vp->v_iocount <= 1) &&
avl_is_empty(&vp->v_fileobjects) &&
/* avl_is_empty(&vp->v_fileobjects) && */
((vp->v_flags&VNODE_MARKROOT) == 0))) {

ASSERT3P(vp->SectionObjectPointers.DataSectionObject, ==, NULL);
Expand All @@ -1145,32 +1208,27 @@ vnode_recycle_int(vnode_t *vp, int flags)
vp->v_flags |= VNODE_DEAD; // Mark it dead
// Since we might get swapped out (noticably FsRtlTeardownPerStreamContexts)
// we hold a look until the very end.
vp->v_iocount = 1;
atomic_inc_32(&vp->v_iocount);

mutex_exit(&vp->v_mutex);

FsRtlTeardownPerStreamContexts(&vp->FileHeader);
FsRtlUninitializeFileLock(&vp->lock);

// Call sync? If vnode_write
// zfs_fsync(vp, 0, NULL, NULL);

// Call inactive?
if (vp->v_flags & VNODE_NEEDINACTIVE) {
vp->v_flags &= ~VNODE_NEEDINACTIVE;
zfs_inactive(vp, NULL, NULL);
}


// Tell FS to release node.
if (zfs_vnop_reclaim(vp))
panic("vnode_recycle: cannot reclaim\n");

// KIRQL OldIrql;
mutex_enter(&vp->v_mutex);

dprintf("Dropping %d references",
avl_numnodes(&vp->v_fileobjects));
vnode_fileobjects_t *node;
while (node = avl_first(&vp->v_fileobjects)) {
avl_remove(&vp->v_fileobjects, node);
kmem_free(node, sizeof (*node));
}
ASSERT(avl_is_empty(&vp->v_fileobjects));
// We are all done with it.
vp->v_iocount = 0;
VERIFY3U(vp->v_iocount, ==, 1);
atomic_dec_32(&vp->v_iocount);
mutex_exit(&vp->v_mutex);

#ifdef FIND_MAF
Expand Down Expand Up @@ -1229,7 +1287,7 @@ vp_oplock(struct vnode *vp)
}

void
vnode_create(mount_t *mp, void *v_data, int type, int flags,
vnode_create(mount_t *mp, struct vnode *dvp, void *v_data, int type, int flags,
struct vnode **vpp)
{
struct vnode *vp;
Expand All @@ -1240,6 +1298,7 @@ vnode_create(mount_t *mp, void *v_data, int type, int flags,
*vpp = vp;
vp->v_flags = 0;
vp->v_mount = mp;
vp->v_parent = dvp;
vp->v_data = v_data;
vp->v_type = type;
vp->v_id = atomic_inc_64_nv(&(vnode_vid_counter));
Expand All @@ -1257,6 +1316,10 @@ vnode_create(mount_t *mp, void *v_data, int type, int flags,
if (flags & VNODE_MARKROOT)
vp->v_flags |= VNODE_MARKROOT;

// Hold parent reference
VERIFY((dvp != NULL) || (vp->v_flags&VNODE_MARKROOT));
if (dvp != NULL)
vnode_ref(dvp);

// Initialise the Windows specific data.
memset(&vp->SectionObjectPointers, 0,
Expand Down Expand Up @@ -1430,6 +1493,13 @@ vnode_drain_delayclose(int force)

// dprintf("age is %llu %d\n", (curtime - vp->v_age),
// NSEC2SEC(curtime - vp->v_age));
dprintf("Dropping %d references 2",
avl_numnodes(&vp->v_fileobjects));
vnode_fileobjects_t *node;
while (node = avl_first(&vp->v_fileobjects)) {
avl_remove(&vp->v_fileobjects, node);
kmem_free(node, sizeof (*node));
}

// Finally free vp.
list_remove(&vnode_all_list, vp);
Expand Down Expand Up @@ -1497,10 +1567,13 @@ vflush(struct mount *mp, struct vnode *skipvp, int flags)
vnode_fileobjects_t *node;
struct vnode *rvp;
int Status;
boolean_t filesonly = B_TRUE;

dprintf("vflush start\n");

mutex_enter(&vnode_all_list_lock);

filesanddirs:
while (1) {
for (rvp = list_head(&vnode_all_list);
rvp;
Expand All @@ -1510,6 +1583,9 @@ vflush(struct mount *mp, struct vnode *skipvp, int flags)
if (mp && rvp->v_mount != mp)
continue;

if (filesonly && vnode_isdir(rvp))
continue;

// If we aren't FORCE and asked to SKIPROOT, and node
// is MARKROOT, then go to next.
if (!(flags & FORCECLOSE))
Expand Down Expand Up @@ -1603,6 +1679,11 @@ vflush(struct mount *mp, struct vnode *skipvp, int flags)
break;
}

if (filesonly) {
filesonly = B_FALSE;
goto filesanddirs;
}

mutex_exit(&vnode_all_list_lock);

if (mp == NULL && reclaims > 0) {
Expand Down Expand Up @@ -1644,7 +1725,7 @@ vflush(struct mount *mp, struct vnode *skipvp, int flags)
if (FORCECLOSE)
vnode_drain_delayclose(1);

xprintf("vflush end: deadlisted %d nodes\n", deadlist);
dprintf("vflush end: deadlisted %d nodes\n", deadlist);

return (0);
}
Expand Down
27 changes: 26 additions & 1 deletion module/os/windows/zfs/zfs_ctldir.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ struct zfsctl_unmount_delay {
};
typedef struct zfsctl_unmount_delay zfsctl_unmount_delay_t;

static struct vnode *
zfsctl_vnode_lookup(zfsvfs_t *zfsvfs, uint64_t id,
char *name);

/*
* Check if the given vnode is a part of the virtual .zfs directory.
Expand Down Expand Up @@ -233,8 +236,30 @@ zfsctl_vnode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
* Because of this, we need to call vnode_recycle() ourselves in destroy
*/

/* We need parent */
znode_t *parentzp = NULL;
struct vnode *parentvp = NULL;
int error = 0;
if (id == ZFSCTL_INO_ROOT)
error = zfs_zget(zfsvfs, zfsvfs->z_root, &parentzp);
else if (id == ZFSCTL_INO_SNAPDIR)
parentvp = zfsctl_root(zp);
else
parentvp = zfsctl_vnode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIR,
ZFS_SNAPDIR_NAME);

if (error && !parentvp) {
dprintf("%s: unable to get parent?", __func__);
return (SET_ERROR(EINVAL));
}

if (!parentvp && parentzp)
parentvp = ZTOV(parentzp);

vnode_create(zfsvfs->z_vfs, parentvp,
zp, VDIR, flags, &vp);

vnode_create(zfsvfs->z_vfs, zp, VDIR, flags, &vp);
VN_RELE(parentvp);

dprintf("Assigned zp %p with vp %p zfsvfs %p\n", zp, vp, zp->z_zfsvfs);

Expand Down
Loading

0 comments on commit 7983980

Please sign in to comment.