diff --git a/include/sys/zvol_impl.h b/include/sys/zvol_impl.h index 6c15c84b6bf4..3cd0d78c353d 100644 --- a/include/sys/zvol_impl.h +++ b/include/sys/zvol_impl.h @@ -18,22 +18,19 @@ * * CDDL HEADER END */ +/* + * Copyright (c) 2024, Klara, Inc. + */ #ifndef _SYS_ZVOL_IMPL_H #define _SYS_ZVOL_IMPL_H #include -#define ZVOL_RDONLY 0x1 -/* - * Whether the zvol has been written to (as opposed to ZVOL_RDONLY, which - * specifies whether or not the zvol _can_ be written to) - */ -#define ZVOL_WRITTEN_TO 0x2 - -#define ZVOL_DUMPIFIED 0x4 - -#define ZVOL_EXCL 0x8 +#define ZVOL_RDONLY (1<<0) /* zvol is readonly (writes rejected) */ +#define ZVOL_WRITTEN_TO (1<<1) /* zvol has been written to (needs flush) */ +#define ZVOL_EXCL (1<<2) /* zvol has O_EXCL client right now */ +#define ZVOL_REMOVING (1<<3) /* zvol waiting to remove minor */ /* * The in-core state of each volume. @@ -57,6 +54,7 @@ typedef struct zvol_state { kmutex_t zv_state_lock; /* protects zvol_state_t */ atomic_t zv_suspend_ref; /* refcount for suspend */ krwlock_t zv_suspend_lock; /* suspend lock */ + kcondvar_t zv_removing_cv; /* ready to remove minor */ struct zvol_state_os *zv_zso; /* private platform state */ boolean_t zv_threading; /* volthreading property */ } zvol_state_t; diff --git a/module/os/freebsd/zfs/zvol_os.c b/module/os/freebsd/zfs/zvol_os.c index 38e9debbe877..0d526484cec3 100644 --- a/module/os/freebsd/zfs/zvol_os.c +++ b/module/os/freebsd/zfs/zvol_os.c @@ -30,6 +30,7 @@ * Copyright (c) 2012, 2017 by Delphix. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2014 Integros [integros.com] + * Copyright (c) 2024, Klara, Inc. */ /* Portions Copyright 2011 Martin Matuska */ @@ -250,7 +251,7 @@ zvol_geom_open(struct g_provider *pp, int flag, int count) } mutex_enter(&zv->zv_state_lock); - if (zv->zv_zso->zso_dying) { + if (zv->zv_zso->zso_dying || zv->zv_flags & ZVOL_REMOVING) { rw_exit(&zvol_state_lock); err = SET_ERROR(ENXIO); goto out_zv_locked; @@ -683,6 +684,11 @@ zvol_geom_bio_strategy(struct bio *bp) rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); + if (zv->zv_flags & ZVOL_REMOVING) { + error = SET_ERROR(ENXIO); + goto resume; + } + switch (bp->bio_cmd) { case BIO_READ: doread = B_TRUE; @@ -1362,6 +1368,7 @@ zvol_os_free(zvol_state_t *zv) } mutex_destroy(&zv->zv_state_lock); + cv_destroy(&zv->zv_removing_cv); dataset_kstats_destroy(&zv->zv_kstat); kmem_free(zv->zv_zso, sizeof (struct zvol_state_os)); kmem_free(zv, sizeof (zvol_state_t)); @@ -1419,6 +1426,7 @@ zvol_os_create_minor(const char *name) zv = kmem_zalloc(sizeof (*zv), KM_SLEEP); zv->zv_hash = hash; mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&zv->zv_removing_cv, NULL, CV_DEFAULT, NULL); zv->zv_zso = kmem_zalloc(sizeof (struct zvol_state_os), KM_SLEEP); zv->zv_volmode = volmode; if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c index c01caa6da8b4..83f80f62aee7 100644 --- a/module/os/linux/zfs/zvol_os.c +++ b/module/os/linux/zfs/zvol_os.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2012, 2020 by Delphix. All rights reserved. + * Copyright (c) 2024, Klara, Inc. */ #include @@ -526,6 +527,11 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, uint64_t size = io_size(bio, rq); int rw = io_data_dir(bio, rq); + if (unlikely(zv->zv_flags & ZVOL_REMOVING)) { + END_IO(zv, bio, rq, -SET_ERROR(ENXIO)); + goto out; + } + if (zvol_request_sync || zv->zv_threading == B_FALSE) force_sync = 1; @@ -730,10 +736,17 @@ zvol_open(struct block_device *bdev, fmode_t flag) #endif if (zv == NULL) { rw_exit(&zvol_state_lock); - return (SET_ERROR(-ENXIO)); + return (-SET_ERROR(ENXIO)); } mutex_enter(&zv->zv_state_lock); + + if (unlikely(zv->zv_flags & ZVOL_REMOVING)) { + mutex_exit(&zv->zv_state_lock); + rw_exit(&zvol_state_lock); + return (-SET_ERROR(ENXIO)); + } + /* * Make sure zvol is not suspended during first open * (hold zv_suspend_lock) and respect proper lock acquisition @@ -795,10 +808,10 @@ zvol_open(struct block_device *bdev, fmode_t flag) #ifdef HAVE_BLKDEV_GET_ERESTARTSYS schedule(); - return (SET_ERROR(-ERESTARTSYS)); + return (-SET_ERROR(ERESTARTSYS)); #else if ((gethrtime() - start) > timeout) - return (SET_ERROR(-ERESTARTSYS)); + return (-SET_ERROR(ERESTARTSYS)); schedule_timeout_interruptible( MSEC_TO_TICK(10)); @@ -821,7 +834,7 @@ zvol_open(struct block_device *bdev, fmode_t flag) if (zv->zv_open_count == 0) zvol_last_close(zv); - error = SET_ERROR(-EROFS); + error = -SET_ERROR(EROFS); } else { zv->zv_open_count++; } @@ -1313,6 +1326,7 @@ zvol_alloc(dev_t dev, const char *name) list_link_init(&zv->zv_next); mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&zv->zv_removing_cv, NULL, CV_DEFAULT, NULL); #ifdef HAVE_BLK_MQ zv->zv_zso->use_blk_mq = zvol_use_blk_mq; @@ -1438,6 +1452,7 @@ zvol_os_free(zvol_state_t *zv) ida_simple_remove(&zvol_ida, MINOR(zv->zv_zso->zvo_dev) >> ZVOL_MINOR_BITS); + cv_destroy(&zv->zv_removing_cv); mutex_destroy(&zv->zv_state_lock); dataset_kstats_destroy(&zv->zv_kstat); diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 5b6a3f5cb410..001f774a6d16 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -37,6 +37,7 @@ * Copyright 2014 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved. * Copyright (c) 2012, 2019 by Delphix. All rights reserved. + * Copyright (c) 2024, Klara, Inc. */ /* @@ -894,6 +895,9 @@ zvol_resume(zvol_state_t *zv) */ atomic_dec(&zv->zv_suspend_ref); + if (zv->zv_flags & ZVOL_REMOVING) + cv_broadcast(&zv->zv_removing_cv); + return (SET_ERROR(error)); } @@ -929,6 +933,9 @@ zvol_last_close(zvol_state_t *zv) ASSERT(RW_READ_HELD(&zv->zv_suspend_lock)); ASSERT(MUTEX_HELD(&zv->zv_state_lock)); + if (zv->zv_flags & ZVOL_REMOVING) + cv_broadcast(&zv->zv_removing_cv); + zvol_shutdown_zv(zv); dmu_objset_disown(zv->zv_objset, 1, zv); @@ -1221,6 +1228,41 @@ zvol_create_minor(const char *name) * Remove minors for specified dataset including children and snapshots. */ +/* + * Remove the minor for a given zvol. This will do it all: + * - flag the zvol for removal, so new requests are rejected + * - wait until outstanding requests are completed + * - remove it from lists + * - free it + * It's also usable as a taskq task, and smells nice too. + */ +static void +zvol_remove_minor_task(void *arg) +{ + zvol_state_t *zv = (zvol_state_t *)arg; + + ASSERT(!RW_LOCK_HELD(&zvol_state_lock)); + ASSERT(!MUTEX_HELD(&zv->zv_state_lock)); + + mutex_enter(&zv->zv_state_lock); + while (zv->zv_open_count > 0 || atomic_read(&zv->zv_suspend_ref)) { + zv->zv_flags |= ZVOL_REMOVING; + cv_wait(&zv->zv_removing_cv, &zv->zv_state_lock); + } + mutex_exit(&zv->zv_state_lock); + + rw_enter(&zvol_state_lock, RW_WRITER); + mutex_enter(&zv->zv_state_lock); + + zvol_remove(zv); + zvol_os_clear_private(zv); + + mutex_exit(&zv->zv_state_lock); + rw_exit(&zvol_state_lock); + + zvol_os_free(zv); +} + static void zvol_free_task(void *arg) { @@ -1233,11 +1275,13 @@ zvol_remove_minors_impl(const char *name) zvol_state_t *zv, *zv_next; int namelen = ((name) ? strlen(name) : 0); taskqid_t t; - list_t free_list; + list_t delay_list, free_list; if (zvol_inhibit_dev) return; + list_create(&delay_list, sizeof (zvol_state_t), + offsetof(zvol_state_t, zv_next)); list_create(&free_list, sizeof (zvol_state_t), offsetof(zvol_state_t, zv_next)); @@ -1256,9 +1300,24 @@ zvol_remove_minors_impl(const char *name) * one is currently using this zv */ - /* If in use, leave alone */ + /* + * If in use, try to throw everyone off and try again + * later. + */ if (zv->zv_open_count > 0 || atomic_read(&zv->zv_suspend_ref)) { + zv->zv_flags |= ZVOL_REMOVING; + t = taskq_dispatch( + zv->zv_objset->os_spa->spa_zvol_taskq, + zvol_remove_minor_task, zv, TQ_SLEEP); + if (t == TASKQID_INVALID) { + /* + * Couldn't create the task, so we'll + * do it in place once the loop is + * finished. + */ + list_insert_head(&delay_list, zv); + } mutex_exit(&zv->zv_state_lock); continue; } @@ -1285,7 +1344,11 @@ zvol_remove_minors_impl(const char *name) } rw_exit(&zvol_state_lock); - /* Drop zvol_state_lock before calling zvol_free() */ + /* Wait for zvols that we couldn't create a remove task for */ + while ((zv = list_remove_head(&delay_list)) != NULL) + zvol_remove_minor_task(zv); + + /* Free any that we couldn't free in parallel earlier */ while ((zv = list_remove_head(&free_list)) != NULL) zvol_os_free(zv); } @@ -1305,33 +1368,38 @@ zvol_remove_minor_impl(const char *name) zv_next = list_next(&zvol_state_list, zv); mutex_enter(&zv->zv_state_lock); - if (strcmp(zv->zv_name, name) == 0) { - /* - * By holding zv_state_lock here, we guarantee that no - * one is currently using this zv - */ + if (strcmp(zv->zv_name, name) == 0) + /* Found, leave the the loop with zv_lock held */ + break; + mutex_exit(&zv->zv_state_lock); + } - /* If in use, leave alone */ - if (zv->zv_open_count > 0 || - atomic_read(&zv->zv_suspend_ref)) { - mutex_exit(&zv->zv_state_lock); - continue; - } - zvol_remove(zv); + if (zv == NULL) { + rw_exit(&zvol_state_lock); + return; + } - zvol_os_clear_private(zv); - mutex_exit(&zv->zv_state_lock); - break; - } else { - mutex_exit(&zv->zv_state_lock); - } + ASSERT(MUTEX_HELD(&zv->zv_state_lock)); + + if (zv->zv_open_count > 0 || atomic_read(&zv->zv_suspend_ref)) { + /* + * In use, so try to throw everyone off, then wait + * until finished. + */ + zv->zv_flags |= ZVOL_REMOVING; + mutex_exit(&zv->zv_state_lock); + rw_exit(&zvol_state_lock); + zvol_remove_minor_task(zv); + return; } - /* Drop zvol_state_lock before calling zvol_free() */ + zvol_remove(zv); + zvol_os_clear_private(zv); + + mutex_exit(&zv->zv_state_lock); rw_exit(&zvol_state_lock); - if (zv != NULL) - zvol_os_free(zv); + zvol_os_free(zv); } /* diff --git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in index de06c7c6e2c1..422c750e2bc6 100755 --- a/tests/test-runner/bin/zts-report.py.in +++ b/tests/test-runner/bin/zts-report.py.in @@ -350,10 +350,6 @@ elif sys.platform.startswith('linux'): 'mmp/mmp_active_import': ['FAIL', known_reason], 'mmp/mmp_exported_import': ['FAIL', known_reason], 'mmp/mmp_inactive_import': ['FAIL', known_reason], - 'zvol/zvol_misc/zvol_misc_fua': ['SKIP', 14872], - 'zvol/zvol_misc/zvol_misc_snapdev': ['FAIL', 12621], - 'zvol/zvol_misc/zvol_misc_trim': ['SKIP', 14872], - 'zvol/zvol_misc/zvol_misc_volmode': ['FAIL', known_reason], }) # Not all Github actions runners have scsi_debug module, so we may skip diff --git a/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh b/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh index 619d8d0e8f07..9ebd5b149118 100755 --- a/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh +++ b/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh @@ -45,15 +45,6 @@ fi if ! is_linux ; then log_unsupported "Only linux supports dd with oflag=dsync for FUA writes" -else - if [[ $(linux_version) -gt $(linux_version "6.2") ]]; then - log_unsupported "Disabled while issue #14872 is being worked" - fi - - # Disabled for the CentOS 9 kernel - if [[ $(linux_version) -eq $(linux_version "5.14") ]]; then - log_unsupported "Disabled while issue #14872 is being worked" - fi fi typeset datafile1="$(mktemp zvol_misc_fua1.XXXXXX)" diff --git a/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh b/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh index c0b191aafd45..47cc42b9be7d 100755 --- a/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh +++ b/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh @@ -44,15 +44,6 @@ verify_runnable "global" if is_linux ; then - if [[ $(linux_version) -gt $(linux_version "6.2") ]]; then - log_unsupported "Disabled while issue #14872 is being worked" - fi - - # Disabled for the CentOS 9 kernel - if [[ $(linux_version) -eq $(linux_version "5.14") ]]; then - log_unsupported "Disabled while issue #14872 is being worked" - fi - # We need '--force' here since the prior tests may leave a filesystem # on the zvol, and blkdiscard will see that filesystem and print a # warning unless you force it.