Skip to content

Commit

Permalink
Improved zpool status output, list all affected datasets
Browse files Browse the repository at this point in the history
Currently, determining which datasets are affected by corruption is
a manual process.

The primary difficulty in reporting the list of affected snapshots is
that since the error was initially found, the snapshot where the error
originally occurred in, may have been deleted. To solve this issue, we
add the ID of the head dataset of the original snapshot which the error
was detected in, to the stored error report. Then any time a filesystem
is deleted, the errors associated with it are deleted as well. Any time
a clone promote occurs, we modify reports associated with the original
head to refer to the new head. The stored error reports are identified
by this head ID, the birth time of the block which the error occurred
in, as well as some information about the error itself are also stored.

Once this information is stored, we can find the set of datasets
affected by an error by walking back the list of snapshots in the given
head until we find one with the appropriate birth txg, and then traverse
through the snapshots of the clone family, terminating a branch if the
block was replaced in a given snapshot. Then we report this information
back to libzfs, and to the zpool status command, where it is displayed
as follows:

 pool: test
 state: ONLINE
status: One or more devices has experienced an error resulting in data
        corruption.  Applications may be affected.
action: Restore the file in question if possible.  Otherwise restore the
        entire pool from backup.
   see: https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-8A
  scan: scrub repaired 0B in 00:00:00 with 800 errors on Fri Dec  3
08:27:57 2021
config:

        NAME        STATE     READ WRITE CKSUM
        test        ONLINE       0     0     0
          sdb       ONLINE       0     0 1.58K

errors: Permanent errors have been detected in the following files:

        test@1:/test.0.0
        /test/test.0.0
        /test/1clone/test.0.0

A new feature flag is introduced to mark the presence of this change, as
well as promotion and backwards compatibility logic. This is an updated
version of #9175. Rebase required fixing the tests, updating the ABI of
libzfs, and updating the man pages.

Signed-off-by: George Amanakis <gamanakis@gmail.com>
  • Loading branch information
gamanakis committed Dec 17, 2021
1 parent 8a02d01 commit a4ff6d1
Show file tree
Hide file tree
Showing 36 changed files with 2,289 additions and 121 deletions.
111 changes: 102 additions & 9 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ get_usage(zpool_help_t idx)
return (gettext("\tinitialize [-c | -s] [-w] <pool> "
"[<device> ...]\n"));
case HELP_SCRUB:
return (gettext("\tscrub [-s | -p] [-w] <pool> ...\n"));
return (gettext("\tscrub [-s | -p] [-w] [-e] <pool> ...\n"));
case HELP_RESILVER:
return (gettext("\tresilver <pool> ...\n"));
case HELP_TRIM:
Expand Down Expand Up @@ -7242,8 +7242,9 @@ wait_callback(zpool_handle_t *zhp, void *data)
}

/*
* zpool scrub [-s | -p] [-w] <pool> ...
* zpool scrub [-s | -p] [-w] [-e] <pool> ...
*
* -e Only scrub blocks in the error log.
* -s Stop. Stops any in-progress scrub.
* -p Pause. Pause in-progress scrub.
* -w Wait. Blocks until scrub has completed.
Expand All @@ -7259,14 +7260,21 @@ zpool_do_scrub(int argc, char **argv)
cb.cb_type = POOL_SCAN_SCRUB;
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;

boolean_t is_error_scrub = B_FALSE;
boolean_t is_pause = B_FALSE;
boolean_t is_stop = B_FALSE;

/* check options */
while ((c = getopt(argc, argv, "spw")) != -1) {
while ((c = getopt(argc, argv, "spwe")) != -1) {
switch (c) {
case 'e':
is_error_scrub = B_TRUE;
break;
case 's':
cb.cb_type = POOL_SCAN_NONE;
is_stop = B_TRUE;
break;
case 'p':
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
is_pause = B_TRUE;
break;
case 'w':
wait = B_TRUE;
Expand All @@ -7278,11 +7286,27 @@ zpool_do_scrub(int argc, char **argv)
}
}

if (cb.cb_type == POOL_SCAN_NONE &&
cb.cb_scrub_cmd == POOL_SCRUB_PAUSE) {
(void) fprintf(stderr, gettext("invalid option combination: "
"-s and -p are mutually exclusive\n"));
if (is_pause && is_stop) {
(void) fprintf(stderr, gettext("invalid option "
"combination :-s and -p are mutually exclusive\n"));
usage(B_FALSE);
} else {
if (is_error_scrub) {
cb.cb_type = POOL_ERRORSCRUB;
if (is_pause) {
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
} else if (is_stop) {
cb.cb_scrub_cmd = POOL_ERRORSCRUB_STOP;
} else {
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
}
} else {
if (is_pause) {
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
} else if (is_stop) {
cb.cb_type = POOL_SCAN_NONE;
}
}
}

if (wait && (cb.cb_type == POOL_SCAN_NONE ||
Expand Down Expand Up @@ -7506,6 +7530,70 @@ secs_to_dhms(uint64_t total, char *buf)
}
}

/*
* Print out detailed error scrub status.
*/
static void
print_err_scrub_status(pool_scan_stat_t *ps)
{
time_t start, end, pause;
uint64_t total_secs_left;
uint64_t secs_left, mins_left, hours_left, days_left;
uint64_t examined, to_be_examined;

if (ps == NULL || ps->pss_error_scrub_func != POOL_ERRORSCRUB) {
return;
}

(void) printf(gettext(" scrub: "));

start = ps->pss_error_scrub_start;
end = ps->pss_error_scrub_end;
pause = ps->pss_pass_error_scrub_pause;
examined = ps->pss_error_scrub_examined;
to_be_examined = ps->pss_error_scrub_to_be_examined;

assert(ps->pss_error_scrub_func == POOL_ERRORSCRUB);

if (ps->pss_error_scrub_state == DSS_FINISHED) {
total_secs_left = end - start;
days_left = total_secs_left / 60 / 60 / 24;
hours_left = (total_secs_left / 60 / 60) % 24;
mins_left = (total_secs_left / 60) % 60;
secs_left = (total_secs_left % 60);

(void) printf(gettext("scrubbed %llu error blocks in %llu days "
"%02llu:%02llu:%02llu on %s"), (u_longlong_t)examined,
(u_longlong_t)days_left, (u_longlong_t)hours_left,
(u_longlong_t)mins_left, (u_longlong_t)secs_left,
ctime(&end));

return;
} else if (ps->pss_error_scrub_state == DSS_CANCELED) {
(void) printf(gettext("error scrub canceled on %s"),
ctime(&end));
return;
}
assert(ps->pss_error_scrub_state == DSS_ERRORSCRUBBING);

/* Error scrub is in progress. */
if (pause == 0) {
(void) printf(gettext("error scrub in progress since %s"),
ctime(&start));
} else {
(void) printf(gettext("error scrub paused since %s"),
ctime(&pause));
(void) printf(gettext("\terror scrub started on %s"),
ctime(&start));
}

double fraction_done = (double)examined / (to_be_examined + examined);
(void) printf(gettext("\t%.2f%% done, issued I/O for %llu error"
" blocks"), 100 * fraction_done, (u_longlong_t)examined);

(void) printf("\n");
}

/*
* Print out detailed scrub status.
*/
Expand Down Expand Up @@ -7831,6 +7919,7 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot)
{
uint64_t rebuild_end_time = 0, resilver_end_time = 0;
boolean_t have_resilver = B_FALSE, have_scrub = B_FALSE;
boolean_t have_errorscrub = B_FALSE;
boolean_t active_resilver = B_FALSE;
pool_checkpoint_stat_t *pcs = NULL;
pool_scan_stat_t *ps = NULL;
Expand All @@ -7845,6 +7934,7 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot)

have_resilver = (ps->pss_func == POOL_SCAN_RESILVER);
have_scrub = (ps->pss_func == POOL_SCAN_SCRUB);
have_errorscrub = (ps->pss_error_scrub_func == POOL_ERRORSCRUB);
}

boolean_t active_rebuild = check_rebuilding(nvroot, &rebuild_end_time);
Expand All @@ -7853,6 +7943,8 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot)
/* Always print the scrub status when available. */
if (have_scrub)
print_scan_scrub_resilver_status(ps);
if (have_errorscrub)
print_err_scrub_status(ps);

/*
* When there is an active resilver or rebuild print its status.
Expand Down Expand Up @@ -8561,6 +8653,7 @@ status_callback(zpool_handle_t *zhp, void *data)

(void) nvlist_lookup_uint64_array(nvroot,
ZPOOL_CONFIG_REMOVAL_STATS, (uint64_t **)&prs, &c);

print_removal_status(zhp, prs);

(void) nvlist_lookup_uint64_array(nvroot,
Expand Down
4 changes: 4 additions & 0 deletions include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,15 @@ typedef enum zfs_error {
EZFS_THREADCREATEFAILED, /* thread create failed */
EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */
EZFS_SCRUBBING, /* currently scrubbing */
EZFS_ERRORSCRUBBING, /* currently error scrubbing */
EZFS_ERRORSCRUB_PAUSED, /* error scrub currently paused */
EZFS_NO_SCRUB, /* no active scrub */
EZFS_NO_ERRORSCRUB, /* no active error scrub */
EZFS_DIFF, /* general failure of zfs diff */
EZFS_DIFFDATA, /* bad zfs diff data */
EZFS_POOLREADONLY, /* pool is in read-only mode */
EZFS_SCRUB_PAUSED, /* scrub currently paused */
EZFS_SCRUB_PAUSED_TO_CANCEL, /* scrub currently paused */
EZFS_ACTIVE_POOL, /* pool is imported on a different system */
EZFS_CRYPTOFAILED, /* failed to setup encryption */
EZFS_NO_PENDING, /* cannot cancel, no operation is pending */
Expand Down
2 changes: 2 additions & 0 deletions include/libzfs_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ _LIBZFS_CORE_H int lzc_get_bootenv(const char *, nvlist_t **);
_LIBZFS_CORE_H int lzc_get_vdev_prop(const char *, nvlist_t *, nvlist_t **);
_LIBZFS_CORE_H int lzc_set_vdev_prop(const char *, nvlist_t *, nvlist_t **);

_LIBZFS_CORE_H int lzc_scrub(zfs_ioc_t, const char *, nvlist_t *, nvlist_t **);

#ifdef __cplusplus
}
#endif
Expand Down
1 change: 1 addition & 0 deletions include/sys/dmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ typedef struct dmu_buf {
#define DMU_POOL_DDT_STATS "DDT-statistics"
#define DMU_POOL_CREATION_VERSION "creation_version"
#define DMU_POOL_SCAN "scan"
#define DMU_POOL_ERRORSCRUB "error_scrub"
#define DMU_POOL_FREE_BPOBJ "free_bpobj"
#define DMU_POOL_BPTREE_OBJ "bptree_obj"
#define DMU_POOL_EMPTY_BPOBJ "empty_bpobj"
Expand Down
30 changes: 28 additions & 2 deletions include/sys/dsl_scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

#include <sys/zfs_context.h>
#include <sys/zio.h>
#include <sys/zap.h>
#include <sys/ddt.h>
#include <sys/bplist.h>

Expand Down Expand Up @@ -78,6 +79,21 @@ typedef enum dsl_scan_flags {

#define DSL_SCAN_FLAGS_MASK (DSF_VISIT_DS_AGAIN)

typedef struct dsl_errorscrub_phys {
uint64_t dep_func; /* pool_scan_func_t */
uint64_t dep_state; /* dsl_scan_state_t */
uint64_t dep_cursor; /* serialized zap cursor for tracing progress */
uint64_t dep_start_time; /* error scrub start time, unix timestamp */
uint64_t dep_end_time; /* error scrub end time, unix timestamp */
uint64_t dep_to_examine; /* total error blocks to be scrubbed */
uint64_t dep_examined; /* blocks scrubbed so far */
uint64_t dep_errors; /* error scrub I/O error count */
uint64_t dep_paused_flags; /* flag for paused */
} dsl_errorscrub_phys_t;

#define ERRORSCRUB_PHYS_NUMINTS (sizeof (dsl_errorscrub_phys_t) \
/ sizeof (uint64_t))

/*
* Every pool will have one dsl_scan_t and this structure will contain
* in-memory information about the scan and a pointer to the on-disk
Expand Down Expand Up @@ -151,11 +167,16 @@ typedef struct dsl_scan {
uint64_t scn_avg_zio_size_this_txg;
uint64_t scn_zios_this_txg;

/* zap cursor for tracing error scrub progress */
zap_cursor_t errorscrub_cursor;
/* members needed for syncing scan status to disk */
dsl_scan_phys_t scn_phys; /* on disk representation of scan */
dsl_scan_phys_t scn_phys_cached;
avl_tree_t scn_queue; /* queue of datasets to scan */
uint64_t scn_bytes_pending; /* outstanding data to issue */

/* members needed for syncing error scrub status to disk */
dsl_errorscrub_phys_t errorscrub_phys;
} dsl_scan_t;

typedef struct dsl_scan_io_queue dsl_scan_io_queue_t;
Expand All @@ -167,12 +188,16 @@ int dsl_scan_setup_check(void *, dmu_tx_t *);
void dsl_scan_setup_sync(void *, dmu_tx_t *);
void dsl_scan_fini(struct dsl_pool *dp);
void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
int dsl_scan_cancel(struct dsl_pool *);
int dsl_scan_cancel(struct dsl_pool *, pool_scan_func_t func);
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
void dsl_scan_assess_vdev(struct dsl_pool *dp, vdev_t *vd);
boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
boolean_t dsl_errorscrubbing(const struct dsl_pool *dp);
boolean_t dsl_errorscrub_active(dsl_scan_t *scn);
void dsl_scan_restart_resilver(struct dsl_pool *, uint64_t txg);
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp,
pool_scrub_cmd_t cmd, pool_scan_func_t func);
void dsl_errorscrub_sync(struct dsl_pool *, dmu_tx_t *);
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
boolean_t dsl_scan_resilver_scheduled(struct dsl_pool *dp);
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
Expand All @@ -184,6 +209,7 @@ void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
struct dmu_tx *tx);
boolean_t dsl_scan_active(dsl_scan_t *scn);
boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn);
boolean_t dsl_errorscrub_is_paused(const dsl_scan_t *scn);
void dsl_scan_freed(spa_t *spa, const blkptr_t *bp);
void dsl_scan_io_queue_destroy(dsl_scan_io_queue_t *queue);
void dsl_scan_io_queue_vdev_xfer(vdev_t *svd, vdev_t *tvd);
Expand Down
20 changes: 19 additions & 1 deletion include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -1019,6 +1019,7 @@ typedef enum pool_scan_func {
POOL_SCAN_NONE,
POOL_SCAN_SCRUB,
POOL_SCAN_RESILVER,
POOL_ERRORSCRUB,
POOL_SCAN_FUNCS
} pool_scan_func_t;

Expand All @@ -1028,6 +1029,7 @@ typedef enum pool_scan_func {
typedef enum pool_scrub_cmd {
POOL_SCRUB_NORMAL = 0,
POOL_SCRUB_PAUSE,
POOL_ERRORSCRUB_STOP,
POOL_SCRUB_FLAGS_END
} pool_scrub_cmd_t;

Expand Down Expand Up @@ -1082,6 +1084,20 @@ typedef struct pool_scan_stat {
uint64_t pss_pass_scrub_spent_paused;
uint64_t pss_pass_issued; /* issued bytes per scan pass */
uint64_t pss_issued; /* total bytes checked by scanner */

/* error scrub values stored on disk */
uint64_t pss_error_scrub_func; /* pool_scan_func_t */
uint64_t pss_error_scrub_state; /* dsl_scan_state_t */
uint64_t pss_error_scrub_start; /* error scrub start time */
uint64_t pss_error_scrub_end; /* error scrub end time */
uint64_t pss_error_scrub_examined; /* error blocks issued I/O */
/* error blocks to be issued I/O */
uint64_t pss_error_scrub_to_be_examined;

/* error scrub values not stored on disk */
/* error scrub pause time in milliseconds */
uint64_t pss_pass_error_scrub_pause;

} pool_scan_stat_t;

typedef struct pool_removal_stat {
Expand All @@ -1103,6 +1119,7 @@ typedef enum dsl_scan_state {
DSS_SCANNING,
DSS_FINISHED,
DSS_CANCELED,
DSS_ERRORSCRUBBING,
DSS_NUM_STATES
} dsl_scan_state_t;

Expand Down Expand Up @@ -1342,7 +1359,7 @@ typedef enum {
*/
typedef enum zfs_ioc {
/*
* Core features - 81/128 numbers reserved.
* Core features - 88/128 numbers reserved.
*/
#ifdef __FreeBSD__
ZFS_IOC_FIRST = 0,
Expand Down Expand Up @@ -1437,6 +1454,7 @@ typedef enum zfs_ioc {
ZFS_IOC_WAIT_FS, /* 0x5a54 */
ZFS_IOC_VDEV_GET_PROPS, /* 0x5a55 */
ZFS_IOC_VDEV_SET_PROPS, /* 0x5a56 */
ZFS_IOC_POOL_SCRUB, /* 0x5a57 */

/*
* Per-platform (Optional) - 8/128 numbers reserved.
Expand Down
19 changes: 16 additions & 3 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ typedef struct spa_aux_vdev spa_aux_vdev_t;
typedef struct ddt ddt_t;
typedef struct ddt_entry ddt_entry_t;
typedef struct zbookmark_phys zbookmark_phys_t;
typedef struct zbookmark_err_phys zbookmark_err_phys_t;

struct bpobj;
struct bplist;
Expand Down Expand Up @@ -819,8 +820,9 @@ extern void spa_l2cache_drop(spa_t *spa);

/* scanning */
extern int spa_scan(spa_t *spa, pool_scan_func_t func);
extern int spa_scan_stop(spa_t *spa);
extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag);
extern int spa_scan_stop(spa_t *spa, pool_scan_func_t func);
extern int spa_scrub_pause_resume(spa_t *spa, pool_scan_func_t func,
pool_scrub_cmd_t flag);

/* spa syncing */
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
Expand Down Expand Up @@ -1144,11 +1146,22 @@ extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
extern uint64_t spa_get_errlog_size(spa_t *spa);
extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
extern int spa_get_errlog(spa_t *spa, void *uaddr, uint64_t *count);
extern uint64_t spa_get_last_errlog_size(spa_t *spa);
extern void spa_errlog_rotate(spa_t *spa);
extern void spa_errlog_drain(spa_t *spa);
extern void spa_errlog_sync(spa_t *spa, uint64_t txg);
extern void spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub);
extern void spa_delete_dataset_errlog(spa_t *spa, uint64_t ds, dmu_tx_t *tx);
extern void spa_swap_errlog(spa_t *spa, uint64_t new_head_ds,
uint64_t old_head_ds, dmu_tx_t *tx);
extern uint64_t find_top_affected_fs(spa_t *spa, uint64_t head_ds,
zbookmark_err_phys_t *zep);
extern int find_block_txg(struct dsl_dataset *ds, zbookmark_err_phys_t *zep,
uint64_t *birth_txg);
extern void zep_to_zb(uint64_t dataset, zbookmark_err_phys_t *zep,
zbookmark_phys_t *zb);
extern void name_to_errphys(char *buf, zbookmark_err_phys_t *zep);

/* vdev cache */
extern void vdev_cache_stat_init(void);
Expand Down
Loading

0 comments on commit a4ff6d1

Please sign in to comment.