From 87c5d0a58e177b54640cca8801235aa3faed405f Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2024 14:24:56 +1030 Subject: [PATCH] btrfs: scrub: ensure we output at least one error message for unrepaired corruption For btrfs scrub error messages, I have hit a lot of support cases on older kernels where no filename is outputted at all, with only error messages like: BTRFS error (device dm-0): bdev /dev/mapper/sys-rootlv errs: wr 0, rd 0, flush 0, corrupt 2823, gen 0 BTRFS error (device dm-0): unable to fixup (regular) error at logical 1563504640 on dev /dev/mapper/sys-rootlv BTRFS error (device dm-0): bdev /dev/mapper/sys-rootlv errs: wr 0, rd 0, flush 0, corrupt 2824, gen 0 BTRFS error (device dm-0): unable to fixup (regular) error at logical 1579016192 on dev /dev/mapper/sys-rootlv BTRFS error (device dm-0): bdev /dev/mapper/sys-rootlv errs: wr 0, rd 0, flush 0, corrupt 2825, gen 0 The "unable to fixup (regular) error" line shows we hit an unrepairable error, then normally we would do data/metadata backref walk to grab the correct info. But we can hit cases like the inode is already orphan (unlinked from any parent inode), or even the subvolume is orphan (unlinked and waiting to be deleted). In that case we're not sure what's the proper way to continue (Is it some critical data/metadata? Would it prevent the system from booting?) To improve the situation, this patch would: - Ensure we at least output one message for each unrepairable error If by somehow we didn't output any error message for the error, we always fallback to the basic logical/physical error output, with its type (data or metadata). Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index bcfc05edf7347c..ff923278126446 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -226,6 +226,7 @@ struct scrub_warning { u64 physical; u64 logical; struct btrfs_device *dev; + bool message_printed; }; static void release_scrub_stripe(struct scrub_stripe *stripe) @@ -425,7 +426,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, * we deliberately ignore the bit ipath might have been too small to * hold all of the paths here */ - for (i = 0; i < ipath->fspath->elem_cnt; ++i) + for (i = 0; i < ipath->fspath->elem_cnt; ++i) { btrfs_warn_in_rcu(fs_info, "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, path: %s", swarn->errstr, swarn->logical, @@ -433,6 +434,8 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, swarn->physical, root, inum, offset, (char *)(unsigned long)ipath->fspath->val[i]); + swarn->message_printed = true; + } btrfs_put_root(local_root); free_ipath(ipath); @@ -445,7 +448,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, btrfs_dev_name(swarn->dev), swarn->physical, root, inum, offset, ret); - + swarn->message_printed = true; free_ipath(ipath); return 0; } @@ -471,6 +474,7 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device * swarn.logical = logical; swarn.errstr = errstr; swarn.dev = NULL; + swarn.message_printed = false; ret = extent_from_logical(fs_info, swarn.logical, path, &found_key, &flags); @@ -492,12 +496,8 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device * ret = tree_backref_for_extent(&ptr, eb, &found_key, ei, item_size, &ref_root, &ref_level); - if (ret < 0) { - btrfs_warn(fs_info, - "failed to resolve tree backref for logical %llu: %d", - swarn.logical, ret); + if (ret < 0) break; - } if (ret > 0) break; btrfs_warn_in_rcu(fs_info, @@ -505,7 +505,13 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device * errstr, swarn.logical, btrfs_dev_name(dev), swarn.physical, (ref_level ? "node" : "leaf"), ref_level, ref_root); + swarn.message_printed = true; } + if (!swarn.message_printed) + btrfs_warn_in_rcu(fs_info, + "%s at metadata, logical %llu on dev %s physical %llu", + errstr, swarn.logical, + btrfs_dev_name(dev), swarn.physical); btrfs_release_path(path); } else { struct btrfs_backref_walk_ctx ctx = { 0 }; @@ -520,6 +526,11 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device * swarn.dev = dev; iterate_extent_inodes(&ctx, true, scrub_print_warning_inode, &swarn); + if (!swarn.message_printed) + btrfs_warn_in_rcu(fs_info, + "%s at data, filename unresolved, logical %llu on dev %s physical %llu", + errstr, swarn.logical, + btrfs_dev_name(dev), swarn.physical); } out: