From 814c231d73f34d1e07264d0085ee6b7c1faeda9b Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 2 Jan 2025 02:06:36 +0800 Subject: [PATCH] btrfs: add read policy to set a preferred device Add read policy that will force all reads to go to the given device (specified by devid) on the RAID1 profiles. This will be used for testing, e.g. to read from stale device. Users may find other use cases. Can be set in sysfs, the value format is "devid:" to the file /sys/fs/btrfs/FSID/read_policy Signed-off-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/sysfs.c | 31 ++++++++++++++++++++++++++++++- fs/btrfs/volumes.c | 17 +++++++++++++++++ fs/btrfs/volumes.h | 5 +++++ 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index e155b7ce1ee584..5211d13d73f8aa 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1309,6 +1309,7 @@ static const char *btrfs_read_policy_name[] = { "pid", #ifdef CONFIG_BTRFS_EXPERIMENTAL "round-robin", + "devid", #endif }; @@ -1364,8 +1365,11 @@ static ssize_t btrfs_read_policy_show(struct kobject *kobj, if (i == BTRFS_READ_POLICY_RR) ret += sysfs_emit_at(buf, ret, ":%u", READ_ONCE(fs_devices->rr_min_contig_read)); -#endif + if (i == BTRFS_READ_POLICY_DEVID) + ret += sysfs_emit_at(buf, ret, ":%llu", + READ_ONCE(fs_devices->read_devid)); +#endif if (i == policy) ret += sysfs_emit_at(buf, ret, "]"); } @@ -1421,6 +1425,31 @@ static ssize_t btrfs_read_policy_store(struct kobject *kobj, return len; } + + if (index == BTRFS_READ_POLICY_DEVID) { + if (value != -1) { + BTRFS_DEV_LOOKUP_ARGS(args); + + /* Validate input devid. */ + args.devid = value; + if (btrfs_find_device(fs_devices, &args) == NULL) + return -EINVAL; + } else { + /* Set default devid to the devid of the latest device. */ + value = fs_devices->latest_dev->devid; + } + + if (index != READ_ONCE(fs_devices->read_policy) || + value != READ_ONCE(fs_devices->read_devid)) { + WRITE_ONCE(fs_devices->read_policy, index); + WRITE_ONCE(fs_devices->read_devid, value); + + btrfs_info(fs_devices->fs_info, "read policy set to '%s:%llu'", + btrfs_read_policy_name[index], value); + } + + return len; + } #endif if (index != READ_ONCE(fs_devices->read_policy)) { WRITE_ONCE(fs_devices->read_policy, index); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a31ba441b752ac..d633bf1e107749 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1337,6 +1337,7 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices, fs_devices->read_policy = BTRFS_READ_POLICY_PID; #ifdef CONFIG_BTRFS_EXPERIMENTAL fs_devices->rr_min_contig_read = BTRFS_DEFAULT_RR_MIN_CONTIG_READ; + fs_devices->read_devid = latest_dev->devid; #endif return 0; @@ -5971,6 +5972,19 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info, } #ifdef CONFIG_BTRFS_EXPERIMENTAL +static int btrfs_read_preferred(struct btrfs_chunk_map *map, int first, int num_stripes) +{ + for (int index = first; index < first + num_stripes; index++) { + const struct btrfs_device *device = map->stripes[index].dev; + + if (device->devid == READ_ONCE(device->fs_devices->read_devid)) + return index; + } + + /* If no read-preferred device is set use the first stripe. */ + return first; +} + struct stripe_mirror { u64 devid; int num; @@ -6059,6 +6073,9 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info, case BTRFS_READ_POLICY_RR: preferred_mirror = btrfs_read_rr(map, first, num_stripes); break; + case BTRFS_READ_POLICY_DEVID: + preferred_mirror = btrfs_read_preferred(map, first, num_stripes); + break; #endif } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 26034c29ac87fc..f197e152f3188c 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -309,6 +309,8 @@ enum btrfs_read_policy { #ifdef CONFIG_BTRFS_EXPERIMENTAL /* Balancing RAID1 reads across all striped devices (round-robin). */ BTRFS_READ_POLICY_RR, + /* Read from a specific device. */ + BTRFS_READ_POLICY_DEVID, #endif BTRFS_NR_READ_POLICY, }; @@ -449,6 +451,9 @@ struct btrfs_fs_devices { */ u32 rr_min_contig_read; + /* Device to be used for reading in case of RAID1. */ + u64 read_devid; + /* Checksum mode - offload it or do it synchronously. */ enum btrfs_offload_csum_mode offload_csum_mode; #endif