Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BRT: Rework structures and locks to be per-vdev #16740

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 13 additions & 21 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -2119,9 +2119,6 @@ dump_brt(spa_t *spa)
return;
}

brt_t *brt = spa->spa_brt;
VERIFY(brt);

char count[32], used[32], saved[32];
zdb_nicebytes(brt_get_used(spa), used, sizeof (used));
zdb_nicebytes(brt_get_saved(spa), saved, sizeof (saved));
Expand All @@ -2132,11 +2129,8 @@ dump_brt(spa_t *spa)
if (dump_opt['T'] < 2)
return;

for (uint64_t vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) {
brt_vdev_t *brtvd = &brt->brt_vdevs[vdevid];
if (brtvd == NULL)
continue;

for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) {
brt_vdev_t *brtvd = spa->spa_brt_vdevs[vdevid];
if (!brtvd->bv_initiated) {
printf("BRT: vdev %" PRIu64 ": empty\n", vdevid);
continue;
Expand All @@ -2160,20 +2154,21 @@ dump_brt(spa_t *spa)
if (!do_histo)
printf("\n%-16s %-10s\n", "DVA", "REFCNT");

for (uint64_t vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) {
brt_vdev_t *brtvd = &brt->brt_vdevs[vdevid];
if (brtvd == NULL || !brtvd->bv_initiated)
for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) {
brt_vdev_t *brtvd = spa->spa_brt_vdevs[vdevid];
if (!brtvd->bv_initiated)
continue;

uint64_t counts[64] = {};

zap_cursor_t zc;
zap_attribute_t *za = zap_attribute_alloc();
for (zap_cursor_init(&zc, brt->brt_mos, brtvd->bv_mos_entries);
for (zap_cursor_init(&zc, spa->spa_meta_objset,
brtvd->bv_mos_entries);
zap_cursor_retrieve(&zc, za) == 0;
zap_cursor_advance(&zc)) {
uint64_t refcnt;
VERIFY0(zap_lookup_uint64(brt->brt_mos,
VERIFY0(zap_lookup_uint64(spa->spa_meta_objset,
brtvd->bv_mos_entries,
(const uint64_t *)za->za_name, 1,
za->za_integer_length, za->za_num_integers,
Expand Down Expand Up @@ -8227,14 +8222,11 @@ dump_mos_leaks(spa_t *spa)
}
}

if (spa->spa_brt != NULL) {
brt_t *brt = spa->spa_brt;
for (uint64_t vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) {
brt_vdev_t *brtvd = &brt->brt_vdevs[vdevid];
if (brtvd != NULL && brtvd->bv_initiated) {
mos_obj_refd(brtvd->bv_mos_brtvdev);
mos_obj_refd(brtvd->bv_mos_entries);
}
for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) {
brt_vdev_t *brtvd = spa->spa_brt_vdevs[vdevid];
if (brtvd->bv_initiated) {
mos_obj_refd(brtvd->bv_mos_brtvdev);
mos_obj_refd(brtvd->bv_mos_entries);
}
}

Expand Down
91 changes: 43 additions & 48 deletions include/sys/brt_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,28 +86,38 @@ typedef struct brt_vdev_phys {
uint64_t bvp_savedspace;
} brt_vdev_phys_t;

typedef struct brt_vdev {
struct brt_vdev {
/*
* VDEV id.
* Pending changes from open contexts.
*/
uint64_t bv_vdevid;
kmutex_t bv_pending_lock;
avl_tree_t bv_pending_tree[TXG_SIZE];
/*
* Is the structure initiated?
* (bv_entcount and bv_bitmap are allocated?)
* Protects bv_mos_*.
*/
boolean_t bv_initiated;
krwlock_t bv_mos_entries_lock ____cacheline_aligned;
/*
* Protects all the fields starting from bv_initiated.
*/
krwlock_t bv_lock ____cacheline_aligned;
/*
* VDEV id.
*/
uint64_t bv_vdevid ____cacheline_aligned;
/*
* Object number in the MOS for the entcount array and brt_vdev_phys.
*/
uint64_t bv_mos_brtvdev;
/*
* Object number in the MOS for the entries table.
* Object number in the MOS and dnode for the entries table.
*/
uint64_t bv_mos_entries;
dnode_t *bv_mos_entries_dnode;
/*
* Entries to sync.
* Is the structure initiated?
* (bv_entcount and bv_bitmap are allocated?)
*/
avl_tree_t bv_tree;
boolean_t bv_initiated;
/*
* Does the bv_entcount[] array needs byte swapping?
*/
Expand All @@ -120,6 +130,26 @@ typedef struct brt_vdev {
* This is the array with BRT entry count per BRT_RANGESIZE.
*/
uint16_t *bv_entcount;
/*
* bv_entcount[] potentially can be a bit too big to sychronize it all
* when we just changed few entcounts. The fields below allow us to
* track updates to bv_entcount[] array since the last sync.
* A single bit in the bv_bitmap represents as many entcounts as can
* fit into a single BRT_BLOCKSIZE.
* For example we have 65536 entcounts in the bv_entcount array
* (so the whole array is 128kB). We updated bv_entcount[2] and
* bv_entcount[5]. In that case only first bit in the bv_bitmap will
* be set and we will write only first BRT_BLOCKSIZE out of 128kB.
*/
ulong_t *bv_bitmap;
/*
* bv_entcount[] needs updating on disk.
*/
boolean_t bv_entcount_dirty;
/*
* brt_vdev_phys needs updating on disk.
*/
boolean_t bv_meta_dirty;
/*
* Sum of all bv_entcount[]s.
*/
Expand All @@ -133,45 +163,10 @@ typedef struct brt_vdev {
*/
uint64_t bv_savedspace;
/*
* brt_vdev_phys needs updating on disk.
*/
boolean_t bv_meta_dirty;
/*
* bv_entcount[] needs updating on disk.
*/
boolean_t bv_entcount_dirty;
/*
* bv_entcount[] potentially can be a bit too big to sychronize it all
* when we just changed few entcounts. The fields below allow us to
* track updates to bv_entcount[] array since the last sync.
* A single bit in the bv_bitmap represents as many entcounts as can
* fit into a single BRT_BLOCKSIZE.
* For example we have 65536 entcounts in the bv_entcount array
* (so the whole array is 128kB). We updated bv_entcount[2] and
* bv_entcount[5]. In that case only first bit in the bv_bitmap will
* be set and we will write only first BRT_BLOCKSIZE out of 128kB.
* Entries to sync.
*/
ulong_t *bv_bitmap;
uint64_t bv_nblocks;
} brt_vdev_t;

/*
* In-core brt
*/
typedef struct brt {
krwlock_t brt_lock;
spa_t *brt_spa;
#define brt_mos brt_spa->spa_meta_objset
uint64_t brt_rangesize;
uint64_t brt_usedspace;
uint64_t brt_savedspace;
avl_tree_t brt_pending_tree[TXG_SIZE];
kmutex_t brt_pending_lock[TXG_SIZE];
/* Sum of all entries across all bv_trees. */
uint64_t brt_nentries;
brt_vdev_t *brt_vdevs;
uint64_t brt_nvdevs;
} brt_t;
avl_tree_t bv_tree;
};

/* Size of bre_offset / sizeof (uint64_t). */
#define BRT_KEY_WORDS (1)
Expand All @@ -188,7 +183,7 @@ typedef struct brt_entry {

typedef struct brt_pending_entry {
blkptr_t bpe_bp;
int bpe_count;
uint64_t bpe_count;
avl_node_t bpe_node;
} brt_pending_entry_t;

Expand Down
1 change: 1 addition & 0 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ extern "C" {
/*
* Forward references that lots of things need.
*/
typedef struct brt_vdev brt_vdev_t;
typedef struct spa spa_t;
typedef struct vdev vdev_t;
typedef struct metaslab metaslab_t;
Expand Down
6 changes: 5 additions & 1 deletion include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -412,8 +412,12 @@ struct spa {
uint64_t spa_dedup_dspace; /* Cache get_dedup_dspace() */
uint64_t spa_dedup_checksum; /* default dedup checksum */
uint64_t spa_dspace; /* dspace in normal class */
uint64_t spa_rdspace; /* raw (non-dedup) --//-- */
boolean_t spa_active_ddt_prune; /* ddt prune process active */
struct brt *spa_brt; /* in-core BRT */
brt_vdev_t **spa_brt_vdevs; /* array of per-vdev BRTs */
uint64_t spa_brt_nvdevs; /* number of vdevs in BRT */
uint64_t spa_brt_rangesize; /* pool's BRT range size */
krwlock_t spa_brt_lock; /* Protects brt_vdevs/nvdevs */
kmutex_t spa_vdev_top_lock; /* dueling offline/remove */
kmutex_t spa_proc_lock; /* protects spa_proc* */
kcondvar_t spa_proc_cv; /* spa_proc_state transitions */
Expand Down
7 changes: 4 additions & 3 deletions include/sys/zap.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,11 +223,15 @@ int zap_lookup_norm(objset_t *ds, uint64_t zapobj, const char *name,
boolean_t *normalization_conflictp);
int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf);
int zap_lookup_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf);
int zap_contains(objset_t *ds, uint64_t zapobj, const char *name);
int zap_prefetch(objset_t *os, uint64_t zapobj, const char *name);
int zap_prefetch_object(objset_t *os, uint64_t zapobj);
int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints);
int zap_prefetch_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
int key_numints);

int zap_lookup_by_dnode(dnode_t *dn, const char *name,
uint64_t integer_size, uint64_t num_integers, void *buf);
Expand All @@ -236,9 +240,6 @@ int zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
matchtype_t mt, char *realname, int rn_len,
boolean_t *ncp);

int zap_count_write_by_dnode(dnode_t *dn, const char *name,
int add, zfs_refcount_t *towrite, zfs_refcount_t *tooverwrite);

/*
* Create an attribute with the given name and value.
*
Expand Down
Loading