From 004e221190cd5521593db5f462fd51f998a3265f Mon Sep 17 00:00:00 2001 From: dormando Date: Mon, 28 Sep 2015 17:32:49 -0700 Subject: [PATCH] slab mover rescues valid items with free chunks During a slab page move items are typically ejected regardless of their validity. Now, if an item is valid and free chunks are available in the same slab class, copy the item over and replace it. It's up to external systems to try to ensure free chunks are available before moving a slab page. If there is no memory it will simply evict them as normal. Also adds counters so we can finally tell how often these cases happen. --- items.c | 8 +++--- items.h | 2 ++ memcached.c | 3 +++ memcached.h | 3 +++ slabs.c | 64 +++++++++++++++++++++++++++++++++++++++++++-- t/slabs-reassign2.t | 9 ++++--- 6 files changed, 80 insertions(+), 9 deletions(-) diff --git a/items.c b/items.c index 2a4f4ee847..950ba60997 100644 --- a/items.c +++ b/items.c @@ -98,7 +98,7 @@ uint64_t get_cas_id(void) { return next_id; } -static int is_flushed(item *it) { +int item_is_flushed(item *it) { rel_time_t oldest_live = settings.oldest_live; uint64_t cas = ITEM_get_cas(it); uint64_t oldest_cas = settings.oldest_cas; @@ -712,7 +712,7 @@ item *do_item_get(const char *key, const size_t nkey, const uint32_t hv) { } if (it != NULL) { - if (is_flushed(it)) { + if (item_is_flushed(it)) { do_item_unlink(it, hv); do_item_remove(it); it = NULL; @@ -803,7 +803,7 @@ static int lru_pull_tail(const int orig_id, const int cur_lru, /* Expired or flushed */ if ((search->exptime != 0 && search->exptime < current_time) - || is_flushed(search)) { + || item_is_flushed(search)) { itemstats[id].reclaimed++; if ((search->it_flags & ITEM_FETCHED) == 0) { itemstats[id].expired_unfetched++; @@ -1199,7 +1199,7 @@ static void item_crawler_evaluate(item *search, uint32_t hv, int i) { crawlerstats_t *s = &crawlerstats[slab_id]; itemstats[i].crawler_items_checked++; if ((search->exptime != 0 && search->exptime < current_time) - || is_flushed(search)) { + || item_is_flushed(search)) { itemstats[i].crawler_reclaimed++; s->reclaimed++; diff --git a/items.h b/items.h index b091fa0b56..f47de8f973 100644 --- a/items.h +++ b/items.h @@ -14,6 +14,8 @@ void do_item_update(item *it); /** update LRU time to current and reposition * void do_item_update_nolock(item *it); int do_item_replace(item *it, item *new_it, const uint32_t hv); +int item_is_flushed(item *it); + /*@null@*/ char *item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, unsigned int *bytes); void item_stats(ADD_STAT add_stats, void *c); diff --git a/memcached.c b/memcached.c index 1a9bfaf236..a1de52521b 100644 --- a/memcached.c +++ b/memcached.c @@ -2631,6 +2631,9 @@ static void server_stats(ADD_STAT add_stats, conn *c) { APPEND_STAT("hash_bytes", "%llu", (unsigned long long)stats.hash_bytes); APPEND_STAT("hash_is_expanding", "%u", stats.hash_is_expanding); if (settings.slab_reassign) { + APPEND_STAT("slab_reassign_rescues", "%llu", stats.slab_reassign_rescues); + APPEND_STAT("slab_reassign_evictions", "%llu", stats.slab_reassign_evictions); + APPEND_STAT("slab_reassign_busy_items", "%llu", stats.slab_reassign_busy_items); APPEND_STAT("slab_reassign_running", "%u", stats.slab_reassign_running); APPEND_STAT("slabs_moved", "%llu", stats.slabs_moved); } diff --git a/memcached.h b/memcached.h index 4ae3164ed2..05eeb049b2 100644 --- a/memcached.h +++ b/memcached.h @@ -285,6 +285,9 @@ struct stats { uint64_t evicted_unfetched; /* items evicted but never touched */ bool slab_reassign_running; /* slab reassign in progress */ uint64_t slabs_moved; /* times slabs were moved around */ + uint64_t slab_reassign_rescues; /* items rescued during slab move */ + uint64_t slab_reassign_evictions; /* valid items lost during slab move */ + uint64_t slab_reassign_busy_items; /* valid temporarily unmovable */ uint64_t lru_crawler_starts; /* Number of item crawlers kicked off */ bool lru_crawler_running; /* crawl in progress */ uint64_t lru_maintainer_juggles; /* number of LRU bg pokes */ diff --git a/slabs.c b/slabs.c index c9e29ac89b..4bc064c088 100644 --- a/slabs.c +++ b/slabs.c @@ -237,7 +237,9 @@ static void *do_slabs_alloc(const size_t size, unsigned int id, unsigned int *to p = &slabclass[id]; assert(p->sl_curr == 0 || ((item *)p->slots)->slabs_clsid == 0); - *total_chunks = p->slabs * p->perslab; + if (total_chunks != NULL) { + *total_chunks = p->slabs * p->perslab; + } /* fail unless we have space at the end of a recently allocated page, we have something on our freelist, or we could allocate a new page */ if (! (p->sl_curr != 0 || do_slabs_newslab(id) != 0)) { @@ -606,6 +608,9 @@ static int slab_rebalance_move(void) { } } + int save_item = 0; + item *new_it = NULL; + size_t ntotal = 0; switch (status) { case MOVE_FROM_LRU: /* Lock order is LRU locks -> slabs_lock. unlink uses LRU lock. @@ -614,10 +619,62 @@ static int slab_rebalance_move(void) { * (2) + the item is locked. Drop slabs lock, drop item to * refcount 1 (just our own, then fall through and wipe it */ + /* Check if expired or flushed */ + ntotal = ITEM_ntotal(it); + /* REQUIRES slabs_lock: CHECK FOR cls->sl_curr > 0 */ + if ((it->exptime != 0 && it->exptime < current_time) + || item_is_flushed(it)) { + /* TODO: maybe we only want to save if item is in HOT or + * WARM LRU? + */ + save_item = 0; + } else if (s_cls->sl_curr < 1) { + save_item = 0; + STATS_LOCK(); + stats.slab_reassign_evictions++; + STATS_UNLOCK(); + } else { + save_item = 1; + /* BIT OF A HACK: if sl_curr is > 0 alloc won't try to + * pull from global pool to satisfy the request. + * FIXME: pile on more flags? + */ + new_it = do_slabs_alloc(ntotal, slab_rebal.s_clsid, NULL); + /* check that memory isn't within the range to clear */ + if ((void *)new_it >= slab_rebal.slab_start + && (void *)new_it < slab_rebal.slab_end) { + /* Pulled something we intend to free. Put it back + * and use the main loop to kill it. + */ + do_slabs_free(new_it, ntotal, slab_rebal.s_clsid); + save_item = 0; + STATS_LOCK(); + stats.slab_reassign_evictions++; + STATS_UNLOCK(); + } + } pthread_mutex_unlock(&slabs_lock); - do_item_unlink(it, hv); + if (save_item) { + /* if free memory, memcpy. clear prev/next/h_bucket */ + memcpy(new_it, it, ntotal); + new_it->prev = 0; + new_it->next = 0; + new_it->h_next = 0; + /* These are definitely required. else fails assert */ + new_it->it_flags &= ~ITEM_LINKED; + new_it->refcount = 0; + do_item_replace(it, new_it, hv); + STATS_LOCK(); + stats.slab_reassign_rescues++; + STATS_UNLOCK(); + } else { + do_item_unlink(it, hv); + } item_trylock_unlock(hold_lock); pthread_mutex_lock(&slabs_lock); + if (save_item == 0) { + s_cls->requested -= ntotal; + } case MOVE_FROM_SLAB: it->refcount = 0; it->it_flags = 0; @@ -625,6 +682,9 @@ static int slab_rebalance_move(void) { break; case MOVE_BUSY: case MOVE_LOCKED: + STATS_LOCK(); + stats.slab_reassign_busy_items++; + STATS_UNLOCK(); slab_rebal.busy_items++; was_busy++; break; diff --git a/t/slabs-reassign2.t b/t/slabs-reassign2.t index d0a8518f45..8de4a05e51 100644 --- a/t/slabs-reassign2.t +++ b/t/slabs-reassign2.t @@ -2,7 +2,7 @@ use strict; use warnings; -use Test::More tests => 3; +use Test::More tests => 5; use FindBin qw($Bin); use lib "$Bin/lib"; use MemcachedTest; @@ -42,7 +42,7 @@ for (1 .. $keycount) { } else { $body .= scalar(<$sock>) . scalar(<$sock>); if ($body ne $expected) { - print STDERR "Something terrible has happened: $body\n"; + print STDERR "Something terrible has happened: $expected\nBODY:\n$body\nDONETEST\n"; } else { $hits++; } @@ -52,10 +52,13 @@ for (1 .. $keycount) { { my $stats = mem_stats($sock); - cmp_ok($stats->{evictions}, '<', 1000, 'evictions were less than 1000'); + cmp_ok($stats->{evictions}, '<', 2000, 'evictions were less than 2000'); # for ('evictions', 'reclaimed', 'curr_items', 'cmd_set', 'bytes') { # print STDERR "$_: ", $stats->{$_}, "\n"; # } } cmp_ok($hits, '>', 4000, 'were able to fetch back 2/3rds of 8k keys'); +my $stats_done = mem_stats($sock); +cmp_ok($stats_done->{slab_reassign_rescues}, '>', 0, 'some reassign rescues happened'); +cmp_ok($stats_done->{slab_reassign_evictions}, '>', 0, 'some reassing evictions happened');