Skip to content

Commit

Permalink
slab mover rescues valid items with free chunks
Browse files Browse the repository at this point in the history
During a slab page move, items are typically ejected regardless of their
validity. Now, if an item is valid and free chunks are available in the same
slab class, the item is copied into a free chunk and the original is replaced.

It's up to external systems to try to ensure free chunks are available before
moving a slab page. If no free chunks are available, the slab mover simply
evicts the items as normal.

Also adds counters so we can finally tell how often these cases happen.
  • Loading branch information
dormando committed Nov 19, 2015
1 parent 826403d commit 004e221
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 9 deletions.
8 changes: 4 additions & 4 deletions items.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ uint64_t get_cas_id(void) {
return next_id;
}

static int is_flushed(item *it) {
int item_is_flushed(item *it) {
rel_time_t oldest_live = settings.oldest_live;
uint64_t cas = ITEM_get_cas(it);
uint64_t oldest_cas = settings.oldest_cas;
Expand Down Expand Up @@ -712,7 +712,7 @@ item *do_item_get(const char *key, const size_t nkey, const uint32_t hv) {
}

if (it != NULL) {
if (is_flushed(it)) {
if (item_is_flushed(it)) {
do_item_unlink(it, hv);
do_item_remove(it);
it = NULL;
Expand Down Expand Up @@ -803,7 +803,7 @@ static int lru_pull_tail(const int orig_id, const int cur_lru,

/* Expired or flushed */
if ((search->exptime != 0 && search->exptime < current_time)
|| is_flushed(search)) {
|| item_is_flushed(search)) {
itemstats[id].reclaimed++;
if ((search->it_flags & ITEM_FETCHED) == 0) {
itemstats[id].expired_unfetched++;
Expand Down Expand Up @@ -1199,7 +1199,7 @@ static void item_crawler_evaluate(item *search, uint32_t hv, int i) {
crawlerstats_t *s = &crawlerstats[slab_id];
itemstats[i].crawler_items_checked++;
if ((search->exptime != 0 && search->exptime < current_time)
|| is_flushed(search)) {
|| item_is_flushed(search)) {
itemstats[i].crawler_reclaimed++;
s->reclaimed++;

Expand Down
2 changes: 2 additions & 0 deletions items.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ void do_item_update(item *it); /** update LRU time to current and reposition *
void do_item_update_nolock(item *it);
int do_item_replace(item *it, item *new_it, const uint32_t hv);

int item_is_flushed(item *it);

/*@null@*/
char *item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, unsigned int *bytes);
void item_stats(ADD_STAT add_stats, void *c);
Expand Down
3 changes: 3 additions & 0 deletions memcached.c
Original file line number Diff line number Diff line change
Expand Up @@ -2631,6 +2631,9 @@ static void server_stats(ADD_STAT add_stats, conn *c) {
APPEND_STAT("hash_bytes", "%llu", (unsigned long long)stats.hash_bytes);
APPEND_STAT("hash_is_expanding", "%u", stats.hash_is_expanding);
if (settings.slab_reassign) {
APPEND_STAT("slab_reassign_rescues", "%llu", stats.slab_reassign_rescues);
APPEND_STAT("slab_reassign_evictions", "%llu", stats.slab_reassign_evictions);
APPEND_STAT("slab_reassign_busy_items", "%llu", stats.slab_reassign_busy_items);
APPEND_STAT("slab_reassign_running", "%u", stats.slab_reassign_running);
APPEND_STAT("slabs_moved", "%llu", stats.slabs_moved);
}
Expand Down
3 changes: 3 additions & 0 deletions memcached.h
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,9 @@ struct stats {
uint64_t evicted_unfetched; /* items evicted but never touched */
bool slab_reassign_running; /* slab reassign in progress */
uint64_t slabs_moved; /* times slabs were moved around */
uint64_t slab_reassign_rescues; /* items rescued during slab move */
uint64_t slab_reassign_evictions; /* valid items lost during slab move */
uint64_t slab_reassign_busy_items; /* valid temporarily unmovable */
uint64_t lru_crawler_starts; /* Number of item crawlers kicked off */
bool lru_crawler_running; /* crawl in progress */
uint64_t lru_maintainer_juggles; /* number of LRU bg pokes */
Expand Down
64 changes: 62 additions & 2 deletions slabs.c
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,9 @@ static void *do_slabs_alloc(const size_t size, unsigned int id, unsigned int *to
p = &slabclass[id];
assert(p->sl_curr == 0 || ((item *)p->slots)->slabs_clsid == 0);

*total_chunks = p->slabs * p->perslab;
if (total_chunks != NULL) {
*total_chunks = p->slabs * p->perslab;
}
/* fail unless we have space at the end of a recently allocated page,
we have something on our freelist, or we could allocate a new page */
if (! (p->sl_curr != 0 || do_slabs_newslab(id) != 0)) {
Expand Down Expand Up @@ -606,6 +608,9 @@ static int slab_rebalance_move(void) {
}
}

int save_item = 0;
item *new_it = NULL;
size_t ntotal = 0;
switch (status) {
case MOVE_FROM_LRU:
/* Lock order is LRU locks -> slabs_lock. unlink uses LRU lock.
Expand All @@ -614,17 +619,72 @@ static int slab_rebalance_move(void) {
* (2) + the item is locked. Drop slabs lock, drop item to
* refcount 1 (just our own, then fall through and wipe it
*/
/* Check if expired or flushed */
ntotal = ITEM_ntotal(it);
/* REQUIRES slabs_lock: CHECK FOR cls->sl_curr > 0 */
if ((it->exptime != 0 && it->exptime < current_time)
|| item_is_flushed(it)) {
/* TODO: maybe we only want to save if item is in HOT or
* WARM LRU?
*/
save_item = 0;
} else if (s_cls->sl_curr < 1) {
save_item = 0;
STATS_LOCK();
stats.slab_reassign_evictions++;
STATS_UNLOCK();
} else {
save_item = 1;
/* BIT OF A HACK: if sl_curr is > 0 alloc won't try to
* pull from global pool to satisfy the request.
* FIXME: pile on more flags?
*/
new_it = do_slabs_alloc(ntotal, slab_rebal.s_clsid, NULL);
/* check that memory isn't within the range to clear */
if ((void *)new_it >= slab_rebal.slab_start
&& (void *)new_it < slab_rebal.slab_end) {
/* Pulled something we intend to free. Put it back
* and use the main loop to kill it.
*/
do_slabs_free(new_it, ntotal, slab_rebal.s_clsid);
save_item = 0;
STATS_LOCK();
stats.slab_reassign_evictions++;
STATS_UNLOCK();
}
}
pthread_mutex_unlock(&slabs_lock);
do_item_unlink(it, hv);
if (save_item) {
/* if free memory, memcpy. clear prev/next/h_bucket */
memcpy(new_it, it, ntotal);
new_it->prev = 0;
new_it->next = 0;
new_it->h_next = 0;
/* These are definitely required. else fails assert */
new_it->it_flags &= ~ITEM_LINKED;
new_it->refcount = 0;
do_item_replace(it, new_it, hv);
STATS_LOCK();
stats.slab_reassign_rescues++;
STATS_UNLOCK();
} else {
do_item_unlink(it, hv);
}
item_trylock_unlock(hold_lock);
pthread_mutex_lock(&slabs_lock);
if (save_item == 0) {
s_cls->requested -= ntotal;
}
case MOVE_FROM_SLAB:
it->refcount = 0;
it->it_flags = 0;
it->slabs_clsid = 255;
break;
case MOVE_BUSY:
case MOVE_LOCKED:
STATS_LOCK();
stats.slab_reassign_busy_items++;
STATS_UNLOCK();
slab_rebal.busy_items++;
was_busy++;
break;
Expand Down
9 changes: 6 additions & 3 deletions t/slabs-reassign2.t
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

use strict;
use warnings;
use Test::More tests => 3;
use Test::More tests => 5;
use FindBin qw($Bin);
use lib "$Bin/lib";
use MemcachedTest;
Expand Down Expand Up @@ -42,7 +42,7 @@ for (1 .. $keycount) {
} else {
$body .= scalar(<$sock>) . scalar(<$sock>);
if ($body ne $expected) {
print STDERR "Something terrible has happened: $body\n";
print STDERR "Something terrible has happened: $expected\nBODY:\n$body\nDONETEST\n";
} else {
$hits++;
}
Expand All @@ -52,10 +52,13 @@ for (1 .. $keycount) {

{
my $stats = mem_stats($sock);
cmp_ok($stats->{evictions}, '<', 1000, 'evictions were less than 1000');
cmp_ok($stats->{evictions}, '<', 2000, 'evictions were less than 2000');
# for ('evictions', 'reclaimed', 'curr_items', 'cmd_set', 'bytes') {
# print STDERR "$_: ", $stats->{$_}, "\n";
# }
}

cmp_ok($hits, '>', 4000, 'were able to fetch back 2/3rds of 8k keys');
my $stats_done = mem_stats($sock);
cmp_ok($stats_done->{slab_reassign_rescues}, '>', 0, 'some reassign rescues happened');
cmp_ok($stats_done->{slab_reassign_evictions}, '>', 0, 'some reassing evictions happened');

0 comments on commit 004e221

Please sign in to comment.