Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Temporal client accounting #1178

Merged
merged 6 commits into from
Feb 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions etc/tempesta_fw.conf
Original file line number Diff line number Diff line change
Expand Up @@ -1129,3 +1129,23 @@
# Default:
# Health monitor is disabled.
#

# TAG: client_db
#
# Path to the client database file used as storage for client information.
# The same as cache_db.
#
# Default:
# client_db /opt/tempesta/db/client.tdb;
#

# TAG: client_tbl_size
#
# Size of the client drop table, in bytes.
#
# Syntax:
# client_tbl_size SIZE
#
# Default:
# client_tbl_size 16777216; # 16MB
#
32 changes: 32 additions & 0 deletions lib/common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/**
* Tempesta kernel library
*
* Copyright (C) 2019 Tempesta Technologies, Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License,
* or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59
* Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef __LIB_COMMON_H__
#define __LIB_COMMON_H__

/*
 * Return the whole-seconds part of the current time as reported by
 * getnstimeofday(); the nanosecond remainder is discarded.
 */
static inline time_t
tfw_current_timestamp(void)
{
	struct timespec now;

	getnstimeofday(&now);

	return now.tv_sec;
}

#endif /* __LIB_COMMON_H__ */
122 changes: 99 additions & 23 deletions tempesta_db/core/htrie.c
Original file line number Diff line number Diff line change
Expand Up @@ -806,6 +806,28 @@ tdb_htrie_lookup(TdbHdr *dbh, unsigned long key)
return TDB_PTR(dbh, o);
}

/**
 * Run @body for each record slot of a bucket collision chain.
 *
 * @dbh		- database header, used to size records and to find the next
 *		  bucket of the chain;
 * @b_tmp	- scratch TdbBucket pointer owned by the caller;
 * @b		- pointer to the pointer to the first bucket of the chain; it is
 *		  advanced along the chain and is NULL once the chain is exhausted;
 * @r		- TdbRec pointer set to the current record before @body runs;
 * @body	- statement executed per record. It may return from the enclosing
 *		  function; in that case the bucket *@b is left read-locked and
 *		  the caller is responsible for releasing it.
 *
 * Buckets are read-locked hand-over-hand: the next bucket of the chain is
 * locked before the current one is unlocked. When the loop finishes normally
 * no bucket lock is held.
 *
 * The first record of a bucket is always passed to @body; any following
 * record is processed only if it fits into the first TDB_HTRIE_MINDREC bytes
 * of the bucket.
 *
 * The whole expansion is wrapped into do { } while (0) so that the macro
 * behaves as a single statement (e.g. under an unbraced if), and every
 * argument is parenthesized. Note that @body runs inside an inner do-while
 * loop, so it must not use 'continue': that would skip advancing @r.
 */
#define TDB_HTRIE_FOREACH_REC(dbh, b_tmp, b, r, body)			\
do {									\
	read_lock_bh(&(*(b))->lock);					\
	do {								\
		(r) = TDB_HTRIE_BCKT_1ST_REC(*(b));			\
		do {							\
			size_t rlen = sizeof(*(r)) +			\
				      TDB_HTRIE_RBODYLEN(dbh, r);	\
			rlen = TDB_HTRIE_RALIGN(rlen);			\
			if ((char *)(r) + rlen - (char *)*(b)		\
			    > TDB_HTRIE_MINDREC				\
			    && (r) != TDB_HTRIE_BCKT_1ST_REC(*(b)))	\
				break;					\
			body;						\
			(r) = (TdbRec *)((char *)(r) + rlen);		\
		} while ((char *)(r) + sizeof(*(r)) - (char *)*(b)	\
			 <= TDB_HTRIE_MINDREC);				\
		/* Lock the next bucket before releasing this one. */	\
		(b_tmp) = TDB_HTRIE_BUCKET_NEXT(dbh, *(b));		\
		if (b_tmp)						\
			read_lock_bh(&(b_tmp)->lock);			\
		read_unlock_bh(&(*(b))->lock);				\
		*(b) = (b_tmp);						\
	} while (*(b));							\
} while (0)
/**
* Iterate over all records in collision chain with locked buckets.
* Buckets are inspected according to following rules:
Expand All @@ -821,29 +843,11 @@ tdb_htrie_bscan_for_rec(TdbHdr *dbh, TdbBucket **b, unsigned long key)
TdbBucket *b_tmp;
TdbRec *r;

read_lock_bh(&(*b)->lock);

do {
r = TDB_HTRIE_BCKT_1ST_REC(*b);
do {
size_t rlen = sizeof(*r) + TDB_HTRIE_RBODYLEN(dbh, r);
rlen = TDB_HTRIE_RALIGN(rlen);
if ((char *)r + rlen - (char *)*b > TDB_HTRIE_MINDREC
&& r != TDB_HTRIE_BCKT_1ST_REC(*b))
break;
if (tdb_live_rec(dbh, r) && r->key == key)
/* Unlock the bucket by tdb_rec_put(). */
return r;
r = (TdbRec *)((char *)r + rlen);
} while ((char *)r + sizeof(*r) - (char *)*b
<= TDB_HTRIE_MINDREC);

b_tmp = TDB_HTRIE_BUCKET_NEXT(dbh, *b);
if (b_tmp)
read_lock_bh(&b_tmp->lock);
read_unlock_bh(&(*b)->lock);
*b = b_tmp;
} while (*b);
TDB_HTRIE_FOREACH_REC(dbh, b_tmp, b, r, {
if (tdb_live_rec(dbh, r) && r->key == key)
/* Unlock the bucket by tdb_rec_put(). */
return r;
});

return NULL;
}
Expand Down Expand Up @@ -931,3 +935,75 @@ tdb_htrie_exit(TdbHdr *dbh)
{
free_percpu(dbh->pcpu);
}

/**
 * Call @fn on the data of every live record in the collision chain that
 * starts at bucket @b.
 *
 * The walk stops as soon as @fn returns a non-zero value: the bucket that is
 * read-locked at that moment is unlocked here and the value is propagated.
 *
 * @return 0 if all records were visited, otherwise the first non-zero value
 * returned by @fn.
 */
static int
tdb_htrie_bucket_walk(TdbHdr *dbh, TdbBucket *b, int (*fn)(void *))
{
	TdbBucket *next;
	TdbRec *rec;

	TDB_HTRIE_FOREACH_REC(dbh, next, &b, rec, {
		/* Dead records are skipped and never stop the walk. */
		int rc = tdb_live_rec(dbh, rec) ? fn(rec->data) : 0;

		if (unlikely(rc)) {
			/* Leaving the loop early: drop the lock we hold. */
			read_unlock_bh(&b->lock);
			return rc;
		}
	});

	return 0;
}

/**
 * Depth-first visit of the trie subtree rooted at index node @node.
 *
 * Every non-empty slot of @node references either a data bucket (the
 * TDB_HTRIE_DBIT flag is set), whose records are handed to @fn through
 * tdb_htrie_bucket_walk(), or a child index node, which is visited
 * recursively.
 *
 * @return 0 if the whole subtree was visited, otherwise the first non-zero
 * value produced for a record, which aborts the traversal.
 */
static int
tdb_htrie_node_visit(TdbHdr *dbh, TdbHtrieNode *node, int (*fn)(void *))
{
	int slot;

	for (slot = 0; slot < TDB_HTRIE_FANOUT; ++slot) {
		unsigned long ref;
		int rc;

		BUG_ON(TDB_HTRIE_RESOLVED(slot));

		ref = node->shifts[slot];
		if (likely(!ref))
			continue;

		/* The referenced offset must lie inside the database area. */
		BUG_ON(TDB_DI2O(ref & ~TDB_HTRIE_DBIT)
		       < TDB_HDR_SZ(dbh) + sizeof(TdbExt)
		       || TDB_DI2O(ref & ~TDB_HTRIE_DBIT) > dbh->dbsz);

		if (!(ref & TDB_HTRIE_DBIT)) {
			/*
			 * Index pointer: descend into the child node.
			 * The recursion depth is hard-limited: the deepest
			 * nesting of this function is 16.
			 */
			rc = tdb_htrie_node_visit(dbh,
						  TDB_PTR(dbh, TDB_II2O(ref)),
						  fn);
		} else {
			TdbBucket *bucket;

			/* Data pointer: strip the flag and resolve it. */
			ref ^= TDB_HTRIE_DBIT;
			BUG_ON(!ref);

			bucket = (TdbBucket *)TDB_PTR(dbh, TDB_DI2O(ref));
			rc = tdb_htrie_bucket_walk(dbh, bucket, fn);
		}

		if (unlikely(rc))
			return rc;
	}

	return 0;
}

/**
 * Walk the whole trie of @dbh starting from its root node and call @fn on
 * the data of each live record found.
 *
 * @return 0 if every record was visited, otherwise the first non-zero value
 * returned by @fn, which stops the walk.
 */
int
tdb_htrie_walk(TdbHdr *dbh, int (*fn)(void *))
{
	return tdb_htrie_node_visit(dbh, TDB_HTRIE_ROOT(dbh), fn);
}
1 change: 1 addition & 0 deletions tempesta_db/core/htrie.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,5 +140,6 @@ TdbRec *tdb_htrie_next_rec(TdbHdr *dbh, TdbRec *r, TdbBucket **b,
unsigned long key);
TdbHdr *tdb_htrie_init(void *p, size_t db_size, unsigned int rec_len);
void tdb_htrie_exit(TdbHdr *dbh);
/*
 * Call @fn on the data of each live record reachable from the trie root.
 * The walk stops at, and returns, the first non-zero value returned by @fn;
 * 0 is returned if all records were visited.
 */
int tdb_htrie_walk(TdbHdr *dbh, int (*fn)(void *));

#endif /* __HTRIE_H__ */
63 changes: 62 additions & 1 deletion tempesta_db/core/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,16 @@
#include "table.h"
#include "tdb_if.h"

#define TDB_VERSION "0.1.16"
#define TDB_VERSION "0.1.17"

MODULE_AUTHOR("Tempesta Technologies");
MODULE_DESCRIPTION("Tempesta DB");
MODULE_VERSION(TDB_VERSION);
MODULE_LICENSE("GPL");

/*
 * Serializes the lookup-then-create sequence in tdb_rec_get_alloc(), so that
 * looking up a record and creating a missing one execute atomically.
 */
static DEFINE_SPINLOCK(get_alloc_lock);

/**
* Create TDB entry and copy @len contiguous bytes from @data to the entry.
*/
Expand All @@ -52,6 +55,7 @@ EXPORT_SYMBOL(tdb_entry_create);

/**
* Create TDB entry to store @len bytes.
* TODO #515 function must hold a lock upon return.
*/
TdbRec *
tdb_entry_alloc(TDB *db, unsigned long key, size_t *len)
Expand Down Expand Up @@ -222,6 +226,63 @@ tdb_get_db(const char *path, int node)
return tdb_get(db);
}

/**
* Look up a record and return it if found; otherwise create a TDB entry to
* store @len bytes. If the record exists, then, since we don't copy returned
* records, we have to lock the memory location where the record is placed and
* the user must call tdb_rec_put() when finished with the record.
*
* The caller must not call sleeping functions during work with the record.
* Typically there is only one large record per bucket, so the bucket lock
* is exactly the same as to lock the record. While there could be many
* small records in a bucket, so the caller should not perform long jobs
* with small records.
*
* @return pointer to the record with the bucket lock acquired if the record
* is found, or a newly created TDB entry with no locks held otherwise.
*
* TODO #515 rework the function in lock-free way.
* TODO #515 TDB must be extended to support small records with constant memory
* address.
*/
TdbRec *
tdb_rec_get_alloc(TDB *db, unsigned long key, size_t *len,
bool (*predicate)(TdbRec *, void (*)(void *), void *),
void (*init)(TdbRec *, void (*)(void *), void *),
void (*cb)(void *), void *data, bool *is_new)
{
TdbIter iter;
TdbRec *r;

spin_lock(&get_alloc_lock);

*is_new = false;
iter = tdb_rec_get(db, key);
while (!TDB_ITER_BAD(iter)) {
if ((*predicate)(iter.rec, cb, data)) {
spin_unlock(&get_alloc_lock);
return iter.rec;
}
tdb_rec_next(db, &iter);
}

*is_new = true;
r = tdb_entry_alloc(db, key, len);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function interface is inconsistent now: we may exit with got and not records. tdb_rec_put() just releases read spin lock for a bucket handling the record, so the bucket can not be split and the record won't be replaced. I reckon the function must exit with acquired bucket read lock.

Moreover, I explored __cache_add_node() and it does exactly the same: a memory area for the record is inserted into HTrie, so it becomes accessible for parallel lookups, but we go on to write the record content (even with possible record extensions!) without any locks. This may lead to retrieving partially written cache records as well as to crashes if a record extension is in progress.

Please review this behavior and if I'm right, then please add a TODO comment to the function and a requirement to fix this to #515.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see that tdb_rec_get() returns with a lock to the bucket held. But couldn't find in the code if tdb_entry_alloc() holds a lock upon return. I believe it is not. So if we return from the function if (*predicate)() produces true, lock is held. But if we create a new record, it is not.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment seems to have been addressed in the wrong way.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Must check that *len after the call is equal to (or larger than) *len before the call. Function tdb_entry_alloc() can allocate less than *len, in which case it returns the actual size of the allocated storage in *len.

init(r, cb, data);

spin_unlock(&get_alloc_lock);

return r;
}
EXPORT_SYMBOL(tdb_rec_get_alloc);

/**
 * Call @fn on the data of every live record stored in @db.
 *
 * Thin exported wrapper around tdb_htrie_walk() applied to the header of @db.
 *
 * @return 0 if all records were visited, otherwise the first non-zero value
 * returned by @fn.
 */
int
tdb_entry_walk(TDB *db, int (*fn)(void *))
{
	int rc = tdb_htrie_walk(db->hdr, fn);

	return rc;
}
EXPORT_SYMBOL(tdb_entry_walk);

/**
* Open database file and @return its descriptor.
* If the database is already opened, then returns the handler.
Expand Down
6 changes: 6 additions & 0 deletions tempesta_db/core/tdb.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,12 @@ TdbIter tdb_rec_get(TDB *db, unsigned long key);
void tdb_rec_next(TDB *db, TdbIter *iter);
void tdb_rec_put(void *rec);
int tdb_info(char *buf, size_t len);
/*
 * Find a record for @key accepted by @predicate, or allocate a new entry for
 * @len bytes and pass it to @init. @is_new reports which of the two happened.
 * @cb and @data are forwarded unchanged to @predicate and @init.
 */
TdbRec * tdb_rec_get_alloc(TDB *db, unsigned long key, size_t *len,
bool (*predicate)(TdbRec *, void (*)(void *), void *),
void (*init)(TdbRec *, void (*)(void *), void *),
void (*cb)(void *), void *data, bool *is_new);
/*
 * Call @fn on the data of every live record in @db; the walk stops at the
 * first non-zero value returned by @fn and returns it.
 */
int tdb_entry_walk(TDB *db, int (*fn)(void *));
/*
 * NOTE(review): no definition of this function is visible in this change;
 * presumably it acquires the lock released by tdb_rec_put() — confirm.
 */
void tdb_rec_get_lock(void *rec);

/* Open/close database handler. */
TDB *tdb_open(const char *path, size_t fsize, unsigned int rec_size, int node);
Expand Down
1 change: 1 addition & 0 deletions tempesta_fw/cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "procfs.h"
#include "sync_socket.h"
#include "work_queue.h"
#include "lib/common.h"

#if MAX_NUMNODES > ((1 << 16) - 1)
#warning "Please set CONFIG_NODES_SHIFT to less than 16"
Expand Down
Loading