Skip to content

Commit

Permalink
#173: try to be a good kernel citizen and clone skbs if necessary
Browse files Browse the repository at this point in the history
  • Loading branch information
krizhanovsky committed Oct 11, 2015
1 parent e4a41eb commit 2ff0ce2
Show file tree
Hide file tree
Showing 9 changed files with 60 additions and 112 deletions.
58 changes: 4 additions & 54 deletions linux-3.10.10.patch
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ index 99c9f0c..a6fd726 100644
struct kvec;

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index dec1748..69ae157 100644
index dec1748..f620738 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -418,8 +418,12 @@ struct sk_buff {
Expand All @@ -383,15 +383,7 @@ index dec1748..69ae157 100644

unsigned long _skb_refdst;
#ifdef CONFIG_XFRM
@@ -627,6 +631,7 @@ static inline struct rtable *skb_rtable(const struct sk_buff *skb)
}

extern void kfree_skb(struct sk_buff *skb);
+extern void kfree_skb_untraced(struct sk_buff *skb);
extern void kfree_skb_list(struct sk_buff *segs);
extern void skb_tx_error(struct sk_buff *skb);
extern void consume_skb(struct sk_buff *skb);
@@ -2933,5 +2938,38 @@ static inline bool skb_head_is_locked(const struct sk_buff *skb)
@@ -2933,5 +2937,38 @@ static inline bool skb_head_is_locked(const struct sk_buff *skb)
{
return !skb->head_frag || skb_cloned(skb);
}
Expand Down Expand Up @@ -484,19 +476,6 @@ index 0000000..f5f3f38
+
+#endif /* __TEMPESTA_H__ */
+
diff --git a/include/net/sock.h b/include/net/sock.h
index 66772cf..436c4d8 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1417,7 +1417,7 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
sk->sk_wmem_queued -= skb->truesize;
sk_mem_uncharge(sk, skb->truesize);
- __kfree_skb(skb);
+ kfree_skb_untraced(skb);
}

/* Used by processes to "lock" a socket state, so that
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5bba80f..407a603 100644
--- a/include/net/tcp.h
Expand Down Expand Up @@ -883,7 +862,7 @@ index 4425148..aad84f4 100644
}
+EXPORT_SYMBOL(reqsk_fastopen_remove);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1c1738c..5864702 100644
index 1c1738c..ecc80f9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -619,6 +619,9 @@ static void skb_release_all(struct sk_buff *skb)
Expand All @@ -896,36 +875,7 @@ index 1c1738c..5864702 100644
skb_release_all(skb);
kfree_skbmem(skb);
}
@@ -696,6 +699,28 @@ void consume_skb(struct sk_buff *skb)
}
EXPORT_SYMBOL(consume_skb);

+/**
+ * kfree_skb_untraced - free an skbuff
+ * @skb: buffer to free
+ *
+ * Drop a reference to the buffer and free it if the usage count has
+ * hit zero. Functions identically to kfree_skb() or consume_skb(),
+ * but kfree_skb_untraced() doesn't make any assumptions on semantics,
+ * i.e. whether the frame is being dropped after a failure or consumed.
+ * Consequently, it doesn't contain any instrumentation for tracing.
+ */
+void kfree_skb_untraced(struct sk_buff *skb)
+{
+ if (unlikely(!skb))
+ return;
+ if (likely(atomic_read(&skb->users) == 1))
+ smp_rmb();
+ else if (likely(!atomic_dec_and_test(&skb->users)))
+ return;
+ __kfree_skb(skb);
+}
+EXPORT_SYMBOL(kfree_skb_untraced);
+
static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
new->tstamp = old->tstamp;
@@ -715,7 +740,9 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
@@ -715,7 +718,9 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#ifdef CONFIG_XFRM
new->sp = secpath_get(old->sp);
#endif
Expand Down
12 changes: 11 additions & 1 deletion tempesta_fw/cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ tfw_cache_copy_str(char **p, TdbVRec **trec, TfwStr *src, size_t tot_len)
int room = (char *)(*trec + 1) + (*trec)->len - *p;
BUG_ON(room < 0);

TFW_DBG3("copy [%.*s](%u) to rec=%p(len=%u), p=%p"
TFW_DBG3("Cache: copy [%.*s](%u) to rec=%p(len=%u), p=%p"
" tot_len=%lu room=%d copied=%ld\n",
min(10, (int)src->len), (char *)src->ptr, src->len,
*trec, (*trec)->len, *p, tot_len, room, copied);
Expand Down Expand Up @@ -417,6 +417,7 @@ __cache_req_process_node(TfwHttpReq *req, unsigned long key,

/* TODO process collisions. */

TFW_DBG("Cache: service request w/ key %lx\n", key);
if (!ce->resp)
if (tfw_cache_build_resp(ce))
/*
Expand All @@ -428,6 +429,15 @@ __cache_req_process_node(TfwHttpReq *req, unsigned long key,
goto finish_req_processing;

/* We have already assembled response. */
/*
* FIXME #173 -> #122
* The response must be adjusted independently for each client
* and send to different sockets, so we can't use the same skb.
* We must generate new skb with cached data as page fragments
* and copy headers. The headers must be adjusted after the function.
* Probably, we shouldn't copy to TDB headers which must be generated
* from scratch (e.g. Server or Connection).
*/
resp = ce->resp;

finish_req_processing:
Expand Down
6 changes: 4 additions & 2 deletions tempesta_fw/connection.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,13 @@ tfw_connection_destruct(TfwConnection *conn)
* for the life of @conn instance. The socket itself may have been
* closed, but not deleted. ss_send() makes sure that data is sent
* only on an active socket.
*
* @unref_data is true if we won't use @msg any more.
*/
void
tfw_connection_send(TfwConnection *conn, TfwMsg *msg)
tfw_connection_send(TfwConnection *conn, TfwMsg *msg, bool unref_data)
{
ss_send(conn->sk, &msg->skb_list);
ss_send(conn->sk, &msg->skb_list, unref_data);
}

/*
Expand Down
2 changes: 1 addition & 1 deletion tempesta_fw/connection.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ tfw_connection_validate_cleanup(TfwConnection *conn)

void tfw_connection_hooks_register(TfwConnHooks *hooks, int type);
void tfw_connection_hooks_unregister(int type);
void tfw_connection_send(TfwConnection *conn, TfwMsg *msg);
void tfw_connection_send(TfwConnection *conn, TfwMsg *msg, bool unref_data);

/* Generic helpers, used for both client and server connections. */
void tfw_connection_init(TfwConnection *conn);
Expand Down
10 changes: 6 additions & 4 deletions tempesta_fw/http.c
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ tfw_http_send_resp(TfwHttpMsg *hm, const TfwStr *msg, const TfwStr *date)
tfw_http_prep_date(date->ptr);
tfw_http_msg_write(&it, resp, msg);

tfw_connection_send(hm->conn, (TfwMsg *)resp);
tfw_connection_send(hm->conn, (TfwMsg *)resp, true);
tfw_http_msg_free(resp);

return 0;
Expand Down Expand Up @@ -481,9 +481,11 @@ tfw_http_req_cache_cb(TfwHttpReq *req, TfwHttpResp *resp, void *data)
if (resp) {
/*
* We have prepared response, send it as is.
* The response is pass through ot was generated from
* the cache, so unrefer all its data.
* TODO should we adjust it somehow?
*/
tfw_connection_send(req->conn, (TfwMsg *)resp);
tfw_connection_send(req->conn, (TfwMsg *)resp, true);
return;
} else {
/*
Expand Down Expand Up @@ -513,7 +515,7 @@ tfw_http_req_cache_cb(TfwHttpReq *req, TfwHttpResp *resp, void *data)
spin_unlock(&conn->msg_qlock);

/* Send request to the server. */
tfw_connection_send(conn, (TfwMsg *)req);
tfw_connection_send(conn, (TfwMsg *)req, false);
goto conn_put;
}
BUG(); /* NOTREACHED */
Expand Down Expand Up @@ -766,7 +768,7 @@ tfw_http_resp_process(TfwConnection *conn, struct sk_buff *skb,
* The cache frees the response and the request.
* conn->msg will get NULLed in the process.
*/
tfw_connection_send(hmreq->conn, (TfwMsg *)hmresp);
tfw_connection_send(hmreq->conn, (TfwMsg *)hmresp, false);
tfw_cache_add((TfwHttpResp *)hmresp, (TfwHttpReq *)hmreq);

return TFW_PASS;
Expand Down
2 changes: 1 addition & 1 deletion tempesta_fw/http_sticky.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ tfw_http_sticky_send_302(TfwHttpMsg *hm)
if ((resp = tfw_http_prep_302(hm, &cookie)) == NULL) {
return -1;
}
tfw_connection_send(conn, (TfwMsg *)resp);
tfw_connection_send(conn, (TfwMsg *)resp, true);
tfw_http_msg_free(resp);

return 0;
Expand Down
71 changes: 27 additions & 44 deletions tempesta_fw/sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ ss_skb_route(struct sk_buff *skb, struct tcp_sock *tp)
* TODO use MSG_MORE untill we reach end of message.
*/
void
ss_send(struct sock *sk, const SsSkbList *skb_list)
ss_send(struct sock *sk, SsSkbList *skb_list, bool pass_skb)
{
struct sk_buff *skb, *iskb;
struct tcp_sock *tp;
Expand All @@ -156,47 +156,27 @@ ss_send(struct sock *sk, const SsSkbList *skb_list)
for (iskb = ss_skb_peek(skb_list), skb = iskb;
iskb; iskb = ss_skb_next(skb_list, iskb), skb = iskb)
{
#if 0 /* Original code. */
/*
* When SKBs are removed from socket's receive queue and
* passed to Tempesta, control over these SKBs is passed
* from kernel to Tempesta as well. Tempesta becomes the
* sole owner of these SKBs. When these SKBs are sent out
* to a client or a backend by Tempesta, the kernel becomes
* an extra owner of the SKBs in addition to Tempesta.
* To account for that it's necessary to increment SKB's
* count of users so that SKBs are not freed from under
* Tempesta or from under the kernel.
* Remvoe the skb from Tempesta lists if we won't use it
* or clone it if it's going to be used by Tempesta during
* and after the transmission.
*/
skb_get(skb);
#else /* Workaround. */
/*
* An SKB may be split in the TCP stack. When that happens,
* part of original SKB data is moved to a new SKB that is
* unknown to Tempesta. Tempesta has no control over that
* new SKB and the original data that was moved to it. The
* new SKB is freed after it is successfully transmitted,
* and original data that was moved to the new SKB is freed
* as well. Tempesta keeps pointers into original SKB data
* that point to parts of HTTP message in the SKB. Those
* pointers are required for caching of an HTTP message.
* After the new SKB with part of original data is freed,
* the pointers become invalid.
* Please see issues #173 and #122 for more information.
*
* Until this issue is properly solved, make a copy of each
* SKB as a workaround, and pass the copy to Linux TCP stack.
* That way all original data stay under complete control of
* Tempesta. As a copy is passed to TCP stack, there's no
* need to make these SKBs shared for the purpose of keeping
* control over them.
*/
skb = pskb_copy(skb, GFP_ATOMIC);
if (!skb) {
SS_ERR("Unable to make a copy of original SKB.\n");
continue;
if (pass_skb) {
ss_skb_unlink(skb_list, skb);
} else {
skb = skb_clone(skb, GFP_ATOMIC);
if (!skb) {
SS_ERR("Unable to clone an egress SKB.\n");
/*
* Send what we collected so far.
* The peer should react somehow and we can
* go further instead of hang on the socket.
* FIXME seems dirty...
*/
break;
}
}
#endif

skb->ip_summed = CHECKSUM_PARTIAL;
skb_shinfo(skb)->gso_segs = 0;

Expand Down Expand Up @@ -494,11 +474,14 @@ ss_tcp_process_data(struct sock *sk)
BUG();
/*
* Cloned SKBs come here if a client or a back end are
* on the same host as Tempesta. That's the way it works
* through the loopback interface. That's excessive when
* Tempesta is in the middle, and should be eliminated.
* In the meantime unclone these SKBs as Tempesta needs
* to be able to modify SKB's data.
* on the same host as Tempesta. Cloning is happen in
* tcp_transmit_skb() as it is for all egress packets,
* but packets on loopback go to us as is, i.e. cloned.
*
* Tempesta adjusts skb pointers, but leave original data
* untouched (this is also required to keep pointers of
* parsed out HTTP data structures unchnaged).
* So skb uncloning is enough here.
*/
if (skb_cloned(skb))
pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
Expand Down
2 changes: 1 addition & 1 deletion tempesta_fw/sync_socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ void ss_proto_init(SsProto *proto, const SsHooks *hooks, int type);
void ss_proto_inherit(const SsProto *parent, SsProto *child, int child_type);
void ss_set_callbacks(struct sock *sk);
void ss_set_listen(struct sock *sk);
void ss_send(struct sock *sk, const SsSkbList *skb_list);
void ss_send(struct sock *sk, SsSkbList *skb_list, bool pass_skb);
void ss_close(struct sock *sk);
int ss_sock_create(int family, int type, int protocol, struct sock **res);
void ss_release(struct sock *sk);
Expand Down
9 changes: 5 additions & 4 deletions tempesta_fw/t/unit/test_http_sticky.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ static struct {

/* custom version for testing purposes */
void
tfw_connection_send(TfwConnection *conn, TfwMsg *msg)
tfw_connection_send(TfwConnection *conn, TfwMsg *msg,
bool unused __attribute__((unused)))
{
struct sk_buff *skb;
unsigned int data_off = 0;
Expand Down Expand Up @@ -316,7 +317,7 @@ TEST(http_sticky, req_no_cookie)

/* since response was modified, we need to parse it again */
EXPECT_EQ(http_parse_resp_helper(), 0);
tfw_connection_send(&mock.connection, &mock.hmresp->msg);
tfw_connection_send(&mock.connection, &mock.hmresp->msg, false);

EXPECT_TRUE(mock.tfw_connection_send_was_called);
EXPECT_TRUE(mock.seen_set_cookie_header);
Expand All @@ -343,7 +344,7 @@ TEST(http_sticky, req_have_cookie)

/* since response could be modified, we need to parse it again */
EXPECT_EQ(http_parse_resp_helper(), 0);
tfw_connection_send(&mock.connection, &mock.hmresp->msg);
tfw_connection_send(&mock.connection, &mock.hmresp->msg, false);

/* no Set-Cookie headers are expected */
EXPECT_FALSE(mock.seen_set_cookie_header);
Expand Down Expand Up @@ -387,7 +388,7 @@ TEST(http_sticky, req_have_cookie_enforce)

/* since response could be modified, we need to parse it again */
EXPECT_EQ(http_parse_resp_helper(), 0);
tfw_connection_send(&mock.connection, &mock.hmresp->msg);
tfw_connection_send(&mock.connection, &mock.hmresp->msg, false);

/* no Set-Cookie headers are expected */
EXPECT_FALSE(mock.seen_set_cookie_header);
Expand Down

0 comments on commit 2ff0ce2

Please sign in to comment.