From 193d3627a5e9381d5fe7cb6348d00a3fbee00bab Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Tue, 20 Sep 2016 14:21:43 +0300
Subject: [PATCH 01/65] Send responses in correct order - first working
 version. (#419)

---
 tempesta_fw/connection.h           |   8 +-
 tempesta_fw/http.c                 | 443 ++++++++++++++++++++++-------
 tempesta_fw/http.h                 |  16 ++
 tempesta_fw/http_msg.c             |  18 +-
 tempesta_fw/http_sess.c            |  11 +-
 tempesta_fw/msg.h                  |  10 +-
 tempesta_fw/sched/tfw_sched_hash.c |   4 +-
 tempesta_fw/sched/tfw_sched_rr.c   |   2 +-
 tempesta_fw/sock.c                 |   2 +-
 tempesta_fw/sock_srv.c             |   2 +-
 10 files changed, 386 insertions(+), 130 deletions(-)

diff --git a/tempesta_fw/connection.h b/tempesta_fw/connection.h
index 45a2940a0..9093e6046 100644
--- a/tempesta_fw/connection.h
+++ b/tempesta_fw/connection.h
@@ -91,6 +91,7 @@ typedef struct {
 	struct list_head	msg_queue;
 	spinlock_t		msg_qlock;
 	atomic_t		refcnt;
+	unsigned long		flags;
 	struct timer_list	timer;
 	TfwMsg			*msg;
 	TfwPeer 		*peer;
@@ -102,6 +103,9 @@ typedef struct {
 
 #define TFW_CONN_TYPE(c)	((c)->proto.type)
 
+/* Connection flags. */
+#define TFW_CONN_FWD_HOLD	0x0001		/* Hold sending messages */
+
 /**
  * TLS hardened connection.
  */
@@ -153,7 +157,7 @@ extern TfwConnHooks *conn_hooks[TFW_CONN_MAX_PROTOS];
 	tfw_conn_hook_call(TFW_CONN_TYPE2IDX(TFW_CONN_TYPE(c)), c, f)
 
 static inline bool
-tfw_connection_nfo(TfwConnection *conn)
+tfw_connection_live(TfwConnection *conn)
 {
 	return atomic_read(&conn->refcnt) > 0;
 }
@@ -169,7 +173,7 @@ tfw_connection_get(TfwConnection *conn)
  * process, i.e. @refcnt > 0.
  */
 static inline bool
-tfw_connection_get_if_nfo(TfwConnection *conn)
+tfw_connection_get_if_live(TfwConnection *conn)
 {
 	int old, rc = atomic_read(&conn->refcnt);
 
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 9d3d6340f..890282d01 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -184,25 +184,6 @@ tfw_http_prep_302(TfwHttpMsg *resp, TfwHttpReq *req, TfwStr *cookie)
 	return TFW_PASS;
 }
 
-static inline void
-__init_req_ss_flags(TfwHttpReq *req)
-{
-	/*
-	 * We need skb data only for calculating cache key by the request
-	 * fields. In all other cases we can just pass skb data to network
-	 * layer.
-	 */
-	if (tfw_cache_msg_cacheable(req))
-		((TfwMsg *)req)->ss_flags |= SS_F_KEEP_SKB;
-}
-
-static inline void
-__init_resp_ss_flags(TfwHttpResp *resp, const TfwHttpReq *req)
-{
-	if (req->flags & TFW_HTTP_CONN_CLOSE)
-		((TfwMsg *)resp)->ss_flags |= SS_F_CONN_CLOSE;
-}
-
 /*
  * Perform operations common to sending an error response to a client.
  * Set current date in the header of an HTTP error response, and set
@@ -215,7 +196,7 @@ tfw_http_send_resp(TfwHttpReq *req, TfwStr *msg, const TfwStr *date)
 {
 	int conn_flag = req->flags & __TFW_HTTP_CONN_MASK;
 	TfwStr *crlf = __TFW_STR_CH(msg, TFW_STR_CHUNKN(msg) - 1);
-	TfwHttpMsg resp;
+	TfwHttpMsg *hmresp;
 	TfwMsgIter it;
 
 	if (conn_flag) {
@@ -230,15 +211,16 @@ tfw_http_send_resp(TfwHttpReq *req, TfwStr *msg, const TfwStr *date)
 		msg->len += crlf->len - crlf_len;
 	}
 
-	if (!tfw_http_msg_create(&resp, &it, Conn_Srv, msg->len))
+	if (!(hmresp = tfw_http_msg_create(NULL, &it, Conn_Srv, msg->len)))
 		return -ENOMEM;
 
 	tfw_http_prep_date(date->ptr);
-	tfw_http_msg_write(&it, &resp, msg);
+	tfw_http_msg_write(&it, hmresp, msg);
 
-	__init_resp_ss_flags((TfwHttpResp *)&resp, req);
+	__init_resp_ss_flags((TfwHttpResp *)hmresp, req);
+	tfw_http_resp_fwd(req, (TfwHttpResp *)hmresp);
 
-	return tfw_cli_conn_send(req->conn, (TfwMsg *)&resp);
+	return 0;
 }
 
 #define S_200_PART_01	S_200 S_CRLF S_F_DATE
@@ -410,7 +392,7 @@ tfw_http_conn_msg_alloc(TfwConnection *conn)
 
 		spin_lock(&conn->msg_qlock);
 		req = (TfwHttpReq *)list_first_entry_or_null(&conn->msg_queue,
-							     TfwMsg, msg_list);
+							     TfwMsg, fwd_list);
 		spin_unlock(&conn->msg_qlock);
 		if (req && (req->method == TFW_HTTP_METH_HEAD))
 			hm->flags |= TFW_HTTP_VOID_BODY;
@@ -482,29 +464,63 @@ tfw_http_conn_init(TfwConnection *conn)
 /*
  * Connection with a peer is released.
  *
- * For server connections requests that were sent to that server are kept
- * in the queue until a paired response comes. That will never happen now.
- * For each request that has been unanswered send an error response, then
- * delete the request and drop the connection with the client if required.
+ * For server connections the requests that were sent to that server are
+ * kept in the queue until a paired response comes. That will never happen
+ * now, and requests will remain unanswered. For each request in the queue
+ * send an error response to the corresponding client connection. Both the
+ * request and the response will be freed when the response is sent out.
  *
  * Called when a connection is released. There are no users at that time,
  * so locks are not needed.
  */
 static void
-tfw_http_conn_release(TfwConnection *conn)
+tfw_http_conn_release(TfwConnection *srv_conn)
 {
-	TfwMsg *msg, *tmp;
-
-	list_for_each_entry_safe(msg, tmp, &conn->msg_queue, msg_list) {
-		BUG_ON(((TfwHttpMsg *)msg)->conn
-			&& (((TfwHttpMsg *)msg)->conn == conn));
-		list_del(&msg->msg_list);
-		tfw_http_send_502((TfwHttpReq *)msg,
-				  "peer connection released");
-		tfw_http_conn_msg_free((TfwHttpMsg *)msg);
+	TfwHttpReq *req, *tmp;
+	struct list_head *zap_queue = &srv_conn->msg_queue;
+
+	TFW_DBG3("%s: conn = %p\n", __func__, srv_conn);
+	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
+
+	list_for_each_entry_safe(req, tmp, zap_queue, msg.fwd_list) {
+		BUG_ON(req->conn && (req->conn == srv_conn));
+		list_del_init(&req->msg.fwd_list);
+		tfw_http_send_404(req);
 		TFW_INC_STAT_BH(clnt.msgs_otherr);
 	}
-	INIT_LIST_HEAD(&conn->msg_queue);
+	INIT_LIST_HEAD(&srv_conn->msg_queue);
+}
+
+/*
+ * Drop client connection's resources.
+ *
+ * Disintegrate the list, but do not free the requests. These requests
+ * have not been answered yet. They are held in the lists of respective
+ * server connections until paired responses come. If a response comes
+ * after the list is destroyed, then both the request and the response
+ * are dropped at the sight of an empty list. The requests from the
+ * dead client connection are then removed from that server connection.
+ *
+ * Locking is necessary as the list is constantly probed from server
+ * connection threads.
+ */
+static void
+tfw_http_conn_cli_drop(TfwConnection *cli_conn)
+{
+	TfwHttpMsg *hmreq, *tmp;
+	struct list_head *seq_queue = &cli_conn->msg_queue;
+
+	TFW_DBG3("%s: conn = %p\n", __func__, cli_conn);
+	BUG_ON(!(TFW_CONN_TYPE(cli_conn) & Conn_Clnt));
+
+	if (list_empty_careful(seq_queue))
+		return;
+
+	spin_lock(&cli_conn->msg_qlock);
+	list_for_each_entry_safe(hmreq, tmp, seq_queue, msg.seq_list) {
+		list_del_init(&hmreq->msg.seq_list);
+	}
+	spin_unlock(&cli_conn->msg_qlock);
 }
 
 /*
@@ -518,7 +534,9 @@ static void tfw_http_resp_terminate(TfwHttpMsg *hm);
 static void
 tfw_http_conn_drop(TfwConnection *conn)
 {
-	if (conn->msg && (TFW_CONN_TYPE(conn) & Conn_Srv)) {
+	if (TFW_CONN_TYPE(conn) & Conn_Clnt) {
+		tfw_http_conn_cli_drop(conn);
+	} else if (conn->msg) {
 		if (tfw_http_parse_terminate((TfwHttpMsg *)conn->msg))
 			tfw_http_resp_terminate((TfwHttpMsg *)conn->msg);
 	}
@@ -796,6 +814,223 @@ tfw_http_adjust_resp(TfwHttpResp *resp, TfwHttpReq *req)
 				     TFW_HTTP_HDR_SERVER, 0);
 }
 
+static inline bool
+tfw_http_req_is_nonidempotent(TfwHttpReq *req)
+{
+	return (req->flags & TFW_HTTP_NON_IDEMPOTENT);
+}
+
+/*
+ * Forward request @req to server connection @srv_conn.
+ */
+static void
+tfw_http_conn_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
+{
+	TFW_DBG2("%s: srv_conn=[%p], req=[%p]\n", __func__, srv_conn, req);
+	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
+
+	/*
+	 * A request is added to the server connection queue.
+	 * If the connection is not on hold, then the request
+	 * is forwarded to the server immediately. Otherwise,
+	 * it is forwarded when the hold is removed. A server
+	 * connection is put on hold when a non-idempotent
+	 * request is forwarded to the server.
+	 */
+	spin_lock(&srv_conn->msg_qlock);
+	list_add_tail(&req->msg.fwd_list, &srv_conn->msg_queue);
+	if (srv_conn->flags & TFW_CONN_FWD_HOLD) {
+		spin_unlock(&srv_conn->msg_qlock);
+		return;
+	}
+	if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
+		list_del_init(&req->msg.fwd_list);
+		spin_unlock(&srv_conn->msg_qlock);
+		tfw_http_send_500(req);
+		return;
+	}
+	if (tfw_http_req_is_nonidempotent(req))
+		srv_conn->flags |= TFW_CONN_FWD_HOLD;
+	spin_unlock(&srv_conn->msg_qlock);
+}
+
+/*
+ * Forward stalled requests in server connection @srv_conn.
+ */
+static void
+tfw_http_conn_req_fwd_stalled(TfwConnection *srv_conn)
+{
+	TfwHttpReq *req, *tmp, *end;
+	struct list_head zap_queue, err_queue;
+	struct list_head *fwd_queue = &srv_conn->msg_queue;
+
+	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
+	BUG_ON(!(srv_conn->flags & TFW_CONN_FWD_HOLD));
+
+	INIT_LIST_HEAD(&zap_queue);
+	INIT_LIST_HEAD(&err_queue);
+	/*
+	 * Process the server connection's queue of pending requests.
+	 * The queue is locked against concurrent updates: inserts of
+	 * outgoing requests, or closing of the server connection. Do
+	 * it as fast as possible by moving failed requests to other
+	 * queues that can be processed without this lock.
+	 */
+	spin_lock(&srv_conn->msg_qlock);
+	end = container_of(fwd_queue, TfwHttpReq, msg.fwd_list);
+	list_for_each_entry_safe(req, tmp, fwd_queue, msg.fwd_list) {
+		/*
+		 * If the client connection is dead, then don't send
+		 * the request to the server. Move it to @zap_queue
+		 * for deletion later.
+		 */
+		if (!tfw_connection_live(req->conn)) {
+			list_move_tail(&req->msg.fwd_list, &zap_queue);
+			continue;
+		}
+		/*
+		 * If the server connection is dead, then there's
+		 * nothing to do here. The procedure of closing the
+		 * server connection will do whatever is necessary.
+		 */
+		if (!tfw_connection_live(srv_conn))
+			break;
+		/*
+		 * If unable to send to the server connection due to
+		 * an error, then move the request to @err_queue for
+		 * sending a 500 error response later. That is safe
+		 * as the response will be sent in proper seq order.
+		 */
+		if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
+			list_move_tail(&req->msg.fwd_list, &err_queue);
+			continue;
+		}
+		/* Stop sending if the request is non-idempotent. */
+		if (tfw_http_req_is_nonidempotent(req))
+			break;
+	}
+	/*
+	 * If the full server connection queue has been processed,
+	 * then upcoming requests may be sent to the server right away.
+	 */
+	if (req == end)
+		srv_conn->flags &= ~TFW_CONN_FWD_HOLD;
+	spin_unlock(&srv_conn->msg_qlock);
+
+        /*
+	 * Delete requests from dead client connections. The requests
+	 * need to be removed from @seq_list. The process for closing
+	 * a client connection does the same, so there may be certain
+	 * concurrency here.
+	 */
+        list_for_each_entry_safe(req, tmp, &zap_queue, msg.fwd_list) {
+                list_del_init(&req->msg.fwd_list);
+                if (!list_empty_careful(&req->msg.seq_list)) {
+                        spin_lock_bh(&req->conn->msg_qlock);
+                        list_del_init(&req->msg.seq_list);
+                        spin_unlock_bh(&req->conn->msg_qlock);
+                }
+                tfw_http_conn_msg_free((TfwHttpMsg *)req);
+        }
+        /*
+	 * Requests that were not forwarded due to an error. Send an
+	 * error response to a client. The response will be attached
+	 * to the request and sent to the client in proper seq order.
+	 */
+        list_for_each_entry_safe(req, tmp, &err_queue, msg.fwd_list) {
+                list_del_init(&req->msg.fwd_list);
+                tfw_http_send_500(req);
+        }
+}
+
+/*
+ * Forward responses to the client in the correct order.
+ */
+void
+tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
+{
+	TfwHttpReq *tmp;
+	TfwConnection *cli_conn = req->conn;
+	struct list_head out_queue, *seq_queue = &cli_conn->msg_queue;
+
+	TFW_DBG2("%s: req=[%p], resp=[%p]\n", __func__, req, resp);
+
+	INIT_LIST_HEAD(&out_queue);
+	/*
+	 * Starting with the first request on the list, pick consecutive
+	 * requests that have a paired response. Remove those requests
+	 * from the list, and put them on the list of outgoing responses.
+	 *
+	 * However, if the list is empty, then it's either a bug,
+	 * or the client connection had been closed. If it's a bug, then
+	 * the correct order of responses to requests may be broken. The
+	 * client connection needs to be closed.
+	 */
+	spin_lock(&cli_conn->msg_qlock);
+	if (list_empty(seq_queue)) {
+		spin_unlock(&cli_conn->msg_qlock);
+		ss_close_sync(cli_conn->sk, true);
+		tfw_http_conn_msg_free((TfwHttpMsg *)resp);
+		tfw_http_conn_msg_free((TfwHttpMsg *)req);
+		return;
+	}
+	req->resp = (TfwHttpMsg *)resp;
+	do {
+		req = list_first_entry(seq_queue, TfwHttpReq, msg.seq_list);
+		if (req->resp == NULL)
+			break;
+		list_move_tail(&req->msg.seq_list, &out_queue);
+	} while(!list_empty(seq_queue));
+	spin_unlock(&cli_conn->msg_qlock);
+
+	/* Forward responses to the client. */
+	list_for_each_entry_safe(req, tmp, &out_queue, msg.seq_list) {
+		list_del_init(&req->msg.seq_list);
+		resp = (TfwHttpResp *)req->resp;
+		/*
+		 * If the client connection is dead, then discard
+		 * all @req and @resp in the @out_queue. Remaining requests
+		 * from the client in the @seq_queue will be handled when
+		 * the client connection is released.
+		 */
+		if (!tfw_connection_live(cli_conn))
+			goto loop_discard;
+		/*
+		 * Close the client connection in case of an error.
+		 * Otherwise, the correct order of responses may be broken.
+		 *
+		 * FIXME Sending is asynchronous. An error may still occur
+		 * when the response is actually sent out. If that happens
+		 * it breaks the correct order of responses. Perhaps, the
+		 * client connection needs to be closed in that case.
+		 */
+		if (tfw_cli_conn_send(cli_conn, (TfwMsg *)resp)) {
+			ss_close_sync(cli_conn->sk, true);
+			goto loop_discard;
+		}
+		/*
+		 * If this is a response to a non-idempotent request, then
+		 * it's time to continue forwarding requests to the server
+		 * connection the response has come on. If the server is in
+		 * failover state, then the stalled requests will be taken
+		 * care of by the failover processing.
+		 *
+		 * FIXME It might be better to mark the server connection
+		 * somehow, then forward stalled requests for each marked
+		 * server connection outside of this @out_queue processing.
+		 */
+		if (tfw_http_req_is_nonidempotent(req) && resp->conn
+		    && (tfw_connection_get_if_live(resp->conn)))
+		{
+			tfw_http_conn_req_fwd_stalled(resp->conn);
+			tfw_connection_put(resp->conn);
+		}
+loop_discard:
+		tfw_http_conn_msg_free((TfwHttpMsg *)resp);
+		tfw_http_conn_msg_free((TfwHttpMsg *)req);
+	}
+}
+
 /**
  * The request is served from cache.
  * Send the response as is and unrefer its data.
@@ -805,20 +1040,13 @@ tfw_http_req_cache_service(TfwHttpReq *req, TfwHttpResp *resp)
 {
 	if (tfw_http_adjust_resp(resp, req))
 		goto resp_err;
-
-	if (tfw_cli_conn_send(req->conn, (TfwMsg *)resp))
-		goto resp_err;
-
+	tfw_http_resp_fwd(req, resp);
 	TFW_INC_STAT_BH(clnt.msgs_fromcache);
-
-resp_out:
-	tfw_http_conn_msg_free((TfwHttpMsg *)resp);
-	tfw_http_conn_msg_free((TfwHttpMsg *)req);
 	return;
 resp_err:
 	tfw_http_send_500(req, "cannot send response from cache");
 	TFW_INC_STAT_BH(clnt.msgs_otherr);
-	goto resp_out;
+	return;
 }
 
 /**
@@ -832,23 +1060,22 @@ tfw_http_req_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 	int r;
 	TfwConnection *srv_conn = NULL;
 
+	TFW_DBG2("%s: req = %p, resp = %p\n", __func__, req, resp);
+
 	/*
 	 * Sticky cookie module used for HTTP session identification may send
-	 * a response to the client when sticky cookie presence is enforced and
-	 * the cookie is missing from the request.
+	 * a response to the client when sticky cookie presence is enforced
+	 * and the cookie is missing from the request.
 	 *
-	 * HTTP session can be required for the request schduling, so obtain it
-	 * first. However, req->sess still can be NULL if sticky cookies aren't
-	 * enabled.
+	 * HTTP session may be required for request scheduling, so obtain it
+	 * first. However, req->sess still may be NULL if sticky cookies are
+	 * not enabled.
 	 */
 	r = tfw_http_sess_obtain(req);
 	if (r < 0)
 		goto send_500;
-	if (r > 0) {
-		/* Response sent, nothing to do. */
-		tfw_http_conn_msg_free((TfwHttpMsg *)req);
+	if (r > 0)	/* Response sent, nothing to do. */
 		return;
-	}
 
 	if (resp) {
 		tfw_http_req_cache_service(req, resp);
@@ -856,24 +1083,23 @@ tfw_http_req_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 	}
 
 	/*
-	 * Dispatch request to an appropriate server. Schedulers
-	 * should make a decision based on an unmodified request,
-	 * so this must be done before any request mangling.
+	 * Dispatch request to an appropriate server. Schedulers should
+	 * make a decision based on an unmodified request, so this must
+	 * be done before any request mangling.
 	 *
-	 * The code below is typically called on remote NUMA node.
-	 * That's not good, but we must run TDB lookup on the node
-	 * before this is executed, to avoid unnecessary work in
-	 * SoftIRQ and to speed up the cache operation.
-	 * At the same time, cache hits are expected to prevail
-	 * over cache misses, so this is not a frequent path.
+	 * The code below is usually called on a remote NUMA node. That's
+	 * not good, but TDB lookup must be run on the node before it is
+	 * executed, to avoid unnecessary work in SoftIRQ and to speed up
+	 * the cache operation. At the same time, cache hits are expected
+	 * to prevail over cache misses, so this is not a frequent path.
 	 *
-	 * TODO #593: check whether req->sess->srv_conn is alive or
-	 * get a new connection for req->sess->srv_conn->peer from appropriate
-	 * scheduler otherwise. This eliminates long generic scheduling work
-	 * flow. When a first request in the session is scheduled by the generic
-	 * logic, TfwSession->srv_conn must be initialized by poniter to
-	 * appropriate TfwConnection, so all following session hits will be
-	 * scheduled much faster.
+	 * TODO #593: check whether req->sess->srv_conn is alive. If not,
+	 * then get a new connection for req->sess->srv_conn->peer from
+	 * an appropriate scheduler. That eliminates the long generic
+	 * scheduling work flow. When the first request in a session is
+	 * scheduled by the generic logic, TfwSession->srv_conn must be
+	 * initialized to point at the appropriate TfwConnection, so that
+	 * all subsequent session hits are scheduled much faster.
 	 */
 	srv_conn = tfw_sched_get_srv_conn((TfwMsg *)req);
 	if (srv_conn == NULL) {
@@ -884,36 +1110,31 @@ tfw_http_req_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 	if (tfw_http_adjust_req(req))
 		goto send_500;
 
-	/* Add request to the server connection. */
-	spin_lock(&srv_conn->msg_qlock);
-	list_add_tail(&req->msg.msg_list, &srv_conn->msg_queue);
-	spin_unlock(&srv_conn->msg_qlock);
-
 	/* Send request to the server. */
-	if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
-		spin_lock(&srv_conn->msg_qlock);
-		list_del(&req->msg.msg_list);
-		spin_unlock(&srv_conn->msg_qlock);
-		goto send_500;
-	}
-	req->flags |= TFW_HTTP_MSG_SENT;
-
-	TFW_INC_STAT_BH(clnt.msgs_forwarded);
+	tfw_http_conn_req_fwd(srv_conn, req);
 	goto conn_put;
 
 send_502:
 	tfw_http_send_502(req, "request proxy error");
-	tfw_http_conn_msg_free((TfwHttpMsg *)req);
 	TFW_INC_STAT_BH(clnt.msgs_otherr);
 	return;
 send_500:
-	tfw_http_send_500(req, "request proxy error");
-	tfw_http_conn_msg_free((TfwHttpMsg *)req);
+	tfw_http_send_500(req);
 	TFW_INC_STAT_BH(clnt.msgs_otherr);
 conn_put:
 	tfw_connection_put(srv_conn);
 }
 
+/*
+ * Set a flag if the request is non-idempotent.
+*/
+static inline void
+tfw_http_req_set_nonidempotent(TfwHttpReq *req)
+{
+	if (req->method == TFW_HTTP_METH_POST)
+		req->flags |= TFW_HTTP_NON_IDEMPOTENT;
+}
+
 static int
 tfw_http_req_set_context(TfwHttpReq *req)
 {
@@ -1098,6 +1319,17 @@ tfw_http_req_process(TfwConnection *conn, struct sk_buff *skb, unsigned int off)
 		 */
 		tfw_connection_unlink_msg(conn);
 
+		/* Set a flag if the request is non-idempotent. */
+		tfw_http_req_set_nonidempotent(req);
+
+		/*
+		 * Add the request to the list of the client connection
+		 * to preserve the correct order of responses to requests.
+		 */
+		spin_lock(&conn->msg_qlock);
+		list_add_tail(&req->msg.seq_list, &conn->msg_queue);
+		spin_unlock(&conn->msg_qlock);
+
 		/*
 		 * The request should either be stored or released.
 		 * Otherwise we lose the reference to it and get a leak.
@@ -1148,6 +1380,7 @@ tfw_http_req_process(TfwConnection *conn, struct sk_buff *skb, unsigned int off)
 static void
 tfw_http_resp_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 {
+	TFW_DBG2("%s: req = %p, resp = %p\n", __func__, req, resp);
 	/*
 	 * Typically we're at a node far from the node where @resp was
 	 * received, so we do an inter-node transfer. However, this is
@@ -1155,25 +1388,21 @@ tfw_http_resp_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 	 * requests will get responded to by the current node without
 	 * inter-node data transfers. (see tfw_http_req_cache_cb())
 	 */
-	if (tfw_http_adjust_resp(resp, req))
-		goto err;
-
-	if (tfw_cli_conn_send(req->conn, (TfwMsg *)resp))
-		goto err;
-
-	TFW_INC_STAT_BH(serv.msgs_forwarded);
+	if (tfw_http_adjust_resp(resp, req)) {
+		tfw_http_conn_msg_free((TfwHttpMsg *)resp);
+		tfw_http_send_500(req);
+		TFW_INC_STAT_BH(serv.msgs_otherr);
+		return;
+	}
+	tfw_http_resp_fwd(req, resp);
+	/* Responses from cache don't have @resp->conn. */
+TFW_DBG2("%s: resp=[%p] resp->conn=[%p] resp->conn->peer=[%p] resp->conn->peer->apm=[%p]\n",
+	__func__, resp, resp->conn, resp->conn->peer, ((TfwServer *)resp->conn->peer)->apm);
 	if (resp->conn)
 		tfw_apm_update(((TfwServer *)resp->conn->peer)->apm,
 			       resp->jtstamp, resp->jtstamp - req->jtstamp);
-out:
-	/* Now we don't need the request and the response anymore. */
-	tfw_http_conn_msg_free((TfwHttpMsg *)resp);
-	tfw_http_conn_msg_free((TfwHttpMsg *)req);
+	TFW_INC_STAT_BH(serv.msgs_forwarded);
 	return;
-err:
-	tfw_http_send_500(req, "response proxy error");
-	TFW_INC_STAT_BH(serv.msgs_otherr);
-	goto out;
 }
 
 /*
diff --git a/tempesta_fw/http.h b/tempesta_fw/http.h
index dbbb50f68..61714fc6b 100644
--- a/tempesta_fw/http.h
+++ b/tempesta_fw/http.h
@@ -262,6 +262,7 @@ typedef struct {
 #define TFW_HTTP_FIELD_DUPENTRY		0x000200	/* Duplicate field */
 /* URI has form http://authority/path, not just /path */
 #define TFW_HTTP_URI_FULL		0x000400
+#define TFW_HTTP_NON_IDEMPOTENT		0x000800
 
 /* Response flags */
 #define TFW_HTTP_VOID_BODY		0x010000	/* Resp to HEAD req */
@@ -364,6 +365,7 @@ typedef struct {
 	unsigned long		tm_header;
 	unsigned long		tm_bchunk;
 	unsigned long		hash;
+	TfwHttpMsg		*resp;
 } TfwHttpReq;
 
 #define TFW_HTTP_REQ_STR_START(r)	__MSG_STR_START(r)
@@ -412,6 +414,19 @@ tfw_current_timestamp(void)
 	return ts.tv_sec;
 }
 
+static inline void
+__init_req_ss_flags(TfwHttpReq *req)
+{
+	((TfwMsg *)req)->ss_flags |= SS_F_KEEP_SKB;
+}
+
+static inline void
+__init_resp_ss_flags(TfwHttpResp *resp, const TfwHttpReq *req)
+{
+	if (req->flags & TFW_HTTP_CONN_CLOSE)
+		((TfwMsg *)resp)->ss_flags |= SS_F_CONN_CLOSE;
+}
+
 typedef void (*tfw_http_cache_cb_t)(TfwHttpReq *, TfwHttpResp *);
 
 /* Internal (parser) HTTP functions. */
@@ -423,6 +438,7 @@ bool tfw_http_parse_terminate(TfwHttpMsg *hm);
 int tfw_http_msg_process(void *conn, struct sk_buff *skb, unsigned int off);
 unsigned long tfw_http_req_key_calc(TfwHttpReq *req);
 void tfw_http_req_destruct(void *msg);
+void tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp);
 
 /*
  * Functions to send an HTTP error response to a client.
diff --git a/tempesta_fw/http_msg.c b/tempesta_fw/http_msg.c
index 6dbb99bb1..e08a181e9 100644
--- a/tempesta_fw/http_msg.c
+++ b/tempesta_fw/http_msg.c
@@ -627,10 +627,12 @@ tfw_http_msg_hdr_add(TfwHttpMsg *hm, TfwStr *hdr)
 }
 
 /**
- * Allocate skb space for further @hm data writing.
- * Put as much as possible to one skb, TCP GSO will care about segmentation.
+ * Given the total message length as @len, allocate an appropriate number
+ * of SKBs and page fragments to hold the payload, and add them to the
+ * message. Put as much as possible in one SKB. TCP GSO will take care of
+ * segmentation. The allocated payload space will be filled with data.
  *
- * tfw_http_msg_free() is expected to be called for @hm if the function fails.
+ * Call tfw_http_msg_free() for @hm if the function fails.
  */
 static int
 __msg_alloc_skb_data(TfwHttpMsg *hm, size_t len)
@@ -664,7 +666,8 @@ tfw_http_msg_create(TfwHttpMsg *hm, TfwMsgIter *it, int type, size_t data_len)
 	if (hm) {
 		memset(hm, 0, sizeof(*hm));
 		ss_skb_queue_head_init(&hm->msg.skb_list);
-		INIT_LIST_HEAD(&hm->msg.msg_list);
+		INIT_LIST_HEAD(&hm->msg.seq_list);
+		INIT_LIST_HEAD(&hm->msg.fwd_list);
 		if (__msg_alloc_skb_data(hm, data_len))
 			return NULL;
 	} else {
@@ -844,9 +847,7 @@ __hbh_parser_init_resp(TfwHttpResp *resp)
 
 /**
  * Allocate a new HTTP message.
- * Given the total message length as @data_len, it allocates an appropriate
- * number of SKBs and page fragments to hold the payload, and sets them up
- * in Tempesta message.
+ * The space to hold the payload is allocated separately.
  */
 TfwHttpMsg *
 tfw_http_msg_alloc(int type)
@@ -870,8 +871,9 @@ tfw_http_msg_alloc(int type)
 	hm->h_tbl->off = TFW_HTTP_HDR_RAW;
 	memset(hm->h_tbl->tbl, 0, __HHTBL_SZ(1) * sizeof(TfwStr));
 
+	INIT_LIST_HEAD(&hm->msg.fwd_list);
+	INIT_LIST_HEAD(&hm->msg.seq_list);
 	ss_skb_queue_head_init(&hm->msg.skb_list);
-	INIT_LIST_HEAD(&hm->msg.msg_list);
 
 	hm->parser.to_read = -1; /* unknown body size */
 	if (type & Conn_Clnt)
diff --git a/tempesta_fw/http_sess.c b/tempesta_fw/http_sess.c
index 5803602c2..6b8ba61ed 100644
--- a/tempesta_fw/http_sess.c
+++ b/tempesta_fw/http_sess.c
@@ -94,13 +94,14 @@ static struct kmem_cache *sess_cache;
 static int
 tfw_http_sticky_send_302(TfwHttpReq *req, StickyVal *sv)
 {
-	TfwConnection *conn = req->conn;
 	unsigned long ts_be64 = cpu_to_be64(sv->ts);
 	TfwStr chunks[3], cookie = { 0 };
 	DEFINE_TFW_STR(s_eq, "=");
-	TfwHttpMsg resp;
+	TfwHttpMsg *hmresp;
 	char buf[sizeof(*sv) * 2];
 
+	if (!(hmresp = tfw_http_msg_alloc(Conn_Srv)))
+		return -ENOMEM;
 	/*
 	 * Form the cookie as:
 	 *
@@ -124,9 +125,11 @@ tfw_http_sticky_send_302(TfwHttpReq *req, StickyVal *sv)
 	cookie.len = chunks[0].len + chunks[1].len + chunks[2].len;
 	__TFW_STR_CHUNKN_SET(&cookie, 3);
 
-	if (tfw_http_prep_302(&resp, req, &cookie))
+	if (tfw_http_prep_302(hmresp, req, &cookie))
 		return -1;
-	tfw_cli_conn_send(conn, (TfwMsg *)&resp);
+
+	__init_resp_ss_flags((TfwHttpResp *)hmresp, req);
+	tfw_http_resp_fwd(req, (TfwHttpResp *)hmresp);
 
 	return 0;
 }
diff --git a/tempesta_fw/msg.h b/tempesta_fw/msg.h
index 531b80d7e..c47e8bfb7 100644
--- a/tempesta_fw/msg.h
+++ b/tempesta_fw/msg.h
@@ -30,13 +30,15 @@
 #include "sync_socket.h"
 
 /**
- * @msg_list	- messages queue to send to peer;
- * @skb_list	- list of sk_buff's belonging to the message;
+ * @seq_list	- member in the ordered queue of incoming requests;
+ * @fwd_list	- member in the queue of forwarded/backlogged requests;
+ * @skb_list	- list of sk_buff that belong to the message;
  * @ss_flags	- message processing flags;
- * @len		- total body length;
+ * @len		- total message length;
  */
 typedef struct {
-	struct list_head	msg_list;
+	struct list_head	seq_list;
+	struct list_head	fwd_list;
 	int			ss_flags;
 	SsSkbList		skb_list;
 	size_t			len;
diff --git a/tempesta_fw/sched/tfw_sched_hash.c b/tempesta_fw/sched/tfw_sched_hash.c
index 1302a6a28..43a40cbf0 100644
--- a/tempesta_fw/sched/tfw_sched_hash.c
+++ b/tempesta_fw/sched/tfw_sched_hash.c
@@ -146,7 +146,7 @@ tfw_sched_hash_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 	for (tries = 0; tries < __HLIST_SZ(TFW_SG_MAX_CONN); ++tries) {
 		for (ch = sg->sched_data; ch->conn; ++ch) {
 			curr_weight = msg_hash ^ ch->hash;
-			if (likely(tfw_connection_nfo(ch->conn))
+			if (likely(tfw_connection_live(ch->conn))
 			    && curr_weight > best_weight)
 			{
 				best_weight = curr_weight;
@@ -156,7 +156,7 @@ tfw_sched_hash_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 
 		if (unlikely(!best_conn))
 			return NULL;
-		if (tfw_connection_get_if_nfo(best_conn))
+		if (tfw_connection_get_if_live(best_conn))
 			return best_conn;
 	}
 	return NULL;
diff --git a/tempesta_fw/sched/tfw_sched_rr.c b/tempesta_fw/sched/tfw_sched_rr.c
index 84cb167ad..4f90b6052 100644
--- a/tempesta_fw/sched/tfw_sched_rr.c
+++ b/tempesta_fw/sched/tfw_sched_rr.c
@@ -123,7 +123,7 @@ tfw_sched_rr_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 			i = atomic64_inc_return(&srv_cl->rr_counter)
 			    % srv_cl->conn_n;
 			conn = srv_cl->conns[i];
-			if (tfw_connection_get_if_nfo(conn))
+			if (tfw_connection_get_if_live(conn))
 				return conn;
 		}
 	}
diff --git a/tempesta_fw/sock.c b/tempesta_fw/sock.c
index b01f287f2..4aa1da038 100644
--- a/tempesta_fw/sock.c
+++ b/tempesta_fw/sock.c
@@ -524,7 +524,7 @@ ss_linkerror(struct sock *sk)
 int
 __ss_close(struct sock *sk, int flags)
 {
-	if (unlikely(!sk))
+	if (unlikely(!(sk && ss_sock_live(sk))))
 		return SS_OK;
 	sk_incoming_cpu_update(sk);
 
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index 71a854c5f..6b817bbc1 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -344,7 +344,7 @@ tfw_sock_srv_connect_failover(struct sock *sk)
 	 * connection reference to indicate that the connection is in the
 	 * failover state.
 	 */
-	if (tfw_connection_nfo(conn)) {
+	if (tfw_connection_live(conn)) {
 		tfw_connection_put_to_death(conn);
 		tfw_connection_drop(conn);
 		TFW_INC_STAT_BH(serv.conn_disconnects);

From 363b52b76e82c90eb698c41d58fc05ee8c9bc282 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Wed, 5 Oct 2016 19:02:38 +0300
Subject: [PATCH 02/65] Small refactoring of tfw_sg_for_each_srv().

---
 tempesta_fw/server.c | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/tempesta_fw/server.c b/tempesta_fw/server.c
index 52483954f..f40a6a4dc 100644
--- a/tempesta_fw/server.c
+++ b/tempesta_fw/server.c
@@ -206,6 +206,20 @@ tfw_sg_set_sched(TfwSrvGroup *sg, const char *sched_name)
 	return 0;
 }
 
+static int
+__tfw_sg_for_each_srv(TfwSrvGroup *sg, int (*cb)(TfwServer *srv))
+{
+	int ret = 0;
+	TfwServer *srv;
+
+	write_lock(&sg->lock);
+	list_for_each_entry(srv, &sg->srv_list, list)
+		if ((ret = cb(srv)))
+			break;
+	write_unlock(&sg->lock);
+	return ret;
+}
+
 /**
  * Iterate over all server groups and call @cb for each server.
  * @cb is called under spin-lock, so can't sleep.
@@ -215,25 +229,12 @@ int
 tfw_sg_for_each_srv(int (*cb)(TfwServer *srv))
 {
 	int ret = 0;
-	TfwServer *srv;
 	TfwSrvGroup *sg;
 
 	write_lock(&sg_lock);
-
-	list_for_each_entry(sg, &sg_list, list) {
-		write_lock(&sg->lock);
-
-		list_for_each_entry(srv, &sg->srv_list, list) {
-			if ((ret = cb(srv))) {
-				write_unlock(&sg->lock);
-				goto unlock;
-			}
-		}
-
-		write_unlock(&sg->lock);
-	}
-
-unlock:
+	list_for_each_entry(sg, &sg_list, list)
+		if ((ret = __tfw_sg_for_each_srv(sg, cb)))
+			break;
 	write_unlock(&sg_lock);
 	return ret;
 }

From c4b0d070a943d3abff9e656ce4361f65e326d02e Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Wed, 5 Oct 2016 19:41:47 +0300
Subject: [PATCH 03/65] Introduce 'connect_retries' directive. (#419)

Limit the number of reconnect attempts, and use the initial timeout
between the attempts. The timeout still grows exponentially as the
number of attempts increases.
---
 tempesta_fw/sock_srv.c | 184 +++++++++++++++++++++++++++++++++--------
 1 file changed, 148 insertions(+), 36 deletions(-)

diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index 6b817bbc1..313326254 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -55,6 +55,11 @@
  * ------------------------------------------------------------------------
  */
 
+/*
+ * Default number of reconnect attempts. Zero means unlimited number.
+ */
+#define TFW_SOCK_SRV_RETRY_ATTEMPTS_DEF	0		/* default value */
+
 /**
  * TfwConnection extension for server sockets.
  *
@@ -112,6 +117,7 @@ typedef struct {
 	TfwConnection		conn;
 	unsigned long		timeout;
 	unsigned int		attempts;
+	unsigned int		max_attempts;
 } TfwSrvConnection;
 
 /**
@@ -188,18 +194,18 @@ static inline void
 tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
 {
 	/*
-	 * Timeout between connect attempts is increased with each unsuccessful
-	 * attempt. Length of the timeout is decided with a variant of
-	 * exponential backoff delay algorithm.
+	 * Timeout between connect attempts is increased with each
+	 * unsuccessful attempt. Length of the timeout is decided with
+	 * a variant of exponential backoff delay algorithm.
 	 *
-	 * It's essential that the new connection is established and failed
-	 * connection is restored as fast as possible, so the min retry interval
-	 * is set to 1. The next, second, step is good for loopback
-	 * reconnection, e.g. if upstream is configured to reset connection
-	 * periodically. Following steps are almost pure backoff starting from
-	 * 100ms, good RTT for fast 10Gbps link. We do not increase timeout
-	 * after 1 second as it have moderate overhead and still good in
-	 * response time.
+	 * It's essential that the new connection is established and the
+	 * failed connection is restored ASAP, so the min retry interval
+	 * is set to 1. The next step is good for loopback reconnection,
+	 * e.g. if an upstream is configured to reset a connection
+	 * periodically. The following steps are almost pure backoff algo
+	 * starting from 100ms, which is a good RTT for a fast 10Gbps link.
+	 * The timeout is not increased after 1 second as it has moderate
+	 * overhead, and it's still good in response time.
 	 */
 	static const unsigned long timeouts[] = { 1, 10, 100, 250, 500, 1000 };
 
@@ -207,6 +213,17 @@ tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
 	if (unlikely(!ss_active()))
 		return;
 
+	if (unlikely(srv_conn->max_attempts
+		     && (srv_conn->attempts >= srv_conn->max_attempts)))
+	{
+		TfwAddr *srv_addr = &srv_conn->conn.peer->addr;
+		char s_addr[TFW_ADDR_STR_BUF_SIZE] = { 0 };
+		tfw_addr_ntop(srv_addr, s_addr, sizeof(s_addr));
+		TFW_WARN("The limit of [%d] on reconnect attempts exceeded. "
+			 "The server connection [%s] is down permanently.\n",
+			 srv_conn->max_attempts, s_addr);
+		return;
+	}
 	if (srv_conn->attempts < ARRAY_SIZE(timeouts)) {
 		srv_conn->timeout = timeouts[srv_conn->attempts];
 		TFW_DBG_ADDR("Cannot establish connection",
@@ -530,7 +547,52 @@ tfw_sock_srv_delete_all_conns(void)
  * ------------------------------------------------------------------------
  */
 
-#define TFW_SRV_CFG_DEF_CONNS_N		"4"
+#define TFW_SRV_CFG_DEF_CONNS_N		"32"
+
+static int tfw_srv_cfg_in_attempts = TFW_SOCK_SRV_RETRY_ATTEMPTS_DEF;
+static int tfw_srv_cfg_out_attempts = TFW_SOCK_SRV_RETRY_ATTEMPTS_DEF;
+
+static int
+tfw_srv_cfg_set_conn_retries(TfwServer *srv, int attempts)
+{
+	TfwSrvConnection *srv_conn, *tmp;
+
+	list_for_each_entry_safe(srv_conn, tmp, &srv->conn_list, conn.list)
+		srv_conn->max_attempts = attempts;
+
+	return 0;
+}
+
+static int
+tfw_srv_cfg_handle_conn_retries(TfwCfgSpec *cs, TfwCfgEntry *ce, int *attempts)
+{
+	int ret;
+
+	if (ce->val_n != 1) {
+		TFW_ERR("%s: Invalid number of arguments: %zd\n",
+			cs->name, ce->val_n);
+		return -EINVAL;
+	}
+
+	if ((ret = tfw_cfg_parse_int(ce->vals[0], attempts)))
+		return ret;
+
+	return 0;
+}
+
+static int
+tfw_srv_cfg_handle_in_conn_retries(TfwCfgSpec *cs, TfwCfgEntry *ce)
+{
+	return tfw_srv_cfg_handle_conn_retries(cs, ce,
+					       &tfw_srv_cfg_in_attempts);
+}
+
+static int
+tfw_srv_cfg_handle_out_conn_retries(TfwCfgSpec *cs, TfwCfgEntry *ce)
+{
+	return tfw_srv_cfg_handle_conn_retries(cs, ce,
+					       &tfw_srv_cfg_out_attempts);
+}
 
 /**
  * A "srv_group" which is currently being parsed.
@@ -549,7 +611,7 @@ static TfwScheduler *tfw_srv_cfg_dflt_sched;
  *
  * Every server is simply added to the tfw_srv_cfg_curr_group.
  */
-static int
+static TfwServer *
 tfw_srv_cfg_handle_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
 	TfwAddr addr;
@@ -559,38 +621,51 @@ tfw_srv_cfg_handle_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
 
 	BUG_ON(!tfw_srv_cfg_curr_group);
 
-	r = tfw_cfg_check_val_n(ce, 1);
-	if (r)
-		return -EINVAL;
+	if ((r = tfw_cfg_check_val_n(ce, 1)))
+		return NULL;
 
 	in_addr = ce->vals[0];
 	in_conns_n = tfw_cfg_get_attr(ce, "conns_n", TFW_SRV_CFG_DEF_CONNS_N);
 
-	r = tfw_addr_pton(&TFW_STR_FROM(in_addr), &addr);
-	if (r)
-		return r;
-	r = tfw_cfg_parse_int(in_conns_n, &conns_n);
-	if (r)
-		return r;
+	if ((r = tfw_addr_pton(&TFW_STR_FROM(in_addr), &addr)))
+		return NULL;
+	if ((r = tfw_cfg_parse_int(in_conns_n, &conns_n)))
+		return NULL;
 
 	if (conns_n > TFW_SRV_MAX_CONN) {
 		TFW_ERR("can't use more than %d connections", TFW_SRV_MAX_CONN);
-		return -EINVAL;
+		return NULL;
 	}
 
-	srv = tfw_server_create(&addr);
-	if (!srv) {
+	if (!(srv = tfw_server_create(&addr))) {
 		TFW_ERR("can't create a server socket\n");
-		return -EPERM;
+		return NULL;
 	}
 	tfw_sg_add(tfw_srv_cfg_curr_group, srv);
 
-	r = tfw_sock_srv_add_conns(srv, conns_n);
-	if (r) {
+	if ((r = tfw_sock_srv_add_conns(srv, conns_n))) {
 		TFW_ERR("can't add connections to the server\n");
-		return r;
+		return NULL;
 	}
 
+	return srv;
+}
+
+static TfwServer *tfw_srv_cfg_in_lst[TFW_SG_MAX_SRV];
+static int tfw_srv_cfg_in_lstsz = 0;
+static int tfw_srv_cfg_out_lstsz = 0;
+
+static int
+tfw_srv_cfg_handle_in_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
+{
+	TfwServer *srv;
+
+	if (tfw_srv_cfg_in_lstsz >= TFW_SG_MAX_SRV)
+		return -EINVAL;
+	if (!(srv = tfw_srv_cfg_handle_server(cs, ce)))
+		return -EINVAL;
+	tfw_srv_cfg_in_lst[tfw_srv_cfg_in_lstsz++] = srv;
+
 	return 0;
 }
 
@@ -614,13 +689,16 @@ tfw_srv_cfg_handle_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
  *    }
  */
 static int
-tfw_srv_cfg_handle_server_outside_group(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_srv_cfg_handle_out_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
 	int ret;
+	TfwServer *srv;
 	const char *dflt_sched_name;
 	static const char __read_mostly s_default[] = "default";
 	TfwSrvGroup *sg = tfw_sg_lookup(s_default);
 
+	if (tfw_srv_cfg_out_lstsz >= TFW_SG_MAX_SRV)
+		return -EINVAL;
 	/* The group "default" is created implicitly. */
 	if (sg == NULL) {
 		if ((sg = tfw_sg_new(s_default, GFP_KERNEL)) == NULL) {
@@ -639,7 +717,12 @@ tfw_srv_cfg_handle_server_outside_group(TfwCfgSpec *cs, TfwCfgEntry *ce)
 	}
 	tfw_srv_cfg_curr_group = sg;
 
-	return tfw_srv_cfg_handle_server(cs, ce);
+	if (!(srv = tfw_srv_cfg_handle_server(cs, ce)))
+		return -EINVAL;
+
+	tfw_srv_cfg_set_conn_retries(srv, tfw_srv_cfg_out_attempts);
+
+	return 0;
 }
 
 /**
@@ -685,6 +768,10 @@ tfw_srv_cfg_begin_srv_group(TfwCfgSpec *cs, TfwCfgEntry *ce)
 
 	/* Set the current group. All nested "server"s are added to it. */
 	tfw_srv_cfg_curr_group = sg;
+
+	tfw_srv_cfg_in_lstsz = 0;
+	tfw_srv_cfg_in_attempts = tfw_srv_cfg_out_attempts;
+
 	return 0;
 }
 
@@ -701,23 +788,32 @@ tfw_srv_cfg_begin_srv_group(TfwCfgSpec *cs, TfwCfgEntry *ce)
 static int
 tfw_srv_cfg_finish_srv_group(TfwCfgSpec *cs)
 {
+	int i;
+
 	BUG_ON(!tfw_srv_cfg_curr_group);
 	BUG_ON(list_empty(&tfw_srv_cfg_curr_group->srv_list));
 	TFW_DBG("finish srv_group: %s\n", tfw_srv_cfg_curr_group->name);
+
+	for (i = 0; i < tfw_srv_cfg_in_lstsz; ++i)
+		tfw_srv_cfg_set_conn_retries(tfw_srv_cfg_in_lst[i],
+					     tfw_srv_cfg_in_attempts);
 	tfw_srv_cfg_curr_group = NULL;
+
 	return 0;
 }
 
 static int
-tfw_srv_cfg_handle_sched_outside_group(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_srv_cfg_handle_sched(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
 	if (tfw_cfg_check_val_n(ce, 1))
 		return -EINVAL;
+
 	tfw_srv_cfg_dflt_sched = tfw_sched_lookup(ce->vals[0]);
 	if (tfw_srv_cfg_dflt_sched == NULL) {
 		TFW_ERR("Unrecognized scheduler: '%s'\n", ce->vals[0]);
 		return -EINVAL;
 	}
+
 	return 0;
 }
 
@@ -735,11 +831,19 @@ tfw_srv_cfg_clean_srv_groups(TfwCfgSpec *cs)
 static TfwCfgSpec tfw_sock_srv_cfg_srv_group_specs[] = {
 	{
 		"server", NULL,
-		tfw_srv_cfg_handle_server,
+		tfw_srv_cfg_handle_in_server,
 		.allow_repeat = true,
 		.cleanup = tfw_srv_cfg_clean_srv_groups
 	},
-	{ }
+	{
+		"connect_retries",
+		NULL,
+		tfw_srv_cfg_handle_in_conn_retries,
+		.allow_none = true,
+		.allow_repeat = false,
+		.cleanup = tfw_srv_cfg_clean_srv_groups,
+	},
+	{}
 };
 
 TfwCfgMod tfw_sock_srv_cfg_mod = {
@@ -750,7 +854,15 @@ TfwCfgMod tfw_sock_srv_cfg_mod = {
 		{
 			"server",
 			NULL,
-			tfw_srv_cfg_handle_server_outside_group,
+			tfw_srv_cfg_handle_out_server,
+			.allow_none = true,
+			.allow_repeat = true,
+			.cleanup = tfw_srv_cfg_clean_srv_groups,
+		},
+		{
+			"connect_retries",
+			NULL,
+			tfw_srv_cfg_handle_out_conn_retries,
 			.allow_none = true,
 			.allow_repeat = true,
 			.cleanup = tfw_srv_cfg_clean_srv_groups,
@@ -758,7 +870,7 @@ TfwCfgMod tfw_sock_srv_cfg_mod = {
 		{
 			"sched",
 			NULL,
-			tfw_srv_cfg_handle_sched_outside_group,
+			tfw_srv_cfg_handle_sched,
 			.allow_none = true,
 			.allow_repeat = true,
 			.cleanup = tfw_srv_cfg_clean_srv_groups,

From c13332f7b8bd3081e5d0b1c8b359524374e8c6eb Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Fri, 7 Oct 2016 15:08:16 +0300
Subject: [PATCH 04/65] Multiple changes. Remove the HOLD flag. (#419)

The "on hold" state is now derived on the fly as it can be dynamic.
If another request comes from a client after a non-idempotent
request, then the client knows what it is doing, and these requests
can be pipelined. In that case remove the flag of non-idempotency
from the preceding request.
---
 tempesta_fw/connection.h |   1 +
 tempesta_fw/http.c       | 411 +++++++++++++++++++++++++++------------
 tempesta_fw/http.h       |   6 +-
 3 files changed, 291 insertions(+), 127 deletions(-)

diff --git a/tempesta_fw/connection.h b/tempesta_fw/connection.h
index 9093e6046..d945e400d 100644
--- a/tempesta_fw/connection.h
+++ b/tempesta_fw/connection.h
@@ -94,6 +94,7 @@ typedef struct {
 	unsigned long		flags;
 	struct timer_list	timer;
 	TfwMsg			*msg;
+	TfwMsg			*msg_sent;
 	TfwPeer 		*peer;
 	struct sock		*sk;
 	void			(*destructor)(void *);
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 890282d01..5da70cc19 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -477,12 +477,18 @@ static void
 tfw_http_conn_release(TfwConnection *srv_conn)
 {
 	TfwHttpReq *req, *tmp;
-	struct list_head *zap_queue = &srv_conn->msg_queue;
+	struct list_head zap_queue;
 
-	TFW_DBG3("%s: conn = %p\n", __func__, srv_conn);
+	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
 
-	list_for_each_entry_safe(req, tmp, zap_queue, msg.fwd_list) {
+	INIT_LIST_HEAD(&zap_queue);
+
+	spin_lock(&srv_conn->msg_qlock);
+	list_splice_tail_init(&srv_conn->msg_queue, &zap_queue);
+	spin_unlock(&srv_conn->msg_qlock);
+
+	list_for_each_entry_safe(req, tmp, &zap_queue, msg.fwd_list) {
 		BUG_ON(req->conn && (req->conn == srv_conn));
 		list_del_init(&req->msg.fwd_list);
 		tfw_http_send_404(req);
@@ -510,7 +516,7 @@ tfw_http_conn_cli_drop(TfwConnection *cli_conn)
 	TfwHttpMsg *hmreq, *tmp;
 	struct list_head *seq_queue = &cli_conn->msg_queue;
 
-	TFW_DBG3("%s: conn = %p\n", __func__, cli_conn);
+	TFW_DBG2("%s: conn = %p\n", __func__, cli_conn);
 	BUG_ON(!(TFW_CONN_TYPE(cli_conn) & Conn_Clnt));
 
 	if (list_empty_careful(seq_queue))
@@ -817,58 +823,106 @@ tfw_http_adjust_resp(TfwHttpResp *resp, TfwHttpReq *req)
 static inline bool
 tfw_http_req_is_nonidempotent(TfwHttpReq *req)
 {
-	return (req->flags & TFW_HTTP_NON_IDEMPOTENT);
+	return ((req->flags & __TFW_HTTP_IDEMP_MASK) == TFW_HTTP_NON_IDEMP);
 }
 
 /*
- * Forward request @req to server connection @srv_conn.
+ * Tell if the server connection's forwarding queue is on hold.
+ * It's on hold it the request that was sent last was non-idempotent.
  */
-static void
-tfw_http_conn_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
+static bool
+__tfw_http_conn_on_hold(TfwConnection *srv_conn)
 {
-	TFW_DBG2("%s: srv_conn=[%p], req=[%p]\n", __func__, srv_conn, req);
+	TfwHttpReq *req = (TfwHttpReq *)srv_conn->msg_sent;
+
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
+	return (req && tfw_http_req_is_nonidempotent(req));
+}
 
-	/*
-	 * A request is added to the server connection queue.
-	 * If the connection is not on hold, then the request
-	 * is forwarded to the server immediately. Otherwise,
-	 * it is forwarded when the hold is removed. A server
-	 * connection is put on hold when an non-idempotent
-	 * request is forwarded to the server.
-	 */
-	spin_lock(&srv_conn->msg_qlock);
-	list_add_tail(&req->msg.fwd_list, &srv_conn->msg_queue);
-	if (srv_conn->flags & TFW_CONN_FWD_HOLD) {
-		spin_unlock(&srv_conn->msg_qlock);
-		return;
+/*
+ * Tell if the server connection's forwarding queue is drained.
+ * It's drained if there're no requests in the queue after the
+ * request that was sent last.
+ */
+static bool
+__tfw_http_conn_drained(TfwConnection *srv_conn)
+{
+	TfwMsg *lmsg;
+
+	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
+	if (list_empty(&srv_conn->msg_queue)) {
+		TFW_DBG2("%s: Empty: srv_conn=[%p]\n", __func__, srv_conn);
+		return true;
 	}
-	if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
-		list_del_init(&req->msg.fwd_list);
-		spin_unlock(&srv_conn->msg_qlock);
-		tfw_http_send_500(req);
-		return;
+	if (!srv_conn->msg_sent) {
+		TFW_DBG2("%s: None sent: srv_conn=[%p]\n", __func__, srv_conn);
+		return false;
 	}
-	if (tfw_http_req_is_nonidempotent(req))
-		srv_conn->flags |= TFW_CONN_FWD_HOLD;
-	spin_unlock(&srv_conn->msg_qlock);
+	lmsg = list_last_entry(&srv_conn->msg_queue, TfwMsg, seq_list);
+	if (srv_conn->msg_sent == lmsg)
+		return true;
+	TFW_DBG2("%s: Some not sent: srv_conn=[%p]\n", __func__, srv_conn);
+	return false;
 }
 
 /*
- * Forward stalled requests in server connection @srv_conn.
+ * Delete requests from dead client connections. The requests need
+ * to be removed from @seq_list. The process for closing a client
+ * connection does the same, so there may be certain concurrency.
  */
 static void
-tfw_http_conn_req_fwd_stalled(TfwConnection *srv_conn)
+tfw_http_req_zap_dead(struct list_head *zap_queue)
 {
-	TfwHttpReq *req, *tmp, *end;
-	struct list_head zap_queue, err_queue;
+	TfwHttpReq *req, *tmp;
+
+	TFW_DBG2("%s: queue is %sempty\n",
+		 __func__, list_empty(zap_queue) ? "" : "NOT ");
+
+        list_for_each_entry_safe(req, tmp, zap_queue, msg.fwd_list) {
+                list_del_init(&req->msg.fwd_list);
+                if (!list_empty_careful(&req->msg.seq_list)) {
+                        spin_lock_bh(&req->conn->msg_qlock);
+                        list_del_init(&req->msg.seq_list);
+                        spin_unlock_bh(&req->conn->msg_qlock);
+                }
+                tfw_http_conn_msg_free((TfwHttpMsg *)req);
+        }
+}
+
+/*
+ * Delete requests that were not forwarded due to an error. Send an
+ * error response to a client. The response will be attached to the
+ * request and sent to the client in proper seq order.
+ */
+static void
+tfw_http_req_zap_error(struct list_head *err_queue)
+{
+	TfwHttpReq *req, *tmp;
+
+	TFW_DBG2("%s: queue is %sempty\n",
+		 __func__, list_empty(err_queue) ? "" : "NOT ");
+
+        list_for_each_entry_safe(req, tmp, err_queue, msg.fwd_list) {
+                list_del_init(&req->msg.fwd_list);
+                tfw_http_send_500(req);
+        }
+}
+
+/*
+ * Forward requests in server connection @srv_conn. The requests are
+ * forwarded until a non-idempotent requests is found in the queue.
+ * Must be called with a lock on the server connection's @msg_queue.
+ */
+static void
+__tfw_http_req_fwd_many(TfwConnection *srv_conn,
+			      struct list_head *zap_queue,
+			      struct list_head *err_queue)
+{
+	TfwHttpReq *req = (TfwHttpReq *)srv_conn->msg_sent, *tmp;
 	struct list_head *fwd_queue = &srv_conn->msg_queue;
 
 	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
-	BUG_ON(!(srv_conn->flags & TFW_CONN_FWD_HOLD));
 
-	INIT_LIST_HEAD(&zap_queue);
-	INIT_LIST_HEAD(&err_queue);
 	/*
 	 * Process the server connection's queue of pending requests.
 	 * The queue is locked against concurrent updates: inserts of
@@ -876,16 +930,16 @@ tfw_http_conn_req_fwd_stalled(TfwConnection *srv_conn)
 	 * it as fast as possible by moving failed requests to other
 	 * queues that can be processed without this lock.
 	 */
-	spin_lock(&srv_conn->msg_qlock);
-	end = container_of(fwd_queue, TfwHttpReq, msg.fwd_list);
-	list_for_each_entry_safe(req, tmp, fwd_queue, msg.fwd_list) {
+	list_for_each_entry_safe_continue(req, tmp, fwd_queue, msg.fwd_list) {
 		/*
 		 * If the client connection is dead, then don't send
 		 * the request to the server. Move it to @zap_queue
 		 * for deletion later.
 		 */
 		if (!tfw_connection_live(req->conn)) {
-			list_move_tail(&req->msg.fwd_list, &zap_queue);
+			list_move_tail(&req->msg.fwd_list, zap_queue);
+			TFW_DBG2("%s: Client connection dead: conn=[%p]\n",
+				 __func__, req->conn);
 			continue;
 		}
 		/*
@@ -893,8 +947,11 @@ tfw_http_conn_req_fwd_stalled(TfwConnection *srv_conn)
 		 * nothing to do here. The procedure of closing the
 		 * server connection will do whatever is necessary.
 		 */
-		if (!tfw_connection_live(srv_conn))
+		if (!tfw_connection_live(srv_conn)) {
+			TFW_DBG2("%s: Server connection dead: conn=[%p]\n",
+				 __func__, srv_conn);
 			break;
+		}
 		/*
 		 * If unable to send to the server connection due to
 		 * an error, then move the request to @err_queue for
@@ -902,45 +959,119 @@ tfw_http_conn_req_fwd_stalled(TfwConnection *srv_conn)
 		 * as the response will be sent in proper seq order.
 		 */
 		if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
-			list_move_tail(&req->msg.fwd_list, &err_queue);
+			list_move_tail(&req->msg.fwd_list, err_queue);
+			TFW_DBG2("%s: Error sending to server connection: "
+				 "conn=[%p] req=[%p]\n",
+				 __func__, srv_conn, req);
 			continue;
 		}
+		srv_conn->msg_sent = (TfwMsg *)req;
 		/* Stop sending if the request is non-idempotent. */
 		if (tfw_http_req_is_nonidempotent(req))
 			break;
 	}
-	/*
-	 * If the full server connection queue has been processed,
-	 * then upcoming requests may be send to the server right away.
-	 */
-	if (req == end)
-		srv_conn->flags &= ~TFW_CONN_FWD_HOLD;
+}
+
+#if 0
+/*
+ * Forward stalled requests in server connection @srv_conn.
+ * This is the locked version.
+ */
+static void
+__tfw_http_req_fwd_stalled(TfwConnection *srv_conn)
+{
+	struct list_head zap_queue, err_queue;
+
+	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
+
+	INIT_LIST_HEAD(&zap_queue);
+	INIT_LIST_HEAD(&err_queue);
+
+	__tfw_http_req_fwd_many(srv_conn, &zap_queue, &err_queue);
+	tfw_http_req_zap_dead(&zap_queue);
+	tfw_http_req_zap_error(&err_queue);
+}
+#endif
+
+/*
+ * Forward stalled requests in server connection @srv_conn.
+ * This is the unlocked version.
+ */
+static void
+tfw_http_req_fwd_stalled(TfwConnection *srv_conn)
+{
+	struct list_head zap_queue, err_queue;
+
+	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
+
+	INIT_LIST_HEAD(&zap_queue);
+	INIT_LIST_HEAD(&err_queue);
+
+	spin_lock(&srv_conn->msg_qlock);
+	if (!__tfw_http_conn_drained(srv_conn))
+		__tfw_http_req_fwd_many(srv_conn, &zap_queue, &err_queue);
 	spin_unlock(&srv_conn->msg_qlock);
 
-        /*
-	 * Delete requests from dead client connections. The requests
-	 * need to be removed from @seq_list. The process for closing
-	 * a client connection does the same, so there may be certain
-	 * concurrency here.
-	 */
-        list_for_each_entry_safe(req, tmp, &zap_queue, msg.fwd_list) {
-                list_del_init(&req->msg.fwd_list);
-                if (!list_empty_careful(&req->msg.seq_list)) {
-                        spin_lock_bh(&req->conn->msg_qlock);
-                        list_del_init(&req->msg.seq_list);
-                        spin_unlock_bh(&req->conn->msg_qlock);
-                }
-                tfw_http_conn_msg_free((TfwHttpMsg *)req);
-        }
-        /*
-	 * Requests that were not forwarded due to an error. Send an
-	 * error response to a client. The response will be attached
-	 * to the request and sent to the client in proper seq order.
-	 */
-        list_for_each_entry_safe(req, tmp, &err_queue, msg.fwd_list) {
-                list_del_init(&req->msg.fwd_list);
-                tfw_http_send_500(req);
-        }
+	tfw_http_req_zap_dead(&zap_queue);
+	tfw_http_req_zap_error(&err_queue);
+}
+
+/*
+ * Forward the request @req to server connection @srv_conn.
+ *
+ * The request is added to the server connection (forwarding) queue.
+ * If forwarding is on hold at this moment, then the request will be
+ * forwarded later. Otherwise, if the queue is drained, then forward
+ * the request to the server immediately. If the queue is not drained,
+ * then forward all stalled requests to the server.
+ *
+ * Forwarding to a server is considered to be on hold after
+ * a non-idempotent request is forwarded to the server. The hold
+ * is removed when the holding non-idempotent request is followed
+ * by another request from the same client, which enables pipelining.
+ */
+static void
+tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
+{
+	bool drained;
+
+	TFW_DBG2("%s: srv_conn=[%p], req=[%p]\n", __func__, srv_conn, req);
+	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
+
+	spin_lock(&srv_conn->msg_qlock);
+	drained = __tfw_http_conn_drained(srv_conn);
+	list_add_tail(&req->msg.fwd_list, &srv_conn->msg_queue);
+	if (__tfw_http_conn_on_hold(srv_conn)) {
+		spin_unlock(&srv_conn->msg_qlock);
+		TFW_DBG2("%s: Server connection is on hold: conn=[%p]\n",
+			 __func__, srv_conn);
+		return;
+	}
+	if (!drained) {
+		struct list_head zap_queue, err_queue;
+
+		TFW_DBG2("%s: Server connection is not drained: conn=[%p]\n",
+			 __func__, srv_conn);
+		INIT_LIST_HEAD(&zap_queue);
+		INIT_LIST_HEAD(&err_queue);
+
+		__tfw_http_req_fwd_many(srv_conn, &zap_queue, &err_queue);
+		spin_unlock(&srv_conn->msg_qlock);
+
+		tfw_http_req_zap_dead(&zap_queue);
+		tfw_http_req_zap_error(&err_queue);
+		return;
+	}
+	if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
+		list_del_init(&req->msg.fwd_list);
+		spin_unlock(&srv_conn->msg_qlock);
+		TFW_DBG2("%s: Error sending to server connection: "
+			 "conn=[%p] req=[%p]\n", __func__, srv_conn, req);
+		tfw_http_send_500(req);
+		return;
+	}
+	srv_conn->msg_sent = (TfwMsg *)req;
+	spin_unlock(&srv_conn->msg_qlock);
 }
 
 /*
@@ -969,6 +1100,8 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 	spin_lock(&cli_conn->msg_qlock);
 	if (list_empty(seq_queue)) {
 		spin_unlock(&cli_conn->msg_qlock);
+		TFW_DBG2("%s: Missing client requests: conn=[%p]\n",
+			 __func__, cli_conn);
 		ss_close_sync(cli_conn->sk, true);
 		tfw_http_conn_msg_free((TfwHttpMsg *)resp);
 		tfw_http_conn_msg_free((TfwHttpMsg *)req);
@@ -988,23 +1121,24 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 		list_del_init(&req->msg.seq_list);
 		resp = (TfwHttpResp *)req->resp;
 		/*
-		 * If the client connection is dead, then discard
-		 * all @req and @resp in the @out_queue. Remaining requests
-		 * from the client in the @seq_queue will be handled when
-		 * the client connection is released.
+		 * If the client connection is dead, then discard all
+		 * @req and @resp in the @out_queue. Remaining requests
+		 * from the client in the @seq_queue will be handled at
+		 * the time the client connection is released.
 		 */
-		if (!tfw_connection_live(cli_conn))
+		if (!tfw_connection_live(cli_conn)) {
+			TFW_DBG2("%s: Client connection dead: conn=[%p]\n",
+				 __func__, cli_conn);
 			goto loop_discard;
+		}
 		/*
 		 * Close the client connection in case of an error.
 		 * Otherwise, the correct order of responses may be broken.
-		 *
-		 * FIXME Sending is asynchronous. An error may still occur
-		 * when the response is actually sent out. If that happens
-		 * it breaks the correct order of responses. Perhaps, the
-		 * client connection needs to be closed in that case.
 		 */
 		if (tfw_cli_conn_send(cli_conn, (TfwMsg *)resp)) {
+			TFW_DBG2("%s: Error sending to client connection: "
+				 "conn=[%p] resp=[%p]\n",
+				 __func__, cli_conn, resp);
 			ss_close_sync(cli_conn->sk, true);
 			goto loop_discard;
 		}
@@ -1017,12 +1151,14 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 		 *
 		 * FIXME It might be better to mark the server connection
 		 * somehow, then forward stalled requests for each marked
-		 * server connection outside of this @out_queue processing.
+		 * server connection outside of the @out_queue processing.
 		 */
-		if (tfw_http_req_is_nonidempotent(req) && resp->conn
-		    && (tfw_connection_get_if_live(resp->conn)))
+		if (tfw_http_req_is_nonidempotent(req)
+		    && resp->conn && tfw_connection_get_if_live(resp->conn))
 		{
-			tfw_http_conn_req_fwd_stalled(resp->conn);
+			TFW_DBG2("%s: Response to non-idempotent request: "
+				 "conn=[%p]\n", __func__, resp->conn);
+			tfw_http_req_fwd_stalled(resp->conn);
 			tfw_connection_put(resp->conn);
 		}
 loop_discard:
@@ -1111,7 +1247,7 @@ tfw_http_req_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 		goto send_500;
 
 	/* Send request to the server. */
-	tfw_http_conn_req_fwd(srv_conn, req);
+	tfw_http_req_fwd(srv_conn, req);
 	goto conn_put;
 
 send_502:
@@ -1125,14 +1261,36 @@ tfw_http_req_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 	tfw_connection_put(srv_conn);
 }
 
-/*
- * Set a flag if the request is idempotent.
-*/
-static inline void
-tfw_http_req_set_nonidempotent(TfwHttpReq *req)
+static void
+tfw_http_req_mark_nonidempotent(TfwHttpReq *req)
 {
 	if (req->method == TFW_HTTP_METH_POST)
-		req->flags |= TFW_HTTP_NON_IDEMPOTENT;
+		req->flags |= TFW_HTTP_NON_IDEMP;
+}
+
+/*
+ * Set a flag if the request is non-idempotent. Add the request to
+ * the list of the client connection to preserve the correct order
+ * of responses. If the request follows a non-idempotent request
+ * in flight, then the preceding request becomes idempotent.
+ */
+static void
+tfw_http_req_add_seq_queue(TfwHttpReq *req)
+{
+	TfwHttpReq *preq;
+	TfwConnection *cli_conn = req->conn;
+	struct list_head *seq_queue = &cli_conn->msg_queue;
+
+	tfw_http_req_mark_nonidempotent(req);
+
+	spin_lock(&cli_conn->msg_qlock);
+	preq = !list_empty(seq_queue)
+	     ? list_last_entry(seq_queue, TfwHttpReq, msg.seq_list)
+	     : NULL;
+	if (preq && (preq->flags & TFW_HTTP_NON_IDEMP))
+		preq->flags |= TFW_HTTP_CHG_IDEMP;
+	list_add_tail(&req->msg.seq_list, seq_queue);
+	spin_unlock(&cli_conn->msg_qlock);
 }
 
 static int
@@ -1319,16 +1477,11 @@ tfw_http_req_process(TfwConnection *conn, struct sk_buff *skb, unsigned int off)
 		 */
 		tfw_connection_unlink_msg(conn);
 
-		/* Set a flag if the request is idempotent. */
-		tfw_http_req_set_nonidempotent(req);
-
 		/*
 		 * Add the request to the list of the client connection
 		 * to preserve the correct order of responses to requests.
 		 */
-		spin_lock(&conn->msg_qlock);
-		list_add_tail(&req->msg.seq_list, &conn->msg_queue);
-		spin_unlock(&conn->msg_qlock);
+		tfw_http_req_add_seq_queue(req);
 
 		/*
 		 * The request should either be stored or released.
@@ -1396,8 +1549,6 @@ tfw_http_resp_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 	}
 	tfw_http_resp_fwd(req, resp);
 	/* Responses from cache don't have @resp->conn. */
-TFW_DBG2("%s: resp=[%p] resp->conn=[%p] resp->conn->peer=[%p] resp->conn->peer->apm=[%p]\n",
-	__func__, resp, resp->conn, resp->conn->peer, ((TfwServer *)resp->conn->peer)->apm);
 	if (resp->conn)
 		tfw_apm_update(((TfwServer *)resp->conn->peer)->apm,
 			       resp->jtstamp, resp->jtstamp - req->jtstamp);
@@ -1416,33 +1567,43 @@ TFW_DBG2("%s: resp=[%p] resp->conn=[%p] resp->conn->peer=[%p] resp->conn->peer->
 static TfwHttpReq *
 tfw_http_popreq(TfwHttpMsg *hmresp)
 {
-	TfwHttpReq *req = NULL;
-	TfwConnection *conn = hmresp->conn;
+	TfwMsg *msg;
+	TfwConnection *srv_conn = hmresp->conn;
+	struct list_head *fwd_queue = &srv_conn->msg_queue;
+
+	spin_lock(&srv_conn->msg_qlock);
+	if (unlikely(list_empty(fwd_queue))) {
+		spin_unlock(&srv_conn->msg_qlock);
+		/* @conn->msg will get NULLed in the process. */
+		TFW_WARN("Paired request missing\n");
+		TFW_WARN("Possible HTTP Response Splitting attack.\n");
+		tfw_http_conn_msg_free(hmresp);
+		TFW_INC_STAT_BH(serv.msgs_otherr);
+		return NULL;
+	}
+	msg = list_first_entry(fwd_queue, TfwMsg, fwd_list);
+	list_del_init(&msg->fwd_list);
+	if (srv_conn->msg_sent == msg)
+		srv_conn->msg_sent = NULL;
+	/*
+	 * If the server connection is no longer on hold, and its queue
+	 * is not drained, then forward pending messages to the server.
+	 */
+	if (!__tfw_http_conn_on_hold(srv_conn)
+	    && !__tfw_http_conn_drained(srv_conn))
+	{
+		struct list_head zap_queue, err_queue;
 
-	spin_lock(&conn->msg_qlock);
+		INIT_LIST_HEAD(&zap_queue);
+		INIT_LIST_HEAD(&err_queue);
 
-	req = list_first_entry_or_null(&conn->msg_queue, TfwHttpReq,
-				       msg.msg_list);
-	if (likely(req)) {
-		list_del(&req->msg.msg_list);
-		spin_unlock(&conn->msg_qlock);
+		__tfw_http_req_fwd_many(srv_conn, &zap_queue, &err_queue);
+		spin_unlock(&srv_conn->msg_qlock);
 
-		while (unlikely(!(req->flags & TFW_HTTP_MSG_SENT))) {
-			/*
-			 * Wait for tfw_connection_send() completion, it
-			 * shouldn't take too long, but don't stress system
-			 * bus by too frequent access to the cache line.
-			 */
-			int i;
-			for (i = 0; i < 10; ++i)
-				cpu_relax();
-		}
+		tfw_http_req_zap_dead(&zap_queue);
+		tfw_http_req_zap_error(&err_queue);
 	} else {
-		spin_unlock(&conn->msg_qlock);
-
-		TFW_WARN("Paired request missing,"
-			 " HTTP Response Splitting attack?\n");
-		TFW_INC_STAT_BH(serv.msgs_otherr);
+		spin_unlock(&srv_conn->msg_qlock);
 	}
 
 	return req;
diff --git a/tempesta_fw/http.h b/tempesta_fw/http.h
index 61714fc6b..6f0f3c8bb 100644
--- a/tempesta_fw/http.h
+++ b/tempesta_fw/http.h
@@ -262,7 +262,9 @@ typedef struct {
 #define TFW_HTTP_FIELD_DUPENTRY		0x000200	/* Duplicate field */
 /* URI has form http://authority/path, not just /path */
 #define TFW_HTTP_URI_FULL		0x000400
-#define TFW_HTTP_NON_IDEMPOTENT		0x000800
+#define TFW_HTTP_CHG_IDEMP		0x001000
+#define TFW_HTTP_NON_IDEMP		0x002000
+#define __TFW_HTTP_IDEMP_MASK		(TFW_HTTP_CHG_IDEMP | TFW_HTTP_NON_IDEMP)
 
 /* Response flags */
 #define TFW_HTTP_VOID_BODY		0x010000	/* Resp to HEAD req */
@@ -298,9 +300,9 @@ typedef struct {
  *			  aren't alowed. So use atomic operations if concurrent
  *			  updates are possible;
  * @content_length	- the value of Content-Length header field;
- * @conn		- connection which the message was received on;
  * @jtstamp		- time the message has been received, in jiffies;
  * @keep_alive		- the value of timeout specified in Keep-Alive header;
+ * @conn		- connection which the message was received on;
  * @crlf		- pointer to CRLF between headers and body;
  * @body		- pointer to the body of a message;
  *

From 31f0f7dd0b06015fd0b117da5d568844a7bb750c Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Wed, 12 Oct 2016 12:05:42 +0300
Subject: [PATCH 05/65] Simplify the code by removing unnecessary logic. (#419)

---
 tempesta_fw/http.c | 163 ++++++++++-----------------------------------
 tempesta_fw/http.h |   4 +-
 2 files changed, 35 insertions(+), 132 deletions(-)

diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 5da70cc19..bf9048089 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -823,14 +823,14 @@ tfw_http_adjust_resp(TfwHttpResp *resp, TfwHttpReq *req)
 static inline bool
 tfw_http_req_is_nonidempotent(TfwHttpReq *req)
 {
-	return ((req->flags & __TFW_HTTP_IDEMP_MASK) == TFW_HTTP_NON_IDEMP);
+	return (req->flags & TFW_HTTP_NON_IDEMP);
 }
 
 /*
  * Tell if the server connection's forwarding queue is on hold.
  * It's on hold it the request that was sent last was non-idempotent.
  */
-static bool
+static inline bool
 __tfw_http_conn_on_hold(TfwConnection *srv_conn)
 {
 	TfwHttpReq *req = (TfwHttpReq *)srv_conn->msg_sent;
@@ -844,7 +844,7 @@ __tfw_http_conn_on_hold(TfwConnection *srv_conn)
  * It's drained if there're no requests in the queue after the
  * request that was sent last.
  */
-static bool
+static inline bool
 __tfw_http_conn_drained(TfwConnection *srv_conn)
 {
 	TfwMsg *lmsg;
@@ -865,28 +865,11 @@ __tfw_http_conn_drained(TfwConnection *srv_conn)
 	return false;
 }
 
-/*
- * Delete requests from dead client connections. The requests need
- * to be removed from @seq_list. The process for closing a client
- * connection does the same, so there may be certain concurrency.
- */
-static void
-tfw_http_req_zap_dead(struct list_head *zap_queue)
+static inline bool
+__tfw_http_conn_req_need_fwd(TfwConnection *srv_conn)
 {
-	TfwHttpReq *req, *tmp;
-
-	TFW_DBG2("%s: queue is %sempty\n",
-		 __func__, list_empty(zap_queue) ? "" : "NOT ");
-
-        list_for_each_entry_safe(req, tmp, zap_queue, msg.fwd_list) {
-                list_del_init(&req->msg.fwd_list);
-                if (!list_empty_careful(&req->msg.seq_list)) {
-                        spin_lock_bh(&req->conn->msg_qlock);
-                        list_del_init(&req->msg.seq_list);
-                        spin_unlock_bh(&req->conn->msg_qlock);
-                }
-                tfw_http_conn_msg_free((TfwHttpMsg *)req);
-        }
+	return (!__tfw_http_conn_on_hold(srv_conn)
+		&& !__tfw_http_conn_drained(srv_conn));
 }
 
 /*
@@ -910,48 +893,29 @@ tfw_http_req_zap_error(struct list_head *err_queue)
 
 /*
  * Forward requests in server connection @srv_conn. The requests are
- * forwarded until a non-idempotent requests is found in the queue.
+ * forwarded until a non-idempotent request is found in the queue.
  * Must be called with a lock on the server connection's @msg_queue.
  */
 static void
-__tfw_http_req_fwd_many(TfwConnection *srv_conn,
-			      struct list_head *zap_queue,
-			      struct list_head *err_queue)
+__tfw_http_req_fwd_many(TfwConnection *srv_conn, struct list_head *err_queue)
 {
-	TfwHttpReq *req = (TfwHttpReq *)srv_conn->msg_sent, *tmp;
+	TfwHttpReq *req, *tmp;
 	struct list_head *fwd_queue = &srv_conn->msg_queue;
 
 	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
+	BUG_ON(list_empty(fwd_queue));
 
 	/*
 	 * Process the server connection's queue of pending requests.
 	 * The queue is locked against concurrent updates: inserts of
 	 * outgoing requests, or closing of the server connection. Do
 	 * it as fast as possible by moving failed requests to other
-	 * queues that can be processed without this lock.
+	 * queues that can be processed without the lock.
 	 */
-	list_for_each_entry_safe_continue(req, tmp, fwd_queue, msg.fwd_list) {
-		/*
-		 * If the client connection is dead, then don't send
-		 * the request to the server. Move it to @zap_queue
-		 * for deletion later.
-		 */
-		if (!tfw_connection_live(req->conn)) {
-			list_move_tail(&req->msg.fwd_list, zap_queue);
-			TFW_DBG2("%s: Client connection dead: conn=[%p]\n",
-				 __func__, req->conn);
-			continue;
-		}
-		/*
-		 * If the server connection is dead, then there's
-		 * nothing to do here. The procedure of closing the
-		 * server connection will do whatever is necessary.
-		 */
-		if (!tfw_connection_live(srv_conn)) {
-			TFW_DBG2("%s: Server connection dead: conn=[%p]\n",
-				 __func__, srv_conn);
-			break;
-		}
+	req = srv_conn->msg_sent
+	    ? (TfwHttpReq *)list_next_entry(srv_conn->msg_sent, fwd_list)
+	    : (TfwHttpReq *)list_first_entry(fwd_queue, TfwMsg, fwd_list);
+	list_for_each_entry_safe_from(req, tmp, fwd_queue, msg.fwd_list) {
 		/*
 		 * If unable to send to the server connection due to
 		 * an error, then move the request to @err_queue for
@@ -972,48 +936,28 @@ __tfw_http_req_fwd_many(TfwConnection *srv_conn,
 	}
 }
 
-#if 0
 /*
  * Forward stalled requests in server connection @srv_conn.
- * This is the locked version.
+ *
+ * This function expects that the queue in the server connection
+ * is locked. The queue is unlocked inside the function, which is
+ * very non-traditional. Please use with caution.
  */
 static void
 __tfw_http_req_fwd_stalled(TfwConnection *srv_conn)
 {
-	struct list_head zap_queue, err_queue;
-
-	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
-
-	INIT_LIST_HEAD(&zap_queue);
-	INIT_LIST_HEAD(&err_queue);
-
-	__tfw_http_req_fwd_many(srv_conn, &zap_queue, &err_queue);
-	tfw_http_req_zap_dead(&zap_queue);
-	tfw_http_req_zap_error(&err_queue);
-}
-#endif
-
-/*
- * Forward stalled requests in server connection @srv_conn.
- * This is the unlocked version.
- */
-static void
-tfw_http_req_fwd_stalled(TfwConnection *srv_conn)
-{
-	struct list_head zap_queue, err_queue;
+	struct list_head err_queue;
 
 	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
+	BUG_ON(!spin_is_locked(&srv_conn->msg_qlock));
 
-	INIT_LIST_HEAD(&zap_queue);
 	INIT_LIST_HEAD(&err_queue);
 
-	spin_lock(&srv_conn->msg_qlock);
-	if (!__tfw_http_conn_drained(srv_conn))
-		__tfw_http_req_fwd_many(srv_conn, &zap_queue, &err_queue);
+	__tfw_http_req_fwd_many(srv_conn, &err_queue);
 	spin_unlock(&srv_conn->msg_qlock);
 
-	tfw_http_req_zap_dead(&zap_queue);
-	tfw_http_req_zap_error(&err_queue);
+	if (!list_empty(&err_queue))
+		tfw_http_req_zap_error(&err_queue);
 }
 
 /*
@@ -1048,18 +992,10 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 		return;
 	}
 	if (!drained) {
-		struct list_head zap_queue, err_queue;
-
 		TFW_DBG2("%s: Server connection is not drained: conn=[%p]\n",
 			 __func__, srv_conn);
-		INIT_LIST_HEAD(&zap_queue);
-		INIT_LIST_HEAD(&err_queue);
-
-		__tfw_http_req_fwd_many(srv_conn, &zap_queue, &err_queue);
-		spin_unlock(&srv_conn->msg_qlock);
-
-		tfw_http_req_zap_dead(&zap_queue);
-		tfw_http_req_zap_error(&err_queue);
+		/* The queue is unlocked inside the function. */
+		__tfw_http_req_fwd_stalled(srv_conn);
 		return;
 	}
 	if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
@@ -1140,26 +1076,6 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 				 "conn=[%p] resp=[%p]\n",
 				 __func__, cli_conn, resp);
 			ss_close_sync(cli_conn->sk, true);
-			goto loop_discard;
-		}
-		/*
-		 * If this is a response to a non-idempotent request, then
-		 * it's time to continue forwarding requests to the server
-		 * connection the response has come on. If the server is in
-		 * failover state, then the stalled requests will be taken
-		 * care of by the failover processing.
-		 *
-		 * FIXME It might be better to mark the server connection
-		 * somehow, then forward stalled requests for each marked
-		 * server connection outside of the @out_queue processing.
-		 */
-		if (tfw_http_req_is_nonidempotent(req)
-		    && resp->conn && tfw_connection_get_if_live(resp->conn))
-		{
-			TFW_DBG2("%s: Response to non-idempotent request: "
-				 "conn=[%p]\n", __func__, resp->conn);
-			tfw_http_req_fwd_stalled(resp->conn);
-			tfw_connection_put(resp->conn);
 		}
 loop_discard:
 		tfw_http_conn_msg_free((TfwHttpMsg *)resp);
@@ -1288,7 +1204,7 @@ tfw_http_req_add_seq_queue(TfwHttpReq *req)
 	     ? list_last_entry(seq_queue, TfwHttpReq, msg.seq_list)
 	     : NULL;
 	if (preq && (preq->flags & TFW_HTTP_NON_IDEMP))
-		preq->flags |= TFW_HTTP_CHG_IDEMP;
+		preq->flags &= ~TFW_HTTP_NON_IDEMP;
 	list_add_tail(&req->msg.seq_list, seq_queue);
 	spin_unlock(&cli_conn->msg_qlock);
 }
@@ -1586,25 +1502,14 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 	if (srv_conn->msg_sent == msg)
 		srv_conn->msg_sent = NULL;
 	/*
-	 * If the server connection is no longer on hold, and its queue
-	 * is not drained, then forward pending messages to the server.
+	 * If the server connection is no longer on hold, and the queue
+	 * is not drained, then forward pending requests to the server.
+	 * Note: The queue is unlocked inside __tfw_http_req_fwd_stalled().
 	 */
-	if (!__tfw_http_conn_on_hold(srv_conn)
-	    && !__tfw_http_conn_drained(srv_conn))
-	{
-		struct list_head zap_queue, err_queue;
-
-		INIT_LIST_HEAD(&zap_queue);
-		INIT_LIST_HEAD(&err_queue);
-
-		__tfw_http_req_fwd_many(srv_conn, &zap_queue, &err_queue);
-		spin_unlock(&srv_conn->msg_qlock);
-
-		tfw_http_req_zap_dead(&zap_queue);
-		tfw_http_req_zap_error(&err_queue);
-	} else {
+	if (__tfw_http_conn_req_need_fwd(srv_conn))
+		__tfw_http_req_fwd_stalled(srv_conn);
+	else
 		spin_unlock(&srv_conn->msg_qlock);
-	}
 
 	return req;
 }
diff --git a/tempesta_fw/http.h b/tempesta_fw/http.h
index 6f0f3c8bb..17a61eb87 100644
--- a/tempesta_fw/http.h
+++ b/tempesta_fw/http.h
@@ -262,9 +262,7 @@ typedef struct {
 #define TFW_HTTP_FIELD_DUPENTRY		0x000200	/* Duplicate field */
 /* URI has form http://authority/path, not just /path */
 #define TFW_HTTP_URI_FULL		0x000400
-#define TFW_HTTP_CHG_IDEMP		0x001000
-#define TFW_HTTP_NON_IDEMP		0x002000
-#define __TFW_HTTP_IDEMP_MASK		(TFW_HTTP_CHG_IDEMP | TFW_HTTP_NON_IDEMP)
+#define TFW_HTTP_NON_IDEMP		0x000800
 
 /* Response flags */
 #define TFW_HTTP_VOID_BODY		0x010000	/* Resp to HEAD req */

From e032033f570a46e4eed7a31dc8a87ea5c8746393 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Thu, 13 Oct 2016 15:52:45 +0300
Subject: [PATCH 06/65] Provide the count of non-idempotent reqs in a server
 conn. (#419)

Non-idempotent requests make up an internal @nip_queue within the
server's fwd_queue. @nipcnt keeps the count of the requests in
@nip_queue, which schedulers can use when making a decision.
Special care is taken for the case where a non-idempotent request
is followed by another request, which re-enables pipelining of
those messages.
---
 tempesta_fw/connection.h | 10 +++--
 tempesta_fw/http.c       | 92 ++++++++++++++++++++++++++++++----------
 tempesta_fw/http.h       |  2 +
 3 files changed, 77 insertions(+), 27 deletions(-)

diff --git a/tempesta_fw/connection.h b/tempesta_fw/connection.h
index d945e400d..fae22d979 100644
--- a/tempesta_fw/connection.h
+++ b/tempesta_fw/connection.h
@@ -77,21 +77,26 @@ enum {
  * @state	- connection processing state;
  * @list	- member in the list of connections with @peer;
  * @msg_queue	- queue of messages to be sent over the connection;
+ * @nip_queue	- queue of non-idempotent messages within @msg_queue;
  * @msg_qlock	- lock for accessing @msg_queue;
  * @refcnt	- number of users of the connection structure instance;
+ * @nipcnt	- number of non-idempotent requests in the connection;
  * @timer	- The keep-alive/retry timer for the connection;
  * @msg		- message that is currently being processed;
+ * @msg_sent	- message that was sent last in the connection;
  * @peer	- TfwClient or TfwServer handler;
  * @sk		- an appropriate sock handler;
+ * @destructor	- called when a connection is destroyed;
  */
 typedef struct {
 	SsProto			proto;
 	TfwGState		state;
 	struct list_head	list;
 	struct list_head	msg_queue;
+	struct list_head	nip_queue;
 	spinlock_t		msg_qlock;
 	atomic_t		refcnt;
-	unsigned long		flags;
+	atomic_t		nipcnt;
 	struct timer_list	timer;
 	TfwMsg			*msg;
 	TfwMsg			*msg_sent;
@@ -104,9 +109,6 @@ typedef struct {
 
 #define TFW_CONN_TYPE(c)	((c)->proto.type)
 
-/* Connection flags. */
-#define TFW_CONN_FWD_HOLD	0x0001		/* Hold sending messages */
-
 /**
  * TLS hardened connection.
  */
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index bf9048089..be0ef94e6 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -370,6 +370,45 @@ tfw_http_send_504(TfwHttpReq *req, const char *reason)
 	return tfw_http_send_resp(req, &rh, __TFW_STR_CH(&rh, 1));
 }
 
+static inline bool
+tfw_http_req_is_nonidempotent(TfwHttpReq *req)
+{
+	return (req->flags & TFW_HTTP_NON_IDEMP);
+}
+
+static inline void
+__tfw_http_req_flip_nonidempotent(TfwConnection *srv_conn, TfwHttpReq *req)
+{
+	list_del_init(&req->nip_list);
+	atomic_dec(&srv_conn->nipcnt);
+}
+
+/*
+ * Flip a non-idempotent request. If @req in server connection @srv_conn
+ * is non-idempotent, then make it idempotent.
+ */
+static inline void
+tfw_http_req_flip_nonidempotent(TfwConnection *srv_conn, TfwHttpReq *req)
+{
+	if (!list_empty(&req->nip_list))
+		__tfw_http_req_flip_nonidempotent(srv_conn, req);
+}
+
+/*
+ * If a request on the list of non-idempotent requests in server
+ * connection @srv_conn had become an idempotent request, then flip it
+ * and make it idempotent.
+ */
+static inline void
+tfw_http_conn_flip_nonidempotent(TfwConnection *srv_conn)
+{
+	TfwHttpReq *req, *tmp;
+
+	list_for_each_entry_safe(req, tmp, &srv_conn->nip_queue, nip_list)
+		if (!tfw_http_req_is_nonidempotent(req))
+			__tfw_http_req_flip_nonidempotent(srv_conn, req);
+}
+
 /*
  * Allocate a new HTTP message structure, and link it with
  * the connection structure. Increment the number of users
@@ -385,9 +424,10 @@ tfw_http_conn_msg_alloc(TfwConnection *conn)
 	hm->conn = conn;
 	tfw_connection_get(conn);
 
-        if (TFW_CONN_TYPE(conn) & Conn_Clnt) {
-                TFW_INC_STAT_BH(clnt.rx_messages);
-        } else {
+	if (TFW_CONN_TYPE(conn) & Conn_Clnt) {
+		INIT_LIST_HEAD(&((TfwHttpReq *)hm)->nip_list);
+		TFW_INC_STAT_BH(clnt.rx_messages);
+	} else {
 		TfwHttpReq *req;
 
 		spin_lock(&conn->msg_qlock);
@@ -396,7 +436,7 @@ tfw_http_conn_msg_alloc(TfwConnection *conn)
 		spin_unlock(&conn->msg_qlock);
 		if (req && (req->method == TFW_HTTP_METH_HEAD))
 			hm->flags |= TFW_HTTP_VOID_BODY;
-                TFW_INC_STAT_BH(serv.rx_messages);
+		TFW_INC_STAT_BH(serv.rx_messages);
 	}
 
 	return (TfwMsg *)hm;
@@ -407,6 +447,10 @@ tfw_http_req_destruct(void *msg)
 {
 	TfwHttpReq *req = msg;
 
+	BUG_ON(!list_empty(&req->msg.seq_list));
+	BUG_ON(!list_empty(&req->msg.fwd_list));
+	BUG_ON(!list_empty(&req->nip_list));
+
 	if (req->sess)
 		tfw_http_sess_put(req->sess);
 }
@@ -457,6 +501,10 @@ tfw_http_conn_msg_free(TfwHttpMsg *hm)
 static int
 tfw_http_conn_init(TfwConnection *conn)
 {
+	if (TFW_CONN_TYPE(conn) & Conn_Srv) {
+		atomic_set(&conn->nipcnt, 0);
+		INIT_LIST_HEAD(&conn->nip_queue);
+	}
 	tfw_gfsm_state_init(&conn->state, conn, TFW_HTTP_FSM_INIT);
 	return 0;
 }
@@ -477,24 +525,20 @@ static void
 tfw_http_conn_release(TfwConnection *srv_conn)
 {
 	TfwHttpReq *req, *tmp;
-	struct list_head zap_queue;
+	struct list_head *fwd_queue = &srv_conn->msg_queue;
 
 	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
 
-	INIT_LIST_HEAD(&zap_queue);
-
-	spin_lock(&srv_conn->msg_qlock);
-	list_splice_tail_init(&srv_conn->msg_queue, &zap_queue);
-	spin_unlock(&srv_conn->msg_qlock);
-
-	list_for_each_entry_safe(req, tmp, &zap_queue, msg.fwd_list) {
+	list_for_each_entry_safe(req, tmp, fwd_queue, msg.fwd_list) {
 		BUG_ON(req->conn && (req->conn == srv_conn));
 		list_del_init(&req->msg.fwd_list);
+		tfw_http_req_flip_nonidempotent(srv_conn, req);
 		tfw_http_send_404(req);
 		TFW_INC_STAT_BH(clnt.msgs_otherr);
 	}
-	INIT_LIST_HEAD(&srv_conn->msg_queue);
+	BUG_ON(atomic_read(&srv_conn->nipcnt) != 0);
+	BUG_ON(!list_empty(&srv_conn->nip_queue));
 }
 
 /*
@@ -820,12 +864,6 @@ tfw_http_adjust_resp(TfwHttpResp *resp, TfwHttpReq *req)
 				     TFW_HTTP_HDR_SERVER, 0);
 }
 
-static inline bool
-tfw_http_req_is_nonidempotent(TfwHttpReq *req)
-{
-	return (req->flags & TFW_HTTP_NON_IDEMP);
-}
-
 /*
  * Tell if the server connection's forwarding queue is on hold.
  * It's on hold it the request that was sent last was non-idempotent.
@@ -933,6 +971,7 @@ __tfw_http_req_fwd_many(TfwConnection *srv_conn, struct list_head *err_queue)
 		/* Stop sending if the request is non-idempotent. */
 		if (tfw_http_req_is_nonidempotent(req))
 			break;
+		tfw_http_req_flip_nonidempotent(srv_conn, req);
 	}
 }
 
@@ -985,6 +1024,10 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 	spin_lock(&srv_conn->msg_qlock);
 	drained = __tfw_http_conn_drained(srv_conn);
 	list_add_tail(&req->msg.fwd_list, &srv_conn->msg_queue);
+	if (tfw_http_req_is_nonidempotent(req)) {
+		list_add_tail(&req->nip_list, &srv_conn->nip_queue);
+		atomic_inc(&srv_conn->nipcnt);
+	}
 	if (__tfw_http_conn_on_hold(srv_conn)) {
 		spin_unlock(&srv_conn->msg_qlock);
 		TFW_DBG2("%s: Server connection is on hold: conn=[%p]\n",
@@ -1000,6 +1043,7 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 	}
 	if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
 		list_del_init(&req->msg.fwd_list);
+		tfw_http_req_flip_nonidempotent(srv_conn, req);
 		spin_unlock(&srv_conn->msg_qlock);
 		TFW_DBG2("%s: Error sending to server connection: "
 			 "conn=[%p] req=[%p]\n", __func__, srv_conn, req);
@@ -1483,7 +1527,7 @@ tfw_http_resp_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 static TfwHttpReq *
 tfw_http_popreq(TfwHttpMsg *hmresp)
 {
-	TfwMsg *msg;
+	TfwHttpReq *req;
 	TfwConnection *srv_conn = hmresp->conn;
 	struct list_head *fwd_queue = &srv_conn->msg_queue;
 
@@ -1497,10 +1541,12 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 		TFW_INC_STAT_BH(serv.msgs_otherr);
 		return NULL;
 	}
-	msg = list_first_entry(fwd_queue, TfwMsg, fwd_list);
-	list_del_init(&msg->fwd_list);
-	if (srv_conn->msg_sent == msg)
+	req = list_first_entry(fwd_queue, TfwHttpReq, msg.fwd_list);
+	list_del_init(&req->msg.fwd_list);
+	if (srv_conn->msg_sent == (TfwMsg *)req)
 		srv_conn->msg_sent = NULL;
+	tfw_http_req_flip_nonidempotent(srv_conn, req);
+	tfw_http_conn_flip_nonidempotent(srv_conn);
 	/*
 	 * If the server connection is no longer on hold, and the queue
 	 * is not drained, then forward pending requests to the server.
diff --git a/tempesta_fw/http.h b/tempesta_fw/http.h
index 17a61eb87..74d9f37e1 100644
--- a/tempesta_fw/http.h
+++ b/tempesta_fw/http.h
@@ -341,6 +341,7 @@ typedef struct {
  * @userinfo	- userinfo in URI, not mandatory.
  * @host	- host in URI, may differ from Host header;
  * @uri_path	- path + query + fragment from URI (RFC3986.3);
+ * @nip_list	- member in the queue of non-idempotent requests;
  * @method	- HTTP request method, one of GET/PORT/HEAD/etc;
  * @node	- NUMA node where request is serviced;
  * @frang_st	- current state of FRANG classifier;
@@ -358,6 +359,7 @@ typedef struct {
 	TfwStr			userinfo;
 	TfwStr			host;
 	TfwStr			uri_path;
+	struct list_head	nip_list;
 	unsigned char		method;
 	unsigned short		node;
 	unsigned int		frang_st;

From b6f4f2a0d0ebc88668b6f58db3d6cb85820478cb Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Fri, 14 Oct 2016 16:52:23 +0300
Subject: [PATCH 07/65] RR scheduler: consider the presence of non-idempotent
 reqs. (#419)

---
 tempesta_fw/sched/tfw_sched_rr.c | 36 ++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/tempesta_fw/sched/tfw_sched_rr.c b/tempesta_fw/sched/tfw_sched_rr.c
index 4f90b6052..230af4cd8 100644
--- a/tempesta_fw/sched/tfw_sched_rr.c
+++ b/tempesta_fw/sched/tfw_sched_rr.c
@@ -101,32 +101,46 @@ tfw_sched_rr_add_conn(TfwSrvGroup *sg, TfwServer *srv, TfwConnection *conn)
 }
 
 /**
- * On each subsequent call the function returns the next server in the group.
- * Parallel connections to the same server are also rotated in the
- * round-robin manner.
+ * On each subsequent call the function returns the next server in the
+ * group. Parallel connections to the same server are also rotated in
+ * the round-robin manner.
+ *
  * Dead connections and servers w/o live connections are skipped.
+ * Initially, connections with non-idempotent requests are also skipped
+ * in attempt to increase throughput. However, if all live connections
+ * contain non-idempotent requests, then re-run the algorithm and get
+ * the first live connection as it is usually done.
  */
 static TfwConnection *
 tfw_sched_rr_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 {
-	int c, s, i;
-	TfwConnection *conn;
+	unsigned long idx;
+	int c, s, skipnip = 1, nipconn = 0;
 	TfwRrSrvList *sl = sg->sched_data;
 	TfwRrSrv *srv_cl;
+	TfwConnection *conn;
 
 	BUG_ON(!sl);
-
+rerun:
 	for (s = 0; s < sl->srv_n; ++s) {
-		i = atomic64_inc_return(&sl->rr_counter) % sl->srv_n;
-		srv_cl = &sl->srvs[i];
+		idx = atomic64_inc_return(&sl->rr_counter);
+		srv_cl = &sl->srvs[idx % sl->srv_n];
 		for (c = 0; c < srv_cl->conn_n; ++c) {
-			i = atomic64_inc_return(&srv_cl->rr_counter)
-			    % srv_cl->conn_n;
-			conn = srv_cl->conns[i];
+			idx = atomic64_inc_return(&srv_cl->rr_counter);
+			conn = srv_cl->conns[idx % srv_cl->conn_n];
+			if (skipnip && atomic_read(&conn->nipcnt)) {
+				if (tfw_connection_live(conn))
+					nipconn++;
+				continue;
+			}
 			if (tfw_connection_get_if_live(conn))
 				return conn;
 		}
 	}
+	if (skipnip && nipconn) {
+		skipnip = 0;
+		goto rerun;
+	}
 	return NULL;
 }
 

From 5cda3019c54e03d7aebc6712ce1d5a2c9ae52680 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Fri, 14 Oct 2016 17:14:09 +0300
Subject: [PATCH 08/65] A small fix after rebase on top of master branch.
 (#419)

---
 tempesta_fw/t/unit/sched_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tempesta_fw/t/unit/sched_helper.c b/tempesta_fw/t/unit/sched_helper.c
index 320213b4d..edc0d6b61 100644
--- a/tempesta_fw/t/unit/sched_helper.c
+++ b/tempesta_fw/t/unit/sched_helper.c
@@ -145,7 +145,7 @@ test_conn_release_all(TfwSrvGroup *sg)
 		list_for_each_entry_safe(conn, conn_tmp, &srv->conn_list, list) {
 			conn->sk = NULL;
 			tfw_connection_unlink_from_peer(conn);
-			while (tfw_connection_nfo(conn)) {
+			while (tfw_connection_live(conn)) {
 				tfw_connection_put(conn);
 			}
 			tfw_srv_conn_free((TfwSrvConnection *)conn);

From af18f8e93d583300ddc4d2e048cf590a7a827247 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Mon, 17 Oct 2016 13:46:45 +0300
Subject: [PATCH 09/65] Few cosmetic changes for style. (#419)

---
 tempesta_fw/http.c       | 2 +-
 tempesta_fw/http_match.c | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index be0ef94e6..433a5c9ae 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -1093,7 +1093,7 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 		if (req->resp == NULL)
 			break;
 		list_move_tail(&req->msg.seq_list, &out_queue);
-	} while(!list_empty(seq_queue));
+	} while (!list_empty(seq_queue));
 	spin_unlock(&cli_conn->msg_qlock);
 
 	/* Forward responses to the client. */
diff --git a/tempesta_fw/http_match.c b/tempesta_fw/http_match.c
index 0131c6c76..b917afadc 100644
--- a/tempesta_fw/http_match.c
+++ b/tempesta_fw/http_match.c
@@ -198,9 +198,8 @@ match_hdr(const TfwHttpReq *req, const TfwHttpMatchRule *rule)
 }
 
 #define _MOVE_TO_COND(p, end, cond)			\
-	while((p) < (end) && !(cond)) {			\
-		(p)++;					\
-	}
+	while ((p) < (end) && !(cond))			\
+		(p)++;
 
 /* It would be hard to apply some header-specific rules here, so ignore
  * case for all headers according to the robustness principle.

From c4c79ea74e105be8d4a59730be94592fcebaf26c Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Thu, 20 Oct 2016 12:05:56 +0300
Subject: [PATCH 10/65] Re-sending of requests within a restored server
 connection. (#419)

This commit implements the logic of re-sending requests that were
in the server connection's queue when the connection failed. When
the connection is restored, it's not scheduled until all requests
in the forwarding queue are re-sent or sent to the server.
---
 tempesta_fw/connection.c           |   9 +
 tempesta_fw/connection.h           |  61 +++-
 tempesta_fw/http.c                 | 512 ++++++++++++++++++-----------
 tempesta_fw/sched/tfw_sched_hash.c |   8 +-
 tempesta_fw/sched/tfw_sched_rr.c   |   6 +-
 tempesta_fw/sock_srv.c             |   7 +-
 6 files changed, 391 insertions(+), 212 deletions(-)

diff --git a/tempesta_fw/connection.c b/tempesta_fw/connection.c
index f8bd72140..f7f5ac23c 100644
--- a/tempesta_fw/connection.c
+++ b/tempesta_fw/connection.c
@@ -58,6 +58,15 @@ tfw_connection_new(TfwConnection *conn)
 	return TFW_CONN_HOOK_CALL(conn, conn_init);
 }
 
+/**
+ * Call connection repairing via TfwConnHooks.
+ */
+void
+tfw_connection_repair(TfwConnection *conn)
+{
+	TFW_CONN_HOOK_CALL(conn, conn_repair);
+}
+
 /**
  * Publish the "connection is dropped" event via TfwConnHooks.
  */
diff --git a/tempesta_fw/connection.h b/tempesta_fw/connection.h
index fae22d979..d0c81f7e0 100644
--- a/tempesta_fw/connection.h
+++ b/tempesta_fw/connection.h
@@ -79,36 +79,51 @@ enum {
  * @msg_queue	- queue of messages to be sent over the connection;
  * @nip_queue	- queue of non-idempotent messages within @msg_queue;
  * @msg_qlock	- lock for accessing @msg_queue;
+ * @flags	- various atomic flags related to connection's state;
  * @refcnt	- number of users of the connection structure instance;
- * @nipcnt	- number of non-idempotent requests in the connection;
  * @timer	- The keep-alive/retry timer for the connection;
  * @msg		- message that is currently being processed;
  * @msg_sent	- message that was sent last in the connection;
+ * @msg_resent	- message that was re-sent last in the connection;
  * @peer	- TfwClient or TfwServer handler;
  * @sk		- an appropriate sock handler;
  * @destructor	- called when a connection is destroyed;
+ * @forward	- called when a request is forwarded to server;
  */
-typedef struct {
+typedef struct tfw_connection_t {
 	SsProto			proto;
 	TfwGState		state;
 	struct list_head	list;
 	struct list_head	msg_queue;
-	struct list_head	nip_queue;
+	struct list_head	nip_queue;				/*srv*/
 	spinlock_t		msg_qlock;
+	unsigned long		flags;					/*srv*/
 	atomic_t		refcnt;
-	atomic_t		nipcnt;
 	struct timer_list	timer;
 	TfwMsg			*msg;
-	TfwMsg			*msg_sent;
+	TfwMsg			*msg_sent;				/*srv*/
+	TfwMsg			*msg_resent;				/*srv*/
 	TfwPeer 		*peer;
 	struct sock		*sk;
 	void			(*destructor)(void *);
+	void			(*forward)(struct tfw_connection_t *);	/*srv*/
 } TfwConnection;
 
 #define TFW_CONN_DEATHCNT	(INT_MIN / 2)
 
 #define TFW_CONN_TYPE(c)	((c)->proto.type)
 
+/* Connection flags are defined by the bit number. */
+enum {
+	TFW_CONN_B_RESEND = 0,	/* Need to re-send requests. */
+	TFW_CONN_B_QFORWD,	/* Need to forward requests in the queue. */
+	TFW_CONN_B_HASNIP,	/* Has non-idempotent requests. */
+};
+
+#define TFW_CONN_F_RESEND	(1 << TFW_CONN_B_RESEND)
+#define TFW_CONN_F_QFORWD	(1 << TFW_CONN_B_QFORWD)
+#define TFW_CONN_F_HASNIP	(1 << TFW_CONN_B_HASNIP)
+
 /**
  * TLS hardened connection.
  */
@@ -129,6 +144,14 @@ typedef struct {
 	 */
 	int (*conn_init)(TfwConnection *conn);
 
+	/*
+	 * Called when a new connection is initialized and before
+	 * the initialization is complete. Makes sense only for
+	 * server connections. Used to re-send requests that were
+	 * left in the connection queue.
+	 */
+	void (*conn_repair)(TfwConnection *conn);
+
 	/*
 	 * Called when closing a connection (client or server,
 	 * as in conn_init()). This is required for modules that
@@ -159,6 +182,25 @@ extern TfwConnHooks *conn_hooks[TFW_CONN_MAX_PROTOS];
 #define TFW_CONN_HOOK_CALL(c, f...)		\
 	tfw_conn_hook_call(TFW_CONN_TYPE2IDX(TFW_CONN_TYPE(c)), c, f)
 
+/*
+ * Tell if a connection is restricted. When restricted, a connection
+ * cannot be scheduled.
+ */
+static inline bool
+tfw_connection_restricted(TfwConnection *conn)
+{
+	return test_bit(TFW_CONN_B_RESEND, &conn->flags);
+}
+
+/*
+ * Tell if a connection has non-idempotent requests.
+ */
+static inline bool
+tfw_connection_hasnip(TfwConnection *conn)
+{
+	return test_bit(TFW_CONN_B_HASNIP, &conn->flags);
+}
+
 static inline bool
 tfw_connection_live(TfwConnection *conn)
 {
@@ -172,8 +214,8 @@ tfw_connection_get(TfwConnection *conn)
 }
 
 /**
- * Increment reference counter and return true if @conn isn't in failovering
- * process, i.e. @refcnt > 0.
+ * Increment reference counter and return true if @conn is not in
+ * failovering process, i.e. @refcnt wasn't less than or equal to zero.
  */
 static inline bool
 tfw_connection_get_if_live(TfwConnection *conn)
@@ -295,7 +337,9 @@ tfw_connection_validate_cleanup(TfwConnection *conn)
 
 	BUG_ON(!conn);
 	BUG_ON(!list_empty(&conn->list));
-	BUG_ON(!list_empty(&conn->msg_queue));
+	BUG_ON((TFW_CONN_TYPE(conn) & Conn_Clnt)
+	       && !list_empty(&conn->msg_queue));
+	BUG_ON(atomic_read(&conn->refcnt) & ~1);
 	BUG_ON(conn->msg);
 
 	rc = atomic_read(&conn->refcnt);
@@ -311,6 +355,7 @@ void tfw_connection_init(TfwConnection *conn);
 void tfw_connection_link_peer(TfwConnection *conn, TfwPeer *peer);
 
 int tfw_connection_new(TfwConnection *conn);
+void tfw_connection_repair(TfwConnection *conn);
 void tfw_connection_drop(TfwConnection *conn);
 void tfw_connection_release(TfwConnection *conn);
 
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 433a5c9ae..923879a33 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -376,37 +376,266 @@ tfw_http_req_is_nonidempotent(TfwHttpReq *req)
 	return (req->flags & TFW_HTTP_NON_IDEMP);
 }
 
+/*
+ * Set the request @req in server connection @srv_conn as idempotent.
+ */
 static inline void
-__tfw_http_req_flip_nonidempotent(TfwConnection *srv_conn, TfwHttpReq *req)
+__tfw_http_req_set_idempotent(TfwConnection *srv_conn, TfwHttpReq *req)
 {
 	list_del_init(&req->nip_list);
-	atomic_dec(&srv_conn->nipcnt);
+	if (list_empty(&srv_conn->nip_queue))
+		clear_bit(TFW_CONN_B_HASNIP, &srv_conn->flags);
 }
 
 /*
- * Flip a non-idempotent request. If @req in server connection @srv_conn
- * is non-idempotent, then make it idempotent.
+ * If @req in server connection @srv_conn is non-idempotent, then set it
+ * as idempotent.
  */
 static inline void
-tfw_http_req_flip_nonidempotent(TfwConnection *srv_conn, TfwHttpReq *req)
+tfw_http_req_flip_if_nonidempotent(TfwConnection *srv_conn, TfwHttpReq *req)
 {
 	if (!list_empty(&req->nip_list))
-		__tfw_http_req_flip_nonidempotent(srv_conn, req);
+		__tfw_http_req_set_idempotent(srv_conn, req);
 }
 
 /*
  * If a request on the list of non-idempotent requests in server
- * connection @srv_conn had become an idempotent request, then flip it
- * and make it idempotent.
+ * connection @srv_conn had become idempotent, then set it as idempotent.
  */
 static inline void
-tfw_http_conn_flip_nonidempotent(TfwConnection *srv_conn)
+tfw_http_conn_flip_if_nonidempotent(TfwConnection *srv_conn)
 {
 	TfwHttpReq *req, *tmp;
 
 	list_for_each_entry_safe(req, tmp, &srv_conn->nip_queue, nip_list)
 		if (!tfw_http_req_is_nonidempotent(req))
-			__tfw_http_req_flip_nonidempotent(srv_conn, req);
+			__tfw_http_req_set_idempotent(srv_conn, req);
+}
+
+/*
+ * Set the request @req in server connection @srv_conn as non-idempotent.
+ */
+static inline void
+__tfw_http_req_set_nonidempotent(TfwConnection *srv_conn, TfwHttpReq *req)
+{
+	list_add_tail(&req->nip_list, &srv_conn->nip_queue);
+	set_bit(TFW_CONN_B_HASNIP, &srv_conn->flags);
+}
+
+/*
+ * If the request @req in server connection @srv_conn is idempotent,
+ * then set it as non-idempotent.
+ */
+static inline void
+tfw_http_req_flip_if_idempotent(TfwConnection *srv_conn, TfwHttpReq *req)
+{
+	if (list_empty(&req->nip_list))
+		__tfw_http_req_set_nonidempotent(srv_conn, req);
+}
+
+/*
+ * Tell if the server connection's forwarding queue is on hold.
+ * It's on hold if the request that was sent last was non-idempotent.
+ */
+static inline bool
+tfw_http_conn_on_hold(TfwConnection *srv_conn)
+{
+	TfwHttpReq *req = (TfwHttpReq *)srv_conn->msg_sent;
+
+	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
+	return (req && tfw_http_req_is_nonidempotent(req));
+}
+
+/*
+ * Tell if the server connection's forwarding queue is drained.
+ * It's drained if there're no requests in the queue after the
+ * request that was sent last.
+ */
+static inline bool
+tfw_http_conn_drained(TfwConnection *srv_conn)
+{
+	TfwMsg *lmsg;
+
+	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
+	if (list_empty(&srv_conn->msg_queue)) {
+		TFW_DBG2("%s: Empty: srv_conn=[%p]\n", __func__, srv_conn);
+		return true;
+	}
+	if (!srv_conn->msg_sent) {
+		TFW_DBG2("%s: None sent: srv_conn=[%p]\n", __func__, srv_conn);
+		return false;
+	}
+	lmsg = list_last_entry(&srv_conn->msg_queue, TfwMsg, seq_list);
+	if (srv_conn->msg_sent == lmsg)
+		return true;
+	TFW_DBG2("%s: Some not sent: srv_conn=[%p]\n", __func__, srv_conn);
+	return false;
+}
+
+static inline bool
+tfw_http_conn_req_need_fwd(TfwConnection *srv_conn)
+{
+	return (!tfw_http_conn_on_hold(srv_conn)
+		&& !tfw_http_conn_drained(srv_conn));
+}
+
+/*
+ * Delete requests that were not forwarded due to an error. Send an
+ * error response to a client. The response will be attached to the
+ * request and sent to the client in proper seq order.
+ */
+static void
+tfw_http_req_zap_error(struct list_head *err_queue)
+{
+	TfwHttpReq *req, *tmp;
+
+	TFW_DBG2("%s: queue is %sempty\n",
+		 __func__, list_empty(err_queue) ? "" : "NOT ");
+
+        list_for_each_entry_safe(req, tmp, err_queue, msg.fwd_list) {
+                list_del_init(&req->msg.fwd_list);
+                tfw_http_send_500(req);
+        }
+}
+
+/*
+ * Forward requests in the server connection @srv_conn. The requests
+ * are forwarded until a non-idempotent request is found in the queue.
+ * Must be called with a lock on the server connection's @msg_queue.
+ */
+static void
+__tfw_http_req_fwd_stalled(TfwConnection *srv_conn, struct list_head *err_queue)
+{
+	TfwHttpReq *req, *tmp;
+	struct list_head *fwd_queue = &srv_conn->msg_queue;
+
+	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
+
+	/*
+	 * Process the server connection's queue of pending requests.
+	 * The queue is locked against concurrent updates: inserts of
+	 * outgoing requests, or closing of the server connection. Do
+	 * it as fast as possible by moving failed requests to other
+	 * queues that can be processed without the lock.
+	 */
+	req = srv_conn->msg_sent
+	    ? (TfwHttpReq *)list_next_entry(srv_conn->msg_sent, fwd_list)
+	    : (TfwHttpReq *)list_first_entry(fwd_queue, TfwMsg, fwd_list);
+	list_for_each_entry_safe_from(req, tmp, fwd_queue, msg.fwd_list) {
+		/*
+		 * If unable to send to the server connection due to
+		 * an error, then move the request to @err_queue for
+		 * sending a 500 error response later. That is safe
+		 * as the response will be sent in proper seq order.
+		 */
+		if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
+			tfw_http_req_flip_if_nonidempotent(srv_conn, req);
+			list_move_tail(&req->msg.fwd_list, err_queue);
+			TFW_DBG2("%s: Error sending to server connection: "
+				 "conn=[%p] req=[%p]\n",
+				 __func__, srv_conn, req);
+			continue;
+		}
+		srv_conn->msg_sent = (TfwMsg *)req;
+		/* Stop sending if the request is non-idempotent. */
+		if (tfw_http_req_is_nonidempotent(req))
+			break;
+		/* See if the request has become idempotent. */
+		tfw_http_req_flip_if_nonidempotent(srv_conn, req);
+	}
+}
+
+/*
+ * Forward stalled requests in server connection @srv_conn.
+ *
+ * This function expects that the queue in the server connection
+ * is locked. The queue is unlocked inside the function which is
+ * very non-traditional. Please use with caution.
+ */
+static void
+tfw_http_req_fwd_stalled(TfwConnection *srv_conn)
+{
+	LIST_HEAD(err_queue);
+
+	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
+	WARN_ON(!spin_is_locked(&srv_conn->msg_qlock));
+	BUG_ON(list_empty(&srv_conn->msg_queue));
+
+	__tfw_http_req_fwd_stalled(srv_conn, &err_queue);
+	spin_unlock(&srv_conn->msg_qlock);
+
+	if (!list_empty(&err_queue))
+		tfw_http_req_zap_error(&err_queue);
+}
+
+static void
+__tfw_http_req_fwd_resend(TfwConnection *srv_conn,
+			  bool one_msg, struct list_head *err_queue)
+{
+	TfwHttpReq *req, *tmp;
+	struct list_head *end, *fwd_queue = &srv_conn->msg_queue;
+
+	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
+	BUG_ON(!srv_conn->msg_sent);
+
+	req = srv_conn->msg_resent
+	    ? (TfwHttpReq *)list_next_entry(srv_conn->msg_resent, fwd_list)
+	    : (TfwHttpReq *)list_first_entry(fwd_queue, TfwMsg, fwd_list);
+	end = srv_conn->msg_sent->fwd_list.next;
+
+	/* An equivalent of list_for_each_entry_safe_from() */
+	for (tmp = list_next_entry(req, msg.fwd_list);
+	     &req->msg.fwd_list != end;
+	     req = tmp, tmp = list_next_entry(tmp, msg.fwd_list))
+	{
+		if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
+			tfw_http_req_flip_if_nonidempotent(srv_conn, req);
+			list_move_tail(&req->msg.fwd_list, err_queue);
+			TFW_DBG2("%s: Error sending to server connection: "
+				 "conn=[%p] req=[%p]\n",
+				 __func__, srv_conn, req);
+			continue;
+		}
+		srv_conn->msg_resent = (TfwMsg *)req;
+		if (unlikely(one_msg))
+			break;
+	}
+}
+
+static void
+__tfw_http_req_fwd_qforwd(TfwConnection *srv_conn, struct list_head *err_queue)
+{
+	__tfw_http_req_fwd_stalled(srv_conn, err_queue);
+	if (list_empty(&srv_conn->msg_queue)) {
+		srv_conn->forward = tfw_http_req_fwd_stalled;
+		clear_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
+		clear_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
+	}
+}
+
+static void
+tfw_http_req_fwd_repair(TfwConnection *srv_conn)
+{
+	LIST_HEAD(err_queue);
+
+	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
+	WARN_ON(!spin_is_locked(&srv_conn->msg_qlock));
+	BUG_ON(list_empty(&srv_conn->msg_queue));
+
+	if (test_bit(TFW_CONN_B_QFORWD, &srv_conn->flags)) {
+		__tfw_http_req_fwd_qforwd(srv_conn, &err_queue);
+	} else {
+		if (!srv_conn->msg_sent)
+			__tfw_http_req_fwd_resend(srv_conn, false, &err_queue);
+		if (srv_conn->msg_resent == srv_conn->msg_sent) {
+			set_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
+			__tfw_http_req_fwd_qforwd(srv_conn, &err_queue);
+		}
+	}
+	spin_unlock(&srv_conn->msg_qlock);
+
+	if (!list_empty(&err_queue))
+		tfw_http_req_zap_error(&err_queue);
 }
 
 /*
@@ -431,8 +660,8 @@ tfw_http_conn_msg_alloc(TfwConnection *conn)
 		TfwHttpReq *req;
 
 		spin_lock(&conn->msg_qlock);
-		req = (TfwHttpReq *)list_first_entry_or_null(&conn->msg_queue,
-							     TfwMsg, fwd_list);
+		req = list_first_entry_or_null(&conn->msg_queue,
+					       TfwHttpReq, msg.fwd_list);
 		spin_unlock(&conn->msg_qlock);
 		if (req && (req->method == TFW_HTTP_METH_HEAD))
 			hm->flags |= TFW_HTTP_VOID_BODY;
@@ -442,19 +671,6 @@ tfw_http_conn_msg_alloc(TfwConnection *conn)
 	return (TfwMsg *)hm;
 }
 
-void
-tfw_http_req_destruct(void *msg)
-{
-	TfwHttpReq *req = msg;
-
-	BUG_ON(!list_empty(&req->msg.seq_list));
-	BUG_ON(!list_empty(&req->msg.fwd_list));
-	BUG_ON(!list_empty(&req->nip_list));
-
-	if (req->sess)
-		tfw_http_sess_put(req->sess);
-}
-
 /*
  * Free an HTTP message.
  * Also, free the connection structure if there's no more references.
@@ -492,6 +708,37 @@ tfw_http_conn_msg_free(TfwHttpMsg *hm)
 	tfw_http_msg_free(hm);
 }
 
+/*
+ * Find requests in the server's connection queue that were forwarded
+ * to the server. These are unanswered requests. According to RFC 7230
+ * 6.3.2, "a client MUST NOT pipeline immediately after connection
+ * establishment". To address that, re-send the first request to the
+ * server. When a response comes, that will trigger resending of the
+ * rest of those unanswered requests.
+ */
+static void
+tfw_http_conn_repair(TfwConnection *srv_conn)
+{
+	LIST_HEAD(err_queue);
+
+	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
+	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
+	BUG_ON(!tfw_connection_restricted(srv_conn));
+
+	/* Resend the first unanswered request. */
+	spin_lock(&srv_conn->msg_qlock);
+	if (!srv_conn->msg_sent)
+		__tfw_http_req_fwd_resend(srv_conn, true, &err_queue);
+	if (!srv_conn->msg_resent) {
+		set_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
+		__tfw_http_req_fwd_qforwd(srv_conn, &err_queue);
+	}
+	spin_unlock(&srv_conn->msg_qlock);
+
+	if (!list_empty(&err_queue))
+		tfw_http_req_zap_error(&err_queue);
+}
+
 /*
  * Connection with a peer is created.
  *
@@ -502,21 +749,38 @@ static int
 tfw_http_conn_init(TfwConnection *conn)
 {
 	if (TFW_CONN_TYPE(conn) & Conn_Srv) {
-		atomic_set(&conn->nipcnt, 0);
-		INIT_LIST_HEAD(&conn->nip_queue);
+		if (list_empty(&conn->msg_queue)) {
+			conn->forward = tfw_http_req_fwd_stalled;
+		} else {
+			conn->msg_resent = NULL;
+			conn->forward = tfw_http_req_fwd_repair;
+			set_bit(TFW_CONN_B_RESEND, &conn->flags);
+		}
 	}
 	tfw_gfsm_state_init(&conn->state, conn, TFW_HTTP_FSM_INIT);
 	return 0;
 }
 
+void
+tfw_http_req_destruct(void *msg)
+{
+	TfwHttpReq *req = msg;
+
+	BUG_ON(!list_empty(&req->msg.seq_list));
+	BUG_ON(!list_empty(&req->msg.fwd_list));
+	BUG_ON(!list_empty(&req->nip_list));
+
+	if (req->sess)
+		tfw_http_sess_put(req->sess);
+}
+
 /*
  * Connection with a peer is released.
  *
  * For server connections the requests that were sent to that server are
  * kept in the queue until a paired response comes. That will never happen
- * now, and requests will remain unanswered. For each request in the queue
- * send an error response to the corresponding client connection. Both the
- * request and the response will be freed when the response is sent out.
+ * now. Keep the queue. When the connection is restored the requests will
+ * be re-sent to the server.
  *
  * Called when a connection is released. There are no users at that time,
  * so locks are not needed.
@@ -524,21 +788,11 @@ tfw_http_conn_init(TfwConnection *conn)
 static void
 tfw_http_conn_release(TfwConnection *srv_conn)
 {
-	TfwHttpReq *req, *tmp;
-	struct list_head *fwd_queue = &srv_conn->msg_queue;
-
 	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
 
-	list_for_each_entry_safe(req, tmp, fwd_queue, msg.fwd_list) {
-		BUG_ON(req->conn && (req->conn == srv_conn));
-		list_del_init(&req->msg.fwd_list);
-		tfw_http_req_flip_nonidempotent(srv_conn, req);
-		tfw_http_send_404(req);
-		TFW_INC_STAT_BH(clnt.msgs_otherr);
-	}
-	BUG_ON(atomic_read(&srv_conn->nipcnt) != 0);
-	BUG_ON(!list_empty(&srv_conn->nip_queue));
+	clear_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
+	clear_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
 }
 
 /*
@@ -559,6 +813,7 @@ tfw_http_conn_cli_drop(TfwConnection *cli_conn)
 {
 	TfwHttpMsg *hmreq, *tmp;
 	struct list_head *seq_queue = &cli_conn->msg_queue;
+	LIST_HEAD(zap_queue);
 
 	TFW_DBG2("%s: conn = %p\n", __func__, cli_conn);
 	BUG_ON(!(TFW_CONN_TYPE(cli_conn) & Conn_Clnt));
@@ -567,10 +822,11 @@ tfw_http_conn_cli_drop(TfwConnection *cli_conn)
 		return;
 
 	spin_lock(&cli_conn->msg_qlock);
-	list_for_each_entry_safe(hmreq, tmp, seq_queue, msg.seq_list) {
-		list_del_init(&hmreq->msg.seq_list);
-	}
+	list_splice_tail_init(seq_queue, &zap_queue);
 	spin_unlock(&cli_conn->msg_qlock);
+
+	list_for_each_entry_safe(hmreq, tmp, &zap_queue, msg.seq_list)
+		list_del_init(&hmreq->msg.seq_list);
 }
 
 /*
@@ -864,141 +1120,6 @@ tfw_http_adjust_resp(TfwHttpResp *resp, TfwHttpReq *req)
 				     TFW_HTTP_HDR_SERVER, 0);
 }
 
-/*
- * Tell if the server connection's forwarding queue is on hold.
- * It's on hold it the request that was sent last was non-idempotent.
- */
-static inline bool
-__tfw_http_conn_on_hold(TfwConnection *srv_conn)
-{
-	TfwHttpReq *req = (TfwHttpReq *)srv_conn->msg_sent;
-
-	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
-	return (req && tfw_http_req_is_nonidempotent(req));
-}
-
-/*
- * Tell if the server connection's forwarding queue is drained.
- * It's drained if there're no requests in the queue after the
- * request that was sent last.
- */
-static inline bool
-__tfw_http_conn_drained(TfwConnection *srv_conn)
-{
-	TfwMsg *lmsg;
-
-	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
-	if (list_empty(&srv_conn->msg_queue)) {
-		TFW_DBG2("%s: Empty: srv_conn=[%p]\n", __func__, srv_conn);
-		return true;
-	}
-	if (!srv_conn->msg_sent) {
-		TFW_DBG2("%s: None sent: srv_conn=[%p]\n", __func__, srv_conn);
-		return false;
-	}
-	lmsg = list_last_entry(&srv_conn->msg_queue, TfwMsg, seq_list);
-	if (srv_conn->msg_sent == lmsg)
-		return true;
-	TFW_DBG2("%s: Some not sent: srv_conn=[%p]\n", __func__, srv_conn);
-	return false;
-}
-
-static inline bool
-__tfw_http_conn_req_need_fwd(TfwConnection *srv_conn)
-{
-	return (!__tfw_http_conn_on_hold(srv_conn)
-		&& !__tfw_http_conn_drained(srv_conn));
-}
-
-/*
- * Delete requests that were not forwarded due to an error. Send an
- * error response to a client. The response will be attached to the
- * request and sent to the client in proper seq order.
- */
-static void
-tfw_http_req_zap_error(struct list_head *err_queue)
-{
-	TfwHttpReq *req, *tmp;
-
-	TFW_DBG2("%s: queue is %sempty\n",
-		 __func__, list_empty(err_queue) ? "" : "NOT ");
-
-        list_for_each_entry_safe(req, tmp, err_queue, msg.fwd_list) {
-                list_del_init(&req->msg.fwd_list);
-                tfw_http_send_500(req);
-        }
-}
-
-/*
- * Forward requests in server connection @srv_conn. The requests are
- * forwarded until a non-idempotent request is found in the queue.
- * Must be called with a lock on the server connection's @msg_queue.
- */
-static void
-__tfw_http_req_fwd_many(TfwConnection *srv_conn, struct list_head *err_queue)
-{
-	TfwHttpReq *req, *tmp;
-	struct list_head *fwd_queue = &srv_conn->msg_queue;
-
-	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
-	BUG_ON(list_empty(fwd_queue));
-
-	/*
-	 * Process the server connection's queue of pending requests.
-	 * The queue is locked against concurrent updates: inserts of
-	 * outgoing requests, or closing of the server connection. Do
-	 * it as fast as possible by moving failed requests to other
-	 * queues that can be processed without the lock.
-	 */
-	req = srv_conn->msg_sent
-	    ? (TfwHttpReq *)list_next_entry(srv_conn->msg_sent, fwd_list)
-	    : (TfwHttpReq *)list_first_entry(fwd_queue, TfwMsg, fwd_list);
-	list_for_each_entry_safe_from(req, tmp, fwd_queue, msg.fwd_list) {
-		/*
-		 * If unable to send to the server connection due to
-		 * an error, then move the request to @err_queue for
-		 * sending a 500 error response later. That is safe
-		 * as the response will be sent in proper seq order.
-		 */
-		if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
-			list_move_tail(&req->msg.fwd_list, err_queue);
-			TFW_DBG2("%s: Error sending to server connection: "
-				 "conn=[%p] req=[%p]\n",
-				 __func__, srv_conn, req);
-			continue;
-		}
-		srv_conn->msg_sent = (TfwMsg *)req;
-		/* Stop sending if the request is non-idempotent. */
-		if (tfw_http_req_is_nonidempotent(req))
-			break;
-		tfw_http_req_flip_nonidempotent(srv_conn, req);
-	}
-}
-
-/*
- * Forward stalled requests in server connection @srv_conn.
- *
- * This function expect that the queue in the server connection
- * is locked. The queue in unlocked inside the function which is
- * very non-traditional. Please use with caution.
- */
-static void
-__tfw_http_req_fwd_stalled(TfwConnection *srv_conn)
-{
-	struct list_head err_queue;
-
-	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
-	BUG_ON(!spin_is_locked(&srv_conn->msg_qlock));
-
-	INIT_LIST_HEAD(&err_queue);
-
-	__tfw_http_req_fwd_many(srv_conn, &err_queue);
-	spin_unlock(&srv_conn->msg_qlock);
-
-	if (!list_empty(&err_queue))
-		tfw_http_req_zap_error(&err_queue);
-}
-
 /*
  * Forward the request @req to server connection @srv_conn.
  *
@@ -1022,13 +1143,11 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
 
 	spin_lock(&srv_conn->msg_qlock);
-	drained = __tfw_http_conn_drained(srv_conn);
+	drained = tfw_http_conn_drained(srv_conn);
 	list_add_tail(&req->msg.fwd_list, &srv_conn->msg_queue);
-	if (tfw_http_req_is_nonidempotent(req)) {
-		list_add_tail(&req->nip_list, &srv_conn->nip_queue);
-		atomic_inc(&srv_conn->nipcnt);
-	}
-	if (__tfw_http_conn_on_hold(srv_conn)) {
+	if (tfw_http_req_is_nonidempotent(req))
+		__tfw_http_req_set_nonidempotent(srv_conn, req);
+	if (tfw_http_conn_on_hold(srv_conn)) {
 		spin_unlock(&srv_conn->msg_qlock);
 		TFW_DBG2("%s: Server connection is on hold: conn=[%p]\n",
 			 __func__, srv_conn);
@@ -1037,13 +1156,13 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 	if (!drained) {
 		TFW_DBG2("%s: Server connection is not drained: conn=[%p]\n",
 			 __func__, srv_conn);
+		tfw_http_req_fwd_stalled(srv_conn);
 		/* The queue is unlocked inside the function. */
-		__tfw_http_req_fwd_stalled(srv_conn);
 		return;
 	}
 	if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
 		list_del_init(&req->msg.fwd_list);
-		tfw_http_req_flip_nonidempotent(srv_conn, req);
+		tfw_http_req_flip_if_nonidempotent(srv_conn, req);
 		spin_unlock(&srv_conn->msg_qlock);
 		TFW_DBG2("%s: Error sending to server connection: "
 			 "conn=[%p] req=[%p]\n", __func__, srv_conn, req);
@@ -1062,11 +1181,11 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 {
 	TfwHttpReq *tmp;
 	TfwConnection *cli_conn = req->conn;
-	struct list_head out_queue, *seq_queue = &cli_conn->msg_queue;
+	struct list_head *seq_queue = &cli_conn->msg_queue;
+	LIST_HEAD(out_queue);
 
 	TFW_DBG2("%s: req=[%p], resp=[%p]\n", __func__, req, resp);
 
-	INIT_LIST_HEAD(&out_queue);
 	/*
 	 * Starting with the first request on the list, pick consecutive
 	 * requests that have a paired response. Remove those requests
@@ -1543,17 +1662,17 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 	}
 	req = list_first_entry(fwd_queue, TfwHttpReq, msg.fwd_list);
 	list_del_init(&req->msg.fwd_list);
-	if (srv_conn->msg_sent == (TfwMsg *)req)
+	if ((TfwMsg *)req == srv_conn->msg_sent)
 		srv_conn->msg_sent = NULL;
-	tfw_http_req_flip_nonidempotent(srv_conn, req);
-	tfw_http_conn_flip_nonidempotent(srv_conn);
+	tfw_http_req_flip_if_nonidempotent(srv_conn, req);
+	tfw_http_conn_flip_if_nonidempotent(srv_conn);
 	/*
 	 * If the server connection is no longer on hold, and the queue
 	 * is not drained, then forward pending requests to the server.
-	 * Note: The queue is unlocked inside __tfw_http_req_fwd_stalled().
+	 * Note: The queue is unlocked inside srv_conn->forward().
 	 */
-	if (__tfw_http_conn_req_need_fwd(srv_conn))
-		__tfw_http_req_fwd_stalled(srv_conn);
+	if (tfw_http_conn_req_need_fwd(srv_conn))
+		srv_conn->forward(srv_conn);
 	else
 		spin_unlock(&srv_conn->msg_qlock);
 
@@ -1886,6 +2005,7 @@ EXPORT_SYMBOL(tfw_http_req_key_calc);
 
 static TfwConnHooks http_conn_hooks = {
 	.conn_init	= tfw_http_conn_init,
+	.conn_repair	= tfw_http_conn_repair,
 	.conn_drop	= tfw_http_conn_drop,
 	.conn_release	= tfw_http_conn_release,
 	.conn_send	= tfw_http_conn_send,
diff --git a/tempesta_fw/sched/tfw_sched_hash.c b/tempesta_fw/sched/tfw_sched_hash.c
index 43a40cbf0..913c7907b 100644
--- a/tempesta_fw/sched/tfw_sched_hash.c
+++ b/tempesta_fw/sched/tfw_sched_hash.c
@@ -145,15 +145,15 @@ tfw_sched_hash_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 	msg_hash = tfw_http_req_key_calc((TfwHttpReq *)msg);
 	for (tries = 0; tries < __HLIST_SZ(TFW_SG_MAX_CONN); ++tries) {
 		for (ch = sg->sched_data; ch->conn; ++ch) {
+			if (unlikely(tfw_connection_restricted(ch->conn))
+			    || unlikely(!tfw_connection_live(ch->conn)))
+				continue;
 			curr_weight = msg_hash ^ ch->hash;
-			if (likely(tfw_connection_live(ch->conn))
-			    && curr_weight > best_weight)
-			{
+			if (curr_weight > best_weight) {
 				best_weight = curr_weight;
 				best_conn = ch->conn;
 			}
 		}
-
 		if (unlikely(!best_conn))
 			return NULL;
 		if (tfw_connection_get_if_live(best_conn))
diff --git a/tempesta_fw/sched/tfw_sched_rr.c b/tempesta_fw/sched/tfw_sched_rr.c
index 230af4cd8..dd675cd04 100644
--- a/tempesta_fw/sched/tfw_sched_rr.c
+++ b/tempesta_fw/sched/tfw_sched_rr.c
@@ -128,8 +128,10 @@ tfw_sched_rr_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 		for (c = 0; c < srv_cl->conn_n; ++c) {
 			idx = atomic64_inc_return(&srv_cl->rr_counter);
 			conn = srv_cl->conns[idx % srv_cl->conn_n];
-			if (skipnip && atomic_read(&conn->nipcnt)) {
-				if (tfw_connection_live(conn))
+			if (unlikely(tfw_connection_restricted(conn)))
+				continue;
+			if (skipnip && tfw_connection_hasnip(conn)) {
+				if (likely(tfw_connection_live(conn)))
 					nipconn++;
 				continue;
 			}
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index 313326254..3543ee95f 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -308,8 +308,7 @@ tfw_sock_srv_connect_complete(struct sock *sk)
 	tfw_connection_link_to_sk(conn, sk);
 
 	/* Notify higher level layers. */
-	r = tfw_connection_new(conn);
-	if (r) {
+	if ((r = tfw_connection_new(conn))) {
 		TFW_ERR("conn_init() hook returned error\n");
 		return r;
 	}
@@ -317,6 +316,10 @@ tfw_sock_srv_connect_complete(struct sock *sk)
 	/* Let schedulers use the connection hereafter. */
 	tfw_connection_revive(conn);
 
+	/* Repair the connection if necessary. */
+	if (unlikely(tfw_connection_restricted(conn)))
+		tfw_connection_repair(conn);
+
 	__reset_retry_timer(srv_conn);
 
 	TFW_DBG_ADDR("connected", &srv->addr);

From ed89a5715258e7b3dc7921f904b9e59027f76203 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Fri, 21 Oct 2016 12:46:18 +0300
Subject: [PATCH 11/65] Bug fixes related to the previous commit. (#419)

---
 tempesta_fw/connection.c |  3 ++-
 tempesta_fw/http.c       | 53 +++++++++++++++++++++++++---------------
 2 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/tempesta_fw/connection.c b/tempesta_fw/connection.c
index f7f5ac23c..726f9ea97 100644
--- a/tempesta_fw/connection.c
+++ b/tempesta_fw/connection.c
@@ -86,7 +86,8 @@ tfw_connection_release(TfwConnection *conn)
 {
 	/* Ask higher levels to free resources at connection release. */
 	TFW_CONN_HOOK_CALL(conn, conn_release);
-	BUG_ON(!list_empty(&conn->msg_queue));
+	BUG_ON((TFW_CONN_TYPE(conn) & Conn_Clnt)
+	       && !list_empty(&conn->msg_queue));
 }
 
 /*
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 923879a33..44ee69cd9 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -454,10 +454,11 @@ tfw_http_conn_on_hold(TfwConnection *srv_conn)
 static inline bool
 tfw_http_conn_drained(TfwConnection *srv_conn)
 {
-	TfwMsg *lmsg;
+	struct list_head *fwd_queue = &srv_conn->msg_queue;
 
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
-	if (list_empty(&srv_conn->msg_queue)) {
+
+	if (list_empty(fwd_queue)) {
 		TFW_DBG2("%s: Empty: srv_conn=[%p]\n", __func__, srv_conn);
 		return true;
 	}
@@ -465,9 +466,9 @@ tfw_http_conn_drained(TfwConnection *srv_conn)
 		TFW_DBG2("%s: None sent: srv_conn=[%p]\n", __func__, srv_conn);
 		return false;
 	}
-	lmsg = list_last_entry(&srv_conn->msg_queue, TfwMsg, seq_list);
-	if (srv_conn->msg_sent == lmsg)
+	if (srv_conn->msg_sent == list_last_entry(fwd_queue, TfwMsg, seq_list))
 		return true;
+
 	TFW_DBG2("%s: Some not sent: srv_conn=[%p]\n", __func__, srv_conn);
 	return false;
 }
@@ -509,7 +510,7 @@ __tfw_http_req_fwd_stalled(TfwConnection *srv_conn, struct list_head *err_queue)
 	TfwHttpReq *req, *tmp;
 	struct list_head *fwd_queue = &srv_conn->msg_queue;
 
-	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
+	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 
 	/*
 	 * Process the server connection's queue of pending requests.
@@ -557,7 +558,7 @@ tfw_http_req_fwd_stalled(TfwConnection *srv_conn)
 {
 	LIST_HEAD(err_queue);
 
-	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
+	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 	WARN_ON(!spin_is_locked(&srv_conn->msg_qlock));
 	BUG_ON(list_empty(&srv_conn->msg_queue));
 
@@ -575,12 +576,12 @@ __tfw_http_req_fwd_resend(TfwConnection *srv_conn,
 	TfwHttpReq *req, *tmp;
 	struct list_head *end, *fwd_queue = &srv_conn->msg_queue;
 
-	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
+	TFW_DBG2("%s: conn=[%p] one_msg=[%s]\n",
+		 __func__, srv_conn, one_msg ? "true" : "false");
 	BUG_ON(!srv_conn->msg_sent);
+	BUG_ON(list_empty(&srv_conn->msg_sent->fwd_list));
 
-	req = srv_conn->msg_resent
-	    ? (TfwHttpReq *)list_next_entry(srv_conn->msg_resent, fwd_list)
-	    : (TfwHttpReq *)list_first_entry(fwd_queue, TfwMsg, fwd_list);
+	req = list_first_entry(fwd_queue, TfwHttpReq, msg.fwd_list);
 	end = srv_conn->msg_sent->fwd_list.next;
 
 	/* An equivalent of list_for_each_entry_safe_from() */
@@ -605,6 +606,8 @@ __tfw_http_req_fwd_resend(TfwConnection *srv_conn,
 static void
 __tfw_http_req_fwd_qforwd(TfwConnection *srv_conn, struct list_head *err_queue)
 {
+	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
+
 	__tfw_http_req_fwd_stalled(srv_conn, err_queue);
 	if (list_empty(&srv_conn->msg_queue)) {
 		srv_conn->forward = tfw_http_req_fwd_stalled;
@@ -618,19 +621,21 @@ tfw_http_req_fwd_repair(TfwConnection *srv_conn)
 {
 	LIST_HEAD(err_queue);
 
-	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
+	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 	WARN_ON(!spin_is_locked(&srv_conn->msg_qlock));
 	BUG_ON(list_empty(&srv_conn->msg_queue));
 
 	if (test_bit(TFW_CONN_B_QFORWD, &srv_conn->flags)) {
 		__tfw_http_req_fwd_qforwd(srv_conn, &err_queue);
 	} else {
-		if (!srv_conn->msg_sent)
+		srv_conn->msg_resent = NULL;
+		if (srv_conn->msg_sent) {
 			__tfw_http_req_fwd_resend(srv_conn, false, &err_queue);
-		if (srv_conn->msg_resent == srv_conn->msg_sent) {
-			set_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
-			__tfw_http_req_fwd_qforwd(srv_conn, &err_queue);
+			if (srv_conn->msg_resent != srv_conn->msg_sent)
+				srv_conn->msg_sent = srv_conn->msg_resent;
 		}
+		set_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
+		__tfw_http_req_fwd_qforwd(srv_conn, &err_queue);
 	}
 	spin_unlock(&srv_conn->msg_qlock);
 
@@ -721,14 +726,18 @@ tfw_http_conn_repair(TfwConnection *srv_conn)
 {
 	LIST_HEAD(err_queue);
 
-	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
+	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
 	BUG_ON(!tfw_connection_restricted(srv_conn));
 
 	/* Resend the first unanswered request. */
 	spin_lock(&srv_conn->msg_qlock);
-	if (!srv_conn->msg_sent)
+	srv_conn->msg_resent = NULL;
+	if (srv_conn->msg_sent) {
 		__tfw_http_req_fwd_resend(srv_conn, true, &err_queue);
+		if (!srv_conn->msg_resent)
+			srv_conn->msg_sent = NULL;
+	}
 	if (!srv_conn->msg_resent) {
 		set_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
 		__tfw_http_req_fwd_qforwd(srv_conn, &err_queue);
@@ -748,14 +757,16 @@ tfw_http_conn_repair(TfwConnection *srv_conn)
 static int
 tfw_http_conn_init(TfwConnection *conn)
 {
+	TFW_DBG2("%s: conn=[%p]\n", __func__, conn);
+
 	if (TFW_CONN_TYPE(conn) & Conn_Srv) {
 		if (list_empty(&conn->msg_queue)) {
 			conn->forward = tfw_http_req_fwd_stalled;
 		} else {
-			conn->msg_resent = NULL;
 			conn->forward = tfw_http_req_fwd_repair;
 			set_bit(TFW_CONN_B_RESEND, &conn->flags);
 		}
+		INIT_LIST_HEAD(&conn->nip_queue);
 	}
 	tfw_gfsm_state_init(&conn->state, conn, TFW_HTTP_FSM_INIT);
 	return 0;
@@ -788,7 +799,7 @@ tfw_http_req_destruct(void *msg)
 static void
 tfw_http_conn_release(TfwConnection *srv_conn)
 {
-	TFW_DBG2("%s: conn = %p\n", __func__, srv_conn);
+	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
 
 	clear_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
@@ -815,7 +826,7 @@ tfw_http_conn_cli_drop(TfwConnection *cli_conn)
 	struct list_head *seq_queue = &cli_conn->msg_queue;
 	LIST_HEAD(zap_queue);
 
-	TFW_DBG2("%s: conn = %p\n", __func__, cli_conn);
+	TFW_DBG2("%s: conn=[%p]\n", __func__, cli_conn);
 	BUG_ON(!(TFW_CONN_TYPE(cli_conn) & Conn_Clnt));
 
 	if (list_empty_careful(seq_queue))
@@ -840,6 +851,8 @@ static void tfw_http_resp_terminate(TfwHttpMsg *hm);
 static void
 tfw_http_conn_drop(TfwConnection *conn)
 {
+	TFW_DBG2("%s: conn=[%p]\n", __func__, conn);
+
 	if (TFW_CONN_TYPE(conn) & Conn_Clnt) {
 		tfw_http_conn_cli_drop(conn);
 	} else if (conn->msg) {

From 2dc1f91b0728f3207bde4ca5b23bca53d143bb14 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Fri, 21 Oct 2016 16:59:50 +0300
Subject: [PATCH 12/65] Re-schedule requests from a dead server connection.
 (#419)

---
 tempesta_fw/http.c     | 153 ++++++++++++++++++++++++++---------------
 tempesta_fw/sock_srv.c |   7 ++
 2 files changed, 106 insertions(+), 54 deletions(-)

diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 44ee69cd9..7f2fd4b46 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -569,6 +569,59 @@ tfw_http_req_fwd_stalled(TfwConnection *srv_conn)
 		tfw_http_req_zap_error(&err_queue);
 }
 
+/*
+ * Forward the request @req to server connection @srv_conn.
+ *
+ * The request is added to the server connection (forwarding) queue.
+ * If forwarding is on hold at this moment, then the request will be
+ * forwarded later. Otherwise, if the queue is drained, then forward
+ * the request to the server immediately. If the queue is not drained,
+ * then forward all stalled requests to the server.
+ *
+ * Forwarding to a server is considered to be on hold after
+ * a non-idempotent request is forwarded to the server. The hold
+ * is removed when the holding non-idempotent request is followed
+ * by another request from the same client, which enables pipelining.
+ */
+static void
+tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
+{
+	bool drained;
+
+	TFW_DBG2("%s: srv_conn=[%p], req=[%p]\n", __func__, srv_conn, req);
+	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
+
+	spin_lock(&srv_conn->msg_qlock);
+	drained = tfw_http_conn_drained(srv_conn);
+	list_add_tail(&req->msg.fwd_list, &srv_conn->msg_queue);
+	if (tfw_http_req_is_nonidempotent(req))
+		__tfw_http_req_set_nonidempotent(srv_conn, req);
+	if (tfw_http_conn_on_hold(srv_conn)) {
+		spin_unlock(&srv_conn->msg_qlock);
+		TFW_DBG2("%s: Server connection is on hold: conn=[%p]\n",
+			 __func__, srv_conn);
+		return;
+	}
+	if (!drained) {
+		TFW_DBG2("%s: Server connection is not drained: conn=[%p]\n",
+			 __func__, srv_conn);
+		tfw_http_req_fwd_stalled(srv_conn);
+		/* The queue is unlocked inside the function. */
+		return;
+	}
+	if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
+		list_del_init(&req->msg.fwd_list);
+		tfw_http_req_flip_if_nonidempotent(srv_conn, req);
+		spin_unlock(&srv_conn->msg_qlock);
+		TFW_DBG2("%s: Error sending to server connection: "
+			 "conn=[%p] req=[%p]\n", __func__, srv_conn, req);
+		tfw_http_send_500(req);
+		return;
+	}
+	srv_conn->msg_sent = (TfwMsg *)req;
+	spin_unlock(&srv_conn->msg_qlock);
+}
+
 static void
 __tfw_http_req_fwd_resend(TfwConnection *srv_conn,
 			  bool one_msg, struct list_head *err_queue)
@@ -713,6 +766,45 @@ tfw_http_conn_msg_free(TfwHttpMsg *hm)
 	tfw_http_msg_free(hm);
 }
 
+/*
+ * Re-schedule requests in a dead server connection's queue to a live
+ * server connection. Idempotent requests are always rescheduled.
+ * Non-idempotent requests are not re-scheduled by default, but it
+ * can be configured to re-schedule those requests as well.
+ *
+ * FIXME: It appears that a re-scheduled request should be put in a
+ * new server connection's queue according to its original timestamp,
+ * and NOT just added at the end of the queue. That will matter when
+ * eviction of old requests is implemented.
+ */
+static void
+tfw_http_req_fwd_resched(TfwConnection *srv_conn)
+{
+	TfwHttpReq *req, *tmp;
+	TfwConnection *new_conn;
+	struct list_head *fwd_queue = &srv_conn->msg_queue;
+
+	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
+
+	list_for_each_entry_safe(req, tmp, fwd_queue, msg.fwd_list) {
+		list_del_init(&req->msg.fwd_list);
+		/* FIXME: Need config option. */
+		if (tfw_http_req_is_nonidempotent(req)) {
+			__tfw_http_req_set_idempotent(srv_conn, req);
+			goto send_err;
+		}
+		if (!(new_conn = tfw_sched_get_srv_conn((TfwMsg *)req))) {
+			TFW_WARN("Unable to find a backend server\n");
+			goto send_err;
+		}
+		tfw_http_req_fwd(new_conn, req);
+		continue;
+send_err:
+		tfw_http_send_404(req);
+		TFW_INC_STAT_BH(clnt.msgs_otherr);
+	}
+}
+
 /*
  * Find requests in the server's connection queue that were forwarded
  * to the server. These are unanswered requests. According to RFC 7230
@@ -730,7 +822,13 @@ tfw_http_conn_repair(TfwConnection *srv_conn)
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
 	BUG_ON(!tfw_connection_restricted(srv_conn));
 
-	/* Resend the first unanswered request. */
+	/* See if requests need to be rescheduled. */
+	if (unlikely(!tfw_connection_live(srv_conn))) {
+		tfw_http_req_fwd_resched(srv_conn);
+		return;
+	}
+
+	/* Re-send the first unanswered request. */
 	spin_lock(&srv_conn->msg_qlock);
 	srv_conn->msg_resent = NULL;
 	if (srv_conn->msg_sent) {
@@ -1133,59 +1231,6 @@ tfw_http_adjust_resp(TfwHttpResp *resp, TfwHttpReq *req)
 				     TFW_HTTP_HDR_SERVER, 0);
 }
 
-/*
- * Forward the request @req to server connection @srv_conn.
- *
- * The request is added to the server connection (forwarding) queue.
- * If forwarding is on hold at this moment, then the request will be
- * forwarded later. Otherwise, if the queue is drained, then forward
- * the request to the server immediately. If the queue is not drained,
- * then forward all stalled requests to the server.
- *
- * Forwarding to a server is considered to be on hold after
- * a non-idempotent request is forwarded to the server. The hold
- * is removed when the holding non-idempotent request is followed
- * by another request from the same client, which enables pipelining.
- */
-static void
-tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
-{
-	bool drained;
-
-	TFW_DBG2("%s: srv_conn=[%p], req=[%p]\n", __func__, srv_conn, req);
-	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
-
-	spin_lock(&srv_conn->msg_qlock);
-	drained = tfw_http_conn_drained(srv_conn);
-	list_add_tail(&req->msg.fwd_list, &srv_conn->msg_queue);
-	if (tfw_http_req_is_nonidempotent(req))
-		__tfw_http_req_set_nonidempotent(srv_conn, req);
-	if (tfw_http_conn_on_hold(srv_conn)) {
-		spin_unlock(&srv_conn->msg_qlock);
-		TFW_DBG2("%s: Server connection is on hold: conn=[%p]\n",
-			 __func__, srv_conn);
-		return;
-	}
-	if (!drained) {
-		TFW_DBG2("%s: Server connection is not drained: conn=[%p]\n",
-			 __func__, srv_conn);
-		tfw_http_req_fwd_stalled(srv_conn);
-		/* The queue is unlocked inside the function. */
-		return;
-	}
-	if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
-		list_del_init(&req->msg.fwd_list);
-		tfw_http_req_flip_if_nonidempotent(srv_conn, req);
-		spin_unlock(&srv_conn->msg_qlock);
-		TFW_DBG2("%s: Error sending to server connection: "
-			 "conn=[%p] req=[%p]\n", __func__, srv_conn, req);
-		tfw_http_send_500(req);
-		return;
-	}
-	srv_conn->msg_sent = (TfwMsg *)req;
-	spin_unlock(&srv_conn->msg_qlock);
-}
-
 /*
  * Forward responses to the client in the correct order.
  */
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index 3543ee95f..688a222ac 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -206,6 +206,12 @@ tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
 	 * starting from 100ms, which is a good RTT for a fast 10Gbps link.
 	 * The timeout is not increased after 1 second as it has moderate
 	 * overhead, and it's still good in response time.
+	 *
+	 * FIXME: The limit on the number of reconnect attempts is used
+	 * to re-schedule requests that would never be forwarded otherwise.
+	 * Still, attempts to reconnect may be continued in hopes that the
+	 * connection will be established sooner or later. Otherwise the
+	 * connection will stay dead until restart.
 	 */
 	static const unsigned long timeouts[] = { 1, 10, 100, 250, 500, 1000 };
 
@@ -222,6 +228,7 @@ tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
 		TFW_WARN("The limit of [%d] on reconnect attempts exceeded. "
 			 "The server connection [%s] is down permanently.\n",
 			 srv_conn->max_attempts, s_addr);
+		tfw_connection_repair(&srv_conn->conn);
 		return;
 	}
 	if (srv_conn->attempts < ARRAY_SIZE(timeouts)) {

From f245c8fc41d9fd8c32878a2a7d003fd027c548d0 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Mon, 24 Oct 2016 11:44:57 +0300
Subject: [PATCH 13/65] Minor bug fixes. (#419)

---
 tempesta_fw/connection.h | 4 +---
 tempesta_fw/http.c       | 1 -
 tempesta_fw/sock_clnt.c  | 2 ++
 tempesta_fw/sock_srv.c   | 5 +++++
 4 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/tempesta_fw/connection.h b/tempesta_fw/connection.h
index d0c81f7e0..8314a2435 100644
--- a/tempesta_fw/connection.h
+++ b/tempesta_fw/connection.h
@@ -337,9 +337,7 @@ tfw_connection_validate_cleanup(TfwConnection *conn)
 
 	BUG_ON(!conn);
 	BUG_ON(!list_empty(&conn->list));
-	BUG_ON((TFW_CONN_TYPE(conn) & Conn_Clnt)
-	       && !list_empty(&conn->msg_queue));
-	BUG_ON(atomic_read(&conn->refcnt) & ~1);
+	BUG_ON(atomic_read(&conn->refcnt) & ~1);	/* FIXME */
 	BUG_ON(conn->msg);
 
 	rc = atomic_read(&conn->refcnt);
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 7f2fd4b46..eeb76fda3 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -864,7 +864,6 @@ tfw_http_conn_init(TfwConnection *conn)
 			conn->forward = tfw_http_req_fwd_repair;
 			set_bit(TFW_CONN_B_RESEND, &conn->flags);
 		}
-		INIT_LIST_HEAD(&conn->nip_queue);
 	}
 	tfw_gfsm_state_init(&conn->state, conn, TFW_HTTP_FSM_INIT);
 	return 0;
diff --git a/tempesta_fw/sock_clnt.c b/tempesta_fw/sock_clnt.c
index 01c514df4..7c184b3d7 100644
--- a/tempesta_fw/sock_clnt.c
+++ b/tempesta_fw/sock_clnt.c
@@ -87,6 +87,8 @@ tfw_cli_conn_free(TfwConnection *conn)
 
 	/* Check that all nested resources are freed. */
 	tfw_connection_validate_cleanup(conn);
+	BUG_ON(!list_empty(&conn->msg_queue));
+
 	kmem_cache_free(tfw_cli_cache(TFW_CONN_TYPE(conn)), conn);
 }
 
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index 688a222ac..65b431cd9 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -500,6 +500,8 @@ tfw_srv_conn_alloc(void)
 		return NULL;
 
 	tfw_connection_init(&srv_conn->conn);
+	atomic_set(&srv_conn->conn.msg_qsize, 0);
+	INIT_LIST_HEAD(&srv_conn->conn.nip_queue);
 	__setup_retry_timer(srv_conn);
 	ss_proto_init(&srv_conn->conn.proto,
 		      &tfw_sock_srv_ss_hooks, Conn_HttpSrv);
@@ -514,6 +516,9 @@ tfw_srv_conn_free(TfwSrvConnection *srv_conn)
 
 	/* Check that all nested resources are freed. */
 	tfw_connection_validate_cleanup(&srv_conn->conn);
+	BUG_ON(atomic_read(&srv_conn->conn.msg_qsize));
+	BUG_ON(!list_empty(&srv_conn->conn.nip_queue));
+
 	kmem_cache_free(tfw_srv_conn_cache, srv_conn);
 }
 

From 52c35937626523e9dadb57ba023f7a122aec86d1 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Thu, 27 Oct 2016 16:21:31 +0300
Subject: [PATCH 14/65] Multiple changes, fixes, tune-ups, and options. (#419)

---
 tempesta_fw/connection.h           |  11 +-
 tempesta_fw/http.c                 | 189 +++++++++++--------
 tempesta_fw/http.h                 |  10 +-
 tempesta_fw/sched/tfw_sched_hash.c |   1 +
 tempesta_fw/sched/tfw_sched_rr.c   |   3 +-
 tempesta_fw/server.h               |  14 +-
 tempesta_fw/sock_srv.c             | 291 ++++++++++++++++++++---------
 7 files changed, 339 insertions(+), 180 deletions(-)

diff --git a/tempesta_fw/connection.h b/tempesta_fw/connection.h
index 8314a2435..b7d95d69c 100644
--- a/tempesta_fw/connection.h
+++ b/tempesta_fw/connection.h
@@ -77,14 +77,15 @@ enum {
  * @state	- connection processing state;
  * @list	- member in the list of connections with @peer;
  * @msg_queue	- queue of messages to be sent over the connection;
- * @nip_queue	- queue of non-idempotent messages within @msg_queue;
+ * @nip_queue	- queue of non-idempotent messages in server's @msg_queue;
  * @msg_qlock	- lock for accessing @msg_queue;
- * @flags	- various atomic flags related to connection's state;
+ * @flags	- atomic flags related to server connection's state;
  * @refcnt	- number of users of the connection structure instance;
+ * @qsize	- current number of requests in server's @msg_queue;
  * @timer	- The keep-alive/retry timer for the connection;
  * @msg		- message that is currently being processed;
- * @msg_sent	- message that was sent last in the connection;
- * @msg_resent	- message that was re-sent last in the connection;
+ * @msg_sent	- message that was sent last in a server connection;
+ * @msg_resent	- message that was re-sent last in a server connection;
  * @peer	- TfwClient or TfwServer handler;
  * @sk		- an appropriate sock handler;
  * @destructor	- called when a connection is destroyed;
@@ -99,6 +100,7 @@ typedef struct tfw_connection_t {
 	spinlock_t		msg_qlock;
 	unsigned long		flags;					/*srv*/
 	atomic_t		refcnt;
+	atomic_t		qsize;					/*srv*/
 	struct timer_list	timer;
 	TfwMsg			*msg;
 	TfwMsg			*msg_sent;				/*srv*/
@@ -106,7 +108,6 @@ typedef struct tfw_connection_t {
 	TfwPeer 		*peer;
 	struct sock		*sk;
 	void			(*destructor)(void *);
-	void			(*forward)(struct tfw_connection_t *);	/*srv*/
 } TfwConnection;
 
 #define TFW_CONN_DEATHCNT	(INT_MIN / 2)
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index eeb76fda3..97c1b849e 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -458,45 +458,54 @@ tfw_http_conn_drained(TfwConnection *srv_conn)
 
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
 
-	if (list_empty(fwd_queue)) {
-		TFW_DBG2("%s: Empty: srv_conn=[%p]\n", __func__, srv_conn);
+	if (list_empty(fwd_queue))
 		return true;
-	}
-	if (!srv_conn->msg_sent) {
-		TFW_DBG2("%s: None sent: srv_conn=[%p]\n", __func__, srv_conn);
+	if (!srv_conn->msg_sent)
 		return false;
-	}
-	if (srv_conn->msg_sent == list_last_entry(fwd_queue, TfwMsg, seq_list))
+	if (srv_conn->msg_sent == list_last_entry(fwd_queue, TfwMsg, fwd_list))
 		return true;
-
-	TFW_DBG2("%s: Some not sent: srv_conn=[%p]\n", __func__, srv_conn);
 	return false;
 }
 
 static inline bool
-tfw_http_conn_req_need_fwd(TfwConnection *srv_conn)
+tfw_http_conn_need_fwd(TfwConnection *srv_conn)
 {
 	return (!tfw_http_conn_on_hold(srv_conn)
 		&& !tfw_http_conn_drained(srv_conn));
 }
 
+static inline void
+tfw_http_req_move2equeue(TfwConnection *srv_conn, TfwHttpReq *req,
+			 struct list_head *equeue, unsigned short status)
+{
+	tfw_http_req_flip_if_nonidempotent(srv_conn, req);
+	list_move_tail(&req->msg.fwd_list, equeue);
+	req->rstatus = status;
+	atomic_dec(&srv_conn->qsize);
+}
+
 /*
  * Delete requests that were not forwarded due to an error. Send an
  * error response to a client. The response will be attached to the
  * request and sent to the client in proper seq order.
  */
 static void
-tfw_http_req_zap_error(struct list_head *err_queue)
+tfw_http_req_zap_error(struct list_head *equeue)
 {
 	TfwHttpReq *req, *tmp;
 
 	TFW_DBG2("%s: queue is %sempty\n",
-		 __func__, list_empty(err_queue) ? "" : "NOT ");
+		 __func__, list_empty(equeue) ? "" : "NOT ");
 
-        list_for_each_entry_safe(req, tmp, err_queue, msg.fwd_list) {
-                list_del_init(&req->msg.fwd_list);
-                tfw_http_send_500(req);
-        }
+	list_for_each_entry_safe(req, tmp, equeue, msg.fwd_list) {
+		list_del_init(&req->msg.fwd_list);
+		if (req->rstatus == 500)
+			tfw_http_send_500(req);
+		else if (req->rstatus == 504)
+			tfw_http_send_504(req);
+		else
+			BUG();
+	}
 }
 
 /*
@@ -505,9 +514,10 @@ tfw_http_req_zap_error(struct list_head *err_queue)
  * Must be called with a lock on the server connection's @msg_queue.
  */
 static void
-__tfw_http_req_fwd_stalled(TfwConnection *srv_conn, struct list_head *err_queue)
+__tfw_http_req_fwd_stalled(TfwConnection *srv_conn, struct list_head *equeue)
 {
 	TfwHttpReq *req, *tmp;
+	TfwServer *srv = (TfwServer *)srv_conn->peer;
 	struct list_head *fwd_queue = &srv_conn->msg_queue;
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
@@ -523,6 +533,14 @@ __tfw_http_req_fwd_stalled(TfwConnection *srv_conn, struct list_head *err_queue)
 	    ? (TfwHttpReq *)list_next_entry(srv_conn->msg_sent, fwd_list)
 	    : (TfwHttpReq *)list_first_entry(fwd_queue, TfwMsg, fwd_list);
 	list_for_each_entry_safe_from(req, tmp, fwd_queue, msg.fwd_list) {
+		unsigned long jtimeout = jiffies - req->jtstamp;
+		if (time_after(jtimeout, srv->qjtimeout)) {
+			TFW_DBG2("%s: Eviction: req=[%p] overdue=[%dms]\n",
+				 __func__, req,
+				jiffies_to_msecs(jtimeout - srv->qjtimeout));
+			tfw_http_req_move2equeue(srv_conn, req, equeue, 504);
+			continue;
+		}
 		/*
 		 * If unable to send to the server connection due to
 		 * an error, then move the request to @err_queue for
@@ -530,17 +548,18 @@ __tfw_http_req_fwd_stalled(TfwConnection *srv_conn, struct list_head *err_queue)
 		 * as the response will be sent in proper seq order.
 		 */
 		if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
-			tfw_http_req_flip_if_nonidempotent(srv_conn, req);
-			list_move_tail(&req->msg.fwd_list, err_queue);
-			TFW_DBG2("%s: Error sending to server connection: "
-				 "conn=[%p] req=[%p]\n",
+			TFW_DBG2("%s: Forwarding error: conn=[%p] req=[%p]\n",
 				 __func__, srv_conn, req);
+			tfw_http_req_move2equeue(srv_conn, req, equeue, 500);
 			continue;
 		}
 		srv_conn->msg_sent = (TfwMsg *)req;
 		/* Stop sending if the request is non-idempotent. */
-		if (tfw_http_req_is_nonidempotent(req))
+		if (tfw_http_req_is_nonidempotent(req)) {
+			TFW_DBG2("%s: Break on non-idempotent: req=[%p]\n",
+				 __func__, req);
 			break;
+		}
 		/* See if the request has become idempotent. */
 		tfw_http_req_flip_if_nonidempotent(srv_conn, req);
 	}
@@ -556,17 +575,17 @@ __tfw_http_req_fwd_stalled(TfwConnection *srv_conn, struct list_head *err_queue)
 static void
 tfw_http_req_fwd_stalled(TfwConnection *srv_conn)
 {
-	LIST_HEAD(err_queue);
+	LIST_HEAD(equeue);
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 	WARN_ON(!spin_is_locked(&srv_conn->msg_qlock));
 	BUG_ON(list_empty(&srv_conn->msg_queue));
 
-	__tfw_http_req_fwd_stalled(srv_conn, &err_queue);
+	__tfw_http_req_fwd_stalled(srv_conn, &equeue);
 	spin_unlock(&srv_conn->msg_qlock);
 
-	if (!list_empty(&err_queue))
-		tfw_http_req_zap_error(&err_queue);
+	if (!list_empty(&equeue))
+		tfw_http_req_zap_error(&equeue);
 }
 
 /*
@@ -594,6 +613,7 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 	spin_lock(&srv_conn->msg_qlock);
 	drained = tfw_http_conn_drained(srv_conn);
 	list_add_tail(&req->msg.fwd_list, &srv_conn->msg_queue);
+	atomic_inc(&srv_conn->qsize);
 	if (tfw_http_req_is_nonidempotent(req))
 		__tfw_http_req_set_nonidempotent(srv_conn, req);
 	if (tfw_http_conn_on_hold(srv_conn)) {
@@ -612,9 +632,10 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 	if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
 		list_del_init(&req->msg.fwd_list);
 		tfw_http_req_flip_if_nonidempotent(srv_conn, req);
+		atomic_dec(&srv_conn->qsize);
 		spin_unlock(&srv_conn->msg_qlock);
-		TFW_DBG2("%s: Error sending to server connection: "
-			 "conn=[%p] req=[%p]\n", __func__, srv_conn, req);
+		TFW_DBG2("%s: Forwarding error: conn=[%p] req=[%p]\n",
+			 __func__, srv_conn, req);
 		tfw_http_send_500(req);
 		return;
 	}
@@ -624,9 +645,10 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 
 static void
 __tfw_http_req_fwd_resend(TfwConnection *srv_conn,
-			  bool one_msg, struct list_head *err_queue)
+			  bool one_msg, struct list_head *equeue)
 {
 	TfwHttpReq *req, *tmp;
+	TfwServer *srv = (TfwServer *)srv_conn->peer;
 	struct list_head *end, *fwd_queue = &srv_conn->msg_queue;
 
 	TFW_DBG2("%s: conn=[%p] one_msg=[%s]\n",
@@ -642,12 +664,16 @@ __tfw_http_req_fwd_resend(TfwConnection *srv_conn,
 	     &req->msg.fwd_list != end;
 	     req = tmp, tmp = list_next_entry(tmp, msg.fwd_list))
 	{
+		if (req->retries++ >= srv->retry_max) {
+			TFW_DBG2("%s: Eviction: req=[%p] retries=[%d]\n",
+				 __func__, req, req->retries);
+			tfw_http_req_move2equeue(srv_conn, req, equeue, 504);
+			continue;
+		}
 		if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
-			tfw_http_req_flip_if_nonidempotent(srv_conn, req);
-			list_move_tail(&req->msg.fwd_list, err_queue);
-			TFW_DBG2("%s: Error sending to server connection: "
-				 "conn=[%p] req=[%p]\n",
+			TFW_DBG2("%s: Forwarding error: conn=[%p] req=[%p]\n",
 				 __func__, srv_conn, req);
+			tfw_http_req_move2equeue(srv_conn, req, equeue, 500);
 			continue;
 		}
 		srv_conn->msg_resent = (TfwMsg *)req;
@@ -656,44 +682,36 @@ __tfw_http_req_fwd_resend(TfwConnection *srv_conn,
 	}
 }
 
-static void
-__tfw_http_req_fwd_qforwd(TfwConnection *srv_conn, struct list_head *err_queue)
-{
-	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
-
-	__tfw_http_req_fwd_stalled(srv_conn, err_queue);
-	if (list_empty(&srv_conn->msg_queue)) {
-		srv_conn->forward = tfw_http_req_fwd_stalled;
-		clear_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
-		clear_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
-	}
-}
-
 static void
 tfw_http_req_fwd_repair(TfwConnection *srv_conn)
 {
-	LIST_HEAD(err_queue);
+	LIST_HEAD(equeue);
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 	WARN_ON(!spin_is_locked(&srv_conn->msg_qlock));
-	BUG_ON(list_empty(&srv_conn->msg_queue));
+	BUG_ON(!(srv_conn->flags & (BIT(TFW_CONN_B_QFORWD) | BIT(TFW_CONN_B_RESEND))));
 
-	if (test_bit(TFW_CONN_B_QFORWD, &srv_conn->flags)) {
-		__tfw_http_req_fwd_qforwd(srv_conn, &err_queue);
+	if (list_empty(&srv_conn->msg_queue)) {
+		clear_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
+		clear_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
+	} else if (test_bit(TFW_CONN_B_QFORWD, &srv_conn->flags)) {
+		if (tfw_http_conn_need_fwd(srv_conn))
+			__tfw_http_req_fwd_stalled(srv_conn, &equeue);
 	} else {
 		srv_conn->msg_resent = NULL;
 		if (srv_conn->msg_sent) {
-			__tfw_http_req_fwd_resend(srv_conn, false, &err_queue);
+			__tfw_http_req_fwd_resend(srv_conn, false, &equeue);
 			if (srv_conn->msg_resent != srv_conn->msg_sent)
 				srv_conn->msg_sent = srv_conn->msg_resent;
 		}
 		set_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
-		__tfw_http_req_fwd_qforwd(srv_conn, &err_queue);
+		if (tfw_http_conn_need_fwd(srv_conn))
+			__tfw_http_req_fwd_stalled(srv_conn, &equeue);
 	}
 	spin_unlock(&srv_conn->msg_qlock);
 
-	if (!list_empty(&err_queue))
-		tfw_http_req_zap_error(&err_queue);
+	if (!list_empty(&equeue))
+		tfw_http_req_zap_error(&equeue);
 }
 
 /*
@@ -773,9 +791,9 @@ tfw_http_conn_msg_free(TfwHttpMsg *hm)
  * can be configured to re-schedule those requests as well.
  *
  * FIXME: It appears that a re-scheduled request should be put in a
- * new server connection's queue according to its original timestamp,
- * and NOT just added at the end of the queue. That will matter when
- * eviction of old requests is implemented.
+ * new server connection's queue according to its original timestamp.
+ * It may matter as old requests are evicted. However, that is time
+ * consuming. For now just put them at the end of the queue.
  */
 static void
 tfw_http_req_fwd_resched(TfwConnection *srv_conn)
@@ -788,6 +806,7 @@ tfw_http_req_fwd_resched(TfwConnection *srv_conn)
 
 	list_for_each_entry_safe(req, tmp, fwd_queue, msg.fwd_list) {
 		list_del_init(&req->msg.fwd_list);
+		atomic_dec(&srv_conn->qsize);
 		/* FIXME: Need config option. */
 		if (tfw_http_req_is_nonidempotent(req)) {
 			__tfw_http_req_set_idempotent(srv_conn, req);
@@ -803,6 +822,7 @@ tfw_http_req_fwd_resched(TfwConnection *srv_conn)
 		tfw_http_send_404(req);
 		TFW_INC_STAT_BH(clnt.msgs_otherr);
 	}
+	BUG_ON(atomic_read(&srv_conn->qsize));
 }
 
 /*
@@ -816,7 +836,7 @@ tfw_http_req_fwd_resched(TfwConnection *srv_conn)
 static void
 tfw_http_conn_repair(TfwConnection *srv_conn)
 {
-	LIST_HEAD(err_queue);
+	LIST_HEAD(equeue);
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
@@ -827,23 +847,23 @@ tfw_http_conn_repair(TfwConnection *srv_conn)
 		tfw_http_req_fwd_resched(srv_conn);
 		return;
 	}
-
 	/* Re-send the first unanswered request. */
 	spin_lock(&srv_conn->msg_qlock);
 	srv_conn->msg_resent = NULL;
 	if (srv_conn->msg_sent) {
-		__tfw_http_req_fwd_resend(srv_conn, true, &err_queue);
+		__tfw_http_req_fwd_resend(srv_conn, true, &equeue);
 		if (!srv_conn->msg_resent)
 			srv_conn->msg_sent = NULL;
 	}
 	if (!srv_conn->msg_resent) {
 		set_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
-		__tfw_http_req_fwd_qforwd(srv_conn, &err_queue);
+		if (tfw_http_conn_need_fwd(srv_conn))
+			__tfw_http_req_fwd_stalled(srv_conn, &equeue);
 	}
 	spin_unlock(&srv_conn->msg_qlock);
 
-	if (!list_empty(&err_queue))
-		tfw_http_req_zap_error(&err_queue);
+	if (!list_empty(&equeue))
+		tfw_http_req_zap_error(&equeue);
 }
 
 /*
@@ -858,12 +878,8 @@ tfw_http_conn_init(TfwConnection *conn)
 	TFW_DBG2("%s: conn=[%p]\n", __func__, conn);
 
 	if (TFW_CONN_TYPE(conn) & Conn_Srv) {
-		if (list_empty(&conn->msg_queue)) {
-			conn->forward = tfw_http_req_fwd_stalled;
-		} else {
-			conn->forward = tfw_http_req_fwd_repair;
+		if (!list_empty(&conn->msg_queue))
 			set_bit(TFW_CONN_B_RESEND, &conn->flags);
-		}
 	}
 	tfw_gfsm_state_init(&conn->state, conn, TFW_HTTP_FSM_INIT);
 	return 0;
@@ -1256,7 +1272,7 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 	spin_lock(&cli_conn->msg_qlock);
 	if (list_empty(seq_queue)) {
 		spin_unlock(&cli_conn->msg_qlock);
-		TFW_DBG2("%s: Missing client requests: conn=[%p]\n",
+		TFW_DBG2("%s: The client's request missing: conn=[%p]\n",
 			 __func__, cli_conn);
 		ss_close_sync(cli_conn->sk, true);
 		tfw_http_conn_msg_free((TfwHttpMsg *)resp);
@@ -1292,8 +1308,7 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 		 * Otherwise, the correct order of responses may be broken.
 		 */
 		if (tfw_cli_conn_send(cli_conn, (TfwMsg *)resp)) {
-			TFW_DBG2("%s: Error sending to client connection: "
-				 "conn=[%p] resp=[%p]\n",
+			TFW_DBG2("%s: Forwarding error: conn=[%p] resp=[%p]\n",
 				 __func__, cli_conn, resp);
 			ss_close_sync(cli_conn->sk, true);
 		}
@@ -1373,8 +1388,7 @@ tfw_http_req_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 	 * initialized to point at the appropriate TfwConnection, so that
 	 * all subsequent session hits are scheduled much faster.
 	 */
-	srv_conn = tfw_sched_get_srv_conn((TfwMsg *)req);
-	if (srv_conn == NULL) {
+	if (!(srv_conn = tfw_sched_get_srv_conn((TfwMsg *)req))) {
 		TFW_WARN("Unable to find a backend server\n");
 		goto send_502;
 	}
@@ -1683,11 +1697,21 @@ tfw_http_resp_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 		TFW_INC_STAT_BH(serv.msgs_otherr);
 		return;
 	}
-	tfw_http_resp_fwd(req, resp);
-	/* Responses from cache don't have @resp->conn. */
-	if (resp->conn)
+	/*
+	 * Responses from cache don't have @resp->conn.
+	 *
+	 * FIXME: The same check is performed in tfw_http_popreq()
+	 * which happens just a bit earlier. Is there a way to avoid
+	 * it here? The condition is considered rare, and there's no
+	 * need to check for it in the regular path. The real issue
+	 * here is that APM stats can't handle response times that
+	 * are >= USHORT_MAX. So for now don't count the requests
+	 * that are re-sent after a server connection is restored.
+	 */
+	if (resp->conn && !tfw_connection_restricted(resp->conn))
 		tfw_apm_update(((TfwServer *)resp->conn->peer)->apm,
 			       resp->jtstamp, resp->jtstamp - req->jtstamp);
+	tfw_http_resp_fwd(req, resp);
 	TFW_INC_STAT_BH(serv.msgs_forwarded);
 	return;
 }
@@ -1709,6 +1733,7 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 
 	spin_lock(&srv_conn->msg_qlock);
 	if (unlikely(list_empty(fwd_queue))) {
+		BUG_ON(atomic_read(&srv_conn->qsize));
 		spin_unlock(&srv_conn->msg_qlock);
 		/* @conn->msg will get NULLed in the process. */
 		TFW_WARN("Paired request missing\n");
@@ -1719,17 +1744,21 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 	}
 	req = list_first_entry(fwd_queue, TfwHttpReq, msg.fwd_list);
 	list_del_init(&req->msg.fwd_list);
+	atomic_dec(&srv_conn->qsize);
 	if ((TfwMsg *)req == srv_conn->msg_sent)
 		srv_conn->msg_sent = NULL;
 	tfw_http_req_flip_if_nonidempotent(srv_conn, req);
 	tfw_http_conn_flip_if_nonidempotent(srv_conn);
 	/*
-	 * If the server connection is no longer on hold, and the queue
-	 * is not drained, then forward pending requests to the server.
-	 * Note: The queue is unlocked inside srv_conn->forward().
+	 * Perform special processing if the connection is in repair
+	 * mode. Otherwise, forward pending requests to the server.
+	 * Note: The queue is unlocked inside tfw_http_req_fwd_repair()
+	 * or tfw_http_req_fwd_stalled().
 	 */
-	if (tfw_http_conn_req_need_fwd(srv_conn))
-		srv_conn->forward(srv_conn);
+	if (tfw_connection_restricted(srv_conn))
+		tfw_http_req_fwd_repair(srv_conn);
+	else if (tfw_http_conn_need_fwd(srv_conn))
+		tfw_http_req_fwd_stalled(srv_conn);
 	else
 		spin_unlock(&srv_conn->msg_qlock);
 
diff --git a/tempesta_fw/http.h b/tempesta_fw/http.h
index 74d9f37e1..40c7a825e 100644
--- a/tempesta_fw/http.h
+++ b/tempesta_fw/http.h
@@ -345,9 +345,13 @@ typedef struct {
  * @method	- HTTP request method, one of GET/PORT/HEAD/etc;
  * @node	- NUMA node where request is serviced;
  * @frang_st	- current state of FRANG classifier;
+ * @chunk_cnt	- header or body chunk count for Frang classifier;
  * @tm_header	- time HTTP header started coming;
  * @tm_bchunk	- time previous chunk of HTTP body had come at;
  * @hash	- hash value for caching calculated for the request;
+ * @resp	- the response paired with this request;
+ * @rstatus	- response HTTP status until the response is prepared;
+ * @retries	- the number of re-send attempts;
  *
  * TfwStr members must be the first for efficient scanning.
  */
@@ -367,7 +371,11 @@ typedef struct {
 	unsigned long		tm_header;
 	unsigned long		tm_bchunk;
 	unsigned long		hash;
-	TfwHttpMsg		*resp;
+	union {
+		TfwHttpMsg	*resp;
+		unsigned short	rstatus;
+		unsigned short	retries;
+	};
 } TfwHttpReq;
 
 #define TFW_HTTP_REQ_STR_START(r)	__MSG_STR_START(r)
diff --git a/tempesta_fw/sched/tfw_sched_hash.c b/tempesta_fw/sched/tfw_sched_hash.c
index 913c7907b..64b6abdd7 100644
--- a/tempesta_fw/sched/tfw_sched_hash.c
+++ b/tempesta_fw/sched/tfw_sched_hash.c
@@ -146,6 +146,7 @@ tfw_sched_hash_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 	for (tries = 0; tries < __HLIST_SZ(TFW_SG_MAX_CONN); ++tries) {
 		for (ch = sg->sched_data; ch->conn; ++ch) {
 			if (unlikely(tfw_connection_restricted(ch->conn))
+			    || unlikely(tfw_server_queue_full(ch->conn))
 			    || unlikely(!tfw_connection_live(ch->conn)))
 				continue;
 			curr_weight = msg_hash ^ ch->hash;
diff --git a/tempesta_fw/sched/tfw_sched_rr.c b/tempesta_fw/sched/tfw_sched_rr.c
index dd675cd04..4413e6041 100644
--- a/tempesta_fw/sched/tfw_sched_rr.c
+++ b/tempesta_fw/sched/tfw_sched_rr.c
@@ -128,7 +128,8 @@ tfw_sched_rr_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 		for (c = 0; c < srv_cl->conn_n; ++c) {
 			idx = atomic64_inc_return(&srv_cl->rr_counter);
 			conn = srv_cl->conns[idx % srv_cl->conn_n];
-			if (unlikely(tfw_connection_restricted(conn)))
+			if (unlikely(tfw_connection_restricted(conn))
+			    || unlikely(tfw_server_queue_full(conn)))
 				continue;
 			if (skipnip && tfw_connection_hasnip(conn)) {
 				if (likely(tfw_connection_live(conn)))
diff --git a/tempesta_fw/server.h b/tempesta_fw/server.h
index f4eafe9ec..e4fd72bdf 100644
--- a/tempesta_fw/server.h
+++ b/tempesta_fw/server.h
@@ -43,14 +43,19 @@ typedef struct tfw_scheduler_t TfwScheduler;
  * @list	- member pointer in the list of servers of a server group;
  * @sg		- back-reference to the server group;
  * @apm		- opaque handle for APM stats;
+ * @qsize_max	- maximum queue size of a server connection;
+ * @qjtimeout	- maximum age of a request in a server connection, in jiffies;
+ * @retry_max	- maximum number of tries for forwarding a request;
  */
 typedef struct {
 	TFW_PEER_COMMON;
 	struct list_head	list;
 	TfwSrvGroup		*sg;
 	void			*apm;
-	unsigned int		flags;
 	int			stress;
+	unsigned int		qsize_max;
+	unsigned long		qjtimeout;
+	unsigned int		retry_max;
 } TfwServer;
 
 /**
@@ -118,6 +123,13 @@ void tfw_server_destroy(TfwServer *srv);
 
 void tfw_srv_conn_release(TfwConnection *conn);
 
+static inline bool
+tfw_server_queue_full(TfwConnection *srv_conn)
+{
+	TfwServer *srv = (TfwServer *)srv_conn->peer;
+	return atomic_read(&srv_conn->qsize) >= srv->qsize_max;
+}
+
 /* Server group routines. */
 TfwSrvGroup *tfw_sg_lookup(const char *name);
 TfwSrvGroup *tfw_sg_new(const char *name, gfp_t flags);
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index 65b431cd9..8cc938486 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -58,7 +58,7 @@
 /*
  * Default number of reconnect attempts. Zero means unlimited number.
  */
-#define TFW_SOCK_SRV_RETRY_ATTEMPTS_DEF	0		/* default value */
+#define TFW_SRV_RETRY_ATTEMPTS_DEF	0		/* default value */
 
 /**
  * TfwConnection extension for server sockets.
@@ -495,13 +495,12 @@ tfw_srv_conn_alloc(void)
 {
 	TfwSrvConnection *srv_conn;
 
-	srv_conn = kmem_cache_alloc(tfw_srv_conn_cache, GFP_ATOMIC);
-	if (!srv_conn)
+	if (!(srv_conn = kmem_cache_alloc(tfw_srv_conn_cache, GFP_ATOMIC)))
 		return NULL;
 
 	tfw_connection_init(&srv_conn->conn);
-	atomic_set(&srv_conn->conn.msg_qsize, 0);
 	INIT_LIST_HEAD(&srv_conn->conn.nip_queue);
+	atomic_set(&srv_conn->conn.qsize, 0);
 	__setup_retry_timer(srv_conn);
 	ss_proto_init(&srv_conn->conn.proto,
 		      &tfw_sock_srv_ss_hooks, Conn_HttpSrv);
@@ -516,8 +515,8 @@ tfw_srv_conn_free(TfwSrvConnection *srv_conn)
 
 	/* Check that all nested resources are freed. */
 	tfw_connection_validate_cleanup(&srv_conn->conn);
-	BUG_ON(atomic_read(&srv_conn->conn.msg_qsize));
 	BUG_ON(!list_empty(&srv_conn->conn.nip_queue));
+	BUG_ON(atomic_read(&srv_conn->conn.qsize));
 
 	kmem_cache_free(tfw_srv_conn_cache, srv_conn);
 }
@@ -562,59 +561,112 @@ tfw_sock_srv_delete_all_conns(void)
  * ------------------------------------------------------------------------
  */
 
-#define TFW_SRV_CFG_DEF_CONNS_N		"32"
+/* Default number of connections per server. */
+#define TFW_SRV_CONNS_N_DEF		"32"
 
-static int tfw_srv_cfg_in_attempts = TFW_SOCK_SRV_RETRY_ATTEMPTS_DEF;
-static int tfw_srv_cfg_out_attempts = TFW_SOCK_SRV_RETRY_ATTEMPTS_DEF;
-
-static int
-tfw_srv_cfg_set_conn_retries(TfwServer *srv, int attempts)
-{
-	TfwSrvConnection *srv_conn, *tmp;
+/*
+ * Server connection's maximum queue size, and default timeout for
+ * requests in the queue.
+ */
+#define TFW_SRV_QUEUE_SIZE_DEF		1000	/* Max queue size */
+#define TFW_SRV_QUEUE_TIMEOUT_DEF	60	/* Default request timeout */
+#define TFW_SRV_QUEUE_TRIES_DEF		5	/* Default number of tries */
 
-	list_for_each_entry_safe(srv_conn, tmp, &srv->conn_list, conn.list)
-		srv_conn->max_attempts = attempts;
+static int tfw_cfg_in_queue_size = TFW_SRV_QUEUE_SIZE_DEF;
+static int tfw_cfg_in_queue_timeout = TFW_SRV_QUEUE_TIMEOUT_DEF;
+static int tfw_cfg_in_queue_tries = TFW_SRV_QUEUE_TRIES_DEF;
+static int tfw_cfg_out_queue_size = TFW_SRV_QUEUE_SIZE_DEF;
+static int tfw_cfg_out_queue_timeout = TFW_SRV_QUEUE_TIMEOUT_DEF;
+static int tfw_cfg_out_queue_tries = TFW_SRV_QUEUE_TRIES_DEF;
 
-	return 0;
-}
+static int tfw_cfg_in_retry_attempts = TFW_SRV_RETRY_ATTEMPTS_DEF;
+static int tfw_cfg_out_retry_attempts = TFW_SRV_RETRY_ATTEMPTS_DEF;
 
 static int
-tfw_srv_cfg_handle_conn_retries(TfwCfgSpec *cs, TfwCfgEntry *ce, int *attempts)
+tfw_handle_opt_val(TfwCfgSpec *cs, TfwCfgEntry *ce, int *optval)
 {
 	int ret;
 
+	if (ce->attr_n) {
+		TFW_ERR("%s: Arguments may not have the \'=\' sign\n",
+			cs->name);
+		return -EINVAL;
+	}
 	if (ce->val_n != 1) {
-		TFW_ERR("%s: Invalid number of arguments: %zd\n",
-			cs->name, ce->val_n);
+		TFW_ERR("%s: Invalid number of arguments: %d\n",
+			cs->name, (int)ce->val_n);
 		return -EINVAL;
 	}
-
-	if ((ret = tfw_cfg_parse_int(ce->vals[0], attempts)))
+	if ((ret = tfw_cfg_parse_int(ce->vals[0], optval)))
 		return ret;
 
 	return 0;
 }
 
 static int
-tfw_srv_cfg_handle_in_conn_retries(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_handle_in_queue_size(TfwCfgSpec *cs, TfwCfgEntry *ce)
+{
+	return tfw_handle_opt_val(cs, ce, &tfw_cfg_in_queue_size);
+}
+
+static int
+tfw_handle_out_queue_size(TfwCfgSpec *cs, TfwCfgEntry *ce)
+{
+	return tfw_handle_opt_val(cs, ce, &tfw_cfg_out_queue_size);
+}
+
+static int
+tfw_handle_in_queue_timeout(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	return tfw_srv_cfg_handle_conn_retries(cs, ce,
-					       &tfw_srv_cfg_in_attempts);
+	return tfw_handle_opt_val(cs, ce, &tfw_cfg_in_queue_timeout);
 }
 
 static int
-tfw_srv_cfg_handle_out_conn_retries(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_handle_out_queue_timeout(TfwCfgSpec *cs, TfwCfgEntry *ce)
+{
+	return tfw_handle_opt_val(cs, ce, &tfw_cfg_out_queue_timeout);
+}
+
+static int
+tfw_handle_in_queue_tries(TfwCfgSpec *cs, TfwCfgEntry *ce)
+{
+	return tfw_handle_opt_val(cs, ce, &tfw_cfg_in_queue_tries);
+}
+
+static int
+tfw_handle_out_queue_tries(TfwCfgSpec *cs, TfwCfgEntry *ce)
+{
+	return tfw_handle_opt_val(cs, ce, &tfw_cfg_out_queue_tries);
+}
+
+static int
+tfw_handle_in_conn_tries(TfwCfgSpec *cs, TfwCfgEntry *ce)
+{
+	return tfw_handle_opt_val(cs, ce, &tfw_cfg_in_retry_attempts);
+}
+
+static int
+tfw_handle_out_conn_tries(TfwCfgSpec *cs, TfwCfgEntry *ce)
+{
+	return tfw_handle_opt_val(cs, ce, &tfw_cfg_out_retry_attempts);
+}
+
+tfw_cfg_set_conn_tries(TfwServer *srv, int attempts)
 {
-	return tfw_srv_cfg_handle_conn_retries(cs, ce,
-					       &tfw_srv_cfg_out_attempts);
+	TfwSrvConnection *srv_conn;
+
+	list_for_each_entry(srv_conn, &srv->conn_list, conn.list) {
+		srv_conn->max_attempts = attempts;
+
+	return 0;
 }
 
 /**
  * A "srv_group" which is currently being parsed.
  * All "server" entries are added to this group.
  */
-static TfwSrvGroup *tfw_srv_cfg_curr_group;
-static TfwScheduler *tfw_srv_cfg_dflt_sched;
+static TfwSrvGroup *tfw_cfg_curr_group;
+static TfwScheduler *tfw_cfg_dflt_sched;
 
 /**
  * Handle "server" within an "srv_group", e.g.:
@@ -627,20 +679,20 @@ static TfwScheduler *tfw_srv_cfg_dflt_sched;
  * Every server is simply added to the tfw_srv_cfg_curr_group.
  */
 static TfwServer *
-tfw_srv_cfg_handle_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_handle_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
 	TfwAddr addr;
 	TfwServer *srv;
 	int r, conns_n;
 	const char *in_addr, *in_conns_n;
 
-	BUG_ON(!tfw_srv_cfg_curr_group);
+	BUG_ON(!tfw_cfg_curr_group);
 
 	if ((r = tfw_cfg_check_val_n(ce, 1)))
 		return NULL;
 
 	in_addr = ce->vals[0];
-	in_conns_n = tfw_cfg_get_attr(ce, "conns_n", TFW_SRV_CFG_DEF_CONNS_N);
+	in_conns_n = tfw_cfg_get_attr(ce, "conns_n", TFW_SRV_CONNS_N_DEF);
 
 	if ((r = tfw_addr_pton(&TFW_STR_FROM(in_addr), &addr)))
 		return NULL;
@@ -656,7 +708,7 @@ tfw_srv_cfg_handle_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
 		TFW_ERR("can't create a server socket\n");
 		return NULL;
 	}
-	tfw_sg_add(tfw_srv_cfg_curr_group, srv);
+	tfw_sg_add(tfw_cfg_curr_group, srv);
 
 	if ((r = tfw_sock_srv_add_conns(srv, conns_n))) {
 		TFW_ERR("can't add connections to the server\n");
@@ -666,20 +718,20 @@ tfw_srv_cfg_handle_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
 	return srv;
 }
 
-static TfwServer *tfw_srv_cfg_in_lst[TFW_SG_MAX_SRV];
-static int tfw_srv_cfg_in_lstsz = 0;
-static int tfw_srv_cfg_out_lstsz = 0;
+static TfwServer *tfw_cfg_in_lst[TFW_SG_MAX_SRV];
+static int tfw_cfg_in_lstsz = 0;
+static int tfw_cfg_out_lstsz = 0;
 
 static int
-tfw_srv_cfg_handle_in_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_handle_in_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
 	TfwServer *srv;
 
-	if (tfw_srv_cfg_in_lstsz >= TFW_SG_MAX_SRV)
+	if (tfw_cfg_in_lstsz >= TFW_SG_MAX_SRV)
 		return -EINVAL;
-	if (!(srv = tfw_srv_cfg_handle_server(cs, ce)))
+	if (!(srv = tfw_handle_server(cs, ce)))
 		return -EINVAL;
-	tfw_srv_cfg_in_lst[tfw_srv_cfg_in_lstsz++] = srv;
+	tfw_cfg_in_lst[tfw_cfg_in_lstsz++] = srv;
 
 	return 0;
 }
@@ -704,7 +756,7 @@ tfw_srv_cfg_handle_in_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
  *    }
  */
 static int
-tfw_srv_cfg_handle_out_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_handle_out_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
 	int ret;
 	TfwServer *srv;
@@ -712,7 +764,7 @@ tfw_srv_cfg_handle_out_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
 	static const char __read_mostly s_default[] = "default";
 	TfwSrvGroup *sg = tfw_sg_lookup(s_default);
 
-	if (tfw_srv_cfg_out_lstsz >= TFW_SG_MAX_SRV)
+	if (tfw_cfg_out_lstsz >= TFW_SG_MAX_SRV)
 		return -EINVAL;
 	/* The group "default" is created implicitly. */
 	if (sg == NULL) {
@@ -720,8 +772,8 @@ tfw_srv_cfg_handle_out_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
 			TFW_ERR("Unable to add server group '%s'\n", s_default);
 			return -EINVAL;
 		}
-		dflt_sched_name = tfw_srv_cfg_dflt_sched
-				  ? tfw_srv_cfg_dflt_sched->name
+		dflt_sched_name = tfw_cfg_dflt_sched
+				  ? tfw_cfg_dflt_sched->name
 				  : "round-robin";
 		if ((ret = tfw_sg_set_sched(sg, dflt_sched_name)) != 0) {
 			TFW_ERR("Unable to set scheduler '%s' "
@@ -730,12 +782,15 @@ tfw_srv_cfg_handle_out_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
 			return ret;
 		}
 	}
-	tfw_srv_cfg_curr_group = sg;
+	tfw_cfg_curr_group = sg;
 
-	if (!(srv = tfw_srv_cfg_handle_server(cs, ce)))
+	if (!(srv = tfw_handle_server(cs, ce)))
 		return -EINVAL;
 
-	tfw_srv_cfg_set_conn_retries(srv, tfw_srv_cfg_out_attempts);
+	tfw_cfg_set_conn_tries(srv, tfw_cfg_out_retry_attempts)
+	srv->qsize_max = tfw_cfg_out_queue_size;
+	srv->qjtimeout = msecs_to_jiffies(tfw_cfg_out_queue_timeout * 1000);
+	srv->retry_max = tfw_cfg_out_queue_tries;
 
 	return 0;
 }
@@ -753,39 +808,39 @@ tfw_srv_cfg_handle_out_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
  * new TfwSrvGroup object and sets the context for parsing nested "server"s.
  */
 static int
-tfw_srv_cfg_begin_srv_group(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_begin_srv_group(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
 	int r;
 	TfwSrvGroup *sg;
 	const char *sg_name, *sched_name, *dflt_sched_name;
 
-	r = tfw_cfg_check_val_n(ce, 1);
-	if (r)
+	if ((r = tfw_cfg_check_val_n(ce, 1)))
 		return r;
 	sg_name = ce->vals[0];
-	dflt_sched_name = tfw_srv_cfg_dflt_sched
-			  ? tfw_srv_cfg_dflt_sched->name : "round-robin";
+	dflt_sched_name = tfw_cfg_dflt_sched
+			  ? tfw_cfg_dflt_sched->name : "round-robin";
 	sched_name = tfw_cfg_get_attr(ce, "sched", dflt_sched_name);
 
 	TFW_DBG("begin srv_group: %s\n", sg_name);
 
-	sg = tfw_sg_new(sg_name, GFP_KERNEL);
-	if (!sg) {
+	if (!(sg = tfw_sg_new(sg_name, GFP_KERNEL))) {
 		TFW_ERR("Unable to add server group '%s'\n", sg_name);
 		return -EINVAL;
 	}
-	r = tfw_sg_set_sched(sg, sched_name);
-	if (r) {
+	if ((r = tfw_sg_set_sched(sg, sched_name))) {
 		TFW_ERR("Unable to set scheduler '%s' "
 			"for server group '%s'\n", sched_name, sg_name);
 		return r;
 	}
 
 	/* Set the current group. All nested "server"s are added to it. */
-	tfw_srv_cfg_curr_group = sg;
+	tfw_cfg_curr_group = sg;
 
-	tfw_srv_cfg_in_lstsz = 0;
-	tfw_srv_cfg_in_attempts = tfw_srv_cfg_out_attempts;
+	tfw_cfg_in_lstsz = 0;
+	tfw_cfg_in_retry_attempts = tfw_cfg_out_retry_attempts;
+	tfw_cfg_in_queue_size = tfw_cfg_out_queue_size;
+	tfw_cfg_in_queue_timeout = tfw_cfg_out_queue_timeout;
+	tfw_cfg_in_queue_tries = tfw_cfg_out_queue_tries;
 
 	return 0;
 }
@@ -801,30 +856,34 @@ tfw_srv_cfg_begin_srv_group(TfwCfgSpec *cs, TfwCfgEntry *ce)
  *   }  <--- The position at the moment of call.
  */
 static int
-tfw_srv_cfg_finish_srv_group(TfwCfgSpec *cs)
+tfw_finish_srv_group(TfwCfgSpec *cs)
 {
 	int i;
 
-	BUG_ON(!tfw_srv_cfg_curr_group);
-	BUG_ON(list_empty(&tfw_srv_cfg_curr_group->srv_list));
-	TFW_DBG("finish srv_group: %s\n", tfw_srv_cfg_curr_group->name);
-
-	for (i = 0; i < tfw_srv_cfg_in_lstsz; ++i)
-		tfw_srv_cfg_set_conn_retries(tfw_srv_cfg_in_lst[i],
-					     tfw_srv_cfg_in_attempts);
-	tfw_srv_cfg_curr_group = NULL;
+	BUG_ON(!tfw_cfg_curr_group);
+	BUG_ON(list_empty(&tfw_cfg_curr_group->srv_list));
+	TFW_DBG("finish srv_group: %s\n", tfw_cfg_curr_group->name);
+
+	for (i = 0; i < tfw_cfg_in_lstsz; ++i) {
+		tfw_cfg_set_conn_tries(tfw_cfg_in_lst[i],
+				       tfw_cfg_in_retry_attempts);
+		srv->qsize_max = tfw_cfg_in_queue_size;
+		srv->qjtimeout =
+			msecs_to_jiffies(tfw_cfg_in_queue_timeout * 1000);
+		srv->retry_max = tfw_cfg_in_queue_tries;
+	}
+	tfw_cfg_curr_group = NULL;
 
 	return 0;
 }
 
 static int
-tfw_srv_cfg_handle_sched(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_handle_sched(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
 	if (tfw_cfg_check_val_n(ce, 1))
 		return -EINVAL;
 
-	tfw_srv_cfg_dflt_sched = tfw_sched_lookup(ce->vals[0]);
-	if (tfw_srv_cfg_dflt_sched == NULL) {
+	if (!(tfw_cfg_dflt_sched = tfw_sched_lookup(ce->vals[0]))) {
 		TFW_ERR("Unrecognized scheduler: '%s'\n", ce->vals[0]);
 		return -EINVAL;
 	}
@@ -836,29 +895,53 @@ tfw_srv_cfg_handle_sched(TfwCfgSpec *cs, TfwCfgEntry *ce)
  * Clean everything produced during parsing "server" and "srv_group" entries.
  */
 static void
-tfw_srv_cfg_clean_srv_groups(TfwCfgSpec *cs)
+tfw_clean_srv_groups(TfwCfgSpec *cs)
 {
 	tfw_sock_srv_delete_all_conns();
 	tfw_sg_release_all();
-	tfw_srv_cfg_curr_group = NULL;
+	tfw_cfg_curr_group = NULL;
 }
 
-static TfwCfgSpec tfw_sock_srv_cfg_srv_group_specs[] = {
+static TfwCfgSpec tfw_srv_group_specs[] = {
 	{
 		"server", NULL,
-		tfw_srv_cfg_handle_in_server,
+		tfw_handle_in_server,
 		.allow_repeat = true,
-		.cleanup = tfw_srv_cfg_clean_srv_groups
+		.cleanup = tfw_clean_srv_groups
+	},
+	{
+		"server_queue_size",
+		NULL,
+		tfw_handle_in_queue_size,
+		.allow_none = true,
+		.allow_repeat = false,
+		.cleanup = tfw_clean_srv_groups,
+	},
+	{
+		"server_queue_timeout",
+		NULL,
+		tfw_handle_in_queue_timeout,
+		.allow_none = true,
+		.allow_repeat = false,
+		.cleanup = tfw_clean_srv_groups,
 	},
 	{
-		"connect_retries",
+		"server_queue_tries",
 		NULL,
-		tfw_srv_cfg_handle_in_conn_retries,
+		tfw_handle_in_queue_tries,
 		.allow_none = true,
 		.allow_repeat = false,
-		.cleanup = tfw_srv_cfg_clean_srv_groups,
+		.cleanup = tfw_clean_srv_groups,
 	},
-	{}
+	{
+		"connect_tries",
+		NULL,
+		tfw_handle_in_conn_tries,
+		.allow_none = true,
+		.allow_repeat = false,
+		.cleanup = tfw_clean_srv_groups,
+	},
+	{ 0 }
 };
 
 TfwCfgMod tfw_sock_srv_cfg_mod = {
@@ -869,40 +952,64 @@ TfwCfgMod tfw_sock_srv_cfg_mod = {
 		{
 			"server",
 			NULL,
-			tfw_srv_cfg_handle_out_server,
+			tfw_handle_out_server,
+			.allow_none = true,
+			.allow_repeat = true,
+			.cleanup = tfw_clean_srv_groups,
+		},
+		{
+			"server_queue_size",
+			NULL,
+			tfw_handle_out_queue_size,
+			.allow_none = true,
+			.allow_repeat = true,
+			.cleanup = tfw_clean_srv_groups,
+		},
+		{
+			"server_queue_timeout",
+			NULL,
+			tfw_handle_out_queue_timeout,
+			.allow_none = true,
+			.allow_repeat = true,
+			.cleanup = tfw_clean_srv_groups,
+		},
+		{
+			"server_queue_tries",
+			NULL,
+			tfw_handle_out_queue_tries,
 			.allow_none = true,
 			.allow_repeat = true,
-			.cleanup = tfw_srv_cfg_clean_srv_groups,
+			.cleanup = tfw_clean_srv_groups,
 		},
 		{
-			"connect_retries",
+			"connect_tries",
 			NULL,
-			tfw_srv_cfg_handle_out_conn_retries,
+			tfw_handle_out_conn_tries,
 			.allow_none = true,
 			.allow_repeat = true,
-			.cleanup = tfw_srv_cfg_clean_srv_groups,
+			.cleanup = tfw_clean_srv_groups,
 		},
 		{
 			"sched",
 			NULL,
-			tfw_srv_cfg_handle_sched,
+			tfw_handle_sched,
 			.allow_none = true,
 			.allow_repeat = true,
-			.cleanup = tfw_srv_cfg_clean_srv_groups,
+			.cleanup = tfw_clean_srv_groups,
 		},
 		{
 			"srv_group",
 			NULL,
 			tfw_cfg_handle_children,
-			tfw_sock_srv_cfg_srv_group_specs,
+			tfw_srv_group_specs,
 			&(TfwCfgSpecChild ) {
-				.begin_hook = tfw_srv_cfg_begin_srv_group,
-				.finish_hook = tfw_srv_cfg_finish_srv_group
+				.begin_hook = tfw_begin_srv_group,
+				.finish_hook = tfw_finish_srv_group
 			},
 			.allow_none = true,
 			.allow_repeat = true,
 		},
-		{}
+		{ 0 }
 	}
 };
 

From b633cd31fead44ec03ce8a907631d1d0693adfc9 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Wed, 2 Nov 2016 17:11:24 +0300
Subject: [PATCH 15/65] Bug fixes, refactorings, etc. (#419)

---
 tempesta_fw/http.c     | 163 +++++++++++++++++++++++++++--------------
 tempesta_fw/server.h   |  10 +--
 tempesta_fw/sock_srv.c | 111 ++++++++++++++++++++--------
 3 files changed, 194 insertions(+), 90 deletions(-)

diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 97c1b849e..16c271370 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -378,9 +378,10 @@ tfw_http_req_is_nonidempotent(TfwHttpReq *req)
 
 /*
  * Set the request @req in server connection @srv_conn as idempotent.
+ * Called only when a non-idempotent request turns idempotent.
  */
 static inline void
-__tfw_http_req_set_idempotent(TfwConnection *srv_conn, TfwHttpReq *req)
+__tfw_http_req_nonidemp_delist(TfwConnection *srv_conn, TfwHttpReq *req)
 {
 	list_del_init(&req->nip_list);
 	if (list_empty(&srv_conn->nip_queue))
@@ -388,49 +389,41 @@ __tfw_http_req_set_idempotent(TfwConnection *srv_conn, TfwHttpReq *req)
 }
 
 /*
- * If @req in server connection @srv_conn is non-idempotent, then set it
- * as idempotent.
+ * Set the request @req in server connection @srv_conn as non-idempotent.
  */
 static inline void
-tfw_http_req_flip_if_nonidempotent(TfwConnection *srv_conn, TfwHttpReq *req)
+__tfw_http_req_nonidemp_enlist(TfwConnection *srv_conn, TfwHttpReq *req)
 {
-	if (!list_empty(&req->nip_list))
-		__tfw_http_req_set_idempotent(srv_conn, req);
+	BUG_ON(!list_empty(&req->nip_list));
+	list_add_tail(&req->nip_list, &srv_conn->nip_queue);
+	set_bit(TFW_CONN_B_HASNIP, &srv_conn->flags);
 }
 
 /*
- * If a request on the list of non-idempotent requests in server
- * connection @srv_conn had become idempotent, then set it as idempotent.
+ * If @req in server connection @srv_conn is non-idempotent, then set
+ * it as idempotent.
  */
 static inline void
-tfw_http_conn_flip_if_nonidempotent(TfwConnection *srv_conn)
+tfw_http_req_nonidemp_delist(TfwConnection *srv_conn, TfwHttpReq *req)
 {
-	TfwHttpReq *req, *tmp;
-
-	list_for_each_entry_safe(req, tmp, &srv_conn->nip_queue, nip_list)
-		if (!tfw_http_req_is_nonidempotent(req))
-			__tfw_http_req_set_idempotent(srv_conn, req);
+	if (!list_empty(&req->nip_list))
+		__tfw_http_req_nonidemp_delist(srv_conn, req);
 }
 
 /*
- * Set the request @req in server connection @srv_conn as non-idempotent.
+ * If a request on the list of non-idempotent requests in server
+ * connection @srv_conn had become idempotent, then set it as such.
  */
 static inline void
-__tfw_http_req_set_nonidempotent(TfwConnection *srv_conn, TfwHttpReq *req)
+tfw_http_conn_nonidemp_delist(TfwConnection *srv_conn)
 {
-	list_add_tail(&req->nip_list, &srv_conn->nip_queue);
-	set_bit(TFW_CONN_B_HASNIP, &srv_conn->flags);
-}
+	TfwHttpReq *req, *tmp;
 
-/*
- * Set the request @req in server connection @srv_conn is idempotent,
- * then set it as non-idempotent.
- */
-static inline void
-tfw_http_req_flip_if_idempotent(TfwConnection *srv_conn, TfwHttpReq *req)
-{
-	if (list_empty(&req->nip_list))
-		__tfw_http_req_set_nonidempotent(srv_conn, req);
+	list_for_each_entry_safe(req, tmp, &srv_conn->nip_queue, nip_list)
+		if (!tfw_http_req_is_nonidempotent(req)) {
+			BUG_ON(list_empty(&req->nip_list));
+			__tfw_http_req_nonidemp_delist(srv_conn, req);
+		}
 }
 
 /*
@@ -467,6 +460,10 @@ tfw_http_conn_drained(TfwConnection *srv_conn)
 	return false;
 }
 
+/*
+ * Tell if the server connection's forwarding queue has requests
+ * that need to be forwarded.
+ */
 static inline bool
 tfw_http_conn_need_fwd(TfwConnection *srv_conn)
 {
@@ -474,11 +471,16 @@ tfw_http_conn_need_fwd(TfwConnection *srv_conn)
 		&& !tfw_http_conn_drained(srv_conn));
 }
 
+/*
+ * Common actions in case of an error while forwarding requests.
+ * Erroneous requests are removed from the forwarding queue and placed
+ * in @equeue. The error code for an error response is saved as well.
+ */
 static inline void
 tfw_http_req_move2equeue(TfwConnection *srv_conn, TfwHttpReq *req,
 			 struct list_head *equeue, unsigned short status)
 {
-	tfw_http_req_flip_if_nonidempotent(srv_conn, req);
+	tfw_http_req_nonidemp_delist(srv_conn, req);
 	list_move_tail(&req->msg.fwd_list, equeue);
 	req->rstatus = status;
 	atomic_dec(&srv_conn->qsize);
@@ -505,6 +507,7 @@ tfw_http_req_zap_error(struct list_head *equeue)
 			tfw_http_send_504(req);
 		else
 			BUG();
+		TFW_INC_STAT_BH(clnt.msgs_otherr);
 	}
 }
 
@@ -532,6 +535,7 @@ __tfw_http_req_fwd_stalled(TfwConnection *srv_conn, struct list_head *equeue)
 	req = srv_conn->msg_sent
 	    ? (TfwHttpReq *)list_next_entry(srv_conn->msg_sent, fwd_list)
 	    : (TfwHttpReq *)list_first_entry(fwd_queue, TfwMsg, fwd_list);
+
 	list_for_each_entry_safe_from(req, tmp, fwd_queue, msg.fwd_list) {
 		unsigned long jtimeout = jiffies - req->jtstamp;
 		if (time_after(jtimeout, srv->qjtimeout)) {
@@ -560,8 +564,8 @@ __tfw_http_req_fwd_stalled(TfwConnection *srv_conn, struct list_head *equeue)
 				 __func__, req);
 			break;
 		}
-		/* See if the request has become idempotent. */
-		tfw_http_req_flip_if_nonidempotent(srv_conn, req);
+		/* See if a non-idempotent request has become idempotent. */
+		tfw_http_req_nonidemp_delist(srv_conn, req);
 	}
 }
 
@@ -615,7 +619,7 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 	list_add_tail(&req->msg.fwd_list, &srv_conn->msg_queue);
 	atomic_inc(&srv_conn->qsize);
 	if (tfw_http_req_is_nonidempotent(req))
-		__tfw_http_req_set_nonidempotent(srv_conn, req);
+		__tfw_http_req_nonidemp_enlist(srv_conn, req);
 	if (tfw_http_conn_on_hold(srv_conn)) {
 		spin_unlock(&srv_conn->msg_qlock);
 		TFW_DBG2("%s: Server connection is on hold: conn=[%p]\n",
@@ -630,19 +634,56 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 		return;
 	}
 	if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
+		tfw_http_req_nonidemp_delist(srv_conn, req);
 		list_del_init(&req->msg.fwd_list);
-		tfw_http_req_flip_if_nonidempotent(srv_conn, req);
 		atomic_dec(&srv_conn->qsize);
 		spin_unlock(&srv_conn->msg_qlock);
 		TFW_DBG2("%s: Forwarding error: conn=[%p] req=[%p]\n",
 			 __func__, srv_conn, req);
 		tfw_http_send_500(req);
+		TFW_INC_STAT_BH(clnt.msgs_otherr);
 		return;
 	}
 	srv_conn->msg_sent = (TfwMsg *)req;
 	spin_unlock(&srv_conn->msg_qlock);
 }
 
+/*
+ * Handle non-idempotent requests in case of a connection repair
+ * (re-send or re-schedule).
+ *
+ * Non-idempotent requests that were forwarded but not responded to
+ * are not re-sent or re-scheduled by default. Configuration option
+ * can be used to have those requests re-sent or re-scheduled as well.
+ *
+ * Note: @srv_conn->msg_sent may change in result.
+ */
+static inline void
+tfw_http_req_fwd_handlenip(TfwConnection *srv_conn)
+{
+	TfwServer *srv = (TfwServer *)srv_conn->peer;
+	TfwHttpReq *req_sent = (TfwHttpReq *)srv_conn->msg_sent;
+
+	if (req_sent && tfw_http_req_is_nonidempotent(req_sent)
+	    && likely(!(srv->flags & TFW_SRV_RETRY_NON_IDEMP)))
+	{
+		struct list_head *lent = &req_sent->msg.fwd_list;
+		BUG_ON(list_empty(&req_sent->nip_list));
+		srv_conn->msg_sent = (lent == srv_conn->msg_queue.next)
+				   ? NULL
+				   : list_entry(lent->prev, TfwMsg, fwd_list);
+		__tfw_http_req_nonidemp_delist(srv_conn, req_sent);
+		list_del_init(&req_sent->msg.fwd_list);
+		atomic_dec(&srv_conn->qsize);
+		tfw_http_send_404(req_sent);
+		TFW_INC_STAT_BH(clnt.msgs_otherr);
+	}
+}
+
+/*
+ * Re-forward requests in a server connection. Requests that exceed
+ * the set limits are evicted.
+ */
 static void
 __tfw_http_req_fwd_resend(TfwConnection *srv_conn,
 			  bool one_msg, struct list_head *equeue)
@@ -682,6 +723,11 @@ __tfw_http_req_fwd_resend(TfwConnection *srv_conn,
 	}
 }
 
+/*
+ * Handle the complete re-forwarding of requests in a server connection
+ * that is being repaired, after the first request had been re-forwarded.
+ * The connection is not scheduled until all requests in it are re-sent.
+ */
 static void
 tfw_http_req_fwd_repair(TfwConnection *srv_conn)
 {
@@ -787,8 +833,8 @@ tfw_http_conn_msg_free(TfwHttpMsg *hm)
 /*
  * Re-schedule requests in a dead server connection's queue to a live
  * server connection. Idempotent requests are always rescheduled.
- * Non-idempotent requests are not re-scheduled by default, but it
- * can be configured to re-schedule those requests as well.
+ * Non-idempotent requests may be rescheduled depending on the option
+ * in configuration.
  *
  * FIXME: It appears that a re-scheduled request should be put in a
  * new server connection's queue according to its original timestamp.
@@ -799,28 +845,35 @@ static void
 tfw_http_req_fwd_resched(TfwConnection *srv_conn)
 {
 	TfwHttpReq *req, *tmp;
-	TfwConnection *new_conn;
+	TfwConnection *sconn;
 	struct list_head *fwd_queue = &srv_conn->msg_queue;
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, conn);
 
+	/* Handle non-idempotent requests. */
+	tfw_http_req_fwd_handlenip(srv_conn);
+
+	/* Process complete queue. */
 	list_for_each_entry_safe(req, tmp, fwd_queue, msg.fwd_list) {
+		tfw_http_req_nonidemp_delist(srv_conn, req);
 		list_del_init(&req->msg.fwd_list);
 		atomic_dec(&srv_conn->qsize);
-		/* FIXME: Need config option. */
-		if (tfw_http_req_is_nonidempotent(req)) {
-			__tfw_http_req_set_idempotent(srv_conn, req);
-			goto send_err;
-		}
-		if (!(new_conn = tfw_sched_get_srv_conn((TfwMsg *)req))) {
+		if (!(sconn = tfw_sched_get_srv_conn((TfwMsg *)req))) {
 			TFW_WARN("Unable to find a backend server\n");
-			goto send_err;
+			tfw_http_send_404(req);
+			TFW_INC_STAT_BH(clnt.msgs_otherr);
+			continue;
 		}
-		tfw_http_req_fwd(new_conn, req);
-		continue;
-send_err:
-		tfw_http_send_404(req);
-		TFW_INC_STAT_BH(clnt.msgs_otherr);
+		if (req->retries++ >= ((TfwServer *)sconn->peer)->retry_max) {
+			TFW_DBG2("%s: Eviction: req=[%p] retries=[%d]\n",
+				 __func__, req, req->retries);
+			tfw_http_send_504(req);
+			TFW_INC_STAT_BH(clnt.msgs_otherr);
+			tfw_connection_put(sconn);
+			continue;
+		}
+		tfw_http_req_fwd(sconn, req);
+		tfw_connection_put(sconn);
 	}
 	BUG_ON(atomic_read(&srv_conn->qsize));
 }
@@ -831,7 +884,7 @@ tfw_http_req_fwd_resched(TfwConnection *srv_conn)
  * 6.3.2, "a client MUST NOT pipeline immediately after connection
  * establishment". To address that, re-send the first request to the
  * server. When a response comes, that will trigger resending of the
- * rest of those unanswered requests.
+ * rest of those unanswered requests (tfw_http_req_fwd_repair()).
  */
 static void
 tfw_http_conn_repair(TfwConnection *srv_conn)
@@ -847,14 +900,17 @@ tfw_http_conn_repair(TfwConnection *srv_conn)
 		tfw_http_req_fwd_resched(srv_conn);
 		return;
 	}
-	/* Re-send the first unanswered request. */
 	spin_lock(&srv_conn->msg_qlock);
+	/* Handle non-idempotent requests. */
+	tfw_http_req_fwd_handlenip(srv_conn);
+	/* Re-send the first unanswered request. */
 	srv_conn->msg_resent = NULL;
 	if (srv_conn->msg_sent) {
 		__tfw_http_req_fwd_resend(srv_conn, true, &equeue);
 		if (!srv_conn->msg_resent)
 			srv_conn->msg_sent = NULL;
 	}
+	/* Send the remaining unsent requests. */
 	if (!srv_conn->msg_resent) {
 		set_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
 		if (tfw_http_conn_need_fwd(srv_conn))
@@ -1639,7 +1695,6 @@ tfw_http_req_process(TfwConnection *conn, struct sk_buff *skb, unsigned int off)
 		 */
 		if (tfw_cache_process(req, NULL, tfw_http_req_cache_cb)) {
 			tfw_http_send_500(req, "request cache error");
-			tfw_http_conn_msg_free((TfwHttpMsg *)req);
 			TFW_INC_STAT_BH(clnt.msgs_otherr);
 			return TFW_PASS;
 		}
@@ -1747,15 +1802,15 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 	atomic_dec(&srv_conn->qsize);
 	if ((TfwMsg *)req == srv_conn->msg_sent)
 		srv_conn->msg_sent = NULL;
-	tfw_http_req_flip_if_nonidempotent(srv_conn, req);
-	tfw_http_conn_flip_if_nonidempotent(srv_conn);
+	tfw_http_req_nonidemp_delist(srv_conn, req);
+	tfw_http_conn_nonidemp_delist(srv_conn);
 	/*
 	 * Perform special processing if the connection is in repair
 	 * mode. Otherwise, forward pending requests to the server.
 	 * Note: The queue is unlocked inside tfw_http_req_fwd_repair()
 	 * or tfw_http_req_fwd_stalled().
 	 */
-	if (tfw_connection_restricted(srv_conn))
+	if (unlikely(tfw_connection_restricted(srv_conn)))
 		tfw_http_req_fwd_repair(srv_conn);
 	else if (tfw_http_conn_need_fwd(srv_conn))
 		tfw_http_req_fwd_stalled(srv_conn);
diff --git a/tempesta_fw/server.h b/tempesta_fw/server.h
index e4fd72bdf..e5d8c66db 100644
--- a/tempesta_fw/server.h
+++ b/tempesta_fw/server.h
@@ -29,11 +29,6 @@
 #define TFW_SG_MAX_SRV		32	/* TfwServer per TfwSrvGroup */
 #define TFW_SG_MAX_CONN		(TFW_SG_MAX_SRV * TFW_SRV_MAX_CONN)
 
-typedef enum {
-	TFW_SG_SRV_ADD,
-	TFW_SG_SRV_DEL,
-} TfwSgSrvUpdate;
-
 typedef struct tfw_srv_group_t TfwSrvGroup;
 typedef struct tfw_scheduler_t TfwScheduler;
 
@@ -46,6 +41,7 @@ typedef struct tfw_scheduler_t TfwScheduler;
  * @qsize_max	- maximum queue size of a server connection;
  * @qjtimeout	- maximum age of a request in a server connection, in jiffies;
  * @retry_max	- maximum number of tries for forwarding a request;
+ * @flags	- server related flags;
  */
 typedef struct {
 	TFW_PEER_COMMON;
@@ -56,8 +52,12 @@ typedef struct {
 	unsigned int		qsize_max;
 	unsigned long		qjtimeout;
 	unsigned int		retry_max;
+	unsigned int		flags;
 } TfwServer;
 
+/* Server related flags. */
+#define TFW_SRV_RETRY_NON_IDEMP		0x0001	/* Retry non-idempotent req. */
+
 /**
  * The servers group with the same load balancing, failovering and eviction
  * policies.
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index 8cc938486..efb3abe3e 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -569,15 +569,18 @@ tfw_sock_srv_delete_all_conns(void)
  * requests in the queue.
  */
 #define TFW_SRV_QUEUE_SIZE_DEF		1000	/* Max queue size */
-#define TFW_SRV_QUEUE_TIMEOUT_DEF	60	/* Default request timeout */
-#define TFW_SRV_QUEUE_TRIES_DEF		5	/* Default number of tries */
+#define TFW_SRV_SEND_TIMEOUT_DEF	60	/* Default request timeout */
+#define TFW_SRV_SEND_TRIES_DEF		5	/* Default number of tries */
+#define TFW_SRV_RETRY_NIP_DEF		0	/* Do NOT resend NIP reqs */
 
 static int tfw_cfg_in_queue_size = TFW_SRV_QUEUE_SIZE_DEF;
-static int tfw_cfg_in_queue_timeout = TFW_SRV_QUEUE_TIMEOUT_DEF;
-static int tfw_cfg_in_queue_tries = TFW_SRV_QUEUE_TRIES_DEF;
+static int tfw_cfg_in_send_timeout = TFW_SRV_SEND_TIMEOUT_DEF;
+static int tfw_cfg_in_send_tries = TFW_SRV_SEND_TRIES_DEF;
+static int tfw_cfg_in_retry_nip = TFW_SRV_RETRY_NIP_DEF;
 static int tfw_cfg_out_queue_size = TFW_SRV_QUEUE_SIZE_DEF;
-static int tfw_cfg_out_queue_timeout = TFW_SRV_QUEUE_TIMEOUT_DEF;
-static int tfw_cfg_out_queue_tries = TFW_SRV_QUEUE_TRIES_DEF;
+static int tfw_cfg_out_send_timeout = TFW_SRV_SEND_TIMEOUT_DEF;
+static int tfw_cfg_out_send_tries = TFW_SRV_SEND_TRIES_DEF;
+static int tfw_cfg_out_retry_nip = TFW_SRV_RETRY_NIP_DEF;
 
 static int tfw_cfg_in_retry_attempts = TFW_SRV_RETRY_ATTEMPTS_DEF;
 static int tfw_cfg_out_retry_attempts = TFW_SRV_RETRY_ATTEMPTS_DEF;
@@ -616,27 +619,50 @@ tfw_handle_out_queue_size(TfwCfgSpec *cs, TfwCfgEntry *ce)
 }
 
 static int
-tfw_handle_in_queue_timeout(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_handle_in_send_timeout(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	return tfw_handle_opt_val(cs, ce, &tfw_cfg_in_queue_timeout);
+	return tfw_handle_opt_val(cs, ce, &tfw_cfg_in_send_timeout);
 }
 
 static int
-tfw_handle_out_queue_timeout(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_handle_out_send_timeout(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	return tfw_handle_opt_val(cs, ce, &tfw_cfg_out_queue_timeout);
+	return tfw_handle_opt_val(cs, ce, &tfw_cfg_out_send_timeout);
 }
 
 static int
-tfw_handle_in_queue_tries(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_handle_in_send_tries(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	return tfw_handle_opt_val(cs, ce, &tfw_cfg_in_queue_tries);
+	return tfw_handle_opt_val(cs, ce, &tfw_cfg_in_send_tries);
 }
 
 static int
-tfw_handle_out_queue_tries(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_handle_out_send_tries(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	return tfw_handle_opt_val(cs, ce, &tfw_cfg_out_queue_tries);
+	return tfw_handle_opt_val(cs, ce, &tfw_cfg_out_send_tries);
+}
+
+static inline int
+__tfw_handle_retry_nip(TfwCfgSpec *cs, TfwCfgEntry *ce, int *retry_nip)
+{
+	if (ce->attr_n || ce->val_n) {
+		TFW_ERR("%s: The option may not have arguments.\n", cs->name);
+		return -EINVAL;
+	}
+	*retry_nip = 1;
+	return 0;
+}
+
+static int
+tfw_handle_in_retry_nip(TfwCfgSpec *cs, TfwCfgEntry *ce)
+{
+	return __tfw_handle_retry_nip(cs, ce, &tfw_cfg_in_retry_nip);
+}
+
+static int
+tfw_handle_out_retry_nip(TfwCfgSpec *cs, TfwCfgEntry *ce)
+{
+	return __tfw_handle_retry_nip(cs, ce, &tfw_cfg_out_retry_nip);
 }
 
 static int
@@ -788,9 +814,12 @@ tfw_handle_out_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
 		return -EINVAL;
 
 	tfw_cfg_set_conn_tries(srv, tfw_cfg_out_retry_attempts)
-	srv->qsize_max = tfw_cfg_out_queue_size;
-	srv->qjtimeout = msecs_to_jiffies(tfw_cfg_out_queue_timeout * 1000);
-	srv->retry_max = tfw_cfg_out_queue_tries;
+	srv->qsize_max = tfw_cfg_out_queue_size ? : UINT_MAX;
+	srv->qjtimeout = tfw_cfg_out_send_timeout
+		       ? msecs_to_jiffies(tfw_cfg_out_send_timeout * 1000)
+		       : ULONG_MAX;
+	srv->retry_max = tfw_cfg_out_send_tries ? : UINT_MAX;
+	srv->flags |= tfw_cfg_out_retry_nip ? TFW_SRV_RETRY_NON_IDEMP : 0;
 
 	return 0;
 }
@@ -839,8 +868,9 @@ tfw_begin_srv_group(TfwCfgSpec *cs, TfwCfgEntry *ce)
 	tfw_cfg_in_lstsz = 0;
 	tfw_cfg_in_retry_attempts = tfw_cfg_out_retry_attempts;
 	tfw_cfg_in_queue_size = tfw_cfg_out_queue_size;
-	tfw_cfg_in_queue_timeout = tfw_cfg_out_queue_timeout;
-	tfw_cfg_in_queue_tries = tfw_cfg_out_queue_tries;
+	tfw_cfg_in_send_timeout = tfw_cfg_out_send_timeout;
+	tfw_cfg_in_send_tries = tfw_cfg_out_send_tries;
+	tfw_cfg_in_retry_nip = tfw_cfg_out_retry_nip;
 
 	return 0;
 }
@@ -865,12 +895,15 @@ tfw_finish_srv_group(TfwCfgSpec *cs)
 	TFW_DBG("finish srv_group: %s\n", tfw_cfg_curr_group->name);
 
 	for (i = 0; i < tfw_cfg_in_lstsz; ++i) {
+		unsigned long jtmout =
+			msecs_to_jiffies(tfw_cfg_in_send_timeout * 1000);
 		tfw_cfg_set_conn_tries(tfw_cfg_in_lst[i],
 				       tfw_cfg_in_retry_attempts);
-		srv->qsize_max = tfw_cfg_in_queue_size;
-		srv->qjtimeout =
-			msecs_to_jiffies(tfw_cfg_in_queue_timeout * 1000);
-		srv->retry_max = tfw_cfg_in_queue_tries;
+		srv->qsize_max = tfw_cfg_in_queue_size ? : UINT_MAX;
+		srv->qjtimeout = tfw_cfg_in_send_timeout ? jtmout : ULONG_MAX;
+		srv->retry_max = tfw_cfg_in_send_tries ? : UINT_MAX;
+		srv->flags |= tfw_cfg_in_retry_nip ?
+			      TFW_SRV_RETRY_NON_IDEMP : 0;
 	}
 	tfw_cfg_curr_group = NULL;
 
@@ -918,17 +951,25 @@ static TfwCfgSpec tfw_srv_group_specs[] = {
 		.cleanup = tfw_clean_srv_groups,
 	},
 	{
-		"server_queue_timeout",
+		"server_send_timeout",
+		NULL,
+		tfw_handle_in_send_timeout,
+		.allow_none = true,
+		.allow_repeat = false,
+		.cleanup = tfw_clean_srv_groups,
+	},
+	{
+		"server_send_tries",
 		NULL,
-		tfw_handle_in_queue_timeout,
+		tfw_handle_in_send_tries,
 		.allow_none = true,
 		.allow_repeat = false,
 		.cleanup = tfw_clean_srv_groups,
 	},
 	{
-		"server_queue_tries",
+		"server_retry_non_idempotent",
 		NULL,
-		tfw_handle_in_queue_tries,
+		tfw_handle_in_retry_nip,
 		.allow_none = true,
 		.allow_repeat = false,
 		.cleanup = tfw_clean_srv_groups,
@@ -966,17 +1007,25 @@ TfwCfgMod tfw_sock_srv_cfg_mod = {
 			.cleanup = tfw_clean_srv_groups,
 		},
 		{
-			"server_queue_timeout",
+			"server_send_timeout",
+			NULL,
+			tfw_handle_out_send_timeout,
+			.allow_none = true,
+			.allow_repeat = true,
+			.cleanup = tfw_clean_srv_groups,
+		},
+		{
+			"server_send_tries",
 			NULL,
-			tfw_handle_out_queue_timeout,
+			tfw_handle_out_send_tries,
 			.allow_none = true,
 			.allow_repeat = true,
 			.cleanup = tfw_clean_srv_groups,
 		},
 		{
-			"server_queue_tries",
+			"server_retry_non_idempotent",
 			NULL,
-			tfw_handle_out_queue_tries,
+			tfw_handle_out_retry_nip,
 			.allow_none = true,
 			.allow_repeat = true,
 			.cleanup = tfw_clean_srv_groups,

From 160a9549d944fae46c3f24bb8add9478f3643db2 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Mon, 5 Dec 2016 02:14:44 +0300
Subject: [PATCH 16/65] Implement a way to define non-idempotent requests in
 config. (#419)

---
 tempesta_fw/addr.h  |   2 +-
 tempesta_fw/http.c  |  36 +++++-
 tempesta_fw/vhost.c | 293 +++++++++++++++++++++++++++++++++++++++-----
 tempesta_fw/vhost.h |  24 +++-
 4 files changed, 321 insertions(+), 34 deletions(-)

diff --git a/tempesta_fw/addr.h b/tempesta_fw/addr.h
index d3822caa5..390e48a33 100644
--- a/tempesta_fw/addr.h
+++ b/tempesta_fw/addr.h
@@ -46,10 +46,10 @@
 	in6_addr.in6_u.u6_addr8[14]))		\
 
 typedef union {
+	sa_family_t family;
 	struct sockaddr_in v4;
 	struct sockaddr_in6 v6;
 	struct sockaddr sa;
-	sa_family_t family;
 #define in6_prefix	v6.sin6_scope_id
 } TfwAddr;
 
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 16c271370..b3524081f 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -1470,8 +1470,40 @@ tfw_http_req_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 static void
 tfw_http_req_mark_nonidempotent(TfwHttpReq *req)
 {
-	if (req->method == TFW_HTTP_METH_POST)
-		req->flags |= TFW_HTTP_NON_IDEMP;
+	/* See RFC 7231 4.2.1 */
+	static const unsigned int __read_mostly safe_methods =
+		(1 << TFW_HTTP_METH_GET) | (1 << TFW_HTTP_METH_HEAD);
+	TfwLocation *loc = req->location;
+	TfwLocation *loc_dflt = req->vhost->loc_dflt;
+	TfwLocation *base_loc = (tfw_vhost_get_default())->loc_dflt;
+
+	/*
+	 * Search in the current location of the current vhost. If there
+	 * are no entries there, then search in the default location of
+	 * the current vhost. If there are no entries there either, then
+	 * search in the default location of the default vhost - that is,
+	 * in the global policies.
+	 */
+	if (loc && loc->nipdef_sz) {
+		if (tfw_nipdef_match(loc, req->method, &req->uri_path))
+			goto nip_match;
+	} else if (loc_dflt && loc_dflt->nipdef_sz) {
+		if (tfw_nipdef_match(loc_dflt, req->method, &req->uri_path))
+			goto nip_match;
+	} else if ((base_loc != loc_dflt) && base_loc && base_loc->nipdef_sz) {
+		if (tfw_nipdef_match(base_loc, req->method, &req->uri_path))
+			goto nip_match;
+	}
+
+	if (safe_methods & (1 << req->method))
+		return;
+
+nip_match:
+	TFW_DBG2("non-idempotent: method=[%d] uri=[%.*s]\n",
+		 req->method, (int)TFW_STR_CHUNK(&req->uri_path, 0)->len,
+		 (char *)TFW_STR_CHUNK(&req->uri_path, 0)->ptr);
+	req->flags |= TFW_HTTP_NON_IDEMP;
+	return;
 }
 
 /*
diff --git a/tempesta_fw/vhost.c b/tempesta_fw/vhost.c
index 6ed24d267..76743aae2 100644
--- a/tempesta_fw/vhost.c
+++ b/tempesta_fw/vhost.c
@@ -18,6 +18,7 @@
  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
 #include "tempesta_fw.h"
+#include "http.h"
 #include "http_match.h"
 #include "vhost.h"
 #include "str.h"
@@ -28,7 +29,17 @@ static const TfwCfgEnum const __read_mostly tfw_match_enum[] = {
 	{ "eq",		TFW_HTTP_MATCH_O_EQ },
 	{ "prefix",	TFW_HTTP_MATCH_O_PREFIX },
 	{ "suffix",	TFW_HTTP_MATCH_O_SUFFIX },
-	{}
+	{ 0 }
+};
+
+/* Mappings for HTTP request methods. */
+static const TfwCfgEnum const __read_mostly tfw_method_enum[] = {
+	{ "*",		UINT_MAX },
+	{ "GET",	1 << TFW_HTTP_METH_GET },
+	{ "HEAD",	1 << TFW_HTTP_METH_HEAD },
+	{ "POST",	1 << TFW_HTTP_METH_POST },
+	{ "PURGE",	1 << TFW_HTTP_METH_PURGE },
+	{ 0 }
 };
 
 /*
@@ -42,6 +53,15 @@ static const TfwCfgEnum const __read_mostly tfw_match_enum[] = {
 static TfwCaPolicy	tfw_capolicy[TFW_CAPOLICY_ARRAY_SZ];
 static unsigned int	tfw_capolicy_sz = 0;	/* Current size. */
 
+/*
+ * All non-idempotent request directives are put into a fixed size
+ * array. The directives are deduplicated when put into the array.
+ */
+#define TFW_NIPDEF_ARRAY_SZ	(64)
+
+static TfwNipDef	tfw_nipdef[TFW_NIPDEF_ARRAY_SZ];
+static unsigned int	tfw_nipdef_sz = 0;	/* Current size. */
+
 /*
  * All 'location' directives are put into a fixed size array.
  * Duplicate directives are not allowed.
@@ -53,9 +73,11 @@ static unsigned int	tfw_location_sz = 0;	/* Current size. */
 
 /*
  * Default location is a wildcard location. It matches any URI.
- * It may (or may not) contain a set of cache matching directives.
+ * It may (or may not) contain a set of cache matching directives,
+ * or a set of non-idempotent request definitions.
  */
-static TfwCaPolicy *tfw_capolicy_dflt[TFW_CAPOLICY_ARRAY_SZ];
+static TfwCaPolicy	*tfw_capolicy_dflt[TFW_CAPOLICY_ARRAY_SZ];
+static TfwNipDef	*tfw_nipdef_dflt[TFW_NIPDEF_ARRAY_SZ];
 
 static TfwLocation tfw_location_dflt = {
 	.op = TFW_HTTP_MATCH_O_WILDCARD,
@@ -63,6 +85,8 @@ static TfwLocation tfw_location_dflt = {
 	.len = 1,
 	.capo = tfw_capolicy_dflt,
 	.capo_sz = 0,
+	.nipdef = tfw_nipdef_dflt,
+	.nipdef_sz = 0,
 };
 
 /*
@@ -77,7 +101,7 @@ static TfwAddr		tfw_capuacl[TFW_CAPUACL_ARRAY_SZ];
 
 /*
  * Default vhost is a wildcard vhost. It matches any URI.
- * It may (or may not) ontain a set of various directives.
+ * It may (or may not) contain a set of various directives.
  *
  * Note that @loc_dflt in the default vhost serves as global
  * default caching policy.
@@ -159,6 +183,38 @@ static const __tfw_match_fn const __read_mostly __tfw_match_fn_tbl[] = {
 	[TFW_HTTP_MATCH_O_SUFFIX]	= __tfw_match_suffix,
 };
 
+/*
+ * Find a matching non-idempotent request directive. Strings
+ * are compared according to the match operator in the directive.
+ * The helper tests a single directive; tfw_nipdef_match() returns
+ * the matching TfwNipDef, or NULL if there's no match.
+ */
+static inline bool
+__tfw_nipdef_match_fn(TfwNipDef *nipdef, TfwStr *arg)
+{
+	__tfw_match_fn match_fn = __tfw_match_fn_tbl[nipdef->op];
+	BUG_ON(!match_fn);
+
+	return match_fn(nipdef->op, nipdef->arg, nipdef->len, arg);
+}
+
+TfwNipDef *
+tfw_nipdef_match(TfwLocation *loc, unsigned char method, TfwStr *arg)
+{
+	int i;
+
+	BUG_ON(!loc);
+	BUG_ON(!arg);
+
+	for (i = 0; i < loc->nipdef_sz; ++i) {
+		TfwNipDef *nipdef = loc->nipdef[i];
+		if ((nipdef->method & (1 << method))
+		    && __tfw_nipdef_match_fn(nipdef, arg))
+			return nipdef;
+	}
+	return NULL;
+}
+
 /*
  * Find a matching cache policy directive. Strings are compared
  * according to the match operator in the directive. A pointer
@@ -225,6 +281,7 @@ tfw_location_match(TfwVhost *vhost, TfwStr *arg)
  * to the match operator in the directive. A pointer to the matching
  * TfwVhost structure is returned if the match is found. A pointer
  * to the default vhost structure is returned if there's no match.
+ * Thus the returned value is always a valid address.
  */
 TfwVhost *
 tfw_vhost_get_default(void)
@@ -247,10 +304,168 @@ tfw_vhost_match(TfwStr *arg)
 
 /*
  * Pointer to the current location structure.
- * The pointer is shared among several functions below.
+ * The pointer is shared among multiple functions below.
  */
 static TfwLocation *tfwcfg_this_location;
 
+/*
+ * Find a non-idempotent request entry in the array that holds
+ * all non-idempotent request directives from all location sections.
+ */
+static TfwNipDef *
+tfw_nipdef_lookup_all(int method, int op, char *arg, int len)
+{
+	int i;
+
+	for (i = 0; i < tfw_nipdef_sz; ++i) {
+		TfwNipDef *nipdef = &tfw_nipdef[i];
+		if ((nipdef->method & method)
+		    && (nipdef->op == op)
+		    && (nipdef->len == len)
+		    && !strncasecmp(nipdef->arg, arg, len))
+			return nipdef;
+	}
+
+	return NULL;
+}
+
+/*
+ * Find a non-idempotent request entry within specified location.
+ */
+static TfwNipDef *
+tfw_nipdef_lookup_loc(TfwLocation *loc, int method, int op, char *arg)
+{
+	int i;
+
+	for (i = 0; i < loc->nipdef_sz; ++i) {
+		TfwNipDef *nipdef = loc->nipdef[i];
+		if ((nipdef->method & method)
+		    && (nipdef->op == op)
+		    && !strcasecmp(nipdef->arg, arg))
+			return nipdef;
+	}
+
+	return NULL;
+}
+
+/*
+ * Create and initialize a new non-idempotent request entry. The entry
+ * is placed in the array for all non-idempotent request entries from
+ * all location sections.
+ */
+static TfwNipDef *
+tfw_nipdef_new(int method, int op, char *arg, int len)
+{
+	char *argmem;
+	TfwNipDef *nipdef;
+
+	if (tfw_nipdef_sz == TFW_NIPDEF_ARRAY_SZ)
+		return NULL;
+
+	if ((argmem = kmalloc(len + 1, GFP_KERNEL)) == NULL)
+		return NULL;
+
+	nipdef = &tfw_nipdef[tfw_nipdef_sz++];
+	nipdef->method = method;
+	nipdef->op = op;
+	nipdef->arg = argmem;
+	nipdef->len = len;
+	memcpy((void *)nipdef->arg, (void *)arg, len + 1);
+
+	return nipdef;
+}
+
+/*
+ * Add a new non-idempotent request entry to the given location structure.
+ * The entry is added as a pointer into the array for all non-idempotent
+ * request entries.
+ */
+static TfwNipDef *
+tfw_nipdef_add(TfwLocation *loc, TfwNipDef *nipdef)
+{
+	if (loc->nipdef_sz == TFW_NIPDEF_ARRAY_SZ)
+		return NULL;
+	loc->nipdef[loc->nipdef_sz++] = nipdef;
+	return nipdef;
+}
+
+static int
+tfw_handle_nonidempotent(TfwCfgSpec *cs, TfwCfgEntry *ce)
+{
+	int ret, len, method, op;
+	char *in_method, *in_op, *arg;
+	TfwLocation *loc = tfwcfg_this_location;
+	TfwNipDef *nipdef;
+
+	BUG_ON(!tfwcfg_this_location);
+
+	if (ce->attr_n) {
+		TFW_ERR("%s: Arguments may not have the \'=\' sign\n",
+			cs->name);
+		return -EINVAL;
+	}
+	if (ce->val_n != 3) {
+		TFW_ERR("%s: Invalid number of arguments.\n", cs->name);
+		return -EINVAL;
+	}
+
+	/* The method: a name from tfw_method_enum, mapped to a bitmask. */
+	in_method = (char *)ce->vals[0];
+	ret = tfw_cfg_map_enum(tfw_method_enum, in_method, &method);
+	if (ret) {
+		TFW_ERR("Unsupported HTTP method: '%s %s'\n",
+			cs->name, in_method);
+		return -EINVAL;
+	}
+
+	/* The match operator. */
+	in_op = (char *)ce->vals[1];
+	ret = tfw_cfg_map_enum(tfw_match_enum, in_op, &op);
+	if (ret) {
+		TFW_ERR("Unsupported match OP: '%s %s'\n", cs->name, in_op);
+		return -EINVAL;
+	}
+
+	/* The match string. */
+	arg = (char *)ce->vals[2];
+	len = strlen(arg);
+
+	/* Do not create a duplicate entry in the storage array. */
+	nipdef = tfw_nipdef_lookup_all(method, op, arg, len);
+	if (!nipdef) {
+		nipdef = tfw_nipdef_new(method, op, arg, len);
+		if (nipdef == NULL)
+			return -ENOMEM;
+	} else if (tfw_nipdef_lookup_loc(loc, method, op, arg)) {
+		/* Do not add a duplicate entry within a location. */
+		TFW_WARN("%s: Duplicate entry in location '%s': "
+			 "'%s %s %s %s'\n", cs->name,
+			 loc == &tfw_location_dflt ? "default" : loc->arg,
+			 cs->name, in_method, in_op, arg);
+		return 0;
+	}
+
+	/* Add the entry to the location. */
+	if (!tfw_nipdef_add(loc, nipdef))
+		return -ENOENT;
+
+	return 0;
+}
+
+static int
+tfw_handle_in_nonidempotent(TfwCfgSpec *cs, TfwCfgEntry *ce)
+{
+	return tfw_handle_nonidempotent(cs, ce);
+}
+
+static int
+tfw_handle_out_nonidempotent(TfwCfgSpec *cs, TfwCfgEntry *ce)
+{
+	if (!tfwcfg_this_location)
+		tfwcfg_this_location = &tfw_location_dflt;
+	return tfw_handle_nonidempotent(cs, ce);
+}
+
 /*
  * Find a cache policy directive entry. The entry is looked up
  * in the array that holds all cache policy directives from all
@@ -321,9 +536,9 @@ tfw_capolicy_add(TfwLocation *loc, TfwCaPolicy *capo)
 static int
 tfw_handle_capolicy(TfwCfgSpec *cs, TfwCfgEntry *ce, const short cmd)
 {
-	int i, ret, in_len;
+	int i, ret, len;
 	tfw_match_t op;
-	const char *in_op, *in_arg;
+	const char *in_op, *arg;
 
 	BUG_ON(!tfwcfg_this_location);
 	BUG_ON((cmd != TFW_D_CACHE_BYPASS) && (cmd != TFW_D_CACHE_FULFILL));
@@ -352,17 +567,17 @@ tfw_handle_capolicy(TfwCfgSpec *cs, TfwCfgEntry *ce, const short cmd)
 	for (i = 1; i < ce->val_n; ++i) {
 		TfwCaPolicy *capo;
 
-		in_arg = ce->vals[i];
-		in_len = strlen(in_arg);
+		arg = ce->vals[i];
+		len = strlen(arg);
 
 		/* Get the cache policy entry. */
-		capo = tfw_capolicy_lookup(cmd, op, in_arg, in_len);
+		capo = tfw_capolicy_lookup(cmd, op, arg, len);
 		if (capo) {
 			TFW_WARN("%s: Duplicate entry: '%s %s %s'\n",
-				 cs->name, cs->name, in_op, in_arg);
+				 cs->name, cs->name, in_op, arg);
 			continue;
 		}
-		capo = tfw_capolicy_new(cmd, op, in_arg, in_len);
+		capo = tfw_capolicy_new(cmd, op, arg, len);
 		if (!capo)
 			return -ENOMEM;
 		/* Link the cache policy entry with the location entry. */
@@ -434,17 +649,17 @@ tfw_location_lookup(tfw_match_t op, const char *arg, int len)
 static TfwLocation *
 tfw_location_new(tfw_match_t op, const char *arg, int len)
 {
-	char *argmem;
 	TfwLocation *loc;
-	TfwCaPolicy **capo;
-	size_t size = sizeof(TfwCaPolicy *) * TFW_CAPOLICY_ARRAY_SZ;
+	char *argmem, *data;
+	int size = sizeof(TfwCaPolicy *) * TFW_CAPOLICY_ARRAY_SZ
+		   + sizeof(TfwNipDef *) * TFW_NIPDEF_ARRAY_SZ;
 
 	if (tfw_location_sz == TFW_LOCATION_ARRAY_SZ)
 		return NULL;
 
 	if ((argmem = kmalloc(len + 1, GFP_KERNEL)) == NULL)
 		return NULL;
-	if ((capo = kmalloc(size, GFP_KERNEL)) == NULL) {
+	if ((data = kmalloc(size, GFP_KERNEL)) == NULL) {
 		kfree(argmem);
 		return NULL;
 	}
@@ -453,8 +668,10 @@ tfw_location_new(tfw_match_t op, const char *arg, int len)
 	loc->op = op;
 	loc->arg = argmem;
 	loc->len = len;
-	loc->capo = capo;
+	loc->capo = (TfwCaPolicy **)data;
 	loc->capo_sz = 0;
+	loc->nipdef = (TfwNipDef **)(loc->capo + TFW_CAPOLICY_ARRAY_SZ);
+	loc->nipdef_sz = 0;
 	memcpy((void *)loc->arg, (void *)arg, len + 1);
 
 	return loc;
@@ -467,9 +684,9 @@ tfw_location_new(tfw_match_t op, const char *arg, int len)
 static int
 tfw_begin_location(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	int ret, in_len;
+	int ret, len;
 	tfw_match_t op;
-	const char *in_op, *in_arg;
+	const char *in_op, *arg;
 
 	if (ce->attr_n) {
 		TFW_ERR("%s: Arguments may not have the \'=\' sign\n",
@@ -484,29 +701,29 @@ tfw_begin_location(TfwCfgSpec *cs, TfwCfgEntry *ce)
 
 	/* Get the values of the 'location' directive. */
 	in_op = ce->vals[0];	/* Match operator. */
-	in_arg = ce->vals[1];	/* String for the match operator. */
-	in_len = strlen(in_arg);
+	arg = ce->vals[1];	/* String for the match operator. */
+	len = strlen(arg);
 
 	/* Convert the match operator string to the enum value. */
 	ret = tfw_cfg_map_enum(tfw_match_enum, in_op, &op);
 	if (ret) {
 		TFW_ERR("%s: Unknown match OP: '%s %s %s'\n",
-			cs->name, cs->name, in_op, in_arg);
+			cs->name, cs->name, in_op, arg);
 		return -EINVAL;
 	}
 
 	/* Make sure the location is not a duplicate. */
-	if (tfw_location_lookup(op, in_arg, in_len)) {
+	if (tfw_location_lookup(op, arg, len)) {
 		TFW_ERR("%s: Duplicate entry: '%s %s %s'\n",
-			cs->name, cs->name, in_op, in_arg);
+			cs->name, cs->name, in_op, arg);
 		return -EINVAL;
 	}
 
 	/* Add new location and set it to be the current one. */
-	tfwcfg_this_location = tfw_location_new(op, in_arg, in_len);
+	tfwcfg_this_location = tfw_location_new(op, arg, len);
 	if (tfwcfg_this_location == NULL) {
 		TFW_ERR("%s: Unable to add new location: '%s %s %s'\n",
-			cs->name, cs->name, in_op, in_arg);
+			cs->name, cs->name, in_op, arg);
 		return -EINVAL;
 	}
 
@@ -592,7 +809,7 @@ tfw_handle_cache_purge_acl(TfwCfgSpec *cs, TfwCfgEntry *ce)
 		return -EINVAL;
 	}
 	TFW_CFG_ENTRY_FOR_EACH_VAL(ce, i, val) {
-		TfwAddr addr = {};
+		TfwAddr addr = { 0 };
 
 		if (tfw_addr_pton_cidr(val, &addr)) {
 			TFW_ERR("%s: Invalid ACL entry: '%s'\n",
@@ -705,6 +922,10 @@ tfw_cleanup_hdrvia(TfwCfgSpec *cs)
 static int
 tfw_vhost_cfg_start(void)
 {
+	BUILD_BUG_ON(sizeof(tfw_nipdef[0].method)*8-1 < _TFW_HTTP_METH_COUNT);
+	BUILD_BUG_ON(sizeof(tfw_capolicy[0].op)*8-1 < _TFW_HTTP_MATCH_O_COUNT);
+	BUILD_BUG_ON(sizeof(tfw_location[0].op)*8-1 < _TFW_HTTP_MATCH_O_COUNT);
+
 	if (tfw_vhost_dflt.cache_purge && !tfw_vhost_dflt.cache_purge_acl)
 		TFW_WARN("cache_purge directive works only in combination"
 			 " with cache_purge_acl directive.\n");
@@ -734,7 +955,14 @@ static TfwCfgSpec tfw_location_specs[] = {
 		.allow_repeat = true,
 		.cleanup = tfw_cleanup_locache
         },
-        {}
+        {
+		"nonidempotent", NULL,
+		tfw_handle_in_nonidempotent,
+		.allow_none = true,
+		.allow_repeat = true,
+		.cleanup = tfw_cleanup_locache
+        },
+        { 0 }
 };
 
 static TfwCfgSpec tfw_vhost_cfg_specs[] = {
@@ -773,6 +1001,13 @@ static TfwCfgSpec tfw_vhost_cfg_specs[] = {
 		.allow_repeat = true,
 		.cleanup = tfw_cleanup_locache
         },
+        {
+		"nonidempotent", NULL,
+		tfw_handle_out_nonidempotent,
+		.allow_none = true,
+		.allow_repeat = true,
+		.cleanup = tfw_cleanup_locache
+        },
 	{
 		"location", NULL,
 		tfw_cfg_handle_children,
@@ -786,7 +1021,7 @@ static TfwCfgSpec tfw_vhost_cfg_specs[] = {
 		/* .cleanup function in a section with
 		   children causes a BUG_ON in cfg.c. */
 	},
-	{},
+	{ 0 },
 };
 
 TfwCfgMod tfw_vhost_cfg_mod = {
diff --git a/tempesta_fw/vhost.h b/tempesta_fw/vhost.h
index da4314881..566f21882 100644
--- a/tempesta_fw/vhost.h
+++ b/tempesta_fw/vhost.h
@@ -23,6 +23,21 @@
 #include "str.h"
 #include "addr.h"
 
+/*
+ * Non-Idempotent Request definition.
+ *
+ * @method	- One bit for each value defined in tfw_http_meth_t.
+ * @op		- Match operator: eq, prefix, suffix, etc.
+ * @len		- Length of the string in @arg.
+ * @arg		- String for the match operator.
+ */
+typedef struct {
+	unsigned int	method;
+	short		op;
+	unsigned int	len;
+	const char	*arg;
+} TfwNipDef;
+
 /* Cache policy configuration directives. */
 typedef enum {
 	TFW_D_CACHE_BYPASS,
@@ -34,7 +49,7 @@ typedef enum {
  *
  * @cmd	- One of defined in tfw_capo_t.
  * @op	- Match operator: eq, prefix, suffix, etc.
- * @len	- Length of the sting in @arg.
+ * @len	- Length of the string in @arg.
  * @arg	- String for the match operator.
  */
 typedef struct {
@@ -49,16 +64,20 @@ typedef struct {
  *
  * @op		- Match operator: eq, prefix, suffix, etc.
  * @arg		- String for the match operator.
- * @len		- Length of the sting in @arg.
+ * @len		- Length of the string in @arg.
  * @capo_sz	- Size of @capo array.
+ * @nipdef_sz	- Size of @nipdef array.
  * @capo	- Array of pointers to Cache Policy definitions.
+ * @nipdef	- Array of pointers to Non-Idempotent Request definitions.
  */
 typedef struct {
 	short		op;
 	const char	*arg;
 	unsigned int	len;
 	unsigned int	capo_sz;
+	unsigned int	nipdef_sz;
 	TfwCaPolicy	**capo;
+	TfwNipDef	**nipdef;
 } TfwLocation;
 
 /* Cache purge configuration modes. */
@@ -87,6 +106,7 @@ typedef struct {
 	u8		cache_purge_acl:1;
 } TfwVhost;
 
+TfwNipDef *tfw_nipdef_match(TfwLocation *loc, unsigned char meth, TfwStr *arg);
 bool tfw_capuacl_match(TfwVhost *vhost, TfwAddr *addr);
 TfwCaPolicy *tfw_capolicy_match(TfwLocation *loc, TfwStr *arg);
 TfwLocation *tfw_location_match(TfwVhost *vhost, TfwStr *arg);

From 6bd9011b41ae346af35b630f44a09b8352f72d11 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Mon, 5 Dec 2016 16:19:23 +0300
Subject: [PATCH 17/65] Rework the handling of non-idempotent req definitions
 in config. (#419)

---
 tempesta_fw/vhost.c | 146 ++++++++++++++++++++++++--------------------
 1 file changed, 81 insertions(+), 65 deletions(-)

diff --git a/tempesta_fw/vhost.c b/tempesta_fw/vhost.c
index 76743aae2..be0727c25 100644
--- a/tempesta_fw/vhost.c
+++ b/tempesta_fw/vhost.c
@@ -54,14 +54,13 @@ static TfwCaPolicy	tfw_capolicy[TFW_CAPOLICY_ARRAY_SZ];
 static unsigned int	tfw_capolicy_sz = 0;	/* Current size. */
 
 /*
- * All non-idempotent request directives are put into a fixed size
- * array. The directives are deduplicated when put into the array.
+ * Each non-idempotent request definition directive is put into
+ * a separately allocated memory area. The pointers to the memory
+ * are put into a fixed size array of pointers within a location
+ * definition.
  */
 #define TFW_NIPDEF_ARRAY_SZ	(64)
 
-static TfwNipDef	tfw_nipdef[TFW_NIPDEF_ARRAY_SZ];
-static unsigned int	tfw_nipdef_sz = 0;	/* Current size. */
-
 /*
  * All 'location' directives are put into a fixed size array.
  * Duplicate directives are not allowed.
@@ -74,7 +73,7 @@ static unsigned int	tfw_location_sz = 0;	/* Current size. */
 /*
  * Default location is a wildcard location. It matches any URI.
  * It may (or may not) contain a set of cache matching directives,
- * or a set of non-idempotent request definitions.
+ * and/or a set of non-idempotent request definitions.
  */
 static TfwCaPolicy	*tfw_capolicy_dflt[TFW_CAPOLICY_ARRAY_SZ];
 static TfwNipDef	*tfw_nipdef_dflt[TFW_NIPDEF_ARRAY_SZ];
@@ -309,83 +308,78 @@ tfw_vhost_match(TfwStr *arg)
 static TfwLocation *tfwcfg_this_location;
 
 /*
- * Find a non-idempotent request entry in the array that holds
- * all non-idempotent request directives from all location sections.
+ * Find a non-idempotent request definition entry within specified location.
+ * Entries are processed in the order they are defined in the configuration.
+ * That means the matching entry must be the last entry in the array, and it
+ * must have the same match @op and the same @arg.
  */
 static TfwNipDef *
-tfw_nipdef_lookup_all(int method, int op, char *arg, int len)
+tfw_nipdef_lookup(TfwLocation *loc, int op, char *arg, int len)
 {
-	int i;
+	TfwNipDef *nipdef;
 
-	for (i = 0; i < tfw_nipdef_sz; ++i) {
-		TfwNipDef *nipdef = &tfw_nipdef[i];
-		if ((nipdef->method & method)
-		    && (nipdef->op == op)
-		    && (nipdef->len == len)
-		    && !strncasecmp(nipdef->arg, arg, len))
-			return nipdef;
-	}
+	if (!loc->nipdef_sz)
+		return NULL;
+
+	nipdef = loc->nipdef[loc->nipdef_sz - 1];
+	if ((nipdef->op == op) && (nipdef->len == len)
+	    && !strcasecmp(nipdef->arg, arg))
+		return nipdef;
 
 	return NULL;
 }
 
-/*
- * Find a non-idempotent request entry within specified location.
- */
 static TfwNipDef *
-tfw_nipdef_lookup_loc(TfwLocation *loc, int method, int op, char *arg)
+tfw_nipdef_lookup_dup(TfwLocation *loc, int method, int op, char *arg, int len)
 {
 	int i;
+	TfwNipDef *nipdef;
 
-	for (i = 0; i < loc->nipdef_sz; ++i) {
-		TfwNipDef *nipdef = loc->nipdef[i];
-		if ((nipdef->method & method)
-		    && (nipdef->op == op)
+	if (!loc->nipdef_sz)
+		return NULL;
+
+	/* Check all entries but the last one. */
+	for (i = 0; i < loc->nipdef_sz - 1; ++i) {
+		nipdef = loc->nipdef[i];
+		if ((nipdef->op == op) && (nipdef->len == len)
 		    && !strcasecmp(nipdef->arg, arg))
 			return nipdef;
 	}
+	/* Check the last entry. */
+	nipdef = loc->nipdef[i];
+	if ((nipdef->method & method) && (nipdef->op == op)
+	    && (nipdef->len == len) && !strcasecmp(nipdef->arg, arg))
+		return nipdef;
 
 	return NULL;
 }
 
 /*
- * Create and initialize a new non-idempotent request entry. The entry
- * is placed in the array for all non-idempotent request entries from
- * all location sections.
+ * Create and initialize a new non-idempotent request definition entry,
+ * and add it to the given location structure. The entry is added as
+ * a pointer to the memory allocated to hold the definition.
  */
 static TfwNipDef *
-tfw_nipdef_new(int method, int op, char *arg, int len)
+tfw_nipdef_addnew(TfwLocation *loc, int method, int op, char *arg, int len)
 {
-	char *argmem;
+	char *data;
 	TfwNipDef *nipdef;
 
-	if (tfw_nipdef_sz == TFW_NIPDEF_ARRAY_SZ)
+	if (loc->nipdef_sz == TFW_NIPDEF_ARRAY_SZ)
 		return NULL;
 
-	if ((argmem = kmalloc(len + 1, GFP_KERNEL)) == NULL)
+	if ((data = kmalloc(sizeof(TfwNipDef) + len + 1, GFP_KERNEL)) == NULL)
 		return NULL;
 
-	nipdef = &tfw_nipdef[tfw_nipdef_sz++];
+	nipdef = (TfwNipDef *)data;
 	nipdef->method = method;
 	nipdef->op = op;
-	nipdef->arg = argmem;
+	nipdef->arg = data + sizeof(TfwNipDef);
 	nipdef->len = len;
 	memcpy((void *)nipdef->arg, (void *)arg, len + 1);
 
-	return nipdef;
-}
-
-/*
- * Add a new non-idempotent request entry to the given location structure.
- * The entry is added as a pointer into the array for all non-idempotent
- * request entries.
- */
-static TfwNipDef *
-tfw_nipdef_add(TfwLocation *loc, TfwNipDef *nipdef)
-{
-	if (loc->nipdef_sz == TFW_NIPDEF_ARRAY_SZ)
-		return NULL;
 	loc->nipdef[loc->nipdef_sz++] = nipdef;
+
 	return nipdef;
 }
 
@@ -430,24 +424,29 @@ tfw_handle_nonidempotent(TfwCfgSpec *cs, TfwCfgEntry *ce)
 	arg = (char *)ce->vals[2];
 	len = strlen(arg);
 
-	/* Do not create a duplicate entry in the storage array. */
-	nipdef = tfw_nipdef_lookup_all(method, op, arg, len);
-	if (!nipdef) {
-		nipdef = tfw_nipdef_new(method, op, arg, len);
-		if (nipdef == NULL)
-			return -ENOMEM;
-	} else if (tfw_nipdef_lookup_loc(loc, method, op, arg)) {
-		/* Do not add a duplicate entry within a location. */
+	/*
+	 * Issue a warning if an earlier entry has the same @op and @arg,
+	 * or if the last entry already includes this method for them.
+	 */
+	if (tfw_nipdef_lookup_dup(loc, method, op, arg, len))
 		TFW_WARN("%s: Duplicate entry in location '%s': "
 			 "'%s %s %s %s'\n", cs->name,
 			 loc == &tfw_location_dflt ? "default" : loc->arg,
 			 cs->name, in_method, in_op, arg);
-		return 0;
-	}
 
-	/* Add the entry to the location. */
-	if (!tfw_nipdef_add(loc, nipdef))
-		return -ENOENT;
+	/*
+	 * Do not add a "duplicate" entry within a location. If the
+	 * preceding entry has the same @op and @arg, then just add
+	 * the new method to the entry.
+	 */
+	nipdef = tfw_nipdef_lookup(loc, op, arg, len);
+	if (nipdef) {
+		nipdef->method |= method;
+	} else {
+		nipdef = tfw_nipdef_addnew(loc, method, op, arg, len);
+		if (nipdef == NULL)
+			return -ENOMEM;
+	}
 
 	return 0;
 }
@@ -748,7 +747,7 @@ tfw_finish_location(TfwCfgSpec *cs)
 static void
 __tfw_cleanup_locache(void)
 {
-	int i;
+	int i, k;
 
 	for (i = 0; i < tfw_location_sz; ++i) {
 		TfwLocation *loc = &tfw_location[i];
@@ -756,6 +755,11 @@ __tfw_cleanup_locache(void)
 			kfree(loc->arg);
 			loc->arg = NULL;
 		}
+		for (k = 0; k < loc->nipdef_sz; ++k) {
+			if (loc->nipdef[k])
+				kfree(loc->nipdef[k]);
+		}
+		/* Free both loc->capo and loc->nipdef. */
 		if (loc->capo) {
 			kfree(loc->capo);
 			loc->capo = NULL;
@@ -768,6 +772,14 @@ __tfw_cleanup_locache(void)
 			capo->arg = NULL;
 		}
 	}
+	for (i = 0; i < tfw_location_dflt.nipdef_sz; ++i) {
+		if (tfw_location_dflt.nipdef[i])
+			kfree(tfw_location_dflt.nipdef[i]);
+	}
+	tfw_capolicy_sz = 0;
+	tfw_location_sz = 0;
+	tfw_location_dflt.capo_sz = 0;
+	tfw_location_dflt.nipdef_sz = 0;
 }
 
 static void
@@ -922,14 +934,18 @@ tfw_cleanup_hdrvia(TfwCfgSpec *cs)
 static int
 tfw_vhost_cfg_start(void)
 {
-	BUILD_BUG_ON(sizeof(tfw_nipdef[0].method)*8-1 < _TFW_HTTP_METH_COUNT);
-	BUILD_BUG_ON(sizeof(tfw_capolicy[0].op)*8-1 < _TFW_HTTP_MATCH_O_COUNT);
-	BUILD_BUG_ON(sizeof(tfw_location[0].op)*8-1 < _TFW_HTTP_MATCH_O_COUNT);
+	BUILD_BUG_ON(sizeof(tfw_nipdef_dflt[0]->method) * 8 - 1
+		     < _TFW_HTTP_METH_COUNT);
+	BUILD_BUG_ON(sizeof(tfw_capolicy_dflt[0]->op) * 8 - 1
+		     < _TFW_HTTP_MATCH_O_COUNT);
+	BUILD_BUG_ON(sizeof(tfw_location_dflt.op) * 8 - 1
+		     < _TFW_HTTP_MATCH_O_COUNT);
 
 	if (tfw_vhost_dflt.cache_purge && !tfw_vhost_dflt.cache_purge_acl)
 		TFW_WARN("cache_purge directive works only in combination"
 			 " with cache_purge_acl directive.\n");
 	tfw_vhost_dflt.loc_sz = tfw_location_sz;
+
 	return 0;
 }
 

From d113a32d40e389cae48e55503125a97275a97266 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Sat, 10 Dec 2016 13:18:05 +0300
Subject: [PATCH 18/65] Fix a number of rebase glitches. (#419)

---
 tempesta_fw/cache.c                   | 10 ++++----
 tempesta_fw/http.c                    | 37 ++++++++++++---------------
 tempesta_fw/http.h                    |  8 +++---
 tempesta_fw/http_msg.c                |  4 ++-
 tempesta_fw/sock_srv.c                | 13 +++++-----
 tempesta_fw/t/unit/test_http_sticky.c |  2 +-
 6 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/tempesta_fw/cache.c b/tempesta_fw/cache.c
index 04f5e135f..129fd21be 100644
--- a/tempesta_fw/cache.c
+++ b/tempesta_fw/cache.c
@@ -935,12 +935,12 @@ tfw_cache_purge_method(TfwHttpReq *req)
 
 	/* Deny PURGE requests by default. */
 	if (!(cache_cfg.cache && vhost->cache_purge && vhost->cache_purge_acl))
-		return tfw_http_send_403(req, "unconfigured purge request");
+		return tfw_http_send_403(req);
 
 	/* Accept requests from configured hosts only. */
 	ss_getpeername(req->conn->sk, &saddr);
 	if (!tfw_capuacl_match(vhost, &saddr))
-		return tfw_http_send_403(req, "purge request ACL violation");
+		return tfw_http_send_403(req);
 
 	/* Only "invalidate" option is implemented at this time. */
 	switch (vhost->cache_purge_mode) {
@@ -948,11 +948,11 @@ tfw_cache_purge_method(TfwHttpReq *req)
 		ret = tfw_cache_purge_invalidate(req);
 		break;
 	default:
-		return tfw_http_send_403(req, "bad purge option");
+		return tfw_http_send_403(req);
 	}
 
 	return ret
-		? tfw_http_send_404(req, "purge error")
+		? tfw_http_send_404(req)
 		: tfw_http_send_200(req);
 }
 
@@ -1207,7 +1207,7 @@ cache_req_process_node(TfwHttpReq *req, tfw_http_cache_cb_t action)
 		resp->flags |= TFW_HTTP_RESP_STALE;
 out:
 	if (!resp && (req->cache_ctl.flags & TFW_HTTP_CC_OIFCACHED))
-		tfw_http_send_504(req, "resource not cached");
+		tfw_http_send_504(req);
 	else
 		action(req, resp);
 
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index b3524081f..12dba2110 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -253,7 +253,7 @@ tfw_http_send_200(TfwHttpReq *req)
  * HTTP 403 response: Access is forbidden.
  */
 int
-tfw_http_send_403(TfwHttpReq *req, const char *reason)
+tfw_http_send_403(TfwHttpReq *req)
 {
 	TfwStr rh = {
 		.ptr = (TfwStr []){
@@ -266,7 +266,7 @@ tfw_http_send_403(TfwHttpReq *req, const char *reason)
 		.flags = 4 << TFW_STR_CN_SHIFT
 	};
 
-	TFW_DBG("Send HTTP 404 response: %s\n", reason);
+	TFW_DBG("Send HTTP 403 response\n");
 
 	return tfw_http_send_resp(req, &rh, __TFW_STR_CH(&rh, 1));
 }
@@ -277,7 +277,7 @@ tfw_http_send_403(TfwHttpReq *req, const char *reason)
  * HTTP 404 response: Tempesta is unable to find the requested data.
  */
 int
-tfw_http_send_404(TfwHttpReq *req, const char *reason)
+tfw_http_send_404(TfwHttpReq *req)
 {
 	TfwStr rh = {
 		.ptr = (TfwStr []){
@@ -302,7 +302,7 @@ tfw_http_send_404(TfwHttpReq *req, const char *reason)
  * the request to a server.
  */
 static int
-tfw_http_send_500(TfwHttpReq *req, const char *reason)
+tfw_http_send_500(TfwHttpReq *req)
 {
 	TfwStr rh = {
 		.ptr = (TfwStr []){
@@ -315,7 +315,7 @@ tfw_http_send_500(TfwHttpReq *req, const char *reason)
 		.flags = 4 << TFW_STR_CN_SHIFT
 	};
 
-	TFW_DBG("Send HTTP 500 response: %s\n", reason);
+	TFW_DBG("Send HTTP 500 response\n");
 
 	return tfw_http_send_resp(req, &rh, __TFW_STR_CH(&rh, 1));
 }
@@ -327,7 +327,7 @@ tfw_http_send_500(TfwHttpReq *req, const char *reason)
  * the designated server.
  */
 int
-tfw_http_send_502(TfwHttpReq *req, const char *reason)
+tfw_http_send_502(TfwHttpReq *req)
 {
 	TfwStr rh = {
 		.ptr = (TfwStr []){
@@ -340,7 +340,7 @@ tfw_http_send_502(TfwHttpReq *req, const char *reason)
 		.flags = 4 << TFW_STR_CN_SHIFT
 	};
 
-	TFW_DBG("Send HTTP 502 response: %s\n", reason);
+	TFW_DBG("Send HTTP 502 response\n");
 
 	return tfw_http_send_resp(req, &rh, __TFW_STR_CH(&rh, 1));
 }
@@ -352,7 +352,7 @@ tfw_http_send_502(TfwHttpReq *req, const char *reason)
  * the designated server.
  */
 int
-tfw_http_send_504(TfwHttpReq *req, const char *reason)
+tfw_http_send_504(TfwHttpReq *req)
 {
 	TfwStr rh = {
 		.ptr = (TfwStr []){
@@ -365,7 +365,7 @@ tfw_http_send_504(TfwHttpReq *req, const char *reason)
 		.flags = 4 << TFW_STR_CN_SHIFT
 	};
 
-	TFW_DBG("Send HTTP 504 response: %s\n", reason);
+	TFW_DBG("Send HTTP 504 response\n");
 
 	return tfw_http_send_resp(req, &rh, __TFW_STR_CH(&rh, 1));
 }
@@ -776,7 +776,6 @@ tfw_http_conn_msg_alloc(TfwConnection *conn)
 	tfw_connection_get(conn);
 
 	if (TFW_CONN_TYPE(conn) & Conn_Clnt) {
-		INIT_LIST_HEAD(&((TfwHttpReq *)hm)->nip_list);
 		TFW_INC_STAT_BH(clnt.rx_messages);
 	} else {
 		TfwHttpReq *req;
@@ -1387,7 +1386,7 @@ tfw_http_req_cache_service(TfwHttpReq *req, TfwHttpResp *resp)
 	TFW_INC_STAT_BH(clnt.msgs_fromcache);
 	return;
 resp_err:
-	tfw_http_send_500(req, "cannot send response from cache");
+	tfw_http_send_500(req);
 	TFW_INC_STAT_BH(clnt.msgs_otherr);
 	return;
 }
@@ -1452,12 +1451,12 @@ tfw_http_req_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 	if (tfw_http_adjust_req(req))
 		goto send_500;
 
-	/* Send request to the server. */
+	/* Forward request to the server. */
 	tfw_http_req_fwd(srv_conn, req);
 	goto conn_put;
 
 send_502:
-	tfw_http_send_502(req, "request proxy error");
+	tfw_http_send_502(req);
 	TFW_INC_STAT_BH(clnt.msgs_otherr);
 	return;
 send_500:
@@ -1726,7 +1725,7 @@ tfw_http_req_process(TfwConnection *conn, struct sk_buff *skb, unsigned int off)
 		 * Otherwise we lose the reference to it and get a leak.
 		 */
 		if (tfw_cache_process(req, NULL, tfw_http_req_cache_cb)) {
-			tfw_http_send_500(req, "request cache error");
+			tfw_http_send_500(req);
 			TFW_INC_STAT_BH(clnt.msgs_otherr);
 			return TFW_PASS;
 		}
@@ -1823,8 +1822,8 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 		BUG_ON(atomic_read(&srv_conn->qsize));
 		spin_unlock(&srv_conn->msg_qlock);
 		/* @conn->msg will get NULLed in the process. */
-		TFW_WARN("Paired request missing\n");
-		TFW_WARN("Possible HTTP Response Splitting attack.\n");
+		TFW_WARN("Paired request missing, "
+			 "HTTP Response Splitting attack?\n");
 		tfw_http_conn_msg_free(hmresp);
 		TFW_INC_STAT_BH(serv.msgs_otherr);
 		return NULL;
@@ -1889,9 +1888,8 @@ tfw_http_resp_gfsm(TfwHttpMsg *hmresp, struct sk_buff *skb, unsigned int off)
 		return TFW_BLOCK;
 	}
 
-	tfw_http_send_502(req, "response filtered");
+	tfw_http_send_502(req);
 	tfw_http_conn_msg_free(hmresp);
-	tfw_http_conn_msg_free((TfwHttpMsg *)req);
 	TFW_INC_STAT_BH(serv.msgs_filtout);
 	return r;
 }
@@ -1938,9 +1936,8 @@ tfw_http_resp_cache(TfwHttpMsg *hmresp)
 	if (tfw_cache_process(req, (TfwHttpResp *)hmresp,
 			      tfw_http_resp_cache_cb))
 	{
-		tfw_http_send_500(req, "response cache error");
+		tfw_http_send_500(req);
 		tfw_http_conn_msg_free(hmresp);
-		tfw_http_conn_msg_free((TfwHttpMsg *)req);
 		TFW_INC_STAT_BH(serv.msgs_otherr);
 		/* Proceed with processing of the next response. */
 	}
diff --git a/tempesta_fw/http.h b/tempesta_fw/http.h
index 40c7a825e..6f95abea6 100644
--- a/tempesta_fw/http.h
+++ b/tempesta_fw/http.h
@@ -455,10 +455,10 @@ void tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp);
  */
 int tfw_http_send_200(TfwHttpReq *req);
 int tfw_http_prep_302(TfwHttpMsg *resp, TfwHttpReq *req, TfwStr *cookie);
-int tfw_http_send_403(TfwHttpReq *req, const char *reason);
-int tfw_http_send_404(TfwHttpReq *req, const char *reason);
-int tfw_http_send_502(TfwHttpReq *req, const char *reason);
-int tfw_http_send_504(TfwHttpReq *req, const char *reason);
+int tfw_http_send_403(TfwHttpReq *req);
+int tfw_http_send_404(TfwHttpReq *req);
+int tfw_http_send_502(TfwHttpReq *req);
+int tfw_http_send_504(TfwHttpReq *req);
 
 /*
  * Functions to create SKBs with data stream.
diff --git a/tempesta_fw/http_msg.c b/tempesta_fw/http_msg.c
index e08a181e9..176b802e5 100644
--- a/tempesta_fw/http_msg.c
+++ b/tempesta_fw/http_msg.c
@@ -881,8 +881,10 @@ tfw_http_msg_alloc(int type)
 	else
 		__hbh_parser_init_resp((TfwHttpResp *)hm);
 
-	if (type & Conn_Clnt)
+	if (type & Conn_Clnt) {
+		INIT_LIST_HEAD(&((TfwHttpReq *)hm)->nip_list);
 		hm->destructor = tfw_http_req_destruct;
+	}
 
 	return hm;
 }
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index efb3abe3e..1503ef1d0 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -195,8 +195,8 @@ tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
 {
 	/*
 	 * Timeout between connect attempts is increased with each
-	 * unsuccessful attempt. Length of the timeout is decided with
-	 * a variant of exponential backoff delay algorithm.
+	 * unsuccessful attempt. Length of the timeout is decided
+	 * with a variant of exponential backoff delay algorithm.
 	 *
 	 * It's essential that the new connection is established and the
 	 * failed connection is restored ASAP, so the min retry interval
@@ -677,11 +677,12 @@ tfw_handle_out_conn_tries(TfwCfgSpec *cs, TfwCfgEntry *ce)
 	return tfw_handle_opt_val(cs, ce, &tfw_cfg_out_retry_attempts);
 }
 
+static int
 tfw_cfg_set_conn_tries(TfwServer *srv, int attempts)
 {
 	TfwSrvConnection *srv_conn;
 
-	list_for_each_entry(srv_conn, &srv->conn_list, conn.list) {
+	list_for_each_entry(srv_conn, &srv->conn_list, conn.list)
 		srv_conn->max_attempts = attempts;
 
 	return 0;
@@ -813,7 +814,7 @@ tfw_handle_out_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
 	if (!(srv = tfw_handle_server(cs, ce)))
 		return -EINVAL;
 
-	tfw_cfg_set_conn_tries(srv, tfw_cfg_out_retry_attempts)
+	tfw_cfg_set_conn_tries(srv, tfw_cfg_out_retry_attempts);
 	srv->qsize_max = tfw_cfg_out_queue_size ? : UINT_MAX;
 	srv->qjtimeout = tfw_cfg_out_send_timeout
 		       ? msecs_to_jiffies(tfw_cfg_out_send_timeout * 1000)
@@ -895,10 +896,10 @@ tfw_finish_srv_group(TfwCfgSpec *cs)
 	TFW_DBG("finish srv_group: %s\n", tfw_cfg_curr_group->name);
 
 	for (i = 0; i < tfw_cfg_in_lstsz; ++i) {
+		TfwServer *srv = tfw_cfg_in_lst[i];
 		unsigned long jtmout =
 			msecs_to_jiffies(tfw_cfg_in_send_timeout * 1000);
-		tfw_cfg_set_conn_tries(tfw_cfg_in_lst[i],
-				       tfw_cfg_in_retry_attempts);
+		tfw_cfg_set_conn_tries(srv, tfw_cfg_in_retry_attempts);
 		srv->qsize_max = tfw_cfg_in_queue_size ? : UINT_MAX;
 		srv->qjtimeout = tfw_cfg_in_send_timeout ? jtmout : ULONG_MAX;
 		srv->retry_max = tfw_cfg_in_send_tries ? : UINT_MAX;
diff --git a/tempesta_fw/t/unit/test_http_sticky.c b/tempesta_fw/t/unit/test_http_sticky.c
index a914d0846..05899de9b 100644
--- a/tempesta_fw/t/unit/test_http_sticky.c
+++ b/tempesta_fw/t/unit/test_http_sticky.c
@@ -266,7 +266,7 @@ TEST(http_sticky, sending_502)
 	StickyVal sv = { .ts = 1 };
 
 	EXPECT_EQ(__sticky_calc(mock.req, &sv), 0);
-	EXPECT_EQ(tfw_http_send_502(mock.req, __func__), 0);
+	EXPECT_EQ(tfw_http_send_502(mock.req), 0);
 
 	/* HTTP 502 response have no Set-Cookie header */
 	EXPECT_TRUE(mock.tfw_connection_send_was_called);

From 7479c7d3dbc080ae85bcf391f7d9727d48305484 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Sat, 10 Dec 2016 13:22:01 +0300
Subject: [PATCH 19/65] Fix sticky cookie unit tests. (#419)

---
 tempesta_fw/t/unit/helpers.c          |  2 ++
 tempesta_fw/t/unit/test_http_sticky.c | 35 +++++++++++++++++----------
 2 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/tempesta_fw/t/unit/helpers.c b/tempesta_fw/t/unit/helpers.c
index 5d22cff02..8c2cf31c4 100644
--- a/tempesta_fw/t/unit/helpers.c
+++ b/tempesta_fw/t/unit/helpers.c
@@ -50,6 +50,7 @@ test_req_alloc(size_t data_len)
 	BUG_ON(!req);
 
 	memset(&conn_req, 0, sizeof(TfwConnection));
+	tfw_connection_init(&conn_req);
 	conn_req.proto.type = Conn_HttpClnt;
 	req->conn = &conn_req;
 
@@ -77,6 +78,7 @@ test_resp_alloc(size_t data_len)
 	BUG_ON(!resp);
 
 	memset(&conn_resp, 0, sizeof(TfwConnection));
+	tfw_connection_init(&conn_resp);
 	conn_resp.proto.type = Conn_HttpSrv;
 	resp->conn = &conn_resp;
 
diff --git a/tempesta_fw/t/unit/test_http_sticky.c b/tempesta_fw/t/unit/test_http_sticky.c
index 05899de9b..fe5d621b6 100644
--- a/tempesta_fw/t/unit/test_http_sticky.c
+++ b/tempesta_fw/t/unit/test_http_sticky.c
@@ -81,7 +81,8 @@ static struct {
 
 	TfwHttpReq	*req;
 	TfwHttpResp	*resp;
-	TfwConnection   connection;
+	TfwConnection   conn_req;
+	TfwConnection   conn_resp;
 	TfwClient	client;
 	struct sock	sock;
 } mock;
@@ -209,19 +210,26 @@ http_sticky_suite_setup(void)
 	skb_reserve(skb, MAX_TCP_HEADER);
 	ss_skb_queue_tail(&mock.resp->msg.skb_list, skb);
 
-	mock.req->conn = &mock.connection;
-	mock.resp->conn = &mock.connection;
-	mock.connection.peer = (TfwPeer *)&mock.client;
-	mock.connection.sk = &mock.sock;
+	tfw_connection_init(&mock.conn_req);
+	tfw_connection_init(&mock.conn_resp);
+
+	tfw_connection_revive(&mock.conn_req);
+	mock.conn_req.peer = (TfwPeer *)&mock.client;
 	mock.sock.sk_family = AF_INET;
+	mock.conn_req.sk = &mock.sock;
+
+	mock.req->conn = &mock.conn_req;
+	mock.resp->conn = &mock.conn_resp;
+	mock.req->vhost = tfw_vhost_get_default();
+
+	tfw_http_req_add_seq_queue(mock.req);
+	mock.req->resp = (TfwHttpMsg *)mock.resp;
 }
 
 static void
 http_sticky_suite_teardown(void)
 {
-	tfw_http_msg_free((TfwHttpMsg *)mock.req);
-	tfw_http_msg_free((TfwHttpMsg *)mock.resp);
-
+	tfw_connection_put(mock.req->conn);
 	memset(&mock, 0, sizeof(mock));
 }
 
@@ -242,8 +250,9 @@ TEST(http_sticky, sending_302)
 	{
 		StickyVal sv = { .ts = 1 };
 
-		/* Need host header and
-		 *it must be compound as special header
+		/*
+		 * Need host header.
+		 * It must be compound as a special header.
 		 */
 		TFW_STR2(hdr1, "Host: ", "localhost");
 
@@ -413,7 +422,7 @@ TEST(http_sticky, req_no_cookie)
 
 	/* since response was modified, we need to parse it again */
 	EXPECT_EQ(http_parse_resp_helper(), 0);
-	tfw_connection_send(&mock.connection, &mock.resp->msg);
+	tfw_connection_send(&mock.conn_req, &mock.resp->msg);
 
 	EXPECT_TRUE(mock.tfw_connection_send_was_called);
 	EXPECT_TRUE(mock.seen_set_cookie_header);
@@ -452,7 +461,7 @@ TEST(http_sticky, req_have_cookie)
 
 	/* since response could be modified, we need to parse it again */
 	EXPECT_EQ(http_parse_resp_helper(), 0);
-	tfw_connection_send(&mock.connection, &mock.resp->msg);
+	tfw_connection_send(&mock.conn_req, &mock.resp->msg);
 
 	/* no Set-Cookie headers are expected */
 	EXPECT_FALSE(mock.seen_set_cookie_header);
@@ -508,7 +517,7 @@ TEST(http_sticky, req_have_cookie_enforce)
 
 	/* since response could be modified, we need to parse it again */
 	EXPECT_EQ(http_parse_resp_helper(), 0);
-	tfw_connection_send(&mock.connection, &mock.resp->msg);
+	tfw_connection_send(&mock.conn_req, &mock.resp->msg);
 
 	/* no Set-Cookie headers are expected */
 	EXPECT_FALSE(mock.seen_set_cookie_header);

From 8b4b52655dde8716841844151d236066d157f7a8 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Sat, 10 Dec 2016 18:04:52 +0300
Subject: [PATCH 20/65] Fix schedulers unit tests. (#419)

---
 tempesta_fw/t/unit/sched_helper.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tempesta_fw/t/unit/sched_helper.c b/tempesta_fw/t/unit/sched_helper.c
index edc0d6b61..842e9163f 100644
--- a/tempesta_fw/t/unit/sched_helper.c
+++ b/tempesta_fw/t/unit/sched_helper.c
@@ -105,6 +105,7 @@ test_create_srv(const char *in_addr, TfwSrvGroup *sg)
 	srv = tfw_server_create(&addr);
 	BUG_ON(!srv);
 
+	srv->qsize_max = 100;
 	tfw_sg_add(sg, srv);
 
 	return srv;
@@ -123,8 +124,8 @@ test_create_conn(TfwPeer *peer)
 	if (!tfw_srv_conn_cache)
 		tfw_sock_srv_init();
 	srv_conn = tfw_srv_conn_alloc();
-
 	BUG_ON(!srv_conn);
+
 	tfw_connection_link_peer(&srv_conn->conn, peer);
 	srv_conn->conn.sk = &__test_sock;
 	/* A connection is skipped by schedulers if (refcnt <= 0). */

From 8c9afa43775b0d0eb94c4493c4645131c0ef9aa4 Mon Sep 17 00:00:00 2001
From: Alexander K <ak@natsys-lab.com>
Date: Tue, 27 Dec 2016 14:04:41 +0300
Subject: [PATCH 21/65] Port to 4.8.15

---
 tempesta_fw/cfg.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tempesta_fw/cfg.c b/tempesta_fw/cfg.c
index 35c828bdb..ed13e0595 100644
--- a/tempesta_fw/cfg.c
+++ b/tempesta_fw/cfg.c
@@ -88,6 +88,7 @@
 #include <linux/kernel.h>
 #include <linux/moduleparam.h>
 #include <linux/vmalloc.h>
+#include <net/net_namespace.h> /* for sysctl */
 
 #include "addr.h"
 #include "cfg.h"

From dbe3adf335c0d61b160523ff841e63cf6b594b43 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Mon, 2 Jan 2017 18:17:56 +0300
Subject: [PATCH 22/65] More careful handling of various data types in
 vhost.[ch]

---
 tempesta_fw/vhost.c | 91 ++++++++++++++++++++++++---------------------
 tempesta_fw/vhost.h | 20 +++++-----
 2 files changed, 58 insertions(+), 53 deletions(-)

diff --git a/tempesta_fw/vhost.c b/tempesta_fw/vhost.c
index be0727c25..bfa4cc65a 100644
--- a/tempesta_fw/vhost.c
+++ b/tempesta_fw/vhost.c
@@ -24,7 +24,7 @@
 #include "str.h"
 
 /* Mappings for match operators. */
-static const TfwCfgEnum const __read_mostly tfw_match_enum[] = {
+static TfwCfgEnum const __read_mostly tfw_match_enum[] = {
 	{ "*",		TFW_HTTP_MATCH_O_WILDCARD },
 	{ "eq",		TFW_HTTP_MATCH_O_EQ },
 	{ "prefix",	TFW_HTTP_MATCH_O_PREFIX },
@@ -33,7 +33,7 @@ static const TfwCfgEnum const __read_mostly tfw_match_enum[] = {
 };
 
 /* Mappings for HTTP request methods. */
-static const TfwCfgEnum const __read_mostly tfw_method_enum[] = {
+static TfwCfgEnum const __read_mostly tfw_method_enum[] = {
 	{ "*",		UINT_MAX },
 	{ "GET",	1 << TFW_HTTP_METH_GET },
 	{ "HEAD",	1 << TFW_HTTP_METH_HEAD },
@@ -51,7 +51,7 @@ static const TfwCfgEnum const __read_mostly tfw_method_enum[] = {
 #define TFW_CAPOLICY_ARRAY_SZ	(64)
 
 static TfwCaPolicy	tfw_capolicy[TFW_CAPOLICY_ARRAY_SZ];
-static unsigned int	tfw_capolicy_sz = 0;	/* Current size. */
+static size_t		tfw_capolicy_sz = 0;	/* Current size. */
 
 /*
  * Each non-idempotent request definition directive is put into
@@ -68,7 +68,7 @@ static unsigned int	tfw_capolicy_sz = 0;	/* Current size. */
 #define TFW_LOCATION_ARRAY_SZ	(64)
 
 static TfwLocation	tfw_location[TFW_LOCATION_ARRAY_SZ];
-static unsigned int	tfw_location_sz = 0;	/* Current size. */
+static size_t		tfw_location_sz = 0;	/* Current size. */
 
 /*
  * Default location is a wildcard location. It matches any URI.
@@ -105,7 +105,7 @@ static TfwAddr		tfw_capuacl[TFW_CAPUACL_ARRAY_SZ];
  * Note that @loc_dflt in the default vhost serves as global
  * default caching policy.
  */
-static const char __read_mostly s_hdr_via_dflt[] =
+static char const __read_mostly s_hdr_via_dflt[] =
 	"tempesta_fw" " (" TFW_NAME " " TFW_VERSION ")";
 
 static TfwVhost		tfw_vhost_dflt = {
@@ -127,7 +127,7 @@ static TfwVhost		tfw_vhost_dflt = {
 bool
 tfw_capuacl_match(TfwVhost *vhost, TfwAddr *addr)
 {
-	int i;
+	size_t i;
 	struct in6_addr *inaddr = &addr->v6.sin6_addr;
 
 	for (i = 0; i < vhost->capuacl_sz; ++i) {
@@ -145,36 +145,36 @@ tfw_capuacl_match(TfwVhost *vhost, TfwAddr *addr)
  * The functions are generic.
  */
 static bool
-__tfw_match_wildcard(tfw_match_t op, const char *cstr, int len, TfwStr *arg)
+__tfw_match_wildcard(tfw_match_t op, const char *cstr, size_t len, TfwStr *arg)
 {
 	return ((op == TFW_HTTP_MATCH_O_WILDCARD)
 		&& (len == 1) && (*cstr == '*'));
 }
 
 static bool
-__tfw_match_suffix(tfw_match_t op, const char *cstr, int len, TfwStr *arg)
+__tfw_match_suffix(tfw_match_t op, const char *cstr, size_t len, TfwStr *arg)
 {
 	tfw_str_eq_flags_t flags = TFW_STR_EQ_DEFAULT | TFW_STR_EQ_CASEI;
 	return tfw_str_eq_cstr_off(arg, arg->len - len, cstr, len, flags);
 }
 
 static bool
-__tfw_match_eq(tfw_match_t op, const char *cstr, int len, TfwStr *arg)
+__tfw_match_eq(tfw_match_t op, const char *cstr, size_t len, TfwStr *arg)
 {
 	tfw_str_eq_flags_t flags = TFW_STR_EQ_DEFAULT | TFW_STR_EQ_CASEI;
 	return tfw_str_eq_cstr(arg, cstr, len, flags);
 }
 
 static bool
-__tfw_match_prefix(tfw_match_t op, const char *cstr, int len, TfwStr *arg)
+__tfw_match_prefix(tfw_match_t op, const char *cstr, size_t len, TfwStr *arg)
 {
 	tfw_str_eq_flags_t flags = TFW_STR_EQ_PREFIX | TFW_STR_EQ_CASEI;
 	return tfw_str_eq_cstr(arg, cstr, len, flags);
 }
 
-typedef bool (*__tfw_match_fn)(tfw_match_t, const char *, int, TfwStr *);
+typedef bool (*__tfw_match_fn)(tfw_match_t, const char *, size_t, TfwStr *);
 
-static const __tfw_match_fn const __read_mostly __tfw_match_fn_tbl[] = {
+static __tfw_match_fn const __read_mostly __tfw_match_fn_tbl[] = {
 	[0 ... _TFW_HTTP_MATCH_O_COUNT] = NULL,
 	[TFW_HTTP_MATCH_O_WILDCARD]	= __tfw_match_wildcard,
 	[TFW_HTTP_MATCH_O_EQ]		= __tfw_match_eq,
@@ -200,7 +200,7 @@ __tfw_nipdef_match_fn(TfwNipDef *nipdef, TfwStr *arg)
 TfwNipDef *
 tfw_nipdef_match(TfwLocation *loc, unsigned char method, TfwStr *arg)
 {
-	int i;
+	size_t i;
 
 	BUG_ON(!loc);
 	BUG_ON(!arg);
@@ -232,7 +232,7 @@ __tfw_capolicy_match_fn(TfwCaPolicy *capo, TfwStr *arg)
 TfwCaPolicy *
 tfw_capolicy_match(TfwLocation *loc, TfwStr *arg)
 {
-	int i;
+	size_t i;
 
 	BUG_ON(!loc);
 	BUG_ON(!arg);
@@ -262,7 +262,7 @@ __tfw_location_match(TfwLocation *loc, TfwStr *arg)
 TfwLocation *
 tfw_location_match(TfwVhost *vhost, TfwStr *arg)
 {
-	int i;
+	size_t i;
 
 	BUG_ON(!vhost);
 	BUG_ON(!arg);
@@ -314,7 +314,7 @@ static TfwLocation *tfwcfg_this_location;
  * must have the same match @op and the same @arg.
  */
 static TfwNipDef *
-tfw_nipdef_lookup(TfwLocation *loc, int op, char *arg, int len)
+tfw_nipdef_lookup(TfwLocation *loc, int op, const char *arg, size_t len)
 {
 	TfwNipDef *nipdef;
 
@@ -330,9 +330,10 @@ tfw_nipdef_lookup(TfwLocation *loc, int op, char *arg, int len)
 }
 
 static TfwNipDef *
-tfw_nipdef_lookup_dup(TfwLocation *loc, int method, int op, char *arg, int len)
+tfw_nipdef_lookup_dup(TfwLocation *loc, int method,
+		      int op, const char *arg, size_t len)
 {
-	int i;
+	size_t i;
 	TfwNipDef *nipdef;
 
 	if (!loc->nipdef_sz)
@@ -360,7 +361,8 @@ tfw_nipdef_lookup_dup(TfwLocation *loc, int method, int op, char *arg, int len)
  * a pointer to the memory allocated to hold the definition.
  */
 static TfwNipDef *
-tfw_nipdef_addnew(TfwLocation *loc, int method, int op, char *arg, int len)
+tfw_nipdef_addnew(TfwLocation *loc, int method,
+		  int op, const char *arg, size_t len)
 {
 	char *data;
 	TfwNipDef *nipdef;
@@ -386,8 +388,9 @@ tfw_nipdef_addnew(TfwLocation *loc, int method, int op, char *arg, int len)
 static int
 tfw_handle_nonidempotent(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	int ret, len, method, op;
-	char *in_method, *in_op, *arg;
+	size_t len;
+	int ret, method, op;
+	const char *in_method, *in_op, *arg;
 	TfwLocation *loc = tfwcfg_this_location;
 	TfwNipDef *nipdef;
 
@@ -404,7 +407,7 @@ tfw_handle_nonidempotent(TfwCfgSpec *cs, TfwCfgEntry *ce)
 	}
 
 	/* The method: one of GET, PUT, POST, etc. in form of a bitmask. */
-	in_method = (char *)ce->vals[0];
+	in_method = ce->vals[0];
 	ret = tfw_cfg_map_enum(tfw_method_enum, in_method, &method);
 	if (ret) {
 		TFW_ERR("Unsupported HTTP method: '%s %s'\n",
@@ -413,7 +416,7 @@ tfw_handle_nonidempotent(TfwCfgSpec *cs, TfwCfgEntry *ce)
 	}
 
 	/* The match operator. */
-	in_op = (char *)ce->vals[1];
+	in_op = ce->vals[1];
 	ret = tfw_cfg_map_enum(tfw_match_enum, in_op, &op);
 	if (ret) {
 		TFW_ERR("Unsupported match OP: '%s %s'\n", cs->name, in_op);
@@ -421,7 +424,7 @@ tfw_handle_nonidempotent(TfwCfgSpec *cs, TfwCfgEntry *ce)
 	}
 
 	/* The match string. */
-	arg = (char *)ce->vals[2];
+	arg = ce->vals[2];
 	len = strlen(arg);
 
 	/*
@@ -471,9 +474,9 @@ tfw_handle_out_nonidempotent(TfwCfgSpec *cs, TfwCfgEntry *ce)
  * location sections.
  */
 static TfwCaPolicy *
-tfw_capolicy_lookup(const short cmd, const short op, const char *arg, int len)
+tfw_capolicy_lookup(int cmd, int op, const char *arg, size_t len)
 {
-	int i;
+	size_t i;
 
 	for (i = 0; i < tfw_capolicy_sz; ++i) {
 		TfwCaPolicy *capo = &tfw_capolicy[i];
@@ -490,7 +493,7 @@ tfw_capolicy_lookup(const short cmd, const short op, const char *arg, int len)
  * in the array for all cache policy entries from all location sections.
  */
 static TfwCaPolicy *
-tfw_capolicy_new(const short cmd, const short op, const char *arg, int len)
+tfw_capolicy_new(int cmd, int op, const char *arg, size_t len)
 {
 	char *argmem;
 	TfwCaPolicy *capo;
@@ -533,9 +536,10 @@ tfw_capolicy_add(TfwLocation *loc, TfwCaPolicy *capo)
  * string that is listed.
  */
 static int
-tfw_handle_capolicy(TfwCfgSpec *cs, TfwCfgEntry *ce, const short cmd)
+tfw_handle_capolicy(TfwCfgSpec *cs, TfwCfgEntry *ce, int cmd)
 {
-	int i, ret, len;
+	int ret;
+	size_t i, len;
 	tfw_match_t op;
 	const char *in_op, *arg;
 
@@ -627,9 +631,9 @@ tfw_handle_out_cache_bypass(TfwCfgSpec *cs, TfwCfgEntry *ce)
  * in the array that holds all location directives.
  */
 static TfwLocation *
-tfw_location_lookup(tfw_match_t op, const char *arg, int len)
+tfw_location_lookup(tfw_match_t op, const char *arg, size_t len)
 {
-	int i;
+	size_t i;
 
 	for (i = 0; i < tfw_location_sz; ++i) {
 		TfwLocation *loc = &tfw_location[i];
@@ -646,12 +650,12 @@ tfw_location_lookup(tfw_match_t op, const char *arg, int len)
  * The entry is placed in the array that holds all location directives.
  */
 static TfwLocation *
-tfw_location_new(tfw_match_t op, const char *arg, int len)
+tfw_location_new(tfw_match_t op, const char *arg, size_t len)
 {
 	TfwLocation *loc;
 	char *argmem, *data;
-	int size = sizeof(TfwCaPolicy *) * TFW_CAPOLICY_ARRAY_SZ
-		   + sizeof(TfwNipDef *) * TFW_NIPDEF_ARRAY_SZ;
+	size_t size = sizeof(TfwCaPolicy *) * TFW_CAPOLICY_ARRAY_SZ
+		    + sizeof(TfwNipDef *) * TFW_NIPDEF_ARRAY_SZ;
 
 	if (tfw_location_sz == TFW_LOCATION_ARRAY_SZ)
 		return NULL;
@@ -683,7 +687,8 @@ tfw_location_new(tfw_match_t op, const char *arg, int len)
 static int
 tfw_begin_location(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	int ret, len;
+	int ret;
+	size_t len;
 	tfw_match_t op;
 	const char *in_op, *arg;
 
@@ -747,7 +752,7 @@ tfw_finish_location(TfwCfgSpec *cs)
 static void
 __tfw_cleanup_locache(void)
 {
-	int i, k;
+	size_t i, k;
 
 	for (i = 0; i < tfw_location_sz; ++i) {
 		TfwLocation *loc = &tfw_location[i];
@@ -794,7 +799,7 @@ tfw_cleanup_locache(TfwCfgSpec *cs)
 static bool
 tfw_capuacl_lookup(TfwVhost *vhost, TfwAddr *addr)
 {
-	int i;
+	size_t i;
 	struct in6_addr *inaddr = &addr->v6.sin6_addr;
 
 	for (i = 0; i < vhost->capuacl_sz; ++i) {
@@ -811,7 +816,7 @@ tfw_capuacl_lookup(TfwVhost *vhost, TfwAddr *addr)
 static int
 tfw_handle_cache_purge_acl(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	unsigned int i;
+	size_t i;
 	const char *val;
 	TfwVhost *vhost = &tfw_vhost_dflt;
 
@@ -853,7 +858,7 @@ tfw_handle_cache_purge_acl(TfwCfgSpec *cs, TfwCfgEntry *ce)
 static int
 tfw_handle_cache_purge(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	unsigned int i;
+	size_t i;
 	const char *val;
 	TfwVhost *vhost = &tfw_vhost_dflt;
 
@@ -889,7 +894,7 @@ tfw_handle_cache_purge(TfwCfgSpec *cs, TfwCfgEntry *ce)
 static int
 tfw_handle_hdr_via(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	int len;
+	size_t len;
 	TfwVhost *vhost = &tfw_vhost_dflt;
 
 	if (ce->attr_n) {
@@ -1017,13 +1022,13 @@ static TfwCfgSpec tfw_vhost_cfg_specs[] = {
 		.allow_repeat = true,
 		.cleanup = tfw_cleanup_locache
         },
-        {
+	{
 		"nonidempotent", NULL,
 		tfw_handle_out_nonidempotent,
 		.allow_none = true,
 		.allow_repeat = true,
 		.cleanup = tfw_cleanup_locache
-        },
+	},
 	{
 		"location", NULL,
 		tfw_cfg_handle_children,
@@ -1056,7 +1061,7 @@ tfw_vhost_init(void)
 void
 tfw_vhost_exit(void)
 {
-	int i;
+	size_t i;
 
 	for (i = 0; i < tfw_location_sz; ++i)
 		if (tfw_location[i].capo)
diff --git a/tempesta_fw/vhost.h b/tempesta_fw/vhost.h
index 566f21882..6b23ace3a 100644
--- a/tempesta_fw/vhost.h
+++ b/tempesta_fw/vhost.h
@@ -32,9 +32,9 @@
  * @arg		- String for the match operator.
  */
 typedef struct {
-	unsigned int	method;
+	int		method;
 	short		op;
-	unsigned int	len;
+	size_t		len;
 	const char	*arg;
 } TfwNipDef;
 
@@ -55,7 +55,7 @@ typedef enum {
 typedef struct {
 	short		cmd;
 	short		op;
-	unsigned int	len;
+	size_t		len;
 	const char	*arg;
 } TfwCaPolicy;
 
@@ -73,9 +73,9 @@ typedef struct {
 typedef struct {
 	short		op;
 	const char	*arg;
-	unsigned int	len;
-	unsigned int	capo_sz;
-	unsigned int	nipdef_sz;
+	size_t		len;
+	size_t		capo_sz;
+	size_t		nipdef_sz;
 	TfwCaPolicy	**capo;
 	TfwNipDef	**nipdef;
 } TfwLocation;
@@ -97,10 +97,10 @@ typedef struct {
 	TfwLocation	*loc_dflt;
 	TfwAddr		*capuacl;
 	const char	*hdr_via;
-	unsigned int	loc_sz;
-	unsigned int	loc_dflt_sz;
-	unsigned int	capuacl_sz;
-	unsigned int	hdr_via_len;
+	size_t		loc_sz;
+	size_t		loc_dflt_sz;
+	size_t		capuacl_sz;
+	size_t		hdr_via_len;
 	u8		cache_purge:1;
 	u8		cache_purge_mode:2;
 	u8		cache_purge_acl:1;

From c8dccc0781fafafffce0ce19db1dc1a85ec1fe99 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Tue, 3 Jan 2017 02:20:11 +0300
Subject: [PATCH 23/65] Better handling of the number of reconnect attempts.

---
 tempesta_fw/sock_srv.c | 39 ++++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index 1503ef1d0..f4ab89b6f 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -55,11 +55,6 @@
  * ------------------------------------------------------------------------
  */
 
-/*
- * Default number of reconnect attempts. Zero means unlimited number.
- */
-#define TFW_SRV_RETRY_ATTEMPTS_DEF	0		/* default value */
-
 /**
  * TfwConnection extension for server sockets.
  *
@@ -113,6 +108,9 @@
  *    reused. So the attempt to reconnect has to wait. It is started as
  *    soon as the last client releases the server connection.
  */
+/**
+ * Note: `attempts` and `max_attempts` must be of the same type.
+ */
 typedef struct {
 	TfwConnection		conn;
 	unsigned long		timeout;
@@ -207,11 +205,11 @@ tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
 	 * The timeout is not increased after 1 second as it has moderate
 	 * overhead, and it's still good in response time.
 	 *
-	 * FIXME: The limit on the number of reconnect attempts is used
+	 * Note that the limit on the number of reconnect attempts is used
 	 * to re-schedule requests that would never be forwarded otherwise.
-	 * Still, attempts to reconnect may be continued in hopes that the
-	 * connection will be established sooner or later. Otherwise thei
-	 * connection will stay dead until restart.
+	 * However, the attempts to reconnect are continued in hopes that
+	 * the connection will be re-established sooner or later. Otherwise
+	 * the connection will stay dead until Tempesta's restart.
 	 */
 	static const unsigned long timeouts[] = { 1, 10, 100, 250, 500, 1000 };
 
@@ -219,9 +217,13 @@ tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
 	if (unlikely(!ss_active()))
 		return;
 
-	if (unlikely(srv_conn->max_attempts
-		     && (srv_conn->attempts >= srv_conn->max_attempts)))
-	{
+	/*
+	 * max_attempts can be the maximum value for the data type to mean
+	 * the unlimited number of attempts, which is the value that should
+	 * never be reached. UINT_MAX seconds is more than 136 years. It's
+	 * safe to assume that it's not reached in a single run of Tempesta.
+	 */
+	if (unlikely(srv_conn->attempts >= srv_conn->max_attempts)) {
 		TfwAddr *srv_addr = &srv_conn->conn.peer->addr;
 		char s_addr[TFW_ADDR_STR_BUF_SIZE] = { 0 };
 		tfw_addr_ntop(srv_addr, s_addr, sizeof(s_addr));
@@ -229,7 +231,6 @@ tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
 			 "The server connection [%s] is down permanently.\n",
 			 srv_conn->max_attempts, s_addr);
 		tfw_connection_repair(&srv_conn->conn);
-		return;
 	}
 	if (srv_conn->attempts < ARRAY_SIZE(timeouts)) {
 		srv_conn->timeout = timeouts[srv_conn->attempts];
@@ -323,7 +324,7 @@ tfw_sock_srv_connect_complete(struct sock *sk)
 	/* Let schedulers use the connection hereafter. */
 	tfw_connection_revive(conn);
 
-	/* Repair the connection is necessary. */
+	/* Repair the connection if necessary. */
 	if (unlikely(tfw_connection_restricted(conn)))
 		tfw_connection_repair(conn);
 
@@ -565,24 +566,24 @@ tfw_sock_srv_delete_all_conns(void)
 #define TFW_SRV_CONNS_N_DEF		"32"
 
 /*
- * Server connection's maximum queue size, and default timeout for
- * requests in the queue.
+ * Default values for various configuration directives and options.
  */
 #define TFW_SRV_QUEUE_SIZE_DEF		1000	/* Max queue size */
 #define TFW_SRV_SEND_TIMEOUT_DEF	60	/* Default request timeout */
 #define TFW_SRV_SEND_TRIES_DEF		5	/* Default number of tries */
 #define TFW_SRV_RETRY_NIP_DEF		0	/* Do NOT resend NIP reqs */
+#define TFW_SRV_RETRY_ATTEMPTS_DEF	10	/* Reconnect attempts. */
 
 static int tfw_cfg_in_queue_size = TFW_SRV_QUEUE_SIZE_DEF;
 static int tfw_cfg_in_send_timeout = TFW_SRV_SEND_TIMEOUT_DEF;
 static int tfw_cfg_in_send_tries = TFW_SRV_SEND_TRIES_DEF;
 static int tfw_cfg_in_retry_nip = TFW_SRV_RETRY_NIP_DEF;
+static int tfw_cfg_in_retry_attempts = TFW_SRV_RETRY_ATTEMPTS_DEF;
+
 static int tfw_cfg_out_queue_size = TFW_SRV_QUEUE_SIZE_DEF;
 static int tfw_cfg_out_send_timeout = TFW_SRV_SEND_TIMEOUT_DEF;
 static int tfw_cfg_out_send_tries = TFW_SRV_SEND_TRIES_DEF;
 static int tfw_cfg_out_retry_nip = TFW_SRV_RETRY_NIP_DEF;
-
-static int tfw_cfg_in_retry_attempts = TFW_SRV_RETRY_ATTEMPTS_DEF;
 static int tfw_cfg_out_retry_attempts = TFW_SRV_RETRY_ATTEMPTS_DEF;
 
 static int
@@ -683,7 +684,7 @@ tfw_cfg_set_conn_tries(TfwServer *srv, int attempts)
 	TfwSrvConnection *srv_conn;
 
 	list_for_each_entry(srv_conn, &srv->conn_list, conn.list)
-		srv_conn->max_attempts = attempts;
+		srv_conn->max_attempts = attempts ? : UINT_MAX;
 
 	return 0;
 }

From 2736acaeb963aeafc66bac89fc9e8e7300698f98 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Tue, 3 Jan 2017 02:41:02 +0300
Subject: [PATCH 24/65] Consider that srv_conn->qsize is always modified under
 a lock.

---
 tempesta_fw/connection.c |  1 +
 tempesta_fw/connection.h |  2 +-
 tempesta_fw/http.c       | 16 ++++++++--------
 tempesta_fw/server.h     |  2 +-
 tempesta_fw/sock_srv.c   |  4 +---
 5 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/tempesta_fw/connection.c b/tempesta_fw/connection.c
index 726f9ea97..573dfe51f 100644
--- a/tempesta_fw/connection.c
+++ b/tempesta_fw/connection.c
@@ -38,6 +38,7 @@ tfw_connection_init(TfwConnection *conn)
 
 	INIT_LIST_HEAD(&conn->list);
 	INIT_LIST_HEAD(&conn->msg_queue);
+	INIT_LIST_HEAD(&conn->nip_queue);
 	spin_lock_init(&conn->msg_qlock);
 }
 
diff --git a/tempesta_fw/connection.h b/tempesta_fw/connection.h
index b7d95d69c..4a7d5e16a 100644
--- a/tempesta_fw/connection.h
+++ b/tempesta_fw/connection.h
@@ -100,7 +100,7 @@ typedef struct tfw_connection_t {
 	spinlock_t		msg_qlock;
 	unsigned long		flags;					/*srv*/
 	atomic_t		refcnt;
-	atomic_t		qsize;					/*srv*/
+	int			qsize;					/*srv*/
 	struct timer_list	timer;
 	TfwMsg			*msg;
 	TfwMsg			*msg_sent;				/*srv*/
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 12dba2110..7e7e63d02 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -482,8 +482,8 @@ tfw_http_req_move2equeue(TfwConnection *srv_conn, TfwHttpReq *req,
 {
 	tfw_http_req_nonidemp_delist(srv_conn, req);
 	list_move_tail(&req->msg.fwd_list, equeue);
+	srv_conn->qsize--;
 	req->rstatus = status;
-	atomic_dec(&srv_conn->qsize);
 }
 
 /*
@@ -617,7 +617,7 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 	spin_lock(&srv_conn->msg_qlock);
 	drained = tfw_http_conn_drained(srv_conn);
 	list_add_tail(&req->msg.fwd_list, &srv_conn->msg_queue);
-	atomic_inc(&srv_conn->qsize);
+	srv_conn->qsize++;
 	if (tfw_http_req_is_nonidempotent(req))
 		__tfw_http_req_nonidemp_enlist(srv_conn, req);
 	if (tfw_http_conn_on_hold(srv_conn)) {
@@ -636,7 +636,7 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 	if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
 		tfw_http_req_nonidemp_delist(srv_conn, req);
 		list_del_init(&req->msg.fwd_list);
-		atomic_dec(&srv_conn->qsize);
+		srv_conn->qsize--;
 		spin_unlock(&srv_conn->msg_qlock);
 		TFW_DBG2("%s: Forwarding error: conn=[%p] req=[%p]\n",
 			 __func__, srv_conn, req);
@@ -674,7 +674,7 @@ tfw_http_req_fwd_handlenip(TfwConnection *srv_conn)
 				   : list_entry(lent->prev, TfwMsg, fwd_list);
 		__tfw_http_req_nonidemp_delist(srv_conn, req_sent);
 		list_del_init(&req_sent->msg.fwd_list);
-		atomic_dec(&srv_conn->qsize);
+		srv_conn->qsize--;
 		tfw_http_send_404(req_sent);
 		TFW_INC_STAT_BH(clnt.msgs_otherr);
 	}
@@ -856,7 +856,7 @@ tfw_http_req_fwd_resched(TfwConnection *srv_conn)
 	list_for_each_entry_safe(req, tmp, fwd_queue, msg.fwd_list) {
 		tfw_http_req_nonidemp_delist(srv_conn, req);
 		list_del_init(&req->msg.fwd_list);
-		atomic_dec(&srv_conn->qsize);
+		srv_conn->qsize--;
 		if (!(sconn = tfw_sched_get_srv_conn((TfwMsg *)req))) {
 			TFW_WARN("Unable to find a backend server\n");
 			tfw_http_send_404(req);
@@ -874,7 +874,7 @@ tfw_http_req_fwd_resched(TfwConnection *srv_conn)
 		tfw_http_req_fwd(sconn, req);
 		tfw_connection_put(sconn);
 	}
-	BUG_ON(atomic_read(&srv_conn->qsize));
+	BUG_ON(srv_conn->qsize);
 }
 
 /*
@@ -1819,7 +1819,7 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 
 	spin_lock(&srv_conn->msg_qlock);
 	if (unlikely(list_empty(fwd_queue))) {
-		BUG_ON(atomic_read(&srv_conn->qsize));
+		BUG_ON(srv_conn->qsize);
 		spin_unlock(&srv_conn->msg_qlock);
 		/* @conn->msg will get NULLed in the process. */
 		TFW_WARN("Paired request missing, "
@@ -1830,7 +1830,7 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 	}
 	req = list_first_entry(fwd_queue, TfwHttpReq, msg.fwd_list);
 	list_del_init(&req->msg.fwd_list);
-	atomic_dec(&srv_conn->qsize);
+	srv_conn->qsize--;
 	if ((TfwMsg *)req == srv_conn->msg_sent)
 		srv_conn->msg_sent = NULL;
 	tfw_http_req_nonidemp_delist(srv_conn, req);
diff --git a/tempesta_fw/server.h b/tempesta_fw/server.h
index e5d8c66db..4f593fe15 100644
--- a/tempesta_fw/server.h
+++ b/tempesta_fw/server.h
@@ -127,7 +127,7 @@ static inline bool
 tfw_server_queue_full(TfwConnection *srv_conn)
 {
 	TfwServer *srv = (TfwServer *)srv_conn->peer;
-	return atomic_read(&srv_conn->qsize) >= srv->qsize_max;
+	return ACCESS_ONCE(srv_conn->qsize) >= srv->qsize_max;
 }
 
 /* Server group routines. */
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index f4ab89b6f..c672ffd2f 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -500,8 +500,6 @@ tfw_srv_conn_alloc(void)
 		return NULL;
 
 	tfw_connection_init(&srv_conn->conn);
-	INIT_LIST_HEAD(&srv_conn->conn.nip_queue);
-	atomic_set(&srv_conn->conn.qsize, 0);
 	__setup_retry_timer(srv_conn);
 	ss_proto_init(&srv_conn->conn.proto,
 		      &tfw_sock_srv_ss_hooks, Conn_HttpSrv);
@@ -517,7 +515,7 @@ tfw_srv_conn_free(TfwSrvConnection *srv_conn)
 	/* Check that all nested resources are freed. */
 	tfw_connection_validate_cleanup(&srv_conn->conn);
 	BUG_ON(!list_empty(&srv_conn->conn.nip_queue));
-	BUG_ON(atomic_read(&srv_conn->conn.qsize));
+	BUG_ON(ACCESS_ONCE(srv_conn->conn.qsize));
 
 	kmem_cache_free(tfw_srv_conn_cache, srv_conn);
 }

From 10d818eb49ba2554716003aef98841ebbf61faf3 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Tue, 3 Jan 2017 02:57:31 +0300
Subject: [PATCH 25/65] __init_{req/resp}_ss_flags() to
 tfw_http_{req/resp}_init_ss_flags().

These functions are no longer internal, so they are renamed according
to the naming conventions.
---
 tempesta_fw/http.c      | 6 +++---
 tempesta_fw/http.h      | 4 ++--
 tempesta_fw/http_sess.c | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 7e7e63d02..9599b5882 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -217,7 +217,7 @@ tfw_http_send_resp(TfwHttpReq *req, TfwStr *msg, const TfwStr *date)
 	tfw_http_prep_date(date->ptr);
 	tfw_http_msg_write(&it, hmresp, msg);
 
-	__init_resp_ss_flags((TfwHttpResp *)hmresp, req);
+	tfw_http_resp_init_ss_flags((TfwHttpResp *)hmresp, req);
 	tfw_http_resp_fwd(req, (TfwHttpResp *)hmresp);
 
 	return 0;
@@ -1232,7 +1232,7 @@ tfw_http_adjust_req(TfwHttpReq *req)
 	int r;
 	TfwHttpMsg *hm = (TfwHttpMsg *)req;
 
-	__init_req_ss_flags(req);
+	tfw_http_req_init_ss_flags(req);
 
 	r = tfw_http_add_x_forwarded_for(hm);
 	if (r)
@@ -1258,7 +1258,7 @@ tfw_http_adjust_resp(TfwHttpResp *resp, TfwHttpReq *req)
 	int r, conn_flg = req->flags & __TFW_HTTP_CONN_MASK;
 	TfwHttpMsg *hm = (TfwHttpMsg *)resp;
 
-	__init_resp_ss_flags(resp, req);
+	tfw_http_resp_init_ss_flags(resp, req);
 
 	r = tfw_http_sess_resp_process(resp, req);
 	if (r < 0)
diff --git a/tempesta_fw/http.h b/tempesta_fw/http.h
index 6f95abea6..7e2163a7f 100644
--- a/tempesta_fw/http.h
+++ b/tempesta_fw/http.h
@@ -425,13 +425,13 @@ tfw_current_timestamp(void)
 }
 
 static inline void
-__init_req_ss_flags(TfwHttpReq *req)
+tfw_http_req_init_ss_flags(TfwHttpReq *req)
 {
 	((TfwMsg *)req)->ss_flags |= SS_F_KEEP_SKB;
 }
 
 static inline void
-__init_resp_ss_flags(TfwHttpResp *resp, const TfwHttpReq *req)
+tfw_http_resp_init_ss_flags(TfwHttpResp *resp, const TfwHttpReq *req)
 {
 	if (req->flags & TFW_HTTP_CONN_CLOSE)
 		((TfwMsg *)resp)->ss_flags |= SS_F_CONN_CLOSE;
diff --git a/tempesta_fw/http_sess.c b/tempesta_fw/http_sess.c
index 6b8ba61ed..66f473474 100644
--- a/tempesta_fw/http_sess.c
+++ b/tempesta_fw/http_sess.c
@@ -128,7 +128,7 @@ tfw_http_sticky_send_302(TfwHttpReq *req, StickyVal *sv)
 	if (tfw_http_prep_302(hmresp, req, &cookie))
 		return -1;
 
-	__init_resp_ss_flags((TfwHttpResp *)hmresp, req);
+	tfw_http_resp_init_ss_flags((TfwHttpResp *)hmresp, req);
 	tfw_http_resp_fwd(req, (TfwHttpResp *)hmresp);
 
 	return 0;

From 7cce5a01ecb828db7328f589bff29651608325fb Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Wed, 4 Jan 2017 23:43:02 +0300
Subject: [PATCH 26/65] Remove 'resp' from a union in TfwHttpReq{}.

This fixes a bug where req->resp may temporarily hold a different entity
than a pointer to TfwHttpResp{}, and then be picked up by the response
sending function while it still holds that different entity.
---
 tempesta_fw/http.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tempesta_fw/http.h b/tempesta_fw/http.h
index 7e2163a7f..ae5baf20a 100644
--- a/tempesta_fw/http.h
+++ b/tempesta_fw/http.h
@@ -371,8 +371,8 @@ typedef struct {
 	unsigned long		tm_header;
 	unsigned long		tm_bchunk;
 	unsigned long		hash;
+	TfwHttpMsg		*resp;
 	union {
-		TfwHttpMsg	*resp;
 		unsigned short	rstatus;
 		unsigned short	retries;
 	};

From 6ba9b0dafdfa4d0b18b9eb8cb1ab483e59fcee15 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Sat, 7 Jan 2017 15:52:22 +0300
Subject: [PATCH 27/65] Separate message's SKB data set up from message
 allocation.

Now it's assumed that a message is allocated and initialized to
a level sufficient for intended use. That makes it possible to do
shorter/faster message allocation and initialization for specific
purposes, such as HTTP error responses.
---
 tempesta_fw/cache.c          |   9 +--
 tempesta_fw/http.c           |  24 ++++----
 tempesta_fw/http_msg.c       | 104 ++++++++++++++++++++---------------
 tempesta_fw/http_msg.h       |   4 +-
 tempesta_fw/t/bomber.c       |  11 ++--
 tempesta_fw/t/unit/helpers.c |  30 ++++++----
 6 files changed, 105 insertions(+), 77 deletions(-)

diff --git a/tempesta_fw/cache.c b/tempesta_fw/cache.c
index 129fd21be..1497cd17d 100644
--- a/tempesta_fw/cache.c
+++ b/tempesta_fw/cache.c
@@ -1120,14 +1120,14 @@ tfw_cache_build_resp(TfwCacheEntry *ce)
 	TfwMsgIter it;
 
 	/*
-	 * Allocated response won't be checked by any filters and
+	 * The allocated response won't be checked by any filters and
 	 * is used for sending response data only, so don't initialize
 	 * connection and GFSM fields.
 	 */
-	resp = (TfwHttpResp *)tfw_http_msg_create(NULL, &it, Conn_Srv,
-						  ce->hdr_len + 2);
-	if (!resp)
+	if (!(resp = ((TfwHttpResp *)tfw_http_msg_alloc(Conn_Srv))))
 		return NULL;
+	if (tfw_http_msg_setup((TfwHttpMsg *)resp, &it, ce->hdr_len + 2))
+		goto free;
 
 	/*
 	 * Allocate HTTP headers table of proper size.
@@ -1175,6 +1175,7 @@ tfw_cache_build_resp(TfwCacheEntry *ce)
 	return resp;
 err:
 	TFW_WARN("Cannot use cached response, key=%lx\n", ce->key);
+free:
 	tfw_http_msg_free((TfwHttpMsg *)resp);
 	return NULL;
 }
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 9599b5882..6bd70e87b 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -122,7 +122,7 @@ unsigned long tfw_hash_str(const TfwStr *str);
  * but it includes 'Set-Cookie:' header field that sets Tempesta sticky cookie.
  */
 int
-tfw_http_prep_302(TfwHttpMsg *resp, TfwHttpReq *req, TfwStr *cookie)
+tfw_http_prep_302(TfwHttpMsg *hmresp, TfwHttpReq *req, TfwStr *cookie)
 {
 	size_t data_len = S_302_FIXLEN;
 	int conn_flag = req->flags & __TFW_HTTP_CONN_MASK;
@@ -162,24 +162,24 @@ tfw_http_prep_302(TfwHttpMsg *resp, TfwHttpReq *req, TfwStr *cookie)
 	data_len += req->uri_path.len + cookie->len;
 	data_len += crlf->len;
 
-	if (!tfw_http_msg_create(resp, &it, Conn_Srv, data_len))
+	if (tfw_http_msg_setup(hmresp, &it, data_len))
 		return TFW_BLOCK;
 
 	tfw_http_prep_date(__TFW_STR_CH(&rh, 1)->ptr);
-	tfw_http_msg_write(&it, resp, &rh);
+	tfw_http_msg_write(&it, hmresp, &rh);
 	/*
 	 * HTTP/1.0 may have no host part, so we create relative URI.
 	 * See RFC 1945 9.3 and RFC 7231 7.1.2.
 	 */
 	if (host.len) {
 		static TfwStr proto = { .ptr = S_HTTP, .len = SLEN(S_HTTP) };
-		tfw_http_msg_write(&it, resp, &proto);
-		tfw_http_msg_write(&it, resp, &host);
+		tfw_http_msg_write(&it, hmresp, &proto);
+		tfw_http_msg_write(&it, hmresp, &host);
 	}
-	tfw_http_msg_write(&it, resp, &req->uri_path);
-	tfw_http_msg_write(&it, resp, &part03);
-	tfw_http_msg_write(&it, resp, cookie);
-	tfw_http_msg_write(&it, resp, crlf);
+	tfw_http_msg_write(&it, hmresp, &req->uri_path);
+	tfw_http_msg_write(&it, hmresp, &part03);
+	tfw_http_msg_write(&it, hmresp, cookie);
+	tfw_http_msg_write(&it, hmresp, crlf);
 
 	return TFW_PASS;
 }
@@ -211,8 +211,12 @@ tfw_http_send_resp(TfwHttpReq *req, TfwStr *msg, const TfwStr *date)
 		msg->len += crlf->len - crlf_len;
 	}
 
-	if (!(hmresp = tfw_http_msg_create(NULL, &it, Conn_Srv, msg->len)))
+	if (!(hmresp = tfw_http_msg_alloc_err_resp()))
 		return -ENOMEM;
+	if (tfw_http_msg_setup(hmresp, &it, msg->len)) {
+		tfw_http_msg_free(hmresp);
+		return -ENOMEM;
+	}
 
 	tfw_http_prep_date(date->ptr);
 	tfw_http_msg_write(&it, hmresp, msg);
diff --git a/tempesta_fw/http_msg.c b/tempesta_fw/http_msg.c
index 176b802e5..27c4d8c7a 100644
--- a/tempesta_fw/http_msg.c
+++ b/tempesta_fw/http_msg.c
@@ -631,8 +631,6 @@ tfw_http_msg_hdr_add(TfwHttpMsg *hm, TfwStr *hdr)
  * of SKBs and page fragments to hold the payload, and add them to the
  * message. Put as much as possible in one SKB. TCP GSO will take care of
  * segmentation. The allocated payload space will be filled with data.
- *
- * Call tfw_http_msg_free() is for @hm if the function fails.
  */
 static int
 __msg_alloc_skb_data(TfwHttpMsg *hm, size_t len)
@@ -651,33 +649,30 @@ __msg_alloc_skb_data(TfwHttpMsg *hm, size_t len)
 }
 
 /**
- * Initialize @hm or allocate an HTTP message if it's NULL.
- * Sets @hm up with empty SKB space of size @data_len for data writing.
- * An iterator @it is set up to support consecutive writes.
+ * Set up @hm with empty SKB space of size @data_len for data writing.
+ * Set up the iterator @it to support consecutive writes.
  *
  * This function is intended to work together with tfw_http_msg_write()
- * that uses the @it iterator.
- * Use dynamic allocation if you need to do the message transformations
- * (e.g. adjust headers) and avoid it if you just need to send the message.
+ * or tfw_http_msg_add_data() which use the @it iterator.
+ *
+ * @hm must be allocated dynamically (NOT statically) as it may have
+ * to sit in a queue long after the caller has finished. It's assumed
+ * that @hm is properly initialized.
+ *
+ * It's essential to understand, that "properly initialized" for @hm
+ * may mean different things depending on the intended use. Currently
+ * this function is called to send a response from cache, or to send
+ * an error response. An error response is not parsed or adjusted, so
+ * a shorter/faster version of message allocation and initialization
+ * may be used. (See tfw_http_msg_alloc_err_resp()).
  */
-TfwHttpMsg *
-tfw_http_msg_create(TfwHttpMsg *hm, TfwMsgIter *it, int type, size_t data_len)
+int
+tfw_http_msg_setup(TfwHttpMsg *hm, TfwMsgIter *it, size_t data_len)
 {
-	if (hm) {
-		memset(hm, 0, sizeof(*hm));
-		ss_skb_queue_head_init(&hm->msg.skb_list);
-		INIT_LIST_HEAD(&hm->msg.seq_list);
-		INIT_LIST_HEAD(&hm->msg.fwd_list);
-		if (__msg_alloc_skb_data(hm, data_len))
-			return NULL;
-	} else {
-		if (!(hm = tfw_http_msg_alloc(type)))
-			return NULL;
-		if (__msg_alloc_skb_data(hm, data_len)) {
-			tfw_http_msg_free(hm);
-			return NULL;
-		}
-	}
+	int ret;
+
+	if ((ret = __msg_alloc_skb_data(hm, data_len)))
+		return ret;
 
 	it->skb = ss_skb_peek(&hm->msg.skb_list);
 	it->frag = 0;
@@ -685,22 +680,22 @@ tfw_http_msg_create(TfwHttpMsg *hm, TfwMsgIter *it, int type, size_t data_len)
 	BUG_ON(!it->skb);
 	BUG_ON(!skb_shinfo(it->skb)->nr_frags);
 
-	TFW_DBG2("Created new HTTP message %p: type=%d len=%lu\n",
-		 hm, type, data_len);
-	return hm;
+	TFW_DBG2("Set up new HTTP message %p: len=%lu\n", hm, data_len);
+
+	return 0;
 }
-EXPORT_SYMBOL(tfw_http_msg_create);
+EXPORT_SYMBOL(tfw_http_msg_setup);
 
 /*
  * Fill up an HTTP message @hm with data from string @data.
- * This is a quick message creator which doesn't properly initialized
- * the message structure like headers table. So @hm couldn't be used in
- * HTTP message transformations.
+ * This is a quick message creator which doesn't maintain properly
+ * parts of the message structure like headers table. So @hm cannot
+ * be used where HTTP message transformations are required.
  *
- * An iterator @it is used to support multiple calls to this functions after
- * set up. This function can only be called after a call to
- * tfw_http_msg_create(). It works only with empty SKB space prepared by
- * the function.
+ * An iterator @it is used to support multiple calls to this function
+ * after the set up. This function can only be called after a call to
+ * tfw_http_msg_setup(). It works only with empty SKB space prepared
+ * by the function.
  */
 int
 tfw_http_msg_write(TfwMsgIter *it, TfwHttpMsg *hm, const TfwStr *data)
@@ -727,7 +722,7 @@ tfw_http_msg_write(TfwMsgIter *it, TfwHttpMsg *hm, const TfwStr *data)
 
 		if (c_size < f_room) {
 			/*
-			 * The chunk has fit in the SKB fragment with room
+			 * The chunk fits in the SKB fragment with room
 			 * to spare. Stay in the same SKB fragment, swith
 			 * to next chunk of the string.
 			 */
@@ -736,8 +731,8 @@ tfw_http_msg_write(TfwMsgIter *it, TfwHttpMsg *hm, const TfwStr *data)
 			frag = ss_skb_frag_next(&it->skb, &it->frag);
 			/*
 			 * If all data from the chunk has been copied,
-			 * then switch to next chunk. Otherwise, stay
-			 * in the current chunk.
+			 * then switch to the next chunk. Otherwise,
+			 * stay in the current chunk.
 			 */
 			if (c_size == f_room) {
 				c_off = 0;
@@ -753,10 +748,10 @@ tfw_http_msg_write(TfwMsgIter *it, TfwHttpMsg *hm, const TfwStr *data)
 EXPORT_SYMBOL(tfw_http_msg_write);
 
 /**
- * Like tfw_http_msg_write(), but properly initialize HTTP message fields,
- * so it can be used in regular transformations.
- * However, the header name and value aren't splitted into different chunks,
- * so advanced headers matching aren't available for @hm.
+ * Similar to tfw_http_msg_write(), but properly maintain @hm header
+ * fields, so that @hm can be used in regular transformations. However,
+ * the header name and the value are not split into different chunks,
+ * so advanced headers matching is not available for @hm.
  */
 int
 tfw_http_msg_add_data(TfwMsgIter *it, TfwHttpMsg *hm, TfwStr *field,
@@ -812,6 +807,25 @@ tfw_http_msg_free(TfwHttpMsg *m)
 }
 EXPORT_SYMBOL(tfw_http_msg_free);
 
+/**
+ * Allocate a new error response message.
+ * This type of message is not parsed or adjusted before it's sent out.
+ * That allows for a short (limited) initialization.
+ */
+TfwHttpMsg *
+tfw_http_msg_alloc_err_resp(void)
+{
+	TfwHttpMsg *hm;
+
+	if (!(hm = (TfwHttpMsg *)tfw_pool_new(TfwHttpResp, TFW_POOL_ZERO)))
+		return NULL;
+
+	INIT_LIST_HEAD(&hm->msg.seq_list);
+	ss_skb_queue_head_init(&hm->msg.skb_list);
+
+	return hm;
+}
+
 /**
  * Add spec header indexes to list of hop-by-hop headers.
  */
@@ -842,12 +856,12 @@ __hbh_parser_init_resp(TfwHttpResp *resp)
 	 */
 	hbh_hdrs->spec = (0x1 << TFW_HTTP_HDR_CONNECTION) |
 			 (0x1 << TFW_HTTP_HDR_SERVER);
-
 }
 
 /**
  * Allocate a new HTTP message.
- * The space to hold the payload is allocated separately.
+ * The allocated message is set up and initialized with full support
+ * for parsing and subsequent adjustment.
  */
 TfwHttpMsg *
 tfw_http_msg_alloc(int type)
diff --git a/tempesta_fw/http_msg.h b/tempesta_fw/http_msg.h
index 86053a071..c322f2d17 100644
--- a/tempesta_fw/http_msg.h
+++ b/tempesta_fw/http_msg.h
@@ -76,8 +76,7 @@ int tfw_http_msg_hdr_xfrm(TfwHttpMsg *hm, char *name, size_t n_len,
 
 int tfw_http_msg_del_hbh_hdrs(TfwHttpMsg *hm);
 
-TfwHttpMsg *tfw_http_msg_create(TfwHttpMsg *hm, TfwMsgIter *it, int type,
-				size_t data_len);
+int tfw_http_msg_setup(TfwHttpMsg *hm, TfwMsgIter *it, size_t data_len);
 int tfw_http_msg_write(TfwMsgIter *it, TfwHttpMsg *hm, const TfwStr *data);
 int tfw_http_msg_add_data(TfwMsgIter *it, TfwHttpMsg *hm, TfwStr *field,
 			  const TfwStr *data);
@@ -88,6 +87,7 @@ int tfw_http_msg_hdr_close(TfwHttpMsg *hm, unsigned int id);
 int tfw_http_msg_grow_hdr_tbl(TfwHttpMsg *hm);
 
 TfwHttpMsg *tfw_http_msg_alloc(int type);
+TfwHttpMsg *tfw_http_msg_alloc_err_resp(void);
 void tfw_http_msg_free(TfwHttpMsg *m);
 
 #endif /* __TFW_HTTP_MSG_H__ */
diff --git a/tempesta_fw/t/bomber.c b/tempesta_fw/t/bomber.c
index 1ed5a3896..0d7e6e0d6 100644
--- a/tempesta_fw/t/bomber.c
+++ b/tempesta_fw/t/bomber.c
@@ -237,8 +237,8 @@ tfw_bmb_msg_send(TfwBmbTask *task, int cn)
 {
 	int fz_tries = 0, r;
 	TfwStr msg;
-	TfwHttpMsg req;
 	TfwMsgIter it;
+	TfwHttpMsg hmreq;
 
 	do {
 		if (++fz_tries > 10) {
@@ -262,7 +262,10 @@ tfw_bmb_msg_send(TfwBmbTask *task, int cn)
 	msg.flags = 0;
 	BUG_ON(msg.len > BUF_SIZE);
 
-	if (!tfw_http_msg_create(&req, &it, Conn_Clnt, msg.len)) {
+	memset(&hmreq, 0, sizeof(hmreq));
+	ss_skb_queue_head_init(&hmreq.msg.skb_list);
+
+	if (tfw_http_msg_setup(&hmreq, &it, msg.len)) {
 		TFW_WARN("Cannot create HTTP request.\n");
 		return;
 	}
@@ -274,8 +277,8 @@ tfw_bmb_msg_send(TfwBmbTask *task, int cn)
 			"------------------------------\n",
 			task->buf);
 
-	tfw_http_msg_write(&it, &req, &msg);
-	ss_send(task->conn[cn].sk, &req.msg.skb_list, true);
+	tfw_http_msg_write(&it, &hmreq, &msg);
+	ss_send(task->conn[cn].sk, &hmreq.msg.skb_list, true);
 
 	atomic_inc(&bmb_request_send);
 }
diff --git a/tempesta_fw/t/unit/helpers.c b/tempesta_fw/t/unit/helpers.c
index 8c2cf31c4..016b0b555 100644
--- a/tempesta_fw/t/unit/helpers.c
+++ b/tempesta_fw/t/unit/helpers.c
@@ -38,23 +38,26 @@ static TfwConnection conn_req, conn_resp;
 TfwHttpReq *
 test_req_alloc(size_t data_len)
 {
-	TfwHttpReq *req;
+	int ret;
 	TfwMsgIter it;
+	TfwHttpMsg *hmreq;
 
 	/* Actually there were more code here, mostly it was copy-paste from
 	 * tfw_http_msg_alloc(). It is removed because we need to test how it
 	 * initializes the message and we would not like to test the copy-paste.
 	 */
-	req = (TfwHttpReq *)tfw_http_msg_create(NULL, &it, Conn_HttpClnt,
-						data_len);
-	BUG_ON(!req);
+	hmreq = tfw_http_msg_alloc(Conn_HttpClnt);
+	BUG_ON(!hmreq);
+
+	ret = tfw_http_msg_setup(hmreq, &it, data_len);
+	BUG_ON(ret);
 
 	memset(&conn_req, 0, sizeof(TfwConnection));
 	tfw_connection_init(&conn_req);
 	conn_req.proto.type = Conn_HttpClnt;
-	req->conn = &conn_req;
+	hmreq->conn = &conn_req;
 
-	return req;
+	return (TfwHttpReq *)hmreq;
 }
 
 void
@@ -70,19 +73,22 @@ test_req_free(TfwHttpReq *req)
 TfwHttpResp *
 test_resp_alloc(size_t data_len)
 {
-	TfwHttpResp *resp;
+	int ret;
 	TfwMsgIter it;
+	TfwHttpMsg *hmresp;
 
-	resp = (TfwHttpResp *)tfw_http_msg_create(NULL, &it, Conn_HttpSrv,
-						  data_len);
-	BUG_ON(!resp);
+	hmresp = tfw_http_msg_alloc(Conn_HttpSrv);
+	BUG_ON(!hmresp);
+
+	ret = tfw_http_msg_setup(hmresp, &it, data_len);
+	BUG_ON(ret);
 
 	memset(&conn_resp, 0, sizeof(TfwConnection));
 	tfw_connection_init(&conn_req);
 	conn_resp.proto.type = Conn_HttpSrv;
-	resp->conn = &conn_resp;
+	hmresp->conn = &conn_resp;
 
-	return resp;
+	return (TfwHttpResp *)hmresp;
 }
 
 void

From 6a873a82d4c0c601d4cbdfb6d24b7faf52697edc Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Sat, 7 Jan 2017 19:27:42 +0300
Subject: [PATCH 28/65] Move fwd_list from TfwMsg{} to TfwHttpReq{}.

This member is used only for requests.
---
 tempesta_fw/http.c     | 53 +++++++++++++++++++++---------------------
 tempesta_fw/http.h     |  2 ++
 tempesta_fw/http_msg.c |  2 +-
 tempesta_fw/msg.h      |  4 +---
 4 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 6bd70e87b..3ef34884a 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -451,6 +451,7 @@ tfw_http_conn_on_hold(TfwConnection *srv_conn)
 static inline bool
 tfw_http_conn_drained(TfwConnection *srv_conn)
 {
+	TfwMsg *msg;
 	struct list_head *fwd_queue = &srv_conn->msg_queue;
 
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
@@ -459,7 +460,8 @@ tfw_http_conn_drained(TfwConnection *srv_conn)
 		return true;
 	if (!srv_conn->msg_sent)
 		return false;
-	if (srv_conn->msg_sent == list_last_entry(fwd_queue, TfwMsg, fwd_list))
+	msg = (TfwMsg *)list_last_entry(fwd_queue, TfwHttpReq, fwd_list);
+	if (srv_conn->msg_sent == msg)
 		return true;
 	return false;
 }
@@ -485,7 +487,7 @@ tfw_http_req_move2equeue(TfwConnection *srv_conn, TfwHttpReq *req,
 			 struct list_head *equeue, unsigned short status)
 {
 	tfw_http_req_nonidemp_delist(srv_conn, req);
-	list_move_tail(&req->msg.fwd_list, equeue);
+	list_move_tail(&req->fwd_list, equeue);
 	srv_conn->qsize--;
 	req->rstatus = status;
 }
@@ -503,8 +505,8 @@ tfw_http_req_zap_error(struct list_head *equeue)
 	TFW_DBG2("%s: queue is %sempty\n",
 		 __func__, list_empty(err_queue) ? "" : "NOT ");
 
-	list_for_each_entry_safe(req, tmp, equeue, msg.fwd_list) {
-		list_del_init(&req->msg.fwd_list);
+	list_for_each_entry_safe(req, tmp, equeue, fwd_list) {
+		list_del_init(&req->fwd_list);
 		if (req->rstatus == 500)
 			tfw_http_send_500(req);
 		else if (req->rstatus == 504)
@@ -537,10 +539,10 @@ __tfw_http_req_fwd_stalled(TfwConnection *srv_conn, struct list_head *equeue)
 	 * queues that can be processed without the lock.
 	 */
 	req = srv_conn->msg_sent
-	    ? (TfwHttpReq *)list_next_entry(srv_conn->msg_sent, fwd_list)
-	    : (TfwHttpReq *)list_first_entry(fwd_queue, TfwMsg, fwd_list);
+	    ? list_next_entry((TfwHttpReq *)srv_conn->msg_sent, fwd_list)
+	    : list_first_entry(fwd_queue, TfwHttpReq, fwd_list);
 
-	list_for_each_entry_safe_from(req, tmp, fwd_queue, msg.fwd_list) {
+	list_for_each_entry_safe_from(req, tmp, fwd_queue, fwd_list) {
 		unsigned long jtimeout = jiffies - req->jtstamp;
 		if (time_after(jtimeout, srv->qjtimeout)) {
 			TFW_DBG2("%s: Eviction: req=[%p] overdue=[%dms]\n",
@@ -620,7 +622,7 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 
 	spin_lock(&srv_conn->msg_qlock);
 	drained = tfw_http_conn_drained(srv_conn);
-	list_add_tail(&req->msg.fwd_list, &srv_conn->msg_queue);
+	list_add_tail(&req->fwd_list, &srv_conn->msg_queue);
 	srv_conn->qsize++;
 	if (tfw_http_req_is_nonidempotent(req))
 		__tfw_http_req_nonidemp_enlist(srv_conn, req);
@@ -639,7 +641,7 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 	}
 	if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
 		tfw_http_req_nonidemp_delist(srv_conn, req);
-		list_del_init(&req->msg.fwd_list);
+		list_del_init(&req->fwd_list);
 		srv_conn->qsize--;
 		spin_unlock(&srv_conn->msg_qlock);
 		TFW_DBG2("%s: Forwarding error: conn=[%p] req=[%p]\n",
@@ -671,13 +673,12 @@ tfw_http_req_fwd_handlenip(TfwConnection *srv_conn)
 	if (req_sent && tfw_http_req_is_nonidempotent(req_sent)
 	    && likely(!(srv->flags & TFW_SRV_RETRY_NON_IDEMP)))
 	{
-		struct list_head *lent = &req_sent->msg.fwd_list;
 		BUG_ON(list_empty(&req_sent->nip_list));
-		srv_conn->msg_sent = (lent == srv_conn->msg_queue.next)
-				   ? NULL
-				   : list_entry(lent->prev, TfwMsg, fwd_list);
+		srv_conn->msg_sent =
+			(&req_sent->fwd_list == srv_conn->msg_queue.next) ?
+			NULL : (TfwMsg *)list_prev_entry(req_sent, fwd_list);
 		__tfw_http_req_nonidemp_delist(srv_conn, req_sent);
-		list_del_init(&req_sent->msg.fwd_list);
+		list_del_init(&req_sent->fwd_list);
 		srv_conn->qsize--;
 		tfw_http_send_404(req_sent);
 		TFW_INC_STAT_BH(clnt.msgs_otherr);
@@ -699,15 +700,15 @@ __tfw_http_req_fwd_resend(TfwConnection *srv_conn,
 	TFW_DBG2("%s: conn=[%p] one_msg=[%s]\n",
 		 __func__, srv_conn, one_msg ? "true" : "false");
 	BUG_ON(!srv_conn->msg_sent);
-	BUG_ON(list_empty(&srv_conn->msg_sent->fwd_list));
+	BUG_ON(list_empty(&((TfwHttpReq *)srv_conn->msg_sent)->fwd_list));
 
-	req = list_first_entry(fwd_queue, TfwHttpReq, msg.fwd_list);
-	end = srv_conn->msg_sent->fwd_list.next;
+	req = list_first_entry(fwd_queue, TfwHttpReq, fwd_list);
+	end = ((TfwHttpReq *)srv_conn->msg_sent)->fwd_list.next;
 
 	/* An equivalent of list_for_each_entry_safe_from() */
-	for (tmp = list_next_entry(req, msg.fwd_list);
-	     &req->msg.fwd_list != end;
-	     req = tmp, tmp = list_next_entry(tmp, msg.fwd_list))
+	for (tmp = list_next_entry(req, fwd_list);
+	     &req->fwd_list != end;
+	     req = tmp, tmp = list_next_entry(tmp, fwd_list))
 	{
 		if (req->retries++ >= srv->retry_max) {
 			TFW_DBG2("%s: Eviction: req=[%p] retries=[%d]\n",
@@ -786,7 +787,7 @@ tfw_http_conn_msg_alloc(TfwConnection *conn)
 
 		spin_lock(&conn->msg_qlock);
 		req = list_first_entry_or_null(&conn->msg_queue,
-					       TfwHttpReq, msg.fwd_list);
+					       TfwHttpReq, fwd_list);
 		spin_unlock(&conn->msg_qlock);
 		if (req && (req->method == TFW_HTTP_METH_HEAD))
 			hm->flags |= TFW_HTTP_VOID_BODY;
@@ -857,9 +858,9 @@ tfw_http_req_fwd_resched(TfwConnection *srv_conn)
 	tfw_http_req_fwd_handlenip(srv_conn);
 
 	/* Process complete queue. */
-	list_for_each_entry_safe(req, tmp, fwd_queue, msg.fwd_list) {
+	list_for_each_entry_safe(req, tmp, fwd_queue, fwd_list) {
 		tfw_http_req_nonidemp_delist(srv_conn, req);
-		list_del_init(&req->msg.fwd_list);
+		list_del_init(&req->fwd_list);
 		srv_conn->qsize--;
 		if (!(sconn = tfw_sched_get_srv_conn((TfwMsg *)req))) {
 			TFW_WARN("Unable to find a backend server\n");
@@ -950,7 +951,7 @@ tfw_http_req_destruct(void *msg)
 	TfwHttpReq *req = msg;
 
 	BUG_ON(!list_empty(&req->msg.seq_list));
-	BUG_ON(!list_empty(&req->msg.fwd_list));
+	BUG_ON(!list_empty(&req->fwd_list));
 	BUG_ON(!list_empty(&req->nip_list));
 
 	if (req->sess)
@@ -1832,8 +1833,8 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 		TFW_INC_STAT_BH(serv.msgs_otherr);
 		return NULL;
 	}
-	req = list_first_entry(fwd_queue, TfwHttpReq, msg.fwd_list);
-	list_del_init(&req->msg.fwd_list);
+	req = list_first_entry(fwd_queue, TfwHttpReq, fwd_list);
+	list_del_init(&req->fwd_list);
 	srv_conn->qsize--;
 	if ((TfwMsg *)req == srv_conn->msg_sent)
 		srv_conn->msg_sent = NULL;
diff --git a/tempesta_fw/http.h b/tempesta_fw/http.h
index ae5baf20a..952e959d1 100644
--- a/tempesta_fw/http.h
+++ b/tempesta_fw/http.h
@@ -341,6 +341,7 @@ typedef struct {
  * @userinfo	- userinfo in URI, not mandatory.
  * @host	- host in URI, may differ from Host header;
  * @uri_path	- path + query + fragment from URI (RFC3986.3);
+ * @fwd_list	- member in the queue of forwarded/backlogged requests;
  * @nip_list	- member in the queue of non-idempotent requests;
  * @method	- HTTP request method, one of GET/PORT/HEAD/etc;
  * @node	- NUMA node where request is serviced;
@@ -363,6 +364,7 @@ typedef struct {
 	TfwStr			userinfo;
 	TfwStr			host;
 	TfwStr			uri_path;
+	struct list_head	fwd_list;
 	struct list_head	nip_list;
 	unsigned char		method;
 	unsigned short		node;
diff --git a/tempesta_fw/http_msg.c b/tempesta_fw/http_msg.c
index 27c4d8c7a..38601433c 100644
--- a/tempesta_fw/http_msg.c
+++ b/tempesta_fw/http_msg.c
@@ -885,7 +885,6 @@ tfw_http_msg_alloc(int type)
 	hm->h_tbl->off = TFW_HTTP_HDR_RAW;
 	memset(hm->h_tbl->tbl, 0, __HHTBL_SZ(1) * sizeof(TfwStr));
 
-	INIT_LIST_HEAD(&hm->msg.fwd_list);
 	INIT_LIST_HEAD(&hm->msg.seq_list);
 	ss_skb_queue_head_init(&hm->msg.skb_list);
 
@@ -896,6 +895,7 @@ tfw_http_msg_alloc(int type)
 		__hbh_parser_init_resp((TfwHttpResp *)hm);
 
 	if (type & Conn_Clnt) {
+		INIT_LIST_HEAD(&((TfwHttpReq *)hm)->fwd_list);
 		INIT_LIST_HEAD(&((TfwHttpReq *)hm)->nip_list);
 		hm->destructor = tfw_http_req_destruct;
 	}
diff --git a/tempesta_fw/msg.h b/tempesta_fw/msg.h
index c47e8bfb7..4066573d3 100644
--- a/tempesta_fw/msg.h
+++ b/tempesta_fw/msg.h
@@ -30,15 +30,13 @@
 #include "sync_socket.h"
 
 /**
- * @seq_list	- member in the ordered queue of incoming requests;
- * @fwd_list	- member in the queue of forwarded/backlogged requests;
+ * @seq_list	- member in the ordered queue of messages;
  * @skb_list	- list of sk_buff that belong to the message;
  * @ss_flags	- message processing flags;
  * @len		- total message length;
  */
 typedef struct {
 	struct list_head	seq_list;
-	struct list_head	fwd_list;
 	int			ss_flags;
 	SsSkbList		skb_list;
 	size_t			len;

From cc0829c94c81e213ad6ad2a83b055a59d61aad65 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Mon, 9 Jan 2017 15:21:18 +0300
Subject: [PATCH 29/65] Fix a bug where sets of ordered responses may be mixed
 up.

Responses must be sent to a client in a strict sequential order.
Before this fix, sets of ordered responses prepared on different
CPUs could have been sent in different order. Now it's serialized
with a separate lock.

Also, client and server queues are clearly distinguished now by
specific (different) names.
---
 tempesta_fw/connection.c |   6 +-
 tempesta_fw/connection.h |  31 ++++++--
 tempesta_fw/http.c       | 161 ++++++++++++++++++++++-----------------
 tempesta_fw/msg.h        |   8 ++
 tempesta_fw/sock_clnt.c  |   6 +-
 tempesta_fw/sock_srv.c   |   4 +
 6 files changed, 131 insertions(+), 85 deletions(-)

diff --git a/tempesta_fw/connection.c b/tempesta_fw/connection.c
index 573dfe51f..84135c3ef 100644
--- a/tempesta_fw/connection.c
+++ b/tempesta_fw/connection.c
@@ -35,11 +35,7 @@ void
 tfw_connection_init(TfwConnection *conn)
 {
 	memset(conn, 0, sizeof(*conn));
-
 	INIT_LIST_HEAD(&conn->list);
-	INIT_LIST_HEAD(&conn->msg_queue);
-	INIT_LIST_HEAD(&conn->nip_queue);
-	spin_lock_init(&conn->msg_qlock);
 }
 
 void
@@ -88,7 +84,7 @@ tfw_connection_release(TfwConnection *conn)
 	/* Ask higher levels to free resources at connection release. */
 	TFW_CONN_HOOK_CALL(conn, conn_release);
 	BUG_ON((TFW_CONN_TYPE(conn) & Conn_Clnt)
-	       && !list_empty(&conn->msg_queue));
+	       && !list_empty(&conn->seq_queue));
 }
 
 /*
diff --git a/tempesta_fw/connection.h b/tempesta_fw/connection.h
index 4a7d5e16a..0beba5eba 100644
--- a/tempesta_fw/connection.h
+++ b/tempesta_fw/connection.h
@@ -76,9 +76,12 @@ enum {
  * @proto	- protocol handler. Base class, must be first;
  * @state	- connection processing state;
  * @list	- member in the list of connections with @peer;
- * @msg_queue	- queue of messages to be sent over the connection;
- * @nip_queue	- queue of non-idempotent messages in server's @msg_queue;
- * @msg_qlock	- lock for accessing @msg_queue;
+ * @fwd_queue	- queue of messages to be sent to a back-end server;
+ * @nip_queue	- queue of non-idempotent messages in server's @fwd_queue;
+ * @seq_queue	- queue of client's messages in the order they came;
+ * @fwd_qlock	- lock for accessing @fwd_queue and @nip_queue;
+ * @seq_qlock	- lock for accessing @seq_queue;
+ * @ret_qlock	- lock for serializing the sending of responses to a client;
  * @flags	- atomic flags related to server connection's state;
  * @refcnt	- number of users of the connection structure instance;
  * @qsize	- current number of requests in server's @msg_queue;
@@ -95,9 +98,16 @@ typedef struct tfw_connection_t {
 	SsProto			proto;
 	TfwGState		state;
 	struct list_head	list;
-	struct list_head	msg_queue;
-	struct list_head	nip_queue;				/*srv*/
-	spinlock_t		msg_qlock;
+	struct list_head	fwd_queue;				/*srv*/
+	union {
+		struct list_head	nip_queue;			/*srv*/
+		struct list_head	seq_queue;			/*cli*/
+	};
+	union {
+		spinlock_t		fwd_qlock;			/*srv*/
+		spinlock_t		seq_qlock;			/*cli*/
+	};
+	spinlock_t		ret_qlock;				/*cli*/
 	unsigned long		flags;					/*srv*/
 	atomic_t		refcnt;
 	int			qsize;					/*srv*/
@@ -185,7 +195,14 @@ extern TfwConnHooks *conn_hooks[TFW_CONN_MAX_PROTOS];
 
 /*
  * Tell if a connection is restricted. When restricted, a connection
- * cannot be scheduled.
+ * is not available to schedulers.
+ *
+ * The flag RESEND is set when a newly established server connection
+ * has messages in the forwarding queue. That means that the connection
+ * had been closed prematurely, and the messages in the queue need to
+ * be re-sent to a back-end server. The new connection is not available
+ * to schedulers (restricted) until all messages in the forwarding queue
+ * are re-sent.
  */
 static inline bool
 tfw_connection_restricted(TfwConnection *conn)
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 3ef34884a..b6ae93b76 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -452,7 +452,7 @@ static inline bool
 tfw_http_conn_drained(TfwConnection *srv_conn)
 {
 	TfwMsg *msg;
-	struct list_head *fwd_queue = &srv_conn->msg_queue;
+	struct list_head *fwd_queue = &srv_conn->fwd_queue;
 
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
 
@@ -520,14 +520,14 @@ tfw_http_req_zap_error(struct list_head *equeue)
 /*
  * Forward requests in the server connection @srv_conn. The requests
  * are forwarded until a non-idempotent request is found in the queue.
- * Must be called with a lock on the server connection's @msg_queue.
+ * Must be called with a lock on the server connection's @fwd_queue.
  */
 static void
 __tfw_http_req_fwd_stalled(TfwConnection *srv_conn, struct list_head *equeue)
 {
 	TfwHttpReq *req, *tmp;
 	TfwServer *srv = (TfwServer *)srv_conn->peer;
-	struct list_head *fwd_queue = &srv_conn->msg_queue;
+	struct list_head *fwd_queue = &srv_conn->fwd_queue;
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 
@@ -588,11 +588,11 @@ tfw_http_req_fwd_stalled(TfwConnection *srv_conn)
 	LIST_HEAD(equeue);
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
-	WARN_ON(!spin_is_locked(&srv_conn->msg_qlock));
-	BUG_ON(list_empty(&srv_conn->msg_queue));
+	WARN_ON(!spin_is_locked(&srv_conn->fwd_qlock));
+	BUG_ON(list_empty(&srv_conn->fwd_queue));
 
 	__tfw_http_req_fwd_stalled(srv_conn, &equeue);
-	spin_unlock(&srv_conn->msg_qlock);
+	spin_unlock(&srv_conn->fwd_qlock);
 
 	if (!list_empty(&equeue))
 		tfw_http_req_zap_error(&equeue);
@@ -620,14 +620,14 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 	TFW_DBG2("%s: srv_conn=[%p], req=[%p]\n", __func__, srv_conn, req);
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
 
-	spin_lock(&srv_conn->msg_qlock);
+	spin_lock(&srv_conn->fwd_qlock);
 	drained = tfw_http_conn_drained(srv_conn);
-	list_add_tail(&req->fwd_list, &srv_conn->msg_queue);
+	list_add_tail(&req->fwd_list, &srv_conn->fwd_queue);
 	srv_conn->qsize++;
 	if (tfw_http_req_is_nonidempotent(req))
 		__tfw_http_req_nonidemp_enlist(srv_conn, req);
 	if (tfw_http_conn_on_hold(srv_conn)) {
-		spin_unlock(&srv_conn->msg_qlock);
+		spin_unlock(&srv_conn->fwd_qlock);
 		TFW_DBG2("%s: Server connection is on hold: conn=[%p]\n",
 			 __func__, srv_conn);
 		return;
@@ -643,7 +643,7 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 		tfw_http_req_nonidemp_delist(srv_conn, req);
 		list_del_init(&req->fwd_list);
 		srv_conn->qsize--;
-		spin_unlock(&srv_conn->msg_qlock);
+		spin_unlock(&srv_conn->fwd_qlock);
 		TFW_DBG2("%s: Forwarding error: conn=[%p] req=[%p]\n",
 			 __func__, srv_conn, req);
 		tfw_http_send_500(req);
@@ -651,7 +651,7 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 		return;
 	}
 	srv_conn->msg_sent = (TfwMsg *)req;
-	spin_unlock(&srv_conn->msg_qlock);
+	spin_unlock(&srv_conn->fwd_qlock);
 }
 
 /*
@@ -675,7 +675,7 @@ tfw_http_req_fwd_handlenip(TfwConnection *srv_conn)
 	{
 		BUG_ON(list_empty(&req_sent->nip_list));
 		srv_conn->msg_sent =
-			(&req_sent->fwd_list == srv_conn->msg_queue.next) ?
+			(&req_sent->fwd_list == srv_conn->fwd_queue.next) ?
 			NULL : (TfwMsg *)list_prev_entry(req_sent, fwd_list);
 		__tfw_http_req_nonidemp_delist(srv_conn, req_sent);
 		list_del_init(&req_sent->fwd_list);
@@ -695,7 +695,7 @@ __tfw_http_req_fwd_resend(TfwConnection *srv_conn,
 {
 	TfwHttpReq *req, *tmp;
 	TfwServer *srv = (TfwServer *)srv_conn->peer;
-	struct list_head *end, *fwd_queue = &srv_conn->msg_queue;
+	struct list_head *end, *fwd_queue = &srv_conn->fwd_queue;
 
 	TFW_DBG2("%s: conn=[%p] one_msg=[%s]\n",
 		 __func__, srv_conn, one_msg ? "true" : "false");
@@ -739,10 +739,10 @@ tfw_http_req_fwd_repair(TfwConnection *srv_conn)
 	LIST_HEAD(equeue);
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
-	WARN_ON(!spin_is_locked(&srv_conn->msg_qlock));
+	WARN_ON(!spin_is_locked(&srv_conn->fwd_qlock));
 	BUG_ON(!(srv_conn->flags & (TFW_CONN_B_QFORWD | TFW_CONN_B_RESEND)));
 
-	if (list_empty(&srv_conn->msg_queue)) {
+	if (list_empty(&srv_conn->fwd_queue)) {
 		clear_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
 		clear_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
 	} else if (test_bit(TFW_CONN_B_QFORWD, &srv_conn->flags)) {
@@ -759,7 +759,7 @@ tfw_http_req_fwd_repair(TfwConnection *srv_conn)
 		if (tfw_http_conn_need_fwd(srv_conn))
 			__tfw_http_req_fwd_stalled(srv_conn, &equeue);
 	}
-	spin_unlock(&srv_conn->msg_qlock);
+	spin_unlock(&srv_conn->fwd_qlock);
 
 	if (!list_empty(&equeue))
 		tfw_http_req_zap_error(&equeue);
@@ -785,10 +785,10 @@ tfw_http_conn_msg_alloc(TfwConnection *conn)
 	} else {
 		TfwHttpReq *req;
 
-		spin_lock(&conn->msg_qlock);
-		req = list_first_entry_or_null(&conn->msg_queue,
+		spin_lock(&conn->fwd_qlock);
+		req = list_first_entry_or_null(&conn->fwd_queue,
 					       TfwHttpReq, fwd_list);
-		spin_unlock(&conn->msg_qlock);
+		spin_unlock(&conn->fwd_qlock);
 		if (req && (req->method == TFW_HTTP_METH_HEAD))
 			hm->flags |= TFW_HTTP_VOID_BODY;
 		TFW_INC_STAT_BH(serv.rx_messages);
@@ -850,7 +850,7 @@ tfw_http_req_fwd_resched(TfwConnection *srv_conn)
 {
 	TfwHttpReq *req, *tmp;
 	TfwConnection *sconn;
-	struct list_head *fwd_queue = &srv_conn->msg_queue;
+	struct list_head *fwd_queue = &srv_conn->fwd_queue;
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, conn);
 
@@ -904,7 +904,7 @@ tfw_http_conn_repair(TfwConnection *srv_conn)
 		tfw_http_req_fwd_resched(srv_conn);
 		return;
 	}
-	spin_lock(&srv_conn->msg_qlock);
+	spin_lock(&srv_conn->fwd_qlock);
 	/* Handle non-idempotent requests. */
 	tfw_http_req_fwd_handlenip(srv_conn);
 	/* Re-send the first unanswered request. */
@@ -920,7 +920,7 @@ tfw_http_conn_repair(TfwConnection *srv_conn)
 		if (tfw_http_conn_need_fwd(srv_conn))
 			__tfw_http_req_fwd_stalled(srv_conn, &equeue);
 	}
-	spin_unlock(&srv_conn->msg_qlock);
+	spin_unlock(&srv_conn->fwd_qlock);
 
 	if (!list_empty(&equeue))
 		tfw_http_req_zap_error(&equeue);
@@ -938,7 +938,7 @@ tfw_http_conn_init(TfwConnection *conn)
 	TFW_DBG2("%s: conn=[%p]\n", __func__, conn);
 
 	if (TFW_CONN_TYPE(conn) & Conn_Srv) {
-		if (!list_empty(&conn->msg_queue))
+		if (!list_empty(&conn->fwd_queue))
 			set_bit(TFW_CONN_B_RESEND, &conn->flags);
 	}
 	tfw_gfsm_state_init(&conn->state, conn, TFW_HTTP_FSM_INIT);
@@ -996,7 +996,7 @@ static void
 tfw_http_conn_cli_drop(TfwConnection *cli_conn)
 {
 	TfwHttpMsg *hmreq, *tmp;
-	struct list_head *seq_queue = &cli_conn->msg_queue;
+	struct list_head *seq_queue = &cli_conn->seq_queue;
 	LIST_HEAD(zap_queue);
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, cli_conn);
@@ -1005,9 +1005,9 @@ tfw_http_conn_cli_drop(TfwConnection *cli_conn)
 	if (list_empty_careful(seq_queue))
 		return;
 
-	spin_lock(&cli_conn->msg_qlock);
+	spin_lock(&cli_conn->seq_qlock);
 	list_splice_tail_init(seq_queue, &zap_queue);
-	spin_unlock(&cli_conn->msg_qlock);
+	spin_unlock(&cli_conn->seq_qlock);
 
 	list_for_each_entry_safe(hmreq, tmp, &zap_queue, msg.seq_list)
 		list_del_init(&hmreq->msg.seq_list);
@@ -1309,49 +1309,17 @@ tfw_http_adjust_resp(TfwHttpResp *resp, TfwHttpReq *req)
 /*
  * Forward responses to the client in the correct order.
  */
-void
-tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
+static void
+__tfw_http_resp_fwd(TfwConnection *cli_conn, struct list_head *ret_queue)
 {
-	TfwHttpReq *tmp;
-	TfwConnection *cli_conn = req->conn;
-	struct list_head *seq_queue = &cli_conn->msg_queue;
-	LIST_HEAD(out_queue);
-
-	TFW_DBG2("%s: req=[%p], resp=[%p]\n", __func__, req, resp);
-
-	/*
-	 * Starting with the first request on the list, pick consecutive
-	 * requests that have a paired response. Remove those requests
-	 * from the list, and put them on the list of outgoing responses.
-	 *
-	 * However, if the list is empty, then then it's either a bug,
-	 * or the client connection had been closed. If it's a bug, then
-	 * the correct order of responses to requests may be broken. The
-	 * client connection needs to be closed.
-	 */
-	spin_lock(&cli_conn->msg_qlock);
-	if (list_empty(seq_queue)) {
-		spin_unlock(&cli_conn->msg_qlock);
-		TFW_DBG2("%s: The client's request missing: conn=[%p]\n",
-			 __func__, cli_conn);
-		ss_close_sync(cli_conn->sk, true);
-		tfw_http_conn_msg_free((TfwHttpMsg *)resp);
-		tfw_http_conn_msg_free((TfwHttpMsg *)req);
-		return;
-	}
-	req->resp = (TfwHttpMsg *)resp;
-	do {
-		req = list_first_entry(seq_queue, TfwHttpReq, msg.seq_list);
-		if (req->resp == NULL)
-			break;
-		list_move_tail(&req->msg.seq_list, &out_queue);
-	} while (!list_empty(seq_queue));
-	spin_unlock(&cli_conn->msg_qlock);
+	TfwHttpReq *req, *tmp;
+	TfwHttpResp *resp;
 
 	/* Forward responses to the client. */
-	list_for_each_entry_safe(req, tmp, &out_queue, msg.seq_list) {
+	list_for_each_entry_safe(req, tmp, ret_queue, msg.seq_list) {
 		list_del_init(&req->msg.seq_list);
 		resp = (TfwHttpResp *)req->resp;
+		BUG_ON(!resp);
 		/*
 		 * If the client connection is dead, then discard all
 		 * @req and @resp in the @out_queue. Remaining requests
@@ -1378,6 +1346,55 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 	}
 }
 
+void
+tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
+{
+	TfwConnection *cli_conn = req->conn;
+	struct list_head *seq_queue = &cli_conn->seq_queue;
+	struct list_head *req_retent = NULL;
+	LIST_HEAD(ret_queue);
+
+	TFW_DBG2("%s: req=[%p], resp=[%p]\n", __func__, req, resp);
+
+	/*
+	 * Starting with the first request on the list, pick consecutive
+	 * requests that have a paired response. Remove those requests
+	 * from the list, and put them on the list of outgoing responses.
+	 * Take care of concurrent calls to this function from different
+	 * CPUs, all going for the same client connection.
+	 *
+	 * If the list is empty, then it's either a bug, or the client
+	 * connection had been closed. If it's a bug, then the correct
+	 * order of responses to requests may be broken. The client
+	 * connection needs to be closed.
+	 */
+	spin_lock(&cli_conn->seq_qlock);
+	if (list_empty(seq_queue)) {
+		spin_unlock(&cli_conn->seq_qlock);
+		TFW_DBG2("%s: The client's request missing: conn=[%p]\n",
+			 __func__, cli_conn);
+		ss_close_sync(cli_conn->sk, true);
+		tfw_http_conn_msg_free((TfwHttpMsg *)resp);
+		tfw_http_conn_msg_free((TfwHttpMsg *)req);
+		return;
+	}
+	req->resp = (TfwHttpMsg *)resp;
+	list_for_each_entry(req, seq_queue, msg.seq_list) {
+		if (req->resp == NULL)
+			break;
+		req_retent = &req->msg.seq_list;
+	}
+	if (!req_retent) {
+		spin_unlock(&cli_conn->seq_qlock);
+		return;
+	}
+	__list_cut_position(&ret_queue, seq_queue, req_retent);
+	spin_lock(&cli_conn->ret_qlock);
+	spin_unlock(&cli_conn->seq_qlock);
+	__tfw_http_resp_fwd(cli_conn, &ret_queue);
+	spin_unlock(&cli_conn->ret_qlock);
+}
+
 /**
  * The request is served from cache.
  * Send the response as is and unrefer its data.
@@ -1521,18 +1538,18 @@ tfw_http_req_add_seq_queue(TfwHttpReq *req)
 {
 	TfwHttpReq *preq;
 	TfwConnection *cli_conn = req->conn;
-	struct list_head *seq_queue = &cli_conn->msg_queue;
+	struct list_head *seq_queue = &cli_conn->seq_queue;
 
 	tfw_http_req_mark_nonidempotent(req);
 
-	spin_lock(&cli_conn->msg_qlock);
+	spin_lock(&cli_conn->seq_qlock);
 	preq = !list_empty(seq_queue)
 	     ? list_last_entry(seq_queue, TfwHttpReq, msg.seq_list)
 	     : NULL;
 	if (preq && (preq->flags & TFW_HTTP_NON_IDEMP))
 		preq->flags &= ~TFW_HTTP_NON_IDEMP;
 	list_add_tail(&req->msg.seq_list, seq_queue);
-	spin_unlock(&cli_conn->msg_qlock);
+	spin_unlock(&cli_conn->seq_qlock);
 }
 
 static int
@@ -1809,7 +1826,7 @@ tfw_http_resp_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 
 /*
  * Request messages that were forwarded to a backend server are added
- * to and kept in @msg_queue of the connection @conn for that server.
+ * to and kept in @fwd_queue of the connection @conn for that server.
  * If a paired request is not found, then the response is deleted.
  *
  * If a paired client request is missing, then it seems upsream server is
@@ -1820,12 +1837,12 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 {
 	TfwHttpReq *req;
 	TfwConnection *srv_conn = hmresp->conn;
-	struct list_head *fwd_queue = &srv_conn->msg_queue;
+	struct list_head *fwd_queue = &srv_conn->fwd_queue;
 
-	spin_lock(&srv_conn->msg_qlock);
+	spin_lock(&srv_conn->fwd_qlock);
 	if (unlikely(list_empty(fwd_queue))) {
 		BUG_ON(srv_conn->qsize);
-		spin_unlock(&srv_conn->msg_qlock);
+		spin_unlock(&srv_conn->fwd_qlock);
 		/* @conn->msg will get NULLed in the process. */
 		TFW_WARN("Paired request missing, "
 			 "HTTP Response Splitting attack?\n");
@@ -1851,7 +1868,7 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 	else if (tfw_http_conn_need_fwd(srv_conn))
 		tfw_http_req_fwd_stalled(srv_conn);
 	else
-		spin_unlock(&srv_conn->msg_qlock);
+		spin_unlock(&srv_conn->fwd_qlock);
 
 	return req;
 }
diff --git a/tempesta_fw/msg.h b/tempesta_fw/msg.h
index 4066573d3..ccde0f90a 100644
--- a/tempesta_fw/msg.h
+++ b/tempesta_fw/msg.h
@@ -34,6 +34,14 @@
  * @skb_list	- list of sk_buff that belong to the message;
  * @ss_flags	- message processing flags;
  * @len		- total message length;
+ *
+ * TODO: Currently seq_list is used only in requests. Responses are not
+ * put in any queues, they are simply attached to requests as req->resp.
+ * However, a queue for responses may also be needed to mitigate sending
+ * of responses and improve the distribution of load in Tempesta. Please
+ * refer to issues #391 and #488.
+ * After these issues are resolved, it may well be that seq_list is more
+ * suitable to stay in TfwHttpReq{} rather than here in TfwMsg{}.
  */
 typedef struct {
 	struct list_head	seq_list;
diff --git a/tempesta_fw/sock_clnt.c b/tempesta_fw/sock_clnt.c
index 7c184b3d7..3df8bd5c9 100644
--- a/tempesta_fw/sock_clnt.c
+++ b/tempesta_fw/sock_clnt.c
@@ -73,6 +73,10 @@ tfw_cli_conn_alloc(int type)
 		return NULL;
 
 	tfw_connection_init(conn);
+	INIT_LIST_HEAD(&conn->seq_queue);
+	spin_lock_init(&conn->seq_qlock);
+	spin_lock_init(&conn->ret_qlock);
+
 	setup_timer(&conn->timer,
 		    tfw_sock_cli_keepalive_timer_cb,
 		    (unsigned long)conn);
@@ -87,7 +91,7 @@ tfw_cli_conn_free(TfwConnection *conn)
 
 	/* Check that all nested resources are freed. */
 	tfw_connection_validate_cleanup(conn);
-	BUG_ON(!list_empty(&conn->msg_queue));
+	BUG_ON(!list_empty(&conn->seq_queue));
 
 	kmem_cache_free(tfw_cli_cache(TFW_CONN_TYPE(conn)), conn);
 }
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index c672ffd2f..e52ac49a4 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -500,6 +500,10 @@ tfw_srv_conn_alloc(void)
 		return NULL;
 
 	tfw_connection_init(&srv_conn->conn);
+	INIT_LIST_HEAD(&srv_conn->conn.fwd_queue);
+	INIT_LIST_HEAD(&srv_conn->conn.nip_queue);
+	spin_lock_init(&srv_conn->conn.fwd_qlock);
+
 	__setup_retry_timer(srv_conn);
 	ss_proto_init(&srv_conn->conn.proto,
 		      &tfw_sock_srv_ss_hooks, Conn_HttpSrv);

From c19388d5a44b98e9f733f27cd7949cdaaaf85a72 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Wed, 11 Jan 2017 13:54:12 +0300
Subject: [PATCH 30/65] Eliminate TfwConnection{}->msg_resent.

---
 tempesta_fw/connection.h              |   3 +-
 tempesta_fw/http.c                    | 159 +++++++++++++++++---------
 tempesta_fw/t/unit/test_http_sticky.c |   4 +-
 3 files changed, 108 insertions(+), 58 deletions(-)

diff --git a/tempesta_fw/connection.h b/tempesta_fw/connection.h
index 0beba5eba..d286c6d11 100644
--- a/tempesta_fw/connection.h
+++ b/tempesta_fw/connection.h
@@ -114,7 +114,6 @@ typedef struct tfw_connection_t {
 	struct timer_list	timer;
 	TfwMsg			*msg;
 	TfwMsg			*msg_sent;				/*srv*/
-	TfwMsg			*msg_resent;				/*srv*/
 	TfwPeer 		*peer;
 	struct sock		*sk;
 	void			(*destructor)(void *);
@@ -232,7 +231,7 @@ tfw_connection_get(TfwConnection *conn)
 }
 
 /**
- * Increment reference counter and return true if @conn isi not in
+ * Increment reference counter and return true if @conn is not in
  * failovering process, i.e. @refcnt wasn't less or equal to zero.
  */
 static inline bool
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index b6ae93b76..e9dee1ef9 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -517,6 +517,54 @@ tfw_http_req_zap_error(struct list_head *equeue)
 	}
 }
 
+static inline bool
+__tfw_http_req_evict_timeout(TfwConnection *srv_conn, TfwServer *srv,
+			     TfwHttpReq *req, struct list_head *equeue)
+{
+	unsigned long jtimeout = jiffies - req->jtstamp;
+
+	if (unlikely(time_after(jtimeout, srv->qjtimeout))) {
+		TFW_DBG2("%s: Eviction: req=[%p] overdue=[%dms]\n",
+			 __func__, req,
+			jiffies_to_msecs(jtimeout - srv->qjtimeout));
+		tfw_http_req_move2equeue(srv_conn, req, equeue, 504);
+		return true;
+	}
+	return false;
+}
+
+static inline bool
+__tfw_http_req_evict_retries(TfwConnection *srv_conn, TfwServer *srv,
+			     TfwHttpReq *req, struct list_head *equeue)
+{
+	if (unlikely(req->retries++ >= srv->retry_max)) {
+		TFW_DBG2("%s: Eviction: req=[%p] retries=[%d]\n",
+			 __func__, req, req->retries);
+		tfw_http_req_move2equeue(srv_conn, req, equeue, 504);
+		return true;
+	}
+	return false;
+}
+
+static inline bool
+__tfw_http_req_fwd_send(TfwConnection *srv_conn, TfwServer *srv,
+			TfwHttpReq *req, struct list_head *equeue)
+{
+	/*
+	 * If unable to send to the server connection due to an error,
+	 * then move the request to @err_queue for sending a 500 error
+	 * response later. That is safe as the response will be sent
+	 * in proper seq order.
+	 */
+	if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
+		TFW_DBG2("%s: Forwarding error: conn=[%p] req=[%p]\n",
+			 __func__, srv_conn, req);
+		tfw_http_req_move2equeue(srv_conn, req, equeue, 500);
+		return false;
+	}
+	return true;
+}
+
 /*
  * Forward requests in the server connection @srv_conn. The requests
  * are forwarded until a non-idempotent request is found in the queue.
@@ -543,26 +591,10 @@ __tfw_http_req_fwd_stalled(TfwConnection *srv_conn, struct list_head *equeue)
 	    : list_first_entry(fwd_queue, TfwHttpReq, fwd_list);
 
 	list_for_each_entry_safe_from(req, tmp, fwd_queue, fwd_list) {
-		unsigned long jtimeout = jiffies - req->jtstamp;
-		if (time_after(jtimeout, srv->qjtimeout)) {
-			TFW_DBG2("%s: Eviction: req=[%p] overdue=[%dms]\n",
-				 __func__, req,
-				jiffies_to_msecs(jtimeout - srv->qjtimeout));
-			tfw_http_req_move2equeue(srv_conn, req, equeue, 504);
+		if (__tfw_http_req_evict_timeout(srv_conn, srv, req, equeue))
 			continue;
-		}
-		/*
-		 * If unable to send to the server connection due to
-		 * an error, then move the request to @err_queue for
-		 * sending a 500 error response later. That is safe
-		 * as the response will be sent in proper seq order.
-		 */
-		if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
-			TFW_DBG2("%s: Forwarding error: conn=[%p] req=[%p]\n",
-				 __func__, srv_conn, req);
-			tfw_http_req_move2equeue(srv_conn, req, equeue, 500);
+		if (!__tfw_http_req_fwd_send(srv_conn, srv, req, equeue))
 			continue;
-		}
 		srv_conn->msg_sent = (TfwMsg *)req;
 		/* Stop sending if the request is non-idempotent. */
 		if (tfw_http_req_is_nonidempotent(req)) {
@@ -685,15 +717,28 @@ tfw_http_req_fwd_handlenip(TfwConnection *srv_conn)
 	}
 }
 
+static inline bool
+__tfw_http_req_resend_one(TfwConnection *srv_conn, TfwServer *srv,
+			      TfwHttpReq *req, struct list_head *equeue)
+{
+	if (__tfw_http_req_evict_timeout(srv_conn, srv, req, equeue))
+		return false;
+	if (__tfw_http_req_evict_retries(srv_conn, srv, req, equeue))
+		return false;
+	if (!__tfw_http_req_fwd_send(srv_conn, srv, req, equeue))
+		return false;
+	return true;
+}
+
 /*
  * Re-forward requests in a server connection. Requests that exceed
  * the set limits are evicted.
  */
-static void
-__tfw_http_req_fwd_resend(TfwConnection *srv_conn,
-			  bool one_msg, struct list_head *equeue)
+static TfwMsg *
+__tfw_http_req_resend(TfwConnection *srv_conn,
+		      bool first, struct list_head *equeue)
 {
-	TfwHttpReq *req, *tmp;
+	TfwHttpReq *req, *tmp, *req_resent = NULL;
 	TfwServer *srv = (TfwServer *)srv_conn->peer;
 	struct list_head *end, *fwd_queue = &srv_conn->fwd_queue;
 
@@ -705,27 +750,31 @@ __tfw_http_req_fwd_resend(TfwConnection *srv_conn,
 	req = list_first_entry(fwd_queue, TfwHttpReq, fwd_list);
 	end = ((TfwHttpReq *)srv_conn->msg_sent)->fwd_list.next;
 
-	/* An equivalent of list_for_each_entry_safe_from() */
+	/* Similar to list_for_each_entry_safe() */
 	for (tmp = list_next_entry(req, fwd_list);
 	     &req->fwd_list != end;
 	     req = tmp, tmp = list_next_entry(tmp, fwd_list))
 	{
-		if (req->retries++ >= srv->retry_max) {
-			TFW_DBG2("%s: Eviction: req=[%p] retries=[%d]\n",
-				 __func__, req, req->retries);
-			tfw_http_req_move2equeue(srv_conn, req, equeue, 504);
-			continue;
-		}
-		if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
-			TFW_DBG2("%s: Forwarding error: conn=[%p] req=[%p]\n",
-				 __func__, srv_conn, req);
-			tfw_http_req_move2equeue(srv_conn, req, equeue, 500);
-			continue;
+		if (__tfw_http_req_resend_one(srv_conn, srv, req, equeue)) {
+			req_resent = req;
+			if (unlikely(first))
+				break;
 		}
-		srv_conn->msg_resent = (TfwMsg *)req;
-		if (unlikely(one_msg))
-			break;
 	}
+
+	return (TfwMsg *)req_resent;
+}
+
+static inline TfwMsg *
+__tfw_http_req_resend_first(TfwConnection *srv_conn, struct list_head *equeue)
+{
+	return __tfw_http_req_resend(srv_conn, true, equeue);
+}
+
+static inline TfwMsg *
+__tfw_http_req_resend_all(TfwConnection *srv_conn, struct list_head *equeue)
+{
+	return __tfw_http_req_resend(srv_conn, false, equeue);
 }
 
 /*
@@ -749,12 +798,9 @@ tfw_http_req_fwd_repair(TfwConnection *srv_conn)
 		if (tfw_http_conn_need_fwd(srv_conn))
 			__tfw_http_req_fwd_stalled(srv_conn, &equeue);
 	} else {
-		srv_conn->msg_resent = NULL;
-		if (srv_conn->msg_sent) {
-			__tfw_http_req_fwd_resend(srv_conn, false, &equeue);
-			if (srv_conn->msg_resent != srv_conn->msg_sent)
-				srv_conn->msg_sent = srv_conn->msg_resent;
-		}
+		if (srv_conn->msg_sent)
+			srv_conn->msg_sent =
+				__tfw_http_req_resend_all(srv_conn, &equeue);
 		set_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
 		if (tfw_http_conn_need_fwd(srv_conn))
 			__tfw_http_req_fwd_stalled(srv_conn, &equeue);
@@ -840,13 +886,18 @@ tfw_http_conn_msg_free(TfwHttpMsg *hm)
  * Non-idempotent requests may be rescheduled depending on the option
  * in configuration.
  *
- * FIXME: It appears that a re-scheduled request should be put in a
- * new server connection's queue according to its original timestamp.
- * It may matter as old requests are evicted. However, that is time
- * consuming. For now just put them at the end of the queue.
+ * Note: re-scheduled requests are put at the tail of a new server's
+ * connection queue, and NOT according to their original timestamps.
+ * That's the intended behaviour. These requests are unlucky already.
+ * They had been delayed by the waiting in their original server
+ * connections, and then by the re-scheduling procedure itself. Now
+ * they have much greater chance to be evicted when it's their turn
+ * to be forwarded. The main effort is put into servicing requests
+ * that are on time. Unlucky requests are just given another chance
+ * with minimal effort.
  */
 static void
-tfw_http_req_fwd_resched(TfwConnection *srv_conn)
+tfw_http_req_resched(TfwConnection *srv_conn)
 {
 	TfwHttpReq *req, *tmp;
 	TfwConnection *sconn;
@@ -893,6 +944,7 @@ tfw_http_req_fwd_resched(TfwConnection *srv_conn)
 static void
 tfw_http_conn_repair(TfwConnection *srv_conn)
 {
+	TfwMsg *msg_resent = NULL;
 	LIST_HEAD(equeue);
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
@@ -901,21 +953,20 @@ tfw_http_conn_repair(TfwConnection *srv_conn)
 
 	/* See if requests need to be rescheduled. */
 	if (unlikely(!tfw_connection_live(srv_conn))) {
-		tfw_http_req_fwd_resched(srv_conn);
+		tfw_http_req_resched(srv_conn);
 		return;
 	}
 	spin_lock(&srv_conn->fwd_qlock);
 	/* Handle non-idempotent requests. */
 	tfw_http_req_fwd_handlenip(srv_conn);
 	/* Re-send the first unanswered request. */
-	srv_conn->msg_resent = NULL;
 	if (srv_conn->msg_sent) {
-		__tfw_http_req_fwd_resend(srv_conn, true, &equeue);
-		if (!srv_conn->msg_resent)
+		msg_resent = __tfw_http_req_resend_first(srv_conn, &equeue);
+		if (unlikely(!msg_resent))
 			srv_conn->msg_sent = NULL;
 	}
-	/* Send the remaining unsent requests. */
-	if (!srv_conn->msg_resent) {
+	/* If none resent, then send the remaining unsent requests. */
+	if (!msg_resent) {
 		set_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
 		if (tfw_http_conn_need_fwd(srv_conn))
 			__tfw_http_req_fwd_stalled(srv_conn, &equeue);
diff --git a/tempesta_fw/t/unit/test_http_sticky.c b/tempesta_fw/t/unit/test_http_sticky.c
index fe5d621b6..eb3ae7b0c 100644
--- a/tempesta_fw/t/unit/test_http_sticky.c
+++ b/tempesta_fw/t/unit/test_http_sticky.c
@@ -81,8 +81,8 @@ static struct {
 
 	TfwHttpReq	*req;
 	TfwHttpResp	*resp;
-	TfwConnection   conn_req;
-	TfwConnection   conn_resp;
+	TfwConnection	conn_req;
+	TfwConnection	conn_resp;
 	TfwClient	client;
 	struct sock	sock;
 } mock;

From 5470e37d316ceb61588d173de2a991886ad5761a Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Wed, 11 Jan 2017 14:44:13 +0300
Subject: [PATCH 31/65] Hold the client connection while sending responses.

A client may close the connection after receiving the last response.
The connection closing may be serviced faster by Tempesta, and the
response sending function may access the connection data (especially
spin_unlock()) after the connection has been closed and destroyed.
The correct solution is to hold the connection until we're completely
done with it.
---
 tempesta_fw/http.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index e9dee1ef9..0ab12866a 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -1440,10 +1440,12 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 		return;
 	}
 	__list_cut_position(&ret_queue, seq_queue, req_retent);
+	tfw_connection_get(cli_conn);
 	spin_lock(&cli_conn->ret_qlock);
 	spin_unlock(&cli_conn->seq_qlock);
 	__tfw_http_resp_fwd(cli_conn, &ret_queue);
 	spin_unlock(&cli_conn->ret_qlock);
+	tfw_connection_put(cli_conn);
 }
 
 /**

From cb8760f0f7c5730ca1d68153b7592ed0058a4997 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Thu, 12 Jan 2017 15:44:25 +0300
Subject: [PATCH 32/65] Fix handling of non-idempotent requests in case of
 connection repair.

Also, lots of code unification, better function names, and lots of
comments.
---
 tempesta_fw/connection.h |   4 +-
 tempesta_fw/http.c       | 299 +++++++++++++++++++++------------------
 2 files changed, 163 insertions(+), 140 deletions(-)

diff --git a/tempesta_fw/connection.h b/tempesta_fw/connection.h
index d286c6d11..d6375c824 100644
--- a/tempesta_fw/connection.h
+++ b/tempesta_fw/connection.h
@@ -87,7 +87,7 @@ enum {
  * @qsize	- current number of requests in server's @msg_queue;
  * @timer	- The keep-alive/retry timer for the connection;
  * @msg		- message that is currently being processed;
- * @msg_sent	- message that was sent last in a server connection;
+ * @req_sent	- request that was sent last in a server connection;
  * @msg_resent	- message that was re-sent last in a server connection;
  * @peer	- TfwClient or TfwServer handler;
  * @sk		- an appropriate sock handler;
@@ -113,7 +113,7 @@ typedef struct tfw_connection_t {
 	int			qsize;					/*srv*/
 	struct timer_list	timer;
 	TfwMsg			*msg;
-	TfwMsg			*msg_sent;				/*srv*/
+	TfwMsg			*req_sent;				/*srv*/
 	TfwPeer 		*peer;
 	struct sock		*sk;
 	void			(*destructor)(void *);
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 0ab12866a..59b2db7de 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -374,29 +374,37 @@ tfw_http_send_504(TfwHttpReq *req)
 	return tfw_http_send_resp(req, &rh, __TFW_STR_CH(&rh, 1));
 }
 
+/*
+ * Check if a request is non-idempotent.
+ */
 static inline bool
-tfw_http_req_is_nonidempotent(TfwHttpReq *req)
+tfw_http_req_is_nip(TfwHttpReq *req)
 {
 	return (req->flags & TFW_HTTP_NON_IDEMP);
 }
 
 /*
- * Set the request @req in server connection @srv_conn as idempotent.
- * Called only when a request turns idempotent from a non-idempotent.
+ * Remove @req from the list of non-idempotent requests in @srv_conn.
+ * If it is the last requests on the list, then clear the flag that
+ * @srv_conn has non-idempotent requests.
+ *
+ * @req must be confirmed to be on the list.
  */
 static inline void
-__tfw_http_req_nonidemp_delist(TfwConnection *srv_conn, TfwHttpReq *req)
+__tfw_http_req_nip_delist(TfwConnection *srv_conn, TfwHttpReq *req)
 {
-	list_del_init(&req->nip_list);
+	BUG_ON(list_empty(&req->nip_list));
+	list_del(&req->nip_list);
 	if (list_empty(&srv_conn->nip_queue))
 		clear_bit(TFW_CONN_B_HASNIP, &srv_conn->flags);
 }
 
 /*
- * Set the request @req in server connection @srv_conn as non-idempotent.
+ * Put @req on the list of non-idempotent requests in @srv_conn. 
+ * Raise the flag that the connection has non-idempotent requests.
  */
 static inline void
-__tfw_http_req_nonidemp_enlist(TfwConnection *srv_conn, TfwHttpReq *req)
+__tfw_http_req_nip_enlist(TfwConnection *srv_conn, TfwHttpReq *req)
 {
 	BUG_ON(!list_empty(&req->nip_list));
 	list_add_tail(&req->nip_list, &srv_conn->nip_queue);
@@ -404,29 +412,33 @@ __tfw_http_req_nonidemp_enlist(TfwConnection *srv_conn, TfwHttpReq *req)
 }
 
 /*
- * If @req in server connection @srv_conn is non-idempotent, then set
- * it as idempotent.
+ * Remove @req from the list of non-idempotent requests in @srv_conn.
+ * @req is verified to be on the list. Does nothing if @req is NOT on
+ * the list.
  */
 static inline void
-tfw_http_req_nonidemp_delist(TfwConnection *srv_conn, TfwHttpReq *req)
+tfw_http_req_nip_delist(TfwConnection *srv_conn, TfwHttpReq *req)
 {
 	if (!list_empty(&req->nip_list))
-		__tfw_http_req_nonidemp_delist(srv_conn, req);
+		__tfw_http_req_nip_delist(srv_conn, req);
 }
 
 /*
- * If a request on the list of non-idempotent requests in server
- * connection @srv_conn had become idempotent, then set it as such.
+ * Remove idempotent requests from the list of non-idempotent requests
+ * in @srv_conn. A non-idempotent request may become idempotent when
+ * another request is received from a client before a response to the
+ * non-idempotent request is forwarded to the client. See the comment
+ * to tfw_http_req_add_seq_queue().
  */
 static inline void
-tfw_http_conn_nonidemp_delist(TfwConnection *srv_conn)
+tfw_http_conn_nip_delist(TfwConnection *srv_conn)
 {
 	TfwHttpReq *req, *tmp;
 
 	list_for_each_entry_safe(req, tmp, &srv_conn->nip_queue, nip_list)
-		if (!tfw_http_req_is_nonidempotent(req)) {
+		if (!tfw_http_req_is_nip(req)) {
 			BUG_ON(list_empty(&req->nip_list));
-			__tfw_http_req_nonidemp_delist(srv_conn, req);
+			__tfw_http_req_nip_delist(srv_conn, req);
 		}
 }
 
@@ -437,10 +449,10 @@ tfw_http_conn_nonidemp_delist(TfwConnection *srv_conn)
 static inline bool
 tfw_http_conn_on_hold(TfwConnection *srv_conn)
 {
-	TfwHttpReq *req = (TfwHttpReq *)srv_conn->msg_sent;
+	TfwHttpReq *req = (TfwHttpReq *)srv_conn->req_sent;
 
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
-	return (req && tfw_http_req_is_nonidempotent(req));
+	return (req && tfw_http_req_is_nip(req));
 }
 
 /*
@@ -458,10 +470,10 @@ tfw_http_conn_drained(TfwConnection *srv_conn)
 
 	if (list_empty(fwd_queue))
 		return true;
-	if (!srv_conn->msg_sent)
+	if (!srv_conn->req_sent)
 		return false;
 	msg = (TfwMsg *)list_last_entry(fwd_queue, TfwHttpReq, fwd_list);
-	if (srv_conn->msg_sent == msg)
+	if (srv_conn->req_sent == msg)
 		return true;
 	return false;
 }
@@ -477,6 +489,17 @@ tfw_http_conn_need_fwd(TfwConnection *srv_conn)
 		&& !tfw_http_conn_drained(srv_conn));
 }
 
+/*
+ * Remove @req from the server connection's forwarding queue.
+ */
+static inline void
+tfw_http_req_delist(TfwConnection *srv_conn, TfwHttpReq *req)
+{
+	tfw_http_req_nip_delist(srv_conn, req);
+	list_del(&req->fwd_list);
+	srv_conn->qsize--;
+}
+
 /*
  * Common actions in case of an error while forwarding requests.
  * Erroneous requests are removed from the forwarding queue and placed
@@ -486,9 +509,8 @@ static inline void
 tfw_http_req_move2equeue(TfwConnection *srv_conn, TfwHttpReq *req,
 			 struct list_head *equeue, unsigned short status)
 {
-	tfw_http_req_nonidemp_delist(srv_conn, req);
-	list_move_tail(&req->fwd_list, equeue);
-	srv_conn->qsize--;
+	tfw_http_req_delist(srv_conn, req);
+	list_add_tail(&req->fwd_list, equeue);
 	req->rstatus = status;
 }
 
@@ -506,8 +528,10 @@ tfw_http_req_zap_error(struct list_head *equeue)
 		 __func__, list_empty(err_queue) ? "" : "NOT ");
 
 	list_for_each_entry_safe(req, tmp, equeue, fwd_list) {
-		list_del_init(&req->fwd_list);
-		if (req->rstatus == 500)
+		list_del(&req->fwd_list);
+		if (req->rstatus == 404)
+			tfw_http_send_404(req);
+		else if (req->rstatus == 500)
 			tfw_http_send_500(req);
 		else if (req->rstatus == 504)
 			tfw_http_send_504(req);
@@ -517,9 +541,13 @@ tfw_http_req_zap_error(struct list_head *equeue)
 	}
 }
 
+/*
+ * If @req has timed out (has not been forwarded for too long), then
+ * move it to the error queue @equeue for sending an error response later.
+ */
 static inline bool
-__tfw_http_req_evict_timeout(TfwConnection *srv_conn, TfwServer *srv,
-			     TfwHttpReq *req, struct list_head *equeue)
+tfw_http_req_evict_timeout(TfwConnection *srv_conn, TfwServer *srv,
+			   TfwHttpReq *req, struct list_head *equeue)
 {
 	unsigned long jtimeout = jiffies - req->jtstamp;
 
@@ -533,9 +561,13 @@ __tfw_http_req_evict_timeout(TfwConnection *srv_conn, TfwServer *srv,
 	return false;
 }
 
+/*
+ * If the number of re-forwarding attempts for @req is exceeded, then
+ * move it to the error queue @equeue for sending an error response later.
+ */
 static inline bool
-__tfw_http_req_evict_retries(TfwConnection *srv_conn, TfwServer *srv,
-			     TfwHttpReq *req, struct list_head *equeue)
+tfw_http_req_evict_retries(TfwConnection *srv_conn, TfwServer *srv,
+			   TfwHttpReq *req, struct list_head *equeue)
 {
 	if (unlikely(req->retries++ >= srv->retry_max)) {
 		TFW_DBG2("%s: Eviction: req=[%p] retries=[%d]\n",
@@ -546,16 +578,14 @@ __tfw_http_req_evict_retries(TfwConnection *srv_conn, TfwServer *srv,
 	return false;
 }
 
+/*
+ * If forwarding of @req to server @srv_conn is not successful, then
+ * move it to the error queue @equeue for sending an error response later.
+ */
 static inline bool
-__tfw_http_req_fwd_send(TfwConnection *srv_conn, TfwServer *srv,
-			TfwHttpReq *req, struct list_head *equeue)
+tfw_http_req_fwd_send(TfwConnection *srv_conn, TfwServer *srv,
+		      TfwHttpReq *req, struct list_head *equeue)
 {
-	/*
-	 * If unable to send to the server connection due to an error,
-	 * then move the request to @err_queue for sending a 500 error
-	 * response later. That is safe as the response will be sent
-	 * in proper seq order.
-	 */
 	if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
 		TFW_DBG2("%s: Forwarding error: conn=[%p] req=[%p]\n",
 			 __func__, srv_conn, req);
@@ -586,24 +616,24 @@ __tfw_http_req_fwd_stalled(TfwConnection *srv_conn, struct list_head *equeue)
 	 * it as fast as possible by moving failed requests to other
 	 * queues that can be processed without the lock.
 	 */
-	req = srv_conn->msg_sent
-	    ? list_next_entry((TfwHttpReq *)srv_conn->msg_sent, fwd_list)
+	req = srv_conn->req_sent
+	    ? list_next_entry((TfwHttpReq *)srv_conn->req_sent, fwd_list)
 	    : list_first_entry(fwd_queue, TfwHttpReq, fwd_list);
 
 	list_for_each_entry_safe_from(req, tmp, fwd_queue, fwd_list) {
-		if (__tfw_http_req_evict_timeout(srv_conn, srv, req, equeue))
+		if (tfw_http_req_evict_timeout(srv_conn, srv, req, equeue))
 			continue;
-		if (!__tfw_http_req_fwd_send(srv_conn, srv, req, equeue))
+		if (!tfw_http_req_fwd_send(srv_conn, srv, req, equeue))
 			continue;
-		srv_conn->msg_sent = (TfwMsg *)req;
+		srv_conn->req_sent = (TfwMsg *)req;
 		/* Stop sending if the request is non-idempotent. */
-		if (tfw_http_req_is_nonidempotent(req)) {
+		if (tfw_http_req_is_nip(req)) {
 			TFW_DBG2("%s: Break on non-idempotent: req=[%p]\n",
 				 __func__, req);
 			break;
 		}
-		/* See if a non-idempotent request has become idempotent. */
-		tfw_http_req_nonidemp_delist(srv_conn, req);
+		/* See if the idempotent request was non-idempotent. */
+		tfw_http_req_nip_delist(srv_conn, req);
 	}
 }
 
@@ -656,8 +686,8 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 	drained = tfw_http_conn_drained(srv_conn);
 	list_add_tail(&req->fwd_list, &srv_conn->fwd_queue);
 	srv_conn->qsize++;
-	if (tfw_http_req_is_nonidempotent(req))
-		__tfw_http_req_nonidemp_enlist(srv_conn, req);
+	if (tfw_http_req_is_nip(req))
+		__tfw_http_req_nip_enlist(srv_conn, req);
 	if (tfw_http_conn_on_hold(srv_conn)) {
 		spin_unlock(&srv_conn->fwd_qlock);
 		TFW_DBG2("%s: Server connection is on hold: conn=[%p]\n",
@@ -672,9 +702,7 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 		return;
 	}
 	if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
-		tfw_http_req_nonidemp_delist(srv_conn, req);
-		list_del_init(&req->fwd_list);
-		srv_conn->qsize--;
+		tfw_http_req_delist(srv_conn, req);
 		spin_unlock(&srv_conn->fwd_qlock);
 		TFW_DBG2("%s: Forwarding error: conn=[%p] req=[%p]\n",
 			 __func__, srv_conn, req);
@@ -682,7 +710,7 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 		TFW_INC_STAT_BH(clnt.msgs_otherr);
 		return;
 	}
-	srv_conn->msg_sent = (TfwMsg *)req;
+	srv_conn->req_sent = (TfwMsg *)req;
 	spin_unlock(&srv_conn->fwd_qlock);
 }
 
@@ -694,48 +722,31 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
  * are not re-sent or re-scheduled by default. Configuration option
  * can be used to have those requests re-sent or re-scheduled as well.
  *
- * Note: @srv_conn->msg_sent may change in result.
+ * Note: @srv_conn->req_sent may change in result.
  */
 static inline void
-tfw_http_req_fwd_handlenip(TfwConnection *srv_conn)
+tfw_http_req_fwd_handlenip(TfwConnection *srv_conn, struct list_head *equeue)
 {
 	TfwServer *srv = (TfwServer *)srv_conn->peer;
-	TfwHttpReq *req_sent = (TfwHttpReq *)srv_conn->msg_sent;
+	TfwHttpReq *req_sent = (TfwHttpReq *)srv_conn->req_sent;
 
-	if (req_sent && tfw_http_req_is_nonidempotent(req_sent)
+	if (req_sent && tfw_http_req_is_nip(req_sent)
 	    && likely(!(srv->flags & TFW_SRV_RETRY_NON_IDEMP)))
 	{
 		BUG_ON(list_empty(&req_sent->nip_list));
-		srv_conn->msg_sent =
-			(&req_sent->fwd_list == srv_conn->fwd_queue.next) ?
+		srv_conn->req_sent =
+			(srv_conn->fwd_queue.next == &req_sent->fwd_list) ?
 			NULL : (TfwMsg *)list_prev_entry(req_sent, fwd_list);
-		__tfw_http_req_nonidemp_delist(srv_conn, req_sent);
-		list_del_init(&req_sent->fwd_list);
-		srv_conn->qsize--;
-		tfw_http_send_404(req_sent);
-		TFW_INC_STAT_BH(clnt.msgs_otherr);
+		tfw_http_req_move2equeue(srv_conn, req_sent, equeue, 404);
 	}
 }
 
-static inline bool
-__tfw_http_req_resend_one(TfwConnection *srv_conn, TfwServer *srv,
-			      TfwHttpReq *req, struct list_head *equeue)
-{
-	if (__tfw_http_req_evict_timeout(srv_conn, srv, req, equeue))
-		return false;
-	if (__tfw_http_req_evict_retries(srv_conn, srv, req, equeue))
-		return false;
-	if (!__tfw_http_req_fwd_send(srv_conn, srv, req, equeue))
-		return false;
-	return true;
-}
-
 /*
  * Re-forward requests in a server connection. Requests that exceed
  * the set limits are evicted.
  */
 static TfwMsg *
-__tfw_http_req_resend(TfwConnection *srv_conn,
+tfw_http_req_resend(TfwConnection *srv_conn,
 		      bool first, struct list_head *equeue)
 {
 	TfwHttpReq *req, *tmp, *req_resent = NULL;
@@ -744,37 +755,41 @@ __tfw_http_req_resend(TfwConnection *srv_conn,
 
 	TFW_DBG2("%s: conn=[%p] one_msg=[%s]\n",
 		 __func__, srv_conn, one_msg ? "true" : "false");
-	BUG_ON(!srv_conn->msg_sent);
-	BUG_ON(list_empty(&((TfwHttpReq *)srv_conn->msg_sent)->fwd_list));
+	BUG_ON(!srv_conn->req_sent);
+	BUG_ON(list_empty(&((TfwHttpReq *)srv_conn->req_sent)->fwd_list));
 
 	req = list_first_entry(fwd_queue, TfwHttpReq, fwd_list);
-	end = ((TfwHttpReq *)srv_conn->msg_sent)->fwd_list.next;
+	end = ((TfwHttpReq *)srv_conn->req_sent)->fwd_list.next;
 
-	/* Similar to list_for_each_entry_safe() */
+	/* Similar to list_for_each_entry_safe_from() */
 	for (tmp = list_next_entry(req, fwd_list);
 	     &req->fwd_list != end;
 	     req = tmp, tmp = list_next_entry(tmp, fwd_list))
 	{
-		if (__tfw_http_req_resend_one(srv_conn, srv, req, equeue)) {
-			req_resent = req;
-			if (unlikely(first))
-				break;
-		}
+		if (tfw_http_req_evict_timeout(srv_conn, srv, req, equeue))
+			continue;
+		if (tfw_http_req_evict_retries(srv_conn, srv, req, equeue))
+			continue;
+		if (!tfw_http_req_fwd_send(srv_conn, srv, req, equeue))
+			continue;
+		req_resent = req;
+		if (unlikely(first))
+			break;
 	}
 
 	return (TfwMsg *)req_resent;
 }
 
 static inline TfwMsg *
-__tfw_http_req_resend_first(TfwConnection *srv_conn, struct list_head *equeue)
+tfw_http_req_resend_first(TfwConnection *srv_conn, struct list_head *equeue)
 {
-	return __tfw_http_req_resend(srv_conn, true, equeue);
+	return tfw_http_req_resend(srv_conn, true, equeue);
 }
 
 static inline TfwMsg *
-__tfw_http_req_resend_all(TfwConnection *srv_conn, struct list_head *equeue)
+tfw_http_req_resend_all(TfwConnection *srv_conn, struct list_head *equeue)
 {
-	return __tfw_http_req_resend(srv_conn, false, equeue);
+	return tfw_http_req_resend(srv_conn, false, equeue);
 }
 
 /*
@@ -798,12 +813,25 @@ tfw_http_req_fwd_repair(TfwConnection *srv_conn)
 		if (tfw_http_conn_need_fwd(srv_conn))
 			__tfw_http_req_fwd_stalled(srv_conn, &equeue);
 	} else {
-		if (srv_conn->msg_sent)
-			srv_conn->msg_sent =
-				__tfw_http_req_resend_all(srv_conn, &equeue);
-		set_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
-		if (tfw_http_conn_need_fwd(srv_conn))
-			__tfw_http_req_fwd_stalled(srv_conn, &equeue);
+		/*
+		 * After all previously forwarded requests are re-sent,
+		 * srv_conn->req_sent will be either NULL or the last
+		 * request that was re-sent successfully. If re-sending
+		 * of non-idempotent requests is allowed, then that last
+		 * request may be non-idempotent. Continue with sending
+		 * requests that were never forwarded only if the last
+		 * request that was re-sent was NOT non-idempotent.
+		 */
+		if (srv_conn->req_sent)
+			srv_conn->req_sent =
+				tfw_http_req_resend_all(srv_conn, &equeue);
+		if (!srv_conn->req_sent
+		    || !tfw_http_req_is_nip((TfwHttpReq *)srv_conn->req_sent))
+		{
+			set_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
+			if (tfw_http_conn_need_fwd(srv_conn))
+				__tfw_http_req_fwd_stalled(srv_conn, &equeue);
+		}
 	}
 	spin_unlock(&srv_conn->fwd_qlock);
 
@@ -897,36 +925,28 @@ tfw_http_conn_msg_free(TfwHttpMsg *hm)
  * with minimal effort.
  */
 static void
-tfw_http_req_resched(TfwConnection *srv_conn)
+tfw_http_req_resched(TfwConnection *srv_conn, struct list_head *equeue)
 {
 	TfwHttpReq *req, *tmp;
 	TfwConnection *sconn;
+	TfwServer *srv = (TfwServer *)srv_conn->peer;
 	struct list_head *fwd_queue = &srv_conn->fwd_queue;
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, conn);
 
-	/* Handle non-idempotent requests. */
-	tfw_http_req_fwd_handlenip(srv_conn);
+	/* Handle the non-idempotent request if any. */
+	tfw_http_req_fwd_handlenip(srv_conn, equeue);
 
 	/* Process complete queue. */
 	list_for_each_entry_safe(req, tmp, fwd_queue, fwd_list) {
-		tfw_http_req_nonidemp_delist(srv_conn, req);
-		list_del_init(&req->fwd_list);
-		srv_conn->qsize--;
+		if (tfw_http_req_evict_retries(srv_conn, srv, req, equeue))
+			continue;
 		if (!(sconn = tfw_sched_get_srv_conn((TfwMsg *)req))) {
 			TFW_WARN("Unable to find a backend server\n");
-			tfw_http_send_404(req);
-			TFW_INC_STAT_BH(clnt.msgs_otherr);
-			continue;
-		}
-		if (req->retries++ >= ((TfwServer *)sconn->peer)->retry_max) {
-			TFW_DBG2("%s: Eviction: req=[%p] retries=[%d]\n",
-				 __func__, req, req->retries);
-			tfw_http_send_504(req);
-			TFW_INC_STAT_BH(clnt.msgs_otherr);
-			tfw_connection_put(sconn);
+			tfw_http_req_move2equeue(srv_conn, req, equeue, 404);
 			continue;
 		}
+		tfw_http_req_delist(srv_conn, req);
 		tfw_http_req_fwd(sconn, req);
 		tfw_connection_put(sconn);
 	}
@@ -944,7 +964,7 @@ tfw_http_req_resched(TfwConnection *srv_conn)
 static void
 tfw_http_conn_repair(TfwConnection *srv_conn)
 {
-	TfwMsg *msg_resent = NULL;
+	TfwMsg *req_resent = NULL;
 	LIST_HEAD(equeue);
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
@@ -953,26 +973,27 @@ tfw_http_conn_repair(TfwConnection *srv_conn)
 
 	/* See if requests need to be rescheduled. */
 	if (unlikely(!tfw_connection_live(srv_conn))) {
-		tfw_http_req_resched(srv_conn);
-		return;
+		tfw_http_req_resched(srv_conn, &equeue);
+		goto zap_error;
 	}
+
 	spin_lock(&srv_conn->fwd_qlock);
-	/* Handle non-idempotent requests. */
-	tfw_http_req_fwd_handlenip(srv_conn);
+	/* Handle the non-idempotent request if any. */
+	tfw_http_req_fwd_handlenip(srv_conn, &equeue);
 	/* Re-send the first unanswered request. */
-	if (srv_conn->msg_sent) {
-		msg_resent = __tfw_http_req_resend_first(srv_conn, &equeue);
-		if (unlikely(!msg_resent))
-			srv_conn->msg_sent = NULL;
+	if (srv_conn->req_sent) {
+		req_resent = tfw_http_req_resend_first(srv_conn, &equeue);
+		if (unlikely(!req_resent))
+			srv_conn->req_sent = NULL;
 	}
 	/* If none resent, then send the remaining unsent requests. */
-	if (!msg_resent) {
+	if (!req_resent) {
 		set_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
 		if (tfw_http_conn_need_fwd(srv_conn))
 			__tfw_http_req_fwd_stalled(srv_conn, &equeue);
 	}
 	spin_unlock(&srv_conn->fwd_qlock);
-
+zap_error:
 	if (!list_empty(&equeue))
 		tfw_http_req_zap_error(&equeue);
 }
@@ -1038,7 +1059,8 @@ tfw_http_conn_release(TfwConnection *srv_conn)
  * server connections until paired responses come. If a response comes
  * after the list is destroyed, then both the request and the response
  * are dropped at the sight of an empty list. The requests from the
- * dead client connection are then removed from that server connection.
+ * dead client connection are then removed from that server connection
+ * and freed.
  *
  * Locking is necessary as the list is constantly probed from server
  * connection threads.
@@ -1061,7 +1083,7 @@ tfw_http_conn_cli_drop(TfwConnection *cli_conn)
 	spin_unlock(&cli_conn->seq_qlock);
 
 	list_for_each_entry_safe(hmreq, tmp, &zap_queue, msg.seq_list)
-		list_del_init(&hmreq->msg.seq_list);
+		list_del(&hmreq->msg.seq_list);
 }
 
 /*
@@ -1368,7 +1390,7 @@ __tfw_http_resp_fwd(TfwConnection *cli_conn, struct list_head *ret_queue)
 
 	/* Forward responses to the client. */
 	list_for_each_entry_safe(req, tmp, ret_queue, msg.seq_list) {
-		list_del_init(&req->msg.seq_list);
+		list_del(&req->msg.seq_list);
 		resp = (TfwHttpResp *)req->resp;
 		BUG_ON(!resp);
 		/*
@@ -1440,10 +1462,13 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 		return;
 	}
 	__list_cut_position(&ret_queue, seq_queue, req_retent);
+
 	tfw_connection_get(cli_conn);
 	spin_lock(&cli_conn->ret_qlock);
 	spin_unlock(&cli_conn->seq_qlock);
+
 	__tfw_http_resp_fwd(cli_conn, &ret_queue);
+
 	spin_unlock(&cli_conn->ret_qlock);
 	tfw_connection_put(cli_conn);
 }
@@ -1542,7 +1567,7 @@ tfw_http_req_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 }
 
 static void
-tfw_http_req_mark_nonidempotent(TfwHttpReq *req)
+tfw_http_req_mark_nip(TfwHttpReq *req)
 {
 	/* See RFC 7231 4.2.1 */
 	static const unsigned int __read_mostly safe_methods =
@@ -1581,10 +1606,10 @@ tfw_http_req_mark_nonidempotent(TfwHttpReq *req)
 }
 
 /*
- * Set a flag if the request is non-idempotent. Add the request to
- * the list of the client connection to preserve the correct order
- * of responses. If the request follows a non-idempotent request
- * in flight, then the preceding request becomes idempotent.
+ * Set the flag if @req is non-idempotent. Add the request to the list
+ * of the client connection to preserve the correct order of responses.
+ * If the request follows a non-idempotent request in flight, then the
+ * preceding request becomes idempotent.
  */
 static void
 tfw_http_req_add_seq_queue(TfwHttpReq *req)
@@ -1593,13 +1618,13 @@ tfw_http_req_add_seq_queue(TfwHttpReq *req)
 	TfwConnection *cli_conn = req->conn;
 	struct list_head *seq_queue = &cli_conn->seq_queue;
 
-	tfw_http_req_mark_nonidempotent(req);
+	tfw_http_req_mark_nip(req);
 
 	spin_lock(&cli_conn->seq_qlock);
 	preq = !list_empty(seq_queue)
 	     ? list_last_entry(seq_queue, TfwHttpReq, msg.seq_list)
 	     : NULL;
-	if (preq && (preq->flags & TFW_HTTP_NON_IDEMP))
+	if (preq && tfw_http_req_is_nip(preq))
 		preq->flags &= ~TFW_HTTP_NON_IDEMP;
 	list_add_tail(&req->msg.seq_list, seq_queue);
 	spin_unlock(&cli_conn->seq_qlock);
@@ -1904,12 +1929,10 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 		return NULL;
 	}
 	req = list_first_entry(fwd_queue, TfwHttpReq, fwd_list);
-	list_del_init(&req->fwd_list);
-	srv_conn->qsize--;
-	if ((TfwMsg *)req == srv_conn->msg_sent)
-		srv_conn->msg_sent = NULL;
-	tfw_http_req_nonidemp_delist(srv_conn, req);
-	tfw_http_conn_nonidemp_delist(srv_conn);
+	if ((TfwMsg *)req == srv_conn->req_sent)
+		srv_conn->req_sent = NULL;
+	tfw_http_req_delist(srv_conn, req);
+	tfw_http_conn_nip_delist(srv_conn);
 	/*
 	 * Perform special processing if the connection is in repair
 	 * mode. Otherwise, forward pending requests to the server.

From 28286448347160276bfb1c6b2e49b2b9a337ba99 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Thu, 12 Jan 2017 15:47:57 +0300
Subject: [PATCH 33/65] Replace spaces with tabs in vhost.c

---
 tempesta_fw/vhost.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tempesta_fw/vhost.c b/tempesta_fw/vhost.c
index bfa4cc65a..0fcd166ce 100644
--- a/tempesta_fw/vhost.c
+++ b/tempesta_fw/vhost.c
@@ -962,28 +962,28 @@ tfw_vhost_cfg_stop(void)
 }
 
 static TfwCfgSpec tfw_location_specs[] = {
-        {
+	{
 		"cache_bypass", NULL,
 		tfw_handle_in_cache_bypass,
 		.allow_none = true,
 		.allow_repeat = true,
 		.cleanup = tfw_cleanup_locache
-        },
-        {
+	},
+	{
 		"cache_fulfill", NULL,
 		tfw_handle_in_cache_fulfill,
 		.allow_none = true,
 		.allow_repeat = true,
 		.cleanup = tfw_cleanup_locache
-        },
-        {
+	},
+	{
 		"nonidempotent", NULL,
 		tfw_handle_in_nonidempotent,
 		.allow_none = true,
 		.allow_repeat = true,
 		.cleanup = tfw_cleanup_locache
-        },
-        { 0 }
+	},
+	{ 0 }
 };
 
 static TfwCfgSpec tfw_vhost_cfg_specs[] = {
@@ -1015,13 +1015,13 @@ static TfwCfgSpec tfw_vhost_cfg_specs[] = {
 		.allow_repeat = true,
 		.cleanup = tfw_cleanup_locache
 	},
-        {
+	{
 		"cache_fulfill", NULL,
 		tfw_handle_out_cache_fulfill,
 		.allow_none = true,
 		.allow_repeat = true,
 		.cleanup = tfw_cleanup_locache
-        },
+	},
 	{
 		"nonidempotent", NULL,
 		tfw_handle_out_nonidempotent,

From 84542c1428e34895b3f72db94872159a9d263dbb Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Fri, 13 Jan 2017 15:05:25 +0300
Subject: [PATCH 34/65] Return 502 or 504 error message instead of 404.

---
 tempesta_fw/http.c | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 59b2db7de..62f2e4fa2 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -463,7 +463,7 @@ tfw_http_conn_on_hold(TfwConnection *srv_conn)
 static inline bool
 tfw_http_conn_drained(TfwConnection *srv_conn)
 {
-	TfwMsg *msg;
+	TfwHttpReq *req_last;
 	struct list_head *fwd_queue = &srv_conn->fwd_queue;
 
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
@@ -472,8 +472,8 @@ tfw_http_conn_drained(TfwConnection *srv_conn)
 		return true;
 	if (!srv_conn->req_sent)
 		return false;
-	msg = (TfwMsg *)list_last_entry(fwd_queue, TfwHttpReq, fwd_list);
-	if (srv_conn->req_sent == msg)
+	req_last = list_last_entry(fwd_queue, TfwHttpReq, fwd_list);
+	if (srv_conn->req_sent == (TfwMsg *)req_last)
 		return true;
 	return false;
 }
@@ -533,6 +533,8 @@ tfw_http_req_zap_error(struct list_head *equeue)
 			tfw_http_send_404(req);
 		else if (req->rstatus == 500)
 			tfw_http_send_500(req);
+		else if (req->rstatus == 502)
+			tfw_http_send_502(req);
 		else if (req->rstatus == 504)
 			tfw_http_send_504(req);
 		else
@@ -715,12 +717,15 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 }
 
 /*
- * Handle non-idempotent requests in case of a connection repair
- * (re-send or re-schedule).
+ * Handle a possible non-idempotent request in case of a connection
+ * repair (re-send or re-schedule).
  *
- * Non-idempotent requests that were forwarded but not responded to
- * are not re-sent or re-scheduled by default. Configuration option
- * can be used to have those requests re-sent or re-scheduled as well.
+ * A non-idempotent request that was forwarded but not responded to
+ * is not re-sent or re-scheduled by default. Configuration option
+ * can be used to have that request re-sent or re-scheduled as well.
+ *
+ * As forwarding is paused after a non-idempotent request is sent,
+ * there can be only one such request, and that's @srv_conn->req_sent.
  *
  * Note: @srv_conn->req_sent may change in result.
  */
@@ -737,7 +742,7 @@ tfw_http_req_fwd_handlenip(TfwConnection *srv_conn, struct list_head *equeue)
 		srv_conn->req_sent =
 			(srv_conn->fwd_queue.next == &req_sent->fwd_list) ?
 			NULL : (TfwMsg *)list_prev_entry(req_sent, fwd_list);
-		tfw_http_req_move2equeue(srv_conn, req_sent, equeue, 404);
+		tfw_http_req_move2equeue(srv_conn, req_sent, equeue, 504);
 	}
 }
 
@@ -943,7 +948,7 @@ tfw_http_req_resched(TfwConnection *srv_conn, struct list_head *equeue)
 			continue;
 		if (!(sconn = tfw_sched_get_srv_conn((TfwMsg *)req))) {
 			TFW_WARN("Unable to find a backend server\n");
-			tfw_http_req_move2equeue(srv_conn, req, equeue, 404);
+			tfw_http_req_move2equeue(srv_conn, req, equeue, 502);
 			continue;
 		}
 		tfw_http_req_delist(srv_conn, req);
@@ -1494,7 +1499,7 @@ tfw_http_req_cache_service(TfwHttpReq *req, TfwHttpResp *resp)
 /**
  * Depending on results of processing of a request, either send the request
  * to an appropriate server, or return the cached response. If none of that
- * can be done for any reason, return HTTP 404 or 500 error to the client.
+ * can be done for any reason, return HTTP 500 or 502 error to the client.
  */
 static void
 tfw_http_req_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)

From ed9b2f44e6aad531905c2733e4c4bd5129ea5a5c Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Mon, 16 Jan 2017 16:15:02 +0300
Subject: [PATCH 35/65] Don't call connection repair if the connection is not
 restricted.

---
 tempesta_fw/sock_srv.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index e52ac49a4..e031f5252 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -228,9 +228,10 @@ tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
 		char s_addr[TFW_ADDR_STR_BUF_SIZE] = { 0 };
 		tfw_addr_ntop(srv_addr, s_addr, sizeof(s_addr));
 		TFW_WARN("The limit of [%d] on reconnect attempts exceeded. "
-			 "The server connection [%s] is down permanently.\n",
+			 "The server connection [%s] is down.\n",
 			 srv_conn->max_attempts, s_addr);
-		tfw_connection_repair(&srv_conn->conn);
+		if (unlikely(tfw_connection_restricted(&srv_conn->conn)))
+			tfw_connection_repair(&srv_conn->conn);
 	}
 	if (srv_conn->attempts < ARRAY_SIZE(timeouts)) {
 		srv_conn->timeout = timeouts[srv_conn->attempts];

From fac5ceaa3d21a9e2257e2394dab6fbb64cce7528 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Tue, 17 Jan 2017 14:37:37 +0300
Subject: [PATCH 36/65] Separate APM timestamps and eviction (age) timestamp.

---
 tempesta_fw/connection.h |   7 +--
 tempesta_fw/http.c       | 116 +++++++++++++++++++--------------------
 tempesta_fw/http.h       |  10 +++-
 3 files changed, 68 insertions(+), 65 deletions(-)

diff --git a/tempesta_fw/connection.h b/tempesta_fw/connection.h
index d6375c824..be82aad7e 100644
--- a/tempesta_fw/connection.h
+++ b/tempesta_fw/connection.h
@@ -81,14 +81,13 @@ enum {
  * @seq_queue	- queue of client's messages in the order they came;
  * @fwd_qlock	- lock for accessing @fwd_queue and @nip_queue;
  * @seq_qlock	- lock for accessing @seq_queue;
- * @ret_qlock	- lock for accessing @ret_queue;
+ * @ret_qlock	- lock for serializing sets of responses;
  * @flags	- atomic flags related to server connection's state;
  * @refcnt	- number of users of the connection structure instance;
  * @qsize	- current number of requests in server's @msg_queue;
  * @timer	- The keep-alive/retry timer for the connection;
  * @msg		- message that is currently being processed;
- * @req_sent	- request that was sent last in a server connection;
- * @msg_resent	- message that was re-sent last in a server connection;
+ * @msg_sent	- request that was sent last in a server connection;
  * @peer	- TfwClient or TfwServer handler;
  * @sk		- an appropriate sock handler;
  * @destructor	- called when a connection is destroyed;
@@ -113,7 +112,7 @@ typedef struct tfw_connection_t {
 	int			qsize;					/*srv*/
 	struct timer_list	timer;
 	TfwMsg			*msg;
-	TfwMsg			*req_sent;				/*srv*/
+	TfwMsg			*msg_sent;				/*srv*/
 	TfwPeer 		*peer;
 	struct sock		*sk;
 	void			(*destructor)(void *);
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 62f2e4fa2..b3c9c9e24 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -394,7 +394,7 @@ static inline void
 __tfw_http_req_nip_delist(TfwConnection *srv_conn, TfwHttpReq *req)
 {
 	BUG_ON(list_empty(&req->nip_list));
-	list_del(&req->nip_list);
+	list_del_init(&req->nip_list);
 	if (list_empty(&srv_conn->nip_queue))
 		clear_bit(TFW_CONN_B_HASNIP, &srv_conn->flags);
 }
@@ -449,10 +449,10 @@ tfw_http_conn_nip_delist(TfwConnection *srv_conn)
 static inline bool
 tfw_http_conn_on_hold(TfwConnection *srv_conn)
 {
-	TfwHttpReq *req = (TfwHttpReq *)srv_conn->req_sent;
+	TfwHttpReq *req_sent = (TfwHttpReq *)srv_conn->msg_sent;
 
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
-	return (req && tfw_http_req_is_nip(req));
+	return (req_sent && tfw_http_req_is_nip(req_sent));
 }
 
 /*
@@ -463,17 +463,16 @@ tfw_http_conn_on_hold(TfwConnection *srv_conn)
 static inline bool
 tfw_http_conn_drained(TfwConnection *srv_conn)
 {
-	TfwHttpReq *req_last;
 	struct list_head *fwd_queue = &srv_conn->fwd_queue;
+	TfwHttpReq *req_sent = (TfwHttpReq *)srv_conn->msg_sent;
 
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
 
 	if (list_empty(fwd_queue))
 		return true;
-	if (!srv_conn->req_sent)
+	if (!req_sent)
 		return false;
-	req_last = list_last_entry(fwd_queue, TfwHttpReq, fwd_list);
-	if (srv_conn->req_sent == (TfwMsg *)req_last)
+	if (list_is_last(&req_sent->fwd_list, fwd_queue))
 		return true;
 	return false;
 }
@@ -496,7 +495,7 @@ static inline void
 tfw_http_req_delist(TfwConnection *srv_conn, TfwHttpReq *req)
 {
 	tfw_http_req_nip_delist(srv_conn, req);
-	list_del(&req->fwd_list);
+	list_del_init(&req->fwd_list);
 	srv_conn->qsize--;
 }
 
@@ -528,7 +527,7 @@ tfw_http_req_zap_error(struct list_head *equeue)
 		 __func__, list_empty(err_queue) ? "" : "NOT ");
 
 	list_for_each_entry_safe(req, tmp, equeue, fwd_list) {
-		list_del(&req->fwd_list);
+		list_del_init(&req->fwd_list);
 		if (req->rstatus == 404)
 			tfw_http_send_404(req);
 		else if (req->rstatus == 500)
@@ -551,7 +550,7 @@ static inline bool
 tfw_http_req_evict_timeout(TfwConnection *srv_conn, TfwServer *srv,
 			   TfwHttpReq *req, struct list_head *equeue)
 {
-	unsigned long jtimeout = jiffies - req->jtstamp;
+	unsigned long jtimeout = jiffies - req->jrxtstamp;
 
 	if (unlikely(time_after(jtimeout, srv->qjtimeout))) {
 		TFW_DBG2("%s: Eviction: req=[%p] overdue=[%dms]\n",
@@ -588,6 +587,8 @@ static inline bool
 tfw_http_req_fwd_send(TfwConnection *srv_conn, TfwServer *srv,
 		      TfwHttpReq *req, struct list_head *equeue)
 {
+	req->jtxtstamp = jiffies;
+
 	if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
 		TFW_DBG2("%s: Forwarding error: conn=[%p] req=[%p]\n",
 			 __func__, srv_conn, req);
@@ -618,8 +619,8 @@ __tfw_http_req_fwd_stalled(TfwConnection *srv_conn, struct list_head *equeue)
 	 * it as fast as possible by moving failed requests to other
 	 * queues that can be processed without the lock.
 	 */
-	req = srv_conn->req_sent
-	    ? list_next_entry((TfwHttpReq *)srv_conn->req_sent, fwd_list)
+	req = srv_conn->msg_sent
+	    ? list_next_entry((TfwHttpReq *)srv_conn->msg_sent, fwd_list)
 	    : list_first_entry(fwd_queue, TfwHttpReq, fwd_list);
 
 	list_for_each_entry_safe_from(req, tmp, fwd_queue, fwd_list) {
@@ -627,13 +628,10 @@ __tfw_http_req_fwd_stalled(TfwConnection *srv_conn, struct list_head *equeue)
 			continue;
 		if (!tfw_http_req_fwd_send(srv_conn, srv, req, equeue))
 			continue;
-		srv_conn->req_sent = (TfwMsg *)req;
+		srv_conn->msg_sent = (TfwMsg *)req;
 		/* Stop sending if the request is non-idempotent. */
-		if (tfw_http_req_is_nip(req)) {
-			TFW_DBG2("%s: Break on non-idempotent: req=[%p]\n",
-				 __func__, req);
+		if (tfw_http_req_is_nip(req))
 			break;
-		}
 		/* See if the idempotent request was non-idempotent. */
 		tfw_http_req_nip_delist(srv_conn, req);
 	}
@@ -671,10 +669,15 @@ tfw_http_req_fwd_stalled(TfwConnection *srv_conn)
  * the request to the server immediately. If the queue is not drained,
  * then forward all stalled requests to the server.
  *
+ * The forwarding queue may not be drained only if it was on hold, and
+ * a number of requests had piled up. That would be a rather infrequent
+ * case. A regular case would be forwarding a single request, and that
+ * can be done in a simple fashion.
+ *
  * Forwarding to a server is considered to be on hold after
- * a non-idempotent request is forwarded to the server. The hold
- * is removed when the holding non-idempotent request is followed
- * by another request from the same client, which enables pipelining.
+ * a non-idempotent request is forwarded. The hold is removed when
+ * the holding non-idempotent request is followed by another request
+ * from the same client. Effectively, that re-enables pipelining.
  */
 static void
 tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
@@ -692,17 +695,14 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 		__tfw_http_req_nip_enlist(srv_conn, req);
 	if (tfw_http_conn_on_hold(srv_conn)) {
 		spin_unlock(&srv_conn->fwd_qlock);
-		TFW_DBG2("%s: Server connection is on hold: conn=[%p]\n",
-			 __func__, srv_conn);
 		return;
 	}
 	if (!drained) {
-		TFW_DBG2("%s: Server connection is not drained: conn=[%p]\n",
-			 __func__, srv_conn);
 		tfw_http_req_fwd_stalled(srv_conn);
 		/* The queue is unlocked inside the function. */
 		return;
 	}
+	req->jtxtstamp = jiffies;
 	if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
 		tfw_http_req_delist(srv_conn, req);
 		spin_unlock(&srv_conn->fwd_qlock);
@@ -712,7 +712,7 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 		TFW_INC_STAT_BH(clnt.msgs_otherr);
 		return;
 	}
-	srv_conn->req_sent = (TfwMsg *)req;
+	srv_conn->msg_sent = (TfwMsg *)req;
 	spin_unlock(&srv_conn->fwd_qlock);
 }
 
@@ -725,21 +725,22 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
  * can be used to have that request re-sent or re-scheduled as well.
  *
  * As forwarding is paused after a non-idempotent request is sent,
- * there can be only one such request, and that's @srv_conn->req_sent.
+ * there can be only one such request, and that's @srv_conn->msg_sent.
  *
- * Note: @srv_conn->req_sent may change in result.
+ * Note: @srv_conn->msg_sent may change in result.
  */
 static inline void
 tfw_http_req_fwd_handlenip(TfwConnection *srv_conn, struct list_head *equeue)
 {
 	TfwServer *srv = (TfwServer *)srv_conn->peer;
-	TfwHttpReq *req_sent = (TfwHttpReq *)srv_conn->req_sent;
+	TfwHttpReq *req_sent = (TfwHttpReq *)srv_conn->msg_sent;
 
 	if (req_sent && tfw_http_req_is_nip(req_sent)
 	    && likely(!(srv->flags & TFW_SRV_RETRY_NON_IDEMP)))
 	{
 		BUG_ON(list_empty(&req_sent->nip_list));
-		srv_conn->req_sent =
+		srv_conn->msg_sent =
+			/* list_is_first(&req_sent->fwd_list, fwd_queue); */
 			(srv_conn->fwd_queue.next == &req_sent->fwd_list) ?
 			NULL : (TfwMsg *)list_prev_entry(req_sent, fwd_list);
 		tfw_http_req_move2equeue(srv_conn, req_sent, equeue, 504);
@@ -760,11 +761,11 @@ tfw_http_req_resend(TfwConnection *srv_conn,
 
 	TFW_DBG2("%s: conn=[%p] one_msg=[%s]\n",
 		 __func__, srv_conn, one_msg ? "true" : "false");
-	BUG_ON(!srv_conn->req_sent);
-	BUG_ON(list_empty(&((TfwHttpReq *)srv_conn->req_sent)->fwd_list));
+	BUG_ON(!srv_conn->msg_sent);
+	BUG_ON(list_empty(&((TfwHttpReq *)srv_conn->msg_sent)->fwd_list));
 
 	req = list_first_entry(fwd_queue, TfwHttpReq, fwd_list);
-	end = ((TfwHttpReq *)srv_conn->req_sent)->fwd_list.next;
+	end = ((TfwHttpReq *)srv_conn->msg_sent)->fwd_list.next;
 
 	/* Similar to list_for_each_entry_safe_from() */
 	for (tmp = list_next_entry(req, fwd_list);
@@ -820,18 +821,18 @@ tfw_http_req_fwd_repair(TfwConnection *srv_conn)
 	} else {
 		/*
 		 * After all previously forwarded requests are re-sent,
-		 * srv_conn->req_sent will be either NULL or the last
+		 * srv_conn->msg_sent will be either NULL or the last
 		 * request that was re-sent successfully. If re-sending
 		 * of non-idempotent requests is allowed, then that last
 		 * request may be non-idempotent. Continue with sending
 		 * requests that were never forwarded only if the last
 		 * request that was re-sent was NOT non-idempotent.
 		 */
-		if (srv_conn->req_sent)
-			srv_conn->req_sent =
+		if (srv_conn->msg_sent)
+			srv_conn->msg_sent =
 				tfw_http_req_resend_all(srv_conn, &equeue);
-		if (!srv_conn->req_sent
-		    || !tfw_http_req_is_nip((TfwHttpReq *)srv_conn->req_sent))
+		if (!srv_conn->msg_sent
+		    || !tfw_http_req_is_nip((TfwHttpReq *)srv_conn->msg_sent))
 		{
 			set_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
 			if (tfw_http_conn_need_fwd(srv_conn))
@@ -986,10 +987,10 @@ tfw_http_conn_repair(TfwConnection *srv_conn)
 	/* Handle the non-idempotent request if any. */
 	tfw_http_req_fwd_handlenip(srv_conn, &equeue);
 	/* Re-send the first unanswered request. */
-	if (srv_conn->req_sent) {
+	if (srv_conn->msg_sent) {
 		req_resent = tfw_http_req_resend_first(srv_conn, &equeue);
 		if (unlikely(!req_resent))
-			srv_conn->req_sent = NULL;
+			srv_conn->msg_sent = NULL;
 	}
 	/* If none resent, then send the remaining unsent requests. */
 	if (!req_resent) {
@@ -1088,7 +1089,7 @@ tfw_http_conn_cli_drop(TfwConnection *cli_conn)
 	spin_unlock(&cli_conn->seq_qlock);
 
 	list_for_each_entry_safe(hmreq, tmp, &zap_queue, msg.seq_list)
-		list_del(&hmreq->msg.seq_list);
+		list_del_init(&hmreq->msg.seq_list);
 }
 
 /*
@@ -1395,7 +1396,7 @@ __tfw_http_resp_fwd(TfwConnection *cli_conn, struct list_head *ret_queue)
 
 	/* Forward responses to the client. */
 	list_for_each_entry_safe(req, tmp, ret_queue, msg.seq_list) {
-		list_del(&req->msg.seq_list);
+		list_del_init(&req->msg.seq_list);
 		resp = (TfwHttpResp *)req->resp;
 		BUG_ON(!resp);
 		/*
@@ -1737,11 +1738,11 @@ tfw_http_req_process(TfwConnection *conn, struct sk_buff *skb, unsigned int off)
 		}
 
 		/*
-		 * The time the request was received is used in cache
-		 * for age calculations, and for APM and Load Balancing.
+		 * The time the request was received is used for age
+		 * calculations in cache, and for eviction purposes.
 		 */
 		req->cache_ctl.timestamp = tfw_current_timestamp();
-		req->jtstamp = jiffies;
+		req->jrxtstamp = jiffies;
 
 		/* Assign the right Vhost for this request. */
 		if (tfw_http_req_set_context(req)) {
@@ -1889,19 +1890,18 @@ tfw_http_resp_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 		return;
 	}
 	/*
-	 * Responses from cache don't have @resp->conn.
+	 * Responses from cache don't have @resp->conn. Also, for those
+	 * responses @req->jtxtstamp is not set and remains zero.
 	 *
-	 * FIXME: The same check is performed in tfw_http_popreq()
-	 * which happens just a bit earlier. Is there a way to avoid
-	 * it here? The condition is considered rare, and there's no
-	 * need to check for it in the regular path. The real issue
-	 * here is that APM stats can't handle response times that
-	 * are >= USHORT_MAX. So for now don't count the requests
-	 * that are re-sent after a server connection is restored.
+	 * APM stats can't handle response times that are >= USHRT_MAX
+	 * which is about 65 secs.
 	 */
-	if (resp->conn && !tfw_connection_restricted(resp->conn))
-		tfw_apm_update(((TfwServer *)resp->conn->peer)->apm,
-			       resp->jtstamp, resp->jtstamp - req->jtstamp);
+	if (resp->conn) {
+		unsigned long rtt = resp->jrxtstamp - req->jtxtstamp;
+		if (likely(rtt < USHRT_MAX))
+			tfw_apm_update(((TfwServer *)resp->conn->peer)->apm,
+					resp->jrxtstamp, rtt);
+	}
 	tfw_http_resp_fwd(req, resp);
 	TFW_INC_STAT_BH(serv.msgs_forwarded);
 	return;
@@ -1934,8 +1934,8 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 		return NULL;
 	}
 	req = list_first_entry(fwd_queue, TfwHttpReq, fwd_list);
-	if ((TfwMsg *)req == srv_conn->req_sent)
-		srv_conn->req_sent = NULL;
+	if ((TfwMsg *)req == srv_conn->msg_sent)
+		srv_conn->msg_sent = NULL;
 	tfw_http_req_delist(srv_conn, req);
 	tfw_http_conn_nip_delist(srv_conn);
 	/*
@@ -2008,7 +2008,7 @@ tfw_http_resp_cache(TfwHttpMsg *hmresp)
 	 * for age calculations, and for APM and Load Balancing.
 	 */
 	hmresp->cache_ctl.timestamp = timestamp;
-	hmresp->jtstamp = jiffies;
+	((TfwHttpResp *)hmresp)->jrxtstamp = jiffies;
 	/*
 	 * If 'Date:' header is missing in the response, then
 	 * set the date to the time the response was received.
diff --git a/tempesta_fw/http.h b/tempesta_fw/http.h
index 952e959d1..f23866298 100644
--- a/tempesta_fw/http.h
+++ b/tempesta_fw/http.h
@@ -298,7 +298,6 @@ typedef struct {
  *			  aren't alowed. So use atomic operations if concurrent
  *			  updates are possible;
  * @content_length	- the value of Content-Length header field;
- * @jtstamp		- time the message has been received, in jiffies;
  * @keep_alive		- the value of timeout specified in Keep-Alive header;
  * @conn		- connection which the message was received on;
  * @crlf		- pointer to CRLF between headers and body;
@@ -315,7 +314,6 @@ typedef struct {
 	unsigned char	version;					\
 	unsigned int	flags;						\
 	unsigned long	content_length;					\
-	unsigned long	jtstamp;					\
 	unsigned int	keep_alive;					\
 	TfwConnection	*conn;						\
 	void (*destructor)(void *msg);					\
@@ -347,6 +345,8 @@ typedef struct {
  * @node	- NUMA node where request is serviced;
  * @frang_st	- current state of FRANG classifier;
  * @chunk_cnt	- header or body chunk count for Frang classifier;
+ * @jtxtstamp	- time the request is forwarded to a server, in jiffies;
+ * @jrxtstamp	- time the request is received from a client, in jiffies;
  * @tm_header	- time HTTP header started coming;
  * @tm_bchunk	- time previous chunk of HTTP body had come at;
  * @hash	- hash value for caching calculated for the request;
@@ -370,6 +370,8 @@ typedef struct {
 	unsigned short		node;
 	unsigned int		frang_st;
 	unsigned int		chunk_cnt;
+	unsigned long		jtxtstamp;
+	unsigned long		jrxtstamp;
 	unsigned long		tm_header;
 	unsigned long		tm_bchunk;
 	unsigned long		hash;
@@ -385,14 +387,16 @@ typedef struct {
 
 /**
  * HTTP Response.
- *
  * TfwStr members must be the first for efficient scanning.
+ *
+ * @jrxtstamp	- time the message has been received, in jiffies;
  */
 typedef struct {
 	TFW_HTTP_MSG_COMMON;
 	TfwStr			s_line;
 	unsigned short		status;
 	time_t			date;
+	unsigned long		jrxtstamp;
 } TfwHttpResp;
 
 #define TFW_HTTP_RESP_STR_START(r)	__MSG_STR_START(r)

From 9fd20bbd5bf1a62eecccabe3406dbc3640e79e55 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Wed, 18 Jan 2017 13:25:20 +0300
Subject: [PATCH 37/65] Better code unification.

---
 tempesta_fw/http.c      | 210 +++++++++++++++++++---------------------
 tempesta_fw/sock_clnt.c |  10 +-
 2 files changed, 106 insertions(+), 114 deletions(-)

diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index b3c9c9e24..50c35c661 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -514,9 +514,14 @@ tfw_http_req_move2equeue(TfwConnection *srv_conn, TfwHttpReq *req,
 }
 
 /*
+ * Forwarding of requests to a back end server is run under a lock
+ * on the server connection's forwarding queue. It's performed as
+ * fast as possible by moving failed requests to the error queue
+ * that can be processed without the lock.
+ *
  * Delete requests that were not forwarded due to an error. Send an
  * error response to a client. The response will be attached to the
- * request and sent to the client in proper seq order.
+ * request and then sent to the client in proper seq order.
  */
 static void
 tfw_http_req_zap_error(struct list_head *equeue)
@@ -599,37 +604,60 @@ tfw_http_req_fwd_send(TfwConnection *srv_conn, TfwServer *srv,
 }
 
 /*
- * Forward requests in the server connection @srv_conn. The requests
+ * Forward one request @req to server connection @srv_conn.
+ * Return false if forwarding must be stopped, or true otherwise.
+ */
+static inline bool
+__tfw_http_req_fwd_single(TfwConnection *srv_conn, TfwServer *srv,
+			  TfwHttpReq *req, struct list_head *equeue)
+{
+	if (tfw_http_req_evict_timeout(srv_conn, srv, req, equeue))
+		return false;
+	if (!tfw_http_req_fwd_send(srv_conn, srv, req, equeue))
+		return false;
+	srv_conn->msg_sent = (TfwMsg *)req;
+	TFW_INC_STAT_BH(clnt.msgs_forwarded);
+	return true;
+}
+
+/*
+ * Forward unsent requests in server connection @srv_conn. The requests
  * are forwarded until a non-idempotent request is found in the queue.
- * Must be called with a lock on the server connection's @fwd_queue.
+ * It's assumed that the forwarding queue in @srv_conn is locked.
+ * It's also assumed that the forwarding queue is NOT drained.
  */
 static void
-__tfw_http_req_fwd_stalled(TfwConnection *srv_conn, struct list_head *equeue)
+__tfw_http_req_fwd_unsent(TfwConnection *srv_conn, struct list_head *equeue)
 {
 	TfwHttpReq *req, *tmp;
 	TfwServer *srv = (TfwServer *)srv_conn->peer;
 	struct list_head *fwd_queue = &srv_conn->fwd_queue;
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
+	WARN_ON(!spin_is_locked(&srv_conn->fwd_qlock));
+	BUG_ON(tfw_http_conn_drained(srv_conn));
 
-	/*
-	 * Process the server connection's queue of pending requests.
-	 * The queue is locked against concurrent updates: inserts of
-	 * outgoing requests, or closing of the server connection. Do
-	 * it as fast as possible by moving failed requests to other
-	 * queues that can be processed without the lock.
-	 */
 	req = srv_conn->msg_sent
 	    ? list_next_entry((TfwHttpReq *)srv_conn->msg_sent, fwd_list)
 	    : list_first_entry(fwd_queue, TfwHttpReq, fwd_list);
 
+	/* A frequent case: there's just one request in the queue. */
+	if (likely(list_is_singular(fwd_queue))) {
+		__tfw_http_req_fwd_single(srv_conn, srv, req, equeue);
+		/* See if the idempotent request was non-idempotent. */
+		tfw_http_req_nip_delist(srv_conn, req);
+		return;
+	}
+	/*
+	 * A less frequent case: the queue was on hold due to forwarding
+	 * a non-idempotent request, and a number of subsequent requests
+	 * had piled up. Process the server connection's full queue of
+	 * pending requests.
+	 */
 	list_for_each_entry_safe_from(req, tmp, fwd_queue, fwd_list) {
-		if (tfw_http_req_evict_timeout(srv_conn, srv, req, equeue))
-			continue;
-		if (!tfw_http_req_fwd_send(srv_conn, srv, req, equeue))
+		if (!__tfw_http_req_fwd_single(srv_conn, srv, req, equeue))
 			continue;
-		srv_conn->msg_sent = (TfwMsg *)req;
-		/* Stop sending if the request is non-idempotent. */
+		/* Stop forwarding if the request is non-idempotent. */
 		if (tfw_http_req_is_nip(req))
 			break;
 		/* See if the idempotent request was non-idempotent. */
@@ -638,57 +666,40 @@ __tfw_http_req_fwd_stalled(TfwConnection *srv_conn, struct list_head *equeue)
 }
 
 /*
- * Forward stalled requests in server connection @srv_conn.
- *
- * This function expects that the queue in the server connection
- * is locked. The queue in unlocked inside the function which is
- * very non-traditional. Please use with caution.
+ * Forward unsent requests in server connection @srv_conn.
+ * It's assumed that the forwarding queue in @srv_conn is locked.
  */
-static void
-tfw_http_req_fwd_stalled(TfwConnection *srv_conn)
+static inline void
+tfw_http_req_fwd_unsent(TfwConnection *srv_conn, struct list_head *equeue)
 {
-	LIST_HEAD(equeue);
-
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
-	WARN_ON(!spin_is_locked(&srv_conn->fwd_qlock));
-	BUG_ON(list_empty(&srv_conn->fwd_queue));
-
-	__tfw_http_req_fwd_stalled(srv_conn, &equeue);
-	spin_unlock(&srv_conn->fwd_qlock);
 
-	if (!list_empty(&equeue))
-		tfw_http_req_zap_error(&equeue);
+	if (tfw_http_conn_need_fwd(srv_conn))
+		__tfw_http_req_fwd_unsent(srv_conn, equeue);
 }
 
 /*
  * Forward the request @req to server connection @srv_conn.
  *
- * The request is added to the server connection (forwarding) queue.
- * If forwarding is on hold at this moment, then the request will be
- * forwarded later. Otherwise, if the queue is drained, then forward
- * the request to the server immediately. If the queue is not drained,
- * then forward all stalled requests to the server.
- *
- * The forwarding queue may not be drained only if it was on hold, and
- * a number of requests had piled up. That would be a rather infrequent
- * case. A regular case would be forwarding a single request, and that
- * can be done in a simple fashion.
+ * The request is added to the server connection's forwarding queue.
+ * If forwarding is on hold at the moment, then the request will be
+ * forwarded later. Otherwise, forward the request to the server now.
  *
  * Forwarding to a server is considered to be on hold after
  * a non-idempotent request is forwarded. The hold is removed when
  * the holding non-idempotent request is followed by another request
  * from the same client. Effectively, that re-enables pipelining.
+ * See RFC 7230 6.3.2.
  */
 static void
 tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 {
-	bool drained;
+	LIST_HEAD(equeue);
 
 	TFW_DBG2("%s: srv_conn=[%p], req=[%p]\n", __func__, srv_conn, req);
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
 
 	spin_lock(&srv_conn->fwd_qlock);
-	drained = tfw_http_conn_drained(srv_conn);
 	list_add_tail(&req->fwd_list, &srv_conn->fwd_queue);
 	srv_conn->qsize++;
 	if (tfw_http_req_is_nip(req))
@@ -697,23 +708,11 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 		spin_unlock(&srv_conn->fwd_qlock);
 		return;
 	}
-	if (!drained) {
-		tfw_http_req_fwd_stalled(srv_conn);
-		/* The queue is unlocked inside the function. */
-		return;
-	}
-	req->jtxtstamp = jiffies;
-	if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
-		tfw_http_req_delist(srv_conn, req);
-		spin_unlock(&srv_conn->fwd_qlock);
-		TFW_DBG2("%s: Forwarding error: conn=[%p] req=[%p]\n",
-			 __func__, srv_conn, req);
-		tfw_http_send_500(req);
-		TFW_INC_STAT_BH(clnt.msgs_otherr);
-		return;
-	}
-	srv_conn->msg_sent = (TfwMsg *)req;
+	__tfw_http_req_fwd_unsent(srv_conn, &equeue);
 	spin_unlock(&srv_conn->fwd_qlock);
+
+	if (!list_empty(&equeue))
+		tfw_http_req_zap_error(&equeue);
 }
 
 /*
@@ -725,7 +724,8 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
  * can be used to have that request re-sent or re-scheduled as well.
  *
  * As forwarding is paused after a non-idempotent request is sent,
- * there can be only one such request, and that's @srv_conn->msg_sent.
+ * there can be only one such request among those that were forwarded,
+ * and that's @srv_conn->msg_sent.
  *
  * Note: @srv_conn->msg_sent may change in result.
  */
@@ -751,7 +751,7 @@ tfw_http_req_fwd_handlenip(TfwConnection *srv_conn, struct list_head *equeue)
  * Re-forward requests in a server connection. Requests that exceed
  * the set limits are evicted.
  */
-static TfwMsg *
+static TfwHttpReq *
 tfw_http_req_resend(TfwConnection *srv_conn,
 		      bool first, struct list_head *equeue)
 {
@@ -783,16 +783,16 @@ tfw_http_req_resend(TfwConnection *srv_conn,
 			break;
 	}
 
-	return (TfwMsg *)req_resent;
+	return req_resent;
 }
 
-static inline TfwMsg *
+static inline TfwHttpReq *
 tfw_http_req_resend_first(TfwConnection *srv_conn, struct list_head *equeue)
 {
 	return tfw_http_req_resend(srv_conn, true, equeue);
 }
 
-static inline TfwMsg *
+static inline TfwHttpReq *
 tfw_http_req_resend_all(TfwConnection *srv_conn, struct list_head *equeue)
 {
 	return tfw_http_req_resend(srv_conn, false, equeue);
@@ -804,45 +804,37 @@ tfw_http_req_resend_all(TfwConnection *srv_conn, struct list_head *equeue)
  * The connection is not scheduled until all requests in it are re-sent.
  */
 static void
-tfw_http_req_fwd_repair(TfwConnection *srv_conn)
+__tfw_http_req_fwd_repair(TfwConnection *srv_conn, struct list_head *equeue)
 {
-	LIST_HEAD(equeue);
-
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 	WARN_ON(!spin_is_locked(&srv_conn->fwd_qlock));
-	BUG_ON(!(srv_conn->flags & (TFW_CONN_B_QFORWD | TFW_CONN_B_RESEND)));
+	BUG_ON(!tfw_connection_restricted(srv_conn));
 
 	if (list_empty(&srv_conn->fwd_queue)) {
 		clear_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
 		clear_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
 	} else if (test_bit(TFW_CONN_B_QFORWD, &srv_conn->flags)) {
-		if (tfw_http_conn_need_fwd(srv_conn))
-			__tfw_http_req_fwd_stalled(srv_conn, &equeue);
+		tfw_http_req_fwd_unsent(srv_conn, equeue);
 	} else {
 		/*
 		 * After all previously forwarded requests are re-sent,
-		 * srv_conn->msg_sent will be either NULL or the last
+		 * @srv_conn->msg_sent will be either NULL or the last
 		 * request that was re-sent successfully. If re-sending
 		 * of non-idempotent requests is allowed, then that last
 		 * request may be non-idempotent. Continue with sending
 		 * requests that were never forwarded only if the last
 		 * request that was re-sent was NOT non-idempotent.
 		 */
-		if (srv_conn->msg_sent)
-			srv_conn->msg_sent =
-				tfw_http_req_resend_all(srv_conn, &equeue);
-		if (!srv_conn->msg_sent
-		    || !tfw_http_req_is_nip((TfwHttpReq *)srv_conn->msg_sent))
-		{
+		TfwHttpReq *req_resent = (TfwHttpReq *)srv_conn->msg_sent;
+		if (req_resent) {
+			req_resent = tfw_http_req_resend_all(srv_conn, equeue);
+			srv_conn->msg_sent = (TfwMsg *)req_resent;
+		}
+		if (!(req_resent && tfw_http_req_is_nip(req_resent))) {
 			set_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
-			if (tfw_http_conn_need_fwd(srv_conn))
-				__tfw_http_req_fwd_stalled(srv_conn, &equeue);
+			tfw_http_req_fwd_unsent(srv_conn, equeue);
 		}
 	}
-	spin_unlock(&srv_conn->fwd_qlock);
-
-	if (!list_empty(&equeue))
-		tfw_http_req_zap_error(&equeue);
 }
 
 /*
@@ -892,7 +884,7 @@ tfw_http_conn_msg_alloc(TfwConnection *conn)
  * and a simpler tfw_http_msg_free() can be used for that.
  *
  * NOTE: @hm->conn might be NULL if @hm is the response that was served
- * from the cache.
+ * from cache.
  */
 static void
 tfw_http_conn_msg_free(TfwHttpMsg *hm)
@@ -922,13 +914,12 @@ tfw_http_conn_msg_free(TfwHttpMsg *hm)
  *
  * Note: re-scheduled requests are put at the tail of a new server's
  * connection queue, and NOT according to their original timestamps.
- * That's the indended behaviour. There requests are unlucky already.
- * They had been delayed by the waiting in their original server
- * connections, and then by the re-scheduling procedure itself. Now
- * they have much greater chance to be evicted when it's their turn
- * to be forwarded. The main effort is put into servicing requests
- * that are on time. Unlucky requests are just given another chance
- * with minimal effort.
+ * That's the intended behaviour. These requests are unlucky already.
+ * They were delayed by waiting in their original server connections,
+ * and then by the re-scheduling procedure itself. Now they have much
+ * greater chance to be evicted when it's their turn to be forwarded.
+ * The main effort is put into servicing requests that are on time.
+ * Unlucky requests are just given another chance with minimal effort.
  */
 static void
 tfw_http_req_resched(TfwConnection *srv_conn, struct list_head *equeue)
@@ -965,12 +956,12 @@ tfw_http_req_resched(TfwConnection *srv_conn, struct list_head *equeue)
  * 6.3.2, "a client MUST NOT pipeline immediately after connection
  * establishment". To address that, re-send the first request to the
  * server. When a response comes, that will trigger resending of the
- * rest of those unanswered requests (tfw_http_req_fwd_repair()).
+ * rest of those unanswered requests (__tfw_http_req_fwd_repair()).
  */
 static void
 tfw_http_conn_repair(TfwConnection *srv_conn)
 {
-	TfwMsg *req_resent = NULL;
+	TfwHttpReq *req_resent = NULL;
 	LIST_HEAD(equeue);
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
@@ -995,8 +986,7 @@ tfw_http_conn_repair(TfwConnection *srv_conn)
 	/* If none resent, then send the remaining unsent requests. */
 	if (!req_resent) {
 		set_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
-		if (tfw_http_conn_need_fwd(srv_conn))
-			__tfw_http_req_fwd_stalled(srv_conn, &equeue);
+		tfw_http_req_fwd_unsent(srv_conn, &equeue);
 	}
 	spin_unlock(&srv_conn->fwd_qlock);
 zap_error:
@@ -1419,6 +1409,7 @@ __tfw_http_resp_fwd(TfwConnection *cli_conn, struct list_head *ret_queue)
 				 __func__, cli_conn, resp);
 			ss_close_sync(cli_conn->sk, true);
 		}
+		TFW_INC_STAT_BH(serv.msgs_forwarded);
 loop_discard:
 		tfw_http_conn_msg_free((TfwHttpMsg *)resp);
 		tfw_http_conn_msg_free((TfwHttpMsg *)req);
@@ -1438,7 +1429,7 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 	/*
 	 * Starting with the first request on the list, pick consecutive
 	 * requests that have a paired response. Remove those requests
-	 * from the list, and put them on the list of outgoing responses.
+	 * from the list, and put them on the list of returned responses.
 	 * Take care of concurrent calls to this function from different
 	 * CPUs, all going for the same client connection.
 	 *
@@ -1620,18 +1611,17 @@ tfw_http_req_mark_nip(TfwHttpReq *req)
 static void
 tfw_http_req_add_seq_queue(TfwHttpReq *req)
 {
-	TfwHttpReq *preq;
+	TfwHttpReq *req_prev;
 	TfwConnection *cli_conn = req->conn;
 	struct list_head *seq_queue = &cli_conn->seq_queue;
 
 	tfw_http_req_mark_nip(req);
 
 	spin_lock(&cli_conn->seq_qlock);
-	preq = !list_empty(seq_queue)
-	     ? list_last_entry(seq_queue, TfwHttpReq, msg.seq_list)
-	     : NULL;
-	if (preq && tfw_http_req_is_nip(preq))
-		preq->flags &= ~TFW_HTTP_NON_IDEMP;
+	req_prev = list_empty(seq_queue) ?
+		   NULL : list_last_entry(seq_queue, TfwHttpReq, msg.seq_list);
+	if (req_prev && tfw_http_req_is_nip(req_prev))
+		req_prev->flags &= ~TFW_HTTP_NON_IDEMP;
 	list_add_tail(&req->msg.seq_list, seq_queue);
 	spin_unlock(&cli_conn->seq_qlock);
 }
@@ -1903,7 +1893,6 @@ tfw_http_resp_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 					resp->jrxtstamp, rtt);
 	}
 	tfw_http_resp_fwd(req, resp);
-	TFW_INC_STAT_BH(serv.msgs_forwarded);
 	return;
 }
 
@@ -1921,6 +1910,7 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 	TfwHttpReq *req;
 	TfwConnection *srv_conn = hmresp->conn;
 	struct list_head *fwd_queue = &srv_conn->fwd_queue;
+	LIST_HEAD(equeue);
 
 	spin_lock(&srv_conn->fwd_qlock);
 	if (unlikely(list_empty(fwd_queue))) {
@@ -1941,15 +1931,15 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 	/*
 	 * Perform special processing if the connection is in repair
 	 * mode. Otherwise, forward pending requests to the server.
-	 * Note: The queue is unlocked inside tfw_http_req_fwd_repair()
-	 * or tfw_http_req_fwd_stalled().
 	 */
 	if (unlikely(tfw_connection_restricted(srv_conn)))
-		tfw_http_req_fwd_repair(srv_conn);
+		__tfw_http_req_fwd_repair(srv_conn, &equeue);
 	else if (tfw_http_conn_need_fwd(srv_conn))
-		tfw_http_req_fwd_stalled(srv_conn);
-	else
-		spin_unlock(&srv_conn->fwd_qlock);
+		__tfw_http_req_fwd_unsent(srv_conn, &equeue);
+	spin_unlock(&srv_conn->fwd_qlock);
+
+	if (!list_empty(&equeue))
+		tfw_http_req_zap_error(&equeue);
 
 	return req;
 }
diff --git a/tempesta_fw/sock_clnt.c b/tempesta_fw/sock_clnt.c
index 3df8bd5c9..f6baa1c2b 100644
--- a/tempesta_fw/sock_clnt.c
+++ b/tempesta_fw/sock_clnt.c
@@ -55,12 +55,14 @@ tfw_sock_cli_keepalive_timer_cb(unsigned long data)
 
 	TFW_DBG("Client timeout end\n");
 
-	/* Close socket asynchronously to avoid deadlock on del_timer_sync(). */
-	if (ss_close(conn->sk)) {
-		/* Try to close the connection 1 second later. */
+	/*
+	 * Close the socket (and the connection) asynchronously to avoid
+	 * a deadlock on del_timer_sync(). In case of error try to close
+	 * it one second later.
+	 */
+	if (ss_close(conn->sk))
 		mod_timer(&conn->timer,
 			  jiffies + msecs_to_jiffies(1000));
-	}
 }
 
 static TfwConnection *

From 676f205730a0a1454ba4daf4f21a70628608cd32 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Wed, 18 Jan 2017 16:22:15 +0300
Subject: [PATCH 38/65] Rename @srv->qjtimeout to srv->qjtmo_max for
 uniformity.

---
 tempesta_fw/http.c     | 4 ++--
 tempesta_fw/server.h   | 4 ++--
 tempesta_fw/sock_srv.c | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 50c35c661..11e19096e 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -557,10 +557,10 @@ tfw_http_req_evict_timeout(TfwConnection *srv_conn, TfwServer *srv,
 {
 	unsigned long jtimeout = jiffies - req->jrxtstamp;
 
-	if (unlikely(time_after(jtimeout, srv->qjtimeout))) {
+	if (unlikely(time_after(jtimeout, srv->qjtmo_max))) {
 		TFW_DBG2("%s: Eviction: req=[%p] overdue=[%dms]\n",
 			 __func__, req,
-			jiffies_to_msecs(jtimeout - srv->qjtimeout));
+			jiffies_to_msecs(jtimeout - srv->qjtmo_max));
 		tfw_http_req_move2equeue(srv_conn, req, equeue, 504);
 		return true;
 	}
diff --git a/tempesta_fw/server.h b/tempesta_fw/server.h
index 4f593fe15..cf8bf275f 100644
--- a/tempesta_fw/server.h
+++ b/tempesta_fw/server.h
@@ -39,7 +39,7 @@ typedef struct tfw_scheduler_t TfwScheduler;
  * @sg		- back-reference to the server group;
  * @apm		- opaque handle for APM stats;
  * @qsize_max	- maximum queue size of a server connection;
- * @qjtimeout	- maximum age of a request in a server connection, in jiffies;
+ * @qjtmo_max	- maximum age of a request in a server connection, in jiffies;
  * @retry_max	- maximum number of tries for forwarding a request;
  * @flags	- server related flags;
  */
@@ -50,7 +50,7 @@ typedef struct {
 	void			*apm;
 	int			stress;
 	unsigned int		qsize_max;
-	unsigned long		qjtimeout;
+	unsigned long		qjtmo_max;
 	unsigned int		retry_max;
 	unsigned int		flags;
 } TfwServer;
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index e031f5252..06e5c2bc4 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -820,7 +820,7 @@ tfw_handle_out_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
 
 	tfw_cfg_set_conn_tries(srv, tfw_cfg_out_retry_attempts);
 	srv->qsize_max = tfw_cfg_out_queue_size ? : UINT_MAX;
-	srv->qjtimeout = tfw_cfg_out_send_timeout
+	srv->qjtmo_max = tfw_cfg_out_send_timeout
 		       ? msecs_to_jiffies(tfw_cfg_out_send_timeout * 1000)
 		       : ULONG_MAX;
 	srv->retry_max = tfw_cfg_out_send_tries ? : UINT_MAX;
@@ -905,7 +905,7 @@ tfw_finish_srv_group(TfwCfgSpec *cs)
 			msecs_to_jiffies(tfw_cfg_in_send_timeout * 1000);
 		tfw_cfg_set_conn_tries(srv, tfw_cfg_in_retry_attempts);
 		srv->qsize_max = tfw_cfg_in_queue_size ? : UINT_MAX;
-		srv->qjtimeout = tfw_cfg_in_send_timeout ? jtmout : ULONG_MAX;
+		srv->qjtmo_max = tfw_cfg_in_send_timeout ? jtmout : ULONG_MAX;
 		srv->retry_max = tfw_cfg_in_send_tries ? : UINT_MAX;
 		srv->flags |= tfw_cfg_in_retry_nip ?
 			      TFW_SRV_RETRY_NON_IDEMP : 0;

From 5fe74f5d195cba48335c63d393f4c1cb9f79101a Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Wed, 18 Jan 2017 16:28:26 +0300
Subject: [PATCH 39/65] Print current queue size for each server's connection
 in procfs stats.

---
 tempesta_fw/procfs.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tempesta_fw/procfs.c b/tempesta_fw/procfs.c
index c04563433..6f75e3e82 100644
--- a/tempesta_fw/procfs.c
+++ b/tempesta_fw/procfs.c
@@ -149,6 +149,7 @@ tfw_srvstats_seq_show(struct seq_file *seq, void *off)
 #define SPRNE(m, e)	seq_printf(seq, m": %dms\n", e)
 
 	int i;
+	TfwConnection *srv_conn;
 	TfwServer *srv = seq->private;
 	TfwPrcntl prcntl[ARRAY_SIZE(tfw_procfs_prcntl)];
 	TfwPrcntlStats pstats = { prcntl, ARRAY_SIZE(prcntl) };
@@ -163,8 +164,14 @@ tfw_srvstats_seq_show(struct seq_file *seq, void *off)
 	SPRNE("Maximum response time\t\t", pstats.max);
 	seq_printf(seq, "Percentiles\n");
 	for (i = 0; i < ARRAY_SIZE(prcntl); ++i)
-		seq_printf(seq, "%02d%%:\t%dms\n", prcntl[i].ith,
-			   prcntl[i].val);
+		seq_printf(seq, "\t%02d%%:\t%dms\n",
+				prcntl[i].ith, prcntl[i].val);
+	i = 0;
+	seq_printf(seq, "Maximum forwarding queue size\t: %d\n",
+			srv->qsize_max);
+	list_for_each_entry(srv_conn, &srv->conn_list, list)
+		seq_printf(seq, "\tConnection %03d queue size\t: %d\n",
+				++i, ACCESS_ONCE(srv_conn->qsize));
 
 	return 0;
 #undef SPRNE

From 4787c6312149a2305a442289c011ed6b6c23099b Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Thu, 19 Jan 2017 16:07:37 +0300
Subject: [PATCH 40/65] Count responses that were not forwarded to a client in
 perf stats.

---
 tempesta_fw/http.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 11e19096e..b62873845 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -1398,6 +1398,7 @@ __tfw_http_resp_fwd(TfwConnection *cli_conn, struct list_head *ret_queue)
 		if (!tfw_connection_live(cli_conn)) {
 			TFW_DBG2("%s: Client connection dead: conn=[%p]\n",
 				 __func__, cli_conn);
+			TFW_INC_STAT_BH(serv.msgs_otherr);
 			goto loop_discard;
 		}
 		/*
@@ -1408,6 +1409,7 @@ __tfw_http_resp_fwd(TfwConnection *cli_conn, struct list_head *ret_queue)
 			TFW_DBG2("%s: Forwarding error: conn=[%p] resp=[%p]\n",
 				 __func__, cli_conn, resp);
 			ss_close_sync(cli_conn->sk, true);
+			TFW_INC_STAT_BH(serv.msgs_otherr);
 		}
 		TFW_INC_STAT_BH(serv.msgs_forwarded);
 loop_discard:
@@ -1446,6 +1448,7 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 		ss_close_sync(cli_conn->sk, true);
 		tfw_http_conn_msg_free((TfwHttpMsg *)resp);
 		tfw_http_conn_msg_free((TfwHttpMsg *)req);
+		TFW_INC_STAT_BH(serv.msgs_otherr);
 		return;
 	}
 	req->resp = (TfwHttpMsg *)resp;

From d6fdddf3da01557c880ae31d40237cd57ee0ac0d Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Sat, 21 Jan 2017 03:20:36 +0300
Subject: [PATCH 41/65] Call the repair correctly when a server connection is
 dead.

- Don't repair a dead connection more than once;
- Continue reconnect attempt for a dead connection;
- Remove TfwSrvConnection{}->timeout, replace with a local var;
- Make max_attempts value at least the size of the timeouts array;
---
 tempesta_fw/connection.h |  2 ++
 tempesta_fw/http.c       | 10 ++++--
 tempesta_fw/sock_srv.c   | 78 +++++++++++++++++++++++-----------------
 3 files changed, 55 insertions(+), 35 deletions(-)

diff --git a/tempesta_fw/connection.h b/tempesta_fw/connection.h
index be82aad7e..8edf7abb3 100644
--- a/tempesta_fw/connection.h
+++ b/tempesta_fw/connection.h
@@ -127,11 +127,13 @@ enum {
 	TFW_CONN_B_RESEND = 0,	/* Need to re-send requests. */
 	TFW_CONN_B_QFORWD,	/* Need to forward requests in the queue. */
 	TFW_CONN_B_HASNIP,	/* Has non-idempotent requests. */
+	TFW_CONN_B_ISDEAD,	/* Is dead, unable to reconnect. */
 };
 
 #define TFW_CONN_F_RESEND	(1 << TFW_CONN_B_RESEND)
 #define TFW_CONN_F_QFORWD	(1 << TFW_CONN_B_QFORWD)
 #define TFW_CONN_F_HASNIP	(1 << TFW_CONN_B_HASNIP)
+#define TFW_CONN_F_ISDEAD	(1 << TFW_CONN_B_ISDEAD)
 
 /**
  * TLS hardened connection.
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index b62873845..ecd7cb188 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -912,6 +912,8 @@ tfw_http_conn_msg_free(TfwHttpMsg *hm)
  * Non-idempotent requests may be rescheduled depending on the option
  * in configuration.
  *
+ * No locks are needed as the server connection is dead at the moment.
+ *
  * Note: re-scheduled requests are put at the tail of a new server's
  * connection queue, and NOT according to their original timestamps.
  * That's the intended behaviour. These requests are unlucky already.
@@ -966,14 +968,17 @@ tfw_http_conn_repair(TfwConnection *srv_conn)
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
-	BUG_ON(!tfw_connection_restricted(srv_conn));
 
 	/* See if requests need to be rescheduled. */
 	if (unlikely(!tfw_connection_live(srv_conn))) {
+		if (list_empty(&srv_conn->fwd_queue))
+			return;
 		tfw_http_req_resched(srv_conn, &equeue);
 		goto zap_error;
 	}
 
+	BUG_ON(!tfw_connection_restricted(srv_conn));
+
 	spin_lock(&srv_conn->fwd_qlock);
 	/* Handle the non-idempotent request if any. */
 	tfw_http_req_fwd_handlenip(srv_conn, &equeue);
@@ -983,7 +988,7 @@ tfw_http_conn_repair(TfwConnection *srv_conn)
 		if (unlikely(!req_resent))
 			srv_conn->msg_sent = NULL;
 	}
-	/* If none resent, then send the remaining unsent requests. */
+	/* If none re-sent, then send the remaining unsent requests. */
 	if (!req_resent) {
 		set_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
 		tfw_http_req_fwd_unsent(srv_conn, &equeue);
@@ -1008,6 +1013,7 @@ tfw_http_conn_init(TfwConnection *conn)
 	if (TFW_CONN_TYPE(conn) & Conn_Srv) {
 		if (!list_empty(&conn->fwd_queue))
 			set_bit(TFW_CONN_B_RESEND, &conn->flags);
+		clear_bit(TFW_CONN_B_ISDEAD, &conn->flags);
 	}
 	tfw_gfsm_state_init(&conn->state, conn, TFW_HTTP_FSM_INIT);
 	return 0;
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index 06e5c2bc4..c9bbfb7cf 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -113,11 +113,25 @@
  */
 typedef struct {
 	TfwConnection		conn;
-	unsigned long		timeout;
 	unsigned int		attempts;
 	unsigned int		max_attempts;
 } TfwSrvConnection;
 
+/*
+ * Timeout between connect attempts is increased with each unsuccessful
+ * attempt. Length of the timeout for each attempt is chosen to follow
+ * a variant of exponential backoff delay algorithm.
+ *
+ * It's essential that the new connection is established and the failed
+ * connection is restored ASAP, so the min retry interval is set to 1.
+ * The next step is good for a cyclic reconnect, e.g. if an upstream
+ * is configured to reset a connection periodically. The next steps are
+ * almost a pure backoff algo starting from 100ms, which is a good RTT
+ * for a fast 10Gbps link. The timeout is not increased after 1 second
+ * as it has moderate overhead, and it's still good in response time.
+ */
+static const unsigned long tfw_srv_tmo_vals[] = { 1, 10, 100, 250, 500, 1000 };
+
 /**
  * Initiate a non-blocking connect attempt.
  * Returns immediately without waiting until a connection is established.
@@ -188,30 +202,22 @@ tfw_sock_srv_connect_try(TfwSrvConnection *srv_conn)
 	return 0;
 }
 
+/*
+ * max_attempts can be the maximum value for the data type to mean
+ * the unlimited number of attempts, which is the value that should
+ * never be reached. UINT_MAX seconds is more than 136 years. It's
+ * safe to assume that it's not reached in a single run of Tempesta.
+ *
+ * The limit on the number of reconnect attempts is used to re-schedule
+ * requests that would never be forwarded otherwise. Then, the attempts
+ * to reconnect are continued in anticipation that the connection will
+ * be re-established sooner or later. Otherwise the connection would
+ * stay dead until Tempesta is restarted.
+ */
 static inline void
 tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
 {
-	/*
-	 * Timeout between connect attempts is increased with each
-	 * unsuccessful attempt. Length of the timeout is decided
-	 * with a variant of exponential backoff delay algorithm.
-	 *
-	 * It's essential that the new connection is established and the
-	 * failed connection is restored ASAP, so the min retry interval
-	 * is set to 1. The next step is good for loopback reconnection,
-	 * e.g. if an upstream is configured to reset a connection
-	 * periodically. The following steps are almost pure backoff algo
-	 * starting from 100ms, which is a good RTT for a fast 10Gbps link.
-	 * The timeout is not increased after 1 second as it has moderate
-	 * overhead, and it's still good in response time.
-	 *
-	 * Note that the limit on the number of reconnect attempts is used
-	 * to re-schedule requests that would never be forwarded otherwise.
-	 * However, the attempts to reconnect are continued in hopes that
-	 * the connection will be re-established sooner or later. Otherwise
-	 * the connection will stay dead until Tempesta's restart.
-	 */
-	static const unsigned long timeouts[] = { 1, 10, 100, 250, 500, 1000 };
+	unsigned long timeout;
 
 	/* Don't rearm reconnection timer if we're about to shutdown. */
 	if (unlikely(!ss_active()))
@@ -223,23 +229,25 @@ tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
 	 * never be reached. UINT_MAX seconds is more than 136 years. It's
 	 * safe to assume that it's not reached in a single run of Tempesta.
 	 */
-	if (unlikely(srv_conn->attempts >= srv_conn->max_attempts)) {
+	if (unlikely((srv_conn->attempts >= srv_conn->max_attempts)
+		     && !test_bit(TFW_CONN_B_ISDEAD, &srv_conn->conn.flags)))
+	{
 		TfwAddr *srv_addr = &srv_conn->conn.peer->addr;
 		char s_addr[TFW_ADDR_STR_BUF_SIZE] = { 0 };
 		tfw_addr_ntop(srv_addr, s_addr, sizeof(s_addr));
 		TFW_WARN("The limit of [%d] on reconnect attempts exceeded. "
 			 "The server connection [%s] is down.\n",
 			 srv_conn->max_attempts, s_addr);
-		if (unlikely(tfw_connection_restricted(&srv_conn->conn)))
-			tfw_connection_repair(&srv_conn->conn);
+		tfw_connection_repair(&srv_conn->conn);
+		set_bit(TFW_CONN_B_ISDEAD, &srv_conn->conn.flags);
 	}
-	if (srv_conn->attempts < ARRAY_SIZE(timeouts)) {
-		srv_conn->timeout = timeouts[srv_conn->attempts];
+	if (srv_conn->attempts < ARRAY_SIZE(tfw_srv_tmo_vals)) {
+		timeout = tfw_srv_tmo_vals[srv_conn->attempts];
 		TFW_DBG_ADDR("Cannot establish connection",
 			     &srv_conn->conn.peer->addr);
 	} else {
-		srv_conn->timeout = timeouts[ARRAY_SIZE(timeouts) - 1];
-		if (srv_conn->attempts == ARRAY_SIZE(timeouts)
+		timeout = tfw_srv_tmo_vals[ARRAY_SIZE(tfw_srv_tmo_vals) - 1];
+		if (srv_conn->attempts == ARRAY_SIZE(tfw_srv_tmo_vals)
 		    || !(srv_conn->attempts % 60))
 		{
 			char addr_str[TFW_ADDR_STR_BUF_SIZE] = { 0 };
@@ -252,8 +260,7 @@ tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
 	}
 	srv_conn->attempts++;
 
-	mod_timer(&srv_conn->conn.timer,
-		  jiffies + msecs_to_jiffies(srv_conn->timeout));
+	mod_timer(&srv_conn->conn.timer, jiffies + msecs_to_jiffies(timeout));
 }
 
 static void
@@ -269,7 +276,6 @@ tfw_sock_srv_connect_retry_timer_cb(unsigned long data)
 static inline void
 __reset_retry_timer(TfwSrvConnection *srv_conn)
 {
-	srv_conn->timeout = 0;
 	srv_conn->attempts = 0;
 }
 
@@ -687,7 +693,13 @@ tfw_cfg_set_conn_tries(TfwServer *srv, int attempts)
 	TfwSrvConnection *srv_conn;
 
 	list_for_each_entry(srv_conn, &srv->conn_list, conn.list)
-		srv_conn->max_attempts = attempts ? : UINT_MAX;
+		if (!attempts) {
+			srv_conn->max_attempts = UINT_MAX;
+		} else if (attempts < ARRAY_SIZE(tfw_srv_tmo_vals)) {
+			srv_conn->max_attempts = ARRAY_SIZE(tfw_srv_tmo_vals);
+		} else {
+			srv_conn->max_attempts = attempts;
+		}
 
 	return 0;
 }

From 1f968146fe85b018d24119f9902f75217fe21cdd Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Sun, 22 Jan 2017 20:08:39 +0300
Subject: [PATCH 42/65] Correctly handle requests in a closing client
 connection.

Also, better structure and code in __tfw_http_resp_fwd(), and lots
of new and corrected comments.
---
 tempesta_fw/http.c | 312 +++++++++++++++++++++++++--------------------
 1 file changed, 176 insertions(+), 136 deletions(-)

diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index ecd7cb188..3c7cea347 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -385,8 +385,8 @@ tfw_http_req_is_nip(TfwHttpReq *req)
 
 /*
  * Remove @req from the list of non-idempotent requests in @srv_conn.
- * If it is the last requests on the list, then clear the flag that
- * @srv_conn has non-idempotent requests.
+ * If it is the last request on the list, then clear the flag saying
+ * that @srv_conn has non-idempotent requests.
  *
  * @req must be confirmed to be on the list.
  */
@@ -401,7 +401,7 @@ __tfw_http_req_nip_delist(TfwConnection *srv_conn, TfwHttpReq *req)
 
 /*
  * Put @req on the list of non-idempotent requests in @srv_conn. 
- * Raise the flag that the connection has non-idempotent requests.
+ * Raise the flag saying that the connection has non-idempotent requests.
  */
 static inline void
 __tfw_http_req_nip_enlist(TfwConnection *srv_conn, TfwHttpReq *req)
@@ -724,7 +724,7 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
  * can be used to have that request re-sent or re-scheduled as well.
  *
  * As forwarding is paused after a non-idempotent request is sent,
- * there can be only one such request among those that were forwarded,
+ * there can be only one such request among forwarded requests,
  * and that's @srv_conn->msg_sent.
  *
  * Note: @srv_conn->msg_sent may change in result.
@@ -786,12 +786,18 @@ tfw_http_req_resend(TfwConnection *srv_conn,
 	return req_resent;
 }
 
+/*
+ * Re-send only the first unanswered request in the forwarding queue.
+ */
 static inline TfwHttpReq *
 tfw_http_req_resend_first(TfwConnection *srv_conn, struct list_head *equeue)
 {
 	return tfw_http_req_resend(srv_conn, true, equeue);
 }
 
+/*
+ * Re-send all unanswered requests in the forwarding queue.
+ */
 static inline TfwHttpReq *
 tfw_http_req_resend_all(TfwConnection *srv_conn, struct list_head *equeue)
 {
@@ -837,75 +843,6 @@ __tfw_http_req_fwd_repair(TfwConnection *srv_conn, struct list_head *equeue)
 	}
 }
 
-/*
- * Allocate a new HTTP message structure, and link it with
- * the connection structure. Increment the number of users
- * of the connection structure. Initialize GFSM for the message.
- */
-static TfwMsg *
-tfw_http_conn_msg_alloc(TfwConnection *conn)
-{
-	TfwHttpMsg *hm = tfw_http_msg_alloc(TFW_CONN_TYPE(conn));
-	if (unlikely(!hm))
-		return NULL;
-
-	hm->conn = conn;
-	tfw_connection_get(conn);
-
-	if (TFW_CONN_TYPE(conn) & Conn_Clnt) {
-		TFW_INC_STAT_BH(clnt.rx_messages);
-	} else {
-		TfwHttpReq *req;
-
-		spin_lock(&conn->fwd_qlock);
-		req = list_first_entry_or_null(&conn->fwd_queue,
-					       TfwHttpReq, fwd_list);
-		spin_unlock(&conn->fwd_qlock);
-		if (req && (req->method == TFW_HTTP_METH_HEAD))
-			hm->flags |= TFW_HTTP_VOID_BODY;
-		TFW_INC_STAT_BH(serv.rx_messages);
-	}
-
-	return (TfwMsg *)hm;
-}
-
-/*
- * Free an HTTP message.
- * Also, free the connection structure if there's no more references.
- *
- * This function should be used anytime when there's a chance that
- * a connection structure may belong to multiple messages, which is
- * almost always. If a connection is suddenly closed then it still
- * can be safely dereferenced and used in the code.
- * In rare cases we're sure that a connection structure in a message
- * doesn't have multiple users. For instance, when an error response
- * is prepared and sent by Tempesta, that HTTP message does not need
- * a connection structure. The message is then immediately destroyed,
- * and a simpler tfw_http_msg_free() can be used for that.
- *
- * NOTE: @hm->conn might be NULL if @hm is the response that was served
- * from cache.
- */
-static void
-tfw_http_conn_msg_free(TfwHttpMsg *hm)
-{
-	if (unlikely(!hm))
-		return;
-
-	if (hm->conn) {
-		/*
-		 * Unlink connection while there is at least one reference.
-		 * Use atomic exchange to avoid races with new messages arrival
-		 * on the connection.
-		 */
-		__cmpxchg((unsigned long *)&hm->conn->msg, (unsigned long)hm,
-			  0UL, sizeof(long));
-		tfw_connection_put(hm->conn);
-	}
-
-	tfw_http_msg_free(hm);
-}
-
 /*
  * Re-schedule requests in a dead server connection's queue to a live
  * server connection. Idempotent requests are always rescheduled.
@@ -999,6 +936,91 @@ tfw_http_conn_repair(TfwConnection *srv_conn)
 		tfw_http_req_zap_error(&equeue);
 }
 
+/*
+ * Destructor for a request message.
+ */
+void
+tfw_http_req_destruct(void *msg)
+{
+	TfwHttpReq *req = msg;
+
+	BUG_ON(!list_empty(&req->msg.seq_list));
+	BUG_ON(!list_empty(&req->fwd_list));
+	BUG_ON(!list_empty(&req->nip_list));
+
+	if (req->sess)
+		tfw_http_sess_put(req->sess);
+}
+
+/*
+ * Allocate a new HTTP message structure, and link it with
+ * the connection structure. Increment the number of users
+ * of the connection structure. Initialize GFSM for the message.
+ */
+static TfwMsg *
+tfw_http_conn_msg_alloc(TfwConnection *conn)
+{
+	TfwHttpMsg *hm = tfw_http_msg_alloc(TFW_CONN_TYPE(conn));
+	if (unlikely(!hm))
+		return NULL;
+
+	hm->conn = conn;
+	tfw_connection_get(conn);
+
+	if (TFW_CONN_TYPE(conn) & Conn_Clnt) {
+		TFW_INC_STAT_BH(clnt.rx_messages);
+	} else {
+		TfwHttpReq *req;
+
+		spin_lock(&conn->fwd_qlock);
+		req = list_first_entry_or_null(&conn->fwd_queue,
+					       TfwHttpReq, fwd_list);
+		spin_unlock(&conn->fwd_qlock);
+		if (req && (req->method == TFW_HTTP_METH_HEAD))
+			hm->flags |= TFW_HTTP_VOID_BODY;
+		TFW_INC_STAT_BH(serv.rx_messages);
+	}
+
+	return (TfwMsg *)hm;
+}
+
+/*
+ * Free an HTTP message.
+ * Also, free the connection structure if there's no more references.
+ *
+ * This function should be used anytime when there's a chance that
+ * a connection structure may belong to multiple messages, which is
+ * almost always. If a connection is suddenly closed then it still
+ * can be safely dereferenced and used in the code.
+ * In rare cases we're sure that a connection structure in a message
+ * doesn't have multiple users. For instance, when an error response
+ * is prepared and sent by Tempesta, that HTTP message does not need
+ * a connection structure. The message is then immediately destroyed,
+ * and a simpler tfw_http_msg_free() can be used for that.
+ *
+ * NOTE: @hm->conn might be NULL if @hm is the response that was served
+ * from cache.
+ */
+static void
+tfw_http_conn_msg_free(TfwHttpMsg *hm)
+{
+	if (unlikely(!hm))
+		return;
+
+	if (hm->conn) {
+		/*
+		 * Unlink connection while there is at least one reference.
+		 * Use atomic exchange to avoid races with new messages
+		 * arriving on the connection.
+		 */
+		__cmpxchg((unsigned long *)&hm->conn->msg, (unsigned long)hm,
+			  0UL, sizeof(long));
+		tfw_connection_put(hm->conn);
+	}
+
+	tfw_http_msg_free(hm);
+}
+
 /*
  * Connection with a peer is created.
  *
@@ -1019,19 +1041,6 @@ tfw_http_conn_init(TfwConnection *conn)
 	return 0;
 }
 
-void
-tfw_http_req_destruct(void *msg)
-{
-	TfwHttpReq *req = msg;
-
-	BUG_ON(!list_empty(&req->msg.seq_list));
-	BUG_ON(!list_empty(&req->fwd_list));
-	BUG_ON(!list_empty(&req->nip_list));
-
-	if (req->sess)
-		tfw_http_sess_put(req->sess);
-}
-
 /*
  * Connection with a peer is released.
  *
@@ -1053,24 +1062,37 @@ tfw_http_conn_release(TfwConnection *srv_conn)
 	clear_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
 }
 
+/*
+ * Dequeue the request from @seq_queue and free the request
+ * and the paired response.
+ */
+static inline void
+__tfw_http_resp_pair_free(TfwHttpReq *req)
+{
+	list_del_init(&req->msg.seq_list);
+	tfw_http_conn_msg_free(req->resp);
+	tfw_http_conn_msg_free((TfwHttpMsg *)req);
+}
+
 /*
  * Drop client connection's resources.
  *
- * Desintegrate the list, but do not free the requests. These requests
- * have not been answered yet. They are held in the lists of respective
- * server connections until paired responses come. If a response comes
- * after the list is destroyed, then both the request and the response
- * are dropped at the sight of an empty list. The requests from the
- * dead client connection are then removed from that server connection
- * and freed.
+ * Disintegrate the client connection's @seq_list. Requests that have
+ * a paired response can be freed. Move those to @zap_queue for doing
+ * it without the lock. Requests without a paired response have not
+ * been answered yet. They are held in the lists of server connections
+ * until responses come. Don't free those requests.
  *
- * Locking is necessary as the list is constantly probed from server
+ * If a response comes after @seq_list is disintegrated, then both the
+ * request and the response are dropped at the sight of an empty list.
+ *
+ * Locking is necessary as @seq_list is constantly probed from server
  * connection threads.
  */
 static void
 tfw_http_conn_cli_drop(TfwConnection *cli_conn)
 {
-	TfwHttpMsg *hmreq, *tmp;
+	TfwHttpReq *req, *tmp;
 	struct list_head *seq_queue = &cli_conn->seq_queue;
 	LIST_HEAD(zap_queue);
 
@@ -1081,11 +1103,19 @@ tfw_http_conn_cli_drop(TfwConnection *cli_conn)
 		return;
 
 	spin_lock(&cli_conn->seq_qlock);
-	list_splice_tail_init(seq_queue, &zap_queue);
+	list_for_each_entry_safe(req, tmp, seq_queue, msg.seq_list) {
+		if (req->resp)
+			list_move_tail(&req->msg.seq_list, &zap_queue);
+		else
+			list_del_init(&req->msg.seq_list);
+	}
 	spin_unlock(&cli_conn->seq_qlock);
 
-	list_for_each_entry_safe(hmreq, tmp, &zap_queue, msg.seq_list)
-		list_del_init(&hmreq->msg.seq_list);
+	list_for_each_entry_safe(req, tmp, &zap_queue, msg.seq_list) {
+		BUG_ON(!list_empty(&req->fwd_list));
+		BUG_ON(!list_empty(&req->nip_list));
+		__tfw_http_resp_pair_free(req);
+	}
 }
 
 /*
@@ -1382,48 +1412,35 @@ tfw_http_adjust_resp(TfwHttpResp *resp, TfwHttpReq *req)
 }
 
 /*
- * Forward responses to the client in the correct order.
+ * Forward responses in @ret_queue to the client in the correct order.
+ *
+ * In case of error the client connection must be closed immediately.
+ * Otherwise, the correct order of responses will be broken. Unsent
+ * responses are taken care of by the caller.
  */
 static void
 __tfw_http_resp_fwd(TfwConnection *cli_conn, struct list_head *ret_queue)
 {
 	TfwHttpReq *req, *tmp;
-	TfwHttpResp *resp;
 
-	/* Forward responses to the client. */
 	list_for_each_entry_safe(req, tmp, ret_queue, msg.seq_list) {
-		list_del_init(&req->msg.seq_list);
-		resp = (TfwHttpResp *)req->resp;
-		BUG_ON(!resp);
-		/*
-		 * If the client connection is dead, then discard all
-		 * @req and @resp in the @out_queue. Remaining requests
-		 * from the client in the @seq_queue will be handled at
-		 * the time the client connection is released.
-		 */
-		if (!tfw_connection_live(cli_conn)) {
-			TFW_DBG2("%s: Client connection dead: conn=[%p]\n",
-				 __func__, cli_conn);
-			TFW_INC_STAT_BH(serv.msgs_otherr);
-			goto loop_discard;
-		}
-		/*
-		 * Close the client connection in case of an error.
-		 * Otherwise, the correct order of responses may be broken.
-		 */
-		if (tfw_cli_conn_send(cli_conn, (TfwMsg *)resp)) {
-			TFW_DBG2("%s: Forwarding error: conn=[%p] resp=[%p]\n",
-				 __func__, cli_conn, resp);
+		BUG_ON(!req->resp);
+		if (tfw_cli_conn_send(cli_conn, (TfwMsg *)req->resp)) {
 			ss_close_sync(cli_conn->sk, true);
-			TFW_INC_STAT_BH(serv.msgs_otherr);
+			return;
 		}
+		__tfw_http_resp_pair_free(req);
 		TFW_INC_STAT_BH(serv.msgs_forwarded);
-loop_discard:
-		tfw_http_conn_msg_free((TfwHttpMsg *)resp);
-		tfw_http_conn_msg_free((TfwHttpMsg *)req);
 	}
 }
 
+/*
+ * Pair response @resp with request @req in @seq_queue. Then, starting
+ * with the first request in @seq_queue, pick consecutive requests that
+ * have a paired response. Move those requests to the list of returned
+ * responses @ret_queue. Sequentially send responses from @ret_queue to
+ * the client.
+ */
 void
 tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 {
@@ -1435,19 +1452,13 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 	TFW_DBG2("%s: req=[%p], resp=[%p]\n", __func__, req, resp);
 
 	/*
-	 * Starting with the first request on the list, pick consecutive
-	 * requests that have a paired response. Remove those requests
-	 * from the list, and put them on the list of returned responses.
-	 * Take care of concurrent calls to this function from different
-	 * CPUs, all going for the same client connection.
-	 *
 	 * If the list is empty, then it's either a bug, or the client
 	 * connection had been closed. If it's a bug, then the correct
-	 * order of responses to requests may be broken. The client
-	 * connection needs to be closed.
+	 * order of responses to requests may be broken. The connection
+	 * with the client must be closed immediately.
 	 */
 	spin_lock(&cli_conn->seq_qlock);
-	if (list_empty(seq_queue)) {
+	if (unlikely(list_empty(seq_queue))) {
 		spin_unlock(&cli_conn->seq_qlock);
 		TFW_DBG2("%s: The client's request missing: conn=[%p]\n",
 			 __func__, cli_conn);
@@ -1457,7 +1468,9 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 		TFW_INC_STAT_BH(serv.msgs_otherr);
 		return;
 	}
+	BUG_ON(list_empty(&req->msg.seq_list));
 	req->resp = (TfwHttpMsg *)resp;
+	/* Move consecutive requests with @req->resp to @ret_queue. */
 	list_for_each_entry(req, seq_queue, msg.seq_list) {
 		if (req->resp == NULL)
 			break;
@@ -1469,6 +1482,21 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 	}
 	__list_cut_position(&ret_queue, seq_queue, req_retent);
 
+	/*
+	 * The function may be called concurrently on different CPUs,
+	 * all going for the same client connection. In some threads
+	 * a response is paired with a request, but the first response
+	 * in the queue is not ready yet, so it can't be sent out. When
+	 * there're responses to send, sending must be in correct order
+	 * which is controlled by the lock. To let other threads pair
+	 * requests with responses, unlock the seq_queue lock and use
+	 * a different lock, @ret_qlock, for sending.
+	 *
+	 * A client may close the connection at any time. A connection
+	 * is destroyed when the last reference goes, so the argument
+	 * to spin_unlock() may get invalid. Hold the connection until
+	 * sending is done.
+	 */
 	tfw_connection_get(cli_conn);
 	spin_lock(&cli_conn->ret_qlock);
 	spin_unlock(&cli_conn->seq_qlock);
@@ -1477,6 +1505,18 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 
 	spin_unlock(&cli_conn->ret_qlock);
 	tfw_connection_put(cli_conn);
+
+	/* Zap request/responses that were not sent due to an error. */
+	if (!list_empty(&ret_queue)) {
+		TfwHttpReq *tmp;
+		list_for_each_entry_safe(req, tmp, &ret_queue, msg.seq_list) {
+			TFW_DBG2("%s: Forwarding error: conn=[%p] resp=[%p]\n",
+				 __func__, cli_conn, req->resp);
+			BUG_ON(!req->resp);
+			__tfw_http_resp_pair_free(req);
+			TFW_INC_STAT_BH(serv.msgs_otherr);
+		}
+	}
 }
 
 /**

From a8fa850e99abacba58bc872787f981d4127f356b Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Mon, 23 Jan 2017 00:47:24 +0300
Subject: [PATCH 43/65] Rename *_max vars and structure members for
 unification.

---
 tempesta_fw/http.c                |   8 +--
 tempesta_fw/procfs.c              |   2 +-
 tempesta_fw/server.h              |  14 ++--
 tempesta_fw/sock_srv.c            | 107 +++++++++++++++---------------
 tempesta_fw/t/unit/sched_helper.c |   2 +-
 5 files changed, 67 insertions(+), 66 deletions(-)

diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 3c7cea347..e7afc1a88 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -555,12 +555,12 @@ static inline bool
 tfw_http_req_evict_timeout(TfwConnection *srv_conn, TfwServer *srv,
 			   TfwHttpReq *req, struct list_head *equeue)
 {
-	unsigned long jtimeout = jiffies - req->jrxtstamp;
+	unsigned long jqage = jiffies - req->jrxtstamp;
 
-	if (unlikely(time_after(jtimeout, srv->qjtmo_max))) {
+	if (unlikely(time_after(jqage, srv->max_jqage))) {
 		TFW_DBG2("%s: Eviction: req=[%p] overdue=[%dms]\n",
 			 __func__, req,
-			jiffies_to_msecs(jtimeout - srv->qjtmo_max));
+			 jiffies_to_msecs(jqage - srv->max_jqage));
 		tfw_http_req_move2equeue(srv_conn, req, equeue, 504);
 		return true;
 	}
@@ -575,7 +575,7 @@ static inline bool
 tfw_http_req_evict_retries(TfwConnection *srv_conn, TfwServer *srv,
 			   TfwHttpReq *req, struct list_head *equeue)
 {
-	if (unlikely(req->retries++ >= srv->retry_max)) {
+	if (unlikely(req->retries++ >= srv->max_refwd)) {
 		TFW_DBG2("%s: Eviction: req=[%p] retries=[%d]\n",
 			 __func__, req, req->retries);
 		tfw_http_req_move2equeue(srv_conn, req, equeue, 504);
diff --git a/tempesta_fw/procfs.c b/tempesta_fw/procfs.c
index 6f75e3e82..b51ec06e3 100644
--- a/tempesta_fw/procfs.c
+++ b/tempesta_fw/procfs.c
@@ -168,7 +168,7 @@ tfw_srvstats_seq_show(struct seq_file *seq, void *off)
 				prcntl[i].ith, prcntl[i].val);
 	i = 0;
 	seq_printf(seq, "Maximum forwarding queue size\t: %d\n",
-			srv->qsize_max);
+			srv->max_qsize);
 	list_for_each_entry(srv_conn, &srv->conn_list, list)
 		seq_printf(seq, "\tConnection %03d queue size\t: %d\n",
 				++i, ACCESS_ONCE(srv_conn->qsize));
diff --git a/tempesta_fw/server.h b/tempesta_fw/server.h
index cf8bf275f..6a4cf5a78 100644
--- a/tempesta_fw/server.h
+++ b/tempesta_fw/server.h
@@ -38,9 +38,9 @@ typedef struct tfw_scheduler_t TfwScheduler;
  * @list	- member pointer in the list of servers of a server group;
  * @sg		- back-reference to the server group;
  * @apm		- opaque handle for APM stats;
- * @qsize_max	- maximum queue size of a server connection;
- * @qjtmo_max	- maximum age of a request in a server connection, in jiffies;
- * @retry_max	- maximum number of tries for forwarding a request;
+ * @max_qsize	- maximum queue size of a server connection;
+ * @max_jqage	- maximum age of a request in a server connection, in jiffies;
+ * @max_refwd	- maximum number of tries for forwarding a request;
  * @flags	- server related flags;
  */
 typedef struct {
@@ -49,9 +49,9 @@ typedef struct {
 	TfwSrvGroup		*sg;
 	void			*apm;
 	int			stress;
-	unsigned int		qsize_max;
-	unsigned long		qjtmo_max;
-	unsigned int		retry_max;
+	unsigned int		max_qsize;
+	unsigned long		max_jqage;
+	unsigned int		max_refwd;
 	unsigned int		flags;
 } TfwServer;
 
@@ -127,7 +127,7 @@ static inline bool
 tfw_server_queue_full(TfwConnection *srv_conn)
 {
 	TfwServer *srv = (TfwServer *)srv_conn->peer;
-	return ACCESS_ONCE(srv_conn->qsize) >= srv->qsize_max;
+	return ACCESS_ONCE(srv_conn->qsize) >= srv->max_qsize;
 }
 
 /* Server group routines. */
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index c9bbfb7cf..94e1fef52 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -109,12 +109,13 @@
  *    soon as the last client releases the server connection.
  */
 /**
- * Note: `attempts` and `max_attempts` must be of the same type.
+ * @recons	- the number of reconnect attempts;
+ * @max_recons	- the maximum number of reconnect attempts;
  */
 typedef struct {
 	TfwConnection		conn;
-	unsigned int		attempts;
-	unsigned int		max_attempts;
+	unsigned int		recons;
+	unsigned int		max_recons;
 } TfwSrvConnection;
 
 /*
@@ -203,7 +204,7 @@ tfw_sock_srv_connect_try(TfwSrvConnection *srv_conn)
 }
 
 /*
- * max_attempts can be the maximum value for the data type to mean
+ * @max_recons can be the maximum value for the data type to mean
  * the unlimited number of attempts, which is the value that should
  * never be reached. UINT_MAX seconds is more than 136 years. It's
  * safe to assume that it's not reached in a single run of Tempesta.
@@ -229,7 +230,7 @@ tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
 	 * never be reached. UINT_MAX seconds is more than 136 years. It's
 	 * safe to assume that it's not reached in a single run of Tempesta.
 	 */
-	if (unlikely((srv_conn->attempts >= srv_conn->max_attempts)
+	if (unlikely((srv_conn->recons >= srv_conn->max_recons)
 		     && !test_bit(TFW_CONN_B_ISDEAD, &srv_conn->conn.flags)))
 	{
 		TfwAddr *srv_addr = &srv_conn->conn.peer->addr;
@@ -237,28 +238,28 @@ tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
 		tfw_addr_ntop(srv_addr, s_addr, sizeof(s_addr));
 		TFW_WARN("The limit of [%d] on reconnect attempts exceeded. "
 			 "The server connection [%s] is down.\n",
-			 srv_conn->max_attempts, s_addr);
+			 srv_conn->max_recons, s_addr);
 		tfw_connection_repair(&srv_conn->conn);
 		set_bit(TFW_CONN_B_ISDEAD, &srv_conn->conn.flags);
 	}
-	if (srv_conn->attempts < ARRAY_SIZE(tfw_srv_tmo_vals)) {
-		timeout = tfw_srv_tmo_vals[srv_conn->attempts];
+	if (srv_conn->recons < ARRAY_SIZE(tfw_srv_tmo_vals)) {
+		timeout = tfw_srv_tmo_vals[srv_conn->recons];
 		TFW_DBG_ADDR("Cannot establish connection",
 			     &srv_conn->conn.peer->addr);
 	} else {
 		timeout = tfw_srv_tmo_vals[ARRAY_SIZE(tfw_srv_tmo_vals) - 1];
-		if (srv_conn->attempts == ARRAY_SIZE(tfw_srv_tmo_vals)
-		    || !(srv_conn->attempts % 60))
+		if (srv_conn->recons == ARRAY_SIZE(tfw_srv_tmo_vals)
+		    || !(srv_conn->recons % 60))
 		{
 			char addr_str[TFW_ADDR_STR_BUF_SIZE] = { 0 };
 			tfw_addr_fmt_v6(&srv_conn->conn.peer->addr.v6.sin6_addr,
 					0, addr_str);
 			TFW_WARN("Cannot establish connection with %s in %u"
 				 " tries, keep trying...\n",
-				 addr_str, srv_conn->attempts);
+				 addr_str, srv_conn->recons);
 		}
 	}
-	srv_conn->attempts++;
+	srv_conn->recons++;
 
 	mod_timer(&srv_conn->conn.timer, jiffies + msecs_to_jiffies(timeout));
 }
@@ -276,7 +277,7 @@ tfw_sock_srv_connect_retry_timer_cb(unsigned long data)
 static inline void
 __reset_retry_timer(TfwSrvConnection *srv_conn)
 {
-	srv_conn->attempts = 0;
+	srv_conn->recons = 0;
 }
 
 static inline void
@@ -578,20 +579,20 @@ tfw_sock_srv_delete_all_conns(void)
  * Default values for various configuration directives and options.
  */
 #define TFW_SRV_QUEUE_SIZE_DEF		1000	/* Max queue size */
-#define TFW_SRV_SEND_TIMEOUT_DEF	60	/* Default request timeout */
-#define TFW_SRV_SEND_TRIES_DEF		5	/* Default number of tries */
+#define TFW_SRV_FWD_TIMEOUT_DEF		60	/* Default request timeout */
+#define TFW_SRV_FWD_RETRIES_DEF		5	/* Default number of tries */
 #define TFW_SRV_RETRY_NIP_DEF		0	/* Do NOT resend NIP reqs */
 #define TFW_SRV_RETRY_ATTEMPTS_DEF	10	/* Reconnect attempts. */
 
 static int tfw_cfg_in_queue_size = TFW_SRV_QUEUE_SIZE_DEF;
-static int tfw_cfg_in_send_timeout = TFW_SRV_SEND_TIMEOUT_DEF;
-static int tfw_cfg_in_send_tries = TFW_SRV_SEND_TRIES_DEF;
+static int tfw_cfg_in_fwd_timeout = TFW_SRV_FWD_TIMEOUT_DEF;
+static int tfw_cfg_in_fwd_retries = TFW_SRV_FWD_RETRIES_DEF;
 static int tfw_cfg_in_retry_nip = TFW_SRV_RETRY_NIP_DEF;
 static int tfw_cfg_in_retry_attempts = TFW_SRV_RETRY_ATTEMPTS_DEF;
 
 static int tfw_cfg_out_queue_size = TFW_SRV_QUEUE_SIZE_DEF;
-static int tfw_cfg_out_send_timeout = TFW_SRV_SEND_TIMEOUT_DEF;
-static int tfw_cfg_out_send_tries = TFW_SRV_SEND_TRIES_DEF;
+static int tfw_cfg_out_fwd_timeout = TFW_SRV_FWD_TIMEOUT_DEF;
+static int tfw_cfg_out_fwd_retries = TFW_SRV_FWD_RETRIES_DEF;
 static int tfw_cfg_out_retry_nip = TFW_SRV_RETRY_NIP_DEF;
 static int tfw_cfg_out_retry_attempts = TFW_SRV_RETRY_ATTEMPTS_DEF;
 
@@ -629,27 +630,27 @@ tfw_handle_out_queue_size(TfwCfgSpec *cs, TfwCfgEntry *ce)
 }
 
 static int
-tfw_handle_in_send_timeout(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_handle_in_fwd_timeout(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	return tfw_handle_opt_val(cs, ce, &tfw_cfg_in_send_timeout);
+	return tfw_handle_opt_val(cs, ce, &tfw_cfg_in_fwd_timeout);
 }
 
 static int
-tfw_handle_out_send_timeout(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_handle_out_fwd_timeout(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	return tfw_handle_opt_val(cs, ce, &tfw_cfg_out_send_timeout);
+	return tfw_handle_opt_val(cs, ce, &tfw_cfg_out_fwd_timeout);
 }
 
 static int
-tfw_handle_in_send_tries(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_handle_in_fwd_retries(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	return tfw_handle_opt_val(cs, ce, &tfw_cfg_in_send_tries);
+	return tfw_handle_opt_val(cs, ce, &tfw_cfg_in_fwd_retries);
 }
 
 static int
-tfw_handle_out_send_tries(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_handle_out_fwd_retries(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	return tfw_handle_opt_val(cs, ce, &tfw_cfg_out_send_tries);
+	return tfw_handle_opt_val(cs, ce, &tfw_cfg_out_fwd_retries);
 }
 
 static inline int
@@ -688,17 +689,17 @@ tfw_handle_out_conn_tries(TfwCfgSpec *cs, TfwCfgEntry *ce)
 }
 
 static int
-tfw_cfg_set_conn_tries(TfwServer *srv, int attempts)
+tfw_cfg_set_conn_tries(TfwServer *srv, int recons)
 {
 	TfwSrvConnection *srv_conn;
 
 	list_for_each_entry(srv_conn, &srv->conn_list, conn.list)
-		if (!attempts) {
-			srv_conn->max_attempts = UINT_MAX;
-		} else if (attempts < ARRAY_SIZE(tfw_srv_tmo_vals)) {
-			srv_conn->max_attempts = ARRAY_SIZE(tfw_srv_tmo_vals);
+		if (!recons) {
+			srv_conn->max_recons = UINT_MAX;
+		} else if (recons < ARRAY_SIZE(tfw_srv_tmo_vals)) {
+			srv_conn->max_recons = ARRAY_SIZE(tfw_srv_tmo_vals);
 		} else {
-			srv_conn->max_attempts = attempts;
+			srv_conn->max_recons = recons;
 		}
 
 	return 0;
@@ -831,11 +832,11 @@ tfw_handle_out_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
 		return -EINVAL;
 
 	tfw_cfg_set_conn_tries(srv, tfw_cfg_out_retry_attempts);
-	srv->qsize_max = tfw_cfg_out_queue_size ? : UINT_MAX;
-	srv->qjtmo_max = tfw_cfg_out_send_timeout
-		       ? msecs_to_jiffies(tfw_cfg_out_send_timeout * 1000)
-		       : ULONG_MAX;
-	srv->retry_max = tfw_cfg_out_send_tries ? : UINT_MAX;
+	srv->max_qsize = tfw_cfg_out_queue_size ? : UINT_MAX;
+	srv->max_jqage = tfw_cfg_out_fwd_timeout
+		      ? msecs_to_jiffies(tfw_cfg_out_fwd_timeout * 1000)
+		      : ULONG_MAX;
+	srv->max_refwd = tfw_cfg_out_fwd_retries ? : UINT_MAX;
 	srv->flags |= tfw_cfg_out_retry_nip ? TFW_SRV_RETRY_NON_IDEMP : 0;
 
 	return 0;
@@ -885,8 +886,8 @@ tfw_begin_srv_group(TfwCfgSpec *cs, TfwCfgEntry *ce)
 	tfw_cfg_in_lstsz = 0;
 	tfw_cfg_in_retry_attempts = tfw_cfg_out_retry_attempts;
 	tfw_cfg_in_queue_size = tfw_cfg_out_queue_size;
-	tfw_cfg_in_send_timeout = tfw_cfg_out_send_timeout;
-	tfw_cfg_in_send_tries = tfw_cfg_out_send_tries;
+	tfw_cfg_in_fwd_timeout = tfw_cfg_out_fwd_timeout;
+	tfw_cfg_in_fwd_retries = tfw_cfg_out_fwd_retries;
 	tfw_cfg_in_retry_nip = tfw_cfg_out_retry_nip;
 
 	return 0;
@@ -913,12 +914,12 @@ tfw_finish_srv_group(TfwCfgSpec *cs)
 
 	for (i = 0; i < tfw_cfg_in_lstsz; ++i) {
 		TfwServer *srv = tfw_cfg_in_lst[i];
-		unsigned long jtmout =
-			msecs_to_jiffies(tfw_cfg_in_send_timeout * 1000);
+		unsigned long jqage =
+			msecs_to_jiffies(tfw_cfg_in_fwd_timeout * 1000);
 		tfw_cfg_set_conn_tries(srv, tfw_cfg_in_retry_attempts);
-		srv->qsize_max = tfw_cfg_in_queue_size ? : UINT_MAX;
-		srv->qjtmo_max = tfw_cfg_in_send_timeout ? jtmout : ULONG_MAX;
-		srv->retry_max = tfw_cfg_in_send_tries ? : UINT_MAX;
+		srv->max_qsize = tfw_cfg_in_queue_size ? : UINT_MAX;
+		srv->max_jqage = tfw_cfg_in_fwd_timeout ? jqage : ULONG_MAX;
+		srv->max_refwd = tfw_cfg_in_fwd_retries ? : UINT_MAX;
 		srv->flags |= tfw_cfg_in_retry_nip ?
 			      TFW_SRV_RETRY_NON_IDEMP : 0;
 	}
@@ -968,17 +969,17 @@ static TfwCfgSpec tfw_srv_group_specs[] = {
 		.cleanup = tfw_clean_srv_groups,
 	},
 	{
-		"server_send_timeout",
+		"server_forward_timeout",
 		NULL,
-		tfw_handle_in_send_timeout,
+		tfw_handle_in_fwd_timeout,
 		.allow_none = true,
 		.allow_repeat = false,
 		.cleanup = tfw_clean_srv_groups,
 	},
 	{
-		"server_send_tries",
+		"server_forward_retries",
 		NULL,
-		tfw_handle_in_send_tries,
+		tfw_handle_in_fwd_retries,
 		.allow_none = true,
 		.allow_repeat = false,
 		.cleanup = tfw_clean_srv_groups,
@@ -1024,17 +1025,17 @@ TfwCfgMod tfw_sock_srv_cfg_mod = {
 			.cleanup = tfw_clean_srv_groups,
 		},
 		{
-			"server_send_timeout",
+			"server_forward_timeout",
 			NULL,
-			tfw_handle_out_send_timeout,
+			tfw_handle_out_fwd_timeout,
 			.allow_none = true,
 			.allow_repeat = true,
 			.cleanup = tfw_clean_srv_groups,
 		},
 		{
-			"server_send_tries",
+			"server_forward_retries",
 			NULL,
-			tfw_handle_out_send_tries,
+			tfw_handle_out_fwd_retries,
 			.allow_none = true,
 			.allow_repeat = true,
 			.cleanup = tfw_clean_srv_groups,
diff --git a/tempesta_fw/t/unit/sched_helper.c b/tempesta_fw/t/unit/sched_helper.c
index 842e9163f..06e1905e9 100644
--- a/tempesta_fw/t/unit/sched_helper.c
+++ b/tempesta_fw/t/unit/sched_helper.c
@@ -105,7 +105,7 @@ test_create_srv(const char *in_addr, TfwSrvGroup *sg)
 	srv = tfw_server_create(&addr);
 	BUG_ON(!srv);
 
-	srv->qsize_max = 100;
+	srv->max_qsize = 100;
 	tfw_sg_add(sg, srv);
 
 	return srv;

From 84e7cc0fd679b41cc454402ec3ae384302054c42 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Mon, 23 Jan 2017 01:43:39 +0300
Subject: [PATCH 44/65] Move flags and max_* members from TfwServer{} to
 TfwSrvGroup{}.

---
 tempesta_fw/http.c                |  6 +--
 tempesta_fw/procfs.c              |  2 +-
 tempesta_fw/server.h              | 40 ++++++++++---------
 tempesta_fw/sock_srv.c            | 65 +++++++++++++------------------
 tempesta_fw/t/unit/sched_helper.c |  3 +-
 5 files changed, 55 insertions(+), 61 deletions(-)

diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index e7afc1a88..22ce29bc8 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -557,7 +557,7 @@ tfw_http_req_evict_timeout(TfwConnection *srv_conn, TfwServer *srv,
 {
 	unsigned long jqage = jiffies - req->jrxtstamp;
 
-	if (unlikely(time_after(jqage, srv->max_jqage))) {
+	if (unlikely(time_after(jqage, srv->sg->max_jqage))) {
 		TFW_DBG2("%s: Eviction: req=[%p] overdue=[%dms]\n",
 			 __func__, req,
 			 jiffies_to_msecs(jqage - srv->max_jqage));
@@ -575,7 +575,7 @@ static inline bool
 tfw_http_req_evict_retries(TfwConnection *srv_conn, TfwServer *srv,
 			   TfwHttpReq *req, struct list_head *equeue)
 {
-	if (unlikely(req->retries++ >= srv->max_refwd)) {
+	if (unlikely(req->retries++ >= srv->sg->max_refwd)) {
 		TFW_DBG2("%s: Eviction: req=[%p] retries=[%d]\n",
 			 __func__, req, req->retries);
 		tfw_http_req_move2equeue(srv_conn, req, equeue, 504);
@@ -736,7 +736,7 @@ tfw_http_req_fwd_handlenip(TfwConnection *srv_conn, struct list_head *equeue)
 	TfwHttpReq *req_sent = (TfwHttpReq *)srv_conn->msg_sent;
 
 	if (req_sent && tfw_http_req_is_nip(req_sent)
-	    && likely(!(srv->flags & TFW_SRV_RETRY_NON_IDEMP)))
+	    && likely(!(srv->sg->flags & TFW_SRV_RETRY_NIP)))
 	{
 		BUG_ON(list_empty(&req_sent->nip_list));
 		srv_conn->msg_sent =
diff --git a/tempesta_fw/procfs.c b/tempesta_fw/procfs.c
index b51ec06e3..377477949 100644
--- a/tempesta_fw/procfs.c
+++ b/tempesta_fw/procfs.c
@@ -168,7 +168,7 @@ tfw_srvstats_seq_show(struct seq_file *seq, void *off)
 				prcntl[i].ith, prcntl[i].val);
 	i = 0;
 	seq_printf(seq, "Maximum forwarding queue size\t: %d\n",
-			srv->max_qsize);
+			srv->sg->max_qsize);
 	list_for_each_entry(srv_conn, &srv->conn_list, list)
 		seq_printf(seq, "\tConnection %03d queue size\t: %d\n",
 				++i, ACCESS_ONCE(srv_conn->qsize));
diff --git a/tempesta_fw/server.h b/tempesta_fw/server.h
index 6a4cf5a78..7907ed1b1 100644
--- a/tempesta_fw/server.h
+++ b/tempesta_fw/server.h
@@ -38,10 +38,6 @@ typedef struct tfw_scheduler_t TfwScheduler;
  * @list	- member pointer in the list of servers of a server group;
  * @sg		- back-reference to the server group;
  * @apm		- opaque handle for APM stats;
- * @max_qsize	- maximum queue size of a server connection;
- * @max_jqage	- maximum age of a request in a server connection, in jiffies;
- * @max_refwd	- maximum number of tries for forwarding a request;
- * @flags	- server related flags;
  */
 typedef struct {
 	TFW_PEER_COMMON;
@@ -49,15 +45,8 @@ typedef struct {
 	TfwSrvGroup		*sg;
 	void			*apm;
 	int			stress;
-	unsigned int		max_qsize;
-	unsigned long		max_jqage;
-	unsigned int		max_refwd;
-	unsigned int		flags;
 } TfwServer;
 
-/* Server related flags. */
-#define TFW_SRV_RETRY_NON_IDEMP		0x0001	/* Retry non-idemporent req. */
-
 /**
  * The servers group with the same load balancing, failovering and eviction
  * policies.
@@ -65,12 +54,17 @@ typedef struct {
  * Reverse proxy must define load balancing policy. Forward proxy must define
  * eviction policy. While both of them should define failovering policy.
  *
- * @list		- member pointer in the list of server groups;
- * @srv_list		- list of servers belonging to the group;
- * @lock		- synchronizes the group readers with updaters;
- * @sched		- requests scheduling handler;
- * @sched_data		- private scheduler data for the server group;
- * @name		- name of the group specified in the configuration;
+ * @list	- member pointer in the list of server groups;
+ * @srv_list	- list of servers belonging to the group;
+ * @lock	- synchronizes the group readers with updaters;
+ * @sched	- requests scheduling handler;
+ * @sched_data	- private scheduler data for the server group;
+ * @max_qsize	- maximum queue size of a server connection;
+ * @max_jqage	- maximum age of a request in a server connection, in jiffies;
+ * @max_refwd	- maximum number of tries for forwarding a request;
+ * @max_recons	- maximum number of reconnect attempts;
+ * @flags	- server group related flags;
+ * @name	- name of the group specified in the configuration;
  */
 struct tfw_srv_group_t {
 	struct list_head	list;
@@ -78,9 +72,17 @@ struct tfw_srv_group_t {
 	rwlock_t		lock;
 	TfwScheduler		*sched;
 	void			*sched_data;
+	unsigned int		max_qsize;
+	unsigned int		max_refwd;
+	unsigned long		max_jqage;
+	unsigned int		max_recons;
+	unsigned int		flags;
 	char			name[0];
 };
 
+/* Server group related flags. */
+#define TFW_SRV_RETRY_NIP	0x0001	/* Retry non-idempotent req. */
+
 /**
  * Requests scheduling algorithm handler.
  *
@@ -126,8 +128,8 @@ void tfw_srv_conn_release(TfwConnection *conn);
 static inline bool
 tfw_server_queue_full(TfwConnection *srv_conn)
 {
-	TfwServer *srv = (TfwServer *)srv_conn->peer;
-	return ACCESS_ONCE(srv_conn->qsize) >= srv->max_qsize;
+	TfwSrvGroup *sg = ((TfwServer *)srv_conn->peer)->sg;
+	return ACCESS_ONCE(srv_conn->qsize) >= sg->max_qsize;
 }
 
 /* Server group routines. */
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index 94e1fef52..42f40c00c 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -110,12 +110,10 @@
  */
 /**
  * @recons	- the number of reconnect attempts;
- * @max_recons	- the maximum number of reconnect attempts;
  */
 typedef struct {
 	TfwConnection		conn;
 	unsigned int		recons;
-	unsigned int		max_recons;
 } TfwSrvConnection;
 
 /*
@@ -218,6 +216,7 @@ tfw_sock_srv_connect_try(TfwSrvConnection *srv_conn)
 static inline void
 tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
 {
+	TfwSrvGroup *sg = ((TfwServer *)srv_conn->conn.peer)->sg;
 	unsigned long timeout;
 
 	/* Don't rearm reconnection timer if we're about to shutdown. */
@@ -230,7 +229,7 @@ tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
 	 * never be reached. UINT_MAX seconds is more than 136 years. It's
 	 * safe to assume that it's not reached in a single run of Tempesta.
 	 */
-	if (unlikely((srv_conn->recons >= srv_conn->max_recons)
+	if (unlikely((srv_conn->recons >= sg->max_recons)
 		     && !test_bit(TFW_CONN_B_ISDEAD, &srv_conn->conn.flags)))
 	{
 		TfwAddr *srv_addr = &srv_conn->conn.peer->addr;
@@ -238,7 +237,7 @@ tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
 		tfw_addr_ntop(srv_addr, s_addr, sizeof(s_addr));
 		TFW_WARN("The limit of [%d] on reconnect attempts exceeded. "
 			 "The server connection [%s] is down.\n",
-			 srv_conn->max_recons, s_addr);
+			 sg->max_recons, s_addr);
 		tfw_connection_repair(&srv_conn->conn);
 		set_bit(TFW_CONN_B_ISDEAD, &srv_conn->conn.flags);
 	}
@@ -689,18 +688,15 @@ tfw_handle_out_conn_tries(TfwCfgSpec *cs, TfwCfgEntry *ce)
 }
 
 static int
-tfw_cfg_set_conn_tries(TfwServer *srv, int recons)
+tfw_cfg_set_conn_tries(TfwSrvGroup *sg, int recons)
 {
-	TfwSrvConnection *srv_conn;
-
-	list_for_each_entry(srv_conn, &srv->conn_list, conn.list)
-		if (!recons) {
-			srv_conn->max_recons = UINT_MAX;
-		} else if (recons < ARRAY_SIZE(tfw_srv_tmo_vals)) {
-			srv_conn->max_recons = ARRAY_SIZE(tfw_srv_tmo_vals);
-		} else {
-			srv_conn->max_recons = recons;
-		}
+	if (!recons) {
+		sg->max_recons = UINT_MAX;
+	} else if (recons < ARRAY_SIZE(tfw_srv_tmo_vals)) {
+		sg->max_recons = ARRAY_SIZE(tfw_srv_tmo_vals);
+	} else {
+		sg->max_recons = recons;
+	}
 
 	return 0;
 }
@@ -831,13 +827,13 @@ tfw_handle_out_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
 	if (!(srv = tfw_handle_server(cs, ce)))
 		return -EINVAL;
 
-	tfw_cfg_set_conn_tries(srv, tfw_cfg_out_retry_attempts);
-	srv->max_qsize = tfw_cfg_out_queue_size ? : UINT_MAX;
-	srv->max_jqage = tfw_cfg_out_fwd_timeout
+	tfw_cfg_set_conn_tries(sg, tfw_cfg_out_retry_attempts);
+	sg->max_qsize = tfw_cfg_out_queue_size ? : UINT_MAX;
+	sg->max_jqage = tfw_cfg_out_fwd_timeout
 		      ? msecs_to_jiffies(tfw_cfg_out_fwd_timeout * 1000)
 		      : ULONG_MAX;
-	srv->max_refwd = tfw_cfg_out_fwd_retries ? : UINT_MAX;
-	srv->flags |= tfw_cfg_out_retry_nip ? TFW_SRV_RETRY_NON_IDEMP : 0;
+	sg->max_refwd = tfw_cfg_out_fwd_retries ? : UINT_MAX;
+	sg->flags |= tfw_cfg_out_retry_nip ? TFW_SRV_RETRY_NIP : 0;
 
 	return 0;
 }
@@ -906,23 +902,18 @@ tfw_begin_srv_group(TfwCfgSpec *cs, TfwCfgEntry *ce)
 static int
 tfw_finish_srv_group(TfwCfgSpec *cs)
 {
-	int i;
-
-	BUG_ON(!tfw_cfg_curr_group);
-	BUG_ON(list_empty(&tfw_cfg_curr_group->srv_list));
-	TFW_DBG("finish srv_group: %s\n", tfw_cfg_curr_group->name);
-
-	for (i = 0; i < tfw_cfg_in_lstsz; ++i) {
-		TfwServer *srv = tfw_cfg_in_lst[i];
-		unsigned long jqage =
-			msecs_to_jiffies(tfw_cfg_in_fwd_timeout * 1000);
-		tfw_cfg_set_conn_tries(srv, tfw_cfg_in_retry_attempts);
-		srv->max_qsize = tfw_cfg_in_queue_size ? : UINT_MAX;
-		srv->max_jqage = tfw_cfg_in_fwd_timeout ? jqage : ULONG_MAX;
-		srv->max_refwd = tfw_cfg_in_fwd_retries ? : UINT_MAX;
-		srv->flags |= tfw_cfg_in_retry_nip ?
-			      TFW_SRV_RETRY_NON_IDEMP : 0;
-	}
+	unsigned long jqage = msecs_to_jiffies(tfw_cfg_in_fwd_timeout * 1000);
+	TfwSrvGroup *sg = tfw_cfg_curr_group;
+
+	BUG_ON(!sg);
+	BUG_ON(list_empty(&sg->srv_list));
+	TFW_DBG("finish srv_group: %s\n", sg->name);
+
+	tfw_cfg_set_conn_tries(sg, tfw_cfg_in_retry_attempts);
+	sg->max_qsize = tfw_cfg_in_queue_size ? : UINT_MAX;
+	sg->max_jqage = tfw_cfg_in_fwd_timeout ? jqage : ULONG_MAX;
+	sg->max_refwd = tfw_cfg_in_fwd_retries ? : UINT_MAX;
+	sg->flags |= tfw_cfg_in_retry_nip ? TFW_SRV_RETRY_NIP : 0;
 	tfw_cfg_curr_group = NULL;
 
 	return 0;
diff --git a/tempesta_fw/t/unit/sched_helper.c b/tempesta_fw/t/unit/sched_helper.c
index 06e1905e9..054ad4100 100644
--- a/tempesta_fw/t/unit/sched_helper.c
+++ b/tempesta_fw/t/unit/sched_helper.c
@@ -76,6 +76,8 @@ test_create_sg(const char *name, const char *sched_name)
 		BUG_ON(r);
 	}
 
+	sg->max_qsize = 100;
+
 	kernel_fpu_begin();
 
 	return sg;
@@ -105,7 +107,6 @@ test_create_srv(const char *in_addr, TfwSrvGroup *sg)
 	srv = tfw_server_create(&addr);
 	BUG_ON(!srv);
 
-	srv->max_qsize = 100;
 	tfw_sg_add(sg, srv);
 
 	return srv;

From 1dddfb51c13814f3d5d44df47c9666c3c16b864c Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Mon, 23 Jan 2017 03:33:07 +0300
Subject: [PATCH 45/65] Better config processing to align with ALB branch.

---
 tempesta_fw/sock_srv.c | 524 ++++++++++++++++++++++++-----------------
 1 file changed, 302 insertions(+), 222 deletions(-)

diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index 42f40c00c..16dd41285 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -458,28 +458,6 @@ tfw_sock_srv_disconnect_srv(TfwServer *srv)
 	return tfw_peer_for_each_conn(srv, conn, list, tfw_sock_srv_disconnect);
 }
 
-static int
-tfw_sock_srv_start(void)
-{
-	int ret;
-
-	if ((ret = tfw_sg_for_each_srv(tfw_server_apm_create)) != 0)
-		return ret;
-
-	return tfw_sg_for_each_srv(tfw_sock_srv_connect_srv);
-}
-
-static void
-tfw_sock_srv_stop(void)
-{
-	/*
-	 * Connections list is read-only at run time for now, so no need
-	 * to synchronize the list access or disable softirqs.
-	 */
-	while (tfw_sg_for_each_srv(tfw_sock_srv_disconnect_srv))
-		schedule();
-}
-
 /*
  * ------------------------------------------------------------------------
  *	TfwServer creation/deletion helpers.
@@ -571,32 +549,38 @@ tfw_sock_srv_delete_all_conns(void)
  * ------------------------------------------------------------------------
  */
 
-/* Default number of connections per server. */
-#define TFW_SRV_CONNS_N_DEF		"32"
-
 /*
  * Default values for various configuration directives and options.
  */
-#define TFW_SRV_QUEUE_SIZE_DEF		1000	/* Max queue size */
-#define TFW_SRV_FWD_TIMEOUT_DEF		60	/* Default request timeout */
-#define TFW_SRV_FWD_RETRIES_DEF		5	/* Default number of tries */
-#define TFW_SRV_RETRY_NIP_DEF		0	/* Do NOT resend NIP reqs */
-#define TFW_SRV_RETRY_ATTEMPTS_DEF	10	/* Reconnect attempts. */
-
-static int tfw_cfg_in_queue_size = TFW_SRV_QUEUE_SIZE_DEF;
-static int tfw_cfg_in_fwd_timeout = TFW_SRV_FWD_TIMEOUT_DEF;
-static int tfw_cfg_in_fwd_retries = TFW_SRV_FWD_RETRIES_DEF;
-static int tfw_cfg_in_retry_nip = TFW_SRV_RETRY_NIP_DEF;
-static int tfw_cfg_in_retry_attempts = TFW_SRV_RETRY_ATTEMPTS_DEF;
-
-static int tfw_cfg_out_queue_size = TFW_SRV_QUEUE_SIZE_DEF;
-static int tfw_cfg_out_fwd_timeout = TFW_SRV_FWD_TIMEOUT_DEF;
-static int tfw_cfg_out_fwd_retries = TFW_SRV_FWD_RETRIES_DEF;
-static int tfw_cfg_out_retry_nip = TFW_SRV_RETRY_NIP_DEF;
-static int tfw_cfg_out_retry_attempts = TFW_SRV_RETRY_ATTEMPTS_DEF;
+#define TFW_CFG_SRV_CONNS_N_DEF		32	/* Default # of connections */
+#define TFW_CFG_SRV_QUEUE_SIZE_DEF	1000	/* Max queue size */
+#define TFW_CFG_SRV_FWD_TIMEOUT_DEF	60	/* Request timeout, seconds */
+#define TFW_CFG_SRV_FWD_RETRIES_DEF	5	/* Default number of tries */
+#define TFW_CFG_SRV_RETRY_NIP_DEF	0	/* Do NOT resend NIP reqs */
+#define TFW_CFG_SRV_RETRY_ATTEMPTS_DEF	10	/* Reconnect attempts. */
+
+static TfwServer *tfw_cfg_in_slst[TFW_SG_MAX_SRV];
+static TfwServer *tfw_cfg_out_slst[TFW_SG_MAX_SRV];
+static int tfw_cfg_in_nconn[TFW_SG_MAX_SRV];
+static int tfw_cfg_out_nconn[TFW_SG_MAX_SRV];
+static int tfw_cfg_in_slstsz, tfw_cfg_out_slstsz;
+static TfwScheduler *tfw_cfg_in_sched, *tfw_cfg_out_sched;
+static TfwSrvGroup *tfw_cfg_in_sg, *tfw_cfg_out_sg;
+
+static int tfw_cfg_in_queue_size = TFW_CFG_SRV_QUEUE_SIZE_DEF;
+static int tfw_cfg_in_fwd_timeout = TFW_CFG_SRV_FWD_TIMEOUT_DEF;
+static int tfw_cfg_in_fwd_retries = TFW_CFG_SRV_FWD_RETRIES_DEF;
+static int tfw_cfg_in_retry_nip = TFW_CFG_SRV_RETRY_NIP_DEF;
+static int tfw_cfg_in_retry_attempts = TFW_CFG_SRV_RETRY_ATTEMPTS_DEF;
+
+static int tfw_cfg_out_queue_size = TFW_CFG_SRV_QUEUE_SIZE_DEF;
+static int tfw_cfg_out_fwd_timeout = TFW_CFG_SRV_FWD_TIMEOUT_DEF;
+static int tfw_cfg_out_fwd_retries = TFW_CFG_SRV_FWD_RETRIES_DEF;
+static int tfw_cfg_out_retry_nip = TFW_CFG_SRV_RETRY_NIP_DEF;
+static int tfw_cfg_out_retry_attempts = TFW_CFG_SRV_RETRY_ATTEMPTS_DEF;
 
 static int
-tfw_handle_opt_val(TfwCfgSpec *cs, TfwCfgEntry *ce, int *optval)
+tfw_cfgop_intval(TfwCfgSpec *cs, TfwCfgEntry *ce, int *intval)
 {
 	int ret;
 
@@ -610,50 +594,50 @@ tfw_handle_opt_val(TfwCfgSpec *cs, TfwCfgEntry *ce, int *optval)
 			cs->name, (int)ce->val_n);
 		return -EINVAL;
 	}
-	if ((ret = tfw_cfg_parse_int(ce->vals[0], optval)))
+	if ((ret = tfw_cfg_parse_int(ce->vals[0], intval)))
 		return ret;
 
 	return 0;
 }
 
 static int
-tfw_handle_in_queue_size(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_cfgop_in_queue_size(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	return tfw_handle_opt_val(cs, ce, &tfw_cfg_in_queue_size);
+	return tfw_cfgop_intval(cs, ce, &tfw_cfg_in_queue_size);
 }
 
 static int
-tfw_handle_out_queue_size(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_cfgop_out_queue_size(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	return tfw_handle_opt_val(cs, ce, &tfw_cfg_out_queue_size);
+	return tfw_cfgop_intval(cs, ce, &tfw_cfg_out_queue_size);
 }
 
 static int
-tfw_handle_in_fwd_timeout(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_cfgop_in_fwd_timeout(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	return tfw_handle_opt_val(cs, ce, &tfw_cfg_in_fwd_timeout);
+	return tfw_cfgop_intval(cs, ce, &tfw_cfg_in_fwd_timeout);
 }
 
 static int
-tfw_handle_out_fwd_timeout(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_cfgop_out_fwd_timeout(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	return tfw_handle_opt_val(cs, ce, &tfw_cfg_out_fwd_timeout);
+	return tfw_cfgop_intval(cs, ce, &tfw_cfg_out_fwd_timeout);
 }
 
 static int
-tfw_handle_in_fwd_retries(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_cfgop_in_fwd_retries(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	return tfw_handle_opt_val(cs, ce, &tfw_cfg_in_fwd_retries);
+	return tfw_cfgop_intval(cs, ce, &tfw_cfg_in_fwd_retries);
 }
 
 static int
-tfw_handle_out_fwd_retries(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_cfgop_out_fwd_retries(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	return tfw_handle_opt_val(cs, ce, &tfw_cfg_out_fwd_retries);
+	return tfw_cfgop_intval(cs, ce, &tfw_cfg_out_fwd_retries);
 }
 
 static inline int
-__tfw_handle_retry_nip(TfwCfgSpec *cs, TfwCfgEntry *ce, int *retry_nip)
+tfw_cfgop_retry_nip(TfwCfgSpec *cs, TfwCfgEntry *ce, int *retry_nip)
 {
 	if (ce->attr_n || ce->val_n) {
 		TFW_ERR("%s: The option may not have arguments.\n", cs->name);
@@ -664,31 +648,31 @@ __tfw_handle_retry_nip(TfwCfgSpec *cs, TfwCfgEntry *ce, int *retry_nip)
 }
 
 static int
-tfw_handle_in_retry_nip(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_cfgop_in_retry_nip(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	return __tfw_handle_retry_nip(cs, ce, &tfw_cfg_in_retry_nip);
+	return tfw_cfgop_retry_nip(cs, ce, &tfw_cfg_in_retry_nip);
 }
 
 static int
-tfw_handle_out_retry_nip(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_cfgop_out_retry_nip(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	return __tfw_handle_retry_nip(cs, ce, &tfw_cfg_out_retry_nip);
+	return tfw_cfgop_retry_nip(cs, ce, &tfw_cfg_out_retry_nip);
 }
 
 static int
-tfw_handle_in_conn_tries(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_cfgop_in_conn_tries(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	return tfw_handle_opt_val(cs, ce, &tfw_cfg_in_retry_attempts);
+	return tfw_cfgop_intval(cs, ce, &tfw_cfg_in_retry_attempts);
 }
 
 static int
-tfw_handle_out_conn_tries(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_cfgop_out_conn_tries(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	return tfw_handle_opt_val(cs, ce, &tfw_cfg_out_retry_attempts);
+	return tfw_cfgop_intval(cs, ce, &tfw_cfg_out_retry_attempts);
 }
 
 static int
-tfw_cfg_set_conn_tries(TfwSrvGroup *sg, int recons)
+tfw_cfgop_set_conn_tries(TfwSrvGroup *sg, int recons)
 {
 	if (!recons) {
 		sg->max_recons = UINT_MAX;
@@ -701,77 +685,100 @@ tfw_cfg_set_conn_tries(TfwSrvGroup *sg, int recons)
 	return 0;
 }
 
-/**
- * A "srv_group" which is currently being parsed.
- * All "server" entries are added to this group.
- */
-static TfwSrvGroup *tfw_cfg_curr_group;
-static TfwScheduler *tfw_cfg_dflt_sched;
-
-/**
- * Handle "server" within an "srv_group", e.g.:
- *   srv_group foo {
- *       server 10.0.0.1;
- *       server 10.0.0.2;
- *       server 10.0.0.3 conns_n=1;
- *   }
- *
- * Every server is simply added to the tfw_srv_cfg_curr_group.
- */
-static TfwServer *
-tfw_handle_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
+static int
+tfw_cfgop_server(TfwCfgSpec *cs, TfwCfgEntry *ce,
+		 TfwSrvGroup *sg, TfwServer **arg_srv, int *arg_conns_n)
 {
 	TfwAddr addr;
 	TfwServer *srv;
-	int r, conns_n;
-	const char *in_addr, *in_conns_n;
+	int i, conns_n = 0;
+	bool has_conns_n = false;
+	const char *key, *val, *saddr;
 
-	BUG_ON(!tfw_cfg_curr_group);
+	if (ce->val_n != 1) {
+		TFW_ERR("%s: %s %s: Invalid number of arguments: %zd\n",
+			sg->name, cs->name, ce->val_n ? ce->vals[0] : "",
+			ce->val_n);
+		return -EINVAL;
+	}
+	if (ce->attr_n > 2) {
+		TFW_ERR("%s: %s %s: Invalid number of key=value pairs: %zd\n",
+			sg->name, cs->name, ce->vals[0], ce->attr_n);
+		return -EINVAL;
+	}
 
-	if ((r = tfw_cfg_check_val_n(ce, 1)))
-		return NULL;
+	saddr = ce->vals[0];
 
-	in_addr = ce->vals[0];
-	in_conns_n = tfw_cfg_get_attr(ce, "conns_n", TFW_SRV_CONNS_N_DEF);
+	if (tfw_addr_pton(&TFW_STR_FROM(saddr), &addr)) {
+		TFW_ERR("%s: %s %s: Invalid IP address: '%s'\n",
+			sg->name, cs->name, saddr, saddr);
+		return -EINVAL;
+	}
 
-	if ((r = tfw_addr_pton(&TFW_STR_FROM(in_addr), &addr)))
-		return NULL;
-	if ((r = tfw_cfg_parse_int(in_conns_n, &conns_n)))
-		return NULL;
+	TFW_CFG_ENTRY_FOR_EACH_ATTR(ce, i, key, val) {
+		if (!strcasecmp(key, "conns_n")) {
+			if (has_conns_n) {
+				TFW_ERR("%s: %s %s: Duplicate arg: '%s=%s'\n",
+					sg->name, cs->name, saddr, key, val);
+				return -EINVAL;
+			}
+			if (tfw_cfg_parse_int(val, &conns_n)) {
+				TFW_ERR("%s: %s %s: Invalid value: '%s=%s'\n",
+					sg->name, cs->name, saddr, key, val);
+				return -EINVAL;
+			}
+			has_conns_n = true;
+		} else {
+			TFW_ERR("%s: %s %s: Unsupported argument: '%s=%s'\n",
+				sg->name, cs->name, saddr, key, val);
+			return -EINVAL;
+		}
+	}
 
-	if (conns_n > TFW_SRV_MAX_CONN) {
-		TFW_ERR("can't use more than %d connections", TFW_SRV_MAX_CONN);
-		return NULL;
+	if (!has_conns_n) {
+		conns_n = TFW_CFG_SRV_CONNS_N_DEF;
+	} else if ((conns_n < 1) || (conns_n > TFW_SRV_MAX_CONN)) {
+		TFW_ERR("%s: %s %s: Out of range of [1..%d]: 'conns_n=%d'\n",
+			sg->name, cs->name, saddr, TFW_SRV_MAX_CONN, conns_n);
+		return -EINVAL;
 	}
 
+
 	if (!(srv = tfw_server_create(&addr))) {
-		TFW_ERR("can't create a server socket\n");
-		return NULL;
+		TFW_ERR("%s: %s %s: Error handling the server\n",
+			sg->name, cs->name, saddr);
+		return -EINVAL;
 	}
-	tfw_sg_add(tfw_cfg_curr_group, srv);
+	tfw_sg_add(sg, srv);
 
-	if ((r = tfw_sock_srv_add_conns(srv, conns_n))) {
-		TFW_ERR("can't add connections to the server\n");
-		return NULL;
-	}
+	*arg_srv = srv;
+	*arg_conns_n = conns_n;
 
-	return srv;
+	return 0;
 }
 
-static TfwServer *tfw_cfg_in_lst[TFW_SG_MAX_SRV];
-static int tfw_cfg_in_lstsz = 0;
-static int tfw_cfg_out_lstsz = 0;
-
+/**
+ * Handle "server" within an "srv_group", e.g.:
+ *   srv_group foo {
+ *       server 10.0.0.1;
+ *       server 10.0.0.2;
+ *       server 10.0.0.3 conns_n=1;
+ *   }
+ *
+ * Every server is simply added to the group being parsed (tfw_cfg_in_sg).
+ */
 static int
-tfw_handle_in_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_cfgop_in_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
+	int nconn;
 	TfwServer *srv;
 
-	if (tfw_cfg_in_lstsz >= TFW_SG_MAX_SRV)
+	if (tfw_cfg_in_slstsz >= TFW_SG_MAX_SRV)
 		return -EINVAL;
-	if (!(srv = tfw_handle_server(cs, ce)))
+	if (tfw_cfgop_server(cs, ce, tfw_cfg_in_sg, &srv, &nconn))
 		return -EINVAL;
-	tfw_cfg_in_lst[tfw_cfg_in_lstsz++] = srv;
+	tfw_cfg_in_nconn[tfw_cfg_in_slstsz] = nconn;
+	tfw_cfg_in_slst[tfw_cfg_in_slstsz++] = srv;
 
 	return 0;
 }
@@ -796,44 +803,30 @@ tfw_handle_in_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
  *    }
  */
 static int
-tfw_handle_out_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_cfgop_out_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	int ret;
+	int nconn;
 	TfwServer *srv;
-	const char *dflt_sched_name;
-	static const char __read_mostly s_default[] = "default";
-	TfwSrvGroup *sg = tfw_sg_lookup(s_default);
 
-	if (tfw_cfg_out_lstsz >= TFW_SG_MAX_SRV)
+	if (tfw_cfg_out_slstsz >= TFW_SG_MAX_SRV)
 		return -EINVAL;
-	/* The group "default" is created implicitly. */
-	if (sg == NULL) {
-		if ((sg = tfw_sg_new(s_default, GFP_KERNEL)) == NULL) {
-			TFW_ERR("Unable to add server group '%s'\n", s_default);
+	/*
+	 * The group "default" is created implicitly, and only when
+	 * a server outside of any group is found in the configuration.
+	 */
+	if (!tfw_cfg_out_sg) {
+		static const char __read_mostly s_default[] = "default";
+
+		if (!(tfw_cfg_out_sg = tfw_sg_new(s_default, GFP_KERNEL))) {
+			TFW_ERR("Unable to add default server group\n");
 			return -EINVAL;
 		}
-		dflt_sched_name = tfw_cfg_dflt_sched
-				  ? tfw_cfg_dflt_sched->name
-				  : "round-robin";
-		if ((ret = tfw_sg_set_sched(sg, dflt_sched_name)) != 0) {
-			TFW_ERR("Unable to set scheduler '%s' "
-				"for server group '%s'\n",
-				dflt_sched_name, s_default);
-			return ret;
-		}
 	}
-	tfw_cfg_curr_group = sg;
 
-	if (!(srv = tfw_handle_server(cs, ce)))
+	if (tfw_cfgop_server(cs, ce, tfw_cfg_out_sg, &srv, &nconn))
 		return -EINVAL;
-
-	tfw_cfg_set_conn_tries(sg, tfw_cfg_out_retry_attempts);
-	sg->max_qsize = tfw_cfg_out_queue_size ? : UINT_MAX;
-	sg->max_jqage = tfw_cfg_out_fwd_timeout
-		      ? msecs_to_jiffies(tfw_cfg_out_fwd_timeout * 1000)
-		      : ULONG_MAX;
-	sg->max_refwd = tfw_cfg_out_fwd_retries ? : UINT_MAX;
-	sg->flags |= tfw_cfg_out_retry_nip ? TFW_SRV_RETRY_NIP : 0;
+	tfw_cfg_out_nconn[tfw_cfg_out_slstsz] = nconn;
+	tfw_cfg_out_slst[tfw_cfg_out_slstsz++] = srv;
 
 	return 0;
 }
@@ -841,7 +834,7 @@ tfw_handle_out_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
 /**
  * The callback is invoked on entering an "srv_group", e.g:
  *
- *   srv_group foo sched=hash {  <--- The position at the moment of call.
+ *   srv_group foo {  <--- The position at the moment of call.
  *       server ...;
  *       server ...;
  *       ...
@@ -851,35 +844,28 @@ tfw_handle_out_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
  * new TfwSrvGroup object and sets the context for parsing nested "server"s.
  */
 static int
-tfw_begin_srv_group(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_cfgop_begin_srv_group(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	int r;
-	TfwSrvGroup *sg;
-	const char *sg_name, *sched_name, *dflt_sched_name;
-
-	if ((r = tfw_cfg_check_val_n(ce, 1)))
-		return r;
-	sg_name = ce->vals[0];
-	dflt_sched_name = tfw_cfg_dflt_sched
-			  ? tfw_cfg_dflt_sched->name : "round-robin";
-	sched_name = tfw_cfg_get_attr(ce, "sched", dflt_sched_name);
-
-	TFW_DBG("begin srv_group: %s\n", sg_name);
-
-	if (!(sg = tfw_sg_new(sg_name, GFP_KERNEL))) {
-		TFW_ERR("Unable to add server group '%s'\n", sg_name);
+	if (ce->val_n != 1) {
+		TFW_ERR("%s %s: Invalid number of arguments: %zd\n",
+			cs->name, ce->val_n ? ce->vals[0] : "", ce->val_n);
+			return -EINVAL;
+        }
+	if (ce->attr_n) {
+		TFW_ERR("%s %s: Arguments may not have the \'=\' sign\n",
+			cs->name, ce->vals[0]);
 		return -EINVAL;
 	}
-	if ((r = tfw_sg_set_sched(sg, sched_name))) {
-		TFW_ERR("Unable to set scheduler '%s' "
-			"for server group '%s'\n", sched_name, sg_name);
-		return r;
+
+	if (!(tfw_cfg_in_sg = tfw_sg_new(ce->vals[0], GFP_KERNEL))) {
+		TFW_ERR("%s %s: Unable to add group\n", cs->name, ce->vals[0]);
+		return -EINVAL;
 	}
 
-	/* Set the current group. All nested "server"s are added to it. */
-	tfw_cfg_curr_group = sg;
+	TFW_DBG("begin srv_group: %s\n", sg->name);
 
-	tfw_cfg_in_lstsz = 0;
+        tfw_cfg_in_slstsz = 0;
+        tfw_cfg_in_sched = tfw_cfg_out_sched;
 	tfw_cfg_in_retry_attempts = tfw_cfg_out_retry_attempts;
 	tfw_cfg_in_queue_size = tfw_cfg_out_queue_size;
 	tfw_cfg_in_fwd_timeout = tfw_cfg_out_fwd_timeout;
@@ -893,46 +879,91 @@ tfw_begin_srv_group(TfwCfgSpec *cs, TfwCfgEntry *ce)
  * The callback is invoked upon exit from a "srv_group" when all nested
  * "server"s are parsed, e.g.:
  *
- *   srv_group foo sched=hash {
+ *   srv_group foo {
  *       server ...;
  *       server ...;
  *       ...
  *   }  <--- The position at the moment of call.
  */
 static int
-tfw_finish_srv_group(TfwCfgSpec *cs)
+tfw_cfgop_finish_srv_group(TfwCfgSpec *cs)
 {
-	unsigned long jqage = msecs_to_jiffies(tfw_cfg_in_fwd_timeout * 1000);
-	TfwSrvGroup *sg = tfw_cfg_curr_group;
+	int i;
+	TfwSrvGroup *sg = tfw_cfg_in_sg;
 
 	BUG_ON(!sg);
 	BUG_ON(list_empty(&sg->srv_list));
+	BUG_ON(!tfw_cfg_in_sched);
 	TFW_DBG("finish srv_group: %s\n", sg->name);
 
-	tfw_cfg_set_conn_tries(sg, tfw_cfg_in_retry_attempts);
+	tfw_cfgop_set_conn_tries(sg, tfw_cfg_in_retry_attempts);
 	sg->max_qsize = tfw_cfg_in_queue_size ? : UINT_MAX;
-	sg->max_jqage = tfw_cfg_in_fwd_timeout ? jqage : ULONG_MAX;
+	sg->max_jqage = tfw_cfg_in_fwd_timeout
+		      ? msecs_to_jiffies(tfw_cfg_in_fwd_timeout * 1000)
+		      : ULONG_MAX;
 	sg->max_refwd = tfw_cfg_in_fwd_retries ? : UINT_MAX;
 	sg->flags |= tfw_cfg_in_retry_nip ? TFW_SRV_RETRY_NIP : 0;
-	tfw_cfg_curr_group = NULL;
+
+	if (tfw_sg_set_sched(sg, tfw_cfg_in_sched->name)) {
+		TFW_ERR("%s %s: Unable to set scheduler: '%s'\n",
+			cs->name, sg->name, tfw_cfg_in_sched->name);
+		return -EINVAL;
+	}
+	/* Add connections only after a scheduler is set. */
+	for (i = 0; i < tfw_cfg_in_slstsz; ++i) {
+		TfwServer *srv = tfw_cfg_in_slst[i];
+		if (tfw_sock_srv_add_conns(srv, tfw_cfg_in_nconn[i])) {
+			char as[TFW_ADDR_STR_BUF_SIZE] = { 0 };
+			tfw_addr_ntop(&srv->addr, as, sizeof(as));
+			TFW_ERR("%s %s: server '%s': "
+				"Error adding connections\n",
+				cs->name, sg->name, as);
+			return -EINVAL;
+		}
+	}
 
 	return 0;
 }
 
 static int
-tfw_handle_sched(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_cfgop_sched(TfwCfgSpec *cs, TfwCfgEntry *ce, TfwScheduler **arg_sched)
 {
-	if (tfw_cfg_check_val_n(ce, 1))
+	TfwScheduler *sched;
+
+	if (!ce->val_n) {
+		TFW_ERR("%s: Invalid number of arguments: %zd\n",
+			cs->name, ce->val_n);
+		return -EINVAL;
+	}
+	if (ce->attr_n) {
+		TFW_ERR("%s %s: Arguments may not have the \'=\' sign\n",
+			cs->name, ce->vals[0]);
 		return -EINVAL;
+	}
 
-	if (!(tfw_cfg_dflt_sched = tfw_sched_lookup(ce->vals[0]))) {
-		TFW_ERR("Unrecognized scheduler: '%s'\n", ce->vals[0]);
+	if (!(sched = tfw_sched_lookup(ce->vals[0]))) {
+		TFW_ERR("%s %s: Unrecognized scheduler: '%s'\n",
+			cs->name, ce->vals[0], ce->vals[0]);
 		return -EINVAL;
 	}
 
+	*arg_sched = sched;
+
 	return 0;
 }
 
+static int
+tfw_cfgop_in_sched(TfwCfgSpec *cs, TfwCfgEntry *ce)
+{
+	return tfw_cfgop_sched(cs, ce, &tfw_cfg_in_sched);
+}
+
+static int
+tfw_cfgop_out_sched(TfwCfgSpec *cs, TfwCfgEntry *ce)
+{
+	return tfw_cfgop_sched(cs, ce, &tfw_cfg_out_sched);
+}
+
 /**
  * Clean everything produced during parsing "server" and "srv_group" entries.
  */
@@ -941,52 +972,109 @@ tfw_clean_srv_groups(TfwCfgSpec *cs)
 {
 	tfw_sock_srv_delete_all_conns();
 	tfw_sg_release_all();
-	tfw_cfg_curr_group = NULL;
+
+	tfw_cfg_in_sg = tfw_cfg_out_sg = NULL;
+	tfw_cfg_in_sched = tfw_cfg_out_sched = NULL;
+	tfw_cfg_in_slstsz = tfw_cfg_out_slstsz = 0;
+}
+
+static int
+tfw_sock_srv_start(void)
+{
+	int i, ret;
+	TfwSrvGroup *sg = tfw_cfg_out_sg;
+
+	if (sg) {
+		BUG_ON(!tfw_cfg_out_sched);
+
+		tfw_cfgop_set_conn_tries(sg, tfw_cfg_out_retry_attempts);
+		sg->max_qsize = tfw_cfg_out_queue_size ? : UINT_MAX;
+		sg->max_jqage = tfw_cfg_out_fwd_timeout
+			      ? msecs_to_jiffies(tfw_cfg_out_fwd_timeout * 1000)
+			      : ULONG_MAX;
+		sg->max_refwd = tfw_cfg_out_fwd_retries ? : UINT_MAX;
+		sg->flags |= tfw_cfg_out_retry_nip ? TFW_SRV_RETRY_NIP : 0;
+
+		if (tfw_sg_set_sched(sg, tfw_cfg_out_sched->name)) {
+			TFW_ERR("srv_group %s: Unable to set scheduler: "
+				"'%s'\n", sg->name, tfw_cfg_out_sched->name);
+			return -EINVAL;
+		}
+		/* Add connections only after a scheduler is set. */
+		for (i = 0; i < tfw_cfg_out_slstsz; ++i) {
+			TfwServer *srv = tfw_cfg_out_slst[i];
+			if (tfw_sock_srv_add_conns(srv, tfw_cfg_out_nconn[i])) {
+				char as[TFW_ADDR_STR_BUF_SIZE] = { 0 };
+				tfw_addr_ntop(&srv->addr, as, sizeof(as));
+				TFW_ERR("srv_group %s: server '%s': "
+					"Error adding connections\n",
+					sg->name, as);
+				return -EINVAL;
+			}
+		}
+	}
+	/*
+	 * This must be executed only after the complete configuration
+	 * has been processed as it depends on configuration directives
+	 * that can be located anywhere in the configuration file.
+	 */
+	if ((ret = tfw_sg_for_each_srv(tfw_server_apm_create)) != 0)
+		return ret;
+
+	return tfw_sg_for_each_srv(tfw_sock_srv_connect_srv);
+}
+
+static void
+tfw_sock_srv_stop(void)
+{
+	tfw_sg_for_each_srv(tfw_sock_srv_disconnect_srv);
 }
 
 static TfwCfgSpec tfw_srv_group_specs[] = {
 	{
 		"server", NULL,
-		tfw_handle_in_server,
+		tfw_cfgop_in_server,
 		.allow_repeat = true,
 		.cleanup = tfw_clean_srv_groups
 	},
 	{
-		"server_queue_size",
-		NULL,
-		tfw_handle_in_queue_size,
+		"sched", "round-robin",
+		tfw_cfgop_in_sched,
+		.allow_none = true,
+		.allow_repeat = false,
+		.cleanup = tfw_clean_srv_groups,
+	},
+	{
+		"server_queue_size", NULL,
+		tfw_cfgop_in_queue_size,
 		.allow_none = true,
 		.allow_repeat = false,
 		.cleanup = tfw_clean_srv_groups,
 	},
 	{
-		"server_forward_timeout",
-		NULL,
-		tfw_handle_in_fwd_timeout,
+		"server_forward_timeout", NULL,
+		tfw_cfgop_in_fwd_timeout,
 		.allow_none = true,
 		.allow_repeat = false,
 		.cleanup = tfw_clean_srv_groups,
 	},
 	{
-		"server_forward_retries",
-		NULL,
-		tfw_handle_in_fwd_retries,
+		"server_forward_retries", NULL,
+		tfw_cfgop_in_fwd_retries,
 		.allow_none = true,
 		.allow_repeat = false,
 		.cleanup = tfw_clean_srv_groups,
 	},
 	{
-		"server_retry_non_idempotent",
-		NULL,
-		tfw_handle_in_retry_nip,
+		"server_retry_non_idempotent", NULL,
+		tfw_cfgop_in_retry_nip,
 		.allow_none = true,
 		.allow_repeat = false,
 		.cleanup = tfw_clean_srv_groups,
 	},
 	{
-		"connect_tries",
-		NULL,
-		tfw_handle_in_conn_tries,
+		"connect_tries", NULL,
+		tfw_cfgop_in_conn_tries,
 		.allow_none = true,
 		.allow_repeat = false,
 		.cleanup = tfw_clean_srv_groups,
@@ -1000,69 +1088,61 @@ TfwCfgMod tfw_sock_srv_cfg_mod = {
 	.stop  = tfw_sock_srv_stop,
 	.specs = (TfwCfgSpec[] ) {
 		{
-			"server",
-			NULL,
-			tfw_handle_out_server,
+			"server", NULL,
+			tfw_cfgop_out_server,
 			.allow_none = true,
 			.allow_repeat = true,
 			.cleanup = tfw_clean_srv_groups,
 		},
 		{
-			"server_queue_size",
-			NULL,
-			tfw_handle_out_queue_size,
+			"sched", "round-robin",
+			tfw_cfgop_out_sched,
 			.allow_none = true,
-			.allow_repeat = true,
+			.allow_repeat = false,
 			.cleanup = tfw_clean_srv_groups,
 		},
 		{
-			"server_forward_timeout",
-			NULL,
-			tfw_handle_out_fwd_timeout,
+			"server_queue_size", NULL,
+			tfw_cfgop_out_queue_size,
 			.allow_none = true,
 			.allow_repeat = true,
 			.cleanup = tfw_clean_srv_groups,
 		},
 		{
-			"server_forward_retries",
-			NULL,
-			tfw_handle_out_fwd_retries,
+			"server_forward_timeout", NULL,
+			tfw_cfgop_out_fwd_timeout,
 			.allow_none = true,
 			.allow_repeat = true,
 			.cleanup = tfw_clean_srv_groups,
 		},
 		{
-			"server_retry_non_idempotent",
-			NULL,
-			tfw_handle_out_retry_nip,
+			"server_forward_retries", NULL,
+			tfw_cfgop_out_fwd_retries,
 			.allow_none = true,
 			.allow_repeat = true,
 			.cleanup = tfw_clean_srv_groups,
 		},
 		{
-			"connect_tries",
-			NULL,
-			tfw_handle_out_conn_tries,
+			"server_retry_non_idempotent", NULL,
+			tfw_cfgop_out_retry_nip,
 			.allow_none = true,
 			.allow_repeat = true,
 			.cleanup = tfw_clean_srv_groups,
 		},
 		{
-			"sched",
-			NULL,
-			tfw_handle_sched,
+			"connect_tries", NULL,
+			tfw_cfgop_out_conn_tries,
 			.allow_none = true,
 			.allow_repeat = true,
 			.cleanup = tfw_clean_srv_groups,
 		},
 		{
-			"srv_group",
-			NULL,
+			"srv_group", NULL,
 			tfw_cfg_handle_children,
 			tfw_srv_group_specs,
 			&(TfwCfgSpecChild ) {
-				.begin_hook = tfw_begin_srv_group,
-				.finish_hook = tfw_finish_srv_group
+				.begin_hook = tfw_cfgop_begin_srv_group,
+				.finish_hook = tfw_cfgop_finish_srv_group
 			},
 			.allow_none = true,
 			.allow_repeat = true,

From 98634d3d8ecd168ea27742a02ed8c7b0e9e599c0 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Mon, 23 Jan 2017 14:13:17 +0300
Subject: [PATCH 46/65] Minor fixes prompted by the code review.

---
 tempesta_fw/sched/tfw_sched_rr.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/tempesta_fw/sched/tfw_sched_rr.c b/tempesta_fw/sched/tfw_sched_rr.c
index 4413e6041..5a26e4d04 100644
--- a/tempesta_fw/sched/tfw_sched_rr.c
+++ b/tempesta_fw/sched/tfw_sched_rr.c
@@ -73,7 +73,7 @@ tfw_sched_rr_free_data(TfwSrvGroup *sg)
 static void
 tfw_sched_rr_add_conn(TfwSrvGroup *sg, TfwServer *srv, TfwConnection *conn)
 {
-	int s, c;
+	size_t s, c;
 	TfwRrSrv *srv_cl;
 	TfwRrSrvList *sl = sg->sched_data;
 
@@ -92,7 +92,7 @@ tfw_sched_rr_add_conn(TfwSrvGroup *sg, TfwServer *srv, TfwConnection *conn)
 	for (c = 0; c < srv_cl->conn_n; ++c)
 		if (srv_cl->conns[c] == conn) {
 			TFW_WARN("sched_rr: Try to add existing connection,"
-				 " srv=%d conn=%d\n", s, c);
+				 " srv=%zu conn=%zu\n", s, c);
 			return;
 		}
 	srv_cl->conns[c] = conn;
@@ -108,14 +108,21 @@ tfw_sched_rr_add_conn(TfwSrvGroup *sg, TfwServer *srv, TfwConnection *conn)
  * Dead connections and servers w/o live connections are skipped.
  * Initially, connections with non-idempotent requests are also skipped
  * in attempt to increase throughput. However, if all live connections
- * contain non-idempotent requests, then re-run the algorithm and get
- * the first live connection as it is usually done.
+ * contain a non-idempotent request, then re-run the algorithm and get
+ * the first live connection the way it is usually done.
+ *
+ * RR scheduler must be the fastest scheduler. Also, it's essential
+ * to maintain strict round-robin fashion of getting the next server.
+ * Usually the optimistic approach gives the fastest solution: we are
+ * optimistic in that there are not many non-idempotent requests, and
+ * there are available server connections.
  */
 static TfwConnection *
 tfw_sched_rr_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 {
-	unsigned long idx;
-	int c, s, skipnip = 1, nipconn = 0;
+	size_t c, s;
+	unsigned long idxval;
+	int skipnip = 1, nipconn = 0;
 	TfwRrSrvList *sl = sg->sched_data;
 	TfwRrSrv *srv_cl;
 	TfwConnection *conn;
@@ -123,11 +130,11 @@ tfw_sched_rr_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 	BUG_ON(!sl);
 rerun:
 	for (s = 0; s < sl->srv_n; ++s) {
-		idx = atomic64_inc_return(&sl->rr_counter);
-		srv_cl = &sl->srvs[idx % sl->srv_n];
+		idxval = atomic64_inc_return(&sl->rr_counter);
+		srv_cl = &sl->srvs[idxval % sl->srv_n];
 		for (c = 0; c < srv_cl->conn_n; ++c) {
-			idx = atomic64_inc_return(&srv_cl->rr_counter);
-			conn = srv_cl->conns[idx % srv_cl->conn_n];
+			idxval = atomic64_inc_return(&srv_cl->rr_counter);
+			conn = srv_cl->conns[idxval % srv_cl->conn_n];
 			if (unlikely(tfw_connection_restricted(conn))
 			    || unlikely(tfw_server_queue_full(conn)))
 				continue;

From ab8716a82d9f0a914cddc0e3c08432b4d5d5315f Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Mon, 23 Jan 2017 23:08:34 +0300
Subject: [PATCH 47/65] Release server connection's resources when Tempesta is
 shut down.

---
 tempesta_fw/http.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 22ce29bc8..884fb79d5 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -1041,6 +1041,34 @@ tfw_http_conn_init(TfwConnection *conn)
 	return 0;
 }
 
+/*
+ * Release server connection's resources.
+ *
+ * Drop and free the requests in server connection's @fwd_queue.
+ * Called only when Tempesta is stopped.
+ */
+static void
+tfw_http_conn_srv_release(TfwConnection *srv_conn)
+{
+	TfwHttpReq *req, *tmp;
+	struct list_head *fwd_queue = &srv_conn->fwd_queue;
+	LIST_HEAD(zap_queue);
+
+	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
+	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
+
+	list_for_each_entry_safe(req, tmp, fwd_queue, fwd_list) {
+		tfw_http_req_delist(srv_conn, req);
+		if (unlikely(!list_empty_careful(&req->msg.seq_list))) {
+			spin_lock(&req->conn->seq_qlock);
+			if (unlikely(!list_empty(&req->msg.seq_list)))
+				list_del_init(&req->msg.seq_list);
+			spin_unlock(&req->conn->seq_qlock);
+		}
+		tfw_http_conn_msg_free((TfwHttpMsg *)req);
+	}
+}
+
 /*
  * Connection with a peer is released.
  *
@@ -1058,6 +1086,10 @@ tfw_http_conn_release(TfwConnection *srv_conn)
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
 
+	if (unlikely(test_bit(TFW_CONN_B_ISDEAD, &srv_conn->flags))) {
+		tfw_http_conn_srv_release(srv_conn);
+		return;
+	}
 	clear_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
 	clear_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
 }

From b2f0343b7ba5dc74b6a70b558e7220f35f6c4688 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Tue, 24 Jan 2017 04:02:03 +0300
Subject: [PATCH 48/65] Fix several oversights in cases of sending an error
 response.

---
 tempesta_fw/http.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 884fb79d5..62a226069 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -246,7 +246,7 @@ tfw_http_send_200(TfwHttpReq *req)
 		.flags = 4 << TFW_STR_CN_SHIFT
 	};
 
-	TFW_DBG("Send HTTP 200 response to the client\n");
+	TFW_DBG("Send HTTP 200 response\n");
 
 	return tfw_http_send_resp(req, &rh, __TFW_STR_CH(&rh, 1));
 }
@@ -294,7 +294,7 @@ tfw_http_send_404(TfwHttpReq *req)
 		.flags = 4 << TFW_STR_CN_SHIFT
 	};
 
-	TFW_DBG("Send HTTP 404 response: %s\n", reason);
+	TFW_DBG("Send HTTP 404 response\n");
 
 	return tfw_http_send_resp(req, &rh, __TFW_STR_CH(&rh, 1));
 }
@@ -1558,15 +1558,15 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 static void
 tfw_http_req_cache_service(TfwHttpReq *req, TfwHttpResp *resp)
 {
-	if (tfw_http_adjust_resp(resp, req))
-		goto resp_err;
+	if (tfw_http_adjust_resp(resp, req)) {
+		tfw_http_send_500(req);
+		tfw_http_conn_msg_free((TfwHttpMsg *)resp);
+		TFW_INC_STAT_BH(clnt.msgs_otherr);
+		return;
+	}
 	tfw_http_resp_fwd(req, resp);
 	TFW_INC_STAT_BH(clnt.msgs_fromcache);
 	return;
-resp_err:
-	tfw_http_send_500(req);
-	TFW_INC_STAT_BH(clnt.msgs_otherr);
-	return;
 }
 
 /**
@@ -1955,8 +1955,8 @@ tfw_http_resp_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 	 * inter-node data transfers. (see tfw_http_req_cache_cb())
 	 */
 	if (tfw_http_adjust_resp(resp, req)) {
-		tfw_http_conn_msg_free((TfwHttpMsg *)resp);
 		tfw_http_send_500(req);
+		tfw_http_conn_msg_free((TfwHttpMsg *)resp);
 		TFW_INC_STAT_BH(serv.msgs_otherr);
 		return;
 	}
@@ -2068,6 +2068,11 @@ tfw_http_resp_gfsm(TfwHttpMsg *hmresp, struct sk_buff *skb, unsigned int off)
 	return r;
 }
 
+/*
+ * Set up the response @hmresp with data needed down the road,
+ * get the paired request, and then pass the response to cache
+ * for further processing.
+ */
 static int
 tfw_http_resp_cache(TfwHttpMsg *hmresp)
 {

From 3f25a47098d28713db8f32ff1a461284e65f9813 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Fri, 27 Jan 2017 13:35:54 +0300
Subject: [PATCH 49/65] Split TfwConnection{} into TfwCliConnection{} and
 TfwSrvConnection{}.

With this patch, there's a number of new connection features that are
relevant only to server connections. The number of client connections
is greater by an order of magnitude, so it makes sense to split the
connection structure and not spend system RAM on features that are
not needed for client connections.
---
 tempesta_fw/client.h                  |   4 +-
 tempesta_fw/connection.c              |   2 +-
 tempesta_fw/connection.h              | 160 +++++++++++++++++---------
 tempesta_fw/http.c                    | 125 ++++++++++----------
 tempesta_fw/http.h                    |   2 +-
 tempesta_fw/procfs.c                  |   2 +-
 tempesta_fw/sched.c                   |   9 +-
 tempesta_fw/sched/tfw_sched_hash.c    |  31 ++---
 tempesta_fw/sched/tfw_sched_http.c    |  16 +--
 tempesta_fw/sched/tfw_sched_rr.c      |  27 ++---
 tempesta_fw/server.c                  |   4 +-
 tempesta_fw/server.h                  |  22 ++--
 tempesta_fw/sock_clnt.c               |  64 +++++------
 tempesta_fw/sock_srv.c                | 148 +++++++++++-------------
 tempesta_fw/t/unit/sched_helper.c     |  38 +++---
 tempesta_fw/t/unit/test_http_sticky.c |   4 +-
 tempesta_fw/t/unit/test_sched_hash.c  |  40 ++++---
 tempesta_fw/t/unit/test_sched_http.c  |  32 +++---
 tempesta_fw/t/unit/test_sched_rr.c    |  30 ++---
 tempesta_fw/tls.c                     |   2 +-
 20 files changed, 407 insertions(+), 355 deletions(-)

diff --git a/tempesta_fw/client.h b/tempesta_fw/client.h
index 32c7c717f..83b7c292c 100644
--- a/tempesta_fw/client.h
+++ b/tempesta_fw/client.h
@@ -47,8 +47,8 @@ typedef struct {
 TfwClient *tfw_client_obtain(struct sock *sk, void (*init)(TfwClient *));
 void tfw_client_put(TfwClient *cli);
 int tfw_client_for_each(int (*fn)(TfwClient *));
-void tfw_cli_conn_release(TfwConnection *conn);
-int tfw_cli_conn_send(TfwConnection *conn, TfwMsg *msg);
+void tfw_cli_conn_release(TfwCliConnection *cli_conn);
+int tfw_cli_conn_send(TfwCliConnection *cli_conn, TfwMsg *msg);
 int tfw_sock_check_listeners(void);
 
 #endif /* __TFW_CLIENT_H__ */
diff --git a/tempesta_fw/connection.c b/tempesta_fw/connection.c
index 84135c3ef..88cac3541 100644
--- a/tempesta_fw/connection.c
+++ b/tempesta_fw/connection.c
@@ -84,7 +84,7 @@ tfw_connection_release(TfwConnection *conn)
 	/* Ask higher levels to free resources at connection release. */
 	TFW_CONN_HOOK_CALL(conn, conn_release);
 	BUG_ON((TFW_CONN_TYPE(conn) & Conn_Clnt)
-	       && !list_empty(&conn->seq_queue));
+	       && !list_empty(&((TfwCliConnection *)conn)->seq_queue));
 }
 
 /*
diff --git a/tempesta_fw/connection.h b/tempesta_fw/connection.h
index 8edf7abb3..e34abe105 100644
--- a/tempesta_fw/connection.h
+++ b/tempesta_fw/connection.h
@@ -54,74 +54,94 @@ enum {
 /**
  * Session/Presentation layer (in OSI terms) handling.
  *
- * An instance of TfwConnection{} structure links each HTTP message
- * to the attributes of a connection the message has come on. Some
- * of those messages may stay longer in Tempesta after they're sent
- * out to their destinations. Requests are kept until a paired
- * response comes. By the time there's need to use the request's
- * connection to send the response on, it may already be destroyed.
- * With that in mind, TfwConnection{} instance is not destroyed
- * along with the connection so that is can be safely dereferenced.
- * It's kept around until refcnt permits freeing of the instance,
- * so it may have longer lifetime than the connection itself.
+ * An instance of TfwConnection{} structure links each HTTP message to
+ * attributes of a connection the message has come on. Some of those
+ * messages may stay longer in Tempesta after they're sent out to their
+ * destinations. Requests are kept until a paired response comes. By the
+ * time the request's connection is needed for sending the response, it
+ * may already be destroyed. With that in mind, TfwConnection{} instance
+ * is not destroyed along with the connection so that it can be safely
+ * dereferenced. It's kept around until refcnt permits freeing of the
+ * instance, so it may have longer lifetime than the connection itself.
  *
  * @sk is an intrinsic property of TfwConnection{}.
  * It has exactly the same lifetime as an instance of TfwConnection{}.
  *
- * @peer is major property of TfwConnection{}. An instance of @peer
- * has longer lifetime expectation than a connection. @peer is always
- * valid while it's referenced from an instance of TfwConnection{}.
- * That is supported by a separate reference counter in @peer.
+ * @peer is a major property of TfwConnection{}. An instance of @peer has
+ * longer lifetime expectation than a connection. @peer is always valid
+ * while it's referenced from an instance of TfwConnection{}. That is
+ * supported by a separate reference counter in @peer.
+ *
+ * These are the properties of a connection that are common to client
+ * and server connections.
  *
  * @proto	- protocol handler. Base class, must be first;
  * @state	- connection processing state;
  * @list	- member in the list of connections with @peer;
- * @fwd_queue	- queue of messages to be sent to a back-end server;
- * @nip_queue	- queue of non-idempotent messages in server's @fwd_queue;
- * @seq_queue	- queue of client's messages in the order they came;
- * @fwd_qlock	- lock for accessing @fwd_queue and @nip_queue;
- * @seq_qlock	- lock for accessing @seq_queue;
- * @ret_qlock	- lock for serializing sets of responses;
- * @flags	- atomic flags related to server connection's state;
  * @refcnt	- number of users of the connection structure instance;
- * @qsize	- current number of requests in server's @msg_queue;
  * @timer	- The keep-alive/retry timer for the connection;
  * @msg		- message that is currently being processed;
- * @msg_sent	- request that was sent last in a server connection;
  * @peer	- TfwClient or TfwServer handler;
  * @sk		- an appropriate sock handler;
  * @destructor	- called when a connection is destroyed;
  * @forward	- called when a request is forwarded to server;
  */
-typedef struct tfw_connection_t {
-	SsProto			proto;
-	TfwGState		state;
-	struct list_head	list;
-	struct list_head	fwd_queue;				/*srv*/
-	union {
-		struct list_head	nip_queue;			/*srv*/
-		struct list_head	seq_queue;			/*cli*/
-	};
-	union {
-		spinlock_t		fwd_qlock;			/*srv*/
-		spinlock_t		seq_qlock;			/*cli*/
-	};
-	spinlock_t		ret_qlock;				/*cli*/
-	unsigned long		flags;					/*srv*/
-	atomic_t		refcnt;
-	int			qsize;					/*srv*/
-	struct timer_list	timer;
-	TfwMsg			*msg;
-	TfwMsg			*msg_sent;				/*srv*/
-	TfwPeer 		*peer;
-	struct sock		*sk;
+#define TFW_CONNECTION_COMMON				\
+	SsProto			proto;			\
+	TfwGState		state;			\
+	struct list_head	list;			\
+	atomic_t		refcnt;			\
+	struct timer_list	timer;			\
+	TfwMsg			*msg;			\
+	TfwPeer 		*peer;			\
+	struct sock		*sk;			\
 	void			(*destructor)(void *);
-} TfwConnection;
 
-#define TFW_CONN_DEATHCNT	(INT_MIN / 2)
+typedef struct {
+	TFW_CONNECTION_COMMON;
+} TfwConnection;
 
 #define TFW_CONN_TYPE(c)	((c)->proto.type)
 
+/*
+ * These are specific properties that are relevant to client connections.
+ *
+ * @seq_queue	- queue of client's messages in the order they came;
+ * @seq_qlock	- lock for accessing @seq_queue;
+ * @ret_qlock	- lock for serializing sets of responses;
+ */
+typedef struct {
+	TFW_CONNECTION_COMMON;
+	struct list_head	seq_queue;
+	spinlock_t		seq_qlock;
+	spinlock_t		ret_qlock;
+} TfwCliConnection;
+
+/*
+ * These are specific properties that are relevant to server connections.
+ * See the description of special features of this structure in sock_srv.c.
+ *
+ * @fwd_queue	- queue of messages to be sent to a back-end server;
+ * @nip_queue	- queue of non-idempotent messages in server's @fwd_queue;
+ * @fwd_qlock	- lock for accessing @fwd_queue and @nip_queue;
+ * @flags	- atomic flags related to server connection's state;
+ * @qsize	- current number of requests in server's @fwd_queue;
+ * @recns	- the number of reconnect attempts;
+ * @msg_sent	- request that was sent last in a server connection;
+ */
+typedef struct {
+	TFW_CONNECTION_COMMON;
+	struct list_head	fwd_queue;
+	struct list_head	nip_queue;
+	spinlock_t		fwd_qlock;
+	unsigned long		flags;
+	unsigned int		qsize;
+	unsigned int		recns;
+	TfwMsg			*msg_sent;
+} TfwSrvConnection;
+
+#define TFW_CONN_DEATHCNT	(INT_MIN / 2)
+
 /* Connection flags are defined by the bit number. */
 enum {
 	TFW_CONN_B_RESEND = 0,	/* Need to re-send requests. */
@@ -187,15 +207,15 @@ typedef struct {
 
 extern TfwConnHooks *conn_hooks[TFW_CONN_MAX_PROTOS];
 
-/* This macros are intended to use to call certain proto hooks. */
+/* These macros are for calling the defined proto hooks. */
 #define tfw_conn_hook_call(proto, c, f, ...)	\
 	conn_hooks[proto]->f ? conn_hooks[proto]->f(c, ## __VA_ARGS__) : 0
 #define TFW_CONN_HOOK_CALL(c, f...)		\
 	tfw_conn_hook_call(TFW_CONN_TYPE2IDX(TFW_CONN_TYPE(c)), c, f)
 
 /*
- * Tell if a connection is restricted. When restricted, a connection
- * is not available to schedulers.
+ * Tell if a server connection is restricted. A restricted
+ * server connection is not available to schedulers.
  *
  * The flag RESEND is set when a newly established server connection
  * has messages in the forwarding queue. That means that the connection
@@ -205,18 +225,18 @@ extern TfwConnHooks *conn_hooks[TFW_CONN_MAX_PROTOS];
  * are re-sent.
  */
 static inline bool
-tfw_connection_restricted(TfwConnection *conn)
+tfw_srv_conn_restricted(TfwSrvConnection *srv_conn)
 {
-	return test_bit(TFW_CONN_B_RESEND, &conn->flags);
+	return test_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
 }
 
 /*
  * Tell if a connection has non-idempotent requests.
  */
 static inline bool
-tfw_connection_hasnip(TfwConnection *conn)
+tfw_srv_conn_hasnip(TfwSrvConnection *srv_conn)
 {
-	return test_bit(TFW_CONN_B_HASNIP, &conn->flags);
+	return test_bit(TFW_CONN_B_HASNIP, &srv_conn->flags);
 }
 
 static inline bool
@@ -224,19 +244,34 @@ tfw_connection_live(TfwConnection *conn)
 {
 	return atomic_read(&conn->refcnt) > 0;
 }
+static inline bool
+tfw_srv_conn_live(TfwSrvConnection *srv_conn)
+{
+	return tfw_connection_live((TfwConnection *)srv_conn);
+}
 
 static inline void
 tfw_connection_get(TfwConnection *conn)
 {
 	atomic_inc(&conn->refcnt);
 }
+static inline void
+tfw_cli_conn_get(TfwCliConnection *cli_conn)
+{
+	tfw_connection_get((TfwConnection *)cli_conn);
+}
+static inline void
+tfw_srv_conn_get(TfwSrvConnection *srv_conn)
+{
+	tfw_connection_get((TfwConnection *)srv_conn);
+}
 
 /**
  * Increment reference counter and return true if @conn is not in
  * failovering process, i.e. @refcnt wasn't less or equal to zero.
  */
 static inline bool
-tfw_connection_get_if_live(TfwConnection *conn)
+__tfw_connection_get_if_live(TfwConnection *conn)
 {
 	int old, rc = atomic_read(&conn->refcnt);
 
@@ -249,6 +284,11 @@ tfw_connection_get_if_live(TfwConnection *conn)
 
 	return false;
 }
+static inline bool
+tfw_srv_conn_get_if_live(TfwSrvConnection *srv_conn)
+{
+	return __tfw_connection_get_if_live((TfwConnection *)srv_conn);
+}
 
 static inline void
 tfw_connection_put(TfwConnection *conn)
@@ -264,6 +304,16 @@ tfw_connection_put(TfwConnection *conn)
 	if (conn->destructor)
 		conn->destructor(conn);
 }
+static inline void
+tfw_cli_conn_put(TfwCliConnection *cli_conn)
+{
+	tfw_connection_put((TfwConnection *)cli_conn);
+}
+static inline void
+tfw_srv_conn_put(TfwSrvConnection *srv_conn)
+{
+	tfw_connection_put((TfwConnection *)srv_conn);
+}
 
 static inline void
 tfw_connection_put_to_death(TfwConnection *conn)
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 62a226069..37005c920 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -391,7 +391,7 @@ tfw_http_req_is_nip(TfwHttpReq *req)
  * @req must be confirmed to be on the list.
  */
 static inline void
-__tfw_http_req_nip_delist(TfwConnection *srv_conn, TfwHttpReq *req)
+__tfw_http_req_nip_delist(TfwSrvConnection *srv_conn, TfwHttpReq *req)
 {
 	BUG_ON(list_empty(&req->nip_list));
 	list_del_init(&req->nip_list);
@@ -404,7 +404,7 @@ __tfw_http_req_nip_delist(TfwConnection *srv_conn, TfwHttpReq *req)
  * Raise the flag saying that the connection has non-idempotent requests.
  */
 static inline void
-__tfw_http_req_nip_enlist(TfwConnection *srv_conn, TfwHttpReq *req)
+__tfw_http_req_nip_enlist(TfwSrvConnection *srv_conn, TfwHttpReq *req)
 {
 	BUG_ON(!list_empty(&req->nip_list));
 	list_add_tail(&req->nip_list, &srv_conn->nip_queue);
@@ -417,7 +417,7 @@ __tfw_http_req_nip_enlist(TfwConnection *srv_conn, TfwHttpReq *req)
  * the list.
  */
 static inline void
-tfw_http_req_nip_delist(TfwConnection *srv_conn, TfwHttpReq *req)
+tfw_http_req_nip_delist(TfwSrvConnection *srv_conn, TfwHttpReq *req)
 {
 	if (!list_empty(&req->nip_list))
 		__tfw_http_req_nip_delist(srv_conn, req);
@@ -431,7 +431,7 @@ tfw_http_req_nip_delist(TfwConnection *srv_conn, TfwHttpReq *req)
  * to tfw_http_req_add_seq_queue().
  */
 static inline void
-tfw_http_conn_nip_delist(TfwConnection *srv_conn)
+tfw_http_conn_nip_delist(TfwSrvConnection *srv_conn)
 {
 	TfwHttpReq *req, *tmp;
 
@@ -447,7 +447,7 @@ tfw_http_conn_nip_delist(TfwConnection *srv_conn)
  * It's on hold it the request that was sent last was non-idempotent.
  */
 static inline bool
-tfw_http_conn_on_hold(TfwConnection *srv_conn)
+tfw_http_conn_on_hold(TfwSrvConnection *srv_conn)
 {
 	TfwHttpReq *req_sent = (TfwHttpReq *)srv_conn->msg_sent;
 
@@ -461,7 +461,7 @@ tfw_http_conn_on_hold(TfwConnection *srv_conn)
  * request that was sent last.
  */
 static inline bool
-tfw_http_conn_drained(TfwConnection *srv_conn)
+tfw_http_conn_drained(TfwSrvConnection *srv_conn)
 {
 	struct list_head *fwd_queue = &srv_conn->fwd_queue;
 	TfwHttpReq *req_sent = (TfwHttpReq *)srv_conn->msg_sent;
@@ -482,7 +482,7 @@ tfw_http_conn_drained(TfwConnection *srv_conn)
  * that need to be forwarded.
  */
 static inline bool
-tfw_http_conn_need_fwd(TfwConnection *srv_conn)
+tfw_http_conn_need_fwd(TfwSrvConnection *srv_conn)
 {
 	return (!tfw_http_conn_on_hold(srv_conn)
 		&& !tfw_http_conn_drained(srv_conn));
@@ -492,7 +492,7 @@ tfw_http_conn_need_fwd(TfwConnection *srv_conn)
  * Remove @req from the server connection's forwarding queue.
  */
 static inline void
-tfw_http_req_delist(TfwConnection *srv_conn, TfwHttpReq *req)
+tfw_http_req_delist(TfwSrvConnection *srv_conn, TfwHttpReq *req)
 {
 	tfw_http_req_nip_delist(srv_conn, req);
 	list_del_init(&req->fwd_list);
@@ -505,7 +505,7 @@ tfw_http_req_delist(TfwConnection *srv_conn, TfwHttpReq *req)
  * in @equeue. The error code for an error response is saved as well.
  */
 static inline void
-tfw_http_req_move2equeue(TfwConnection *srv_conn, TfwHttpReq *req,
+tfw_http_req_move2equeue(TfwSrvConnection *srv_conn, TfwHttpReq *req,
 			 struct list_head *equeue, unsigned short status)
 {
 	tfw_http_req_delist(srv_conn, req);
@@ -552,7 +552,7 @@ tfw_http_req_zap_error(struct list_head *equeue)
  * move it to the error queue @equeue for sending an error response later.
  */
 static inline bool
-tfw_http_req_evict_timeout(TfwConnection *srv_conn, TfwServer *srv,
+tfw_http_req_evict_timeout(TfwSrvConnection *srv_conn, TfwServer *srv,
 			   TfwHttpReq *req, struct list_head *equeue)
 {
 	unsigned long jqage = jiffies - req->jrxtstamp;
@@ -572,7 +572,7 @@ tfw_http_req_evict_timeout(TfwConnection *srv_conn, TfwServer *srv,
  * move it to the error queue @equeue for sending an error response later.
  */
 static inline bool
-tfw_http_req_evict_retries(TfwConnection *srv_conn, TfwServer *srv,
+tfw_http_req_evict_retries(TfwSrvConnection *srv_conn, TfwServer *srv,
 			   TfwHttpReq *req, struct list_head *equeue)
 {
 	if (unlikely(req->retries++ >= srv->sg->max_refwd)) {
@@ -589,12 +589,12 @@ tfw_http_req_evict_retries(TfwConnection *srv_conn, TfwServer *srv,
  * move it to the error queue @equeue for sending an error response later.
  */
 static inline bool
-tfw_http_req_fwd_send(TfwConnection *srv_conn, TfwServer *srv,
+tfw_http_req_fwd_send(TfwSrvConnection *srv_conn, TfwServer *srv,
 		      TfwHttpReq *req, struct list_head *equeue)
 {
 	req->jtxtstamp = jiffies;
 
-	if (tfw_connection_send(srv_conn, (TfwMsg *)req)) {
+	if (tfw_connection_send((TfwConnection *)srv_conn, (TfwMsg *)req)) {
 		TFW_DBG2("%s: Forwarding error: conn=[%p] req=[%p]\n",
 			 __func__, srv_conn, req);
 		tfw_http_req_move2equeue(srv_conn, req, equeue, 500);
@@ -608,7 +608,7 @@ tfw_http_req_fwd_send(TfwConnection *srv_conn, TfwServer *srv,
  * Return false if forwarding must be stopped, or true otherwise.
  */
 static inline bool
-__tfw_http_req_fwd_single(TfwConnection *srv_conn, TfwServer *srv,
+__tfw_http_req_fwd_single(TfwSrvConnection *srv_conn, TfwServer *srv,
 			  TfwHttpReq *req, struct list_head *equeue)
 {
 	if (tfw_http_req_evict_timeout(srv_conn, srv, req, equeue))
@@ -627,7 +627,7 @@ __tfw_http_req_fwd_single(TfwConnection *srv_conn, TfwServer *srv,
  * IT's also assumed that the forwarding queue is NOT drained.
  */
 static void
-__tfw_http_req_fwd_unsent(TfwConnection *srv_conn, struct list_head *equeue)
+__tfw_http_req_fwd_unsent(TfwSrvConnection *srv_conn, struct list_head *equeue)
 {
 	TfwHttpReq *req, *tmp;
 	TfwServer *srv = (TfwServer *)srv_conn->peer;
@@ -670,7 +670,7 @@ __tfw_http_req_fwd_unsent(TfwConnection *srv_conn, struct list_head *equeue)
  * It's assumed that the forwarding queue in @srv_conn is locked.
  */
 static inline void
-tfw_http_req_fwd_unsent(TfwConnection *srv_conn, struct list_head *equeue)
+tfw_http_req_fwd_unsent(TfwSrvConnection *srv_conn, struct list_head *equeue)
 {
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 
@@ -692,7 +692,7 @@ tfw_http_req_fwd_unsent(TfwConnection *srv_conn, struct list_head *equeue)
  * See RFC 7230 6.3.2.
  */
 static void
-tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
+tfw_http_req_fwd(TfwSrvConnection *srv_conn, TfwHttpReq *req)
 {
 	LIST_HEAD(equeue);
 
@@ -716,7 +716,7 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
 }
 
 /*
- * Handle a possible non-idempotent request in case of a connection
+ * Treat a possible non-idempotent request in case of a connection
  * repair (re-send or re-schedule).
  *
  * A non-idempotent request that was forwarded but not responded to
@@ -730,7 +730,7 @@ tfw_http_req_fwd(TfwConnection *srv_conn, TfwHttpReq *req)
  * Note: @srv_conn->msg_sent may change in result.
  */
 static inline void
-tfw_http_req_fwd_handlenip(TfwConnection *srv_conn, struct list_head *equeue)
+tfw_http_req_fwd_treatnip(TfwSrvConnection *srv_conn, struct list_head *equeue)
 {
 	TfwServer *srv = (TfwServer *)srv_conn->peer;
 	TfwHttpReq *req_sent = (TfwHttpReq *)srv_conn->msg_sent;
@@ -752,7 +752,7 @@ tfw_http_req_fwd_handlenip(TfwConnection *srv_conn, struct list_head *equeue)
  * the set limits are evicted.
  */
 static TfwHttpReq *
-tfw_http_req_resend(TfwConnection *srv_conn,
+tfw_http_req_resend(TfwSrvConnection *srv_conn,
 		      bool first, struct list_head *equeue)
 {
 	TfwHttpReq *req, *tmp, *req_resent = NULL;
@@ -790,7 +790,7 @@ tfw_http_req_resend(TfwConnection *srv_conn,
  * Re-send only the first unanswered request in the forwarding queue.
  */
 static inline TfwHttpReq *
-tfw_http_req_resend_first(TfwConnection *srv_conn, struct list_head *equeue)
+tfw_http_req_resend_first(TfwSrvConnection *srv_conn, struct list_head *equeue)
 {
 	return tfw_http_req_resend(srv_conn, true, equeue);
 }
@@ -799,7 +799,7 @@ tfw_http_req_resend_first(TfwConnection *srv_conn, struct list_head *equeue)
  * Re-send all unanswered requests in the forwarding queue.
  */
 static inline TfwHttpReq *
-tfw_http_req_resend_all(TfwConnection *srv_conn, struct list_head *equeue)
+tfw_http_req_resend_all(TfwSrvConnection *srv_conn, struct list_head *equeue)
 {
 	return tfw_http_req_resend(srv_conn, false, equeue);
 }
@@ -810,11 +810,11 @@ tfw_http_req_resend_all(TfwConnection *srv_conn, struct list_head *equeue)
  * The connection is not scheduled until all requests in it are re-sent.
  */
 static void
-__tfw_http_req_fwd_repair(TfwConnection *srv_conn, struct list_head *equeue)
+__tfw_http_req_fwd_repair(TfwSrvConnection *srv_conn, struct list_head *equeue)
 {
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 	WARN_ON(!spin_is_locked(&srv_conn->fwd_qlock));
-	BUG_ON(!tfw_connection_restricted(srv_conn));
+	BUG_ON(!tfw_srv_conn_restricted(srv_conn));
 
 	if (list_empty(&srv_conn->fwd_queue)) {
 		clear_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
@@ -861,35 +861,37 @@ __tfw_http_req_fwd_repair(TfwConnection *srv_conn, struct list_head *equeue)
  * Unlucky requests are just given another chance with minimal effort.
  */
 static void
-tfw_http_req_resched(TfwConnection *srv_conn, struct list_head *equeue)
+tfw_http_req_resched(TfwSrvConnection *srv_conn, struct list_head *equeue)
 {
 	TfwHttpReq *req, *tmp;
-	TfwConnection *sconn;
+	TfwSrvConnection *sch_conn;
 	TfwServer *srv = (TfwServer *)srv_conn->peer;
 	struct list_head *fwd_queue = &srv_conn->fwd_queue;
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, conn);
 
-	/* Handle the non-idempotent request if any. */
-	tfw_http_req_fwd_handlenip(srv_conn, equeue);
+	/* Treat the non-idempotent request if any. */
+	tfw_http_req_fwd_treatnip(srv_conn, equeue);
 
 	/* Process complete queue. */
 	list_for_each_entry_safe(req, tmp, fwd_queue, fwd_list) {
 		if (tfw_http_req_evict_retries(srv_conn, srv, req, equeue))
 			continue;
-		if (!(sconn = tfw_sched_get_srv_conn((TfwMsg *)req))) {
+		if (!(sch_conn = tfw_sched_get_srv_conn((TfwMsg *)req))) {
 			TFW_WARN("Unable to find a backend server\n");
 			tfw_http_req_move2equeue(srv_conn, req, equeue, 502);
 			continue;
 		}
 		tfw_http_req_delist(srv_conn, req);
-		tfw_http_req_fwd(sconn, req);
-		tfw_connection_put(sconn);
+		tfw_http_req_fwd(sch_conn, req);
+		tfw_srv_conn_put(sch_conn);
 	}
 	BUG_ON(srv_conn->qsize);
 }
 
 /*
+ * Repair a connection. Makes sense only for server connections.
+ *
  * Find requests in the server's connection queue that were forwarded
  * to the server. These are unanswered requests. According to RFC 7230
  * 6.3.2, "a client MUST NOT pipeline immediately after connection
@@ -898,8 +900,9 @@ tfw_http_req_resched(TfwConnection *srv_conn, struct list_head *equeue)
  * rest of those unanswered requests (__tfw_http_req_fwd_repair()).
  */
 static void
-tfw_http_conn_repair(TfwConnection *srv_conn)
+tfw_http_conn_repair(TfwConnection *conn)
 {
+	TfwSrvConnection *srv_conn = (TfwSrvConnection *)conn;
 	TfwHttpReq *req_resent = NULL;
 	LIST_HEAD(equeue);
 
@@ -907,18 +910,18 @@ tfw_http_conn_repair(TfwConnection *srv_conn)
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
 
 	/* See if requests need to be rescheduled. */
-	if (unlikely(!tfw_connection_live(srv_conn))) {
+	if (unlikely(!tfw_srv_conn_live(srv_conn))) {
 		if (list_empty(&srv_conn->fwd_queue))
 			return;
 		tfw_http_req_resched(srv_conn, &equeue);
 		goto zap_error;
 	}
 
-	BUG_ON(!tfw_connection_restricted(srv_conn));
+	BUG_ON(!tfw_srv_conn_restricted(srv_conn));
 
 	spin_lock(&srv_conn->fwd_qlock);
-	/* Handle the non-idempotent request if any. */
-	tfw_http_req_fwd_handlenip(srv_conn, &equeue);
+	/* Treat the non-idempotent request if any. */
+	tfw_http_req_fwd_treatnip(srv_conn, &equeue);
 	/* Re-send the first unanswered request. */
 	if (srv_conn->msg_sent) {
 		req_resent = tfw_http_req_resend_first(srv_conn, &equeue);
@@ -971,11 +974,12 @@ tfw_http_conn_msg_alloc(TfwConnection *conn)
 		TFW_INC_STAT_BH(clnt.rx_messages);
 	} else {
 		TfwHttpReq *req;
+		TfwSrvConnection *srv_conn = (TfwSrvConnection *)conn;
 
-		spin_lock(&conn->fwd_qlock);
-		req = list_first_entry_or_null(&conn->fwd_queue,
+		spin_lock(&srv_conn->fwd_qlock);
+		req = list_first_entry_or_null(&srv_conn->fwd_queue,
 					       TfwHttpReq, fwd_list);
-		spin_unlock(&conn->fwd_qlock);
+		spin_unlock(&srv_conn->fwd_qlock);
 		if (req && (req->method == TFW_HTTP_METH_HEAD))
 			hm->flags |= TFW_HTTP_VOID_BODY;
 		TFW_INC_STAT_BH(serv.rx_messages);
@@ -1033,9 +1037,10 @@ tfw_http_conn_init(TfwConnection *conn)
 	TFW_DBG2("%s: conn=[%p]\n", __func__, conn);
 
 	if (TFW_CONN_TYPE(conn) & Conn_Srv) {
-		if (!list_empty(&conn->fwd_queue))
-			set_bit(TFW_CONN_B_RESEND, &conn->flags);
-		clear_bit(TFW_CONN_B_ISDEAD, &conn->flags);
+		TfwSrvConnection *srv_conn = (TfwSrvConnection *)conn;
+		if (!list_empty(&srv_conn->fwd_queue))
+			set_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
+		clear_bit(TFW_CONN_B_ISDEAD, &srv_conn->flags);
 	}
 	tfw_gfsm_state_init(&conn->state, conn, TFW_HTTP_FSM_INIT);
 	return 0;
@@ -1048,7 +1053,7 @@ tfw_http_conn_init(TfwConnection *conn)
  * Called only when Tempesta is stopped.
  */
 static void
-tfw_http_conn_srv_release(TfwConnection *srv_conn)
+tfw_http_conn_srv_release(TfwSrvConnection *srv_conn)
 {
 	TfwHttpReq *req, *tmp;
 	struct list_head *fwd_queue = &srv_conn->fwd_queue;
@@ -1060,10 +1065,10 @@ tfw_http_conn_srv_release(TfwConnection *srv_conn)
 	list_for_each_entry_safe(req, tmp, fwd_queue, fwd_list) {
 		tfw_http_req_delist(srv_conn, req);
 		if (unlikely(!list_empty_careful(&req->msg.seq_list))) {
-			spin_lock(&req->conn->seq_qlock);
+			spin_lock(&((TfwCliConnection *)req->conn)->seq_qlock);
 			if (unlikely(!list_empty(&req->msg.seq_list)))
 				list_del_init(&req->msg.seq_list);
-			spin_unlock(&req->conn->seq_qlock);
+			spin_unlock(&((TfwCliConnection *)req->conn)->seq_qlock);
 		}
 		tfw_http_conn_msg_free((TfwHttpMsg *)req);
 	}
@@ -1081,8 +1086,10 @@ tfw_http_conn_srv_release(TfwConnection *srv_conn)
  * so locks are not needed.
  */
 static void
-tfw_http_conn_release(TfwConnection *srv_conn)
+tfw_http_conn_release(TfwConnection *conn)
 {
+	TfwSrvConnection *srv_conn = (TfwSrvConnection *)conn;
+
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
 
@@ -1122,7 +1129,7 @@ __tfw_http_resp_pair_free(TfwHttpReq *req)
  * connection threads.
  */
 static void
-tfw_http_conn_cli_drop(TfwConnection *cli_conn)
+tfw_http_conn_cli_drop(TfwCliConnection *cli_conn)
 {
 	TfwHttpReq *req, *tmp;
 	struct list_head *seq_queue = &cli_conn->seq_queue;
@@ -1144,8 +1151,8 @@ tfw_http_conn_cli_drop(TfwConnection *cli_conn)
 	spin_unlock(&cli_conn->seq_qlock);
 
 	list_for_each_entry_safe(req, tmp, &zap_queue, msg.seq_list) {
-		BUG_ON(!list_empty(&req->fwd_list));
-		BUG_ON(!list_empty(&req->nip_list));
+		BUG_ON(!list_empty_careful(&req->fwd_list));
+		BUG_ON(!list_empty_careful(&req->nip_list));
 		__tfw_http_resp_pair_free(req);
 	}
 }
@@ -1164,7 +1171,7 @@ tfw_http_conn_drop(TfwConnection *conn)
 	TFW_DBG2("%s: conn=[%p]\n", __func__, conn);
 
 	if (TFW_CONN_TYPE(conn) & Conn_Clnt) {
-		tfw_http_conn_cli_drop(conn);
+		tfw_http_conn_cli_drop((TfwCliConnection *)conn);
 	} else if (conn->msg) {
 		if (tfw_http_parse_terminate((TfwHttpMsg *)conn->msg))
 			tfw_http_resp_terminate((TfwHttpMsg *)conn->msg);
@@ -1451,7 +1458,7 @@ tfw_http_adjust_resp(TfwHttpResp *resp, TfwHttpReq *req)
  * responses are taken care of by the caller.
  */
 static void
-__tfw_http_resp_fwd(TfwConnection *cli_conn, struct list_head *ret_queue)
+__tfw_http_resp_fwd(TfwCliConnection *cli_conn, struct list_head *ret_queue)
 {
 	TfwHttpReq *req, *tmp;
 
@@ -1476,7 +1483,7 @@ __tfw_http_resp_fwd(TfwConnection *cli_conn, struct list_head *ret_queue)
 void
 tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 {
-	TfwConnection *cli_conn = req->conn;
+	TfwCliConnection *cli_conn = (TfwCliConnection *)req->conn;
 	struct list_head *seq_queue = &cli_conn->seq_queue;
 	struct list_head *req_retent = NULL;
 	LIST_HEAD(ret_queue);
@@ -1529,14 +1536,14 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 	 * to spin_unlock() may get invalid. Hold the connection until
 	 * sending is done.
 	 */
-	tfw_connection_get(cli_conn);
+	tfw_cli_conn_get(cli_conn);
 	spin_lock(&cli_conn->ret_qlock);
 	spin_unlock(&cli_conn->seq_qlock);
 
 	__tfw_http_resp_fwd(cli_conn, &ret_queue);
 
 	spin_unlock(&cli_conn->ret_qlock);
-	tfw_connection_put(cli_conn);
+	tfw_cli_conn_put(cli_conn);
 
 	/* Zap request/responses that were not sent due to an error. */
 	if (!list_empty(&ret_queue)) {
@@ -1578,7 +1585,7 @@ static void
 tfw_http_req_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 {
 	int r;
-	TfwConnection *srv_conn = NULL;
+	TfwSrvConnection *srv_conn = NULL;
 
 	TFW_DBG2("%s: req = %p, resp = %p\n", __func__, req, resp);
 
@@ -1641,7 +1648,7 @@ tfw_http_req_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 	tfw_http_send_500(req);
 	TFW_INC_STAT_BH(clnt.msgs_otherr);
 conn_put:
-	tfw_connection_put(srv_conn);
+	tfw_srv_conn_put(srv_conn);
 }
 
 static void
@@ -1693,7 +1700,7 @@ static void
 tfw_http_req_add_seq_queue(TfwHttpReq *req)
 {
 	TfwHttpReq *req_prev;
-	TfwConnection *cli_conn = req->conn;
+	TfwCliConnection *cli_conn = (TfwCliConnection *)req->conn;
 	struct list_head *seq_queue = &cli_conn->seq_queue;
 
 	tfw_http_req_mark_nip(req);
@@ -1989,7 +1996,7 @@ static TfwHttpReq *
 tfw_http_popreq(TfwHttpMsg *hmresp)
 {
 	TfwHttpReq *req;
-	TfwConnection *srv_conn = hmresp->conn;
+	TfwSrvConnection *srv_conn = (TfwSrvConnection *)hmresp->conn;
 	struct list_head *fwd_queue = &srv_conn->fwd_queue;
 	LIST_HEAD(equeue);
 
@@ -2013,7 +2020,7 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 	 * Perform special processing if the connection is in repair
 	 * mode. Otherwise, forward pending requests to the server.
 	 */
-	if (unlikely(tfw_connection_restricted(srv_conn)))
+	if (unlikely(tfw_srv_conn_restricted(srv_conn)))
 		__tfw_http_req_fwd_repair(srv_conn, &equeue);
 	else if (tfw_http_conn_need_fwd(srv_conn))
 		__tfw_http_req_fwd_unsent(srv_conn, &equeue);
diff --git a/tempesta_fw/http.h b/tempesta_fw/http.h
index f23866298..e73ef8d8b 100644
--- a/tempesta_fw/http.h
+++ b/tempesta_fw/http.h
@@ -351,7 +351,7 @@ typedef struct {
  * @tm_bchunk	- time previous chunk of HTTP body had come at;
  * @hash	- hash value for caching calculated for the request;
  * @resp	- the response paired with this request;
- * @rstatus	- response HTTP status until the response is prepared;
+ * @rstatus	- error response status until the response is prepared;
  * @retries	- the number of re-send attempts;
  *
  * TfwStr members must be the first for efficient scanning.
diff --git a/tempesta_fw/procfs.c b/tempesta_fw/procfs.c
index 377477949..b47ca168b 100644
--- a/tempesta_fw/procfs.c
+++ b/tempesta_fw/procfs.c
@@ -149,7 +149,7 @@ tfw_srvstats_seq_show(struct seq_file *seq, void *off)
 #define SPRNE(m, e)	seq_printf(seq, m": %dms\n", e)
 
 	int i;
-	TfwConnection *srv_conn;
+	TfwSrvConnection *srv_conn;
 	TfwServer *srv = seq->private;
 	TfwPrcntl prcntl[ARRAY_SIZE(tfw_procfs_prcntl)];
 	TfwPrcntlStats pstats = { prcntl, ARRAY_SIZE(prcntl) };
diff --git a/tempesta_fw/sched.c b/tempesta_fw/sched.c
index 080fa3bc6..ed19eb0fe 100644
--- a/tempesta_fw/sched.c
+++ b/tempesta_fw/sched.c
@@ -51,20 +51,19 @@ static DEFINE_SPINLOCK(sched_lock);
  *
  * This function is always called in SoftIRQ context.
  */
-TfwConnection *
+TfwSrvConnection *
 tfw_sched_get_srv_conn(TfwMsg *msg)
 {
-	TfwConnection *conn;
+	TfwSrvConnection *srv_conn;
 	TfwScheduler *sched;
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(sched, &sched_list, list) {
 		if (!sched->sched_grp)
 			break;
-		conn = sched->sched_grp(msg);
-		if (conn) {
+		if ((srv_conn = sched->sched_grp(msg))) {
 			rcu_read_unlock();
-			return conn;
+			return srv_conn;
 		}
 	}
 	rcu_read_unlock();
diff --git a/tempesta_fw/sched/tfw_sched_hash.c b/tempesta_fw/sched/tfw_sched_hash.c
index 64b6abdd7..89116169d 100644
--- a/tempesta_fw/sched/tfw_sched_hash.c
+++ b/tempesta_fw/sched/tfw_sched_hash.c
@@ -49,8 +49,8 @@ MODULE_VERSION("0.2.1");
 MODULE_LICENSE("GPL");
 
 typedef struct {
-	TfwConnection	*conn;
-	unsigned long	hash;
+	TfwSrvConnection	*srv_conn;
+	unsigned long		hash;
 } TfwConnHash;
 
 /* The last item is used as the list teminator. */
@@ -99,16 +99,17 @@ __calc_conn_hash(TfwServer *srv, size_t conn_idx)
 }
 
 static void
-tfw_sched_hash_add_conn(TfwSrvGroup *sg, TfwServer *srv, TfwConnection *conn)
+tfw_sched_hash_add_conn(TfwSrvGroup *sg, TfwServer *srv,
+			TfwSrvConnection *srv_conn)
 {
 	size_t i;
 	TfwConnHash *conn_hash = sg->sched_data;
 
 	BUG_ON(!conn_hash);
 	for (i = 0; i < __HLIST_SZ(TFW_SG_MAX_CONN); ++i) {
-		if (conn_hash[i].conn)
+		if (conn_hash[i].srv_conn)
 			continue;
-		conn_hash[i].conn = conn;
+		conn_hash[i].srv_conn = srv_conn;
 		conn_hash[i].hash = __calc_conn_hash(srv, i);
 		return;
 	}
@@ -135,30 +136,30 @@ tfw_sched_hash_add_conn(TfwSrvGroup *sg, TfwServer *srv, TfwConnection *conn)
  *  - For every HTTP request, we have to scan the list of all servers to find
  *    a matching one with the highest weight. That adds some overhead.
  */
-static TfwConnection *
+static TfwSrvConnection *
 tfw_sched_hash_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 {
 	unsigned long tries, msg_hash, curr_weight, best_weight = 0;
-	TfwConnection *best_conn = NULL;
+	TfwSrvConnection *best_srv_conn = NULL;
 	TfwConnHash *ch;
 
 	msg_hash = tfw_http_req_key_calc((TfwHttpReq *)msg);
 	for (tries = 0; tries < __HLIST_SZ(TFW_SG_MAX_CONN); ++tries) {
-		for (ch = sg->sched_data; ch->conn; ++ch) {
-			if (unlikely(tfw_connection_restricted(ch->conn))
-			    || unlikely(tfw_server_queue_full(ch->conn))
-			    || unlikely(!tfw_connection_live(ch->conn)))
+		for (ch = sg->sched_data; ch->srv_conn; ++ch) {
+			if (unlikely(tfw_srv_conn_restricted(ch->srv_conn)
+				     || tfw_server_queue_full(ch->srv_conn)
+				     || !tfw_srv_conn_live(ch->srv_conn)))
 				continue;
 			curr_weight = msg_hash ^ ch->hash;
 			if (curr_weight > best_weight) {
 				best_weight = curr_weight;
-				best_conn = ch->conn;
+				best_srv_conn = ch->srv_conn;
 			}
 		}
-		if (unlikely(!best_conn))
+		if (unlikely(!best_srv_conn))
 			return NULL;
-		if (tfw_connection_get_if_live(best_conn))
-			return best_conn;
+		if (tfw_srv_conn_get_if_live(best_srv_conn))
+			return best_srv_conn;
 	}
 	return NULL;
 }
diff --git a/tempesta_fw/sched/tfw_sched_http.c b/tempesta_fw/sched/tfw_sched_http.c
index 43b8217be..106009f73 100644
--- a/tempesta_fw/sched/tfw_sched_http.c
+++ b/tempesta_fw/sched/tfw_sched_http.c
@@ -99,11 +99,11 @@ static TfwHttpMatchList *tfw_sched_http_rules;
  * The search is based on contents of an HTTP request and match rules
  * that specify which Server Group the request should be forwarded to.
  */
-static TfwConnection *
+static TfwSrvConnection *
 tfw_sched_http_sched_grp(TfwMsg *msg)
 {
 	TfwSrvGroup *sg;
-	TfwConnection *conn;
+	TfwSrvConnection *srv_conn;
 	TfwSchedHttpRule *rule;
 
 	if(!tfw_sched_http_rules || list_empty(&tfw_sched_http_rules->list))
@@ -120,23 +120,23 @@ tfw_sched_http_sched_grp(TfwMsg *msg)
 	BUG_ON(!sg);
 	TFW_DBG2("sched_http: use server group: '%s'\n", sg->name);
 
-	conn = sg->sched->sched_srv(msg, sg);
+	srv_conn = sg->sched->sched_srv(msg, sg);
 
-	if (unlikely(!conn && rule->backup_sg)) {
+	if (unlikely(!srv_conn && rule->backup_sg)) {
 		sg = rule->backup_sg;
 		TFW_DBG("sched_http: the main group is offline, use backup:"
 			" '%s'\n", sg->name);
-		conn = sg->sched->sched_srv(msg, sg);
+		srv_conn = sg->sched->sched_srv(msg, sg);
 	}
 
-	if (unlikely(!conn))
+	if (unlikely(!srv_conn))
 		TFW_DBG2("sched_http: Unable to select server from group"
 			 " '%s'\n", sg->name);
 
-	return conn;
+	return srv_conn;
 }
 
-static TfwConnection *
+static TfwSrvConnection *
 tfw_sched_http_sched_srv(TfwMsg *msg, TfwSrvGroup *sg)
 {
 	WARN_ONCE(true, "tfw_sched_http can't select a server from a group\n");
diff --git a/tempesta_fw/sched/tfw_sched_rr.c b/tempesta_fw/sched/tfw_sched_rr.c
index 5a26e4d04..06a2f1467 100644
--- a/tempesta_fw/sched/tfw_sched_rr.c
+++ b/tempesta_fw/sched/tfw_sched_rr.c
@@ -39,7 +39,7 @@ typedef struct {
 	atomic64_t		rr_counter;
 	size_t			conn_n;
 	TfwServer		*srv;
-	TfwConnection 		*conns[TFW_SRV_MAX_CONN];
+	TfwSrvConnection	*srv_conns[TFW_SRV_MAX_CONN];
 } TfwRrSrv;
 
 /**
@@ -71,7 +71,8 @@ tfw_sched_rr_free_data(TfwSrvGroup *sg)
  * Called at configuration phase, no synchronization is required.
  */
 static void
-tfw_sched_rr_add_conn(TfwSrvGroup *sg, TfwServer *srv, TfwConnection *conn)
+tfw_sched_rr_add_conn(TfwSrvGroup *sg, TfwServer *srv,
+		      TfwSrvConnection *srv_conn)
 {
 	size_t s, c;
 	TfwRrSrv *srv_cl;
@@ -90,12 +91,12 @@ tfw_sched_rr_add_conn(TfwSrvGroup *sg, TfwServer *srv, TfwConnection *conn)
 
 	srv_cl = &sl->srvs[s];
 	for (c = 0; c < srv_cl->conn_n; ++c)
-		if (srv_cl->conns[c] == conn) {
+		if (srv_cl->srv_conns[c] == srv_conn) {
 			TFW_WARN("sched_rr: Try to add existing connection,"
 				 " srv=%zu conn=%zu\n", s, c);
 			return;
 		}
-	srv_cl->conns[c] = conn;
+	srv_cl->srv_conns[c] = srv_conn;
 	++srv_cl->conn_n;
 	BUG_ON(srv_cl->conn_n > TFW_SRV_MAX_CONN);
 }
@@ -117,7 +118,7 @@ tfw_sched_rr_add_conn(TfwSrvGroup *sg, TfwServer *srv, TfwConnection *conn)
  * optimistic in that there are not many non-idempotent requests, and
  * there are available server connections.
  */
-static TfwConnection *
+static TfwSrvConnection *
 tfw_sched_rr_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 {
 	size_t c, s;
@@ -125,7 +126,7 @@ tfw_sched_rr_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 	int skipnip = 1, nipconn = 0;
 	TfwRrSrvList *sl = sg->sched_data;
 	TfwRrSrv *srv_cl;
-	TfwConnection *conn;
+	TfwSrvConnection *srv_conn;
 
 	BUG_ON(!sl);
 rerun:
@@ -134,17 +135,17 @@ tfw_sched_rr_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 		srv_cl = &sl->srvs[idxval % sl->srv_n];
 		for (c = 0; c < srv_cl->conn_n; ++c) {
 			idxval = atomic64_inc_return(&srv_cl->rr_counter);
-			conn = srv_cl->conns[idxval % srv_cl->conn_n];
-			if (unlikely(tfw_connection_restricted(conn))
-			    || unlikely(tfw_server_queue_full(conn)))
+			srv_conn = srv_cl->srv_conns[idxval % srv_cl->conn_n];
+			if (unlikely(tfw_srv_conn_restricted(srv_conn)
+				     || tfw_server_queue_full(srv_conn)))
 				continue;
-			if (skipnip && tfw_connection_hasnip(conn)) {
-				if (likely(tfw_connection_live(conn)))
+			if (skipnip && tfw_srv_conn_hasnip(srv_conn)) {
+				if (likely(tfw_srv_conn_live(srv_conn)))
 					nipconn++;
 				continue;
 			}
-			if (tfw_connection_get_if_live(conn))
-				return conn;
+			if (tfw_srv_conn_get_if_live(srv_conn))
+				return srv_conn;
 		}
 	}
 	if (skipnip && nipconn) {
diff --git a/tempesta_fw/server.c b/tempesta_fw/server.c
index f40a6a4dc..bc4dafab3 100644
--- a/tempesta_fw/server.c
+++ b/tempesta_fw/server.c
@@ -185,10 +185,10 @@ tfw_sg_add(TfwSrvGroup *sg, TfwServer *srv)
 }
 
 void
-tfw_sg_add_conn(TfwSrvGroup *sg, TfwServer *srv, TfwConnection *conn)
+tfw_sg_add_conn(TfwSrvGroup *sg, TfwServer *srv, TfwSrvConnection *srv_conn)
 {
 	if (sg->sched && sg->sched->add_conn)
-		sg->sched->add_conn(sg, srv, conn);
+		sg->sched->add_conn(sg, srv, srv_conn);
 }
 
 int
diff --git a/tempesta_fw/server.h b/tempesta_fw/server.h
index 7907ed1b1..e5bd9dbda 100644
--- a/tempesta_fw/server.h
+++ b/tempesta_fw/server.h
@@ -25,7 +25,7 @@
 #include "connection.h"
 #include "peer.h"
 
-#define TFW_SRV_MAX_CONN	32	/* TfwConnection per TfwServer */
+#define TFW_SRV_MAX_CONN	32	/* TfwSrvConnection per TfwServer */
 #define TFW_SG_MAX_SRV		32	/* TfwServer per TfwSrvGroup */
 #define TFW_SG_MAX_CONN		(TFW_SG_MAX_SRV * TFW_SRV_MAX_CONN)
 
@@ -62,7 +62,7 @@ typedef struct {
  * @max_qsize	- maximum queue size of a server connection;
  * @max_jqage	- maximum age of a request in a server connection, in jiffies;
  * @max_refwd	- maximum number of tries for forwarding a request;
- * @max_recons	- maximum number of reconnect attempts;
+ * @max_recns	- maximum number of reconnect attempts;
  * @flags	- server group related flags;
  * @name	- name of the group specified in the configuration;
  */
@@ -75,7 +75,7 @@ struct tfw_srv_group_t {
 	unsigned int		max_qsize;
 	unsigned int		max_refwd;
 	unsigned long		max_jqage;
-	unsigned int		max_recons;
+	unsigned int		max_recns;
 	unsigned int		flags;
 	char			name[0];
 };
@@ -112,10 +112,9 @@ struct tfw_scheduler_t {
 	void			(*add_grp)(TfwSrvGroup *sg);
 	void			(*del_grp)(TfwSrvGroup *sg);
 	void			(*add_conn)(TfwSrvGroup *sg, TfwServer *srv,
-					    TfwConnection *conn);
-	TfwConnection		*(*sched_grp)(TfwMsg *msg);
-	TfwConnection		*(*sched_srv)(TfwMsg *msg,
-					      TfwSrvGroup *sg);
+					    TfwSrvConnection *srv_conn);
+	TfwSrvConnection	*(*sched_grp)(TfwMsg *msg);
+	TfwSrvConnection	*(*sched_srv)(TfwMsg *msg, TfwSrvGroup *sg);
 };
 
 /* Server specific routines. */
@@ -123,10 +122,10 @@ TfwServer *tfw_server_create(const TfwAddr *addr);
 int tfw_server_apm_create(TfwServer *srv);
 void tfw_server_destroy(TfwServer *srv);
 
-void tfw_srv_conn_release(TfwConnection *conn);
+void tfw_srv_conn_release(TfwSrvConnection *srv_conn);
 
 static inline bool
-tfw_server_queue_full(TfwConnection *srv_conn)
+tfw_server_queue_full(TfwSrvConnection *srv_conn)
 {
 	TfwSrvGroup *sg = ((TfwServer *)srv_conn->peer)->sg;
 	return ACCESS_ONCE(srv_conn->qsize) >= sg->max_qsize;
@@ -139,13 +138,14 @@ void tfw_sg_free(TfwSrvGroup *sg);
 int tfw_sg_count(void);
 
 void tfw_sg_add(TfwSrvGroup *sg, TfwServer *srv);
-void tfw_sg_add_conn(TfwSrvGroup *sg, TfwServer *srv, TfwConnection *conn);
+void tfw_sg_add_conn(TfwSrvGroup *sg, TfwServer *srv,
+		     TfwSrvConnection *srv_conn);
 int tfw_sg_set_sched(TfwSrvGroup *sg, const char *sched);
 int tfw_sg_for_each_srv(int (*cb)(TfwServer *srv));
 void tfw_sg_release_all(void);
 
 /* Scheduler routines. */
-TfwConnection *tfw_sched_get_srv_conn(TfwMsg *msg);
+TfwSrvConnection *tfw_sched_get_srv_conn(TfwMsg *msg);
 TfwScheduler *tfw_sched_lookup(const char *name);
 int tfw_sched_register(TfwScheduler *sched);
 void tfw_sched_unregister(TfwScheduler *sched);
diff --git a/tempesta_fw/sock_clnt.c b/tempesta_fw/sock_clnt.c
index f6baa1c2b..c080faa79 100644
--- a/tempesta_fw/sock_clnt.c
+++ b/tempesta_fw/sock_clnt.c
@@ -51,7 +51,7 @@ tfw_cli_cache(int type)
 static void
 tfw_sock_cli_keepalive_timer_cb(unsigned long data)
 {
-	TfwConnection *conn = (TfwConnection *)data;
+	TfwCliConnection *cli_conn = (TfwCliConnection *)data;
 
 	TFW_DBG("Client timeout end\n");
 
@@ -60,64 +60,62 @@ tfw_sock_cli_keepalive_timer_cb(unsigned long data)
 	 * a deadlock on del_timer_sync(). In case of error try to close
 	 * it one second later.
 	 */
-	if (ss_close(conn->sk))
-		mod_timer(&conn->timer,
-			  jiffies + msecs_to_jiffies(1000));
+	if (ss_close(cli_conn->sk))
+		mod_timer(&cli_conn->timer, jiffies + msecs_to_jiffies(1000));
 }
 
-static TfwConnection *
+static TfwCliConnection *
 tfw_cli_conn_alloc(int type)
 {
-	TfwConnection *conn;
+	TfwCliConnection *cli_conn;
 
-	conn = kmem_cache_alloc(tfw_cli_cache(type), GFP_ATOMIC);
-	if (!conn)
+	if (!(cli_conn = kmem_cache_alloc(tfw_cli_cache(type), GFP_ATOMIC)))
 		return NULL;
 
-	tfw_connection_init(conn);
-	INIT_LIST_HEAD(&conn->seq_queue);
-	spin_lock_init(&conn->seq_qlock);
-	spin_lock_init(&conn->ret_qlock);
+	tfw_connection_init((TfwConnection *)cli_conn);
+	INIT_LIST_HEAD(&cli_conn->seq_queue);
+	spin_lock_init(&cli_conn->seq_qlock);
+	spin_lock_init(&cli_conn->ret_qlock);
 
-	setup_timer(&conn->timer,
+	setup_timer(&cli_conn->timer,
 		    tfw_sock_cli_keepalive_timer_cb,
-		    (unsigned long)conn);
+		    (unsigned long)cli_conn);
 
-	return conn;
+	return cli_conn;
 }
 
 static void
-tfw_cli_conn_free(TfwConnection *conn)
+tfw_cli_conn_free(TfwCliConnection *cli_conn)
 {
-	BUG_ON(timer_pending(&conn->timer));
+	BUG_ON(timer_pending(&cli_conn->timer));
 
 	/* Check that all nested resources are freed. */
-	tfw_connection_validate_cleanup(conn);
-	BUG_ON(!list_empty(&conn->seq_queue));
+	tfw_connection_validate_cleanup((TfwConnection *)cli_conn);
+	BUG_ON(!list_empty(&cli_conn->seq_queue));
 
-	kmem_cache_free(tfw_cli_cache(TFW_CONN_TYPE(conn)), conn);
+	kmem_cache_free(tfw_cli_cache(TFW_CONN_TYPE(cli_conn)), cli_conn);
 }
 
 void
-tfw_cli_conn_release(TfwConnection *conn)
+tfw_cli_conn_release(TfwCliConnection *cli_conn)
 {
-	del_timer_sync(&conn->timer);
+	del_timer_sync(&cli_conn->timer);
 
-	if (likely(conn->sk))
-		tfw_connection_unlink_to_sk(conn);
-	if (likely(conn->peer))
-		tfw_client_put((TfwClient *)conn->peer);
-	tfw_cli_conn_free(conn);
+	if (likely(cli_conn->sk))
+		tfw_connection_unlink_to_sk((TfwConnection *)cli_conn);
+	if (likely(cli_conn->peer))
+		tfw_client_put((TfwClient *)cli_conn->peer);
+	tfw_cli_conn_free(cli_conn);
 	TFW_INC_STAT_BH(clnt.conn_disconnects);
 }
 
 int
-tfw_cli_conn_send(TfwConnection *conn, TfwMsg *msg)
+tfw_cli_conn_send(TfwCliConnection *cli_conn, TfwMsg *msg)
 {
 	int r;
 
-	r = tfw_connection_send(conn, msg);
-	mod_timer(&conn->timer,
+	r = tfw_connection_send((TfwConnection *)cli_conn, msg);
+	mod_timer(&cli_conn->timer,
 		  jiffies + msecs_to_jiffies(tfw_cli_cfg_ka_timeout * 1000));
 
 	if (r)
@@ -156,7 +154,7 @@ tfw_sock_clnt_new(struct sock *sk)
 		return -ENOENT;
 	}
 
-	conn = tfw_cli_conn_alloc(listen_sock_proto->type);
+	conn = (TfwConnection *)tfw_cli_conn_alloc(listen_sock_proto->type);
 	if (!conn) {
 		TFW_ERR("can't allocate a new client connection\n");
 		goto err_client;
@@ -190,7 +188,7 @@ tfw_sock_clnt_new(struct sock *sk)
 
 err_conn:
 	tfw_connection_drop(conn);
-	tfw_cli_conn_free(conn);
+	tfw_cli_conn_free((TfwCliConnection *)conn);
 err_client:
 	tfw_client_put(cli);
 	return r;
@@ -621,7 +619,7 @@ tfw_sock_clnt_init(void)
 	BUG_ON(tfw_cli_conn_tls_cache);
 
 	tfw_cli_conn_cache = kmem_cache_create("tfw_cli_conn_cache",
-					       sizeof(TfwConnection),
+					       sizeof(TfwCliConnection),
 					       0, 0, NULL);
 	if (!tfw_cli_conn_cache)
 		return -ENOMEM;
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index 16dd41285..b14043c79 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -36,37 +36,32 @@
  * ------------------------------------------------------------------------
  *	Server connection establishment.
  *
- * This section of code is responsible for maintaining a server connection in
- * an established state, and doing so in an asynchronous (callback-based) way.
+ * This is responsible for maintaining a server connection in established
+ * state, and doing so in an asynchronous (callback-based) way.
  *
  * The entry point is the tfw_sock_srv_connect_try() function.
  * It initiates a connect attempt and just exits without blocking.
  *
  * Later on, when connection state is changed, a callback is invoked:
- *  - tfw_sock_srv_connect_retry() - a connect attempt has failed.
+ *  - tfw_sock_srv_connect_retry()    - a connect attempt has failed.
  *  - tfw_sock_srv_connect_complete() - a connection is established.
  *  - tfw_sock_srv_connect_failover() - an established connection is closed.
  *
- * Both retry() and failover() call tfw_sock_srv_connect_try() again
- * to re-establish the connection, and thus tfw_sock_srv_connect_try() is
- * called repeatedly until the connection is finally established (or until
- * this "loop" of callbacks is stopped by tfw_sock_srv_disconnect()).
- *
+ * Both retry() and failover() call connect_try() again to re-establish the
+ * connection. Thus connect_try() is called repeatedly until the connection
+ * is finally established (or until this "loop" of callbacks is stopped by
+ * tfw_sock_srv_disconnect()).
  * ------------------------------------------------------------------------
  */
 
 /**
- * TfwConnection extension for server sockets.
- *
- * @conn	- The base structure. Must be the first member.
- *
  * A server connection differs from a client connection.
- * For client sockets, a new TfwConnection{} instance is created when
- * a new client socket is accepted (the connection is established at
- * that point). For server sockets, we create a socket first, and then
- * some time passes while a connection is being established.
+ * For clients, a new TfwCliConnection{} instance is created when a new
+ * client socket is accepted (the connection is established at that point).
+ * For servers, a socket is created first, and then some time passes while
+ * a connection is being established.
  *
- * Therefore, this extension structure has slightly different semantics:
+ * TfwSrvConnection{} instance goes though the following periods of life:
  * - First, a TfwSrvConnection{} instance is allocated and set up with
  *   data from configuration file.
  * - When a server socket is created, the TfwSrvConnection{} instance
@@ -108,13 +103,6 @@
  *    reused. So the attempt to reconnect has to wait. It is started as
  *    soon as the last client releases the server connection.
  */
-/**
- * @recons	- the number of reconnect attempts;
- */
-typedef struct {
-	TfwConnection		conn;
-	unsigned int		recons;
-} TfwSrvConnection;
 
 /*
  * Timeout between connect attempts is increased with each unsuccessful
@@ -141,9 +129,8 @@ tfw_sock_srv_connect_try(TfwSrvConnection *srv_conn)
 	int r;
 	TfwAddr *addr;
 	struct sock *sk;
-	TfwConnection *conn = &srv_conn->conn;
 
-	addr = &conn->peer->addr;
+	addr = &srv_conn->peer->addr;
 
 	r = ss_sock_create(addr->family, SOCK_STREAM, IPPROTO_TCP, &sk);
 	if (r) {
@@ -160,7 +147,7 @@ tfw_sock_srv_connect_try(TfwSrvConnection *srv_conn)
 #if defined(DEBUG) && (DEBUG >= 2)
 	sock_set_flag(sk, SOCK_DBG);
 #endif
-	tfw_connection_link_from_sk(conn, sk);
+	tfw_connection_link_from_sk((TfwConnection *)srv_conn, sk);
 	ss_set_callbacks(sk);
 
 	/*
@@ -171,7 +158,7 @@ tfw_sock_srv_connect_try(TfwSrvConnection *srv_conn)
 	 *    so there is no activity in the socket;
 	 *
 	 * 2. tfw_sock_srv_do_failover() upcalled from SS layer and with
-	 *    inactive conn->sk, so nobody can send through the socket.
+	 *    inactive @srv_conn->sk, so nobody can send through the socket.
 	 *    Also since the function is called by connection_error or
 	 *    connection_drop hook from SoftIRQ, there can't be another
 	 *    socket state change upcall from SS layer due to RSS.
@@ -196,13 +183,13 @@ tfw_sock_srv_connect_try(TfwSrvConnection *srv_conn)
 	 * Set connection destructor such that connection failover can
 	 * take place if the connection attempt fails.
 	 */
-	conn->destructor = (void *)tfw_srv_conn_release;
+	srv_conn->destructor = (void *)tfw_srv_conn_release;
 
 	return 0;
 }
 
 /*
- * @max_recons can be the maximum value for the data type to mean
+ * @max_recns can be the maximum value for the data type to mean
  * the unlimited number of attempts, which is the value that should
  * never be reached. UINT_MAX seconds is more than 136 years. It's
  * safe to assume that it's not reached in a single run of Tempesta.
@@ -216,7 +203,7 @@ tfw_sock_srv_connect_try(TfwSrvConnection *srv_conn)
 static inline void
 tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
 {
-	TfwSrvGroup *sg = ((TfwServer *)srv_conn->conn.peer)->sg;
+	TfwSrvGroup *sg = ((TfwServer *)srv_conn->peer)->sg;
 	unsigned long timeout;
 
 	/* Don't rearm reconnection timer if we're about to shutdown. */
@@ -229,38 +216,38 @@ tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
 	 * never be reached. UINT_MAX seconds is more than 136 years. It's
 	 * safe to assume that it's not reached in a single run of Tempesta.
 	 */
-	if (unlikely((srv_conn->recons >= sg->max_recons)
-		     && !test_bit(TFW_CONN_B_ISDEAD, &srv_conn->conn.flags)))
+	if (unlikely((srv_conn->recns >= sg->max_recns)
+		     && !test_bit(TFW_CONN_B_ISDEAD, &srv_conn->flags)))
 	{
-		TfwAddr *srv_addr = &srv_conn->conn.peer->addr;
+		TfwAddr *srv_addr = &srv_conn->peer->addr;
 		char s_addr[TFW_ADDR_STR_BUF_SIZE] = { 0 };
 		tfw_addr_ntop(srv_addr, s_addr, sizeof(s_addr));
 		TFW_WARN("The limit of [%d] on reconnect attempts exceeded. "
 			 "The server connection [%s] is down.\n",
-			 sg->max_recons, s_addr);
-		tfw_connection_repair(&srv_conn->conn);
-		set_bit(TFW_CONN_B_ISDEAD, &srv_conn->conn.flags);
+			 sg->max_recns, s_addr);
+		tfw_connection_repair((TfwConnection *)srv_conn);
+		set_bit(TFW_CONN_B_ISDEAD, &srv_conn->flags);
 	}
-	if (srv_conn->recons < ARRAY_SIZE(tfw_srv_tmo_vals)) {
-		timeout = tfw_srv_tmo_vals[srv_conn->recons];
+	if (srv_conn->recns < ARRAY_SIZE(tfw_srv_tmo_vals)) {
+		timeout = tfw_srv_tmo_vals[srv_conn->recns];
 		TFW_DBG_ADDR("Cannot establish connection",
-			     &srv_conn->conn.peer->addr);
+			     &srv_conn->peer->addr);
 	} else {
 		timeout = tfw_srv_tmo_vals[ARRAY_SIZE(tfw_srv_tmo_vals) - 1];
-		if (srv_conn->recons == ARRAY_SIZE(tfw_srv_tmo_vals)
-		    || !(srv_conn->recons % 60))
+		if (srv_conn->recns == ARRAY_SIZE(tfw_srv_tmo_vals)
+		    || !(srv_conn->recns % 60))
 		{
 			char addr_str[TFW_ADDR_STR_BUF_SIZE] = { 0 };
-			tfw_addr_fmt_v6(&srv_conn->conn.peer->addr.v6.sin6_addr,
+			tfw_addr_fmt_v6(&srv_conn->peer->addr.v6.sin6_addr,
 					0, addr_str);
 			TFW_WARN("Cannot establish connection with %s in %u"
 				 " tries, keep trying...\n",
-				 addr_str, srv_conn->recons);
+				 addr_str, srv_conn->recns);
 		}
 	}
-	srv_conn->recons++;
+	srv_conn->recns++;
 
-	mod_timer(&srv_conn->conn.timer, jiffies + msecs_to_jiffies(timeout));
+	mod_timer(&srv_conn->timer, jiffies + msecs_to_jiffies(timeout));
 }
 
 static void
@@ -276,36 +263,36 @@ tfw_sock_srv_connect_retry_timer_cb(unsigned long data)
 static inline void
 __reset_retry_timer(TfwSrvConnection *srv_conn)
 {
-	srv_conn->recons = 0;
+	srv_conn->recns = 0;
 }
 
 static inline void
 __setup_retry_timer(TfwSrvConnection *srv_conn)
 {
 	__reset_retry_timer(srv_conn);
-	setup_timer(&srv_conn->conn.timer,
+	setup_timer(&srv_conn->timer,
 		    tfw_sock_srv_connect_retry_timer_cb,
 		    (unsigned long)srv_conn);
 }
 
 void
-tfw_srv_conn_release(TfwConnection *conn)
+tfw_srv_conn_release(TfwSrvConnection *srv_conn)
 {
-	tfw_connection_release(conn);
+	tfw_connection_release((TfwConnection *)srv_conn);
 	/*
 	 * conn->sk may be zeroed if we get here after a failed
 	 * connect attempt. In that case no connection has been
 	 * established yet, and conn->sk has not been set.
 	 */
-	if (likely(conn->sk))
-		tfw_connection_unlink_to_sk(conn);
+	if (likely(srv_conn->sk))
+		tfw_connection_unlink_to_sk((TfwConnection *)srv_conn);
 	/*
 	 * After a disconnect, new connect attempts are started
 	 * in deferred context after a short pause (in a timer
 	 * callback). Whatever the reason for a disconnect was,
 	 * this is uniform for any of them.
 	 */
-	tfw_sock_srv_connect_try_later((TfwSrvConnection *)conn);
+	tfw_sock_srv_connect_try_later(srv_conn);
 }
 
 /**
@@ -315,8 +302,7 @@ static int
 tfw_sock_srv_connect_complete(struct sock *sk)
 {
 	int r;
-	TfwSrvConnection *srv_conn = sk->sk_user_data;
-	TfwConnection *conn = &srv_conn->conn;
+	TfwConnection *conn = sk->sk_user_data;
 	TfwServer *srv = (TfwServer *)conn->peer;
 
 	/* Link Tempesta with the socket. */
@@ -332,10 +318,10 @@ tfw_sock_srv_connect_complete(struct sock *sk)
 	tfw_connection_revive(conn);
 
 	/* Repair the connection if necessary. */
-	if (unlikely(tfw_connection_restricted(conn)))
+	if (unlikely(tfw_srv_conn_restricted((TfwSrvConnection *)conn)))
 		tfw_connection_repair(conn);
 
-	__reset_retry_timer(srv_conn);
+	__reset_retry_timer((TfwSrvConnection *)conn);
 
 	TFW_DBG_ADDR("connected", &srv->addr);
 	TFW_INC_STAT_BH(serv.conn_established);
@@ -441,7 +427,7 @@ tfw_sock_srv_connect_srv(TfwServer *srv)
 	 * is locked, and spews lots of warnings. LOCKDEP doesn't know
 	 * that parallel execution can't happen with the same socket.
 	 */
-	list_for_each_entry(srv_conn, &srv->conn_list, conn.list)
+	list_for_each_entry(srv_conn, &srv->conn_list, list)
 		tfw_sock_srv_connect_try_later(srv_conn);
 
 	return 0;
@@ -455,7 +441,13 @@ tfw_sock_srv_disconnect_srv(TfwServer *srv)
 {
 	TfwConnection *conn;
 
+<<<<<<< 760ea44c0912d51bf97bb4ce7da4ed59151e545e
 	return tfw_peer_for_each_conn(srv, conn, list, tfw_sock_srv_disconnect);
+=======
+	list_for_each_entry(srv_conn, &srv->conn_list, list)
+		tfw_sock_srv_disconnect(srv_conn);
+	return 0;
+>>>>>>> Split TfwConnection{} into TfwCliConnection{} and TfwSrvConnection{}.
 }
 
 /*
@@ -484,14 +476,13 @@ tfw_srv_conn_alloc(void)
 	if (!(srv_conn = kmem_cache_alloc(tfw_srv_conn_cache, GFP_ATOMIC)))
 		return NULL;
 
-	tfw_connection_init(&srv_conn->conn);
-	INIT_LIST_HEAD(&srv_conn->conn.fwd_queue);
-	INIT_LIST_HEAD(&srv_conn->conn.nip_queue);
-	spin_lock_init(&srv_conn->conn.fwd_qlock);
+	tfw_connection_init((TfwConnection *)srv_conn);
+	INIT_LIST_HEAD(&srv_conn->fwd_queue);
+	INIT_LIST_HEAD(&srv_conn->nip_queue);
+	spin_lock_init(&srv_conn->fwd_qlock);
 
 	__setup_retry_timer(srv_conn);
-	ss_proto_init(&srv_conn->conn.proto,
-		      &tfw_sock_srv_ss_hooks, Conn_HttpSrv);
+	ss_proto_init(&srv_conn->proto, &tfw_sock_srv_ss_hooks, Conn_HttpSrv);
 
 	return srv_conn;
 }
@@ -499,12 +490,12 @@ tfw_srv_conn_alloc(void)
 static void
 tfw_srv_conn_free(TfwSrvConnection *srv_conn)
 {
-	BUG_ON(timer_pending(&srv_conn->conn.timer));
+	BUG_ON(timer_pending(&srv_conn->timer));
 
 	/* Check that all nested resources are freed. */
-	tfw_connection_validate_cleanup(&srv_conn->conn);
-	BUG_ON(!list_empty(&srv_conn->conn.nip_queue));
-	BUG_ON(ACCESS_ONCE(srv_conn->conn.qsize));
+	tfw_connection_validate_cleanup((TfwConnection *)srv_conn);
+	BUG_ON(!list_empty(&srv_conn->nip_queue));
+	BUG_ON(ACCESS_ONCE(srv_conn->qsize));
 
 	kmem_cache_free(tfw_srv_conn_cache, srv_conn);
 }
@@ -518,8 +509,9 @@ tfw_sock_srv_add_conns(TfwServer *srv, int conns_n)
 	for (i = 0; i < conns_n; ++i) {
 		if (!(srv_conn = tfw_srv_conn_alloc()))
 			return -ENOMEM;
-		tfw_connection_link_peer(&srv_conn->conn, (TfwPeer *)srv);
-		tfw_sg_add_conn(srv->sg, srv, &srv_conn->conn);
+		tfw_connection_link_peer((TfwConnection *)srv_conn,
+					 (TfwPeer *)srv);
+		tfw_sg_add_conn(srv->sg, srv, srv_conn);
 	}
 
 	return 0;
@@ -530,8 +522,8 @@ tfw_sock_srv_del_conns(TfwServer *srv)
 {
 	TfwSrvConnection *srv_conn, *tmp;
 
-	list_for_each_entry_safe(srv_conn, tmp, &srv->conn_list, conn.list) {
-		tfw_connection_unlink_from_peer(&srv_conn->conn);
+	list_for_each_entry_safe(srv_conn, tmp, &srv->conn_list, list) {
+		tfw_connection_unlink_from_peer((TfwConnection *)srv_conn);
 		tfw_srv_conn_free(srv_conn);
 	}
 	return 0;
@@ -672,14 +664,14 @@ tfw_cfgop_out_conn_tries(TfwCfgSpec *cs, TfwCfgEntry *ce)
 }
 
 static int
-tfw_cfgop_set_conn_tries(TfwSrvGroup *sg, int recons)
+tfw_cfgop_set_conn_tries(TfwSrvGroup *sg, int recns)
 {
-	if (!recons) {
-		sg->max_recons = UINT_MAX;
-	} else if (recons < ARRAY_SIZE(tfw_srv_tmo_vals)) {
-		sg->max_recons = ARRAY_SIZE(tfw_srv_tmo_vals);
+	if (!recns) {
+		sg->max_recns = UINT_MAX;
+	} else if (recns < ARRAY_SIZE(tfw_srv_tmo_vals)) {
+		sg->max_recns = ARRAY_SIZE(tfw_srv_tmo_vals);
 	} else {
-		sg->max_recons = recons;
+		sg->max_recns = recns;
 	}
 
 	return 0;
diff --git a/tempesta_fw/t/unit/sched_helper.c b/tempesta_fw/t/unit/sched_helper.c
index 054ad4100..119e35b99 100644
--- a/tempesta_fw/t/unit/sched_helper.c
+++ b/tempesta_fw/t/unit/sched_helper.c
@@ -118,38 +118,37 @@ test_create_conn(TfwPeer *peer)
 	static struct sock __test_sock = {
 		.sk_state = TCP_ESTABLISHED,
 	};
-	TfwSrvConnection *srv_conn;
+	TfwConnection *conn;
 
 	kernel_fpu_end();
 
 	if (!tfw_srv_conn_cache)
 		tfw_sock_srv_init();
-	srv_conn = tfw_srv_conn_alloc();
-	BUG_ON(!srv_conn);
+	conn = (TfwConnection *)tfw_srv_conn_alloc();
+	BUG_ON(!conn);
 
-	tfw_connection_link_peer(&srv_conn->conn, peer);
-	srv_conn->conn.sk = &__test_sock;
+	tfw_connection_link_peer(conn, peer);
+	conn->sk = &__test_sock;
 	/* A connection is skipped by schedulers if (refcnt <= 0). */
-	tfw_connection_revive(&srv_conn->conn);
+	tfw_connection_revive(conn);
 
 	kernel_fpu_begin();
 
-	return srv_conn;
+	return (TfwSrvConnection *)conn;
 }
 
 void
 test_conn_release_all(TfwSrvGroup *sg)
 {
-	TfwConnection *conn, *conn_tmp;
-	TfwServer *srv, *srv_tmp;
+	TfwServer *srv;
+	TfwConnection *conn, *tmp;
 
-	list_for_each_entry_safe(srv, srv_tmp, &sg->srv_list, list) {
-		list_for_each_entry_safe(conn, conn_tmp, &srv->conn_list, list) {
+	list_for_each_entry(srv, &sg->srv_list, list) {
+		list_for_each_entry_safe(conn, tmp, &srv->conn_list, list) {
 			conn->sk = NULL;
 			tfw_connection_unlink_from_peer(conn);
-			while (tfw_connection_live(conn)) {
+			while (tfw_connection_live(conn))
 				tfw_connection_put(conn);
-			}
 			tfw_srv_conn_free((TfwSrvConnection *)conn);
 		}
 	}
@@ -171,9 +170,9 @@ test_sched_generic_empty_sg(struct TestSchedHelper *sched_helper)
 
 	for (i = 0; i < sched_helper->conn_types; ++i) {
 		TfwMsg *msg = sched_helper->get_sched_arg(i);
-		TfwConnection *conn = sg->sched->sched_srv(msg, sg);
+		TfwSrvConnection *srv_conn = sg->sched->sched_srv(msg, sg);
 
-		EXPECT_NULL(conn);
+		EXPECT_NULL(srv_conn);
 		sched_helper->free_sched_arg(msg);
 	}
 
@@ -198,9 +197,9 @@ test_sched_generic_one_srv_zero_conn(struct TestSchedHelper *sched_helper)
 
 	for (i = 0; i < sched_helper->conn_types; ++i) {
 		TfwMsg *msg = sched_helper->get_sched_arg(i);
-		TfwConnection *conn = sg->sched->sched_srv(msg, sg);
+		TfwSrvConnection *srv_conn = sg->sched->sched_srv(msg, sg);
 
-		EXPECT_NULL(conn);
+		EXPECT_NULL(srv_conn);
 		sched_helper->free_sched_arg(msg);
 	}
 
@@ -227,9 +226,10 @@ test_sched_generic_max_srv_zero_conn(struct TestSchedHelper *sched_helper)
 	for (i = 0; i < sched_helper->conn_types; ++i) {
 		for (j = 0; j < TFW_SG_MAX_SRV; ++j) {
 			TfwMsg *msg = sched_helper->get_sched_arg(i);
-			TfwConnection *conn = sg->sched->sched_srv(msg, sg);
+			TfwSrvConnection *srv_conn =
+				sg->sched->sched_srv(msg, sg);
 
-			EXPECT_NULL(conn);
+			EXPECT_NULL(srv_conn);
 			sched_helper->free_sched_arg(msg);
 		}
 	}
diff --git a/tempesta_fw/t/unit/test_http_sticky.c b/tempesta_fw/t/unit/test_http_sticky.c
index eb3ae7b0c..865ce2f4f 100644
--- a/tempesta_fw/t/unit/test_http_sticky.c
+++ b/tempesta_fw/t/unit/test_http_sticky.c
@@ -177,9 +177,9 @@ tfw_connection_send(TfwConnection *conn, TfwMsg *msg)
 }
 
 /* custom version for testing purposes */
-int tfw_cli_conn_send(TfwConnection *conn, TfwMsg *msg)
+int tfw_cli_conn_send(TfwCliConnection *cli_conn, TfwMsg *msg)
 {
-	return tfw_connection_send(conn, msg);
+	return tfw_connection_send((TfwConnection *)cli_conn, msg);
 }
 
 /* setup/teardown helpers */
diff --git a/tempesta_fw/t/unit/test_sched_hash.c b/tempesta_fw/t/unit/test_sched_hash.c
index a4444a7e3..537e00209 100644
--- a/tempesta_fw/t/unit/test_sched_hash.c
+++ b/tempesta_fw/t/unit/test_sched_hash.c
@@ -107,25 +107,26 @@ TEST(tfw_sched_hash, one_srv_in_sg_and_max_conn)
 	TfwServer *srv = test_create_srv("127.0.0.1", sg);
 
 	for (i = 0; i < TFW_SRV_MAX_CONN; ++i) {
-		TfwSrvConnection *sconn = test_create_conn((TfwPeer *)srv);
-		sg->sched->add_conn(sg, srv, &sconn->conn);
+		TfwSrvConnection *srv_conn = test_create_conn((TfwPeer *)srv);
+		sg->sched->add_conn(sg, srv, srv_conn);
 	}
 
 	/* Check that every request is scheduled to the same connection. */
 	for (i = 0; i < sched_helper_hash.conn_types; ++i) {
-		TfwConnection *exp_conn = NULL;
+		TfwSrvConnection *expect_conn = NULL;
 
 		for (j = 0; j < TFW_SRV_MAX_CONN; ++j) {
 			TfwMsg *msg = sched_helper_hash.get_sched_arg(i);
-			TfwConnection *conn = sg->sched->sched_srv(msg, sg);
-			EXPECT_NOT_NULL(conn);
+			TfwSrvConnection *srv_conn =
+				sg->sched->sched_srv(msg, sg);
+			EXPECT_NOT_NULL(srv_conn);
 
-			if (!exp_conn)
-				exp_conn = conn;
+			if (!expect_conn)
+				expect_conn = srv_conn;
 			else
-				EXPECT_EQ(conn, exp_conn);
+				EXPECT_EQ(srv_conn, expect_conn);
 
-			tfw_connection_put(conn);
+			tfw_srv_conn_put(srv_conn);
 			sched_helper_hash.free_sched_arg(msg);
 		}
 	}
@@ -156,27 +157,28 @@ TEST(tfw_sched_hash, max_srv_in_sg_and_max_conn)
 		TfwServer *srv = test_create_srv("127.0.0.1", sg);
 
 		for (j = 0; j < TFW_SRV_MAX_CONN; ++j) {
-			TfwSrvConnection *sconn =
-					test_create_conn((TfwPeer *)srv);
-			sg->sched->add_conn(sg, srv, &sconn->conn);
+			TfwSrvConnection *srv_conn =
+				test_create_conn((TfwPeer *)srv);
+			sg->sched->add_conn(sg, srv, srv_conn);
 		}
 	}
 
 	/* Check that every request is scheduled to the same connection. */
 	for (i = 0; i < sched_helper_hash.conn_types; ++i) {
-		TfwConnection *exp_conn = NULL;
+		TfwSrvConnection *expect_conn = NULL;
 
 		for (j = 0; j < TFW_SG_MAX_SRV * TFW_SRV_MAX_CONN; ++j) {
 			TfwMsg *msg = sched_helper_hash.get_sched_arg(i);
-			TfwConnection *conn = sg->sched->sched_srv(msg, sg);
-			EXPECT_NOT_NULL(conn);
+			TfwSrvConnection *srv_conn =
+				sg->sched->sched_srv(msg, sg);
+			EXPECT_NOT_NULL(srv_conn);
 
-			if (!exp_conn)
-				exp_conn = conn;
+			if (!expect_conn)
+				expect_conn = srv_conn;
 			else
-				EXPECT_EQ(conn, exp_conn);
+				EXPECT_EQ(srv_conn, expect_conn);
 
-			tfw_connection_put(conn);
+			tfw_srv_conn_put(srv_conn);
 			sched_helper_hash.free_sched_arg(msg);
 		}
 	}
diff --git a/tempesta_fw/t/unit/test_sched_http.c b/tempesta_fw/t/unit/test_sched_http.c
index 02ac2f3b7..626a88143 100644
--- a/tempesta_fw/t/unit/test_sched_http.c
+++ b/tempesta_fw/t/unit/test_sched_http.c
@@ -81,7 +81,7 @@ static void
 test_req(char *req_str, TfwSrvConnection *expect_conn)
 {
 	TfwScheduler *sched;
-	TfwConnection *conn;
+	TfwSrvConnection *srv_conn;
 	TfwHttpReq *req = test_req_alloc(req_str? strlen(req_str): 1);
 
 	if (req_str) {
@@ -94,11 +94,11 @@ test_req(char *req_str, TfwSrvConnection *expect_conn)
 	}
 
 	sched = tfw_sched_lookup("http");
-	conn = sched->sched_grp((TfwMsg *)req);
-	EXPECT_EQ(conn, &expect_conn->conn);
+	srv_conn = sched->sched_grp((TfwMsg *)req);
+	EXPECT_EQ(srv_conn, expect_conn);
 
 	test_req_free(req);
-	tfw_connection_put(conn);
+	tfw_srv_conn_put(srv_conn);
 }
 
 TEST(tfw_sched_http, zero_rules_and_zero_conns)
@@ -131,7 +131,7 @@ TEST(tfw_sched_http, one_wildcard_rule)
 	sg = test_create_sg("default", "round-robin");
 	srv = test_create_srv("127.0.0.1", sg);
 	expect_conn = test_create_conn((TfwPeer *)srv);
-	sg->sched->add_conn(sg, srv, &expect_conn->conn);
+	sg->sched->add_conn(sg, srv, expect_conn);
 
 	if (parse_cfg("sched_http_rules {\nmatch default * * *;\n}\n")) {
 		TEST_FAIL("can't parse rules\n");
@@ -157,52 +157,52 @@ TEST(tfw_sched_http, some_rules)
 	sg1 = test_create_sg("sg1", "round-robin");
 	srv = test_create_srv("127.0.0.1", sg1);
 	expect_conn1 = test_create_conn((TfwPeer *)srv);
-	sg1->sched->add_conn(sg1, srv, &expect_conn1->conn);
+	sg1->sched->add_conn(sg1, srv, expect_conn1);
 
 	sg2 = test_create_sg("sg2", "round-robin");
 	srv = test_create_srv("127.0.0.1", sg2);
 	expect_conn2 = test_create_conn((TfwPeer *)srv);
-	sg2->sched->add_conn(sg2, srv, &expect_conn2->conn);
+	sg2->sched->add_conn(sg2, srv, expect_conn2);
 
 	sg3 = test_create_sg("sg3", "round-robin");
 	srv = test_create_srv("127.0.0.1", sg3);
 	expect_conn3 = test_create_conn((TfwPeer *)srv);
-	sg3->sched->add_conn(sg3, srv, &expect_conn3->conn);
+	sg3->sched->add_conn(sg3, srv, expect_conn3);
 
 	sg4 = test_create_sg("sg4", "round-robin");
 	srv = test_create_srv("127.0.0.1", sg4);
 	expect_conn4 = test_create_conn((TfwPeer *)srv);
-	sg4->sched->add_conn(sg4, srv, &expect_conn4->conn);
+	sg4->sched->add_conn(sg4, srv, expect_conn4);
 
 	sg5 = test_create_sg("sg5", "round-robin");
 	srv = test_create_srv("127.0.0.1", sg5);
 	expect_conn5 = test_create_conn((TfwPeer *)srv);
-	sg5->sched->add_conn(sg5, srv, &expect_conn5->conn);
+	sg5->sched->add_conn(sg5, srv, expect_conn5);
 
 	sg6 = test_create_sg("sg6", "round-robin");
 	srv = test_create_srv("127.0.0.1", sg6);
 	expect_conn6 = test_create_conn((TfwPeer *)srv);
-	sg6->sched->add_conn(sg6, srv, &expect_conn6->conn);
+	sg6->sched->add_conn(sg6, srv, expect_conn6);
 
 	sg7 = test_create_sg("sg7", "round-robin");
 	srv = test_create_srv("127.0.0.1", sg7);
 	expect_conn7 = test_create_conn((TfwPeer *)srv);
-	sg7->sched->add_conn(sg7, srv, &expect_conn7->conn);
+	sg7->sched->add_conn(sg7, srv, expect_conn7);
 
 	sg8 = test_create_sg("sg8", "round-robin");
 	srv = test_create_srv("127.0.0.1", sg8);
 	expect_conn8 = test_create_conn((TfwPeer *)srv);
-	sg8->sched->add_conn(sg8, srv, &expect_conn8->conn);
+	sg8->sched->add_conn(sg8, srv, expect_conn8);
 
 	sg9 = test_create_sg("sg9", "round-robin");
 	srv = test_create_srv("127.0.0.1", sg9);
 	expect_conn9 = test_create_conn((TfwPeer *)srv);
-	sg9->sched->add_conn(sg9, srv, &expect_conn9->conn);
+	sg9->sched->add_conn(sg9, srv, expect_conn9);
 
 	sg10 = test_create_sg("sg10", "round-robin");
 	srv = test_create_srv("127.0.0.1", sg10);
 	expect_conn10 = test_create_conn((TfwPeer *)srv);
-	sg10->sched->add_conn(sg10, srv, &expect_conn10->conn);
+	sg10->sched->add_conn(sg10, srv, expect_conn10);
 
 	if (parse_cfg("sched_http_rules {\nmatch sg1 uri eq /foo;\n\
 	                                   match sg2 uri prefix /foo/bar;\n\
@@ -317,7 +317,7 @@ TEST(tfw_sched_http, one_rule)
 		sg = test_create_sg("default", "round-robin");
 		srv = test_create_srv("127.0.0.1", sg);
 		expect_conn = test_create_conn((TfwPeer *)srv);
-		sg->sched->add_conn(sg, srv, &expect_conn->conn);
+		sg->sched->add_conn(sg, srv, expect_conn);
 
 		if (parse_cfg(test_cases[i].rule_str)) {
 			TEST_FAIL("can't parse rules\n");
diff --git a/tempesta_fw/t/unit/test_sched_rr.c b/tempesta_fw/t/unit/test_sched_rr.c
index 3852cb953..7d982d2b3 100644
--- a/tempesta_fw/t/unit/test_sched_rr.c
+++ b/tempesta_fw/t/unit/test_sched_rr.c
@@ -88,9 +88,9 @@ TEST(tfw_sched_rr, one_srv_in_sg_and_max_conn)
 	TfwServer *srv = test_create_srv("127.0.0.1", sg);
 
 	for (i = 0; i < TFW_SRV_MAX_CONN; ++i) {
-		TfwSrvConnection *sconn = test_create_conn((TfwPeer *)srv);
-		sg->sched->add_conn(sg, srv, &sconn->conn);
-		conn_acc ^= (long long)&sconn->conn;
+		TfwSrvConnection *srv_conn = test_create_conn((TfwPeer *)srv);
+		sg->sched->add_conn(sg, srv, srv_conn);
+		conn_acc ^= (long long)srv_conn;
 	}
 
 	/*
@@ -102,11 +102,12 @@ TEST(tfw_sched_rr, one_srv_in_sg_and_max_conn)
 
 		for (j = 0; j < TFW_SRV_MAX_CONN; ++j) {
 			TfwMsg *msg = sched_helper_rr.get_sched_arg(i);
-			TfwConnection *conn = sg->sched->sched_srv(msg, sg);
-			EXPECT_NOT_NULL(conn);
+			TfwSrvConnection *srv_conn =
+				sg->sched->sched_srv(msg, sg);
+			EXPECT_NOT_NULL(srv_conn);
 
-			conn_acc_check ^= (long long)conn;
-			tfw_connection_put(conn);
+			conn_acc_check ^= (long long)srv_conn;
+			tfw_srv_conn_put(srv_conn);
 			sched_helper_rr.free_sched_arg(msg);
 		}
 
@@ -140,10 +141,10 @@ TEST(tfw_sched_rr, max_srv_in_sg_and_max_conn)
 		TfwServer *srv = test_create_srv("127.0.0.1", sg);
 
 		for (j = 0; j < TFW_SRV_MAX_CONN; ++j) {
-			TfwSrvConnection *sconn =
+			TfwSrvConnection *srv_conn =
 					test_create_conn((TfwPeer *)srv);
-			sg->sched->add_conn(sg, srv, &sconn->conn);
-			conn_acc ^= (long long)&(sconn->conn);
+			sg->sched->add_conn(sg, srv, srv_conn);
+			conn_acc ^= (long long)srv_conn;
 		}
 	}
 
@@ -156,11 +157,12 @@ TEST(tfw_sched_rr, max_srv_in_sg_and_max_conn)
 
 		for (j = 0; j < TFW_SG_MAX_SRV * TFW_SRV_MAX_CONN; ++j) {
 			TfwMsg *msg = sched_helper_rr.get_sched_arg(i);
-			TfwConnection *conn = sg->sched->sched_srv(msg, sg);
-			EXPECT_NOT_NULL(conn);
+			TfwSrvConnection *srv_conn =
+				sg->sched->sched_srv(msg, sg);
+			EXPECT_NOT_NULL(srv_conn);
 
-			conn_acc_check ^= (long long)conn;
-			tfw_connection_put(conn);
+			conn_acc_check ^= (long long)srv_conn;
+			tfw_srv_conn_put(srv_conn);
 			sched_helper_rr.free_sched_arg(msg);
 		}
 
diff --git a/tempesta_fw/tls.c b/tempesta_fw/tls.c
index 80cbb5474..46ff673cc 100644
--- a/tempesta_fw/tls.c
+++ b/tempesta_fw/tls.c
@@ -262,7 +262,7 @@ tfw_tls_conn_dtor(TfwConnection *c)
 	TfwTlsContext *tls = tfw_tls_context(c);
 
 	mbedtls_ssl_free(&tls->ssl);
-	tfw_cli_conn_release(c);
+	tfw_cli_conn_release((TfwCliConnection *)c);
 }
 
 static int

From 3984c9bb6b37735a067897248dd75cdad30418bb Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Fri, 27 Jan 2017 19:51:54 +0300
Subject: [PATCH 50/65] Rename TfwConnection{} to TfwConn{}.

Also, rename:
TfwCliConnection{} to TfwCliConn{}
TfwSrvConnection{} to TfwSrvConn{}
---
 tempesta_fw/classifier/frang.c        |  24 ++---
 tempesta_fw/client.h                  |   4 +-
 tempesta_fw/connection.c              |  18 ++--
 tempesta_fw/connection.h              | 130 +++++++++++++-------------
 tempesta_fw/http.c                    |  98 ++++++++++---------
 tempesta_fw/http.h                    |   4 +-
 tempesta_fw/http_sess.c               |   4 +-
 tempesta_fw/procfs.c                  |   2 +-
 tempesta_fw/sched.c                   |   4 +-
 tempesta_fw/sched/tfw_sched_hash.c    |  11 +--
 tempesta_fw/sched/tfw_sched_http.c    |   6 +-
 tempesta_fw/sched/tfw_sched_rr.c      |  27 +++---
 tempesta_fw/server.c                  |   2 +-
 tempesta_fw/server.h                  |  19 ++--
 tempesta_fw/sock_clnt.c               |  63 ++++++-------
 tempesta_fw/sock_srv.c                | 119 ++++++++++++-----------
 tempesta_fw/t/unit/helpers.c          |   6 +-
 tempesta_fw/t/unit/sched_helper.c     |  19 ++--
 tempesta_fw/t/unit/sched_helper.h     |   2 +-
 tempesta_fw/t/unit/test_http_sticky.c |  10 +-
 tempesta_fw/t/unit/test_sched_hash.c  |  15 ++-
 tempesta_fw/t/unit/test_sched_http.c  |  15 ++-
 tempesta_fw/t/unit/test_sched_rr.c    |  11 +--
 tempesta_fw/tls.c                     |  20 ++--
 24 files changed, 309 insertions(+), 324 deletions(-)

diff --git a/tempesta_fw/classifier/frang.c b/tempesta_fw/classifier/frang.c
index 411e3a152..c85eb6fc1 100644
--- a/tempesta_fw/classifier/frang.c
+++ b/tempesta_fw/classifier/frang.c
@@ -156,12 +156,12 @@ frang_conn_limit(FrangAcc *ra, struct sock *unused)
 	}
 
 	/*
-	 * Increment connection counters even if we return TFW_BLOCK.
-	 * Linux will call sk_free() from inet_csk_clone_lock(),
-	 * so our frang_conn_close() is also called. conn_curr is
-	 * decremented there, but conn_new is not changed. We count
-	 * both failed connection attempts and connections that were
-	 * successfully established.
+	 * Increment connection counters even when we return TFW_BLOCK.
+	 * Linux will call sk_free() from inet_csk_clone_lock(), so our
+	 * frang_conn_close() is also called. @conn_curr is decremented
+	 * there, but @conn_new is not changed. We count both failed
+	 * connection attempts and connections that were successfully
+	 * established.
 	 */
 	ra->history[i].conn_new++;
 	ra->conn_curr++;
@@ -219,14 +219,14 @@ frang_conn_new(struct sock *sk)
 	spin_lock(&ra->lock);
 
 	/*
-	 * sk->sk_user_data references TfwConnection which in turn references
-	 * TfwPeer, so basically we can get FrangAcc from TfwConnection.
+	 * sk->sk_user_data references TfwConn{} which in turn references
+	 * TfwPeer, so basically we can get FrangAcc from TfwConn{}.
 	 * However, socket can live (for a short period of time, when kernel
 	 * just allocated the socket and called tempesta_new_clntsk()) w/o
-	 * TfwConnection and vise versa - TfwConnection can leave w/o socket
+	 * TfwConn{} and vice versa - TfwConn{} can live w/o socket
 	 * (e.g. server connections during failover). Thus to keep design
 	 * consistent we  two references to TfwPeer: from socket and
-	 * TfwConnection.
+	 * TfwConn{}.
 	 */
 	sk->sk_security = ra;
 
@@ -601,7 +601,7 @@ do {									\
 } while (0)
 
 static int
-frang_http_req_process(FrangAcc *ra, TfwConnection *conn, struct sk_buff *skb,
+frang_http_req_process(FrangAcc *ra, TfwConn *conn, struct sk_buff *skb,
 		       unsigned int off)
 {
 	int r = TFW_PASS;
@@ -848,7 +848,7 @@ static int
 frang_http_req_handler(void *obj, struct sk_buff *skb, unsigned int off)
 {
 	int r;
-	TfwConnection *conn = (TfwConnection *)obj;
+	TfwConn *conn = (TfwConn *)obj;
 	FrangAcc *ra = conn->sk->sk_security;
 
 	r = frang_http_req_process(ra, conn, skb, off);
diff --git a/tempesta_fw/client.h b/tempesta_fw/client.h
index 83b7c292c..11ffde4e2 100644
--- a/tempesta_fw/client.h
+++ b/tempesta_fw/client.h
@@ -47,8 +47,8 @@ typedef struct {
 TfwClient *tfw_client_obtain(struct sock *sk, void (*init)(TfwClient *));
 void tfw_client_put(TfwClient *cli);
 int tfw_client_for_each(int (*fn)(TfwClient *));
-void tfw_cli_conn_release(TfwCliConnection *cli_conn);
-int tfw_cli_conn_send(TfwCliConnection *cli_conn, TfwMsg *msg);
+void tfw_cli_conn_release(TfwCliConn *cli_conn);
+int tfw_cli_conn_send(TfwCliConn *cli_conn, TfwMsg *msg);
 int tfw_sock_check_listeners(void);
 
 #endif /* __TFW_CLIENT_H__ */
diff --git a/tempesta_fw/connection.c b/tempesta_fw/connection.c
index 88cac3541..c9186bc50 100644
--- a/tempesta_fw/connection.c
+++ b/tempesta_fw/connection.c
@@ -32,14 +32,14 @@ TfwConnHooks *conn_hooks[TFW_CONN_MAX_PROTOS];
  * It's not on any list yet, so it's safe to do so without locks.
  */
 void
-tfw_connection_init(TfwConnection *conn)
+tfw_connection_init(TfwConn *conn)
 {
 	memset(conn, 0, sizeof(*conn));
 	INIT_LIST_HEAD(&conn->list);
 }
 
 void
-tfw_connection_link_peer(TfwConnection *conn, TfwPeer *peer)
+tfw_connection_link_peer(TfwConn *conn, TfwPeer *peer)
 {
 	BUG_ON(conn->peer || !list_empty(&conn->list));
 	conn->peer = peer;
@@ -50,7 +50,7 @@ tfw_connection_link_peer(TfwConnection *conn, TfwPeer *peer)
  * Publish the "connection is established" event via TfwConnHooks.
  */
 int
-tfw_connection_new(TfwConnection *conn)
+tfw_connection_new(TfwConn *conn)
 {
 	return TFW_CONN_HOOK_CALL(conn, conn_init);
 }
@@ -59,7 +59,7 @@ tfw_connection_new(TfwConnection *conn)
  * Call connection repairing via TfwConnHooks.
  */
 void
-tfw_connection_repair(TfwConnection *conn)
+tfw_connection_repair(TfwConn *conn)
 {
 	TFW_CONN_HOOK_CALL(conn, conn_repair);
 }
@@ -68,7 +68,7 @@ tfw_connection_repair(TfwConnection *conn)
  * Publish the "connection is dropped" event via TfwConnHooks.
  */
 void
-tfw_connection_drop(TfwConnection *conn)
+tfw_connection_drop(TfwConn *conn)
 {
 	/* Ask higher levels to free resources at connection close. */
 	TFW_CONN_HOOK_CALL(conn, conn_drop);
@@ -79,12 +79,12 @@ tfw_connection_drop(TfwConnection *conn)
  * Publish the "connection is released" event via TfwConnHooks.
  */
 void
-tfw_connection_release(TfwConnection *conn)
+tfw_connection_release(TfwConn *conn)
 {
 	/* Ask higher levels to free resources at connection release. */
 	TFW_CONN_HOOK_CALL(conn, conn_release);
 	BUG_ON((TFW_CONN_TYPE(conn) & Conn_Clnt)
-	       && !list_empty(&((TfwCliConnection *)conn)->seq_queue));
+	       && !list_empty(&((TfwCliConn *)conn)->seq_queue));
 }
 
 /*
@@ -94,7 +94,7 @@ tfw_connection_release(TfwConnection *conn)
  * only on an active socket.
  */
 int
-tfw_connection_send(TfwConnection *conn, TfwMsg *msg)
+tfw_connection_send(TfwConn *conn, TfwMsg *msg)
 {
 	return TFW_CONN_HOOK_CALL(conn, conn_send, msg);
 }
@@ -102,7 +102,7 @@ tfw_connection_send(TfwConnection *conn, TfwMsg *msg)
 int
 tfw_connection_recv(void *cdata, struct sk_buff *skb, unsigned int off)
 {
-	TfwConnection *conn = cdata;
+	TfwConn *conn = cdata;
 
 	return tfw_gfsm_dispatch(&conn->state, conn, skb, off);
 }
diff --git a/tempesta_fw/connection.h b/tempesta_fw/connection.h
index e34abe105..d389a61a4 100644
--- a/tempesta_fw/connection.h
+++ b/tempesta_fw/connection.h
@@ -54,23 +54,23 @@ enum {
 /**
  * Session/Presentation layer (in OSI terms) handling.
  *
- * An instance of TfwConnection{} structure links each HTTP message to
- * attributes of a connection the message has come on. Some of those
- * messages may stay longer in Tempesta after they're sent out to their
- * destinations. Requests are kept until a paired response comes. By the
- * time the request's connection is needed for sending the response, it
- * may already be destroyed. With that in mind, TfwConnection{} instance
- * is not destroyed along with the connection so that it can be safely
- * dereferenced. It's kept around until refcnt permits freeing of the
- * instance, so it may have longer lifetime than the connection itself.
+ * An instance of TfwConn{} structure links each HTTP message to properties
+ * of a connection the message has come on. Some messages may stay longer
+ * in Tempesta after they're sent out to their destinations. Requests are
+ * kept until a paired response comes. By the time the request's connection
+ * is needed for sending the response, it may be destroyed already. Thus,
+ * TfwConn{} instance is not destroyed along with the connection so that
+ * it can be safely dereferenced. It's kept around until refcnt's value
+ * permits freeing of the instance, so it may have longer lifetime than
+ * the connection itself.
  *
- * @sk is an intrinsic property of TfwConnection{}.
- * It has exactly the same lifetime as an instance of TfwConnection{}.
+ * @sk is an intrinsic property of TfwConn{}.
+ * It has exactly the same lifetime as an instance of TfwConn{}.
  *
- * @peer is major property of TfwConnection{}. An instance of @peer has
- * longer lifetime expectation than a connection. @peer is always valid
- * while it's referenced from an instance of TfwConnection{}. That is
- * supported by a separate reference counter in @peer.
+ * @peer is major property of TfwConn{}. An instance of @peer has longer
+ * lifetime expectation than a connection. @peer is always valid while
+ * it's referenced from an instance of TfwConn{}. That is supported by
+ * a separate reference counter in @peer.
  *
  * These are the properties of a connection that are common to client
  * and server connections.
@@ -86,7 +86,7 @@ enum {
  * @destructor	- called when a connection is destroyed;
  * @forward	- called when a request is forwarded to server;
  */
-#define TFW_CONNECTION_COMMON				\
+#define TFW_CONN_COMMON					\
 	SsProto			proto;			\
 	TfwGState		state;			\
 	struct list_head	list;			\
@@ -98,8 +98,8 @@ enum {
 	void			(*destructor)(void *);
 
 typedef struct {
-	TFW_CONNECTION_COMMON;
-} TfwConnection;
+	TFW_CONN_COMMON;
+} TfwConn;
 
 #define TFW_CONN_TYPE(c)	((c)->proto.type)
 
@@ -111,11 +111,11 @@ typedef struct {
  * @ret_qlock	- lock for serializing sets of responses;
  */
 typedef struct {
-	TFW_CONNECTION_COMMON;
+	TFW_CONN_COMMON;
 	struct list_head	seq_queue;
 	spinlock_t		seq_qlock;
 	spinlock_t		ret_qlock;
-} TfwCliConnection;
+} TfwCliConn;
 
 /*
  * These are specific properties that are relevant to server connections.
@@ -130,7 +130,7 @@ typedef struct {
  * @msg_sent	- request that was sent last in a server connection;
  */
 typedef struct {
-	TFW_CONNECTION_COMMON;
+	TFW_CONN_COMMON;
 	struct list_head	fwd_queue;
 	struct list_head	nip_queue;
 	spinlock_t		fwd_qlock;
@@ -138,7 +138,7 @@ typedef struct {
 	unsigned int		qsize;
 	unsigned int		recns;
 	TfwMsg			*msg_sent;
-} TfwSrvConnection;
+} TfwSrvConn;
 
 #define TFW_CONN_DEATHCNT	(INT_MIN / 2)
 
@@ -159,11 +159,11 @@ enum {
  * TLS hardened connection.
  */
 typedef struct {
-	TfwConnection		conn;
-	TfwTlsContext		tls;
-} TfwTlsConnection;
+	TfwConn		conn;
+	TfwTlsContext	tls;
+} TfwTlsConn;
 
-#define tfw_tls_context(p)	(TfwTlsContext *)(&((TfwTlsConnection *)p)->tls)
+#define tfw_tls_context(p)	(TfwTlsContext *)(&((TfwTlsConn *)p)->tls)
 
 /* Callbacks used by l5-l7 protocols to operate on connection level. */
 typedef struct {
@@ -173,7 +173,7 @@ typedef struct {
 	 * This is a good place to handle Access or GEO modules
 	 * (block a client or bind its descriptor with GEO data).
 	 */
-	int (*conn_init)(TfwConnection *conn);
+	int (*conn_init)(TfwConn *conn);
 
 	/*
 	 * Called when a new connection is initialized and before
@@ -181,26 +181,26 @@ typedef struct {
 	 * server connections. Used to re-send requests that were
 	 * left in the connection queue.
 	 */
-	void (*conn_repair)(TfwConnection *conn);
+	void (*conn_repair)(TfwConn *conn);
 
 	/*
 	 * Called when closing a connection (client or server,
 	 * as in conn_init()). This is required for modules that
 	 * maintain the number of established client connections.
 	 */
-	void (*conn_drop)(TfwConnection *conn);
+	void (*conn_drop)(TfwConn *conn);
 
 	/*
 	 * Called when there are no more users of a connection
 	 * and the connections's resources are finally released.
 	 */
-	void (*conn_release)(TfwConnection *conn);
+	void (*conn_release)(TfwConn *conn);
 
 	/*
 	 * Called by the connection layer when there is a message
 	 * that needs to be send.
 	 */
-	int (*conn_send)(TfwConnection *conn, TfwMsg *msg);
+	int (*conn_send)(TfwConn *conn, TfwMsg *msg);
 } TfwConnHooks;
 
 #define TFW_CONN_MAX_PROTOS	TFW_GFSM_FSM_N
@@ -225,7 +225,7 @@ extern TfwConnHooks *conn_hooks[TFW_CONN_MAX_PROTOS];
  * are re-sent.
  */
 static inline bool
-tfw_srv_conn_restricted(TfwSrvConnection *srv_conn)
+tfw_srv_conn_restricted(TfwSrvConn *srv_conn)
 {
 	return test_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
 }
@@ -234,36 +234,36 @@ tfw_srv_conn_restricted(TfwSrvConnection *srv_conn)
  * Tell if a connection has non-idempotent requests.
  */
 static inline bool
-tfw_srv_conn_hasnip(TfwSrvConnection *srv_conn)
+tfw_srv_conn_hasnip(TfwSrvConn *srv_conn)
 {
 	return test_bit(TFW_CONN_B_HASNIP, &srv_conn->flags);
 }
 
 static inline bool
-tfw_connection_live(TfwConnection *conn)
+tfw_connection_live(TfwConn *conn)
 {
 	return atomic_read(&conn->refcnt) > 0;
 }
 static inline bool
-tfw_srv_conn_live(TfwSrvConnection *srv_conn)
+tfw_srv_conn_live(TfwSrvConn *srv_conn)
 {
-	return tfw_connection_live((TfwConnection *)srv_conn);
+	return tfw_connection_live((TfwConn *)srv_conn);
 }
 
 static inline void
-tfw_connection_get(TfwConnection *conn)
+tfw_connection_get(TfwConn *conn)
 {
 	atomic_inc(&conn->refcnt);
 }
 static inline void
-tfw_cli_conn_get(TfwCliConnection *cli_conn)
+tfw_cli_conn_get(TfwCliConn *cli_conn)
 {
-	tfw_connection_get((TfwConnection *)cli_conn);
+	tfw_connection_get((TfwConn *)cli_conn);
 }
 static inline void
-tfw_srv_conn_get(TfwSrvConnection *srv_conn)
+tfw_srv_conn_get(TfwSrvConn *srv_conn)
 {
-	tfw_connection_get((TfwConnection *)srv_conn);
+	tfw_connection_get((TfwConn *)srv_conn);
 }
 
 /**
@@ -271,7 +271,7 @@ tfw_srv_conn_get(TfwSrvConnection *srv_conn)
  * failovering process, i.e. @refcnt wasn't less or equal to zero.
  */
 static inline bool
-__tfw_connection_get_if_live(TfwConnection *conn)
+__tfw_connection_get_if_live(TfwConn *conn)
 {
 	int old, rc = atomic_read(&conn->refcnt);
 
@@ -285,13 +285,13 @@ __tfw_connection_get_if_live(TfwConnection *conn)
 	return false;
 }
 static inline bool
-tfw_srv_conn_get_if_live(TfwSrvConnection *srv_conn)
+tfw_srv_conn_get_if_live(TfwSrvConn *srv_conn)
 {
-	return __tfw_connection_get_if_live((TfwConnection *)srv_conn);
+	return __tfw_connection_get_if_live((TfwConn *)srv_conn);
 }
 
 static inline void
-tfw_connection_put(TfwConnection *conn)
+tfw_connection_put(TfwConn *conn)
 {
 	int rc;
 
@@ -305,24 +305,24 @@ tfw_connection_put(TfwConnection *conn)
 		conn->destructor(conn);
 }
 static inline void
-tfw_cli_conn_put(TfwCliConnection *cli_conn)
+tfw_cli_conn_put(TfwCliConn *cli_conn)
 {
-	tfw_connection_put((TfwConnection *)cli_conn);
+	tfw_connection_put((TfwConn *)cli_conn);
 }
 static inline void
-tfw_srv_conn_put(TfwSrvConnection *srv_conn)
+tfw_srv_conn_put(TfwSrvConn *srv_conn)
 {
-	tfw_connection_put((TfwConnection *)srv_conn);
+	tfw_connection_put((TfwConn *)srv_conn);
 }
 
 static inline void
-tfw_connection_put_to_death(TfwConnection *conn)
+tfw_connection_put_to_death(TfwConn *conn)
 {
 	atomic_add(TFW_CONN_DEATHCNT, &conn->refcnt);
 }
 
 static inline void
-tfw_connection_revive(TfwConnection *conn)
+tfw_connection_revive(TfwConn *conn)
 {
 	atomic_set(&conn->refcnt, 1);
 }
@@ -334,7 +334,7 @@ tfw_connection_revive(TfwConnection *conn)
  * the reference to @conn instance for the socket can be found quickly.
  */
 static inline void
-tfw_connection_link_from_sk(TfwConnection *conn, struct sock *sk)
+tfw_connection_link_from_sk(TfwConn *conn, struct sock *sk)
 {
 	BUG_ON(sk->sk_user_data);
 	sk->sk_user_data = conn;
@@ -347,7 +347,7 @@ tfw_connection_link_from_sk(TfwConnection *conn, struct sock *sk)
  * get a hold of the socket to avoid premature socket release.
  */
 static inline void
-tfw_connection_link_to_sk(TfwConnection *conn, struct sock *sk)
+tfw_connection_link_to_sk(TfwConn *conn, struct sock *sk)
 {
 	ss_sock_hold(sk);
 	conn->sk = sk;
@@ -374,7 +374,7 @@ tfw_connection_unlink_from_sk(struct sock *sk)
  * on the socket. A zeroed conn->sk is that indicator.
  */
 static inline void
-tfw_connection_unlink_to_sk(TfwConnection *conn)
+tfw_connection_unlink_to_sk(TfwConn *conn)
 {
 	struct sock *sk = conn->sk;
 
@@ -383,23 +383,23 @@ tfw_connection_unlink_to_sk(TfwConnection *conn)
 }
 
 static inline void
-tfw_connection_unlink_from_peer(TfwConnection *conn)
+tfw_connection_unlink_from_peer(TfwConn *conn)
 {
 	BUG_ON(!conn->peer || list_empty(&conn->list));
 	tfw_peer_del_conn(conn->peer, &conn->list);
 }
 
 static inline void
-tfw_connection_unlink_msg(TfwConnection *conn)
+tfw_connection_unlink_msg(TfwConn *conn)
 {
 	conn->msg = NULL;
 }
 
 /**
- * Check that TfwConnection resources are cleaned up properly.
+ * Check that TfwConn{} resources are cleaned up properly.
  */
 static inline void
-tfw_connection_validate_cleanup(TfwConnection *conn)
+tfw_connection_validate_cleanup(TfwConn *conn)
 {
 	int rc;
 
@@ -414,16 +414,16 @@ tfw_connection_validate_cleanup(TfwConnection *conn)
 
 void tfw_connection_hooks_register(TfwConnHooks *hooks, int type);
 void tfw_connection_hooks_unregister(int type);
-int tfw_connection_send(TfwConnection *conn, TfwMsg *msg);
+int tfw_connection_send(TfwConn *conn, TfwMsg *msg);
 
 /* Generic helpers, used for both client and server connections. */
-void tfw_connection_init(TfwConnection *conn);
-void tfw_connection_link_peer(TfwConnection *conn, TfwPeer *peer);
+void tfw_connection_init(TfwConn *conn);
+void tfw_connection_link_peer(TfwConn *conn, TfwPeer *peer);
 
-int tfw_connection_new(TfwConnection *conn);
-void tfw_connection_repair(TfwConnection *conn);
-void tfw_connection_drop(TfwConnection *conn);
-void tfw_connection_release(TfwConnection *conn);
+int tfw_connection_new(TfwConn *conn);
+void tfw_connection_repair(TfwConn *conn);
+void tfw_connection_drop(TfwConn *conn);
+void tfw_connection_release(TfwConn *conn);
 
 int tfw_connection_recv(void *cdata, struct sk_buff *skb, unsigned int off);
 
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 37005c920..d2e909dfe 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -391,7 +391,7 @@ tfw_http_req_is_nip(TfwHttpReq *req)
  * @req must be confirmed to be on the list.
  */
 static inline void
-__tfw_http_req_nip_delist(TfwSrvConnection *srv_conn, TfwHttpReq *req)
+__tfw_http_req_nip_delist(TfwSrvConn *srv_conn, TfwHttpReq *req)
 {
 	BUG_ON(list_empty(&req->nip_list));
 	list_del_init(&req->nip_list);
@@ -404,7 +404,7 @@ __tfw_http_req_nip_delist(TfwSrvConnection *srv_conn, TfwHttpReq *req)
  * Raise the flag saying that the connection has non-idempotent requests.
  */
 static inline void
-__tfw_http_req_nip_enlist(TfwSrvConnection *srv_conn, TfwHttpReq *req)
+__tfw_http_req_nip_enlist(TfwSrvConn *srv_conn, TfwHttpReq *req)
 {
 	BUG_ON(!list_empty(&req->nip_list));
 	list_add_tail(&req->nip_list, &srv_conn->nip_queue);
@@ -417,7 +417,7 @@ __tfw_http_req_nip_enlist(TfwSrvConnection *srv_conn, TfwHttpReq *req)
  * the list.
  */
 static inline void
-tfw_http_req_nip_delist(TfwSrvConnection *srv_conn, TfwHttpReq *req)
+tfw_http_req_nip_delist(TfwSrvConn *srv_conn, TfwHttpReq *req)
 {
 	if (!list_empty(&req->nip_list))
 		__tfw_http_req_nip_delist(srv_conn, req);
@@ -431,7 +431,7 @@ tfw_http_req_nip_delist(TfwSrvConnection *srv_conn, TfwHttpReq *req)
  * to tfw_http_req_add_seq_queue().
  */
 static inline void
-tfw_http_conn_nip_delist(TfwSrvConnection *srv_conn)
+tfw_http_conn_nip_delist(TfwSrvConn *srv_conn)
 {
 	TfwHttpReq *req, *tmp;
 
@@ -447,7 +447,7 @@ tfw_http_conn_nip_delist(TfwSrvConnection *srv_conn)
  * It's on hold it the request that was sent last was non-idempotent.
  */
 static inline bool
-tfw_http_conn_on_hold(TfwSrvConnection *srv_conn)
+tfw_http_conn_on_hold(TfwSrvConn *srv_conn)
 {
 	TfwHttpReq *req_sent = (TfwHttpReq *)srv_conn->msg_sent;
 
@@ -461,7 +461,7 @@ tfw_http_conn_on_hold(TfwSrvConnection *srv_conn)
  * request that was sent last.
  */
 static inline bool
-tfw_http_conn_drained(TfwSrvConnection *srv_conn)
+tfw_http_conn_drained(TfwSrvConn *srv_conn)
 {
 	struct list_head *fwd_queue = &srv_conn->fwd_queue;
 	TfwHttpReq *req_sent = (TfwHttpReq *)srv_conn->msg_sent;
@@ -482,7 +482,7 @@ tfw_http_conn_drained(TfwSrvConnection *srv_conn)
  * that need to be forwarded.
  */
 static inline bool
-tfw_http_conn_need_fwd(TfwSrvConnection *srv_conn)
+tfw_http_conn_need_fwd(TfwSrvConn *srv_conn)
 {
 	return (!tfw_http_conn_on_hold(srv_conn)
 		&& !tfw_http_conn_drained(srv_conn));
@@ -492,7 +492,7 @@ tfw_http_conn_need_fwd(TfwSrvConnection *srv_conn)
  * Remove @req from the server connection's forwarding queue.
  */
 static inline void
-tfw_http_req_delist(TfwSrvConnection *srv_conn, TfwHttpReq *req)
+tfw_http_req_delist(TfwSrvConn *srv_conn, TfwHttpReq *req)
 {
 	tfw_http_req_nip_delist(srv_conn, req);
 	list_del_init(&req->fwd_list);
@@ -505,7 +505,7 @@ tfw_http_req_delist(TfwSrvConnection *srv_conn, TfwHttpReq *req)
  * in @equeue. The error code for an error response is saved as well.
  */
 static inline void
-tfw_http_req_move2equeue(TfwSrvConnection *srv_conn, TfwHttpReq *req,
+tfw_http_req_move2equeue(TfwSrvConn *srv_conn, TfwHttpReq *req,
 			 struct list_head *equeue, unsigned short status)
 {
 	tfw_http_req_delist(srv_conn, req);
@@ -552,7 +552,7 @@ tfw_http_req_zap_error(struct list_head *equeue)
  * move it to the error queue @equeue for sending an error response later.
  */
 static inline bool
-tfw_http_req_evict_timeout(TfwSrvConnection *srv_conn, TfwServer *srv,
+tfw_http_req_evict_timeout(TfwSrvConn *srv_conn, TfwServer *srv,
 			   TfwHttpReq *req, struct list_head *equeue)
 {
 	unsigned long jqage = jiffies - req->jrxtstamp;
@@ -572,7 +572,7 @@ tfw_http_req_evict_timeout(TfwSrvConnection *srv_conn, TfwServer *srv,
  * move it to the error queue @equeue for sending an error response later.
  */
 static inline bool
-tfw_http_req_evict_retries(TfwSrvConnection *srv_conn, TfwServer *srv,
+tfw_http_req_evict_retries(TfwSrvConn *srv_conn, TfwServer *srv,
 			   TfwHttpReq *req, struct list_head *equeue)
 {
 	if (unlikely(req->retries++ >= srv->sg->max_refwd)) {
@@ -589,12 +589,12 @@ tfw_http_req_evict_retries(TfwSrvConnection *srv_conn, TfwServer *srv,
  * move it to the error queue @equeue for sending an error response later.
  */
 static inline bool
-tfw_http_req_fwd_send(TfwSrvConnection *srv_conn, TfwServer *srv,
+tfw_http_req_fwd_send(TfwSrvConn *srv_conn, TfwServer *srv,
 		      TfwHttpReq *req, struct list_head *equeue)
 {
 	req->jtxtstamp = jiffies;
 
-	if (tfw_connection_send((TfwConnection *)srv_conn, (TfwMsg *)req)) {
+	if (tfw_connection_send((TfwConn *)srv_conn, (TfwMsg *)req)) {
 		TFW_DBG2("%s: Forwarding error: conn=[%p] req=[%p]\n",
 			 __func__, srv_conn, req);
 		tfw_http_req_move2equeue(srv_conn, req, equeue, 500);
@@ -608,7 +608,7 @@ tfw_http_req_fwd_send(TfwSrvConnection *srv_conn, TfwServer *srv,
  * Return false if forwarding must be stopped, or true otherwise.
  */
 static inline bool
-__tfw_http_req_fwd_single(TfwSrvConnection *srv_conn, TfwServer *srv,
+__tfw_http_req_fwd_single(TfwSrvConn *srv_conn, TfwServer *srv,
 			  TfwHttpReq *req, struct list_head *equeue)
 {
 	if (tfw_http_req_evict_timeout(srv_conn, srv, req, equeue))
@@ -627,7 +627,7 @@ __tfw_http_req_fwd_single(TfwSrvConnection *srv_conn, TfwServer *srv,
  * IT's also assumed that the forwarding queue is NOT drained.
  */
 static void
-__tfw_http_req_fwd_unsent(TfwSrvConnection *srv_conn, struct list_head *equeue)
+__tfw_http_req_fwd_unsent(TfwSrvConn *srv_conn, struct list_head *equeue)
 {
 	TfwHttpReq *req, *tmp;
 	TfwServer *srv = (TfwServer *)srv_conn->peer;
@@ -670,7 +670,7 @@ __tfw_http_req_fwd_unsent(TfwSrvConnection *srv_conn, struct list_head *equeue)
  * It's assumed that the forwarding queue in @srv_conn is locked.
  */
 static inline void
-tfw_http_req_fwd_unsent(TfwSrvConnection *srv_conn, struct list_head *equeue)
+tfw_http_req_fwd_unsent(TfwSrvConn *srv_conn, struct list_head *equeue)
 {
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 
@@ -692,7 +692,7 @@ tfw_http_req_fwd_unsent(TfwSrvConnection *srv_conn, struct list_head *equeue)
  * See RFC 7230 6.3.2.
  */
 static void
-tfw_http_req_fwd(TfwSrvConnection *srv_conn, TfwHttpReq *req)
+tfw_http_req_fwd(TfwSrvConn *srv_conn, TfwHttpReq *req)
 {
 	LIST_HEAD(equeue);
 
@@ -730,7 +730,7 @@ tfw_http_req_fwd(TfwSrvConnection *srv_conn, TfwHttpReq *req)
  * Note: @srv_conn->msg_sent may change in result.
  */
 static inline void
-tfw_http_req_fwd_treatnip(TfwSrvConnection *srv_conn, struct list_head *equeue)
+tfw_http_req_fwd_treatnip(TfwSrvConn *srv_conn, struct list_head *equeue)
 {
 	TfwServer *srv = (TfwServer *)srv_conn->peer;
 	TfwHttpReq *req_sent = (TfwHttpReq *)srv_conn->msg_sent;
@@ -752,8 +752,7 @@ tfw_http_req_fwd_treatnip(TfwSrvConnection *srv_conn, struct list_head *equeue)
  * the set limits are evicted.
  */
 static TfwHttpReq *
-tfw_http_req_resend(TfwSrvConnection *srv_conn,
-		      bool first, struct list_head *equeue)
+tfw_http_req_resend(TfwSrvConn *srv_conn, bool first, struct list_head *equeue)
 {
 	TfwHttpReq *req, *tmp, *req_resent = NULL;
 	TfwServer *srv = (TfwServer *)srv_conn->peer;
@@ -790,7 +789,7 @@ tfw_http_req_resend(TfwSrvConnection *srv_conn,
  * Re-send only the first unanswered request in the forwarding queue.
  */
 static inline TfwHttpReq *
-tfw_http_req_resend_first(TfwSrvConnection *srv_conn, struct list_head *equeue)
+tfw_http_req_resend_first(TfwSrvConn *srv_conn, struct list_head *equeue)
 {
 	return tfw_http_req_resend(srv_conn, true, equeue);
 }
@@ -799,7 +798,7 @@ tfw_http_req_resend_first(TfwSrvConnection *srv_conn, struct list_head *equeue)
  * Re-send all unanswered requests in the forwarding queue.
  */
 static inline TfwHttpReq *
-tfw_http_req_resend_all(TfwSrvConnection *srv_conn, struct list_head *equeue)
+tfw_http_req_resend_all(TfwSrvConn *srv_conn, struct list_head *equeue)
 {
 	return tfw_http_req_resend(srv_conn, false, equeue);
 }
@@ -810,7 +809,7 @@ tfw_http_req_resend_all(TfwSrvConnection *srv_conn, struct list_head *equeue)
  * The connection is not scheduled until all requests in it are re-sent.
  */
 static void
-__tfw_http_req_fwd_repair(TfwSrvConnection *srv_conn, struct list_head *equeue)
+__tfw_http_req_fwd_repair(TfwSrvConn *srv_conn, struct list_head *equeue)
 {
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 	WARN_ON(!spin_is_locked(&srv_conn->fwd_qlock));
@@ -861,10 +860,10 @@ __tfw_http_req_fwd_repair(TfwSrvConnection *srv_conn, struct list_head *equeue)
  * Unlucky requests are just given another chance with minimal effort.
  */
 static void
-tfw_http_req_resched(TfwSrvConnection *srv_conn, struct list_head *equeue)
+tfw_http_req_resched(TfwSrvConn *srv_conn, struct list_head *equeue)
 {
 	TfwHttpReq *req, *tmp;
-	TfwSrvConnection *sch_conn;
+	TfwSrvConn *sch_conn;
 	TfwServer *srv = (TfwServer *)srv_conn->peer;
 	struct list_head *fwd_queue = &srv_conn->fwd_queue;
 
@@ -900,9 +899,9 @@ tfw_http_req_resched(TfwSrvConnection *srv_conn, struct list_head *equeue)
  * rest of those unanswered requests (__tfw_http_req_fwd_repair()).
  */
 static void
-tfw_http_conn_repair(TfwConnection *conn)
+tfw_http_conn_repair(TfwConn *conn)
 {
-	TfwSrvConnection *srv_conn = (TfwSrvConnection *)conn;
+	TfwSrvConn *srv_conn = (TfwSrvConn *)conn;
 	TfwHttpReq *req_resent = NULL;
 	LIST_HEAD(equeue);
 
@@ -961,7 +960,7 @@ tfw_http_req_destruct(void *msg)
  * of the connection structure. Initialize GFSM for the message.
  */
 static TfwMsg *
-tfw_http_conn_msg_alloc(TfwConnection *conn)
+tfw_http_conn_msg_alloc(TfwConn *conn)
 {
 	TfwHttpMsg *hm = tfw_http_msg_alloc(TFW_CONN_TYPE(conn));
 	if (unlikely(!hm))
@@ -974,7 +973,7 @@ tfw_http_conn_msg_alloc(TfwConnection *conn)
 		TFW_INC_STAT_BH(clnt.rx_messages);
 	} else {
 		TfwHttpReq *req;
-		TfwSrvConnection *srv_conn = (TfwSrvConnection *)conn;
+		TfwSrvConn *srv_conn = (TfwSrvConn *)conn;
 
 		spin_lock(&srv_conn->fwd_qlock);
 		req = list_first_entry_or_null(&srv_conn->fwd_queue,
@@ -1032,12 +1031,12 @@ tfw_http_conn_msg_free(TfwHttpMsg *hm)
  * state machine here.
  */
 static int
-tfw_http_conn_init(TfwConnection *conn)
+tfw_http_conn_init(TfwConn *conn)
 {
 	TFW_DBG2("%s: conn=[%p]\n", __func__, conn);
 
 	if (TFW_CONN_TYPE(conn) & Conn_Srv) {
-		TfwSrvConnection *srv_conn = (TfwSrvConnection *)conn;
+		TfwSrvConn *srv_conn = (TfwSrvConn *)conn;
 		if (!list_empty(&srv_conn->fwd_queue))
 			set_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
 		clear_bit(TFW_CONN_B_ISDEAD, &srv_conn->flags);
@@ -1053,7 +1052,7 @@ tfw_http_conn_init(TfwConnection *conn)
  * Called only when Tempesta is stopped.
  */
 static void
-tfw_http_conn_srv_release(TfwSrvConnection *srv_conn)
+tfw_http_conn_srv_release(TfwSrvConn *srv_conn)
 {
 	TfwHttpReq *req, *tmp;
 	struct list_head *fwd_queue = &srv_conn->fwd_queue;
@@ -1065,10 +1064,10 @@ tfw_http_conn_srv_release(TfwSrvConnection *srv_conn)
 	list_for_each_entry_safe(req, tmp, fwd_queue, fwd_list) {
 		tfw_http_req_delist(srv_conn, req);
 		if (unlikely(!list_empty_careful(&req->msg.seq_list))) {
-			spin_lock(&((TfwCliConnection *)req->conn)->seq_qlock);
+			spin_lock(&((TfwCliConn *)req->conn)->seq_qlock);
 			if (unlikely(!list_empty(&req->msg.seq_list)))
 				list_del_init(&req->msg.seq_list);
-			spin_unlock(&((TfwCliConnection *)req->conn)->seq_qlock);
+			spin_unlock(&((TfwCliConn *)req->conn)->seq_qlock);
 		}
 		tfw_http_conn_msg_free((TfwHttpMsg *)req);
 	}
@@ -1086,9 +1085,9 @@ tfw_http_conn_srv_release(TfwSrvConnection *srv_conn)
  * so locks are not needed.
  */
 static void
-tfw_http_conn_release(TfwConnection *conn)
+tfw_http_conn_release(TfwConn *conn)
 {
-	TfwSrvConnection *srv_conn = (TfwSrvConnection *)conn;
+	TfwSrvConn *srv_conn = (TfwSrvConn *)conn;
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
@@ -1129,7 +1128,7 @@ __tfw_http_resp_pair_free(TfwHttpReq *req)
  * connection threads.
  */
 static void
-tfw_http_conn_cli_drop(TfwCliConnection *cli_conn)
+tfw_http_conn_cli_drop(TfwCliConn *cli_conn)
 {
 	TfwHttpReq *req, *tmp;
 	struct list_head *seq_queue = &cli_conn->seq_queue;
@@ -1166,12 +1165,12 @@ tfw_http_conn_cli_drop(TfwCliConnection *cli_conn)
 static void tfw_http_resp_terminate(TfwHttpMsg *hm);
 
 static void
-tfw_http_conn_drop(TfwConnection *conn)
+tfw_http_conn_drop(TfwConn *conn)
 {
 	TFW_DBG2("%s: conn=[%p]\n", __func__, conn);
 
 	if (TFW_CONN_TYPE(conn) & Conn_Clnt) {
-		tfw_http_conn_cli_drop((TfwCliConnection *)conn);
+		tfw_http_conn_cli_drop((TfwCliConn *)conn);
 	} else if (conn->msg) {
 		if (tfw_http_parse_terminate((TfwHttpMsg *)conn->msg))
 			tfw_http_resp_terminate((TfwHttpMsg *)conn->msg);
@@ -1185,7 +1184,7 @@ tfw_http_conn_drop(TfwConnection *conn)
  * Called when the connection is used to send a message through.
  */
 static int
-tfw_http_conn_send(TfwConnection *conn, TfwMsg *msg)
+tfw_http_conn_send(TfwConn *conn, TfwMsg *msg)
 {
 	return ss_send(conn->sk, &msg->skb_list, msg->ss_flags);
 }
@@ -1458,7 +1457,7 @@ tfw_http_adjust_resp(TfwHttpResp *resp, TfwHttpReq *req)
  * responses are taken care of by the caller.
  */
 static void
-__tfw_http_resp_fwd(TfwCliConnection *cli_conn, struct list_head *ret_queue)
+__tfw_http_resp_fwd(TfwCliConn *cli_conn, struct list_head *ret_queue)
 {
 	TfwHttpReq *req, *tmp;
 
@@ -1483,7 +1482,7 @@ __tfw_http_resp_fwd(TfwCliConnection *cli_conn, struct list_head *ret_queue)
 void
 tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 {
-	TfwCliConnection *cli_conn = (TfwCliConnection *)req->conn;
+	TfwCliConn *cli_conn = (TfwCliConn *)req->conn;
 	struct list_head *seq_queue = &cli_conn->seq_queue;
 	struct list_head *req_retent = NULL;
 	LIST_HEAD(ret_queue);
@@ -1585,7 +1584,7 @@ static void
 tfw_http_req_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 {
 	int r;
-	TfwSrvConnection *srv_conn = NULL;
+	TfwSrvConn *srv_conn = NULL;
 
 	TFW_DBG2("%s: req = %p, resp = %p\n", __func__, req, resp);
 
@@ -1625,7 +1624,7 @@ tfw_http_req_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 	 * an appropriate scheduler. That eliminates the long generic
 	 * scheduling work flow. When the first request in a session is
 	 * scheduled by the generic logic, TfwSession->srv_conn must be
-	 * initialized to point at the appropriate TfwConnection, so that
+	 * initialized to point at the appropriate TfwConn{}, so that
 	 * all subsequent session hits are scheduled much faster.
 	 */
 	if (!(srv_conn = tfw_sched_get_srv_conn((TfwMsg *)req))) {
@@ -1700,7 +1699,7 @@ static void
 tfw_http_req_add_seq_queue(TfwHttpReq *req)
 {
 	TfwHttpReq *req_prev;
-	TfwCliConnection *cli_conn = (TfwCliConnection *)req->conn;
+	TfwCliConn *cli_conn = (TfwCliConn *)req->conn;
 	struct list_head *seq_queue = &cli_conn->seq_queue;
 
 	tfw_http_req_mark_nip(req);
@@ -1728,7 +1727,7 @@ tfw_http_req_set_context(TfwHttpReq *req)
  * TODO enter the function depending on current GFSM state.
  */
 static int
-tfw_http_req_process(TfwConnection *conn, struct sk_buff *skb, unsigned int off)
+tfw_http_req_process(TfwConn *conn, struct sk_buff *skb, unsigned int off)
 {
 	int r = TFW_BLOCK;
 	unsigned int data_off = off;
@@ -1996,7 +1995,7 @@ static TfwHttpReq *
 tfw_http_popreq(TfwHttpMsg *hmresp)
 {
 	TfwHttpReq *req;
-	TfwSrvConnection *srv_conn = (TfwSrvConnection *)hmresp->conn;
+	TfwSrvConn *srv_conn = (TfwSrvConn *)hmresp->conn;
 	struct list_head *fwd_queue = &srv_conn->fwd_queue;
 	LIST_HEAD(equeue);
 
@@ -2158,8 +2157,7 @@ tfw_http_resp_terminate(TfwHttpMsg *hm)
  * TODO enter the function depending on current GFSM state.
  */
 static int
-tfw_http_resp_process(TfwConnection *conn, struct sk_buff *skb,
-		      unsigned int off)
+tfw_http_resp_process(TfwConn *conn, struct sk_buff *skb, unsigned int off)
 {
 	int r = TFW_BLOCK;
 	unsigned int data_off = off;
@@ -2318,7 +2316,7 @@ tfw_http_resp_process(TfwConnection *conn, struct sk_buff *skb,
 int
 tfw_http_msg_process(void *conn, struct sk_buff *skb, unsigned int off)
 {
-	TfwConnection *c = (TfwConnection *)conn;
+	TfwConn *c = (TfwConn *)conn;
 
 	if (unlikely(!c->msg)) {
 		c->msg = tfw_http_conn_msg_alloc(c);
diff --git a/tempesta_fw/http.h b/tempesta_fw/http.h
index e73ef8d8b..64026a236 100644
--- a/tempesta_fw/http.h
+++ b/tempesta_fw/http.h
@@ -286,7 +286,7 @@ typedef struct {
 	atomic_t		users;
 	unsigned long		ts;
 	unsigned long		expires;
-	TfwConnection		*srv_conn;
+	TfwSrvConn		*srv_conn;
 } TfwHttpSess;
 
 /**
@@ -315,7 +315,7 @@ typedef struct {
 	unsigned int	flags;						\
 	unsigned long	content_length;					\
 	unsigned int	keep_alive;					\
-	TfwConnection	*conn;						\
+	TfwConn		*conn;						\
 	void (*destructor)(void *msg);					\
 	TfwStr		crlf;						\
 	TfwStr		body;
diff --git a/tempesta_fw/http_sess.c b/tempesta_fw/http_sess.c
index 66f473474..c1470b571 100644
--- a/tempesta_fw/http_sess.c
+++ b/tempesta_fw/http_sess.c
@@ -623,8 +623,8 @@ tfw_http_sess_init(void)
 		return ret;
 	}
 
-	sess_cache = kmem_cache_create("tfw_sess_cache", sizeof(TfwHttpSess),
-				      0, 0, NULL);
+	sess_cache = kmem_cache_create("tfw_sess_cache",
+				       sizeof(TfwHttpSess), 0, 0, NULL);
 	if (!sess_cache) {
 		crypto_free_shash(tfw_sticky_shash);
 		return -ENOMEM;
diff --git a/tempesta_fw/procfs.c b/tempesta_fw/procfs.c
index b47ca168b..f3e4e1975 100644
--- a/tempesta_fw/procfs.c
+++ b/tempesta_fw/procfs.c
@@ -149,7 +149,7 @@ tfw_srvstats_seq_show(struct seq_file *seq, void *off)
 #define SPRNE(m, e)	seq_printf(seq, m": %dms\n", e)
 
 	int i;
-	TfwSrvConnection *srv_conn;
+	TfwSrvConn *srv_conn;
 	TfwServer *srv = seq->private;
 	TfwPrcntl prcntl[ARRAY_SIZE(tfw_procfs_prcntl)];
 	TfwPrcntlStats pstats = { prcntl, ARRAY_SIZE(prcntl) };
diff --git a/tempesta_fw/sched.c b/tempesta_fw/sched.c
index ed19eb0fe..dba8a6430 100644
--- a/tempesta_fw/sched.c
+++ b/tempesta_fw/sched.c
@@ -51,10 +51,10 @@ static DEFINE_SPINLOCK(sched_lock);
  *
  * This function is always called in SoftIRQ context.
  */
-TfwSrvConnection *
+TfwSrvConn *
 tfw_sched_get_srv_conn(TfwMsg *msg)
 {
-	TfwSrvConnection *srv_conn;
+	TfwSrvConn *srv_conn;
 	TfwScheduler *sched;
 
 	rcu_read_lock();
diff --git a/tempesta_fw/sched/tfw_sched_hash.c b/tempesta_fw/sched/tfw_sched_hash.c
index 89116169d..c3c0c8ed5 100644
--- a/tempesta_fw/sched/tfw_sched_hash.c
+++ b/tempesta_fw/sched/tfw_sched_hash.c
@@ -49,8 +49,8 @@ MODULE_VERSION("0.2.1");
 MODULE_LICENSE("GPL");
 
 typedef struct {
-	TfwSrvConnection	*srv_conn;
-	unsigned long		hash;
+	TfwSrvConn	*srv_conn;
+	unsigned long	hash;
 } TfwConnHash;
 
 /* The last item is used as the list teminator. */
@@ -99,8 +99,7 @@ __calc_conn_hash(TfwServer *srv, size_t conn_idx)
 }
 
 static void
-tfw_sched_hash_add_conn(TfwSrvGroup *sg, TfwServer *srv,
-			TfwSrvConnection *srv_conn)
+tfw_sched_hash_add_conn(TfwSrvGroup *sg, TfwServer *srv, TfwSrvConn *srv_conn)
 {
 	size_t i;
 	TfwConnHash *conn_hash = sg->sched_data;
@@ -136,11 +135,11 @@ tfw_sched_hash_add_conn(TfwSrvGroup *sg, TfwServer *srv,
  *  - For every HTTP request, we have to scan the list of all servers to find
  *    a matching one with the highest weight. That adds some overhead.
  */
-static TfwSrvConnection *
+static TfwSrvConn *
 tfw_sched_hash_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 {
 	unsigned long tries, msg_hash, curr_weight, best_weight = 0;
-	TfwSrvConnection *best_srv_conn = NULL;
+	TfwSrvConn *best_srv_conn = NULL;
 	TfwConnHash *ch;
 
 	msg_hash = tfw_http_req_key_calc((TfwHttpReq *)msg);
diff --git a/tempesta_fw/sched/tfw_sched_http.c b/tempesta_fw/sched/tfw_sched_http.c
index 106009f73..83ae129d0 100644
--- a/tempesta_fw/sched/tfw_sched_http.c
+++ b/tempesta_fw/sched/tfw_sched_http.c
@@ -99,11 +99,11 @@ static TfwHttpMatchList *tfw_sched_http_rules;
  * The search is based on contents of an HTTP request and match rules
  * that specify which Server Group the request should be forwarded to.
  */
-static TfwSrvConnection *
+static TfwSrvConn *
 tfw_sched_http_sched_grp(TfwMsg *msg)
 {
 	TfwSrvGroup *sg;
-	TfwSrvConnection *srv_conn;
+	TfwSrvConn *srv_conn;
 	TfwSchedHttpRule *rule;
 
 	if(!tfw_sched_http_rules || list_empty(&tfw_sched_http_rules->list))
@@ -136,7 +136,7 @@ tfw_sched_http_sched_grp(TfwMsg *msg)
 	return srv_conn;
 }
 
-static TfwSrvConnection *
+static TfwSrvConn *
 tfw_sched_http_sched_srv(TfwMsg *msg, TfwSrvGroup *sg)
 {
 	WARN_ONCE(true, "tfw_sched_http can't select a server from a group\n");
diff --git a/tempesta_fw/sched/tfw_sched_rr.c b/tempesta_fw/sched/tfw_sched_rr.c
index 06a2f1467..cd107ff3f 100644
--- a/tempesta_fw/sched/tfw_sched_rr.c
+++ b/tempesta_fw/sched/tfw_sched_rr.c
@@ -36,10 +36,10 @@ MODULE_LICENSE("GPL");
  * taken into account by the scheduler.
  */
 typedef struct {
-	atomic64_t		rr_counter;
-	size_t			conn_n;
-	TfwServer		*srv;
-	TfwSrvConnection	*srv_conns[TFW_SRV_MAX_CONN];
+	atomic64_t	rr_counter;
+	size_t		conn_n;
+	TfwServer	*srv;
+	TfwSrvConn	*conns[TFW_SRV_MAX_CONN];
 } TfwRrSrv;
 
 /**
@@ -48,9 +48,9 @@ typedef struct {
  * whole run-time. This can be changed in future.
  */
 typedef struct {
-	atomic64_t		rr_counter;
-	size_t			srv_n;
-	TfwRrSrv		srvs[TFW_SG_MAX_SRV];
+	atomic64_t	rr_counter;
+	size_t		srv_n;
+	TfwRrSrv	srvs[TFW_SG_MAX_SRV];
 } TfwRrSrvList;
 
 static void
@@ -71,8 +71,7 @@ tfw_sched_rr_free_data(TfwSrvGroup *sg)
  * Called at configuration phase, no synchronization is required.
  */
 static void
-tfw_sched_rr_add_conn(TfwSrvGroup *sg, TfwServer *srv,
-		      TfwSrvConnection *srv_conn)
+tfw_sched_rr_add_conn(TfwSrvGroup *sg, TfwServer *srv, TfwSrvConn *srv_conn)
 {
 	size_t s, c;
 	TfwRrSrv *srv_cl;
@@ -91,12 +90,12 @@ tfw_sched_rr_add_conn(TfwSrvGroup *sg, TfwServer *srv,
 
 	srv_cl = &sl->srvs[s];
 	for (c = 0; c < srv_cl->conn_n; ++c)
-		if (srv_cl->srv_conns[c] == srv_conn) {
+		if (srv_cl->conns[c] == srv_conn) {
 			TFW_WARN("sched_rr: Try to add existing connection,"
 				 " srv=%zu conn=%zu\n", s, c);
 			return;
 		}
-	srv_cl->srv_conns[c] = srv_conn;
+	srv_cl->conns[c] = srv_conn;
 	++srv_cl->conn_n;
 	BUG_ON(srv_cl->conn_n > TFW_SRV_MAX_CONN);
 }
@@ -118,7 +117,7 @@ tfw_sched_rr_add_conn(TfwSrvGroup *sg, TfwServer *srv,
  * optimistic in that there are not many non-idempotent requests, and
  * there are available server connections.
  */
-static TfwSrvConnection *
+static TfwSrvConn *
 tfw_sched_rr_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 {
 	size_t c, s;
@@ -126,7 +125,7 @@ tfw_sched_rr_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 	int skipnip = 1, nipconn = 0;
 	TfwRrSrvList *sl = sg->sched_data;
 	TfwRrSrv *srv_cl;
-	TfwSrvConnection *srv_conn;
+	TfwSrvConn *srv_conn;
 
 	BUG_ON(!sl);
 rerun:
@@ -135,7 +134,7 @@ tfw_sched_rr_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 		srv_cl = &sl->srvs[idxval % sl->srv_n];
 		for (c = 0; c < srv_cl->conn_n; ++c) {
 			idxval = atomic64_inc_return(&srv_cl->rr_counter);
-			srv_conn = srv_cl->srv_conns[idxval % srv_cl->conn_n];
+			srv_conn = srv_cl->conns[idxval % srv_cl->conn_n];
 			if (unlikely(tfw_srv_conn_restricted(srv_conn)
 				     || tfw_server_queue_full(srv_conn)))
 				continue;
diff --git a/tempesta_fw/server.c b/tempesta_fw/server.c
index bc4dafab3..ccd5440ba 100644
--- a/tempesta_fw/server.c
+++ b/tempesta_fw/server.c
@@ -185,7 +185,7 @@ tfw_sg_add(TfwSrvGroup *sg, TfwServer *srv)
 }
 
 void
-tfw_sg_add_conn(TfwSrvGroup *sg, TfwServer *srv, TfwSrvConnection *srv_conn)
+tfw_sg_add_conn(TfwSrvGroup *sg, TfwServer *srv, TfwSrvConn *srv_conn)
 {
 	if (sg->sched && sg->sched->add_conn)
 		sg->sched->add_conn(sg, srv, srv_conn);
diff --git a/tempesta_fw/server.h b/tempesta_fw/server.h
index e5bd9dbda..22d1e11b5 100644
--- a/tempesta_fw/server.h
+++ b/tempesta_fw/server.h
@@ -25,8 +25,8 @@
 #include "connection.h"
 #include "peer.h"
 
-#define TFW_SRV_MAX_CONN	32	/* TfwSrvConnection per TfwServer */
-#define TFW_SG_MAX_SRV		32	/* TfwServer per TfwSrvGroup */
+#define TFW_SRV_MAX_CONN	32	/* TfwSrvConn{} per TfwServer{} */
+#define TFW_SG_MAX_SRV		32	/* TfwServer{} per TfwSrvGroup{} */
 #define TFW_SG_MAX_CONN		(TFW_SG_MAX_SRV * TFW_SRV_MAX_CONN)
 
 typedef struct tfw_srv_group_t TfwSrvGroup;
@@ -112,9 +112,9 @@ struct tfw_scheduler_t {
 	void			(*add_grp)(TfwSrvGroup *sg);
 	void			(*del_grp)(TfwSrvGroup *sg);
 	void			(*add_conn)(TfwSrvGroup *sg, TfwServer *srv,
-					    TfwSrvConnection *srv_conn);
-	TfwSrvConnection	*(*sched_grp)(TfwMsg *msg);
-	TfwSrvConnection	*(*sched_srv)(TfwMsg *msg, TfwSrvGroup *sg);
+					    TfwSrvConn *srv_conn);
+	TfwSrvConn		*(*sched_grp)(TfwMsg *msg);
+	TfwSrvConn		*(*sched_srv)(TfwMsg *msg, TfwSrvGroup *sg);
 };
 
 /* Server specific routines. */
@@ -122,10 +122,10 @@ TfwServer *tfw_server_create(const TfwAddr *addr);
 int tfw_server_apm_create(TfwServer *srv);
 void tfw_server_destroy(TfwServer *srv);
 
-void tfw_srv_conn_release(TfwSrvConnection *srv_conn);
+void tfw_srv_conn_release(TfwSrvConn *srv_conn);
 
 static inline bool
-tfw_server_queue_full(TfwSrvConnection *srv_conn)
+tfw_server_queue_full(TfwSrvConn *srv_conn)
 {
 	TfwSrvGroup *sg = ((TfwServer *)srv_conn->peer)->sg;
 	return ACCESS_ONCE(srv_conn->qsize) >= sg->max_qsize;
@@ -138,14 +138,13 @@ void tfw_sg_free(TfwSrvGroup *sg);
 int tfw_sg_count(void);
 
 void tfw_sg_add(TfwSrvGroup *sg, TfwServer *srv);
-void tfw_sg_add_conn(TfwSrvGroup *sg, TfwServer *srv,
-		     TfwSrvConnection *srv_conn);
+void tfw_sg_add_conn(TfwSrvGroup *sg, TfwServer *srv, TfwSrvConn *srv_conn);
 int tfw_sg_set_sched(TfwSrvGroup *sg, const char *sched);
 int tfw_sg_for_each_srv(int (*cb)(TfwServer *srv));
 void tfw_sg_release_all(void);
 
 /* Scheduler routines. */
-TfwSrvConnection *tfw_sched_get_srv_conn(TfwMsg *msg);
+TfwSrvConn *tfw_sched_get_srv_conn(TfwMsg *msg);
 TfwScheduler *tfw_sched_lookup(const char *name);
 int tfw_sched_register(TfwScheduler *sched);
 void tfw_sched_unregister(TfwScheduler *sched);
diff --git a/tempesta_fw/sock_clnt.c b/tempesta_fw/sock_clnt.c
index c080faa79..eb5b310a5 100644
--- a/tempesta_fw/sock_clnt.c
+++ b/tempesta_fw/sock_clnt.c
@@ -38,20 +38,20 @@
  */
 
 static struct kmem_cache *tfw_cli_conn_cache;
-static struct kmem_cache *tfw_cli_conn_tls_cache;
+static struct kmem_cache *tfw_tls_conn_cache;
 static int tfw_cli_cfg_ka_timeout = -1;
 
 static inline struct kmem_cache *
 tfw_cli_cache(int type)
 {
 	return type == Conn_HttpClnt ?
-		tfw_cli_conn_cache : tfw_cli_conn_tls_cache;
+		tfw_cli_conn_cache : tfw_tls_conn_cache;
 }
 
 static void
 tfw_sock_cli_keepalive_timer_cb(unsigned long data)
 {
-	TfwCliConnection *cli_conn = (TfwCliConnection *)data;
+	TfwCliConn *cli_conn = (TfwCliConn *)data;
 
 	TFW_DBG("Client timeout end\n");
 
@@ -64,15 +64,15 @@ tfw_sock_cli_keepalive_timer_cb(unsigned long data)
 		mod_timer(&cli_conn->timer, jiffies + msecs_to_jiffies(1000));
 }
 
-static TfwCliConnection *
+static TfwCliConn *
 tfw_cli_conn_alloc(int type)
 {
-	TfwCliConnection *cli_conn;
+	TfwCliConn *cli_conn;
 
 	if (!(cli_conn = kmem_cache_alloc(tfw_cli_cache(type), GFP_ATOMIC)))
 		return NULL;
 
-	tfw_connection_init((TfwConnection *)cli_conn);
+	tfw_connection_init((TfwConn *)cli_conn);
 	INIT_LIST_HEAD(&cli_conn->seq_queue);
 	spin_lock_init(&cli_conn->seq_qlock);
 	spin_lock_init(&cli_conn->ret_qlock);
@@ -85,24 +85,24 @@ tfw_cli_conn_alloc(int type)
 }
 
 static void
-tfw_cli_conn_free(TfwCliConnection *cli_conn)
+tfw_cli_conn_free(TfwCliConn *cli_conn)
 {
 	BUG_ON(timer_pending(&cli_conn->timer));
 
 	/* Check that all nested resources are freed. */
-	tfw_connection_validate_cleanup((TfwConnection *)cli_conn);
+	tfw_connection_validate_cleanup((TfwConn *)cli_conn);
 	BUG_ON(!list_empty(&cli_conn->seq_queue));
 
 	kmem_cache_free(tfw_cli_cache(TFW_CONN_TYPE(cli_conn)), cli_conn);
 }
 
 void
-tfw_cli_conn_release(TfwCliConnection *cli_conn)
+tfw_cli_conn_release(TfwCliConn *cli_conn)
 {
 	del_timer_sync(&cli_conn->timer);
 
 	if (likely(cli_conn->sk))
-		tfw_connection_unlink_to_sk((TfwConnection *)cli_conn);
+		tfw_connection_unlink_to_sk((TfwConn *)cli_conn);
 	if (likely(cli_conn->peer))
 		tfw_client_put((TfwClient *)cli_conn->peer);
 	tfw_cli_conn_free(cli_conn);
@@ -110,11 +110,11 @@ tfw_cli_conn_release(TfwCliConnection *cli_conn)
 }
 
 int
-tfw_cli_conn_send(TfwCliConnection *cli_conn, TfwMsg *msg)
+tfw_cli_conn_send(TfwCliConn *cli_conn, TfwMsg *msg)
 {
 	int r;
 
-	r = tfw_connection_send((TfwConnection *)cli_conn, msg);
+	r = tfw_connection_send((TfwConn *)cli_conn, msg);
 	mod_timer(&cli_conn->timer,
 		  jiffies + msecs_to_jiffies(tfw_cli_cfg_ka_timeout * 1000));
 
@@ -133,7 +133,7 @@ tfw_sock_clnt_new(struct sock *sk)
 {
 	int r = -ENOMEM;
 	TfwClient *cli;
-	TfwConnection *conn;
+	TfwConn *conn;
 	SsProto *listen_sock_proto;
 
 	TFW_DBG3("new client socket: sk=%p, state=%u\n", sk, sk->sk_state);
@@ -142,8 +142,8 @@ tfw_sock_clnt_new(struct sock *sk)
 	/*
 	 * New sk->sk_user_data points to TfwListenSock{} of the parent
 	 * listening socket. We set it to NULL to stop other functions
-	 * from referencing TfwListenSock{} while a new TfwConnection{}
-	 * object is not yet allocated/initialized.
+	 * from referencing TfwListenSock{} while a new TfwConn{} object
+	 * is not yet allocated/initialized.
 	 */
 	listen_sock_proto = sk->sk_user_data;
 	tfw_connection_unlink_from_sk(sk);
@@ -154,7 +154,7 @@ tfw_sock_clnt_new(struct sock *sk)
 		return -ENOENT;
 	}
 
-	conn = (TfwConnection *)tfw_cli_conn_alloc(listen_sock_proto->type);
+	conn = (TfwConn *)tfw_cli_conn_alloc(listen_sock_proto->type);
 	if (!conn) {
 		TFW_ERR("can't allocate a new client connection\n");
 		goto err_client;
@@ -188,7 +188,7 @@ tfw_sock_clnt_new(struct sock *sk)
 
 err_conn:
 	tfw_connection_drop(conn);
-	tfw_cli_conn_free((TfwCliConnection *)conn);
+	tfw_cli_conn_free((TfwCliConn *)conn);
 err_client:
 	tfw_client_put(cli);
 	return r;
@@ -201,7 +201,7 @@ tfw_sock_clnt_new(struct sock *sk)
 static void
 tfw_sock_clnt_do_drop(struct sock *sk, const char *msg)
 {
-	TfwConnection *conn = sk->sk_user_data;
+	TfwConn *conn = sk->sk_user_data;
 
 	TFW_DBG3("%s: close client socket: sk=%p, conn=%p, client=%p\n",
 		 msg, sk, conn, conn->peer);
@@ -251,7 +251,7 @@ static const SsHooks tfw_sock_clnt_ss_hooks = {
 };
 
 static int
-__cli_conn_close_cb(TfwConnection *conn)
+__cli_conn_close_cb(TfwConn *conn)
 {
 	/*
 	 * Use assynchronous closing to release peer connection list and
@@ -264,7 +264,7 @@ __cli_conn_close_cb(TfwConnection *conn)
 static int
 tfw_cli_conn_close_all(TfwClient *cli)
 {
-	TfwConnection *conn;
+	TfwConn *conn;
 
 	return tfw_peer_for_each_conn(cli, conn, list, __cli_conn_close_cb);
 }
@@ -616,21 +616,20 @@ int
 tfw_sock_clnt_init(void)
 {
 	BUG_ON(tfw_cli_conn_cache);
-	BUG_ON(tfw_cli_conn_tls_cache);
+	BUG_ON(tfw_tls_conn_cache);
 
 	tfw_cli_conn_cache = kmem_cache_create("tfw_cli_conn_cache",
-					       sizeof(TfwCliConnection),
-					       0, 0, NULL);
-	if (!tfw_cli_conn_cache)
-		return -ENOMEM;
+					       sizeof(TfwCliConn), 0, 0, NULL);
+	tfw_tls_conn_cache = kmem_cache_create("tfw_tls_conn_cache",
+					       sizeof(TfwTlsConn), 0, 0, NULL);
+
+	if (tfw_cli_conn_cache && tfw_tls_conn_cache)
+		return 0;
 
-	tfw_cli_conn_tls_cache = kmem_cache_create("tfw_cli_conn_tls_cache",
-						   sizeof(TfwTlsConnection),
-						   0, 0, NULL);
-	if (!tfw_cli_conn_tls_cache) {
+	if (tfw_cli_conn_cache)
 		kmem_cache_destroy(tfw_cli_conn_cache);
-		return -ENOMEM;
-	}
+	if (tfw_tls_conn_cache)
+		kmem_cache_destroy(tfw_tls_conn_cache);
 
-	return 0;
+	return -ENOMEM;
 }
@@ -638,6 +637,6 @@ tfw_sock_clnt_init(void)
 void
 tfw_sock_clnt_exit(void)
 {
-	kmem_cache_destroy(tfw_cli_conn_tls_cache);
+	kmem_cache_destroy(tfw_tls_conn_cache);
 	kmem_cache_destroy(tfw_cli_conn_cache);
 }
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index b14043c79..b326b869d 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -55,25 +55,25 @@
  */
 
 /**
- * A server connection differs from a client connection.
- * For clients, a new TfwCliConnection{} instance is created when a new
- * client socket is accepted (the connection is established at that point).
- * For servers, a socket is created first, and then some time passes while
+ * A server connection differs from a client connection. For clients,
+ * a new TfwCliConn{} instance is created when a new client socket is
+ * accepted (the connection is established at that point). For servers,
+ * a socket is created first, and then there's a period of time while
  * a connection is being established.
  *
- * TfwSrvConnection{} instance goes though the following periods of life:
- * - First, a TfwSrvConnection{} instance is allocated and set up with
+ * TfwSrvConn{} instance goes through the following periods of life:
+ * - First, a TfwSrvConn{} instance is allocated and set up with
  *   data from configuration file.
- * - When a server socket is created, the TfwSrvConnection{} instance
+ * - When a server socket is created, the TfwSrvConn{} instance
  *   is partially initialized to allow a connect attempt to complete.
- * - When a connection is established, the TfwSrvConnection{} instance
+ * - When a connection is established, the TfwSrvConn{} instance
  *   is fully initialized and set up.
- * - If a connect attempt has failed, or the connection has been reset
- *   or closed, the same TfwSrvConnection{} instance is reused with
+ * - If a connect attempt has failed, or the connection has been
+ *   reset or closed, the same TfwSrvConn{} instance is reused with
  *   a new socket. Another attempt to establish a connection is made.
  *
- * So a TfwSrvConnection{} instance has a longer lifetime. In a sense,
- * a TfwSrvConnection{} instance is persistent. It lives from the time
+ * So a TfwSrvConn{} instance has a longer lifetime. In a sense,
+ * a TfwSrvConn{} instance is persistent. It lives from the time
  * it is created when Tempesta is started, and until the time it is
  * destroyed when Tempesta is stopped.
  *
@@ -84,13 +84,13 @@
  * reused for a new connection, and a new socket is created. Note that
  * @sk member is not cleared when it is no longer valid, and there is
  * a time frame until new connection is actually established. An old
- * non-valid @sk stays a member of an TfwSrvConnection{} instance during
+ * non-valid @sk stays a member of a TfwSrvConn{} instance during
  * that time frame. However, the condition for reuse of an instance is
  * that there're no more users of the instance, so no thread can make
  * use of an old socket @sk. Should something bad happen, then having
  * a stale pointer in conn->sk is no different than having a NULL pointer.
  *
- * The reference counter is still needed for TfwSrvConnection{} instances.
+ * The reference counter is still needed for TfwSrvConn{} instances.
  * It tells when an instance can be reused for a new connect attempt.
  * A scenario that may occur is as follows:
  * 1. There's a client's request, so scheduler finds a server connection
@@ -99,7 +99,7 @@
  * 2. At that time the server sends RST on that connection in response
  *    to an earlier request. It starts the failover procedure that runs
  *    in parallel. Part of the procedure is a new attempt to connect to
- *    the server, which requires that TfwSrvConnection{} instance can be
+ *    the server, which requires that TfwSrvConn{} instance can be
  *    reused. So the attempt to reconnect has to wait. It is started as
  *    soon as the last client releases the server connection.
  */
@@ -124,7 +124,7 @@ static const unsigned long tfw_srv_tmo_vals[] = { 1, 10, 100, 250, 500, 1000 };
  * Returns immediately without waiting until a connection is established.
  */
 static int
-tfw_sock_srv_connect_try(TfwSrvConnection *srv_conn)
+tfw_sock_srv_connect_try(TfwSrvConn *srv_conn)
 {
 	int r;
 	TfwAddr *addr;
@@ -147,7 +147,7 @@ tfw_sock_srv_connect_try(TfwSrvConnection *srv_conn)
 #if defined(DEBUG) && (DEBUG >= 2)
 	sock_set_flag(sk, SOCK_DBG);
 #endif
-	tfw_connection_link_from_sk((TfwConnection *)srv_conn, sk);
+	tfw_connection_link_from_sk((TfwConn *)srv_conn, sk);
 	ss_set_callbacks(sk);
 
 	/*
@@ -201,7 +201,7 @@ tfw_sock_srv_connect_try(TfwSrvConnection *srv_conn)
  * stay dead until Tempesta is restarted.
  */
 static inline void
-tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
+tfw_sock_srv_connect_try_later(TfwSrvConn *srv_conn)
 {
 	TfwSrvGroup *sg = ((TfwServer *)srv_conn->peer)->sg;
 	unsigned long timeout;
@@ -225,7 +225,7 @@ tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
 		TFW_WARN("The limit of [%d] on reconnect attempts exceeded. "
 			 "The server connection [%s] is down.\n",
 			 sg->max_recns, s_addr);
-		tfw_connection_repair((TfwConnection *)srv_conn);
+		tfw_connection_repair((TfwConn *)srv_conn);
 		set_bit(TFW_CONN_B_ISDEAD, &srv_conn->flags);
 	}
 	if (srv_conn->recns < ARRAY_SIZE(tfw_srv_tmo_vals)) {
@@ -253,7 +253,7 @@ tfw_sock_srv_connect_try_later(TfwSrvConnection *srv_conn)
 static void
 tfw_sock_srv_connect_retry_timer_cb(unsigned long data)
 {
-	TfwSrvConnection *srv_conn = (TfwSrvConnection *)data;
+	TfwSrvConn *srv_conn = (TfwSrvConn *)data;
 
 	/* A new socket is created for each connect attempt. */
 	if (tfw_sock_srv_connect_try(srv_conn))
@@ -261,13 +261,13 @@ tfw_sock_srv_connect_retry_timer_cb(unsigned long data)
 }
 
 static inline void
-__reset_retry_timer(TfwSrvConnection *srv_conn)
+__reset_retry_timer(TfwSrvConn *srv_conn)
 {
 	srv_conn->recns = 0;
 }
 
 static inline void
-__setup_retry_timer(TfwSrvConnection *srv_conn)
+__setup_retry_timer(TfwSrvConn *srv_conn)
 {
 	__reset_retry_timer(srv_conn);
 	setup_timer(&srv_conn->timer,
@@ -276,16 +276,16 @@ __setup_retry_timer(TfwSrvConnection *srv_conn)
 }
 
 void
-tfw_srv_conn_release(TfwSrvConnection *srv_conn)
+tfw_srv_conn_release(TfwSrvConn *srv_conn)
 {
-	tfw_connection_release((TfwConnection *)srv_conn);
+	tfw_connection_release((TfwConn *)srv_conn);
 	/*
 	 * conn->sk may be zeroed if we get here after a failed
 	 * connect attempt. In that case no connection has been
 	 * established yet, and conn->sk has not been set.
 	 */
 	if (likely(srv_conn->sk))
-		tfw_connection_unlink_to_sk((TfwConnection *)srv_conn);
+		tfw_connection_unlink_to_sk((TfwConn *)srv_conn);
 	/*
 	 * After a disconnect, new connect attempts are started
 	 * in deferred context after a short pause (in a timer
@@ -302,7 +302,7 @@ static int
 tfw_sock_srv_connect_complete(struct sock *sk)
 {
 	int r;
-	TfwConnection *conn = sk->sk_user_data;
+	TfwConn *conn = sk->sk_user_data;
 	TfwServer *srv = (TfwServer *)conn->peer;
 
 	/* Link Tempesta with the socket. */
@@ -318,10 +318,10 @@ tfw_sock_srv_connect_complete(struct sock *sk)
 	tfw_connection_revive(conn);
 
 	/* Repair the connection if necessary. */
-	if (unlikely(tfw_srv_conn_restricted((TfwSrvConnection *)conn)))
+	if (unlikely(tfw_srv_conn_restricted((TfwSrvConn *)conn)))
 		tfw_connection_repair(conn);
 
-	__reset_retry_timer((TfwSrvConnection *)conn);
+	__reset_retry_timer((TfwSrvConn *)conn);
 
 	TFW_DBG_ADDR("connected", &srv->addr);
 	TFW_INC_STAT_BH(serv.conn_established);
@@ -337,7 +337,7 @@ tfw_sock_srv_connect_complete(struct sock *sk)
 static void
 tfw_sock_srv_connect_drop(struct sock *sk)
 {
-	TfwConnection *conn = sk->sk_user_data;
+	TfwConn *conn = sk->sk_user_data;
 
 	TFW_INC_STAT_BH(serv.conn_disconnects);
 	tfw_connection_drop(conn);
@@ -354,7 +354,7 @@ tfw_sock_srv_connect_drop(struct sock *sk)
 static void
 tfw_sock_srv_connect_failover(struct sock *sk)
 {
-	TfwConnection *conn = sk->sk_user_data;
+	TfwConn *conn = sk->sk_user_data;
 	TfwServer *srv = (TfwServer *)conn->peer;
 
 	TFW_DBG_ADDR("connection error", &srv->addr);
@@ -389,7 +389,7 @@ static const SsHooks tfw_sock_srv_ss_hooks = {
  * is not established. This is called only in user context at STOP time.
  */
 static int
-tfw_sock_srv_disconnect(TfwConnection *conn)
+tfw_sock_srv_disconnect(TfwConn *conn)
 {
 	/* Prevent races with timer callbacks. */
 	del_timer_sync(&conn->timer);
@@ -410,14 +410,14 @@ tfw_sock_srv_disconnect(TfwConnection *conn)
  * This behavior may change in future for a forward proxy implementation.
  * Then we will have a lot of short-living connections. We should keep it in
  * mind to avoid possible bottlenecks. In particular, this is the reason why we
- * don't have a global list of all TfwSrvConnection objects and store
+ * don't have a global list of all TfwSrvConn{} objects and store
  * not-yet-established connections in the TfwServer->conn_list.
  */
 
 static int
 tfw_sock_srv_connect_srv(TfwServer *srv)
 {
-	TfwSrvConnection *srv_conn;
+	TfwSrvConn *srv_conn;
 
 	/*
 	 * For each server connection, schedule an immediate connect
@@ -439,15 +439,9 @@ tfw_sock_srv_connect_srv(TfwServer *srv)
 static int
 tfw_sock_srv_disconnect_srv(TfwServer *srv)
 {
-	TfwConnection *conn;
+	TfwConn *conn;
 
-<<<<<<< 760ea44c0912d51bf97bb4ce7da4ed59151e545e
 	return tfw_peer_for_each_conn(srv, conn, list, tfw_sock_srv_disconnect);
-=======
-	list_for_each_entry(srv_conn, &srv->conn_list, list)
-		tfw_sock_srv_disconnect(srv_conn);
-	return 0;
->>>>>>> Split TfwConnection{} into TfwCliConnection{} and TfwSrvConnection{}.
 }
 
 /*
@@ -455,28 +449,28 @@ tfw_sock_srv_disconnect_srv(TfwServer *srv)
  *	TfwServer creation/deletion helpers.
  * ------------------------------------------------------------------------
  *
- * This section of code is responsible for allocating TfwSrvConnection objects
+ * This section of code is responsible for allocating TfwSrvConn{} objects
  * and linking them with a TfwServer object.
  *
- * All server connections (TfwSrvConnection objects) are pre-allocated  when a
- * TfwServer is created. That happens when at the configuration parsing stage.
+ * All server connections (TfwSrvConn{} objects) are pre-allocated when
+ * TfwServer{} is created. That happens at the configuration parsing stage.
  *
- * Later on, when Tempesta FW is started, these TfwSrvConnection objects are
- * used to establish connections. These connection objects are re-used (but not
- * re-allocated) when connections are re-established.
+ * Later on, when Tempesta FW is started, these TfwSrvConn{} objects are
+ * used to establish connections. These connection objects are re-used
+ * (but not re-allocated) when connections are re-established.
  */
 
 static struct kmem_cache *tfw_srv_conn_cache;
 
-static TfwSrvConnection *
+static TfwSrvConn *
 tfw_srv_conn_alloc(void)
 {
-	TfwSrvConnection *srv_conn;
+	TfwSrvConn *srv_conn;
 
 	if (!(srv_conn = kmem_cache_alloc(tfw_srv_conn_cache, GFP_ATOMIC)))
 		return NULL;
 
-	tfw_connection_init((TfwConnection *)srv_conn);
+	tfw_connection_init((TfwConn *)srv_conn);
 	INIT_LIST_HEAD(&srv_conn->fwd_queue);
 	INIT_LIST_HEAD(&srv_conn->nip_queue);
 	spin_lock_init(&srv_conn->fwd_qlock);
@@ -488,12 +482,12 @@ tfw_srv_conn_alloc(void)
 }
 
 static void
-tfw_srv_conn_free(TfwSrvConnection *srv_conn)
+tfw_srv_conn_free(TfwSrvConn *srv_conn)
 {
 	BUG_ON(timer_pending(&srv_conn->timer));
 
 	/* Check that all nested resources are freed. */
-	tfw_connection_validate_cleanup((TfwConnection *)srv_conn);
+	tfw_connection_validate_cleanup((TfwConn *)srv_conn);
 	BUG_ON(!list_empty(&srv_conn->nip_queue));
 	BUG_ON(ACCESS_ONCE(srv_conn->qsize));
 
@@ -504,12 +498,12 @@ static int
 tfw_sock_srv_add_conns(TfwServer *srv, int conns_n)
 {
 	int i;
-	TfwSrvConnection *srv_conn;
+	TfwSrvConn *srv_conn;
 
 	for (i = 0; i < conns_n; ++i) {
 		if (!(srv_conn = tfw_srv_conn_alloc()))
 			return -ENOMEM;
-		tfw_connection_link_peer((TfwConnection *)srv_conn,
+		tfw_connection_link_peer((TfwConn *)srv_conn,
 					 (TfwPeer *)srv);
 		tfw_sg_add_conn(srv->sg, srv, srv_conn);
 	}
@@ -520,10 +514,10 @@ tfw_sock_srv_add_conns(TfwServer *srv, int conns_n)
 static int
 tfw_sock_srv_del_conns(TfwServer *srv)
 {
-	TfwSrvConnection *srv_conn, *tmp;
+	TfwSrvConn *srv_conn, *tmp;
 
 	list_for_each_entry_safe(srv_conn, tmp, &srv->conn_list, list) {
-		tfw_connection_unlink_from_peer((TfwConnection *)srv_conn);
+		tfw_connection_unlink_from_peer((TfwConn *)srv_conn);
 		tfw_srv_conn_free(srv_conn);
 	}
 	return 0;
@@ -677,6 +671,9 @@ tfw_cfgop_set_conn_tries(TfwSrvGroup *sg, int recns)
 	return 0;
 }
 
+/*
+ * Common code to handle 'server' directive.
+ */
 static int
 tfw_cfgop_server(TfwCfgSpec *cs, TfwCfgEntry *ce,
 		 TfwSrvGroup *sg, TfwServer **arg_srv, int *arg_conns_n)
@@ -832,8 +829,8 @@ tfw_cfgop_out_server(TfwCfgSpec *cs, TfwCfgEntry *ce)
  *       ...
  *   }
  *
- * Basically it parses the group name and the "sched" attribute, creates a
- * new TfwSrvGroup object and sets the context for parsing nested "server"s.
+ * Basically it parses the group name, creates a new TfwSrvGroup{} object
+ * and sets the context for parsing nested directives.
  */
 static int
 tfw_cfgop_begin_srv_group(TfwCfgSpec *cs, TfwCfgEntry *ce)
@@ -869,7 +866,7 @@ tfw_cfgop_begin_srv_group(TfwCfgSpec *cs, TfwCfgEntry *ce)
 
 /**
  * The callback is invoked upon exit from a "srv_group" when all nested
- * "server"s are parsed, e.g.:
+ * directives are parsed, e.g.:
  *
  *   srv_group foo {
  *       server ...;
@@ -917,6 +914,9 @@ tfw_cfgop_finish_srv_group(TfwCfgSpec *cs)
 	return 0;
 }
 
+/*
+ * Common code to handle 'sched' directive.
+ */
 static int
 tfw_cfgop_sched(TfwCfgSpec *cs, TfwCfgEntry *ce, TfwScheduler **arg_sched)
 {
@@ -1154,8 +1154,7 @@ tfw_sock_srv_init(void)
 {
 	BUG_ON(tfw_srv_conn_cache);
 	tfw_srv_conn_cache = kmem_cache_create("tfw_srv_conn_cache",
-					       sizeof(TfwSrvConnection),
-					       0, 0, NULL);
+					       sizeof(TfwSrvConn), 0, 0, NULL);
 	return !tfw_srv_conn_cache ? -ENOMEM : 0;
 }
 
diff --git a/tempesta_fw/t/unit/helpers.c b/tempesta_fw/t/unit/helpers.c
index 016b0b555..92ecaf74b 100644
--- a/tempesta_fw/t/unit/helpers.c
+++ b/tempesta_fw/t/unit/helpers.c
@@ -33,7 +33,7 @@
  */
 #include "http_msg.h"
 
-static TfwConnection conn_req, conn_resp;
+static TfwConn conn_req, conn_resp;
 
 TfwHttpReq *
 test_req_alloc(size_t data_len)
@@ -52,7 +52,7 @@ test_req_alloc(size_t data_len)
 	ret = tfw_http_msg_setup(hmreq, &it, data_len);
 	BUG_ON(ret);
 
-	memset(&conn_req, 0, sizeof(TfwConnection));
+	memset(&conn_req, 0, sizeof(TfwConn));
 	tfw_connection_init(&conn_req);
 	conn_req.proto.type = Conn_HttpClnt;
 	hmreq->conn = &conn_req;
@@ -83,7 +83,7 @@ test_resp_alloc(size_t data_len)
 	ret = tfw_http_msg_setup(hmresp, &it, data_len);
 	BUG_ON(ret);
 
-	memset(&conn_resp, 0, sizeof(TfwConnection));
+	memset(&conn_resp, 0, sizeof(TfwConn));
 	tfw_connection_init(&conn_req);
 	conn_resp.proto.type = Conn_HttpSrv;
 	hmresp->conn = &conn_resp;
diff --git a/tempesta_fw/t/unit/sched_helper.c b/tempesta_fw/t/unit/sched_helper.c
index 119e35b99..1c75fb0ad 100644
--- a/tempesta_fw/t/unit/sched_helper.c
+++ b/tempesta_fw/t/unit/sched_helper.c
@@ -112,19 +112,19 @@ test_create_srv(const char *in_addr, TfwSrvGroup *sg)
 	return srv;
 }
 
-TfwSrvConnection *
+TfwSrvConn *
 test_create_conn(TfwPeer *peer)
 {
 	static struct sock __test_sock = {
 		.sk_state = TCP_ESTABLISHED,
 	};
-	TfwConnection *conn;
+	TfwConn *conn;
 
 	kernel_fpu_end();
 
 	if (!tfw_srv_conn_cache)
 		tfw_sock_srv_init();
-	conn = (TfwConnection *)tfw_srv_conn_alloc();
+	conn = (TfwConn *)tfw_srv_conn_alloc();
 	BUG_ON(!conn);
 
 	tfw_connection_link_peer(conn, peer);
@@ -134,14 +134,14 @@ test_create_conn(TfwPeer *peer)
 
 	kernel_fpu_begin();
 
-	return (TfwSrvConnection *)conn;
+	return (TfwSrvConn *)conn;
 }
 
 void
 test_conn_release_all(TfwSrvGroup *sg)
 {
 	TfwServer *srv;
-	TfwConnection *conn, *tmp;
+	TfwConn *conn, *tmp;
 
 	list_for_each_entry(srv, &sg->srv_list, list) {
 		list_for_each_entry_safe(conn, tmp, &srv->conn_list, list) {
@@ -149,7 +149,7 @@ test_conn_release_all(TfwSrvGroup *sg)
 			tfw_connection_unlink_from_peer(conn);
 			while (tfw_connection_live(conn))
 				tfw_connection_put(conn);
-			tfw_srv_conn_free((TfwSrvConnection *)conn);
+			tfw_srv_conn_free((TfwSrvConn *)conn);
 		}
 	}
 }
@@ -170,7 +170,7 @@ test_sched_generic_empty_sg(struct TestSchedHelper *sched_helper)
 
 	for (i = 0; i < sched_helper->conn_types; ++i) {
 		TfwMsg *msg = sched_helper->get_sched_arg(i);
-		TfwSrvConnection *srv_conn = sg->sched->sched_srv(msg, sg);
+		TfwSrvConn *srv_conn = sg->sched->sched_srv(msg, sg);
 
 		EXPECT_NULL(srv_conn);
 		sched_helper->free_sched_arg(msg);
@@ -197,7 +197,7 @@ test_sched_generic_one_srv_zero_conn(struct TestSchedHelper *sched_helper)
 
 	for (i = 0; i < sched_helper->conn_types; ++i) {
 		TfwMsg *msg = sched_helper->get_sched_arg(i);
-		TfwSrvConnection *srv_conn = sg->sched->sched_srv(msg, sg);
+		TfwSrvConn *srv_conn = sg->sched->sched_srv(msg, sg);
 
 		EXPECT_NULL(srv_conn);
 		sched_helper->free_sched_arg(msg);
@@ -226,8 +226,7 @@ test_sched_generic_max_srv_zero_conn(struct TestSchedHelper *sched_helper)
 	for (i = 0; i < sched_helper->conn_types; ++i) {
 		for (j = 0; j < TFW_SG_MAX_SRV; ++j) {
 			TfwMsg *msg = sched_helper->get_sched_arg(i);
-			TfwSrvConnection *srv_conn =
-				sg->sched->sched_srv(msg, sg);
+			TfwSrvConn *srv_conn = sg->sched->sched_srv(msg, sg);
 
 			EXPECT_NULL(srv_conn);
 			sched_helper->free_sched_arg(msg);
diff --git a/tempesta_fw/t/unit/sched_helper.h b/tempesta_fw/t/unit/sched_helper.h
index 2187a113f..2f3d2ac57 100644
--- a/tempesta_fw/t/unit/sched_helper.h
+++ b/tempesta_fw/t/unit/sched_helper.h
@@ -36,7 +36,7 @@ void test_sg_release_all(void);
 
 TfwServer *test_create_srv(const char *in_addr, TfwSrvGroup *sg);
 
-TfwSrvConnection *test_create_conn(TfwPeer *peer);
+TfwSrvConn *test_create_conn(TfwPeer *peer);
 
 void test_conn_release_all(TfwSrvGroup *sg);
 
diff --git a/tempesta_fw/t/unit/test_http_sticky.c b/tempesta_fw/t/unit/test_http_sticky.c
index 865ce2f4f..ed1d558ff 100644
--- a/tempesta_fw/t/unit/test_http_sticky.c
+++ b/tempesta_fw/t/unit/test_http_sticky.c
@@ -81,8 +81,8 @@ static struct {
 
 	TfwHttpReq	*req;
 	TfwHttpResp	*resp;
-	TfwConnection	conn_req;
-	TfwConnection	conn_resp;
+	TfwConn		conn_req;
+	TfwConn		conn_resp;
 	TfwClient	client;
 	struct sock	sock;
 } mock;
@@ -137,7 +137,7 @@ tfw_http_field_value(TfwHttpMsg *hm, const TfwStr *field_name, TfwStr *value)
 
 /* custom version for testing purposes */
 int
-tfw_connection_send(TfwConnection *conn, TfwMsg *msg)
+tfw_connection_send(TfwConn *conn, TfwMsg *msg)
 {
 	struct sk_buff *skb;
 	unsigned int data_off = 0;
@@ -177,9 +177,9 @@ tfw_connection_send(TfwConnection *conn, TfwMsg *msg)
 }
 
 /* custom version for testing purposes */
-int tfw_cli_conn_send(TfwCliConnection *cli_conn, TfwMsg *msg)
+int tfw_cli_conn_send(TfwCliConn *cli_conn, TfwMsg *msg)
 {
-	return tfw_connection_send((TfwConnection *)cli_conn, msg);
+	return tfw_connection_send((TfwConn *)cli_conn, msg);
 }
 
 /* setup/teardown helpers */
diff --git a/tempesta_fw/t/unit/test_sched_hash.c b/tempesta_fw/t/unit/test_sched_hash.c
index 537e00209..fe1f0a61d 100644
--- a/tempesta_fw/t/unit/test_sched_hash.c
+++ b/tempesta_fw/t/unit/test_sched_hash.c
@@ -107,18 +107,17 @@ TEST(tfw_sched_hash, one_srv_in_sg_and_max_conn)
 	TfwServer *srv = test_create_srv("127.0.0.1", sg);
 
 	for (i = 0; i < TFW_SRV_MAX_CONN; ++i) {
-		TfwSrvConnection *srv_conn = test_create_conn((TfwPeer *)srv);
+		TfwSrvConn *srv_conn = test_create_conn((TfwPeer *)srv);
 		sg->sched->add_conn(sg, srv, srv_conn);
 	}
 
 	/* Check that every request is scheduled to the same connection. */
 	for (i = 0; i < sched_helper_hash.conn_types; ++i) {
-		TfwSrvConnection *expect_conn = NULL;
+		TfwSrvConn *expect_conn = NULL;
 
 		for (j = 0; j < TFW_SRV_MAX_CONN; ++j) {
 			TfwMsg *msg = sched_helper_hash.get_sched_arg(i);
-			TfwSrvConnection *srv_conn =
-				sg->sched->sched_srv(msg, sg);
+			TfwSrvConn *srv_conn = sg->sched->sched_srv(msg, sg);
 			EXPECT_NOT_NULL(srv_conn);
 
 			if (!expect_conn)
@@ -157,20 +156,18 @@ TEST(tfw_sched_hash, max_srv_in_sg_and_max_conn)
 		TfwServer *srv = test_create_srv("127.0.0.1", sg);
 
 		for (j = 0; j < TFW_SRV_MAX_CONN; ++j) {
-			TfwSrvConnection *srv_conn =
-				test_create_conn((TfwPeer *)srv);
+			TfwSrvConn *srv_conn = test_create_conn((TfwPeer *)srv);
 			sg->sched->add_conn(sg, srv, srv_conn);
 		}
 	}
 
 	/* Check that every request is scheduled to the same connection. */
 	for (i = 0; i < sched_helper_hash.conn_types; ++i) {
-		TfwSrvConnection *expect_conn = NULL;
+		TfwSrvConn *expect_conn = NULL;
 
 		for (j = 0; j < TFW_SG_MAX_SRV * TFW_SRV_MAX_CONN; ++j) {
 			TfwMsg *msg = sched_helper_hash.get_sched_arg(i);
-			TfwSrvConnection *srv_conn =
-				sg->sched->sched_srv(msg, sg);
+			TfwSrvConn *srv_conn = sg->sched->sched_srv(msg, sg);
 			EXPECT_NOT_NULL(srv_conn);
 
 			if (!expect_conn)
diff --git a/tempesta_fw/t/unit/test_sched_http.c b/tempesta_fw/t/unit/test_sched_http.c
index 626a88143..b20813f8c 100644
--- a/tempesta_fw/t/unit/test_sched_http.c
+++ b/tempesta_fw/t/unit/test_sched_http.c
@@ -78,10 +78,10 @@ cleanup_cfg(void)
 }
 
 static void
-test_req(char *req_str, TfwSrvConnection *expect_conn)
+test_req(char *req_str, TfwSrvConn *expect_conn)
 {
 	TfwScheduler *sched;
-	TfwSrvConnection *srv_conn;
+	TfwSrvConn *srv_conn;
 	TfwHttpReq *req = test_req_alloc(req_str? strlen(req_str): 1);
 
 	if (req_str) {
@@ -126,7 +126,7 @@ TEST(tfw_sched_http, one_wildcard_rule)
 {
 	TfwSrvGroup *sg;
 	TfwServer *srv;
-	TfwSrvConnection *expect_conn;
+	TfwSrvConn *expect_conn;
 
 	sg = test_create_sg("default", "round-robin");
 	srv = test_create_srv("127.0.0.1", sg);
@@ -149,10 +149,9 @@ TEST(tfw_sched_http, some_rules)
 	TfwServer *srv;
 	TfwSrvGroup *sg1, *sg2, *sg3, *sg4, *sg5, *sg6, *sg7, *sg8,
 		    *sg9, *sg10;
-	TfwSrvConnection *expect_conn1, *expect_conn2, *expect_conn3,
-			 *expect_conn4, *expect_conn5, *expect_conn6,
-			 *expect_conn7, *expect_conn8, *expect_conn9,
-			 *expect_conn10;
+	TfwSrvConn *expect_conn1, *expect_conn2, *expect_conn3, *expect_conn4,
+		   *expect_conn5, *expect_conn6, *expect_conn7, *expect_conn8,
+		   *expect_conn9, *expect_conn10;
 
 	sg1 = test_create_sg("sg1", "round-robin");
 	srv = test_create_srv("127.0.0.1", sg1);
@@ -312,7 +311,7 @@ TEST(tfw_sched_http, one_rule)
 	{
 		TfwSrvGroup *sg;
 		TfwServer *srv;
-		TfwSrvConnection *expect_conn;
+		TfwSrvConn *expect_conn;
 
 		sg = test_create_sg("default", "round-robin");
 		srv = test_create_srv("127.0.0.1", sg);
diff --git a/tempesta_fw/t/unit/test_sched_rr.c b/tempesta_fw/t/unit/test_sched_rr.c
index 7d982d2b3..b90bc2f13 100644
--- a/tempesta_fw/t/unit/test_sched_rr.c
+++ b/tempesta_fw/t/unit/test_sched_rr.c
@@ -88,7 +88,7 @@ TEST(tfw_sched_rr, one_srv_in_sg_and_max_conn)
 	TfwServer *srv = test_create_srv("127.0.0.1", sg);
 
 	for (i = 0; i < TFW_SRV_MAX_CONN; ++i) {
-		TfwSrvConnection *srv_conn = test_create_conn((TfwPeer *)srv);
+		TfwSrvConn *srv_conn = test_create_conn((TfwPeer *)srv);
 		sg->sched->add_conn(sg, srv, srv_conn);
 		conn_acc ^= (long long)srv_conn;
 	}
@@ -102,8 +102,7 @@ TEST(tfw_sched_rr, one_srv_in_sg_and_max_conn)
 
 		for (j = 0; j < TFW_SRV_MAX_CONN; ++j) {
 			TfwMsg *msg = sched_helper_rr.get_sched_arg(i);
-			TfwSrvConnection *srv_conn =
-				sg->sched->sched_srv(msg, sg);
+			TfwSrvConn *srv_conn = sg->sched->sched_srv(msg, sg);
 			EXPECT_NOT_NULL(srv_conn);
 
 			conn_acc_check ^= (long long)srv_conn;
@@ -141,8 +140,7 @@ TEST(tfw_sched_rr, max_srv_in_sg_and_max_conn)
 		TfwServer *srv = test_create_srv("127.0.0.1", sg);
 
 		for (j = 0; j < TFW_SRV_MAX_CONN; ++j) {
-			TfwSrvConnection *srv_conn =
-					test_create_conn((TfwPeer *)srv);
+			TfwSrvConn *srv_conn = test_create_conn((TfwPeer *)srv);
 			sg->sched->add_conn(sg, srv, srv_conn);
 			conn_acc ^= (long long)srv_conn;
 		}
@@ -157,8 +155,7 @@ TEST(tfw_sched_rr, max_srv_in_sg_and_max_conn)
 
 		for (j = 0; j < TFW_SG_MAX_SRV * TFW_SRV_MAX_CONN; ++j) {
 			TfwMsg *msg = sched_helper_rr.get_sched_arg(i);
-			TfwSrvConnection *srv_conn =
-				sg->sched->sched_srv(msg, sg);
+			TfwSrvConn *srv_conn = sg->sched->sched_srv(msg, sg);
 			EXPECT_NOT_NULL(srv_conn);
 
 			conn_acc_check ^= (long long)srv_conn;
diff --git a/tempesta_fw/tls.c b/tempesta_fw/tls.c
index 46ff673cc..2a1bed961 100644
--- a/tempesta_fw/tls.c
+++ b/tempesta_fw/tls.c
@@ -97,7 +97,7 @@ static int
 tfw_tls_msg_process(void *conn, struct sk_buff *skb, unsigned int off)
 {
 	int r;
-	TfwConnection *c = conn;
+	TfwConn *c = conn;
 	TfwTlsContext *tls = tfw_tls_context(c);
 
 	tls_dbg(c, "=>");
@@ -149,7 +149,7 @@ tfw_tls_msg_process(void *conn, struct sk_buff *skb, unsigned int off)
  * Send @buf of length @len using TLS context @tls.
  */
 static inline int
-tfw_tls_send_buf(TfwConnection *c, const unsigned char *buf, size_t len)
+tfw_tls_send_buf(TfwConn *c, const unsigned char *buf, size_t len)
 {
 	int r;
 	TfwTlsContext *tls = tfw_tls_context(c);
@@ -172,7 +172,7 @@ tfw_tls_send_buf(TfwConnection *c, const unsigned char *buf, size_t len)
  * Send @skb using TLS context @tls.
  */
 static inline int
-tfw_tls_send_skb(TfwConnection *c, struct sk_buff *skb)
+tfw_tls_send_skb(TfwConn *c, struct sk_buff *skb)
 {
 	int i;
 
@@ -200,7 +200,7 @@ tfw_tls_send_skb(TfwConnection *c, struct sk_buff *skb)
 static int
 tfw_tls_send_cb(void *conn, const unsigned char *buf, size_t len)
 {
-	TfwConnection *c = conn;
+	TfwConn *c = conn;
 	TfwTlsContext *tls = tfw_tls_context(c);
 	struct sk_buff *skb;
 
@@ -231,7 +231,7 @@ tfw_tls_send_cb(void *conn, const unsigned char *buf, size_t len)
 static int
 tfw_tls_recv_cb(void *conn, unsigned char *buf, size_t len)
 {
-	TfwConnection *c = conn;
+	TfwConn *c = conn;
 	TfwTlsContext *tls = tfw_tls_context(c);
 	struct sk_buff *skb = ss_skb_peek_tail(&tls->rx_queue);
 
@@ -257,16 +257,16 @@ tfw_tls_recv_cb(void *conn, unsigned char *buf, size_t len)
 }
 
 static void
-tfw_tls_conn_dtor(TfwConnection *c)
+tfw_tls_conn_dtor(TfwConn *c)
 {
 	TfwTlsContext *tls = tfw_tls_context(c);
 
 	mbedtls_ssl_free(&tls->ssl);
-	tfw_cli_conn_release((TfwCliConnection *)c);
+	tfw_cli_conn_release((TfwCliConn *)c);
 }
 
 static int
-tfw_tls_conn_init(TfwConnection *c)
+tfw_tls_conn_init(TfwConn *c)
 {
 	int r;
 	TfwTlsContext *tls = tfw_tls_context(c);
@@ -306,7 +306,7 @@ tfw_tls_conn_init(TfwConnection *c)
 }
 
 static void
-tfw_tls_conn_drop(TfwConnection *c)
+tfw_tls_conn_drop(TfwConn *c)
 {
 	TfwTlsContext *tls = tfw_tls_context(c);
 
@@ -318,7 +318,7 @@ tfw_tls_conn_drop(TfwConnection *c)
 }
 
 static int
-tfw_tls_conn_send(TfwConnection *c, TfwMsg *msg)
+tfw_tls_conn_send(TfwConn *c, TfwMsg *msg)
 {
 	struct sk_buff *skb;
 	TfwTlsContext *tls = tfw_tls_context(c);

From 7dc8fe1e870a3a9602b6f4dbc39690742582a583 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Mon, 30 Jan 2017 16:13:35 +0300
Subject: [PATCH 51/65] Minor APM-related changes, better/clearer comments.

---
 tempesta_fw/apm.c                  |  23 +++---
 tempesta_fw/http.c                 | 111 ++++++++++++++++-------------
 tempesta_fw/sched/tfw_sched_hash.c |   2 +-
 tempesta_fw/sched/tfw_sched_rr.c   |   2 +-
 4 files changed, 78 insertions(+), 60 deletions(-)

diff --git a/tempesta_fw/apm.c b/tempesta_fw/apm.c
index c4f553ec0..a71e3c39b 100644
--- a/tempesta_fw/apm.c
+++ b/tempesta_fw/apm.c
@@ -528,14 +528,14 @@ static inline void
 __tfw_apm_state_next(TfwPcntRanges *rng, TfwApmRBEState *st)
 {
 	int i = st->i, r, b;
-	unsigned short rtime;
+	unsigned short rtt;
 
 	for (r = i / TFW_STATS_BCKTS; r < TFW_STATS_RANGES; ++r) {
 		for (b = i % TFW_STATS_BCKTS; b < TFW_STATS_BCKTS; ++b, ++i) {
 			if (!atomic_read(&rng->cnt[r][b]))
 				continue;
-			rtime = rng->ctl[r].begin + (b << rng->ctl[r].order);
-			__tfw_apm_state_set(st, rtime, i, r, b);
+			rtt = rng->ctl[r].begin + (b << rng->ctl[r].order);
+			__tfw_apm_state_set(st, rtt, i, r, b);
 			return;
 		}
 	}
@@ -871,22 +871,29 @@ tfw_apm_prcntl_verify(TfwPrcntl *prcntl, unsigned int prcntlsz)
 }
 
 static inline void
-__tfw_apm_update(TfwApmRBuf *rbuf, unsigned long jtstamp, unsigned int rtime)
+__tfw_apm_update(TfwApmRBuf *rbuf, unsigned long jtstamp, unsigned int rtt)
 {
 	int centry = (jtstamp / tfw_apm_jtmintrvl) % rbuf->rbufsz;
 	unsigned long jtmistart = jtstamp - (jtstamp % tfw_apm_jtmintrvl);
 	TfwApmRBEnt *crbent = &rbuf->rbent[centry];
 
 	tfw_apm_rbent_checkreset(crbent, jtmistart);
-	tfw_stats_update(&crbent->pcntrng, rtime, &rbuf->slock);
+	tfw_stats_update(&crbent->pcntrng, rtt, &rbuf->slock);
 }
 
 void
-tfw_apm_update(void *apmdata, unsigned long jtstamp, unsigned long jrtime)
+tfw_apm_update(void *apmdata, unsigned long jtstamp, unsigned long jrtt)
 {
+	unsigned int rtt = jiffies_to_msecs(jrtt);
+
 	BUG_ON(!apmdata);
-	__tfw_apm_update(&((TfwApmData *)apmdata)->rbuf,
-			 jtstamp, jiffies_to_msecs(jrtime));
+	/*
+	 * APM stats can't handle response times that are greater than
+	 * the maximum value possible for TfwPcntCtl{}->end. Currently
+	 * the value is USHRT_MAX which is about 65 secs in milliseconds.
+	 */
+	if (likely(rtt < (1UL << sizeof(((TfwPcntCtl *)0)->end) * 8)))
+		__tfw_apm_update(&((TfwApmData *)apmdata)->rbuf, jtstamp, rtt);
 }
 
 /*
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index d2e909dfe..f82826380 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -401,7 +401,7 @@ __tfw_http_req_nip_delist(TfwSrvConn *srv_conn, TfwHttpReq *req)
 
 /*
  * Put @req on the list of non-idempotent requests in @srv_conn. 
- * Raise the flag saying that the connection has non-idempotent requests.
+ * Raise the flag saying that @srv_conn has non-idempotent requests.
  */
 static inline void
 __tfw_http_req_nip_enlist(TfwSrvConn *srv_conn, TfwHttpReq *req)
@@ -413,8 +413,7 @@ __tfw_http_req_nip_enlist(TfwSrvConn *srv_conn, TfwHttpReq *req)
 
 /*
  * Remove @req from the list of non-idempotent requests in @srv_conn.
- * @req is verified to be on the list. Does nothing if @req is NOT on
- * the list.
+ * Does nothing if @req is NOT on the list.
  */
 static inline void
 tfw_http_req_nip_delist(TfwSrvConn *srv_conn, TfwHttpReq *req)
@@ -623,8 +622,8 @@ __tfw_http_req_fwd_single(TfwSrvConn *srv_conn, TfwServer *srv,
 /*
  * Forward unsent requests in server connection @srv_conn. The requests
  * are forwarded until a non-idempotent request is found in the queue.
- * It's assumed that the forwarding queue in @srv_conn is locked.
- * IT's also assumed that the forwarding queue is NOT drained.
+ * It's assumed that the forwarding queue in @srv_conn is locked and
+ * NOT drained.
  */
 static void
 __tfw_http_req_fwd_unsent(TfwSrvConn *srv_conn, struct list_head *equeue)
@@ -687,9 +686,10 @@ tfw_http_req_fwd_unsent(TfwSrvConn *srv_conn, struct list_head *equeue)
  *
  * Forwarding to a server is considered to be on hold after
  * a non-idempotent request is forwarded. The hold is removed when
- * the holding non-idempotent request is followed by another request
- * from the same client. Effectively, that re-enables pipelining.
- * See RFC 7230 6.3.2.
+ * a response is received to the holding request. The hold is also
+ * removed when the holding non-idempotent request is followed by
+ * another request from the same client. Effectively, that re-enables
+ * pipelining. See RFC 7230 6.3.2.
  */
 static void
 tfw_http_req_fwd(TfwSrvConn *srv_conn, TfwHttpReq *req)
@@ -721,11 +721,11 @@ tfw_http_req_fwd(TfwSrvConn *srv_conn, TfwHttpReq *req)
  *
  * A non-idempotent request that was forwarded but not responded to
  * is not re-sent or re-scheduled by default. Configuration option
- * can be used to have that request re-sent or re-scheduled as well.
+ * can be used to have that request re-sent or re-scheduled.
  *
  * As forwarding is paused after a non-idempotent request is sent,
- * there can be only one such request among forwarded requests,
- * and that's @srv_conn->msg_sent.
+ * there can be only one such request among forwarded requests, and
+ * that's @srv_conn->msg_sent.
  *
  * Note: @srv_conn->msg_sent may change in result.
  */
@@ -854,10 +854,11 @@ __tfw_http_req_fwd_repair(TfwSrvConn *srv_conn, struct list_head *equeue)
  * connection queue, and NOT according to their original timestamps.
  * That's the intended behaviour. These requests are unlucky already.
  * They were delayed by waiting in their original server connections,
- * and then by the re-scheduling procedure itself. Now they have much
- * greater chance to be evicted when it's their turn to be forwarded.
- * The main effort is put into servicing requests that are on time.
- * Unlucky requests are just given another chance with minimal effort.
+ * and then by the time spent on multiple attempts to reconnect. Now
+ * they have much greater chance to be evicted when it's their turn
+ * to be forwarded. The main effort is put into servicing requests
+ * that are on time. Unlucky requests are just given another chance
+ * with minimal effort.
  */
 static void
 tfw_http_req_resched(TfwSrvConn *srv_conn, struct list_head *equeue)
@@ -869,7 +870,7 @@ tfw_http_req_resched(TfwSrvConn *srv_conn, struct list_head *equeue)
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, conn);
 
-	/* Treat the non-idempotent request if any. */
+	/* Treat a non-idempotent request if any. */
 	tfw_http_req_fwd_treatnip(srv_conn, equeue);
 
 	/* Process complete queue. */
@@ -889,7 +890,7 @@ tfw_http_req_resched(TfwSrvConn *srv_conn, struct list_head *equeue)
 }
 
 /*
- * Repair a connection. MAkes sense only for server connections.
+ * Repair a connection. Makes sense only for server connections.
  *
  * Find requests in the server's connection queue that were forwarded
  * to the server. These are unanswered requests. According to RFC 7230
@@ -897,6 +898,10 @@ tfw_http_req_resched(TfwSrvConn *srv_conn, struct list_head *equeue)
  * establishment". To address that, re-send the first request to the
  * server. When a response comes, that will trigger resending of the
  * rest of those unanswered requests (__tfw_http_req_fwd_repair()).
+ *
+ * No need to take a reference on the server connection here as this
+ * is executed as part of establishing the connection. It definitely
+ * can't go away.
  */
 static void
 tfw_http_conn_repair(TfwConn *conn)
@@ -919,7 +924,7 @@ tfw_http_conn_repair(TfwConn *conn)
 	BUG_ON(!tfw_srv_conn_restricted(srv_conn));
 
 	spin_lock(&srv_conn->fwd_qlock);
-	/* Treat the non-idempotent request if any. */
+	/* Treat a non-idempotent request if any. */
 	tfw_http_req_fwd_treatnip(srv_conn, &equeue);
 	/* Re-send the first unanswered request. */
 	if (srv_conn->msg_sent) {
@@ -955,9 +960,9 @@ tfw_http_req_destruct(void *msg)
 }
 
 /*
- * Allocate a new HTTP message structure, and link it with
- * the connection structure. Increment the number of users
- * of the connection structure. Initialize GFSM for the message.
+ * Allocate a new HTTP message structure and link it with the connection
+ * instance. Increment the number of users of the instance. Initialize
+ * GFSM for the message.
  */
 static TfwMsg *
 tfw_http_conn_msg_alloc(TfwConn *conn)
@@ -989,19 +994,19 @@ tfw_http_conn_msg_alloc(TfwConn *conn)
 
 /*
  * Free an HTTP message.
- * Also, free the connection structure if there's no more references.
+ * Also, free the connection instance if there's no more references.
  *
  * This function should be used anytime when there's a chance that
- * a connection structure may belong to multiple messages, which is
+ * a connection instance may belong to multiple messages, which is
  * almost always. If a connection is suddenly closed then it still
  * can be safely dereferenced and used in the code.
- * In rare cases we're sure that a connection structure in a message
- * doesn't have multiple users. For instance, when an error response
+ * In rare cases we're sure that a connection instance in a message
+ * doesn't have multiple users. For example, when an error response
  * is prepared and sent by Tempesta, that HTTP message does not need
- * a connection structure. The message is then immediately destroyed,
+ * a connection instance. The message is then immediately destroyed,
  * and a simpler tfw_http_msg_free() can be used for that.
  *
- * NOTE: @hm->conn might be NULL if @hm is the response that was served
+ * NOTE: @hm->conn may be NULL if @hm is the response that was served
  * from cache.
  */
 static void
@@ -1012,9 +1017,9 @@ tfw_http_conn_msg_free(TfwHttpMsg *hm)
 
 	if (hm->conn) {
 		/*
-		 * Unlink connection while there is at least one reference.
-		 * Use atomic exchange to avoid races with new messages
-		 * arriving on the connection.
+		 * Unlink the connection while there is at least one
+		 * reference. Use atomic exchange to avoid races with
+		 * new messages arriving on the connection.
 		 */
 		__cmpxchg((unsigned long *)&hm->conn->msg, (unsigned long)hm,
 			  0UL, sizeof(long));
@@ -1027,7 +1032,7 @@ tfw_http_conn_msg_free(TfwHttpMsg *hm)
 /*
  * Connection with a peer is created.
  *
- * Called when a connection is created. We need to initialize connection
+ * Called when a connection is created. Initialize the connection's
  * state machine here.
  */
 static int
@@ -1171,7 +1176,7 @@ tfw_http_conn_drop(TfwConn *conn)
 
 	if (TFW_CONN_TYPE(conn) & Conn_Clnt) {
 		tfw_http_conn_cli_drop((TfwCliConn *)conn);
-	} else if (conn->msg) {
+	} else if (conn->msg) {		/* Conn_Srv */
 		if (tfw_http_parse_terminate((TfwHttpMsg *)conn->msg))
 			tfw_http_resp_terminate((TfwHttpMsg *)conn->msg);
 	}
@@ -1208,11 +1213,10 @@ tfw_http_msg_create_sibling(TfwHttpMsg *hm, struct sk_buff **skb,
 		return NULL;
 
 	/*
-	 * The sibling message is set up with a new SKB as
-	 * the starting SKB. The new SKB is split off from
-	 * the original SKB and contains the first part of
-	 * new message. The original SKB is shrunk to have
-	 * just data from the original message.
+	 * The sibling message is set up to start with a new SKB.
+	 * The new SKB is split off from the original SKB and has
+	 * the first part of the new message. The original SKB is
+	 * shrunk to have just data from the original message.
 	 */
 	nskb = ss_skb_split(*skb, split_offset);
 	if (!nskb) {
@@ -1225,6 +1229,9 @@ tfw_http_msg_create_sibling(TfwHttpMsg *hm, struct sk_buff **skb,
 	return shm;
 }
 
+/*
+ * Add 'Date:' header field to an HTTP message.
+ */
 static int
 tfw_http_set_hdr_date(TfwHttpMsg *hm)
 {
@@ -1246,9 +1253,8 @@ tfw_http_set_hdr_date(TfwHttpMsg *hm)
  * Remove Connection header from HTTP message @msg if @conn_flg is zero,
  * and replace or set a new header value otherwise.
  *
- * skb's can be shared between number of HTTP messages. We don't copy skb if
- * it's shared - we modify skb's safely and shared skb is still owned by one
- * CPU.
+ * SKBs may be shared by several HTTP messages. A shared SKB is not copied
+ * but safely modified. Thus, a shared SKB is still owned by one CPU.
  */
 static int
 tfw_http_set_hdr_connection(TfwHttpMsg *hm, int conn_flg)
@@ -1970,17 +1976,18 @@ tfw_http_resp_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 	 * Responses from cache don't have @resp->conn. Also, for those
 	 * responses @req->jtxtstamp is not set and remains zero.
 	 *
-	 * APM stats can't handle response times that are >= USHORT_MAX
-	 * which is about 65 secs.
+	 * TODO: Currently APM holds the pure roundtrip time (RTT) from
+	 * the time a request is forwarded to the time a response to it
+	 * is received and parsed. Perhaps it makes sense to penalize
+	 * server connections which get broken too often. What would be
+	 * a fast and simple algorithm for that? Keep in mind, that the
+	 * value of RTT has an upper boundary in the APM.
 	 */
-	if (resp->conn) {
-		unsigned long rtt = resp->jrxtstamp - req->jtxtstamp;
-		if (likely(rtt < USHRT_MAX))
-			tfw_apm_update(((TfwServer *)resp->conn->peer)->apm,
-					resp->jrxtstamp, rtt);
-	}
+	if (resp->conn)
+		tfw_apm_update(((TfwServer *)resp->conn->peer)->apm,
+				resp->jrxtstamp,
+				resp->jrxtstamp - req->jtxtstamp);
 	tfw_http_resp_fwd(req, resp);
-	return;
 }
 
 /*
@@ -2016,8 +2023,12 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 	tfw_http_req_delist(srv_conn, req);
 	tfw_http_conn_nip_delist(srv_conn);
 	/*
-	 * Perform special processing if the connection is in repair
+	 * Run special processing if the connection is in repair
 	 * mode. Otherwise, forward pending requests to the server.
+	 *
+	 * @hmresp is holding a reference to the server connection
+	 * while forwarding is done, so there's no need to take an
+	 * additional reference.
 	 */
 	if (unlikely(tfw_srv_conn_restricted(srv_conn)))
 		__tfw_http_req_fwd_repair(srv_conn, &equeue);
diff --git a/tempesta_fw/sched/tfw_sched_hash.c b/tempesta_fw/sched/tfw_sched_hash.c
index c3c0c8ed5..88e6fbb17 100644
--- a/tempesta_fw/sched/tfw_sched_hash.c
+++ b/tempesta_fw/sched/tfw_sched_hash.c
@@ -157,7 +157,7 @@ tfw_sched_hash_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 		}
 		if (unlikely(!best_srv_conn))
 			return NULL;
-		if (tfw_srv_conn_get_if_live(best_srv_conn))
+		if (likely(tfw_srv_conn_get_if_live(best_srv_conn)))
 			return best_srv_conn;
 	}
 	return NULL;
diff --git a/tempesta_fw/sched/tfw_sched_rr.c b/tempesta_fw/sched/tfw_sched_rr.c
index cd107ff3f..d3e2433f1 100644
--- a/tempesta_fw/sched/tfw_sched_rr.c
+++ b/tempesta_fw/sched/tfw_sched_rr.c
@@ -143,7 +143,7 @@ tfw_sched_rr_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 					nipconn++;
 				continue;
 			}
-			if (tfw_srv_conn_get_if_live(srv_conn))
+			if (likely(tfw_srv_conn_get_if_live(srv_conn)))
 				return srv_conn;
 		}
 	}

From 7f867d28c0e038edc39e1bb13571b1ac3e323c9d Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Mon, 30 Jan 2017 22:23:58 +0300
Subject: [PATCH 52/65] Describe queues in client and server connections.

---
 tempesta_fw/connection.h | 33 +++++++++++++++++++++++++++++++++
 tempesta_fw/http.c       |  8 ++++++++
 2 files changed, 41 insertions(+)

diff --git a/tempesta_fw/connection.h b/tempesta_fw/connection.h
index d389a61a4..acde1ca3d 100644
--- a/tempesta_fw/connection.h
+++ b/tempesta_fw/connection.h
@@ -103,6 +103,39 @@ typedef struct {
 
 #define TFW_CONN_TYPE(c)	((c)->proto.type)
 
+/*
+ * Queues in client and server connections provide support for correct
+ * handling of requests and responses.
+ *
+ * Incoming requests are put on client connection's @seq_queue in the
+ * order they come in. When responses to these requests come, they're
+ * sent back to client in exactly the same order the requests came in.
+ * @seq_queue is contended by threads that process requests, as well
+ * as by threads that process responses. In the latter case that may
+ * not lead to sending a response. Thus a separate @ret_qlock is used
+ * for sending responses to decrease the time @seq_qlock is taken for.
+ *
+ * Unless serviced from cache, each request is forwarded to a server
+ * over specific server connection. It's put on server connection's
+ * @fwd_queue, and also on @nip_queue if it's non-idempotent. Requests
+ * must be forwarded in the same order they're put on @fwd_queue, so
+ * it must be done under the queue lock. Otherwise pairing of requests
+ * to responses may get broken. When a response comes then the first
+ * request is taken out of @fwd_queue, and that's the paired request.
+ * There're two types of requests in @fwd_queue: those that were sent
+ * out, and those that were not sent out yet. @msg_sent points at the
+ * latest request that was sent out. That is helpful when repairing
+ * a server connection that had gone bad.
+ *
+ * A request is in @seq_queue until it's deleted, and may also be in
+ * @fwd_queue if it's forwarded to a server. @nip_queue supplements
+ * @fwd_queue and may be considered as part of @fwd_queue for this
+ * description. A response is never put on any queue. Instead, it's
+ * attached to a paired request as @req->resp. A request is always
+ * processed in the context of just one queue at any given moment.
+ * That way NO locking hierarchy is involved. Please see the code.
+ */
+
 /*
  * These are specific properties that are relevant to client connections.
  *
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index f82826380..aafa39f09 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -690,6 +690,14 @@ tfw_http_req_fwd_unsent(TfwSrvConn *srv_conn, struct list_head *equeue)
  * removed when the holding non-idempotent request is followed by
  * another request from the same client. Effectively, that re-enables
  * pipelining. See RFC 7230 6.3.2.
+ *
+ * Requests must be forwarded in the same order they are put in the
+ * queue, and so it must be done under the queue lock, otherwise
+ * pairing of requests with responses may get broken. Take a simple
+ * scenario. CPU-1 locks the queue, adds a request to it, unlocks
+ * the queue. CPU-2 does the same after CPU-1 (the queue was locked).
+ * After that CPU2 and CPU2 are fully concurrent. If CPU2 happens to
+ * proceed first with forwarding, then pairing gets broken.
  */
 static void
 tfw_http_req_fwd(TfwSrvConn *srv_conn, TfwHttpReq *req)

From 4131412cadb3d8219322eaa69c6c549f26047f7d Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Wed, 1 Feb 2017 23:09:08 +0300
Subject: [PATCH 53/65] Fix var names in debug statements. Also, fix
 TfwSrvConn{} init.

---
 tempesta_fw/connection.h |  1 -
 tempesta_fw/http.c       | 22 +++++++++++++++-------
 tempesta_fw/sock_srv.c   |  9 ++++++---
 3 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/tempesta_fw/connection.h b/tempesta_fw/connection.h
index acde1ca3d..05bfad695 100644
--- a/tempesta_fw/connection.h
+++ b/tempesta_fw/connection.h
@@ -438,7 +438,6 @@ tfw_connection_validate_cleanup(TfwConn *conn)
 
 	BUG_ON(!conn);
 	BUG_ON(!list_empty(&conn->list));
-	BUG_ON(atomic_read(&conn->refcnt) & ~1);	/* FIXME */
 	BUG_ON(conn->msg);
 
 	rc = atomic_read(&conn->refcnt);
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index aafa39f09..4ccf0dfb6 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -528,7 +528,7 @@ tfw_http_req_zap_error(struct list_head *equeue)
 	TfwHttpReq *req, *tmp;
 
 	TFW_DBG2("%s: queue is %sempty\n",
-		 __func__, list_empty(err_queue) ? "" : "NOT ");
+		 __func__, list_empty(equeue) ? "" : "NOT ");
 
 	list_for_each_entry_safe(req, tmp, equeue, fwd_list) {
 		list_del_init(&req->fwd_list);
@@ -559,7 +559,7 @@ tfw_http_req_evict_timeout(TfwSrvConn *srv_conn, TfwServer *srv,
 	if (unlikely(time_after(jqage, srv->sg->max_jqage))) {
 		TFW_DBG2("%s: Eviction: req=[%p] overdue=[%dms]\n",
 			 __func__, req,
-			 jiffies_to_msecs(jqage - srv->max_jqage));
+			 jiffies_to_msecs(jqage - srv->sg->max_jqage));
 		tfw_http_req_move2equeue(srv_conn, req, equeue, 504);
 		return true;
 	}
@@ -696,8 +696,8 @@ tfw_http_req_fwd_unsent(TfwSrvConn *srv_conn, struct list_head *equeue)
  * pairing of requests with responses may get broken. Take a simple
  * scenario. CPU-1 locks the queue, adds a request to it, unlocks
  * the queue. CPU-2 does the same after CPU-1 (the queue was locked).
- * After that CPU2 and CPU2 are fully concurrent. If CPU2 happens to
- * proceed first with forwarding, then pairing gets broken.
+ * After that CPU-1 and CPU-2 are fully concurrent. If CPU-2 happens
+ * to proceed first with forwarding, then pairing gets broken.
  */
 static void
 tfw_http_req_fwd(TfwSrvConn *srv_conn, TfwHttpReq *req)
@@ -767,7 +767,7 @@ tfw_http_req_resend(TfwSrvConn *srv_conn, bool first, struct list_head *equeue)
 	struct list_head *end, *fwd_queue = &srv_conn->fwd_queue;
 
 	TFW_DBG2("%s: conn=[%p] one_msg=[%s]\n",
-		 __func__, srv_conn, one_msg ? "true" : "false");
+		 __func__, srv_conn, first ? "true" : "false");
 	BUG_ON(!srv_conn->msg_sent);
 	BUG_ON(list_empty(&((TfwHttpReq *)srv_conn->msg_sent)->fwd_list));
 
@@ -876,7 +876,7 @@ tfw_http_req_resched(TfwSrvConn *srv_conn, struct list_head *equeue)
 	TfwServer *srv = (TfwServer *)srv_conn->peer;
 	struct list_head *fwd_queue = &srv_conn->fwd_queue;
 
-	TFW_DBG2("%s: conn=[%p]\n", __func__, conn);
+	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 
 	/* Treat a non-idempotent request if any. */
 	tfw_http_req_fwd_treatnip(srv_conn, equeue);
@@ -907,6 +907,8 @@ tfw_http_req_resched(TfwSrvConn *srv_conn, struct list_head *equeue)
  * server. When a response comes, that will trigger resending of the
  * rest of those unanswered requests (__tfw_http_req_fwd_repair()).
  *
+ * The connection is not scheduled until all requests in it are re-sent.
+ *
  * No need to take a reference on the server connection here as this
  * is executed as part of establishing the connection. It definitely
  * can't go away.
@@ -1130,7 +1132,7 @@ __tfw_http_resp_pair_free(TfwHttpReq *req)
  *
  * Desintegrate the client connection's @seq_list. Requests that have
  * a paired response can be freed. Move those to @zap_queue for doing
- * it without the lock. Requests without a paired response have not
+ * that without the lock. Requests without a paired response have not
  * been answered yet. They are held in the lists of server connections
  * until responses come. Don't free those requests.
  *
@@ -1153,6 +1155,12 @@ tfw_http_conn_cli_drop(TfwCliConn *cli_conn)
 	if (list_empty_careful(seq_queue))
 		return;
 
+	/*
+	 * Desintegration of the list must be done under the lock.
+	 * The list can't be just detached from seq_queue, and then
+	 * be desintegrated without the lock. That would open a race
+	 * condition with freeing of a request in tfw_http_resp_fwd().
+	 */
 	spin_lock(&cli_conn->seq_qlock);
 	list_for_each_entry_safe(req, tmp, seq_queue, msg.seq_list) {
 		if (req->resp)
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index b326b869d..22f05fe0a 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -230,8 +230,9 @@ tfw_sock_srv_connect_try_later(TfwSrvConn *srv_conn)
 	}
 	if (srv_conn->recns < ARRAY_SIZE(tfw_srv_tmo_vals)) {
 		timeout = tfw_srv_tmo_vals[srv_conn->recns];
-		TFW_DBG_ADDR("Cannot establish connection",
-			     &srv_conn->peer->addr);
+		if (srv_conn->recns)
+			TFW_DBG_ADDR("Cannot establish connection",
+				     &srv_conn->peer->addr);
 	} else {
 		timeout = tfw_srv_tmo_vals[ARRAY_SIZE(tfw_srv_tmo_vals) - 1];
 		if (srv_conn->recns == ARRAY_SIZE(tfw_srv_tmo_vals)
@@ -471,6 +472,8 @@ tfw_srv_conn_alloc(void)
 		return NULL;
 
 	tfw_connection_init((TfwConn *)srv_conn);
+	memset((char *)srv_conn + sizeof(TfwConn), 0,
+	       sizeof(TfwSrvConn) - sizeof(TfwConn));
 	INIT_LIST_HEAD(&srv_conn->fwd_queue);
 	INIT_LIST_HEAD(&srv_conn->nip_queue);
 	spin_lock_init(&srv_conn->fwd_qlock);
@@ -851,7 +854,7 @@ tfw_cfgop_begin_srv_group(TfwCfgSpec *cs, TfwCfgEntry *ce)
 		return -EINVAL;
 	}
 
-	TFW_DBG("begin srv_group: %s\n", sg->name);
+	TFW_DBG("begin srv_group: %s\n", tfw_cfg_in_sg->name);
 
         tfw_cfg_in_slstsz = 0;
         tfw_cfg_in_sched = tfw_cfg_out_sched;

From 1510439e7d1cb49d9caa8b901272353b08b36046 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Mon, 13 Feb 2017 20:21:28 +0300
Subject: [PATCH 54/65] Release all server connection's resources on shutdown.

---
 tempesta_fw/connection.h |  2 --
 tempesta_fw/http.c       |  7 +++++--
 tempesta_fw/sock.c       | 11 ++++++++---
 tempesta_fw/sock_srv.c   | 17 +++++++----------
 4 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/tempesta_fw/connection.h b/tempesta_fw/connection.h
index 05bfad695..1ccc19912 100644
--- a/tempesta_fw/connection.h
+++ b/tempesta_fw/connection.h
@@ -180,13 +180,11 @@ enum {
 	TFW_CONN_B_RESEND = 0,	/* Need to re-send requests. */
 	TFW_CONN_B_QFORWD,	/* Need to forward requests in the queue. */
 	TFW_CONN_B_HASNIP,	/* Has non-idempotent requests. */
-	TFW_CONN_B_ISDEAD,	/* Is dead, unable to reconnect. */
 };
 
 #define TFW_CONN_F_RESEND	(1 << TFW_CONN_B_RESEND)
 #define TFW_CONN_F_QFORWD	(1 << TFW_CONN_B_QFORWD)
 #define TFW_CONN_F_HASNIP	(1 << TFW_CONN_B_HASNIP)
-#define TFW_CONN_F_ISDEAD	(1 << TFW_CONN_B_ISDEAD)
 
 /**
  * TLS hardened connection.
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 4ccf0dfb6..bd3072a5f 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -1054,7 +1054,6 @@ tfw_http_conn_init(TfwConn *conn)
 		TfwSrvConn *srv_conn = (TfwSrvConn *)conn;
 		if (!list_empty(&srv_conn->fwd_queue))
 			set_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
-		clear_bit(TFW_CONN_B_ISDEAD, &srv_conn->flags);
 	}
 	tfw_gfsm_state_init(&conn->state, conn, TFW_HTTP_FSM_INIT);
 	return 0;
@@ -1107,7 +1106,7 @@ tfw_http_conn_release(TfwConn *conn)
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
 
-	if (unlikely(test_bit(TFW_CONN_B_ISDEAD, &srv_conn->flags))) {
+	if (unlikely(!ss_active())) {
 		tfw_http_conn_srv_release(srv_conn);
 		return;
 	}
@@ -1516,6 +1515,10 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 	 * connection had been closed. If it's a bug, then the correct
 	 * order of responses to requests may be broken. The connection
 	 * with the client must to be closed immediately.
+	 *
+	 * Doing ss_close_sync() on client connection's socket is safe
+	 * as long as @req that holds a reference to the connection is
+	 * not freed.
 	 */
 	spin_lock(&cli_conn->seq_qlock);
 	if (unlikely(list_empty(seq_queue))) {
diff --git a/tempesta_fw/sock.c b/tempesta_fw/sock.c
index 4aa1da038..9c99fecd9 100644
--- a/tempesta_fw/sock.c
+++ b/tempesta_fw/sock.c
@@ -524,7 +524,7 @@ ss_linkerror(struct sock *sk)
 int
 __ss_close(struct sock *sk, int flags)
 {
-	if (unlikely(!(sk && ss_sock_live(sk))))
+	if (unlikely(!sk))
 		return SS_OK;
 	sk_incoming_cpu_update(sk);
 
@@ -552,10 +552,15 @@ __ss_close(struct sock *sk, int flags)
 	 * the queued work is closing and simply pretend that socket closing
 	 * event happened before the socket transmission event.
 	 *
-	 * The socket is owned by current CPU, so don't need to check its
-	 * liveness.
+	 * The socket is owned by current CPU, so there's no need to check
+	 * if it's live. However, in some cases this may be called multiple
+	 * times on the same socket. Do it only once for the socket.
 	 */
 	bh_lock_sock(sk);
+	if (unlikely(!ss_sock_live(sk))) {
+		bh_unlock_sock(sk);
+		return SS_OK;
+	}
 	ss_do_close(sk);
 	bh_unlock_sock(sk);
 	if (flags & SS_F_CONN_CLOSE)
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index 22f05fe0a..48a7984fe 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -216,9 +216,7 @@ tfw_sock_srv_connect_try_later(TfwSrvConn *srv_conn)
 	 * never be reached. UINT_MAX seconds is more than 136 years. It's
 	 * safe to assume that it's not reached in a single run of Tempesta.
 	 */
-	if (unlikely((srv_conn->recns >= sg->max_recns)
-		     && !test_bit(TFW_CONN_B_ISDEAD, &srv_conn->flags)))
-	{
+	if (unlikely(srv_conn->recns == sg->max_recns)) {
 		TfwAddr *srv_addr = &srv_conn->peer->addr;
 		char s_addr[TFW_ADDR_STR_BUF_SIZE] = { 0 };
 		tfw_addr_ntop(srv_addr, s_addr, sizeof(s_addr));
@@ -226,7 +224,6 @@ tfw_sock_srv_connect_try_later(TfwSrvConn *srv_conn)
 			 "The server connection [%s] is down.\n",
 			 sg->max_recns, s_addr);
 		tfw_connection_repair((TfwConn *)srv_conn);
-		set_bit(TFW_CONN_B_ISDEAD, &srv_conn->flags);
 	}
 	if (srv_conn->recns < ARRAY_SIZE(tfw_srv_tmo_vals)) {
 		timeout = tfw_srv_tmo_vals[srv_conn->recns];
@@ -404,14 +401,14 @@ tfw_sock_srv_disconnect(TfwConn *conn)
  *	Global connect/disconnect routines.
  * ------------------------------------------------------------------------
  *
- * At this point, we support only the reverse proxy mode, so we connect to all
- * servers when the Tempesta FW is started, and close all connections when the
- * Tempesta FW is stopped. This section of code is responsible for that.
+ * At this time, only reverse proxy mode is supported. All servers are
+ * connected to when Tempesta is started, and all connections are closed
+ * when Tempesta is stopped. The code in this section takes care of that.
  *
  * This behavior may change in future for a forward proxy implementation.
- * Then we will have a lot of short-living connections. We should keep it in
- * mind to avoid possible bottlenecks. In particular, this is the reason why we
- * don't have a global list of all TfwSrvConn{} objects and store
+ * Then there will be lots of short-living connections. That should be kept
+ * in mind to avoid possible bottlenecks. In particular, that is the reason
+ * for not having a global list of all TfwSrvConn{} objects, and for storing
  * not-yet-established connections in the TfwServer->conn_list.
  */
 

From fb3b579688faac48365c675151e5ac4f49e92ee8 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Tue, 14 Feb 2017 17:07:40 +0300
Subject: [PATCH 55/65] Add the description of all new options to README.md.

---
 README.md            | 118 ++++++++++++++++++++++++++++------
 etc/tempesta_fw.conf | 148 +++++++++++++++++++++++++++++++++++--------
 2 files changed, 220 insertions(+), 46 deletions(-)

diff --git a/README.md b/README.md
index db531a0fe..7899803c8 100644
--- a/README.md
+++ b/README.md
@@ -142,7 +142,7 @@ ssl_certificate_key /path/to/tfw-root.key;
 
 Also, `proto=https` option is needed for the `listen` directive.
 
-#### Self-signed certificate genration
+#### Self-signed certificate generation
 
 In case of using a self-signed certificate with Tempesta, it's
 convenient to use OpenSSL to generate a key and a certificate. The
@@ -301,6 +301,31 @@ just one example:
 curl -X PURGE http://192.168.10.10/
 ```
 
+#### Non-Idempotent Requests
+
+The consideration of whether a request is considered non-idempotent may
+depend on specific application, server, and/or service. A special directive
+allows the definition of a request that will be considered non-idempotent:
+```
+nonidempotent <METHOD> <OP> <ARG>;
+```
+`METHOD` is one of supported HTTP methods, such as GET, HEAD, POST, etc.
+`OP` is a string matching operator, such as `eq`, `prefix`, etc.
+`ARG` is an argument for `OP`, such as `/foo/bar.html`, `example.com`, etc.
+
+One or more of this directive may be specified. The directives apply to one
+or more locations as defined below in the [Locations](#Locations) section.
+
+If this directive is not specified, then a non-idempotent request is defined
+as a request that has an unsafe method.
+
+Below are examples of this directive:
+```
+nonidempotent GET prefix "/users/";
+nonidempotent POST prefix "/users/";
+nonidempotent GET suffix "/data";
+```
+
 ### Locations
 
 Location is a way of grouping certain directives that are applied only
@@ -321,9 +346,10 @@ location <OP> "<string>" {
 Multiple locations may be defined. Location directives are processed
 strictly in the order they are defined in the configuration file.
 
-Only caching policy directives may currently be grouped by the location
-directive. Caching policy directives defined outside of any specific
-location are considered the default policy for all locations.
+Only caching policy directives and the `nonidempotent` directive may
+currently be grouped by the location directive. The directives defined
+outside of any specific location are considered the default policy for
+all locations.
 
 When locations are defined in the configuration, the URL of each request
 is matched against strings specified in the location directives and using
@@ -351,6 +377,7 @@ location prefix "/society/" {
 	cache_bypass prefix "/society/breaking_news/";
 	cache_fulfill suffix ".jpg" ".png";
 	cache_fulfill suffix ".css";
+	nonidempotent GET prefix "/society/users/";
 }
 ```
 
@@ -367,7 +394,7 @@ server <IPADDR>[:<PORT>] [conns_n=<N>];
 IPv6 address must be enclosed in square brackets (e.g. "[::0]" but not "::0").
 `PORT` defaults to 80 if not specified.
 `conns_n=<N>` is the number of parallel connections to the server.
-`N` defaults to 4 if not specified.
+`N` defaults to 32 if not specified.
 
 Multiple back end servers may be defined. For example:
 ```
@@ -375,6 +402,55 @@ server 10.1.0.1;
 server [fc00::1]:80;
 ```
 
+if a connection with a server is terminated for any reason, an effort is made
+to restore the connection. Sometimes the effort is futile. The directive
+`connect_tries` sets the maximum number of re-connect attempts after which
+the server connection is considered dead. It is defined as follows:
+```
+connect_tries <N>;
+```
+If this directive is not defined, then the number of re-connect attempts
+defaults to 10. A value of zero specified for `N` means unlimited number
+of attempts.
+
+This is an important directive which controls how Tempesta deals with
+outstanding requests in a failed connection. If the connection is restored
+within the specified number of attempts, then all outstanding requests are
+re-forwarded to the server. However if it's not restored, then the server
+connection is considered dead, and all outstanding requests are re-scheduled
+to other servers and/or connections.
+
+If a server connection fails intermittently, then requests may sit in the
+connection's forwarding queue for some time. The following directives set
+certain allowed limits before these requests are considered failed:
+```
+server_forward_retries <N>;
+server_forward_timeout <N>;
+```
+`server_forward_retries` sets the maximum number of attempts to re-forward
+a request to a server. If not defined, the default number of attempts is 5.
+`server_forward_timeout` sets the maximum time frame in seconds within which
+a request may still be forwarded. If not defined, the default time frame
+is 60 seconds. When one or both of these limits is exceeded for a request,
+the request is evicted and an error is returned to a client.
+
+When re-forwarding or re-scheduling requests in a failed server connection,
+a special consideration is given to non-idempotent requests. Usually
+a non-idempotent request is not re-forwarded or re-scheduled. That may be
+changed with the following directive that doesn't have arguments:
+```
+server_retry_non_idempotent;
+```
+
+Each server connection has a queue of forwarded requests. The size of the
+queue is limited with `server_queue_size` directive as follows:
+```
+server_queue_size <N>;
+```
+Each connection to the server has the same limit on the queue size set
+with this directive. If not specified, the queue size is set to 1000.
+
+
 #### Server Groups
 
 Back end servers can be grouped together into a single unit for the purpose of
@@ -383,33 +459,39 @@ The load is distributed evenly among servers within a group.
 If a server goes offline, other servers in a group take the load.
 The full syntax is as follows:
 ```
-srv_group <NAME> [sched=<SCHED_NAME>] {
+srv_group <NAME> {
 	server <IPADDR>[:<PORT>] [conns_n=<N>];
 	...
 }
 ```
 `NAME` is a unique identifier of the group that may be used to refer to it
 later.
-`SCHED_NAME` is the name of scheduler module that distributes load among
-servers within the group. Default scheduler is used if `sched` parameter is
-not specified.
 
 Servers that are defined outside of any group implicitly form a special group
 called `default`.
 
+All server-related directives listed in [Servers](#Servers) section above
+are applicable for definition for a server group. Also, a scheduler may be
+specified for a group.
+
 Below is an example of server group definition:
 ```
-srv_group static_storage sched=hash {
+srv_group static_storage {
+	sched hash;
 	server 10.10.0.1:8080;
 	server 10.10.0.2:8080;
 	server [fc00::3]:8081 conns_n=1;
+	server_queue_size 500;
+	server_forward_timeout 30;
+	connect_tries 15;
 }
 ```
 
 #### Schedulers
 
-Scheduler is used to distribute load among known servers. The syntax is as
-follows:
+Scheduler is used to distribute load among servers within a group. The group
+can be either explicit, defined with `srv_group` directive, or implicit.
+The syntax is as follows:
 ```
 sched <SCHED_NAME>;
 ```
@@ -423,14 +505,14 @@ scheduler.
 Requests are distributed uniformly, and requests with the same URI/Host are
 always sent to the same server.
 
-If no scheduler is defined, then scheduler defaults to `round-robin`.
+Only one `sched` directive is allowed per explicit or implicit group.
+A scheduler defined for the implicit group becomes the scheduler for an
+explicit group defined with `srv_group` directive if the explicit group
+is missing the `sched` directive.
 
-The defined scheduler affects all server definitions that are missing a
-scheduler definition. If `srv_group` is missing a scheduler definition,
-and there is a scheduler defined, then that scheduler is set for the group.
+If no scheduler is defined for a group, then scheduler defaults
+to `round-robin`.
 
-Multiple `sched` directives may be defined in the configuration file.
-Each directive affects server groups that follow it.
 
 #### HTTP Scheduler
 
diff --git a/etc/tempesta_fw.conf b/etc/tempesta_fw.conf
index 9ece25c18..49f6e0503 100644
--- a/etc/tempesta_fw.conf
+++ b/etc/tempesta_fw.conf
@@ -4,25 +4,33 @@
 
 # TAG: sched.
 #
-# Specifies the scheduler used to distribute load among servers
+# Specifies the scheduler used to distribute the load among servers within
+# a group.
 #
 # Syntax:
 #   sched SCHED_NAME;
 #
-# Currently recognized schedulers are:
-# - hash
-# - round-robin
+# SCHED_NAME is a name of a scheduler module that distributes the load
+# among servers within a group. There are two schedulers available:
+#   - "round-robin" (default) - rotates all servers in the group in
+#     the round-robin manner, so requests are distributed uniformly across
+#     servers.
+#   - "hash" - chooses a server based on a URI/Host hash of a request.
+#     Requests are still distributed uniformly, but a request with the same
+#     URI/Host is always sent to the same server.
 #
-# If not specified, SCHED_NAME defaults to round-robin.
+# Note that there's also the HTTP scheduler. It dispatches requests among
+# server groups only. Round-robin or hash scheduler must be used to select
+# a server within a group.
 #
-# The specified scheduler affects all server definitions that are missing
-# a scheduler definition. If a srv_group is missing a scheduler definition,
-# and a scheduler is specified, then that scheduler is set for the group.
-# Also, if servers outside of any groups are specified, then they form
-# a special group, and the specified scheduler is assigned to that group.
+# A group can be either explicit, defined with `srv_group` directive, or
+# implicit. Only one `sched` directive is allowed per group. A scheduler
+# defined for the implicit group becomes the scheduler for an explicit
+# group defined with `srv_group` directive if the explicit group is missing
+# the `sched` directive.
 #
-# Multiple "sched" directives may be specified in the configuration file.
-# Each directive affects servers groups that follow it.
+# Default:
+#   sched round-robin;
 #
 
 # TAG: server.
@@ -37,12 +45,107 @@
 # PORT defaults to 80 if not set.
 #
 # conns_n=N is the number of parallel connections to the server.
-# The N defaults to 4 if not set.
+# The N defaults to 32 if not set.
 #
 # Multiple back-end servers may be specified, for example:
 #   server 10.1.0.1:80
 #   server [fc00::1]:80;
 #
+# Default:
+#   None.
+
+#
+# TAG: nonidempotent
+#
+# Defines a request that is considered non-idempotent.
+#
+# Syntax:
+#   nonidempotent <METHOD> <OP> <string>
+#
+# <METHOD> is one of supported HTTP methods, such as GET, HEAD, POST, etc.
+# <OP> is a string matching operator, one of "eq", "prefix", "suffix", or "*".
+# <string> is a verbatim string matched against URL in a request.
+#
+# One or more of this directive may be specified. The directives apply to
+# one or more locations, either specific or global (see "location" directive).
+#
+# Example:
+#    nonidempotent GET prefix "/users/";
+#    nonidempotent POST prefix "/users/";
+#    nonidempotent GET suffix "/data";
+#
+# Default:
+#   A non-idempotent request is defined as a request that has an unsafe method.
+#
+
+#
+# TAG: connect_tries
+#
+# Defines the maximum number of attempts to reconnect with a server.
+#
+# Syntax:
+#   connect_tries NUM;
+#
+# If a connection with a server is not re-established after the specified
+# number of attempts, then the connection is considered dead. Outstanding
+# requests in the connection are re-scheduled to other servers and/or
+# connections.
+#
+# Default:
+#   connect_tries 10;
+#
+
+#
+# TAG: server_forward_retries
+#
+# Defines the maximum number of attempts to re-forward a request to a server.
+#
+# Syntax:
+#   server_forward_retries NUM;
+#
+# Default:
+#   server_forward_retries 5;
+#
+
+
+#
+# TAG: server_forward_timeout
+#
+# Defines the maximum time frame within which a request may still be forwarded.
+#
+# Syntax:
+#   server_forward_timeout SECONDS;
+#
+# Default:
+#   server_forward_timeout 60;
+#
+
+#
+# TAG: server_retry_non_idempotent
+#
+# Defines that non-idempotent requests should be re-forwarded.
+#
+# Syntax:
+#   server_retry_non_idempotent;
+#
+# Default:
+#   Do not re-forward non-idempotent requests.
+#
+
+#
+# TAG: server_queue_size
+#
+# Defines the size of the forwarding queue in a server connection.
+#
+# Syntax:
+#   server_queue_size NUM;
+#
+# This is the maximum number of requests that may be in the forwarding
+# queue of a server connection at any given time.
+#
+# Default:
+#   server_queue_size 1000;
+#
 
 # TAG: srv_group
 #
@@ -52,33 +155,22 @@
 # If some server goes offline, other members of the group take its load.
 #
 # Syntax:
-#   srv_group NAME [sched=SCHED_NAME] {
+#   srv_group NAME {
 #       server IPADDR[:PORT] [conns_n=N];
 #       ...
 #   }
 #
 # NAME is a unique identifier of the group that may be used to refer it later.
 #
-# SCHED_NAME is a name of a scheduler module that distributes load among servers
-# within the group. There are three schedulers available:
-#   - "round-robin" (default) - rotates all servers in the group in
-#     the round-robin manner, so requests are distributed uniformely across
-#     servers.
-#   - "hash" - chooses a server based on a URI/Host hash of a request.
-#     Requests are still distributed uniformely, but a request with the same
-#     URI/Host is always sent to the same server.
-#
-# Note that HTTP scheduler dispatches message among server groups only and
-# round-robin or hash scheduler must be used to select a server in a group.
-#
 # IPADDR[:PORT] is the IPv4 or IPv6 address of the server (see: server).
 # conns_n=N is the number of parallel connections to the server (see: server).
 #
 # Examples:
-#   srv_group static_storage sched=hash {
+#   srv_group static_storage {
 #       server 10.10.0.1:8080;
 #       server 10.10.0.2:8080;
 #       server [fc00::3]:8081 conns_n=1;
+#       sched hash;
 #   }
 #
 # Default:
@@ -317,7 +409,7 @@
 #
 # <OP> is a match operator, one of "eq", "prefix", "suffix", or "*".
 # <string> is a verbatim string matched against URL in a request.
-# <directive> is one of "cache_bypass", "cache_fulfill".
+# <directive> is one of "cache_bypass", "cache_fulfill", "nonidempotent".
 #
 # Default:
 #   None.

From 328ff608dc2589efc7e712823ebad71a54891adc Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Wed, 15 Feb 2017 16:50:24 +0300
Subject: [PATCH 56/65] Evict timed out requests from an inactive server
 connection.

When a server connection fails, attempts are made to reconnect with
the server. While these attempts are made, a number of requests may
sit in the connection's queue. Usually they are re-forwarded to the
server when the connection is restored. If the connection is not
restored for a long time then requests in the queue eventually time
out and are evicted. When the limit on the number of reconnect
attempts is reached, then the connection is marked faulty, and
requests in the queue are rescheduled to other, live servers and/or
connections.

Also, better consistency in function names across http.c.
---
 README.md                          | 10 ++--
 etc/tempesta_fw.conf               | 10 ++--
 tempesta_fw/connection.h           | 11 +++++
 tempesta_fw/http.c                 | 78 +++++++++++++++++++-----------
 tempesta_fw/sched/tfw_sched_hash.c |  2 +-
 tempesta_fw/sched/tfw_sched_rr.c   |  2 +-
 tempesta_fw/server.h               | 15 +++++-
 tempesta_fw/sock_srv.c             | 66 ++++++++-----------------
 8 files changed, 108 insertions(+), 86 deletions(-)

diff --git a/README.md b/README.md
index 7899803c8..3d079faee 100644
--- a/README.md
+++ b/README.md
@@ -404,10 +404,10 @@ server [fc00::1]:80;
 
 if a connection with a server is terminated for any reason, an effort is made
 to restore the connection. Sometimes the effort is futile. The directive
-`connect_tries` sets the maximum number of re-connect attempts after which
-the server connection is considered dead. It is defined as follows:
+`server_connect_retries` sets the maximum number of re-connect attempts after
+which the server connection is considered dead. It is defined as follows:
 ```
-connect_tries <N>;
+server_connect_retries <N>;
 ```
 If this directive is not defined, then the number of re-connect attempts
 defaults to 10. A value of zero specified for `N` means unlimited number
@@ -439,7 +439,7 @@ a special consideration is given to non-idempotent requests. Usually
 a non-idempotent request is not re-forwarded or re-scheduled. That may be
 changed with the following directive that doesn't have arguments:
 ```
-server_retry_non_idempotent;
+server_retry_nonidempotent;
 ```
 
 Each server connection has a queue of forwarded requests. The size of the
@@ -483,7 +483,7 @@ srv_group static_storage {
 	server [fc00::3]:8081 conns_n=1;
 	server_queue_size 500;
 	server_forward_timeout 30;
-	connect_tries 15;
+	server_connect_retries 15;
 }
 ```
 
diff --git a/etc/tempesta_fw.conf b/etc/tempesta_fw.conf
index 49f6e0503..681021292 100644
--- a/etc/tempesta_fw.conf
+++ b/etc/tempesta_fw.conf
@@ -79,12 +79,12 @@
 #
 
 #
-# TAG: connect_tries
+# TAG: server_connect_retries
 #
 # Defines the maximum number of attempts to reconnect with a server.
 #
 # Syntax:
-#   connect_tries NUM;
+#   server_connect_retries NUM;
 #
 # If a connection with a server is not re-established after the specified
 # number of attempts, then the connection is considered dead. Outstanding
@@ -92,7 +92,7 @@
 # connections.
 #
 # Default:
-#   connect_tries 10;
+#   server_connect_retries 10;
 #
 
 #
@@ -121,12 +121,12 @@
 #
 
 #
-# TAG: server_retry_non_idempotent
+# TAG: server_retry_nonidempotent
 #
 # Defines that non-idempotent requests should be re-forwarded.
 #
 # Syntax:
-#   server_retry_non_idempotent;
+#   server_retry_nonidempotent;
 #
 # Default:
 #   Do not re-forward non-idempotent requests.
diff --git a/tempesta_fw/connection.h b/tempesta_fw/connection.h
index 1ccc19912..07101f2f4 100644
--- a/tempesta_fw/connection.h
+++ b/tempesta_fw/connection.h
@@ -180,11 +180,13 @@ enum {
 	TFW_CONN_B_RESEND = 0,	/* Need to re-send requests. */
 	TFW_CONN_B_QFORWD,	/* Need to forward requests in the queue. */
 	TFW_CONN_B_HASNIP,	/* Has non-idempotent requests. */
+	TFW_CONN_B_FAULTY,	/* Reconnects failed, need to re-schedule. */
 };
 
 #define TFW_CONN_F_RESEND	(1 << TFW_CONN_B_RESEND)
 #define TFW_CONN_F_QFORWD	(1 << TFW_CONN_B_QFORWD)
 #define TFW_CONN_F_HASNIP	(1 << TFW_CONN_B_HASNIP)
+#define TFW_CONN_F_FAULTY	(1 << TFW_CONN_B_FAULTY)
 
 /**
  * TLS hardened connection.
@@ -270,6 +272,15 @@ tfw_srv_conn_hasnip(TfwSrvConn *srv_conn)
 	return test_bit(TFW_CONN_B_HASNIP, &srv_conn->flags);
 }
 
+/*
+ * Tell if all attempts to re-connect had failed.
+ */
+static inline bool
+tfw_srv_conn_faulty(TfwSrvConn *srv_conn)
+{
+	return test_bit(TFW_CONN_B_FAULTY, &srv_conn->flags);
+}
+
 static inline bool
 tfw_connection_live(TfwConn *conn)
 {
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index bd3072a5f..9dbc20de2 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -607,8 +607,8 @@ tfw_http_req_fwd_send(TfwSrvConn *srv_conn, TfwServer *srv,
  * Return false if forwarding must be stopped, or true otherwise.
  */
 static inline bool
-__tfw_http_req_fwd_single(TfwSrvConn *srv_conn, TfwServer *srv,
-			  TfwHttpReq *req, struct list_head *equeue)
+tfw_http_req_fwd_single(TfwSrvConn *srv_conn, TfwServer *srv,
+			TfwHttpReq *req, struct list_head *equeue)
 {
 	if (tfw_http_req_evict_timeout(srv_conn, srv, req, equeue))
 		return false;
@@ -626,7 +626,7 @@ __tfw_http_req_fwd_single(TfwSrvConn *srv_conn, TfwServer *srv,
  * NOT drained.
  */
 static void
-__tfw_http_req_fwd_unsent(TfwSrvConn *srv_conn, struct list_head *equeue)
+__tfw_http_conn_fwd_unsent(TfwSrvConn *srv_conn, struct list_head *equeue)
 {
 	TfwHttpReq *req, *tmp;
 	TfwServer *srv = (TfwServer *)srv_conn->peer;
@@ -642,7 +642,7 @@ __tfw_http_req_fwd_unsent(TfwSrvConn *srv_conn, struct list_head *equeue)
 
 	/* A frequent case: there's just one request in the queue. */
 	if (likely(list_is_singular(fwd_queue))) {
-		__tfw_http_req_fwd_single(srv_conn, srv, req, equeue);
+		tfw_http_req_fwd_single(srv_conn, srv, req, equeue);
 		/* See if the idempotent request was non-idempotent. */
 		tfw_http_req_nip_delist(srv_conn, req);
 		return;
@@ -654,7 +654,7 @@ __tfw_http_req_fwd_unsent(TfwSrvConn *srv_conn, struct list_head *equeue)
 	 * pending requests.
 	 */
 	list_for_each_entry_safe_from(req, tmp, fwd_queue, fwd_list) {
-		if (!__tfw_http_req_fwd_single(srv_conn, srv, req, equeue))
+		if (!tfw_http_req_fwd_single(srv_conn, srv, req, equeue))
 			continue;
 		/* Stop forwarding if the request is non-idempotent. */
 		if (tfw_http_req_is_nip(req))
@@ -669,12 +669,12 @@ __tfw_http_req_fwd_unsent(TfwSrvConn *srv_conn, struct list_head *equeue)
  * It's assumed that the forwarding queue in @srv_conn is locked.
  */
 static inline void
-tfw_http_req_fwd_unsent(TfwSrvConn *srv_conn, struct list_head *equeue)
+tfw_http_conn_fwd_unsent(TfwSrvConn *srv_conn, struct list_head *equeue)
 {
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 
 	if (tfw_http_conn_need_fwd(srv_conn))
-		__tfw_http_req_fwd_unsent(srv_conn, equeue);
+		__tfw_http_conn_fwd_unsent(srv_conn, equeue);
 }
 
 /*
@@ -716,7 +716,7 @@ tfw_http_req_fwd(TfwSrvConn *srv_conn, TfwHttpReq *req)
 		spin_unlock(&srv_conn->fwd_qlock);
 		return;
 	}
-	__tfw_http_req_fwd_unsent(srv_conn, &equeue);
+	__tfw_http_conn_fwd_unsent(srv_conn, &equeue);
 	spin_unlock(&srv_conn->fwd_qlock);
 
 	if (!list_empty(&equeue))
@@ -738,7 +738,7 @@ tfw_http_req_fwd(TfwSrvConn *srv_conn, TfwHttpReq *req)
  * Note: @srv_conn->msg_sent may change in result.
  */
 static inline void
-tfw_http_req_fwd_treatnip(TfwSrvConn *srv_conn, struct list_head *equeue)
+tfw_http_conn_treatnip(TfwSrvConn *srv_conn, struct list_head *equeue)
 {
 	TfwServer *srv = (TfwServer *)srv_conn->peer;
 	TfwHttpReq *req_sent = (TfwHttpReq *)srv_conn->msg_sent;
@@ -760,7 +760,7 @@ tfw_http_req_fwd_treatnip(TfwSrvConn *srv_conn, struct list_head *equeue)
  * the set limits are evicted.
  */
 static TfwHttpReq *
-tfw_http_req_resend(TfwSrvConn *srv_conn, bool first, struct list_head *equeue)
+tfw_http_conn_resend(TfwSrvConn *srv_conn, bool first, struct list_head *equeue)
 {
 	TfwHttpReq *req, *tmp, *req_resent = NULL;
 	TfwServer *srv = (TfwServer *)srv_conn->peer;
@@ -797,18 +797,18 @@ tfw_http_req_resend(TfwSrvConn *srv_conn, bool first, struct list_head *equeue)
  * Re-send only the first unanswered request in the forwarding queue.
  */
 static inline TfwHttpReq *
-tfw_http_req_resend_first(TfwSrvConn *srv_conn, struct list_head *equeue)
+tfw_http_conn_resend_first(TfwSrvConn *srv_conn, struct list_head *equeue)
 {
-	return tfw_http_req_resend(srv_conn, true, equeue);
+	return tfw_http_conn_resend(srv_conn, true, equeue);
 }
 
 /*
  * Re-send all unanswered requests in the forwarding queue.
  */
 static inline TfwHttpReq *
-tfw_http_req_resend_all(TfwSrvConn *srv_conn, struct list_head *equeue)
+tfw_http_conn_resend_all(TfwSrvConn *srv_conn, struct list_head *equeue)
 {
-	return tfw_http_req_resend(srv_conn, false, equeue);
+	return tfw_http_conn_resend(srv_conn, false, equeue);
 }
 
 /*
@@ -817,7 +817,7 @@ tfw_http_req_resend_all(TfwSrvConn *srv_conn, struct list_head *equeue)
  * The connection is not scheduled until all requests in it are re-sent.
  */
 static void
-__tfw_http_req_fwd_repair(TfwSrvConn *srv_conn, struct list_head *equeue)
+__tfw_http_conn_fwd_repair(TfwSrvConn *srv_conn, struct list_head *equeue)
 {
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 	WARN_ON(!spin_is_locked(&srv_conn->fwd_qlock));
@@ -827,7 +827,7 @@ __tfw_http_req_fwd_repair(TfwSrvConn *srv_conn, struct list_head *equeue)
 		clear_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
 		clear_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
 	} else if (test_bit(TFW_CONN_B_QFORWD, &srv_conn->flags)) {
-		tfw_http_req_fwd_unsent(srv_conn, equeue);
+		tfw_http_conn_fwd_unsent(srv_conn, equeue);
 	} else {
 		/*
 		 * After all previously forwarded requests are re-sent,
@@ -840,12 +840,12 @@ __tfw_http_req_fwd_repair(TfwSrvConn *srv_conn, struct list_head *equeue)
 		 */
 		TfwHttpReq *req_resent = (TfwHttpReq *)srv_conn->msg_sent;
 		if (req_resent) {
-			req_resent = tfw_http_req_resend_all(srv_conn, equeue);
+			req_resent = tfw_http_conn_resend_all(srv_conn, equeue);
 			srv_conn->msg_sent = (TfwMsg *)req_resent;
 		}
 		if (!(req_resent && tfw_http_req_is_nip(req_resent))) {
 			set_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
-			tfw_http_req_fwd_unsent(srv_conn, equeue);
+			tfw_http_conn_fwd_unsent(srv_conn, equeue);
 		}
 	}
 }
@@ -869,7 +869,7 @@ __tfw_http_req_fwd_repair(TfwSrvConn *srv_conn, struct list_head *equeue)
  * with minimal effort.
  */
 static void
-tfw_http_req_resched(TfwSrvConn *srv_conn, struct list_head *equeue)
+tfw_http_conn_resched(TfwSrvConn *srv_conn, struct list_head *equeue)
 {
 	TfwHttpReq *req, *tmp;
 	TfwSrvConn *sch_conn;
@@ -879,7 +879,7 @@ tfw_http_req_resched(TfwSrvConn *srv_conn, struct list_head *equeue)
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 
 	/* Treat a non-idempotent request if any. */
-	tfw_http_req_fwd_treatnip(srv_conn, equeue);
+	tfw_http_conn_treatnip(srv_conn, equeue);
 
 	/* Process complete queue. */
 	list_for_each_entry_safe(req, tmp, fwd_queue, fwd_list) {
@@ -897,6 +897,20 @@ tfw_http_req_resched(TfwSrvConn *srv_conn, struct list_head *equeue)
 	BUG_ON(srv_conn->qsize);
 }
 
+static inline void
+tfw_http_conn_evict_timeout(TfwSrvConn *srv_conn, struct list_head *equeue)
+{
+	TfwHttpReq *req, *tmp;
+	TfwServer *srv = (TfwServer *)srv_conn->peer;
+	struct list_head *fwd_queue = &srv_conn->fwd_queue;
+
+	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
+
+	/* Process complete queue and evict requests that timed out. */
+	list_for_each_entry_safe(req, tmp, fwd_queue, fwd_list)
+		tfw_http_req_evict_timeout(srv_conn, srv, req, equeue);
+}
+
 /*
  * Repair a connection. Makes sense only for server connections.
  *
@@ -905,10 +919,13 @@ tfw_http_req_resched(TfwSrvConn *srv_conn, struct list_head *equeue)
  * 6.3.2, "a client MUST NOT pipeline immediately after connection
  * establishment". To address that, re-send the first request to the
  * server. When a response comes, that will trigger resending of the
- * rest of those unanswered requests (__tfw_http_req_fwd_repair()).
+ * rest of those unanswered requests (__tfw_http_conn_fwd_repair()).
  *
  * The connection is not scheduled until all requests in it are re-sent.
  *
+ * The limit on the number of reconnect attempts is used to re-schedule
+ * requests that would never be forwarded otherwise.
+ *
  * No need to take a reference on the server connection here as this
  * is executed as part of establishing the connection. It definitely
  * can't go away.
@@ -927,7 +944,13 @@ tfw_http_conn_repair(TfwConn *conn)
 	if (unlikely(!tfw_srv_conn_live(srv_conn))) {
 		if (list_empty(&srv_conn->fwd_queue))
 			return;
-		tfw_http_req_resched(srv_conn, &equeue);
+		tfw_http_conn_evict_timeout(srv_conn, &equeue);
+		if (test_bit(TFW_CONN_B_FAULTY, &srv_conn->flags)) {
+			tfw_http_conn_resched(srv_conn, &equeue);
+		} else if (unlikely(tfw_srv_conn_need_resched(srv_conn))) {
+			set_bit(TFW_CONN_B_FAULTY, &srv_conn->flags);
+			tfw_http_conn_resched(srv_conn, &equeue);
+		}
 		goto zap_error;
 	}
 
@@ -935,17 +958,17 @@ tfw_http_conn_repair(TfwConn *conn)
 
 	spin_lock(&srv_conn->fwd_qlock);
 	/* Treat a non-idempotent request if any. */
-	tfw_http_req_fwd_treatnip(srv_conn, &equeue);
+	tfw_http_conn_treatnip(srv_conn, &equeue);
 	/* Re-send the first unanswered request. */
 	if (srv_conn->msg_sent) {
-		req_resent = tfw_http_req_resend_first(srv_conn, &equeue);
+		req_resent = tfw_http_conn_resend_first(srv_conn, &equeue);
 		if (unlikely(!req_resent))
 			srv_conn->msg_sent = NULL;
 	}
 	/* If none re-sent, then send the remaining unsent requests. */
 	if (!req_resent) {
 		set_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
-		tfw_http_req_fwd_unsent(srv_conn, &equeue);
+		tfw_http_conn_fwd_unsent(srv_conn, &equeue);
 	}
 	spin_unlock(&srv_conn->fwd_qlock);
 zap_error:
@@ -1054,6 +1077,7 @@ tfw_http_conn_init(TfwConn *conn)
 		TfwSrvConn *srv_conn = (TfwSrvConn *)conn;
 		if (!list_empty(&srv_conn->fwd_queue))
 			set_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
+		clear_bit(TFW_CONN_B_FAULTY, &srv_conn->flags);
 	}
 	tfw_gfsm_state_init(&conn->state, conn, TFW_HTTP_FSM_INIT);
 	return 0;
@@ -2050,9 +2074,9 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 	 * additional reference.
 	 */
 	if (unlikely(tfw_srv_conn_restricted(srv_conn)))
-		__tfw_http_req_fwd_repair(srv_conn, &equeue);
+		__tfw_http_conn_fwd_repair(srv_conn, &equeue);
 	else if (tfw_http_conn_need_fwd(srv_conn))
-		__tfw_http_req_fwd_unsent(srv_conn, &equeue);
+		__tfw_http_conn_fwd_unsent(srv_conn, &equeue);
 	spin_unlock(&srv_conn->fwd_qlock);
 
 	if (!list_empty(&equeue))
diff --git a/tempesta_fw/sched/tfw_sched_hash.c b/tempesta_fw/sched/tfw_sched_hash.c
index 88e6fbb17..c800f9b87 100644
--- a/tempesta_fw/sched/tfw_sched_hash.c
+++ b/tempesta_fw/sched/tfw_sched_hash.c
@@ -146,7 +146,7 @@ tfw_sched_hash_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 	for (tries = 0; tries < __HLIST_SZ(TFW_SG_MAX_CONN); ++tries) {
 		for (ch = sg->sched_data; ch->srv_conn; ++ch) {
 			if (unlikely(tfw_srv_conn_restricted(ch->srv_conn)
-				     || tfw_server_queue_full(ch->srv_conn)
+				     || tfw_srv_conn_queue_full(ch->srv_conn)
 				     || !tfw_srv_conn_live(ch->srv_conn)))
 				continue;
 			curr_weight = msg_hash ^ ch->hash;
diff --git a/tempesta_fw/sched/tfw_sched_rr.c b/tempesta_fw/sched/tfw_sched_rr.c
index d3e2433f1..e8de7b712 100644
--- a/tempesta_fw/sched/tfw_sched_rr.c
+++ b/tempesta_fw/sched/tfw_sched_rr.c
@@ -136,7 +136,7 @@ tfw_sched_rr_get_srv_conn(TfwMsg *msg, TfwSrvGroup *sg)
 			idxval = atomic64_inc_return(&srv_cl->rr_counter);
 			srv_conn = srv_cl->conns[idxval % srv_cl->conn_n];
 			if (unlikely(tfw_srv_conn_restricted(srv_conn)
-				     || tfw_server_queue_full(srv_conn)))
+				     || tfw_srv_conn_queue_full(srv_conn)))
 				continue;
 			if (skipnip && tfw_srv_conn_hasnip(srv_conn)) {
 				if (likely(tfw_srv_conn_live(srv_conn)))
diff --git a/tempesta_fw/server.h b/tempesta_fw/server.h
index 22d1e11b5..2d673724a 100644
--- a/tempesta_fw/server.h
+++ b/tempesta_fw/server.h
@@ -125,12 +125,25 @@ void tfw_server_destroy(TfwServer *srv);
 void tfw_srv_conn_release(TfwSrvConn *srv_conn);
 
 static inline bool
-tfw_server_queue_full(TfwSrvConn *srv_conn)
+tfw_srv_conn_queue_full(TfwSrvConn *srv_conn)
 {
 	TfwSrvGroup *sg = ((TfwServer *)srv_conn->peer)->sg;
 	return ACCESS_ONCE(srv_conn->qsize) >= sg->max_qsize;
 }
 
+/*
+ * Tell if the number of reconnect attempts has reached @max_recns.
+ * @max_recns is set to UINT_MAX to mean an unlimited number of
+ * attempts. UINT_MAX seconds is more than 136 years, so it's safe
+ * to assume that it's never reached in a single run of Tempesta.
+ */
+static inline bool
+tfw_srv_conn_need_resched(TfwSrvConn *srv_conn)
+{       
+	TfwSrvGroup *sg = ((TfwServer *)srv_conn->peer)->sg;
+	return (srv_conn->recns == sg->max_recns);
+}
+
 /* Server group routines. */
 TfwSrvGroup *tfw_sg_lookup(const char *name);
 TfwSrvGroup *tfw_sg_new(const char *name, gfp_t flags);
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index 48a7984fe..4414ae7d1 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -188,50 +188,21 @@ tfw_sock_srv_connect_try(TfwSrvConn *srv_conn)
 	return 0;
 }
 
-/*
- * @max_recns can be the maximum value for the data type to mean
- * the unlimited number of attempts, which is the value that should
- * never be reached. UINT_MAX seconds is more than 136 years. It's
- * safe to assume that it's not reached in a single run of Tempesta.
- *
- * The limit on the number of reconnect attempts is used to re-schedule
- * requests that would never be forwarded otherwise. Then, the attempts
- * to reconnect are continued in anticipation that the connection will
- * be re-established sooner or later. Otherwise the connection would
- * stay dead until Tempesta is restarted.
- */
 static inline void
 tfw_sock_srv_connect_try_later(TfwSrvConn *srv_conn)
 {
-	TfwSrvGroup *sg = ((TfwServer *)srv_conn->peer)->sg;
 	unsigned long timeout;
 
 	/* Don't rearm reconnection timer if we're about to shutdown. */
 	if (unlikely(!ss_active()))
 		return;
 
-	/*
-	 * max_attempts can be the maximum value for the data type to mean
-	 * the unlimited number of attempts, which is the value that should
-	 * never be reached. UINT_MAX seconds is more than 136 years. It's
-	 * safe to assume that it's not reached in a single run of Tempesta.
-	 */
-	if (unlikely(srv_conn->recns == sg->max_recns)) {
-		TfwAddr *srv_addr = &srv_conn->peer->addr;
-		char s_addr[TFW_ADDR_STR_BUF_SIZE] = { 0 };
-		tfw_addr_ntop(srv_addr, s_addr, sizeof(s_addr));
-		TFW_WARN("The limit of [%d] on reconnect attempts exceeded. "
-			 "The server connection [%s] is down.\n",
-			 sg->max_recns, s_addr);
-		tfw_connection_repair((TfwConn *)srv_conn);
-	}
 	if (srv_conn->recns < ARRAY_SIZE(tfw_srv_tmo_vals)) {
-		timeout = tfw_srv_tmo_vals[srv_conn->recns];
 		if (srv_conn->recns)
 			TFW_DBG_ADDR("Cannot establish connection",
 				     &srv_conn->peer->addr);
+		timeout = tfw_srv_tmo_vals[srv_conn->recns];
 	} else {
-		timeout = tfw_srv_tmo_vals[ARRAY_SIZE(tfw_srv_tmo_vals) - 1];
 		if (srv_conn->recns == ARRAY_SIZE(tfw_srv_tmo_vals)
 		    || !(srv_conn->recns % 60))
 		{
@@ -242,6 +213,9 @@ tfw_sock_srv_connect_try_later(TfwSrvConn *srv_conn)
 				 " tries, keep trying...\n",
 				 addr_str, srv_conn->recns);
 		}
+
+		tfw_connection_repair((TfwConn *)srv_conn);
+		timeout = tfw_srv_tmo_vals[ARRAY_SIZE(tfw_srv_tmo_vals) - 1];
 	}
 	srv_conn->recns++;
 
@@ -542,8 +516,8 @@ tfw_sock_srv_delete_all_conns(void)
 #define TFW_CFG_SRV_QUEUE_SIZE_DEF	1000	/* Max queue size */
 #define TFW_CFG_SRV_FWD_TIMEOUT_DEF	60	/* Default request timeout */
 #define TFW_CFG_SRV_FWD_RETRIES_DEF	5	/* Default number of tries */
+#define TFW_CFG_SRV_CNS_RETRIES_DEF	10	/* Reconnect tries. */
 #define TFW_CFG_SRV_RETRY_NIP_DEF	0	/* Do NOT resend NIP reqs */
-#define TFW_CFG_SRV_RETRY_ATTEMPTS_DEF	10	/* Reconnect attempts. */
 
 static TfwServer *tfw_cfg_in_slst[TFW_SG_MAX_SRV];
 static TfwServer *tfw_cfg_out_slst[TFW_SG_MAX_SRV];
@@ -556,14 +530,14 @@ static TfwSrvGroup *tfw_cfg_in_sg, *tfw_cfg_out_sg;
 static int tfw_cfg_in_queue_size = TFW_CFG_SRV_QUEUE_SIZE_DEF;
 static int tfw_cfg_in_fwd_timeout = TFW_CFG_SRV_FWD_TIMEOUT_DEF;
 static int tfw_cfg_in_fwd_retries = TFW_CFG_SRV_FWD_RETRIES_DEF;
+static int tfw_cfg_in_cns_retries = TFW_CFG_SRV_CNS_RETRIES_DEF;
 static int tfw_cfg_in_retry_nip = TFW_CFG_SRV_RETRY_NIP_DEF;
-static int tfw_cfg_in_retry_attempts = TFW_CFG_SRV_RETRY_ATTEMPTS_DEF;
 
 static int tfw_cfg_out_queue_size = TFW_CFG_SRV_QUEUE_SIZE_DEF;
 static int tfw_cfg_out_fwd_timeout = TFW_CFG_SRV_FWD_TIMEOUT_DEF;
 static int tfw_cfg_out_fwd_retries = TFW_CFG_SRV_FWD_RETRIES_DEF;
+static int tfw_cfg_out_cns_retries = TFW_CFG_SRV_CNS_RETRIES_DEF;
 static int tfw_cfg_out_retry_nip = TFW_CFG_SRV_RETRY_NIP_DEF;
-static int tfw_cfg_out_retry_attempts = TFW_CFG_SRV_RETRY_ATTEMPTS_DEF;
 
 static int
 tfw_cfgop_intval(TfwCfgSpec *cs, TfwCfgEntry *ce, int *intval)
@@ -646,19 +620,19 @@ tfw_cfgop_out_retry_nip(TfwCfgSpec *cs, TfwCfgEntry *ce)
 }
 
 static int
-tfw_cfgop_in_conn_tries(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_cfgop_in_conn_retries(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	return tfw_cfgop_intval(cs, ce, &tfw_cfg_in_retry_attempts);
+	return tfw_cfgop_intval(cs, ce, &tfw_cfg_in_cns_retries);
 }
 
 static int
-tfw_cfgop_out_conn_tries(TfwCfgSpec *cs, TfwCfgEntry *ce)
+tfw_cfgop_out_conn_retries(TfwCfgSpec *cs, TfwCfgEntry *ce)
 {
-	return tfw_cfgop_intval(cs, ce, &tfw_cfg_out_retry_attempts);
+	return tfw_cfgop_intval(cs, ce, &tfw_cfg_out_cns_retries);
 }
 
 static int
-tfw_cfgop_set_conn_tries(TfwSrvGroup *sg, int recns)
+tfw_cfgop_set_conn_retries(TfwSrvGroup *sg, int recns)
 {
 	if (!recns) {
 		sg->max_recns = UINT_MAX;
@@ -855,10 +829,10 @@ tfw_cfgop_begin_srv_group(TfwCfgSpec *cs, TfwCfgEntry *ce)
 
         tfw_cfg_in_slstsz = 0;
         tfw_cfg_in_sched = tfw_cfg_out_sched;
-	tfw_cfg_in_retry_attempts = tfw_cfg_out_retry_attempts;
 	tfw_cfg_in_queue_size = tfw_cfg_out_queue_size;
 	tfw_cfg_in_fwd_timeout = tfw_cfg_out_fwd_timeout;
 	tfw_cfg_in_fwd_retries = tfw_cfg_out_fwd_retries;
+	tfw_cfg_in_cns_retries = tfw_cfg_out_cns_retries;
 	tfw_cfg_in_retry_nip = tfw_cfg_out_retry_nip;
 
 	return 0;
@@ -885,7 +859,7 @@ tfw_cfgop_finish_srv_group(TfwCfgSpec *cs)
 	BUG_ON(!tfw_cfg_in_sched);
 	TFW_DBG("finish srv_group: %s\n", sg->name);
 
-	tfw_cfgop_set_conn_tries(sg, tfw_cfg_in_retry_attempts);
+	tfw_cfgop_set_conn_retries(sg, tfw_cfg_in_cns_retries);
 	sg->max_qsize = tfw_cfg_in_queue_size ? : UINT_MAX;
 	sg->max_jqage = tfw_cfg_in_fwd_timeout
 		      ? msecs_to_jiffies(tfw_cfg_in_fwd_timeout * 1000)
@@ -979,7 +953,7 @@ tfw_sock_srv_start(void)
 	if (sg) {
 		BUG_ON(!tfw_cfg_out_sched);
 
-		tfw_cfgop_set_conn_tries(sg, tfw_cfg_out_retry_attempts);
+		tfw_cfgop_set_conn_retries(sg, tfw_cfg_out_cns_retries);
 		sg->max_qsize = tfw_cfg_out_queue_size ? : UINT_MAX;
 		sg->max_jqage = tfw_cfg_out_fwd_timeout
 			      ? msecs_to_jiffies(tfw_cfg_out_fwd_timeout * 1000)
@@ -1058,15 +1032,15 @@ static TfwCfgSpec tfw_srv_group_specs[] = {
 		.cleanup = tfw_clean_srv_groups,
 	},
 	{
-		"server_retry_non_idempotent", NULL,
+		"server_retry_nonidempotent", NULL,
 		tfw_cfgop_in_retry_nip,
 		.allow_none = true,
 		.allow_repeat = false,
 		.cleanup = tfw_clean_srv_groups,
 	},
 	{
-		"connect_tries", NULL,
-		tfw_cfgop_in_conn_tries,
+		"server_connect_retries", NULL,
+		tfw_cfgop_in_conn_retries,
 		.allow_none = true,
 		.allow_repeat = false,
 		.cleanup = tfw_clean_srv_groups,
@@ -1122,8 +1096,8 @@ TfwCfgMod tfw_sock_srv_cfg_mod = {
 			.cleanup = tfw_clean_srv_groups,
 		},
 		{
-			"connect_tries", NULL,
-			tfw_cfgop_out_conn_tries,
+			"server_connect_retries", NULL,
+			tfw_cfgop_out_conn_retries,
 			.allow_none = true,
 			.allow_repeat = true,
 			.cleanup = tfw_clean_srv_groups,

From 77d1d13a6d383ea1488078769f5b30f209d0f724 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Fri, 17 Feb 2017 20:13:49 +0300
Subject: [PATCH 57/65] Correctly release server connection's resources at STOP
 time.

---
 tempesta_fw/http.c     | 15 +++++++++++----
 tempesta_fw/sock_srv.c | 26 ++++++++++++++++++++++++--
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 9dbc20de2..cad818152 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -1085,21 +1085,28 @@ tfw_http_conn_init(TfwConn *conn)
 
 /*
  * Release server connection's resources.
- *
  * Drop and free the requests in server connection's @fwd_queue.
- * Called only when Tempesta is stopped.
+ *
+ * This function is called only when connection is completely destroyed.
+ * Depending on Tempesta's state, both user and kernel context threads
+ * may try to do that at the same time. As @fwd_queue is moved atomically
+ * to local @zap_queue, only one thread is able to proceed and release
+ * the resources.
  */
 static void
 tfw_http_conn_srv_release(TfwSrvConn *srv_conn)
 {
 	TfwHttpReq *req, *tmp;
-	struct list_head *fwd_queue = &srv_conn->fwd_queue;
 	LIST_HEAD(zap_queue);
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
 
-	list_for_each_entry_safe(req, tmp, fwd_queue, fwd_list) {
+	spin_lock(&srv_conn->fwd_qlock);
+	list_splice_tail_init(&srv_conn->fwd_queue, &zap_queue);
+	spin_unlock(&srv_conn->fwd_qlock);
+
+	list_for_each_entry_safe(req, tmp, &zap_queue, fwd_list) {
 		tfw_http_req_delist(srv_conn, req);
 		if (unlikely(!list_empty_careful(&req->msg.seq_list))) {
 			spin_lock(&((TfwCliConn *)req->conn)->seq_qlock);
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index 4414ae7d1..6ec64fffa 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -359,15 +359,37 @@ static const SsHooks tfw_sock_srv_ss_hooks = {
 /**
  * Close a server connection, or stop connection attempts if a connection
  * is not established. This is called only in user context at STOP time.
+ *
+ * There are two corner cases. In both cases calling ss_close_sync() won't
+ * cause any effect as the connection is closed already. Instead, just free
+ * the connection's resources directly.
+ * 1. A connection has just been closed by the other side. A reconnect is
+ *    prevented by stopping the timer. Yet the connection may have unfreed
+ *    resources as closing was done as part of failover.
+ * 2. A connection is being closed by the other side just as Tempesta is
+ *    moved to STOP state. Both threads may call tfw_connection_release()
+ *    at the same time. See the implementation of the underlying function
+ *    tfw_srv_conn_release().
  */
 static int
 tfw_sock_srv_disconnect(TfwConn *conn)
 {
+	int ret = 0;
+
 	/* Prevent races with timer callbacks. */
 	del_timer_sync(&conn->timer);
 
-	/* Use synchronous closing to ensure that the job is enqueued. */
-	return ss_close_sync(conn->sk, true);
+	/*
+	 * If the connection is closed already, then simply release its
+	 * resources. Otherwise, use synchronous closing to ensure that
+	 * the job is enqueued.
+	 */
+	if (atomic_read(&conn->refcnt) == TFW_CONN_DEATHCNT)
+		tfw_connection_release(conn);
+	else
+		ret = ss_close_sync(conn->sk, true);
+
+	return ret;
 }
 
 /*

From 9db23ed4b2e67c595b53a9cf8a679da3ec372a7c Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Mon, 20 Feb 2017 13:49:09 +0300
Subject: [PATCH 58/65] Address latest code review comments - the first batch
 of changes.

---
 README.md                             |   8 ++
 etc/tempesta_fw.conf                  |   2 +-
 tempesta_fw/addr.h                    |   2 +-
 tempesta_fw/cache.c                   |   2 +-
 tempesta_fw/cfg.c                     |   1 -
 tempesta_fw/classifier/frang.c        |   2 +-
 tempesta_fw/connection.c              |   2 +-
 tempesta_fw/connection.h              |  48 ++++--------
 tempesta_fw/http.c                    | 105 +++++++++++++-------------
 tempesta_fw/http.h                    |  10 ++-
 tempesta_fw/http_match.c              |   2 +-
 tempesta_fw/http_match.h              |   1 +
 tempesta_fw/http_msg.c                |   4 +-
 tempesta_fw/http_msg.h                |   2 +-
 tempesta_fw/http_sess.c               |   4 +-
 tempesta_fw/msg.h                     |   2 +-
 tempesta_fw/sched.c                   |   2 +-
 tempesta_fw/sched/tfw_sched_hash.c    |   2 +-
 tempesta_fw/sched/tfw_sched_http.c    |   4 +-
 tempesta_fw/sched/tfw_sched_rr.c      |   2 +-
 tempesta_fw/server.c                  |   2 +-
 tempesta_fw/server.h                  |   4 +-
 tempesta_fw/sock_srv.c                |   4 +-
 tempesta_fw/t/unit/helpers.c          |   2 +-
 tempesta_fw/t/unit/sched_helper.c     |   2 +-
 tempesta_fw/t/unit/sched_helper.h     |   2 +-
 tempesta_fw/t/unit/test_http_sticky.c |   2 +-
 tempesta_fw/t/unit/test_sched_hash.c  |   2 +-
 tempesta_fw/t/unit/test_sched_http.c  |   2 +-
 tempesta_fw/t/unit/test_sched_rr.c    |   2 +-
 tempesta_fw/tempesta_fw.h             |   4 +-
 tempesta_fw/tls.c                     |   2 +-
 tempesta_fw/vhost.c                   |   2 +-
 tempesta_fw/vhost.h                   |   2 +-
 34 files changed, 117 insertions(+), 124 deletions(-)

diff --git a/README.md b/README.md
index 3d079faee..26c9fc45c 100644
--- a/README.md
+++ b/README.md
@@ -505,6 +505,14 @@ scheduler.
 Requests are distributed uniformly, and requests with the same URI/Host are
 always sent to the same server.
 
+The round-robin scheduler is the fastest scheduler. However, the presence
+of a non-idempotent request in a connection means that subsequent requests
+may not be sent out until a response is received to the non-idempotent
+request. With that in mind, an attempt is made to put new requests to
+connections that don't currently have non-idempotent requests. If all
+connections have a non-idempotent request in them, then such a connection
+is used as there's no other choice.
+
 Only one `sched` directive is allowed per explicit or implicit group.
 A scheduler defined for the implicit group becomes the scheduler for an
 explicit group defined with `srv_group` directive if the explicit group
diff --git a/etc/tempesta_fw.conf b/etc/tempesta_fw.conf
index 681021292..cc9070d8b 100644
--- a/etc/tempesta_fw.conf
+++ b/etc/tempesta_fw.conf
@@ -5,7 +5,7 @@
 # TAG: sched.
 #
 # Specifies the scheduler used to distribute the load among servers within
-# a group. 
+# a group.
 #
 # Syntax:
 #   sched SCHED_NAME;
diff --git a/tempesta_fw/addr.h b/tempesta_fw/addr.h
index 390e48a33..3793e3dbe 100644
--- a/tempesta_fw/addr.h
+++ b/tempesta_fw/addr.h
@@ -2,7 +2,7 @@
  *		Tempesta FW
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/cache.c b/tempesta_fw/cache.c
index 1497cd17d..8076f9082 100644
--- a/tempesta_fw/cache.c
+++ b/tempesta_fw/cache.c
@@ -4,7 +4,7 @@
  * HTTP cache (RFC 7234).
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/cfg.c b/tempesta_fw/cfg.c
index ed13e0595..35c828bdb 100644
--- a/tempesta_fw/cfg.c
+++ b/tempesta_fw/cfg.c
@@ -88,7 +88,6 @@
 #include <linux/kernel.h>
 #include <linux/moduleparam.h>
 #include <linux/vmalloc.h>
-#include <net/net_namespace.h> /* for sysctl */
 
 #include "addr.h"
 #include "cfg.h"
diff --git a/tempesta_fw/classifier/frang.c b/tempesta_fw/classifier/frang.c
index c85eb6fc1..0879b9008 100644
--- a/tempesta_fw/classifier/frang.c
+++ b/tempesta_fw/classifier/frang.c
@@ -24,7 +24,7 @@
  * Or, that singular header fields may not be duplicated in an HTTP header.
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/connection.c b/tempesta_fw/connection.c
index c9186bc50..816a37330 100644
--- a/tempesta_fw/connection.c
+++ b/tempesta_fw/connection.c
@@ -4,7 +4,7 @@
  * Generic connection management.
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/connection.h b/tempesta_fw/connection.h
index 07101f2f4..a5fe0fdef 100644
--- a/tempesta_fw/connection.h
+++ b/tempesta_fw/connection.h
@@ -4,7 +4,7 @@
  * Definitions for generic connection management at OSI level 6 (presentation).
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
@@ -84,7 +84,6 @@ enum {
  * @peer	- TfwClient or TfwServer handler;
  * @sk		- an appropriate sock handler;
  * @destructor	- called when a connection is destroyed;
- * @forward	- called when a request is forwarded to server;
  */
 #define TFW_CONN_COMMON					\
 	SsProto			proto;			\
@@ -247,8 +246,8 @@ extern TfwConnHooks *conn_hooks[TFW_CONN_MAX_PROTOS];
 	tfw_conn_hook_call(TFW_CONN_TYPE2IDX(TFW_CONN_TYPE(c)), c, f)
 
 /*
- * Tell if a server connection connection is restricted. A restricted
- * server connection is not available to schedulers.
+ * Tell if a server connection is restricted. A restricted connection
+ * is not available to schedulers.
  *
  * The flag RESEND is set when a newly established server connection
  * has messages in the forwarding queue. That means that the connection
@@ -286,27 +285,17 @@ tfw_connection_live(TfwConn *conn)
 {
 	return atomic_read(&conn->refcnt) > 0;
 }
-static inline bool
-tfw_srv_conn_live(TfwSrvConn *srv_conn)
-{
-	return tfw_connection_live((TfwConn *)srv_conn);
-}
+
+#define tfw_srv_conn_live(c)	tfw_connection_live((TfwConn *)(c))
 
 static inline void
 tfw_connection_get(TfwConn *conn)
 {
 	atomic_inc(&conn->refcnt);
 }
-static inline void
-tfw_cli_conn_get(TfwCliConn *cli_conn)
-{
-	tfw_connection_get((TfwConn *)cli_conn);
-}
-static inline void
-tfw_srv_conn_get(TfwSrvConn *srv_conn)
-{
-	tfw_connection_get((TfwConn *)srv_conn);
-}
+
+#define tfw_cli_conn_get(c)	tfw_connection_get((TfwConn *)(c))
+#define tfw_srv_conn_get(c)	tfw_connection_get((TfwConn *)(c))
 
 /**
  * Increment reference counter and return true if @conn is not in
@@ -326,11 +315,9 @@ __tfw_connection_get_if_live(TfwConn *conn)
 
 	return false;
 }
-static inline bool
-tfw_srv_conn_get_if_live(TfwSrvConn *srv_conn)
-{
-	return __tfw_connection_get_if_live((TfwConn *)srv_conn);
-}
+
+#define tfw_srv_conn_get_if_live(c)	\
+	__tfw_connection_get_if_live((TfwConn *)(c))
 
 static inline void
 tfw_connection_put(TfwConn *conn)
@@ -346,16 +333,9 @@ tfw_connection_put(TfwConn *conn)
 	if (conn->destructor)
 		conn->destructor(conn);
 }
-static inline void
-tfw_cli_conn_put(TfwCliConn *cli_conn)
-{
-	tfw_connection_put((TfwConn *)cli_conn);
-}
-static inline void
-tfw_srv_conn_put(TfwSrvConn *srv_conn)
-{
-	tfw_connection_put((TfwConn *)srv_conn);
-}
+
+#define tfw_cli_conn_put(c)	tfw_connection_put((TfwConn *)(c))
+#define tfw_srv_conn_put(c)	tfw_connection_put((TfwConn *)(c))
 
 static inline void
 tfw_connection_put_to_death(TfwConn *conn)
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index cad818152..6c3314c5c 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -2,7 +2,7 @@
  *		Tempesta FW
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
@@ -122,7 +122,7 @@ unsigned long tfw_hash_str(const TfwStr *str);
  * but it includes 'Set-Cookie:' header field that sets Tempesta sticky cookie.
  */
 int
-tfw_http_prep_302(TfwHttpMsg *hmresp, TfwHttpReq *req, TfwStr *cookie)
+tfw_http_prep_302(TfwHttpMsg *resp, TfwHttpReq *req, TfwStr *cookie)
 {
 	size_t data_len = S_302_FIXLEN;
 	int conn_flag = req->flags & __TFW_HTTP_CONN_MASK;
@@ -162,24 +162,24 @@ tfw_http_prep_302(TfwHttpMsg *hmresp, TfwHttpReq *req, TfwStr *cookie)
 	data_len += req->uri_path.len + cookie->len;
 	data_len += crlf->len;
 
-	if (tfw_http_msg_setup(hmresp, &it, data_len))
+	if (tfw_http_msg_setup(resp, &it, data_len))
 		return TFW_BLOCK;
 
 	tfw_http_prep_date(__TFW_STR_CH(&rh, 1)->ptr);
-	tfw_http_msg_write(&it, hmresp, &rh);
+	tfw_http_msg_write(&it, resp, &rh);
 	/*
 	 * HTTP/1.0 may have no host part, so we create relative URI.
 	 * See RFC 1945 9.3 and RFC 7231 7.1.2.
 	 */
 	if (host.len) {
 		static TfwStr proto = { .ptr = S_HTTP, .len = SLEN(S_HTTP) };
-		tfw_http_msg_write(&it, hmresp, &proto);
-		tfw_http_msg_write(&it, hmresp, &host);
+		tfw_http_msg_write(&it, resp, &proto);
+		tfw_http_msg_write(&it, resp, &host);
 	}
-	tfw_http_msg_write(&it, hmresp, &req->uri_path);
-	tfw_http_msg_write(&it, hmresp, &part03);
-	tfw_http_msg_write(&it, hmresp, cookie);
-	tfw_http_msg_write(&it, hmresp, crlf);
+	tfw_http_msg_write(&it, resp, &req->uri_path);
+	tfw_http_msg_write(&it, resp, &part03);
+	tfw_http_msg_write(&it, resp, cookie);
+	tfw_http_msg_write(&it, resp, crlf);
 
 	return TFW_PASS;
 }
@@ -400,7 +400,7 @@ __tfw_http_req_nip_delist(TfwSrvConn *srv_conn, TfwHttpReq *req)
 }
 
 /*
- * Put @req on the list of non-idempotent requests in @srv_conn. 
+ * Put @req on the list of non-idempotent requests in @srv_conn.
  * Raise the flag saying that @srv_conn has non-idempotent requests.
  */
 static inline void
@@ -532,16 +532,25 @@ tfw_http_req_zap_error(struct list_head *equeue)
 
 	list_for_each_entry_safe(req, tmp, equeue, fwd_list) {
 		list_del_init(&req->fwd_list);
-		if (req->rstatus == 404)
+		switch(req->rstatus) {
+		case 404:
 			tfw_http_send_404(req);
-		else if (req->rstatus == 500)
+			break;
+		case 500:
 			tfw_http_send_500(req);
-		else if (req->rstatus == 502)
+			break;
+		case 502:
 			tfw_http_send_502(req);
-		else if (req->rstatus == 504)
+			break;
+		case 504:
 			tfw_http_send_504(req);
-		else
-			BUG();
+			break;
+		default:
+			TFW_WARN("Unexpected response error code: [%d]\n",
+				 req->rstatus);
+			tfw_http_send_500(req);
+			break;
+		}
 		TFW_INC_STAT_BH(clnt.msgs_otherr);
 	}
 }
@@ -747,8 +756,12 @@ tfw_http_conn_treatnip(TfwSrvConn *srv_conn, struct list_head *equeue)
 	    && likely(!(srv->sg->flags & TFW_SRV_RETRY_NIP)))
 	{
 		BUG_ON(list_empty(&req_sent->nip_list));
+		/*
+		 * The Linux kernel has list_is_last(), but there's no
+		 * list_is_first(). The condition checked below is what
+		 * an implementation of list_is_first() would test.
+		 */
 		srv_conn->msg_sent =
-			/* list_is_first(&req_sent->fwd_list, fwd_queue); */
 			(srv_conn->fwd_queue.next == &req_sent->fwd_list) ?
 			NULL : (TfwMsg *)list_prev_entry(req_sent, fwd_list);
 		tfw_http_req_move2equeue(srv_conn, req_sent, equeue, 504);
@@ -766,7 +779,7 @@ tfw_http_conn_resend(TfwSrvConn *srv_conn, bool first, struct list_head *equeue)
 	TfwServer *srv = (TfwServer *)srv_conn->peer;
 	struct list_head *end, *fwd_queue = &srv_conn->fwd_queue;
 
-	TFW_DBG2("%s: conn=[%p] one_msg=[%s]\n",
+	TFW_DBG2("%s: conn=[%p] first=[%s]\n",
 		 __func__, srv_conn, first ? "true" : "false");
 	BUG_ON(!srv_conn->msg_sent);
 	BUG_ON(list_empty(&((TfwHttpReq *)srv_conn->msg_sent)->fwd_list));
@@ -1084,24 +1097,39 @@ tfw_http_conn_init(TfwConn *conn)
 }
 
 /*
- * Release server connection's resources.
- * Drop and free the requests in server connection's @fwd_queue.
+ * Connection with a peer is released.
+ *
+ * This function is called when all users of a server connection are gone,
+ * and the connection's resources can be released.
  *
- * This function is called only when connection is completely destroyed.
+ * If a server connection is in failover state, then the requests that were
+ * sent to that server are kept in the queue until a paired response comes.
+ * The responses will never come now. Keep the queue. When the connection
+ * is restored the requests will be re-sent to the server.
+ *
+ * If a server connection is completely destroyed (on Tempesta's shutdown),
+ * then all outstanding requests in @fwd_queue are dropped and released.
  * Depending on Tempesta's state, both user and kernel context threads
  * may try to do that at the same time. As @fwd_queue is moved atomically
  * to local @zap_queue, only one thread is able to proceed and release
  * the resources.
  */
 static void
-tfw_http_conn_srv_release(TfwSrvConn *srv_conn)
+tfw_http_conn_release(TfwConn *conn)
 {
+	TfwSrvConn *srv_conn = (TfwSrvConn *)conn;
 	TfwHttpReq *req, *tmp;
 	LIST_HEAD(zap_queue);
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
 
+	if (likely(ss_active())) {
+		clear_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
+		clear_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
+		return;
+	}
+
 	spin_lock(&srv_conn->fwd_qlock);
 	list_splice_tail_init(&srv_conn->fwd_queue, &zap_queue);
 	spin_unlock(&srv_conn->fwd_qlock);
@@ -1118,33 +1146,6 @@ tfw_http_conn_srv_release(TfwSrvConn *srv_conn)
 	}
 }
 
-/*
- * Connection with a peer is released.
- *
- * For server connections the requests that were sent to that server are
- * kept in the queue until a paired response comes. That will never happen
- * now. Keep the queue. When the connection is restored the requests will
- * be re-sent to the server.
- *
- * Called when a connection is released. There are no users at that time,
- * so locks are not needed.
- */
-static void
-tfw_http_conn_release(TfwConn *conn)
-{
-	TfwSrvConn *srv_conn = (TfwSrvConn *)conn;
-
-	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
-	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
-
-	if (unlikely(!ss_active())) {
-		tfw_http_conn_srv_release(srv_conn);
-		return;
-	}
-	clear_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
-	clear_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
-}
-
 /*
  * Dequeue the request from @seq_queue and free the request
  * and the paired response.
@@ -1564,7 +1565,7 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 	}
 	BUG_ON(list_empty(&req->msg.seq_list));
 	req->resp = (TfwHttpMsg *)resp;
-	/* Move consecutive requests with @req->resp to @ret_queue. */
+	/* Move consecutive requests with @req->resp to @req_retent. */
 	list_for_each_entry(req, seq_queue, msg.seq_list) {
 		if (req->resp == NULL)
 			break;
@@ -2060,9 +2061,9 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 	if (unlikely(list_empty(fwd_queue))) {
 		BUG_ON(srv_conn->qsize);
 		spin_unlock(&srv_conn->fwd_qlock);
-		/* @conn->msg will get NULLed in the process. */
 		TFW_WARN("Paired request missing, "
 			 "HTTP Response Splitting attack?\n");
+		/* @conn->msg will get NULLed in the process. */
 		tfw_http_conn_msg_free(hmresp);
 		TFW_INC_STAT_BH(serv.msgs_otherr);
 		return NULL;
diff --git a/tempesta_fw/http.h b/tempesta_fw/http.h
index 64026a236..ff9aa6084 100644
--- a/tempesta_fw/http.h
+++ b/tempesta_fw/http.h
@@ -2,7 +2,7 @@
  *		Tempesta FW
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
@@ -255,7 +255,6 @@ typedef struct {
 #define __TFW_HTTP_CONN_MASK		(TFW_HTTP_CONN_CLOSE | TFW_HTTP_CONN_KA)
 #define TFW_HTTP_CONN_EXTRA		0x000004
 #define TFW_HTTP_CHUNKED		0x000008
-#define TFW_HTTP_MSG_SENT		0x000010
 
 /* Request flags */
 #define TFW_HTTP_HAS_STICKY		0x000100
@@ -430,10 +429,15 @@ tfw_current_timestamp(void)
 	return ts.tv_sec;
 }
 
+/*
+ * SKB data is needed only for calculation of a cache key from request
+ * fields. In all other cases it can just be passed to the network layer.
+ */
 static inline void
 tfw_http_req_init_ss_flags(TfwHttpReq *req)
 {
-	((TfwMsg *)req)->ss_flags |= SS_F_KEEP_SKB;
+	if (tfw_cache_msg_cacheable(req))
+		((TfwMsg *)req)->ss_flags |= SS_F_KEEP_SKB;
 }
 
 static inline void
diff --git a/tempesta_fw/http_match.c b/tempesta_fw/http_match.c
index b917afadc..6a297bf10 100644
--- a/tempesta_fw/http_match.c
+++ b/tempesta_fw/http_match.c
@@ -44,7 +44,7 @@
  *   - Case-sensitive matching for headers when required by RFC.
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/http_match.h b/tempesta_fw/http_match.h
index 527906d12..19f9a7e74 100644
--- a/tempesta_fw/http_match.h
+++ b/tempesta_fw/http_match.h
@@ -2,6 +2,7 @@
  *		Tempesta FW
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
+ * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/http_msg.c b/tempesta_fw/http_msg.c
index 38601433c..2411945b7 100644
--- a/tempesta_fw/http_msg.c
+++ b/tempesta_fw/http_msg.c
@@ -4,7 +4,7 @@
  * HTTP message manipulation helpers for the protocol processing.
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
@@ -810,7 +810,7 @@ EXPORT_SYMBOL(tfw_http_msg_free);
 /**
  * Allocate a new error response message.
  * This type of message is not parsed or adjusted before it's sent out.
- * That allows for a short (limited) initialization.
+ * That allows for a shorter (limited) initialization.
  */
 TfwHttpMsg *
 tfw_http_msg_alloc_err_resp(void)
diff --git a/tempesta_fw/http_msg.h b/tempesta_fw/http_msg.h
index c322f2d17..3cb11f326 100644
--- a/tempesta_fw/http_msg.h
+++ b/tempesta_fw/http_msg.h
@@ -2,7 +2,7 @@
  *		Tempesta FW
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/http_sess.c b/tempesta_fw/http_sess.c
index c1470b571..acb5de1b8 100644
--- a/tempesta_fw/http_sess.c
+++ b/tempesta_fw/http_sess.c
@@ -623,8 +623,8 @@ tfw_http_sess_init(void)
 		return ret;
 	}
 
-	sess_cache = kmem_cache_create("tfw_sess_cache",
-				       sizeof(TfwHttpSess), 0, 0, NULL);
+	sess_cache = kmem_cache_create("tfw_sess_cache", sizeof(TfwHttpSess),
+				       0, 0, NULL);
 	if (!sess_cache) {
 		crypto_free_shash(tfw_sticky_shash);
 		return -ENOMEM;
diff --git a/tempesta_fw/msg.h b/tempesta_fw/msg.h
index ccde0f90a..40e181a62 100644
--- a/tempesta_fw/msg.h
+++ b/tempesta_fw/msg.h
@@ -4,7 +4,7 @@
  * Generic protocol message.
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/sched.c b/tempesta_fw/sched.c
index dba8a6430..df13b33e1 100644
--- a/tempesta_fw/sched.c
+++ b/tempesta_fw/sched.c
@@ -4,7 +4,7 @@
  * Requst schedulers interface.
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/sched/tfw_sched_hash.c b/tempesta_fw/sched/tfw_sched_hash.c
index c800f9b87..c5d110f48 100644
--- a/tempesta_fw/sched/tfw_sched_hash.c
+++ b/tempesta_fw/sched/tfw_sched_hash.c
@@ -19,7 +19,7 @@
  * server unless it is offline.
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/sched/tfw_sched_http.c b/tempesta_fw/sched/tfw_sched_http.c
index 83ae129d0..a35ea0b4c 100644
--- a/tempesta_fw/sched/tfw_sched_http.c
+++ b/tempesta_fw/sched/tfw_sched_http.c
@@ -53,10 +53,10 @@
  * "srv_group") are handled in other modules.
  *
  * TODO:
- *   - Extended string matching operators: "suffix", "regex", "substring".
+ *   - Extended string matching operators: "regex", "substring".
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/sched/tfw_sched_rr.c b/tempesta_fw/sched/tfw_sched_rr.c
index e8de7b712..538749f50 100644
--- a/tempesta_fw/sched/tfw_sched_rr.c
+++ b/tempesta_fw/sched/tfw_sched_rr.c
@@ -2,7 +2,7 @@
  *		Tempesta FW
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/server.c b/tempesta_fw/server.c
index ccd5440ba..5ebae3c2a 100644
--- a/tempesta_fw/server.c
+++ b/tempesta_fw/server.c
@@ -4,7 +4,7 @@
  * Servers handling.
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/server.h b/tempesta_fw/server.h
index 2d673724a..b9c3164ae 100644
--- a/tempesta_fw/server.h
+++ b/tempesta_fw/server.h
@@ -2,7 +2,7 @@
  *		Tempesta FW
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
@@ -139,7 +139,7 @@ tfw_srv_conn_queue_full(TfwSrvConn *srv_conn)
  */
 static inline bool
 tfw_srv_conn_need_resched(TfwSrvConn *srv_conn)
-{       
+{
 	TfwSrvGroup *sg = ((TfwServer *)srv_conn->peer)->sg;
 	return (srv_conn->recns == sg->max_recns);
 }
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index 6ec64fffa..9584b0002 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -849,8 +849,8 @@ tfw_cfgop_begin_srv_group(TfwCfgSpec *cs, TfwCfgEntry *ce)
 
 	TFW_DBG("begin srv_group: %s\n", tfw_cfg_in_sg->name);
 
-        tfw_cfg_in_slstsz = 0;
-        tfw_cfg_in_sched = tfw_cfg_out_sched;
+	tfw_cfg_in_slstsz = 0;
+	tfw_cfg_in_sched = tfw_cfg_out_sched;
 	tfw_cfg_in_queue_size = tfw_cfg_out_queue_size;
 	tfw_cfg_in_fwd_timeout = tfw_cfg_out_fwd_timeout;
 	tfw_cfg_in_fwd_retries = tfw_cfg_out_fwd_retries;
diff --git a/tempesta_fw/t/unit/helpers.c b/tempesta_fw/t/unit/helpers.c
index 92ecaf74b..df7068eaa 100644
--- a/tempesta_fw/t/unit/helpers.c
+++ b/tempesta_fw/t/unit/helpers.c
@@ -15,7 +15,7 @@
  * and generic testing functions/macros are located in test.c/test.h
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/t/unit/sched_helper.c b/tempesta_fw/t/unit/sched_helper.c
index 1c75fb0ad..a27060e4c 100644
--- a/tempesta_fw/t/unit/sched_helper.c
+++ b/tempesta_fw/t/unit/sched_helper.c
@@ -2,7 +2,7 @@
  *		Tempesta FW
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/t/unit/sched_helper.h b/tempesta_fw/t/unit/sched_helper.h
index 2f3d2ac57..fa49c5dea 100644
--- a/tempesta_fw/t/unit/sched_helper.h
+++ b/tempesta_fw/t/unit/sched_helper.h
@@ -2,7 +2,7 @@
  *		Tempesta FW
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/t/unit/test_http_sticky.c b/tempesta_fw/t/unit/test_http_sticky.c
index ed1d558ff..d1425dab1 100644
--- a/tempesta_fw/t/unit/test_http_sticky.c
+++ b/tempesta_fw/t/unit/test_http_sticky.c
@@ -2,7 +2,7 @@
  *		Tempesta FW
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/t/unit/test_sched_hash.c b/tempesta_fw/t/unit/test_sched_hash.c
index fe1f0a61d..fd40669c4 100644
--- a/tempesta_fw/t/unit/test_sched_hash.c
+++ b/tempesta_fw/t/unit/test_sched_hash.c
@@ -2,7 +2,7 @@
  *		Tempesta FW
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/t/unit/test_sched_http.c b/tempesta_fw/t/unit/test_sched_http.c
index b20813f8c..426a051b5 100644
--- a/tempesta_fw/t/unit/test_sched_http.c
+++ b/tempesta_fw/t/unit/test_sched_http.c
@@ -1,7 +1,7 @@
 /**
  *		Tempesta FW
  *
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/t/unit/test_sched_rr.c b/tempesta_fw/t/unit/test_sched_rr.c
index b90bc2f13..2202f21e5 100644
--- a/tempesta_fw/t/unit/test_sched_rr.c
+++ b/tempesta_fw/t/unit/test_sched_rr.c
@@ -2,7 +2,7 @@
  *		Tempesta FW
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/tempesta_fw.h b/tempesta_fw/tempesta_fw.h
index fc3d041f1..c354173b1 100644
--- a/tempesta_fw/tempesta_fw.h
+++ b/tempesta_fw/tempesta_fw.h
@@ -2,7 +2,7 @@
  *		Tempesta FW
  *
  * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
- * Copyright (C) 2015-2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
@@ -32,7 +32,7 @@
 
 #define TFW_AUTHOR		"Tempesta Technologies, Inc"
 #define TFW_NAME		"Tempesta FW"
-#define TFW_VERSION		"0.5.0-pre6"
+#define TFW_VERSION		"0.5.0-pre7"
 
 #define DEF_MAX_PORTS		8
 
diff --git a/tempesta_fw/tls.c b/tempesta_fw/tls.c
index 2a1bed961..a889a9c65 100644
--- a/tempesta_fw/tls.c
+++ b/tempesta_fw/tls.c
@@ -3,7 +3,7 @@
  *
  * Transport Layer Security (TLS) implementation.
  *
- * Copyright (C) 2015 Tempesta Technologies, Inc.
+ * Copyright (C) 2015-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/vhost.c b/tempesta_fw/vhost.c
index 0fcd166ce..cea475e5e 100644
--- a/tempesta_fw/vhost.c
+++ b/tempesta_fw/vhost.c
@@ -1,7 +1,7 @@
 /**
  *		Tempesta FW
  *
- * Copyright (C) 2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2016-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
diff --git a/tempesta_fw/vhost.h b/tempesta_fw/vhost.h
index 6b23ace3a..e66eb05ae 100644
--- a/tempesta_fw/vhost.h
+++ b/tempesta_fw/vhost.h
@@ -1,7 +1,7 @@
 /**
  *		Tempesta FW
  *
- * Copyright (C) 2016 Tempesta Technologies, Inc.
+ * Copyright (C) 2016-2017 Tempesta Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by

From 6478c707cfb65916d53fada60ca17955f968c84c Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Mon, 20 Feb 2017 21:54:24 +0300
Subject: [PATCH 59/65] Add TODO items for further improvements of the
 architecture.

---
 tempesta_fw/http.c     | 41 +++++++++++++++++++++++++++++++++++++++--
 tempesta_fw/http.h     |  2 ++
 tempesta_fw/sock.c     |  5 +++++
 tempesta_fw/sock_srv.c | 31 ++++++++++++++++++++++++++++++-
 4 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 6c3314c5c..5aa01d4b3 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -707,6 +707,16 @@ tfw_http_conn_fwd_unsent(TfwSrvConn *srv_conn, struct list_head *equeue)
  * the queue. CPU-2 does the same after CPU-1 (the queue was locked).
  * After that CPU-1 and CPU-2 are fully concurrent. If CPU-2 happens
  * to proceed first with forwarding, then pairing gets broken.
+ *
+ * TODO: In current design @fwd_queue is locked until after a request
+ * is submitted to SS for sending. It shouldn't be necessary to lock
+ * @fwd_queue for that. There's the ordered @fwd_queue. Also there's
+ * the ordered work queue in SS layer. Perhaps the right way of ordering
+ * these actions is to use message tickets according to the ordering of
+ * requests in @fwd_queue. Typically tfw_connection_send() or its pure
+ * server variant must care about ticket ordering. Backoff and per-cpu
+ * lock data structures could be used just like in Linux MCS locking.
+ * Please see the issue #687.
  */
 static void
 tfw_http_req_fwd(TfwSrvConn *srv_conn, TfwHttpReq *req)
@@ -1551,6 +1561,20 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 	 * Doing ss_close_sync() on client connection's socket is safe
 	 * as long as @req that holds a reference to the connection is
 	 * not freed.
+	 *
+	 * TODO: Responses come from different server connections and on
+	 * different threads/CPUs. This code is called for each response.
+	 * If @seq_queue is empty, then ss_close_sync() may get called
+	 * multiple times, which doesn't look like a reasonable thing to
+	 * do. Perhaps, ss_close_sync() can be called only if ss_close()
+	 * fails. Also, perhaps the state of the client connection can be
+	 * checked in attempt to avoid a call to ss_close() altogether.
+	 * Note that ss_close_sync() is used because otherwise queueing
+	 * of the close() action is not guaranteed. Also note that calling
+	 * of ss_close_sync() multiple times is supported by the code in
+	 * __ss_close() that prevents closing of a socket (and a connection)
+	 * that is closed already. Please see a comment there, and the
+	 * issue #687.
 	 */
 	spin_lock(&cli_conn->seq_qlock);
 	if (unlikely(list_empty(seq_queue))) {
@@ -1591,6 +1615,12 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 	 * is destroyed when the last reference goes, so the argument
 	 * to spin_unlock() may get invalid. Hold the connection until
 	 * sending is done.
+	 *
+	 * TODO: There's a lock contention here as multiple threads/CPUs
+	 * go for the same client connection's queue. Perhaps there's a
+	 * better way of doing this that is more effective. Please see
+	 * the TODO comment above and the one for function tfw_http_popreq().
+	 * Also, please see the issue #687.
 	 */
 	tfw_cli_conn_get(cli_conn);
 	spin_lock(&cli_conn->ret_qlock);
@@ -2046,8 +2076,15 @@ tfw_http_resp_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
  * to and kept in @fwd_queue of the connection @conn for that server.
  * If a paired request is not found, then the response is deleted.
  *
- * If a paired client request is missing, then it seems upsream server is
- * misbehaving, so the caller has to drop the server connection.
+ * If a paired client request is missing, then it seems upstream server
+ * is misbehaving, so the caller has to drop the server connection.
+ *
+ * TODO: When a response is received and a paired request is found,
+ * pending (unsent) requests in the connection are forwarded to the
+ * server right away. In current design, @fwd_queue is locked until
+ * after a request is submitted to SS for sending. It shouldn't be
+ * necessary to lock @fwd_queue for that. Please see a similar TODO
+ * comment to tfw_http_req_fwd(). Also, please see the issue #687.
  */
 static TfwHttpReq *
 tfw_http_popreq(TfwHttpMsg *hmresp)
diff --git a/tempesta_fw/http.h b/tempesta_fw/http.h
index ff9aa6084..26fe136af 100644
--- a/tempesta_fw/http.h
+++ b/tempesta_fw/http.h
@@ -433,6 +433,8 @@ tfw_current_timestamp(void)
  * SKB data is needed only for calculation of a cache key from request
  * fields. In all other cases it can just be passed to the network layer.
  */
+extern bool tfw_cache_msg_cacheable(TfwHttpReq *req);
+
 static inline void
 tfw_http_req_init_ss_flags(TfwHttpReq *req)
 {
diff --git a/tempesta_fw/sock.c b/tempesta_fw/sock.c
index 831119819..d0eae7a82 100644
--- a/tempesta_fw/sock.c
+++ b/tempesta_fw/sock.c
@@ -555,6 +555,11 @@ __ss_close(struct sock *sk, int flags)
 	 * The socket is owned by current CPU, so there's no need to check
 	 * if it's live. However, in some cases this may be called multiple
 	 * times on the same socket. Do it only once for the socket.
+	 *
+	 * TODO: Calling ss_close_sync() multiple times on the same socket
+	 * doesn't look like a reasonable thing to do. Please see the comment
+	 * in tfw_http_resp_fwd() for the reasons this may be called multiple
+	 * times. Perhaps there's a better way. Please see the issue #687.
 	 */
 	bh_lock_sock(sk);
 	if (unlikely(!ss_sock_live(sk))) {
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index 9584b0002..db593e268 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -188,12 +188,41 @@ tfw_sock_srv_connect_try(TfwSrvConn *srv_conn)
 	return 0;
 }
 
+/*
+ * There are several stages in the reconnect process. All stages are
+ * covered by tfw_connection_repair() function.
+ *
+ * 1. The attempts to reconnect are repeated at short intervals that are
+ *    gradually increased. There's a great chance that the connection is
+ *    restored during this stage. When that happens, all requests in the
+ *    connection are re-sent to the server.
+ * 2. The attempts to reconnect are continued at one second intervals.
+ *    This covers a short server's downtime such as a service restart.
+ *    During this time requests in the connection are checked to see if
+ *    they should be evicted for a variety of reasons (timed out, etc.).
+ *    Again, when the connection is restored, requests in the connection
+ *    are re-sent to the server.
+ * 3. When the number of attempts to reconnect exceeds the configured
+ *    value, then the connection is marked as faulty. All requests in
+ *    the connection are then re-scheduled to other servers/connections.
+ *    Attempts to reconnect are still continued at one second intervals.
+ *    This would cover longer server's downtime due to a reboot or any
+ *    other maintenance. Should the connection be restored at some time,
+ *    everything will continue to work as usual.
+ *
+ * TODO: There's an interesting side effect in the described procedure.
+ * Connections that are currently in failover may still accept incoming
+ * requests if there are no active connections. When connections are
+ * restored, all requests will be correctly forwarded/re-sent to their
+ * respective servers. This may serve as a QoS feature that mitigates
+ * some temporary short periods when servers are not available.
+ */
 static inline void
 tfw_sock_srv_connect_try_later(TfwSrvConn *srv_conn)
 {
 	unsigned long timeout;
 
-	/* Don't rearm reconnection timer if we're about to shutdown. */
+	/* Don't rearm the reconnection timer if we're about to shut down. */
 	if (unlikely(!ss_active()))
 		return;
 

From bcfcf0f09fa09af886880857f4f251bb57a7819e Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Wed, 22 Feb 2017 13:38:15 +0300
Subject: [PATCH 60/65] Retrofit each HTTP error response with a reason
 message.

---
 tempesta_fw/cache.c                   |  16 ++--
 tempesta_fw/http.c                    | 110 +++++++++++++++-----------
 tempesta_fw/http.h                    |  18 +++--
 tempesta_fw/t/unit/test_http_sticky.c |   3 +-
 4 files changed, 90 insertions(+), 57 deletions(-)

diff --git a/tempesta_fw/cache.c b/tempesta_fw/cache.c
index 8076f9082..cbc498dc8 100644
--- a/tempesta_fw/cache.c
+++ b/tempesta_fw/cache.c
@@ -41,6 +41,8 @@
 #warning "Please set CONFIG_NODES_SHIFT to less than 16"
 #endif
 
+extern const char *s_source_cache;
+
 /* Flags stored in a Cache Entry. */
 #define TFW_CE_MUST_REVAL	0x0001		/* MUST revalidate if stale. */
 
@@ -935,12 +937,14 @@ tfw_cache_purge_method(TfwHttpReq *req)
 
 	/* Deny PURGE requests by default. */
 	if (!(cache_cfg.cache && vhost->cache_purge && vhost->cache_purge_acl))
-		return tfw_http_send_403(req);
+		return tfw_http_send_403(req, s_source_cache,
+					 "purge: not configured");
 
 	/* Accept requests from configured hosts only. */
 	ss_getpeername(req->conn->sk, &saddr);
 	if (!tfw_capuacl_match(vhost, &saddr))
-		return tfw_http_send_403(req);
+		return tfw_http_send_403(req, s_source_cache,
+					 "purge: ACL violation");
 
 	/* Only "invalidate" option is implemented at this time. */
 	switch (vhost->cache_purge_mode) {
@@ -948,11 +952,13 @@ tfw_cache_purge_method(TfwHttpReq *req)
 		ret = tfw_cache_purge_invalidate(req);
 		break;
 	default:
-		return tfw_http_send_403(req);
+		return tfw_http_send_403(req, s_source_cache,
+					 "purge: invalid option");
 	}
 
 	return ret
-		? tfw_http_send_404(req)
+		? tfw_http_send_404(req, s_source_cache,
+				    "purge: processing error")
 		: tfw_http_send_200(req);
 }
 
@@ -1208,7 +1214,7 @@ cache_req_process_node(TfwHttpReq *req, tfw_http_cache_cb_t action)
 		resp->flags |= TFW_HTTP_RESP_STALE;
 out:
 	if (!resp && (req->cache_ctl.flags & TFW_HTTP_CC_OIFCACHED))
-		tfw_http_send_504(req);
+		tfw_http_send_504(req, s_source_cache, "resource not cached");
 	else
 		action(req, resp);
 
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 5aa01d4b3..cfb20b226 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -62,6 +62,21 @@ int ghprio; /* GFSM hook priority. */
 #define S_H_CONN_KA		S_F_CONNECTION S_V_CONN_KA S_CRLFCRLF
 #define S_H_CONN_CLOSE		S_F_CONNECTION S_V_CONN_CLOSE S_CRLFCRLF
 
+const char *s_source_proxy = "proxy forward";
+const char *s_source_cache = "proxy cache";
+
+static const char *s_reason_evict_timeout = "request evicted: timed out";
+static const char *s_reason_evict_retries = "request evicted: the number"
+					    " of retries exceeded";
+static const char *s_reason_fwd = "request dropped: forwarding error";
+static const char *s_reason_nip = "request dropped: non-idempotent requests"
+				  " are not re-forwarded or re-scheduled";
+static const char *s_reason_sched = "request dropped: unable to find "
+				    "an available back end server";
+static const char *s_reason_req_common = "request dropped: processing error";
+static const char *s_reason_resp_common = "response dropped: processing error";
+static const char *s_reason_resp_filter = "response dropped: filtered out";
+
 /*
  * Prepare current date in the format required for HTTP "Date:"
  * header field. See RFC 2616 section 3.3.
@@ -257,7 +272,7 @@ tfw_http_send_200(TfwHttpReq *req)
  * HTTP 403 response: Access is forbidden.
  */
 int
-tfw_http_send_403(TfwHttpReq *req)
+tfw_http_send_403(TfwHttpReq *req, const char *source, const char *reason)
 {
 	TfwStr rh = {
 		.ptr = (TfwStr []){
@@ -270,7 +285,7 @@ tfw_http_send_403(TfwHttpReq *req)
 		.flags = 4 << TFW_STR_CN_SHIFT
 	};
 
-	TFW_DBG("Send HTTP 403 response\n");
+	TFW_DBG("Send HTTP 403 response: %s: %s\n", source, reason);
 
 	return tfw_http_send_resp(req, &rh, __TFW_STR_CH(&rh, 1));
 }
@@ -281,7 +296,7 @@ tfw_http_send_403(TfwHttpReq *req)
  * HTTP 404 response: Tempesta is unable to find the requested data.
  */
 int
-tfw_http_send_404(TfwHttpReq *req)
+tfw_http_send_404(TfwHttpReq *req, const char *source, const char *reason)
 {
 	TfwStr rh = {
 		.ptr = (TfwStr []){
@@ -294,7 +309,7 @@ tfw_http_send_404(TfwHttpReq *req)
 		.flags = 4 << TFW_STR_CN_SHIFT
 	};
 
-	TFW_DBG("Send HTTP 404 response\n");
+	TFW_DBG("Send HTTP 404 response: %s: %s\n", source, reason);
 
 	return tfw_http_send_resp(req, &rh, __TFW_STR_CH(&rh, 1));
 }
@@ -306,7 +321,7 @@ tfw_http_send_404(TfwHttpReq *req)
  * the request to a server.
  */
 static int
-tfw_http_send_500(TfwHttpReq *req)
+tfw_http_send_500(TfwHttpReq *req, const char *source, const char *reason)
 {
 	TfwStr rh = {
 		.ptr = (TfwStr []){
@@ -319,7 +334,7 @@ tfw_http_send_500(TfwHttpReq *req)
 		.flags = 4 << TFW_STR_CN_SHIFT
 	};
 
-	TFW_DBG("Send HTTP 500 response\n");
+	TFW_DBG("Send HTTP 500 response: %s: %s\n", source, reason);
 
 	return tfw_http_send_resp(req, &rh, __TFW_STR_CH(&rh, 1));
 }
@@ -331,7 +346,7 @@ tfw_http_send_500(TfwHttpReq *req)
  * the designated server.
  */
 int
-tfw_http_send_502(TfwHttpReq *req)
+tfw_http_send_502(TfwHttpReq *req, const char *source, const char *reason)
 {
 	TfwStr rh = {
 		.ptr = (TfwStr []){
@@ -344,7 +359,7 @@ tfw_http_send_502(TfwHttpReq *req)
 		.flags = 4 << TFW_STR_CN_SHIFT
 	};
 
-	TFW_DBG("Send HTTP 502 response\n");
+	TFW_DBG("Send HTTP 502 response: %s: %s\n", source, reason);
 
 	return tfw_http_send_resp(req, &rh, __TFW_STR_CH(&rh, 1));
 }
@@ -356,7 +371,7 @@ tfw_http_send_502(TfwHttpReq *req)
  * the designated server.
  */
 int
-tfw_http_send_504(TfwHttpReq *req)
+tfw_http_send_504(TfwHttpReq *req, const char *source, const char *reason)
 {
 	TfwStr rh = {
 		.ptr = (TfwStr []){
@@ -369,7 +384,7 @@ tfw_http_send_504(TfwHttpReq *req)
 		.flags = 4 << TFW_STR_CN_SHIFT
 	};
 
-	TFW_DBG("Send HTTP 504 response\n");
+	TFW_DBG("Send HTTP 504 response: %s: %s\n", source, reason);
 
 	return tfw_http_send_resp(req, &rh, __TFW_STR_CH(&rh, 1));
 }
@@ -505,11 +520,13 @@ tfw_http_req_delist(TfwSrvConn *srv_conn, TfwHttpReq *req)
  */
 static inline void
 tfw_http_req_move2equeue(TfwSrvConn *srv_conn, TfwHttpReq *req,
-			 struct list_head *equeue, unsigned short status)
+			 struct list_head *equeue, unsigned short status,
+			 const char *reason)
 {
 	tfw_http_req_delist(srv_conn, req);
 	list_add_tail(&req->fwd_list, equeue);
-	req->rstatus = status;
+	req->status = status;
+	req->reason = reason;
 }
 
 /*
@@ -523,7 +540,7 @@ tfw_http_req_move2equeue(TfwSrvConn *srv_conn, TfwHttpReq *req,
  * request and then sent to the client in proper seq order.
  */
 static void
-tfw_http_req_zap_error(struct list_head *equeue)
+tfw_http_req_zap_error(struct list_head *equeue, const char *source)
 {
 	TfwHttpReq *req, *tmp;
 
@@ -532,23 +549,23 @@ tfw_http_req_zap_error(struct list_head *equeue)
 
 	list_for_each_entry_safe(req, tmp, equeue, fwd_list) {
 		list_del_init(&req->fwd_list);
-		switch(req->rstatus) {
+		switch(req->status) {
 		case 404:
-			tfw_http_send_404(req);
+			tfw_http_send_404(req, source, req->reason);
 			break;
 		case 500:
-			tfw_http_send_500(req);
+			tfw_http_send_500(req, source, req->reason);
 			break;
 		case 502:
-			tfw_http_send_502(req);
+			tfw_http_send_502(req, source, req->reason);
 			break;
 		case 504:
-			tfw_http_send_504(req);
+			tfw_http_send_504(req, source, req->reason);
 			break;
 		default:
 			TFW_WARN("Unexpected response error code: [%d]\n",
-				 req->rstatus);
-			tfw_http_send_500(req);
+				 req->status);
+			tfw_http_send_500(req, source, req->reason);
 			break;
 		}
 		TFW_INC_STAT_BH(clnt.msgs_otherr);
@@ -569,7 +586,8 @@ tfw_http_req_evict_timeout(TfwSrvConn *srv_conn, TfwServer *srv,
 		TFW_DBG2("%s: Eviction: req=[%p] overdue=[%dms]\n",
 			 __func__, req,
 			 jiffies_to_msecs(jqage - srv->sg->max_jqage));
-		tfw_http_req_move2equeue(srv_conn, req, equeue, 504);
+		tfw_http_req_move2equeue(srv_conn, req, equeue,
+					 504, s_reason_evict_timeout);
 		return true;
 	}
 	return false;
@@ -586,7 +604,8 @@ tfw_http_req_evict_retries(TfwSrvConn *srv_conn, TfwServer *srv,
 	if (unlikely(req->retries++ >= srv->sg->max_refwd)) {
 		TFW_DBG2("%s: Eviction: req=[%p] retries=[%d]\n",
 			 __func__, req, req->retries);
-		tfw_http_req_move2equeue(srv_conn, req, equeue, 504);
+		tfw_http_req_move2equeue(srv_conn, req, equeue,
+					 504, s_reason_evict_retries);
 		return true;
 	}
 	return false;
@@ -605,7 +624,8 @@ tfw_http_req_fwd_send(TfwSrvConn *srv_conn, TfwServer *srv,
 	if (tfw_connection_send((TfwConn *)srv_conn, (TfwMsg *)req)) {
 		TFW_DBG2("%s: Forwarding error: conn=[%p] req=[%p]\n",
 			 __func__, srv_conn, req);
-		tfw_http_req_move2equeue(srv_conn, req, equeue, 500);
+		tfw_http_req_move2equeue(srv_conn, req, equeue,
+					 500, s_reason_fwd);
 		return false;
 	}
 	return true;
@@ -719,10 +739,9 @@ tfw_http_conn_fwd_unsent(TfwSrvConn *srv_conn, struct list_head *equeue)
  * Please see the issue #687.
  */
 static void
-tfw_http_req_fwd(TfwSrvConn *srv_conn, TfwHttpReq *req)
+tfw_http_req_fwd(TfwSrvConn *srv_conn,
+		 TfwHttpReq *req, struct list_head *equeue)
 {
-	LIST_HEAD(equeue);
-
 	TFW_DBG2("%s: srv_conn=[%p], req=[%p]\n", __func__, srv_conn, req);
 	BUG_ON(!(TFW_CONN_TYPE(srv_conn) & Conn_Srv));
 
@@ -735,11 +754,8 @@ tfw_http_req_fwd(TfwSrvConn *srv_conn, TfwHttpReq *req)
 		spin_unlock(&srv_conn->fwd_qlock);
 		return;
 	}
-	__tfw_http_conn_fwd_unsent(srv_conn, &equeue);
+	__tfw_http_conn_fwd_unsent(srv_conn, equeue);
 	spin_unlock(&srv_conn->fwd_qlock);
-
-	if (!list_empty(&equeue))
-		tfw_http_req_zap_error(&equeue);
 }
 
 /*
@@ -774,7 +790,8 @@ tfw_http_conn_treatnip(TfwSrvConn *srv_conn, struct list_head *equeue)
 		srv_conn->msg_sent =
 			(srv_conn->fwd_queue.next == &req_sent->fwd_list) ?
 			NULL : (TfwMsg *)list_prev_entry(req_sent, fwd_list);
-		tfw_http_req_move2equeue(srv_conn, req_sent, equeue, 504);
+		tfw_http_req_move2equeue(srv_conn, req_sent, equeue,
+					 504, s_reason_nip);
 	}
 }
 
@@ -910,11 +927,12 @@ tfw_http_conn_resched(TfwSrvConn *srv_conn, struct list_head *equeue)
 			continue;
 		if (!(sch_conn = tfw_sched_get_srv_conn((TfwMsg *)req))) {
 			TFW_WARN("Unable to find a backend server\n");
-			tfw_http_req_move2equeue(srv_conn, req, equeue, 502);
+			tfw_http_req_move2equeue(srv_conn, req, equeue,
+						 502, s_reason_sched);
 			continue;
 		}
 		tfw_http_req_delist(srv_conn, req);
-		tfw_http_req_fwd(sch_conn, req);
+		tfw_http_req_fwd(sch_conn, req, equeue);
 		tfw_srv_conn_put(sch_conn);
 	}
 	BUG_ON(srv_conn->qsize);
@@ -996,7 +1014,7 @@ tfw_http_conn_repair(TfwConn *conn)
 	spin_unlock(&srv_conn->fwd_qlock);
 zap_error:
 	if (!list_empty(&equeue))
-		tfw_http_req_zap_error(&equeue);
+		tfw_http_req_zap_error(&equeue, s_source_proxy);
 }
 
 /*
@@ -1645,14 +1663,14 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 }
 
 /**
- * The request is served from cache.
+ * The request is serviced from cache.
  * Send the response as is and unrefer its data.
  */
 static void
 tfw_http_req_cache_service(TfwHttpReq *req, TfwHttpResp *resp)
 {
 	if (tfw_http_adjust_resp(resp, req)) {
-		tfw_http_send_500(req);
+		tfw_http_send_500(req, s_source_proxy, s_reason_resp_common);
 		tfw_http_conn_msg_free((TfwHttpMsg *)resp);
 		TFW_INC_STAT_BH(clnt.msgs_otherr);
 		return;
@@ -1672,6 +1690,7 @@ tfw_http_req_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 {
 	int r;
 	TfwSrvConn *srv_conn = NULL;
+	LIST_HEAD(equeue);
 
 	TFW_DBG2("%s: req = %p, resp = %p\n", __func__, req, resp);
 
@@ -1723,15 +1742,17 @@ tfw_http_req_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 		goto send_500;
 
 	/* Forward request to the server. */
-	tfw_http_req_fwd(srv_conn, req);
+	tfw_http_req_fwd(srv_conn, req, &equeue);
+	if (!list_empty(&equeue))
+		tfw_http_req_zap_error(&equeue, s_source_proxy);
 	goto conn_put;
 
 send_502:
-	tfw_http_send_502(req);
+	tfw_http_send_502(req, s_source_proxy, s_reason_req_common);
 	TFW_INC_STAT_BH(clnt.msgs_otherr);
 	return;
 send_500:
-	tfw_http_send_500(req);
+	tfw_http_send_500(req, s_source_proxy, s_reason_req_common);
 	TFW_INC_STAT_BH(clnt.msgs_otherr);
 conn_put:
 	tfw_srv_conn_put(srv_conn);
@@ -1995,7 +2016,8 @@ tfw_http_req_process(TfwConn *conn, struct sk_buff *skb, unsigned int off)
 		 * Otherwise we lose the reference to it and get a leak.
 		 */
 		if (tfw_cache_process(req, NULL, tfw_http_req_cache_cb)) {
-			tfw_http_send_500(req);
+			tfw_http_send_500(req, s_source_cache,
+					  s_reason_req_common);
 			TFW_INC_STAT_BH(clnt.msgs_otherr);
 			return TFW_PASS;
 		}
@@ -2048,7 +2070,7 @@ tfw_http_resp_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 	 * inter-node data transfers. (see tfw_http_req_cache_cb())
 	 */
 	if (tfw_http_adjust_resp(resp, req)) {
-		tfw_http_send_500(req);
+		tfw_http_send_500(req, s_source_proxy, s_reason_resp_common);
 		tfw_http_conn_msg_free((TfwHttpMsg *)resp);
 		TFW_INC_STAT_BH(serv.msgs_otherr);
 		return;
@@ -2125,7 +2147,7 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 	spin_unlock(&srv_conn->fwd_qlock);
 
 	if (!list_empty(&equeue))
-		tfw_http_req_zap_error(&equeue);
+		tfw_http_req_zap_error(&equeue, s_source_proxy);
 
 	return req;
 }
@@ -2167,7 +2189,7 @@ tfw_http_resp_gfsm(TfwHttpMsg *hmresp, struct sk_buff *skb, unsigned int off)
 		return TFW_BLOCK;
 	}
 
-	tfw_http_send_502(req);
+	tfw_http_send_502(req, s_source_proxy, s_reason_resp_filter);
 	tfw_http_conn_msg_free(hmresp);
 	TFW_INC_STAT_BH(serv.msgs_filtout);
 	return r;
@@ -2220,7 +2242,7 @@ tfw_http_resp_cache(TfwHttpMsg *hmresp)
 	if (tfw_cache_process(req, (TfwHttpResp *)hmresp,
 			      tfw_http_resp_cache_cb))
 	{
-		tfw_http_send_500(req);
+		tfw_http_send_500(req, s_source_cache, s_reason_resp_common);
 		tfw_http_conn_msg_free(hmresp);
 		TFW_INC_STAT_BH(serv.msgs_otherr);
 		/* Proceed with processing of the next response. */
diff --git a/tempesta_fw/http.h b/tempesta_fw/http.h
index 26fe136af..5a3fdf87e 100644
--- a/tempesta_fw/http.h
+++ b/tempesta_fw/http.h
@@ -350,8 +350,9 @@ typedef struct {
  * @tm_bchunk	- time previous chunk of HTTP body had come at;
  * @hash	- hash value for caching calculated for the request;
  * @resp	- the response paired with this request;
- * @rstatus	- error response status until the response is prepared;
+ * @reason	- the string with the reason for an error response;
  * @retries	- the number of re-send attempts;
+ * @status	- error response status until the response is prepared;
  *
  * TfwStr members must be the first for efficient scanning.
  */
@@ -374,9 +375,12 @@ typedef struct {
 	unsigned long		tm_header;
 	unsigned long		tm_bchunk;
 	unsigned long		hash;
-	TfwHttpMsg		*resp;
 	union {
-		unsigned short	rstatus;
+		TfwHttpMsg	*resp;
+		const char	*reason;
+	};
+	union {
+		unsigned short	status;
 		unsigned short	retries;
 	};
 } TfwHttpReq;
@@ -467,10 +471,10 @@ void tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp);
  */
 int tfw_http_send_200(TfwHttpReq *req);
 int tfw_http_prep_302(TfwHttpMsg *resp, TfwHttpReq *req, TfwStr *cookie);
-int tfw_http_send_403(TfwHttpReq *req);
-int tfw_http_send_404(TfwHttpReq *req);
-int tfw_http_send_502(TfwHttpReq *req);
-int tfw_http_send_504(TfwHttpReq *req);
+int tfw_http_send_403(TfwHttpReq *req, const char *source, const char *reason);
+int tfw_http_send_404(TfwHttpReq *req, const char *source, const char *reason);
+int tfw_http_send_502(TfwHttpReq *req, const char *source, const char *reason);
+int tfw_http_send_504(TfwHttpReq *req, const char *source, const char *reason);
 
 /*
  * Functions to create SKBs with data stream.
diff --git a/tempesta_fw/t/unit/test_http_sticky.c b/tempesta_fw/t/unit/test_http_sticky.c
index d1425dab1..b2c89c42e 100644
--- a/tempesta_fw/t/unit/test_http_sticky.c
+++ b/tempesta_fw/t/unit/test_http_sticky.c
@@ -275,7 +275,8 @@ TEST(http_sticky, sending_502)
 	StickyVal sv = { .ts = 1 };
 
 	EXPECT_EQ(__sticky_calc(mock.req, &sv), 0);
-	EXPECT_EQ(tfw_http_send_502(mock.req), 0);
+	EXPECT_EQ(tfw_http_send_502(mock.req, __func__,
+				    "sticky calculation"), 0);
 
 	/* HTTP 502 response have no Set-Cookie header */
 	EXPECT_TRUE(mock.tfw_connection_send_was_called);

From 43653461a85357e91f5dd25d13b652d2b7278187 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Mon, 27 Feb 2017 12:53:39 +0300
Subject: [PATCH 61/65] Better/stricter implementation of the request eviction
 procedure.

---
 README.md               |  17 ++++
 tempesta_fw/http.c      | 176 ++++++++++++++++++++++++++++------------
 tempesta_fw/http.h      |  20 -----
 tempesta_fw/http_sess.c |   1 -
 tempesta_fw/server.h    |   2 +-
 5 files changed, 141 insertions(+), 75 deletions(-)

diff --git a/README.md b/README.md
index 26c9fc45c..cb702c55e 100644
--- a/README.md
+++ b/README.md
@@ -429,6 +429,23 @@ server_forward_timeout <N>;
 ```
 `server_forward_retries` sets the maximum number of attempts to re-forward
 a request to a server. If not defined, the default number of attempts is 5.
+The value of zero prohibits re-forwarding of requests completely.
+
+Note that this value affects certain aspects of Tempesta when forwarding
+requests to a server. If a request may be re-sent, then the request's
+data have to be kept around, and only a copy of the data can be sent out.
+If a request may NOT be re-sent to a server, then the data can be sent
+out as is without copying. Copying of data of each request for a chance
+that the request may be re-sent may get quite expensive and may affect
+performance.
+
+If connections with back end servers are stable and are not reset every
+so often, then it would make sense to prohibit re-forwarding of requests
+for better performance. Otherwise, if may be useful to permit re-forwarding
+of requests. That would significantly decrease the number of error responses
+due to temporary unavailability of a live connection to a back end server.
+
+
 `server_forward_timeout` set the maximum time frame in seconds within which
 a request may still be forwarded. If not defined, the default time frame
 is 60 seconds. When one or both of these limits is exceeded for a request,
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index cfb20b226..5513f99a2 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -236,7 +236,6 @@ tfw_http_send_resp(TfwHttpReq *req, TfwStr *msg, const TfwStr *date)
 	tfw_http_prep_date(date->ptr);
 	tfw_http_msg_write(&it, hmresp, msg);
 
-	tfw_http_resp_init_ss_flags((TfwHttpResp *)hmresp, req);
 	tfw_http_resp_fwd(req, (TfwHttpResp *)hmresp);
 
 	return 0;
@@ -389,6 +388,26 @@ tfw_http_send_504(TfwHttpReq *req, const char *source, const char *reason)
 	return tfw_http_send_resp(req, &rh, __TFW_STR_CH(&rh, 1));
 }
 
+/*
+ * SKB data is needed for calculation of a cache key from fields of
+ * a request. It's also needed when a request may need to be re-sent.
+ * In all other cases it can just be passed to the network layer.
+ */
+static inline void
+tfw_http_req_init_ss_flags(TfwSrvConn *srv_conn, TfwHttpReq *req)
+{
+	TfwSrvGroup *sg = ((TfwServer *)(srv_conn->peer))->sg;
+	if (tfw_cache_msg_cacheable(req) || (req->retries < sg->max_refwd))
+		((TfwMsg *)req)->ss_flags |= SS_F_KEEP_SKB;
+}
+
+static inline void
+tfw_http_resp_init_ss_flags(TfwHttpResp *resp, const TfwHttpReq *req)
+{
+	if (req->flags & TFW_HTTP_CONN_CLOSE)
+		((TfwMsg *)resp)->ss_flags |= SS_F_CONN_CLOSE;
+}
+
 /*
  * Check if a request is non-idempotent.
  */
@@ -502,6 +521,24 @@ tfw_http_conn_need_fwd(TfwSrvConn *srv_conn)
 		&& !tfw_http_conn_drained(srv_conn));
 }
 
+/*
+ * Get the request that is previous to @srv_conn->msg_sent.
+ */
+static inline TfwMsg *
+__tfw_http_conn_msg_sent_prev(TfwSrvConn *srv_conn)
+{
+	TfwHttpReq *req_sent = (TfwHttpReq *)srv_conn->msg_sent;
+
+	BUG_ON(!req_sent);
+	/*
+	 * There is list_is_last() function in the Linux kernel,
+	 * but there is no list_is_first(). The condition below
+	 * is an implementation of list_is_first().
+	 */
+	return (srv_conn->fwd_queue.next == &req_sent->fwd_list) ?
+		NULL : (TfwMsg *)list_prev_entry(req_sent, fwd_list);
+}
+
 /*
  * Remove @req from the server connection's forwarding queue.
  */
@@ -620,6 +657,7 @@ tfw_http_req_fwd_send(TfwSrvConn *srv_conn, TfwServer *srv,
 		      TfwHttpReq *req, struct list_head *equeue)
 {
 	req->jtxtstamp = jiffies;
+	tfw_http_req_init_ss_flags(srv_conn, req);
 
 	if (tfw_connection_send((TfwConn *)srv_conn, (TfwMsg *)req)) {
 		TFW_DBG2("%s: Forwarding error: conn=[%p] req=[%p]\n",
@@ -778,18 +816,11 @@ tfw_http_conn_treatnip(TfwSrvConn *srv_conn, struct list_head *equeue)
 	TfwServer *srv = (TfwServer *)srv_conn->peer;
 	TfwHttpReq *req_sent = (TfwHttpReq *)srv_conn->msg_sent;
 
-	if (req_sent && tfw_http_req_is_nip(req_sent)
+	if (tfw_http_conn_on_hold(srv_conn)
 	    && likely(!(srv->sg->flags & TFW_SRV_RETRY_NIP)))
 	{
 		BUG_ON(list_empty(&req_sent->nip_list));
-		/*
-		 * There's list_is_last() function in the Linux kernel,
-		 * but there's no list_is_first. The condition that is
-		 * checked in an implementation of list_is_first().
-		 */
-		srv_conn->msg_sent =
-			(srv_conn->fwd_queue.next == &req_sent->fwd_list) ?
-			NULL : (TfwMsg *)list_prev_entry(req_sent, fwd_list);
+		srv_conn->msg_sent = __tfw_http_conn_msg_sent_prev(srv_conn);
 		tfw_http_req_move2equeue(srv_conn, req_sent, equeue,
 					 504, s_reason_nip);
 	}
@@ -799,7 +830,7 @@ tfw_http_conn_treatnip(TfwSrvConn *srv_conn, struct list_head *equeue)
  * Re-forward requests in a server connection. Requests that exceed
  * the set limits are evicted.
  */
-static TfwHttpReq *
+static TfwMsg *
 tfw_http_conn_resend(TfwSrvConn *srv_conn, bool first, struct list_head *equeue)
 {
 	TfwHttpReq *req, *tmp, *req_resent = NULL;
@@ -830,25 +861,7 @@ tfw_http_conn_resend(TfwSrvConn *srv_conn, bool first, struct list_head *equeue)
 			break;
 	}
 
-	return req_resent;
-}
-
-/*
- * Re-send only the first unanswered request in the forwarding queue.
- */
-static inline TfwHttpReq *
-tfw_http_conn_resend_first(TfwSrvConn *srv_conn, struct list_head *equeue)
-{
-	return tfw_http_conn_resend(srv_conn, true, equeue);
-}
-
-/*
- * Re-send all unanswered requests in the forwarding queue.
- */
-static inline TfwHttpReq *
-tfw_http_conn_resend_all(TfwSrvConn *srv_conn, struct list_head *equeue)
-{
-	return tfw_http_conn_resend(srv_conn, false, equeue);
+	return (TfwMsg *)req_resent;
 }
 
 /*
@@ -870,7 +883,7 @@ __tfw_http_conn_fwd_repair(TfwSrvConn *srv_conn, struct list_head *equeue)
 		tfw_http_conn_fwd_unsent(srv_conn, equeue);
 	} else {
 		/*
-		 * After all previously forwarded requests are re-sent,
+		 * Resend all previously forwarded requests. After that
 		 * @srv_conn->msg_sent will be either NULL or the last
 		 * request that was re-sent successfully. If re-sending
 		 * of non-idempotent requests is allowed, then that last
@@ -878,12 +891,10 @@ __tfw_http_conn_fwd_repair(TfwSrvConn *srv_conn, struct list_head *equeue)
 		 * requests that were never forwarded only if the last
 		 * request that was re-sent was NOT non-idempotent.
 		 */
-		TfwHttpReq *req_resent = (TfwHttpReq *)srv_conn->msg_sent;
-		if (req_resent) {
-			req_resent = tfw_http_conn_resend_all(srv_conn, equeue);
-			srv_conn->msg_sent = (TfwMsg *)req_resent;
-		}
-		if (!(req_resent && tfw_http_req_is_nip(req_resent))) {
+		if (srv_conn->msg_sent)
+			srv_conn->msg_sent =
+				tfw_http_conn_resend(srv_conn, false, equeue);
+		if (!tfw_http_conn_on_hold(srv_conn)) {
 			set_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
 			tfw_http_conn_fwd_unsent(srv_conn, equeue);
 		}
@@ -921,10 +932,41 @@ tfw_http_conn_resched(TfwSrvConn *srv_conn, struct list_head *equeue)
 	/* Treat a non-idempotent request if any. */
 	tfw_http_conn_treatnip(srv_conn, equeue);
 
-	/* Process complete queue. */
+	/*
+	 * The assumption is that the forwarding queue is processed
+	 * in one pass. There's no need to maintain the correct value
+	 * of @srv_conn->msg_sent in each loop iteration.
+	 *
+	 * Note: The limit on re-forward attempts is checked against
+	 * the maximum value for the current server group. Then the
+	 * request is placed in another connection in the same group.
+	 * It's essential that all servers in a group have the same
+	 * limit. Otherwise, it will be necessary to check requests
+	 * for eviction _after_ a new connection is found.
+	 */
+	/*
+	 * Evict requests with depleted number of re-send attempts. Do it
+	 * for requests that were sent before. Don't touch unsent requests.
+	 */
+	if (srv_conn->msg_sent) {
+		struct list_head *end =
+			((TfwHttpReq *)srv_conn->msg_sent)->fwd_list.next;
+		req = list_first_entry(fwd_queue, TfwHttpReq, fwd_list);
+
+		/* Similar to list_for_each_entry_safe_from() */
+		for (tmp = list_next_entry(req, fwd_list);
+		     &req->fwd_list != end;
+		     req = tmp, tmp = list_next_entry(tmp, fwd_list))
+		{
+			tfw_http_req_evict_retries(srv_conn, srv, req, equeue);
+		}
+	}
+
+	/*
+	 * Process the complete forwarding queue and re-schedule all
+	 * requests to other servers/connections.
+	 */
 	list_for_each_entry_safe(req, tmp, fwd_queue, fwd_list) {
-		if (tfw_http_req_evict_retries(srv_conn, srv, req, equeue))
-			continue;
 		if (!(sch_conn = tfw_sched_get_srv_conn((TfwMsg *)req))) {
 			TFW_WARN("Unable to find a backend server\n");
 			tfw_http_req_move2equeue(srv_conn, req, equeue,
@@ -936,8 +978,18 @@ tfw_http_conn_resched(TfwSrvConn *srv_conn, struct list_head *equeue)
 		tfw_srv_conn_put(sch_conn);
 	}
 	BUG_ON(srv_conn->qsize);
+	srv_conn->msg_sent = NULL;
 }
 
+/*
+ * Process complete forwarding queue and evict requests that timed out.
+ *
+ * First, process unanswered requests that were forwarded to the server,
+ * not including the request that was sent last. Then, process that last
+ * request that was sent, and reassign @srv_conn->msg_sent in case it is
+ * evicted. Finally, process the rest of the forwarding queue. Those are
+ * the requests that were never forwarded yet.
+ */
 static inline void
 tfw_http_conn_evict_timeout(TfwSrvConn *srv_conn, struct list_head *equeue)
 {
@@ -947,8 +999,32 @@ tfw_http_conn_evict_timeout(TfwSrvConn *srv_conn, struct list_head *equeue)
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
 
-	/* Process complete queue and evict requests that timed out. */
-	list_for_each_entry_safe(req, tmp, fwd_queue, fwd_list)
+	if (srv_conn->msg_sent) {
+		TfwMsg *msg_sent_prev;
+		struct list_head *end =
+			&((TfwHttpReq *)srv_conn->msg_sent)->fwd_list;
+		req = list_first_entry(fwd_queue, TfwHttpReq, fwd_list);
+
+		/* Similar to list_for_each_entry_safe_from() */
+		for (tmp = list_next_entry(req, fwd_list);
+		     &req->fwd_list != end;
+		     req = tmp, tmp = list_next_entry(tmp, fwd_list))
+		{
+			tfw_http_req_evict_timeout(srv_conn, srv, req, equeue);
+		}
+
+		/* Process the request that was forwarded last. */
+		msg_sent_prev = __tfw_http_conn_msg_sent_prev(srv_conn);
+		if (tfw_http_req_evict_timeout(srv_conn, srv, req, equeue))
+			srv_conn->msg_sent = msg_sent_prev;
+	}
+
+	/* Process the rest of the forwarding queue. */
+	req = srv_conn->msg_sent
+	    ? list_next_entry((TfwHttpReq *)srv_conn->msg_sent, fwd_list)
+	    : list_first_entry(fwd_queue, TfwHttpReq, fwd_list);
+
+	list_for_each_entry_safe_from(req, tmp, fwd_queue, fwd_list)
 		tfw_http_req_evict_timeout(srv_conn, srv, req, equeue);
 }
 
@@ -975,7 +1051,6 @@ static void
 tfw_http_conn_repair(TfwConn *conn)
 {
 	TfwSrvConn *srv_conn = (TfwSrvConn *)conn;
-	TfwHttpReq *req_resent = NULL;
 	LIST_HEAD(equeue);
 
 	TFW_DBG2("%s: conn=[%p]\n", __func__, srv_conn);
@@ -1000,14 +1075,12 @@ tfw_http_conn_repair(TfwConn *conn)
 	spin_lock(&srv_conn->fwd_qlock);
 	/* Treat a non-idempotent request if any. */
 	tfw_http_conn_treatnip(srv_conn, &equeue);
-	/* Re-send the first unanswered request. */
-	if (srv_conn->msg_sent) {
-		req_resent = tfw_http_conn_resend_first(srv_conn, &equeue);
-		if (unlikely(!req_resent))
+	/* Re-send only the first unanswered request. */
+	if (srv_conn->msg_sent)
+		if (unlikely(!tfw_http_conn_resend(srv_conn, true, &equeue)))
 			srv_conn->msg_sent = NULL;
-	}
 	/* If none re-sent, then send the remaining unsent requests. */
-	if (!req_resent) {
+	if (!srv_conn->msg_sent) {
 		set_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
 		tfw_http_conn_fwd_unsent(srv_conn, &equeue);
 	}
@@ -1461,8 +1534,6 @@ tfw_http_adjust_req(TfwHttpReq *req)
 	int r;
 	TfwHttpMsg *hm = (TfwHttpMsg *)req;
 
-	tfw_http_req_init_ss_flags(req);
-
 	r = tfw_http_add_x_forwarded_for(hm);
 	if (r)
 		return r;
@@ -1487,8 +1558,6 @@ tfw_http_adjust_resp(TfwHttpResp *resp, TfwHttpReq *req)
 	int r, conn_flg = req->flags & __TFW_HTTP_CONN_MASK;
 	TfwHttpMsg *hm = (TfwHttpMsg *)resp;
 
-	tfw_http_resp_init_ss_flags(resp, req);
-
 	r = tfw_http_sess_resp_process(resp, req);
 	if (r < 0)
 		return r;
@@ -1544,6 +1613,7 @@ __tfw_http_resp_fwd(TfwCliConn *cli_conn, struct list_head *ret_queue)
 
 	list_for_each_entry_safe(req, tmp, ret_queue, msg.seq_list) {
 		BUG_ON(!req->resp);
+		tfw_http_resp_init_ss_flags((TfwHttpResp *)req->resp, req);
 		if (tfw_cli_conn_send(cli_conn, (TfwMsg *)req->resp)) {
 			ss_close_sync(cli_conn->sk, true);
 			return;
diff --git a/tempesta_fw/http.h b/tempesta_fw/http.h
index 5a3fdf87e..cbc53976d 100644
--- a/tempesta_fw/http.h
+++ b/tempesta_fw/http.h
@@ -433,26 +433,6 @@ tfw_current_timestamp(void)
 	return ts.tv_sec;
 }
 
-/*
- * SKB data is needed only for calculation of a cache key from request
- * fields. In all other cases it can just be passed to the network layer.
- */
-extern bool tfw_cache_msg_cacheable(TfwHttpReq *req);
-
-static inline void
-tfw_http_req_init_ss_flags(TfwHttpReq *req)
-{
-	if (tfw_cache_msg_cacheable(req))
-		((TfwMsg *)req)->ss_flags |= SS_F_KEEP_SKB;
-}
-
-static inline void
-tfw_http_resp_init_ss_flags(TfwHttpResp *resp, const TfwHttpReq *req)
-{
-	if (req->flags & TFW_HTTP_CONN_CLOSE)
-		((TfwMsg *)resp)->ss_flags |= SS_F_CONN_CLOSE;
-}
-
 typedef void (*tfw_http_cache_cb_t)(TfwHttpReq *, TfwHttpResp *);
 
 /* Internal (parser) HTTP functions. */
diff --git a/tempesta_fw/http_sess.c b/tempesta_fw/http_sess.c
index acb5de1b8..26b9bc4db 100644
--- a/tempesta_fw/http_sess.c
+++ b/tempesta_fw/http_sess.c
@@ -128,7 +128,6 @@ tfw_http_sticky_send_302(TfwHttpReq *req, StickyVal *sv)
 	if (tfw_http_prep_302(hmresp, req, &cookie))
 		return -1;
 
-	tfw_http_resp_init_ss_flags((TfwHttpResp *)hmresp, req);
 	tfw_http_resp_fwd(req, (TfwHttpResp *)hmresp);
 
 	return 0;
diff --git a/tempesta_fw/server.h b/tempesta_fw/server.h
index b9c3164ae..400f05393 100644
--- a/tempesta_fw/server.h
+++ b/tempesta_fw/server.h
@@ -60,8 +60,8 @@ typedef struct {
  * @sched	- requests scheduling handler;
  * @sched_data	- private scheduler data for the server group;
  * @max_qsize	- maximum queue size of a server connection;
- * @max_jqage	- maximum age of a request in a server connection, in jiffies;
  * @max_refwd	- maximum number of tries for forwarding a request;
+ * @max_jqage	- maximum age of a request in a server connection, in jiffies;
  * @max_recns	- maximum number of reconnect attempts;
  * @flags	- server group related flags;
  * @name	- name of the group specified in the configuration;

From 69a5a2fdbc87160e599e1a1b627dccf199e3d80c Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Tue, 28 Feb 2017 01:36:22 +0300
Subject: [PATCH 62/65] Add a TODO comment; retract the description of zero
 forward_retries.

---
 README.md          | 26 +++++++-------------------
 tempesta_fw/http.c |  7 +++++++
 2 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index cb702c55e..8b6edca74 100644
--- a/README.md
+++ b/README.md
@@ -410,7 +410,7 @@ which the server connection is considered dead. It is defined as follows:
 server_connect_retries <N>;
 ```
 If this directive is not defined, then the number of re-connect attempts
-defaults to 10. A value of zero specified for `N` means unlimited number
+defaults to 10. The value of zero specified for `N` means unlimited number
 of attempts.
 
 This is an important directive which controls how Tempesta deals with
@@ -427,29 +427,17 @@ certain allowed limits before these requests are considered failed:
 server_forward_retries <N>;
 server_forward_timeout <N>;
 ```
+
 `server_forward_retries` sets the maximum number of attempts to re-forward
 a request to a server. If not defined, the default number of attempts is 5.
-The value of zero prohibits re-forwarding of requests completely.
-
-Note that this value affects certain aspects of Tempesta when forwarding
-requests to a server. If a request may be re-sent, then the request's
-data have to be kept around, and only a copy of the data can be sent out.
-If a request may NOT be re-sent to a server, then the data can be sent
-out as is without copying. Copying of data of each request for a chance
-that the request may be re-sent may get quite expensive and may affect
-performance.
-
-If connections with back end servers are stable and are not reset every
-so often, then it would make sense to prohibit re-forwarding of requests
-for better performance. Otherwise, if may be useful to permit re-forwarding
-of requests. That would significantly decrease the number of error responses
-due to temporary unavailability of a live connection to a back end server.
-
+The value of zero specified for `N` means unlimited number of attempts.
 
 `server_forward_timeout` set the maximum time frame in seconds within which
 a request may still be forwarded. If not defined, the default time frame
-is 60 seconds. When one or both of these limits is exceeded for a request,
-the request is evicted and an error is returned to a client.
+is 60 seconds. The value of zero specified for `N` means unlimited timeout.
+
+When one or both of these limits is exceeded for a request, the request is
+evicted and an error is returned to a client.
 
 When re-forwarding or re-scheduling requests in a failed server connection,
 a special consideration is given to non-idempotent requests. Usually
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 5513f99a2..731290cbc 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -651,6 +651,13 @@ tfw_http_req_evict_retries(TfwSrvConn *srv_conn, TfwServer *srv,
 /*
  * If forwarding of @req to server @srv_conn is not successful, then
  * move it to the error queue @equeue for sending an error response later.
+ *
+ * TODO: Perhaps, there's a small optimization. Ultimately, the thread
+ * ends up in ss_send(). In some cases a connection is still active when
+ * it's obtained, but not active by the time the thread is in ss_send().
+ * In that case -EBADF is returned, and nothing destructive happens to
+ * the request. So, perhaps, instead of sending an error in that case
+ * these unlucky requests can be re-sent when the connection is restored.
  */
 static inline bool
 tfw_http_req_fwd_send(TfwSrvConn *srv_conn, TfwServer *srv,

From 6969c7601ba73f2b883d6771fb1f675b4d69ed95 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Tue, 28 Feb 2017 02:26:46 +0300
Subject: [PATCH 63/65] Remove erroneous response free() in popreq().

---
 tempesta_fw/http.c     | 2 --
 tempesta_fw/sock_srv.c | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 731290cbc..b6003db18 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -2199,8 +2199,6 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 		spin_unlock(&srv_conn->fwd_qlock);
 		TFW_WARN("Paired request missing, "
 			 "HTTP Response Splitting attack?\n");
-		/* @conn->msg will get NULLed in the process. */
-		tfw_http_conn_msg_free(hmresp);
 		TFW_INC_STAT_BH(serv.msgs_otherr);
 		return NULL;
 	}
diff --git a/tempesta_fw/sock_srv.c b/tempesta_fw/sock_srv.c
index db593e268..25691d384 100644
--- a/tempesta_fw/sock_srv.c
+++ b/tempesta_fw/sock_srv.c
@@ -367,9 +367,9 @@ tfw_sock_srv_connect_failover(struct sock *sk)
 	 * failover state.
 	 */
 	if (tfw_connection_live(conn)) {
+		TFW_INC_STAT_BH(serv.conn_disconnects);
 		tfw_connection_put_to_death(conn);
 		tfw_connection_drop(conn);
-		TFW_INC_STAT_BH(serv.conn_disconnects);
 	} else {
 		tfw_connection_get(conn);
 	}

From 50ade60fdbf027ca81ad18e9e14b7e926a2d54ea Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Wed, 1 Mar 2017 11:48:07 +0300
Subject: [PATCH 64/65] Add the number of schedulable connections to perfstat.

---
 README.md            | 31 ++++++++++++++++++++++---------
 tempesta_fw/http.c   | 26 +++++++++++++++++++-------
 tempesta_fw/procfs.c | 10 ++++++++--
 tempesta_fw/procfs.h |  1 +
 tempesta_fw/sock.c   |  2 +-
 5 files changed, 51 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 8b6edca74..1cdbccf30 100644
--- a/README.md
+++ b/README.md
@@ -439,6 +439,10 @@ is 60 seconds. The value of zero specified for `N` means unlimited timeout.
 When one or both of these limits is exceeded for a request, the request is
 evicted and an error is returned to a client.
 
+Note that while requests in a connection are re-forwarded or re-scheduled,
+that connection is not schedulable, which means it's not available to
+schedulers for new incoming requests.
+
 When re-forwarding or re-scheduling requests in a failed server connection,
 a special consideration is given to non-idempotent requests. Usually
 a non-idempotent request is not re-forwarded or re-scheduled. That may be
@@ -804,22 +808,31 @@ and running. Below is an example of the command to show the statistics,
 and the output:
 ```
 $ cat /proc/tempesta/perfstat
-Client messages received                : 450
-Client messages forwarded               : 450
+SS pfl hits                             : 5836412
+SS pfl misses                           : 5836412
+Cache hits                              : 0
+Cache misses                            : 0
+Client messages received                : 2918206
+Client messages forwarded               : 2918206
+Client messages served from cache       : 0
 Client messages parsing errors          : 0
 Client messages filtered out            : 0
 Client messages other errors            : 0
-Client connections total                : 30
+Clients online                          : 0
+Client connection attempts              : 2048
+Client established connections          : 2048
 Client connections active               : 0
-Client RX bytes                         : 47700
-Server messages received                : 447
-Server messages forwarded               : 447
+Client RX bytes                         : 309329836
+Server messages received                : 2918206
+Server messages forwarded               : 2918206
 Server messages parsing errors          : 0
 Server messages filtered out            : 0
 Server messages other errors            : 0
-Server connections total                : 2220
-Server connections active               : 4
-Server RX bytes                         : 153145
+Server connection attempts              : 8896
+Server established connections          : 8896
+Server connections active               : 32
+Server connections schedulable          : 32
+Server RX bytes                         : 11494813434
 ```
 
 Also, there's Application Performance Monitoring statistics. These stats show
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index b6003db18..6dd14fd3c 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -885,7 +885,10 @@ __tfw_http_conn_fwd_repair(TfwSrvConn *srv_conn, struct list_head *equeue)
 
 	if (list_empty(&srv_conn->fwd_queue)) {
 		clear_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
-		clear_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
+		if (test_bit(TFW_CONN_B_RESEND, &srv_conn->flags)) {
+			TFW_DEC_STAT_BH(serv.conn_restricted);
+			clear_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
+		}
 	} else if (test_bit(TFW_CONN_B_QFORWD, &srv_conn->flags)) {
 		tfw_http_conn_fwd_unsent(srv_conn, equeue);
 	} else {
@@ -1019,8 +1022,10 @@ tfw_http_conn_evict_timeout(TfwSrvConn *srv_conn, struct list_head *equeue)
 		{
 			tfw_http_req_evict_timeout(srv_conn, srv, req, equeue);
 		}
-
-		/* Process the request that was forwarded last. */
+		/*
+		 * Process the request that was forwarded last.
+		 * @req is now the same as @srv_conn->msg_sent.
+		 */
 		msg_sent_prev = __tfw_http_conn_msg_sent_prev(srv_conn);
 		if (tfw_http_req_evict_timeout(srv_conn, srv, req, equeue))
 			srv_conn->msg_sent = msg_sent_prev;
@@ -1092,6 +1097,7 @@ tfw_http_conn_repair(TfwConn *conn)
 		tfw_http_conn_fwd_unsent(srv_conn, &equeue);
 	}
 	spin_unlock(&srv_conn->fwd_qlock);
+
 zap_error:
 	if (!list_empty(&equeue))
 		tfw_http_req_zap_error(&equeue, s_source_proxy);
@@ -1196,8 +1202,10 @@ tfw_http_conn_init(TfwConn *conn)
 
 	if (TFW_CONN_TYPE(conn) & Conn_Srv) {
 		TfwSrvConn *srv_conn = (TfwSrvConn *)conn;
-		if (!list_empty(&srv_conn->fwd_queue))
+		if (!list_empty(&srv_conn->fwd_queue)) {
 			set_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
+			TFW_INC_STAT_BH(serv.conn_restricted);
+		}
 		clear_bit(TFW_CONN_B_FAULTY, &srv_conn->flags);
 	}
 	tfw_gfsm_state_init(&conn->state, conn, TFW_HTTP_FSM_INIT);
@@ -1234,7 +1242,10 @@ tfw_http_conn_release(TfwConn *conn)
 
 	if (likely(ss_active())) {
 		clear_bit(TFW_CONN_B_QFORWD, &srv_conn->flags);
-		clear_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
+		if (test_bit(TFW_CONN_B_RESEND, &srv_conn->flags)) {
+			TFW_DEC_STAT_BH(serv.conn_restricted);
+			clear_bit(TFW_CONN_B_RESEND, &srv_conn->flags);
+		}
 		return;
 	}
 
@@ -1673,6 +1684,7 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 	 */
 	spin_lock(&cli_conn->seq_qlock);
 	if (unlikely(list_empty(seq_queue))) {
+		BUG_ON(!list_empty(&req->msg.seq_list));
 		spin_unlock(&cli_conn->seq_qlock);
 		TFW_DBG2("%s: The client's request missing: conn=[%p]\n",
 			 __func__, cli_conn);
@@ -1684,7 +1696,7 @@ tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp)
 	}
 	BUG_ON(list_empty(&req->msg.seq_list));
 	req->resp = (TfwHttpMsg *)resp;
-	/* Move consecutive requests with @req->resp to @req_retent. */
+	/* Move consecutive requests with @req->resp to @ret_queue. */
 	list_for_each_entry(req, seq_queue, msg.seq_list) {
 		if (req->resp == NULL)
 			break;
@@ -1811,7 +1823,7 @@ tfw_http_req_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 	 * all subsequent session hits are scheduled much faster.
 	 */
 	if (!(srv_conn = tfw_sched_get_srv_conn((TfwMsg *)req))) {
-		TFW_WARN("Unable to find a backend server\n");
+		TFW_WARN("Unable to find a back end server\n");
 		goto send_502;
 	}
 
diff --git a/tempesta_fw/procfs.c b/tempesta_fw/procfs.c
index f3e4e1975..ab37bedf6 100644
--- a/tempesta_fw/procfs.c
+++ b/tempesta_fw/procfs.c
@@ -74,6 +74,7 @@ tfw_perfstat_collect(TfwPerfStat *stat)
 		SADD(serv.conn_attempts);
 		SADD(serv.conn_disconnects);
 		SADD(serv.conn_established);
+		SADD(serv.conn_restricted);
 		SADD(serv.rx_bytes);
 	}
 #undef SADD
@@ -86,6 +87,7 @@ tfw_perfstat_seq_show(struct seq_file *seq, void *off)
 #define SPRN(m, c)	seq_printf(seq, m": %llu\n", stat.c)
 
 	TfwPerfStat stat;
+	u64 serv_conn_active, serv_conn_sched;
 
 	memset(&stat, 0, sizeof(stat));
 	tfw_perfstat_collect(&stat);
@@ -113,6 +115,10 @@ tfw_perfstat_seq_show(struct seq_file *seq, void *off)
 	SPRN("Client RX bytes\t\t\t\t", clnt.rx_bytes);
 
 	/* Server related statistics. */
+	serv_conn_active = stat.serv.conn_established
+			   - stat.serv.conn_disconnects;
+	serv_conn_sched = serv_conn_active - stat.serv.conn_restricted;
+
 	SPRN("Server messages received\t\t", serv.rx_messages);
 	SPRN("Server messages forwarded\t\t", serv.msgs_forwarded);
 	SPRN("Server messages parsing errors\t\t", serv.msgs_parserr);
@@ -120,8 +126,8 @@ tfw_perfstat_seq_show(struct seq_file *seq, void *off)
 	SPRN("Server messages other errors\t\t", serv.msgs_otherr);
 	SPRN("Server connection attempts\t\t", serv.conn_attempts);
 	SPRN("Server established connections\t\t", serv.conn_established);
-	SPRNE("Server connections active\t\t",
-	      stat.serv.conn_established - stat.serv.conn_disconnects);
+	SPRNE("Server connections active\t\t", serv_conn_active);
+	SPRNE("Server connections schedulable\t\t", serv_conn_sched);
 	SPRN("Server RX bytes\t\t\t\t", serv.rx_bytes);
 
 	return 0;
diff --git a/tempesta_fw/procfs.h b/tempesta_fw/procfs.h
index 3cd28f871..9d0103835 100644
--- a/tempesta_fw/procfs.h
+++ b/tempesta_fw/procfs.h
@@ -61,6 +61,7 @@ typedef struct {
 
 typedef struct {
 	TFW_STAT_COMMON;
+	u64	conn_restricted;
 } TfwSrvStat;
 
 /*
diff --git a/tempesta_fw/sock.c b/tempesta_fw/sock.c
index d0eae7a82..39f4b0010 100644
--- a/tempesta_fw/sock.c
+++ b/tempesta_fw/sock.c
@@ -311,7 +311,7 @@ ss_send(struct sock *sk, SsSkbList *skb_list, int flags)
 	 * avoid expensive work queue operations.
 	 */
 	if (unlikely(!ss_sock_active(sk))) {
-		SS_DBG("Try to send on inactive socket %p\n", sk);
+		SS_DBG("Attempt to send on inactive socket %p\n", sk);
 		return -EBADF;
 	}
 

From 8273de0295f64d74b88695741f4fac70344f5fd7 Mon Sep 17 00:00:00 2001
From: Aleksey Baulin <ab@natsys-lab.com>
Date: Thu, 2 Mar 2017 20:38:10 +0300
Subject: [PATCH 65/65] Minor changes according to the latest code review.

- Eliminate the "source" part of an error message. Not needed now.
- Eliminate static global constant strings. Use strings in place.
- Rename tfw_http_req_move2equeue() to a shorter tfw_http_req_error().
- tfw_http_req_init_ss_flags() now always sets SS_F_KEEP_SKB, so all SKBs
  are copied. Add a TODO about that.
- Improve the comment for tfw_http_conn_evict_timeout().
---
 tempesta_fw/cache.c                   |  16 ++--
 tempesta_fw/http.c                    | 125 +++++++++++++-------------
 tempesta_fw/http.h                    |   8 +-
 tempesta_fw/t/unit/test_http_sticky.c |   3 +-
 4 files changed, 71 insertions(+), 81 deletions(-)

diff --git a/tempesta_fw/cache.c b/tempesta_fw/cache.c
index cbc498dc8..6f6857adb 100644
--- a/tempesta_fw/cache.c
+++ b/tempesta_fw/cache.c
@@ -41,8 +41,6 @@
 #warning "Please set CONFIG_NODES_SHIFT to less than 16"
 #endif
 
-extern const char *s_source_cache;
-
 /* Flags stored in a Cache Entry. */
 #define TFW_CE_MUST_REVAL	0x0001		/* MUST revalidate if stale. */
 
@@ -937,14 +935,12 @@ tfw_cache_purge_method(TfwHttpReq *req)
 
 	/* Deny PURGE requests by default. */
 	if (!(cache_cfg.cache && vhost->cache_purge && vhost->cache_purge_acl))
-		return tfw_http_send_403(req, s_source_cache,
-					 "purge: not configured");
+		return tfw_http_send_403(req, "purge: not configured");
 
 	/* Accept requests from configured hosts only. */
 	ss_getpeername(req->conn->sk, &saddr);
 	if (!tfw_capuacl_match(vhost, &saddr))
-		return tfw_http_send_403(req, s_source_cache,
-					 "purge: ACL violation");
+		return tfw_http_send_403(req, "purge: ACL violation");
 
 	/* Only "invalidate" option is implemented at this time. */
 	switch (vhost->cache_purge_mode) {
@@ -952,13 +948,11 @@ tfw_cache_purge_method(TfwHttpReq *req)
 		ret = tfw_cache_purge_invalidate(req);
 		break;
 	default:
-		return tfw_http_send_403(req, s_source_cache,
-					 "purge: invalid option");
+		return tfw_http_send_403(req, "purge: invalid option");
 	}
 
 	return ret
-		? tfw_http_send_404(req, s_source_cache,
-				    "purge: processing error")
+		? tfw_http_send_404(req, "purge: processing error")
 		: tfw_http_send_200(req);
 }
 
@@ -1214,7 +1208,7 @@ cache_req_process_node(TfwHttpReq *req, tfw_http_cache_cb_t action)
 		resp->flags |= TFW_HTTP_RESP_STALE;
 out:
 	if (!resp && (req->cache_ctl.flags & TFW_HTTP_CC_OIFCACHED))
-		tfw_http_send_504(req, s_source_cache, "resource not cached");
+		tfw_http_send_504(req, "resource not cached");
 	else
 		action(req, resp);
 
diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c
index 6dd14fd3c..f8cc641cf 100644
--- a/tempesta_fw/http.c
+++ b/tempesta_fw/http.c
@@ -62,21 +62,6 @@ int ghprio; /* GFSM hook priority. */
 #define S_H_CONN_KA		S_F_CONNECTION S_V_CONN_KA S_CRLFCRLF
 #define S_H_CONN_CLOSE		S_F_CONNECTION S_V_CONN_CLOSE S_CRLFCRLF
 
-const char *s_source_proxy = "proxy forward";
-const char *s_source_cache = "proxy cache";
-
-static const char *s_reason_evict_timeout = "request evicted: timed out";
-static const char *s_reason_evict_retries = "request evicted: the number"
-					    " of retries exceeded";
-static const char *s_reason_fwd = "request dropped: forwarding error";
-static const char *s_reason_nip = "request dropped: non-idempotent requests"
-				  "are not re-forwarded or re-scheduled";
-static const char *s_reason_sched = "request dropped: unable to find "
-				    "an available back end server";
-static const char *s_reason_req_common = "request dropped: processing error";
-static const char *s_reason_resp_common = "response dropped: processing error";
-static const char *s_reason_resp_filter = "response dropped: filtered out";
-
 /*
  * Prepare current date in the format required for HTTP "Date:"
  * header field. See RFC 2616 section 3.3.
@@ -271,7 +256,7 @@ tfw_http_send_200(TfwHttpReq *req)
  * HTTP 403 response: Access is forbidden.
  */
 int
-tfw_http_send_403(TfwHttpReq *req, const char *source, const char *reason)
+tfw_http_send_403(TfwHttpReq *req, const char *reason)
 {
 	TfwStr rh = {
 		.ptr = (TfwStr []){
@@ -284,7 +269,7 @@ tfw_http_send_403(TfwHttpReq *req, const char *source, const char *reason)
 		.flags = 4 << TFW_STR_CN_SHIFT
 	};
 
-	TFW_DBG("Send HTTP 403 response: %s: %s\n", source, reason);
+	TFW_DBG("Send HTTP 403 response: %s\n", reason);
 
 	return tfw_http_send_resp(req, &rh, __TFW_STR_CH(&rh, 1));
 }
@@ -295,7 +280,7 @@ tfw_http_send_403(TfwHttpReq *req, const char *source, const char *reason)
  * HTTP 404 response: Tempesta is unable to find the requested data.
  */
 int
-tfw_http_send_404(TfwHttpReq *req, const char *source, const char *reason)
+tfw_http_send_404(TfwHttpReq *req, const char *reason)
 {
 	TfwStr rh = {
 		.ptr = (TfwStr []){
@@ -308,7 +293,7 @@ tfw_http_send_404(TfwHttpReq *req, const char *source, const char *reason)
 		.flags = 4 << TFW_STR_CN_SHIFT
 	};
 
-	TFW_DBG("Send HTTP 404 response: %s: %s\n", source, reason);
+	TFW_DBG("Send HTTP 404 response: %s\n", reason);
 
 	return tfw_http_send_resp(req, &rh, __TFW_STR_CH(&rh, 1));
 }
@@ -320,7 +305,7 @@ tfw_http_send_404(TfwHttpReq *req, const char *source, const char *reason)
  * the request to a server.
  */
 static int
-tfw_http_send_500(TfwHttpReq *req, const char *source, const char *reason)
+tfw_http_send_500(TfwHttpReq *req, const char *reason)
 {
 	TfwStr rh = {
 		.ptr = (TfwStr []){
@@ -333,7 +318,7 @@ tfw_http_send_500(TfwHttpReq *req, const char *source, const char *reason)
 		.flags = 4 << TFW_STR_CN_SHIFT
 	};
 
-	TFW_DBG("Send HTTP 500 response: %s: %s\n", source, reason);
+	TFW_DBG("Send HTTP 500 response: %s\n", reason);
 
 	return tfw_http_send_resp(req, &rh, __TFW_STR_CH(&rh, 1));
 }
@@ -345,7 +330,7 @@ tfw_http_send_500(TfwHttpReq *req, const char *source, const char *reason)
  * the designated server.
  */
 int
-tfw_http_send_502(TfwHttpReq *req, const char *source, const char *reason)
+tfw_http_send_502(TfwHttpReq *req, const char *reason)
 {
 	TfwStr rh = {
 		.ptr = (TfwStr []){
@@ -358,7 +343,7 @@ tfw_http_send_502(TfwHttpReq *req, const char *source, const char *reason)
 		.flags = 4 << TFW_STR_CN_SHIFT
 	};
 
-	TFW_DBG("Send HTTP 502 response: %s: %s:\n", source, reason);
+	TFW_DBG("Send HTTP 502 response: %s:\n", reason);
 
 	return tfw_http_send_resp(req, &rh, __TFW_STR_CH(&rh, 1));
 }
@@ -370,7 +355,7 @@ tfw_http_send_502(TfwHttpReq *req, const char *source, const char *reason)
  * the designated server.
  */
 int
-tfw_http_send_504(TfwHttpReq *req, const char *source, const char *reason)
+tfw_http_send_504(TfwHttpReq *req, const char *reason)
 {
 	TfwStr rh = {
 		.ptr = (TfwStr []){
@@ -383,7 +368,7 @@ tfw_http_send_504(TfwHttpReq *req, const char *source, const char *reason)
 		.flags = 4 << TFW_STR_CN_SHIFT
 	};
 
-	TFW_DBG("Send HTTP 504 response: %s: %s:\n", source, reason);
+	TFW_DBG("Send HTTP 504 response: %s:\n", reason);
 
 	return tfw_http_send_resp(req, &rh, __TFW_STR_CH(&rh, 1));
 }
@@ -392,13 +377,20 @@ tfw_http_send_504(TfwHttpReq *req, const char *source, const char *reason)
  * SKB data is needed for calculation of a cache key from fields of
  * a request. It's also needed when a request may need to be re-sent.
  * In all other cases it can just be passed to the network layer.
+ *
+ * However, at this time requests may always be re-sent in case of
+ * a connection failure. There's no option to prohibit re-sending.
+ * Thus, request's SKB can't be passed to the network layer until
+ * certain changes are implemented. For now there's no choice but
+ * to make a copy of the request's SKBs in the SS layer.
+ *
+ * TODO: Making a copy of each SKB _IS BAD_. See issues #391 and #488.
+ *
  */
 static inline void
 tfw_http_req_init_ss_flags(TfwSrvConn *srv_conn, TfwHttpReq *req)
 {
-	TfwSrvGroup *sg = ((TfwServer *)(srv_conn->peer))->sg;
-	if (tfw_cache_msg_cacheable(req) || (req->retries < sg->max_refwd))
-		((TfwMsg *)req)->ss_flags |= SS_F_KEEP_SKB;
+	((TfwMsg *)req)->ss_flags |= SS_F_KEEP_SKB;
 }
 
 static inline void
@@ -553,12 +545,13 @@ tfw_http_req_delist(TfwSrvConn *srv_conn, TfwHttpReq *req)
 /*
  * Common actions in case of an error while forwarding requests.
  * Erroneous requests are removed from the forwarding queue and placed
- * in @equeue. The error code for an error response is saved as well.
+ * in @equeue. The error code and the reason for an error response are
+ * saved as well.
  */
 static inline void
-tfw_http_req_move2equeue(TfwSrvConn *srv_conn, TfwHttpReq *req,
-			 struct list_head *equeue, unsigned short status,
-			 const char *reason)
+tfw_http_req_error(TfwSrvConn *srv_conn, TfwHttpReq *req,
+		   struct list_head *equeue, unsigned short status,
+		   const char *reason)
 {
 	tfw_http_req_delist(srv_conn, req);
 	list_add_tail(&req->fwd_list, equeue);
@@ -577,7 +570,7 @@ tfw_http_req_move2equeue(TfwSrvConn *srv_conn, TfwHttpReq *req,
  * request and then sent to the client in proper seq order.
  */
 static void
-tfw_http_req_zap_error(struct list_head *equeue, const char *source)
+tfw_http_req_zap_error(struct list_head *equeue)
 {
 	TfwHttpReq *req, *tmp;
 
@@ -588,21 +581,21 @@ tfw_http_req_zap_error(struct list_head *equeue, const char *source)
 		list_del_init(&req->fwd_list);
 		switch(req->status) {
 		case 404:
-			tfw_http_send_404(req, source, req->reason);
+			tfw_http_send_404(req, req->reason);
 			break;
 		case 500:
-			tfw_http_send_500(req, source, req->reason);
+			tfw_http_send_500(req, req->reason);
 			break;
 		case 502:
-			tfw_http_send_502(req, source, req->reason);
+			tfw_http_send_502(req, req->reason);
 			break;
 		case 504:
-			tfw_http_send_504(req, source, req->reason);
+			tfw_http_send_504(req, req->reason);
 			break;
 		default:
 			TFW_WARN("Unexpected response error code: [%d]\n",
 				 req->status);
-			tfw_http_send_500(req, source, req->reason);
+			tfw_http_send_500(req, req->reason);
 			break;
 		}
 		TFW_INC_STAT_BH(clnt.msgs_otherr);
@@ -623,8 +616,8 @@ tfw_http_req_evict_timeout(TfwSrvConn *srv_conn, TfwServer *srv,
 		TFW_DBG2("%s: Eviction: req=[%p] overdue=[%dms]\n",
 			 __func__, req,
 			 jiffies_to_msecs(jqage - srv->sg->max_jqage));
-		tfw_http_req_move2equeue(srv_conn, req, equeue,
-					 504, s_reason_evict_timeout);
+		tfw_http_req_error(srv_conn, req, equeue, 504,
+				   "request evicted: timed out");
 		return true;
 	}
 	return false;
@@ -641,8 +634,9 @@ tfw_http_req_evict_retries(TfwSrvConn *srv_conn, TfwServer *srv,
 	if (unlikely(req->retries++ >= srv->sg->max_refwd)) {
 		TFW_DBG2("%s: Eviction: req=[%p] retries=[%d]\n",
 			 __func__, req, req->retries);
-		tfw_http_req_move2equeue(srv_conn, req, equeue,
-					 504, s_reason_evict_retries);
+		tfw_http_req_error(srv_conn, req, equeue, 504,
+				   "request evicted: the number"
+				   " of retries exceeded");
 		return true;
 	}
 	return false;
@@ -669,8 +663,8 @@ tfw_http_req_fwd_send(TfwSrvConn *srv_conn, TfwServer *srv,
 	if (tfw_connection_send((TfwConn *)srv_conn, (TfwMsg *)req)) {
 		TFW_DBG2("%s: Forwarding error: conn=[%p] req=[%p]\n",
 			 __func__, srv_conn, req);
-		tfw_http_req_move2equeue(srv_conn, req, equeue,
-					 500, s_reason_fwd);
+		tfw_http_req_error(srv_conn, req, equeue, 500,
+				   "request dropped: forwarding error");
 		return false;
 	}
 	return true;
@@ -828,8 +822,9 @@ tfw_http_conn_treatnip(TfwSrvConn *srv_conn, struct list_head *equeue)
 	{
 		BUG_ON(list_empty(&req_sent->nip_list));
 		srv_conn->msg_sent = __tfw_http_conn_msg_sent_prev(srv_conn);
-		tfw_http_req_move2equeue(srv_conn, req_sent, equeue,
-					 504, s_reason_nip);
+		tfw_http_req_error(srv_conn, req_sent, equeue, 504,
+				   "request dropped: non-idempotent requests"
+				   " are not re-forwarded or re-scheduled");
 	}
 }
 
@@ -979,8 +974,9 @@ tfw_http_conn_resched(TfwSrvConn *srv_conn, struct list_head *equeue)
 	list_for_each_entry_safe(req, tmp, fwd_queue, fwd_list) {
 		if (!(sch_conn = tfw_sched_get_srv_conn((TfwMsg *)req))) {
 			TFW_WARN("Unable to find a backend server\n");
-			tfw_http_req_move2equeue(srv_conn, req, equeue,
-						 502, s_reason_sched);
+			tfw_http_req_error(srv_conn, req, equeue, 502,
+					   "request dropped: unable to find"
+					   " an available back end server");
 			continue;
 		}
 		tfw_http_req_delist(srv_conn, req);
@@ -994,11 +990,12 @@ tfw_http_conn_resched(TfwSrvConn *srv_conn, struct list_head *equeue)
 /*
  * Process complete forwarding queue and evict requests that timed out.
  *
- * First, process unanswered requests that were forwarded to the server,
- * not including the request that was sent last. Then, process that last
- * request that was sent, and reassign @srv_conn->msg_sent in case it is
- * evicted. Finally, process the rest of the forwarding queue. Those are
- * the requests that were never forwarded yet.
+ * - First, process unanswered requests that were forwarded to the server,
+ *   NOT including the request that was sent last.
+ * - Secondly, process the request that was sent last, and then reassign
+ *   @srv_conn->msg_sent in case it is evicted.
+ * - Finally, process the rest of the queue. Those are the requests that
+ *   were never forwarded yet.
  */
 static inline void
 tfw_http_conn_evict_timeout(TfwSrvConn *srv_conn, struct list_head *equeue)
@@ -1100,7 +1097,7 @@ tfw_http_conn_repair(TfwConn *conn)
 
 zap_error:
 	if (!list_empty(&equeue))
-		tfw_http_req_zap_error(&equeue, s_source_proxy);
+		tfw_http_req_zap_error(&equeue);
 }
 
 /*
@@ -1759,7 +1756,7 @@ static void
 tfw_http_req_cache_service(TfwHttpReq *req, TfwHttpResp *resp)
 {
 	if (tfw_http_adjust_resp(resp, req)) {
-		tfw_http_send_500(req, s_source_proxy, s_reason_resp_common);
+		tfw_http_send_500(req, "response dropped: processing error");
 		tfw_http_conn_msg_free((TfwHttpMsg *)resp);
 		TFW_INC_STAT_BH(clnt.msgs_otherr);
 		return;
@@ -1833,15 +1830,15 @@ tfw_http_req_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 	/* Forward request to the server. */
 	tfw_http_req_fwd(srv_conn, req, &equeue);
 	if (!list_empty(&equeue))
-		tfw_http_req_zap_error(&equeue, s_source_proxy);
+		tfw_http_req_zap_error(&equeue);
 	goto conn_put;
 
 send_502:
-	tfw_http_send_502(req, s_source_proxy, s_reason_req_common);
+	tfw_http_send_502(req, "request dropped: processing error");
 	TFW_INC_STAT_BH(clnt.msgs_otherr);
 	return;
 send_500:
-	tfw_http_send_500(req, s_source_proxy, s_reason_req_common);
+	tfw_http_send_500(req, "request dropped: processing error");
 	TFW_INC_STAT_BH(clnt.msgs_otherr);
 conn_put:
 	tfw_srv_conn_put(srv_conn);
@@ -2105,8 +2102,8 @@ tfw_http_req_process(TfwConn *conn, struct sk_buff *skb, unsigned int off)
 		 * Otherwise we lose the reference to it and get a leak.
 		 */
 		if (tfw_cache_process(req, NULL, tfw_http_req_cache_cb)) {
-			tfw_http_send_500(req, s_source_cache,
-					  s_reason_req_common);
+			tfw_http_send_500(req, "request dropped:"
+					       " processing error");
 			TFW_INC_STAT_BH(clnt.msgs_otherr);
 			return TFW_PASS;
 		}
@@ -2159,7 +2156,7 @@ tfw_http_resp_cache_cb(TfwHttpReq *req, TfwHttpResp *resp)
 	 * inter-node data transfers. (see tfw_http_req_cache_cb())
 	 */
 	if (tfw_http_adjust_resp(resp, req)) {
-		tfw_http_send_500(req, s_source_proxy, s_reason_resp_common);
+		tfw_http_send_500(req, "response dropped: processing error");
 		tfw_http_conn_msg_free((TfwHttpMsg *)resp);
 		TFW_INC_STAT_BH(serv.msgs_otherr);
 		return;
@@ -2234,7 +2231,7 @@ tfw_http_popreq(TfwHttpMsg *hmresp)
 	spin_unlock(&srv_conn->fwd_qlock);
 
 	if (!list_empty(&equeue))
-		tfw_http_req_zap_error(&equeue, s_source_proxy);
+		tfw_http_req_zap_error(&equeue);
 
 	return req;
 }
@@ -2276,7 +2273,7 @@ tfw_http_resp_gfsm(TfwHttpMsg *hmresp, struct sk_buff *skb, unsigned int off)
 		return TFW_BLOCK;
 	}
 
-	tfw_http_send_502(req, s_source_proxy, s_reason_resp_filter);
+	tfw_http_send_502(req, "response dropped: filtered out");
 	tfw_http_conn_msg_free(hmresp);
 	TFW_INC_STAT_BH(serv.msgs_filtout);
 	return r;
@@ -2329,7 +2326,7 @@ tfw_http_resp_cache(TfwHttpMsg *hmresp)
 	if (tfw_cache_process(req, (TfwHttpResp *)hmresp,
 			      tfw_http_resp_cache_cb))
 	{
-		tfw_http_send_500(req, s_source_cache, s_reason_resp_common);
+		tfw_http_send_500(req, "response dropped: processing error");
 		tfw_http_conn_msg_free(hmresp);
 		TFW_INC_STAT_BH(serv.msgs_otherr);
 		/* Proceed with processing of the next response. */
diff --git a/tempesta_fw/http.h b/tempesta_fw/http.h
index cbc53976d..15d346644 100644
--- a/tempesta_fw/http.h
+++ b/tempesta_fw/http.h
@@ -451,10 +451,10 @@ void tfw_http_resp_fwd(TfwHttpReq *req, TfwHttpResp *resp);
  */
 int tfw_http_send_200(TfwHttpReq *req);
 int tfw_http_prep_302(TfwHttpMsg *resp, TfwHttpReq *req, TfwStr *cookie);
-int tfw_http_send_403(TfwHttpReq *req, const char *source, const char *reason);
-int tfw_http_send_404(TfwHttpReq *req, const char *source, const char *reason);
-int tfw_http_send_502(TfwHttpReq *req, const char *source, const char *reason);
-int tfw_http_send_504(TfwHttpReq *req, const char *source, const char *reason);
+int tfw_http_send_403(TfwHttpReq *req, const char *reason);
+int tfw_http_send_404(TfwHttpReq *req, const char *reason);
+int tfw_http_send_502(TfwHttpReq *req, const char *reason);
+int tfw_http_send_504(TfwHttpReq *req, const char *reason);
 
 /*
  * Functions to create SKBs with data stream.
diff --git a/tempesta_fw/t/unit/test_http_sticky.c b/tempesta_fw/t/unit/test_http_sticky.c
index b2c89c42e..3c5133048 100644
--- a/tempesta_fw/t/unit/test_http_sticky.c
+++ b/tempesta_fw/t/unit/test_http_sticky.c
@@ -275,8 +275,7 @@ TEST(http_sticky, sending_502)
 	StickyVal sv = { .ts = 1 };
 
 	EXPECT_EQ(__sticky_calc(mock.req, &sv), 0);
-	EXPECT_EQ(tfw_http_send_502(mock.req, __func__,
-				    "sticky calculation"), 0);
+	EXPECT_EQ(tfw_http_send_502(mock.req, "sticky calculation"), 0);
 
 	/* HTTP 502 response have no Set-Cookie header */
 	EXPECT_TRUE(mock.tfw_connection_send_was_called);