diff --git a/tools/lkl/Makefile b/tools/lkl/Makefile index 0ed1c5ce892466..bc71ff06dbe14d 100644 --- a/tools/lkl/Makefile +++ b/tools/lkl/Makefile @@ -48,6 +48,7 @@ ifneq (,$(filter $(OUTPUT_FORMAT),elf64-x86-64 elf32-i386 elf64-x86-64-freebsd e LDLIBS += -lrt -lpthread endif export CONFIG_AUTO_LKL_POSIX_HOST=y + CFLAGS += -DCONFIG_AUTO_LKL_POSIX_HOST # Intel DPDK configuration ifeq ($(dpdk),yes) @@ -71,6 +72,7 @@ else ifneq (,$(filter $(OUTPUT_FORMAT),pe-i386)) EXESUF := .exe SOSUF := .dll export CONFIG_AUTO_LKL_NT_HOST=y + CFLAGS += -DCONFIG_AUTO_LKL_NT_HOST else $(error Unrecognized platform: $(OUTPUT_FORMAT)) endif diff --git a/tools/lkl/include/lkl_host.h b/tools/lkl/include/lkl_host.h index 03ebb04c50fb6f..07208494af3279 100644 --- a/tools/lkl/include/lkl_host.h +++ b/tools/lkl/include/lkl_host.h @@ -19,10 +19,14 @@ int lkl_printf(const char *fmt, ...); extern char lkl_virtio_devs[256]; -struct lkl_dev_buf { - void *addr; - size_t len; +#ifdef CONFIG_AUTO_LKL_POSIX_HOST +#include +#else +struct iovec { + void *iov_base; + size_t iov_len; }; +#endif extern struct lkl_dev_blk_ops lkl_dev_blk_ops; @@ -35,7 +39,7 @@ struct lkl_blk_req { unsigned int type; unsigned int prio; unsigned long long sector; - struct lkl_dev_buf *buf; + struct iovec *buf; int count; }; @@ -63,7 +67,7 @@ struct lkl_dev_net_ops { * @cnt - # of vectors in iov. * @returns number of bytes transmitted */ - int (*tx)(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt); + int (*tx)(struct lkl_netdev *nd, struct iovec *iov, int cnt); /* * Reads a packet from the net device. @@ -78,7 +82,7 @@ struct lkl_dev_net_ops { * @cnt - # of vectors in iov. * @returns number of bytes read for success or < 0 if error */ - int (*rx)(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt); + int (*rx)(struct lkl_netdev *nd, struct iovec *iov, int cnt); #define LKL_DEV_NET_POLL_RX 1 #define LKL_DEV_NET_POLL_TX 2 diff --git a/tools/lkl/lib/nt-host.c b/tools/lkl/lib/nt-host.c index aa1c90d79dd6ed..7e09a4bfe291df 100644 --- a/tools/lkl/lib/nt-host.c +++ b/tools/lkl/lib/nt-host.c @@ -262,16 +262,17 @@ static int blk_request(struct lkl_disk disk, struct lkl_blk_req *req) for (i = 0; i < req->count; i++) { DWORD res; + struct iovec *buf = &req->buf[i]; ov.Offset = offset & 0xffffffff; ov.OffsetHigh = offset >> 32; if (req->type == LKL_DEV_BLK_TYPE_READ) - ret = ReadFile(disk.handle, req->buf[i].addr, - req->buf[i].len, &res, &ov); + ret = ReadFile(disk.handle, buf->iov_base, + buf->iov_len, &res, &ov); else - ret = WriteFile(disk.handle, req->buf[i].addr, - req->buf[i].len, &res, &ov); + ret = WriteFile(disk.handle, buf->iov_base, + buf->iov_len, &res, &ov); if (!ret) { lkl_printf("%s: I/O error: %d\n", __func__, GetLastError()); @@ -279,14 +280,14 @@ static int blk_request(struct lkl_disk disk, struct lkl_blk_req *req) goto out; } - if (res != req->buf[i].len) { + if (res != buf->iov_len) { lkl_printf("%s: I/O error: short: %d %d\n", - res, req->buf[i].len); + res, buf->iov_len); err = -1; goto out; } - offset += req->buf[i].len; + offset += buf->iov_len; } break; } diff --git a/tools/lkl/lib/posix-host.c b/tools/lkl/lib/posix-host.c index 92b5c05e6d151b..ee4fb3a6c8c040 100644 --- a/tools/lkl/lib/posix-host.c +++ b/tools/lkl/lib/posix-host.c @@ -336,8 +336,8 @@ static int do_rw(ssize_t (*fn)(), struct lkl_disk disk, struct lkl_blk_req *req) for (i = 0; i < req->count; i++) { - addr = req->buf[i].addr; - len = req->buf[i].len; + addr = req->buf[i].iov_base; + len = req->buf[i].iov_len; do { ret = fn(disk.fd, addr, len, off); diff --git 
a/tools/lkl/lib/virtio.c b/tools/lkl/lib/virtio.c index 289aef511837d6..977f86d25fb6ea 100644 --- a/tools/lkl/lib/virtio.c +++ b/tools/lkl/lib/virtio.c @@ -39,14 +39,31 @@ #define BIT(x) (1ULL << x) -#ifdef DEBUG -#define bad_driver(msg) do { \ - lkl_printf("LKL virtio error: %s\n", msg); \ - lkl_host_ops.panic(); \ +#define virtio_panic(msg, ...) do { \ + lkl_printf("LKL virtio error" msg, ##__VA_ARGS__); \ + lkl_host_ops.panic(); \ } while (0) -#else -#define bad_driver(msg) do { } while (0) -#endif /* DEBUG */ + +struct virtio_queue { + uint32_t num_max; + uint32_t num; + uint32_t ready; + uint32_t max_merge_len; + + struct lkl_vring_desc *desc; + struct lkl_vring_avail *avail; + struct lkl_vring_used *used; + uint16_t last_avail_idx; + uint16_t last_used_idx_signaled; +}; + +struct _virtio_req { + struct virtio_req req; + struct virtio_dev *dev; + struct virtio_queue *q; + uint16_t idx; +}; + static inline uint16_t virtio_get_used_event(struct virtio_queue *q) { @@ -66,55 +83,73 @@ static inline void virtio_deliver_irq(struct virtio_dev *dev) lkl_trigger_irq(dev->irq); } +static inline uint16_t virtio_get_used_idx(struct virtio_queue *q) +{ + return le16toh(q->used->idx); +} + +static inline void virtio_add_used(struct virtio_queue *q, uint16_t used_idx, + uint16_t avail_idx, uint16_t len) +{ + uint16_t desc_idx = q->avail->ring[avail_idx & (q->num - 1)]; + + used_idx = used_idx & (q->num - 1); + q->used->ring[used_idx].id = desc_idx; + q->used->ring[used_idx].len = htole16(len); +} + +/* + * Make sure all memory writes done before this point are visible to the + * driver before updating the idx. We need it here even if we already have one + * in virtio_deliver_irq() because there might already be a driver thread + * reading the idx and dequeuing used buffers. + */ +static inline void virtio_sync_used_idx(struct virtio_queue *q, uint16_t idx) +{ + __sync_synchronize(); + q->used->idx = htole16(idx); +} + +#define min_len(a, b) (a < b ? a : b) + void virtio_req_complete(struct virtio_req *req, uint32_t len) { - struct virtio_queue *q = req->q; - struct virtio_dev *dev = req->dev; - uint16_t idx = le16toh(q->used->idx) & (q->num - 1); - uint16_t new; int send_irq = 0; - int avail_used; + struct _virtio_req *_req = container_of(req, struct _virtio_req, req); + struct virtio_queue *q = _req->q; + uint16_t avail_idx = _req->idx; + uint16_t used_idx = virtio_get_used_idx(_req->q); + int i; - q->used->ring[idx].id = htole16(req->idx); - if (req->mergeable_rx_len == 0) { - new = le16toh(q->used->idx) + 1; - avail_used = 1; - } else { - /* we've potentially used up multiple (non-chained) - * descriptors and have to create one "used" entry for - * each descr we've consumed. - */ - int i = 0, last_idx = q->last_avail_idx, req_idx; - - avail_used = req->buf_count; - new = le16toh(q->used->idx) + req->buf_count; - while (i < req->buf_count-1) { - q->used->ring[idx].len = htole16(req->buf[i].len); - len -= req->buf[i].len; - idx++; i++; last_idx++; - idx &= (q->num - 1); - req_idx = q->avail->ring[last_idx & (q->num - 1)]; - q->used->ring[idx].id = htole16(req_idx); - } - } - q->used->ring[idx].len = htole16(len); - /* Make sure all memory writes before are visible to the driver before - * updating the idx. - * We need it here even we already have one in virtio_deliver_irq() - * because there might already be an driver thread reading the idx and - * dequeuing used buffers. + /* + * We've potentially used up multiple (non-chained) descriptors and have + * to create one "used" entry for each descriptor we've consumed.
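/*
 * Illustrative sketch (not part of the patch): the device-side publish
 * pattern that virtio_add_used()/virtio_sync_used_idx() above implement.
 * The structures below are simplified stand-ins for the LKL vring types;
 * only the ordering matters: fill in the used ring entries first, issue a
 * full barrier, and only then expose the new used->idx to the driver.
 */
#include <stdint.h>

struct demo_used_elem { uint32_t id; uint32_t len; };
struct demo_used_ring {
	uint16_t idx;                  /* published to the driver */
	struct demo_used_elem ring[4]; /* ring size 4, a power of two */
};

static void demo_publish_used(struct demo_used_ring *u, uint16_t slot,
			      uint32_t desc_id, uint32_t len)
{
	u->ring[slot & 3].id = desc_id;
	u->ring[slot & 3].len = len;
	/*
	 * Without this barrier the driver could observe the new idx before
	 * the ring entry contents, and consume a stale id/len pair.
	 */
	__sync_synchronize();
	u->idx = slot + 1;
}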
*/ - __sync_synchronize(); - q->used->idx = htole16(new); + for (i = 0; i < req->buf_count; i++) { + uint16_t used_len; + + if (!q->max_merge_len) + used_len = len; + else + used_len = min_len(len, req->buf[i].iov_len); + + virtio_add_used(q, used_idx++, avail_idx++, used_len); + + len -= used_len; + if (!len) + break; + } + virtio_sync_used_idx(q, used_idx); + q->last_avail_idx = avail_idx; - /* Triggers the irq whenever there is no available buffer. - * q->last_avail_idx is incremented after calling virtio_req_complete(), - * so here we need to add avail_used to it. + /* + * Triggers the irq whenever there is no available buffer. */ - if (q->last_avail_idx + avail_used == le16toh(q->avail->idx)) + if (q->last_avail_idx == le16toh(q->avail->idx)) send_irq = 1; - /* There are two rings: q->avail and q->used for each of the rx and tx + /* + * There are two rings: q->avail and q->used for each of the rx and tx * queues that are used to pass buffers between kernel driver and the * virtio device implementation. * @@ -145,30 +180,66 @@ void virtio_req_complete(struct virtio_req *req, uint32_t len) * case when those numbers wrap up. */ if (send_irq || lkl_vring_need_event(le16toh(virtio_get_used_event(q)), - new, q->last_used_idx_signaled)) { - q->last_used_idx_signaled = new; - virtio_deliver_irq(dev); + virtio_get_used_idx(q), + q->last_used_idx_signaled)) { + q->last_used_idx_signaled = virtio_get_used_idx(q); + virtio_deliver_irq(_req->dev); } } -/* Grab the vring_desc from the queue at the appropriate index in the +/* + * Grab the vring_desc from the queue at the appropriate index in the * queue's circular buffer, converting from little-endian to - * the host's endianness. */ -static inline struct lkl_vring_desc *vring_desc_at_le_idx(struct virtio_queue *q, - __lkl__virtio16 le_idx) + * the host's endianness. + */ +static inline +struct lkl_vring_desc *vring_desc_at_le_idx(struct virtio_queue *q, + __lkl__virtio16 le_idx) { return &q->desc[le16toh(le_idx) & (q->num -1)]; } +static inline +struct lkl_vring_desc *vring_desc_at_avail_idx(struct virtio_queue *q, + uint16_t idx) +{ + uint16_t desc_idx = q->avail->ring[idx & (q->num - 1)]; + + return vring_desc_at_le_idx(q, desc_idx); +} + /* Initialize buf to hold the same info as the vring_desc */ -static void init_dev_buf_from_vring_desc(struct lkl_dev_buf *buf, +static void add_dev_buf_from_vring_desc(struct virtio_req *req, struct lkl_vring_desc *vring_desc) { - buf->addr = (void *)(uintptr_t)le64toh(vring_desc->addr); - buf->len = le32toh(vring_desc->len); + struct iovec *buf = &req->buf[req->buf_count++]; + + buf->iov_base = (void *)(uintptr_t)le64toh(vring_desc->addr); + buf->iov_len = le32toh(vring_desc->len); - if (!(buf->addr && buf->len)) - bad_driver("bad vring_desc\n"); + if (!(buf->iov_base && buf->iov_len)) + virtio_panic("bad vring_desc: %p %d\n", + buf->iov_base, buf->iov_len); + + req->total_len += buf->iov_len; +} + +static struct lkl_vring_desc *get_next_desc(struct virtio_queue *q, + struct lkl_vring_desc *desc, + uint16_t *idx) +{ + uint16_t desc_idx; + + if (q->max_merge_len) { + if (++(*idx) == le16toh(q->avail->idx)) + return NULL; + desc_idx = q->avail->ring[*idx & (q->num - 1)]; + return vring_desc_at_le_idx(q, desc_idx); + } + + if (!(le16toh(desc->flags) & LKL_VRING_DESC_F_NEXT)) + return NULL; + return vring_desc_at_le_idx(q, desc->next); } /* @@ -182,58 +253,29 @@ static void init_dev_buf_from_vring_desc(struct lkl_dev_buf *buf, * The mode is entered when the VIRTIO_NET_F_MRG_RXBUF device feature * is enabled. 
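/*
 * Illustrative sketch (not part of the patch): the event-index test that
 * lkl_vring_need_event() is assumed to implement, following the standard
 * virtio rule.  The device signals only when the used index moves past the
 * used_event value published by the driver, which is what lets
 * virtio_req_complete() above skip most interrupts.
 */
#include <stdint.h>

static inline int demo_vring_need_event(uint16_t event_idx, uint16_t new_idx,
					uint16_t old_idx)
{
	/* All arithmetic is mod 2^16, so index wrap-around is handled for free. */
	return (uint16_t)(new_idx - event_idx - 1) <
	       (uint16_t)(new_idx - old_idx);
}

/*
 * Worked example (made-up numbers): the device last signaled at used idx 10
 * (old_idx), the driver asked to be notified once entry 12 is consumed
 * (event_idx == 12), and used->idx has now advanced to 13 (new_idx).
 * 13 - 12 - 1 == 0 < 13 - 10 == 3, so an interrupt is delivered; had
 * new_idx only reached 12, 12 - 12 - 1 wraps to 65535 and no interrupt
 * would be sent yet.
 */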
*/ -static int virtio_process_one(struct virtio_dev *dev, struct virtio_queue *q, - int idx, bool is_mergeable_rx) +static int virtio_process_one(struct virtio_dev *dev, int qidx) { - int q_buf_cnt = 0, ret = -1; - struct virtio_req req = { + struct virtio_queue *q = &dev->queue[qidx]; + uint16_t idx = q->last_avail_idx; + struct _virtio_req _req = { .dev = dev, .q = q, - .idx = q->avail->ring[idx & (q->num - 1)], - .mergeable_rx_len = 0, + .idx = idx, }; - uint16_t prev_flags = LKL_VRING_DESC_F_NEXT; - struct lkl_vring_desc *curr_vring_desc = vring_desc_at_le_idx(q, req.idx); + struct virtio_req *req = &_req.req; + struct lkl_vring_desc *desc = vring_desc_at_avail_idx(q, _req.idx); - if (is_mergeable_rx) { - int len = 0, desc_idx; + do { + add_dev_buf_from_vring_desc(req, desc); + if (q->max_merge_len && req->total_len > q->max_merge_len) + break; + desc = get_next_desc(q, desc, &idx); + } while (desc && req->buf_count < VIRTIO_REQ_MAX_BUFS); - /* We may receive upto 64KB TSO packet so collect as many - * descriptors as there are available upto 64KB in total len. - */ - while ((len < 65535) && (q_buf_cnt < VIRTIO_REQ_MAX_BUFS)) { - init_dev_buf_from_vring_desc( - &req.buf[q_buf_cnt], curr_vring_desc); - len += req.buf[q_buf_cnt++].len; - if (++idx == le16toh(q->avail->idx)) - break; - desc_idx = q->avail->ring[idx & (q->num - 1)]; - curr_vring_desc = vring_desc_at_le_idx(q, desc_idx); - } - req.mergeable_rx_len = len; - } else { - while ((prev_flags & LKL_VRING_DESC_F_NEXT) && - (q_buf_cnt < VIRTIO_REQ_MAX_BUFS)) { - prev_flags = le16toh(curr_vring_desc->flags); - init_dev_buf_from_vring_desc( - &req.buf[q_buf_cnt++], curr_vring_desc); - curr_vring_desc = - vring_desc_at_le_idx(q, curr_vring_desc->next); - } - /* Somehow we've built a request too long to fit our device */ - if (q_buf_cnt == VIRTIO_REQ_MAX_BUFS && - (prev_flags & LKL_VRING_DESC_F_NEXT)) - bad_driver("enqueued too many request bufs"); - } - req.buf_count = q_buf_cnt; - ret = dev->ops->enqueue(dev, &req); - if (ret < 0) - return ret; - if (is_mergeable_rx) - q->last_avail_idx += ret; - else - q->last_avail_idx++; - return 0; + if (desc && le16toh(desc->flags) & LKL_VRING_DESC_F_NEXT) + virtio_panic("too many chained bufs"); + + return dev->ops->enqueue(dev, qidx, req); } /* NB: we can enter this function two different ways in the case of @@ -256,7 +298,6 @@ static int virtio_process_one(struct virtio_dev *dev, struct virtio_queue *q, void virtio_process_queue(struct virtio_dev *dev, uint32_t qidx) { struct virtio_queue *q = &dev->queue[qidx]; - bool is_mergeable_rx; if (!q->ready) return; @@ -264,16 +305,13 @@ void virtio_process_queue(struct virtio_dev *dev, uint32_t qidx) if (dev->ops->acquire_queue) dev->ops->acquire_queue(dev, qidx); - is_mergeable_rx = ((dev->device_id == LKL_VIRTIO_ID_NET) && - is_rx_queue(dev, q) && - (dev->device_features & BIT(LKL_VIRTIO_NET_F_MRG_RXBUF))); - while (q->last_avail_idx != le16toh(q->avail->idx)) { - /* Make sure following loads happens after loading q->avail->idx. + /* + * Make sure the following loads happen after loading + * q->avail->idx.
*/ __sync_synchronize(); - if (virtio_process_one(dev, q, q->last_avail_idx, - is_mergeable_rx) < 0) + if (virtio_process_one(dev, qidx) < 0) break; if (q->last_avail_idx == le16toh(q->avail->idx)) virtio_set_avail_event(q, q->avail->idx); @@ -473,6 +511,11 @@ static const struct lkl_iomem_ops virtio_ops = { char lkl_virtio_devs[256]; static char *devs = lkl_virtio_devs; +void virtio_set_queue_max_merge_len(struct virtio_dev *dev, int q, int len) +{ + dev->queue[q].max_merge_len = len; +} + int virtio_dev_setup(struct virtio_dev *dev, int queues, int num_max) { int qsize = queues * sizeof(*dev->queue); diff --git a/tools/lkl/lib/virtio.h b/tools/lkl/lib/virtio.h index e4e239feaaf10b..c4f7c2a329e94a 100644 --- a/tools/lkl/lib/virtio.h +++ b/tools/lkl/lib/virtio.h @@ -15,50 +15,42 @@ #define VIRTIO_REQ_MAX_BUFS (MAX_SKB_FRAGS + 2) -/* We always have 2 queues on a netdev: one for tx, one for rx. */ -#define RX_QUEUE_IDX 0 -#define TX_QUEUE_IDX 1 - struct virtio_req { - struct virtio_dev *dev; - struct virtio_queue *q; - uint16_t idx; uint16_t buf_count; - struct lkl_dev_buf buf[VIRTIO_REQ_MAX_BUFS]; - uint32_t mergeable_rx_len; + struct iovec buf[VIRTIO_REQ_MAX_BUFS]; + uint32_t total_len; }; +struct virtio_dev; + struct virtio_dev_ops { int (*check_features)(struct virtio_dev *dev); + /** + * enqueue - queues the request for processing + * + * Note that the current implementation assumes that the requests are + * processed synchronously and, as such, @virtio_req_complete must be + * called from this function. + * + * @dev - virtio device + * @q - queue index + * + * @returns a negative value if the request has not been queued for + * processing, in which case the virtio device is responsible for + * restarting the queue processing by calling @virtio_process_queue at a + * later time; 0 or a positive value means that the request has been + * queued for processing + */ + int (*enqueue)(struct virtio_dev *dev, int q, struct virtio_req *req); /* - * Return a negative value to stop the queue processing. In this case - * the current request is not consumed from the queue and the host - * device is resposible for restaring the queue processing by calling - * virtio_process_queue at a later time. - * A special case exists if a netdev is in mergeable RX buffer mode - * where more than one "avail" slots may be consumed. In this case - * it will return how many avail idx to advance. + * Acquire/release a lock on the specified queue. Only implemented by + * netdevs, all other devices have NULL acquire/release function + * pointers. */ - int (*enqueue)(struct virtio_dev *dev, struct virtio_req *req); - /* Acquire/release a lock on the specified queue. Only - * implemented by netdevs, all other devices have NULL - * acquire/release function pointers.
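/*
 * Illustrative sketch (not part of the patch): the shape of a backend
 * enqueue() callback under the contract documented above.  discard_enqueue()
 * is a hypothetical device that simply consumes every buffer; the real
 * devices (virtio_blk.c and virtio_net.c below) parse the buffers instead.
 * Requests are handled synchronously, so virtio_req_complete() is called
 * before returning; returning a negative value would leave the request on
 * the queue and make the device responsible for calling
 * virtio_process_queue() again later.  Assumes the patched virtio.h (and the
 * headers it pulls in) is available.
 */
#include <stdint.h>
#include "virtio.h"

static int discard_enqueue(struct virtio_dev *dev, int q,
			   struct virtio_req *req)
{
	uint32_t len = 0;
	int i;

	/* Consume every buffer of the request and report the total length. */
	for (i = 0; i < req->buf_count; i++)
		len += req->buf[i].iov_len;

	virtio_req_complete(req, len);
	return 0;
}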
*/ void (*acquire_queue)(struct virtio_dev *dev, int queue_idx); void (*release_queue)(struct virtio_dev *dev, int queue_idx); }; -struct virtio_queue { - uint32_t num_max; - uint32_t num; - uint32_t ready; - - struct lkl_vring_desc *desc; - struct lkl_vring_avail *avail; - struct lkl_vring_used *used; - uint16_t last_avail_idx; - uint16_t last_used_idx_signaled; -}; - struct virtio_dev { uint32_t device_id; uint32_t vendor_id; @@ -82,23 +74,17 @@ struct virtio_dev { int virtio_dev_setup(struct virtio_dev *dev, int queues, int num_max); void virtio_dev_cleanup(struct virtio_dev *dev); +/** + * virtio_req_complete - complete a virtio request + * + * @req - the request to be completed + * @len - the total size in bytes of the completed request + */ void virtio_req_complete(struct virtio_req *req, uint32_t len); void virtio_process_queue(struct virtio_dev *dev, uint32_t qidx); +void virtio_set_queue_max_merge_len(struct virtio_dev *dev, int q, int len); #define container_of(ptr, type, member) \ (type *)((char *)(ptr) - __builtin_offsetof(type, member)) - -static inline int is_rx_queue(struct virtio_dev *dev, - struct virtio_queue *queue) -{ - return &dev->queue[RX_QUEUE_IDX] == queue; -} - -static inline int is_tx_queue(struct virtio_dev *dev, - struct virtio_queue *queue) -{ - return &dev->queue[TX_QUEUE_IDX] == queue; -} - #endif /* _LKL_LIB_VIRTIO_H */ diff --git a/tools/lkl/lib/virtio_blk.c b/tools/lkl/lib/virtio_blk.c index 023d0b435296cc..2c037cfd52490f 100644 --- a/tools/lkl/lib/virtio_blk.c +++ b/tools/lkl/lib/virtio_blk.c @@ -21,7 +21,7 @@ static int blk_check_features(struct virtio_dev *dev) return -LKL_EINVAL; } -static int blk_enqueue(struct virtio_dev *dev, struct virtio_req *req) +static int blk_enqueue(struct virtio_dev *dev, int q, struct virtio_req *req) { struct virtio_blk_dev *blk_dev; struct lkl_virtio_blk_outhdr *h; @@ -33,18 +33,18 @@ static int blk_enqueue(struct virtio_dev *dev, struct virtio_req *req) goto out; } - h = req->buf[0].addr; - t = req->buf[req->buf_count - 1].addr; + h = req->buf[0].iov_base; + t = req->buf[req->buf_count - 1].iov_base; blk_dev = container_of(dev, struct virtio_blk_dev, dev); t->status = LKL_DEV_BLK_STATUS_IOERR; - if (req->buf[0].len != sizeof(*h)) { + if (req->buf[0].iov_len != sizeof(*h)) { lkl_printf("virtio_blk: bad header buf\n"); goto out; } - if (req->buf[req->buf_count - 1].len != sizeof(*t)) { + if (req->buf[req->buf_count - 1].iov_len != sizeof(*t)) { lkl_printf("virtio_blk: bad status buf\n"); goto out; } diff --git a/tools/lkl/lib/virtio_net.c b/tools/lkl/lib/virtio_net.c index 7e670aab69359f..3fd9e94565cd13 100644 --- a/tools/lkl/lib/virtio_net.c +++ b/tools/lkl/lib/virtio_net.c @@ -8,6 +8,10 @@ #define netdev_of(x) (container_of(x, struct virtio_net_dev, dev)) #define BIT(x) (1ULL << x) +/* We always have 2 queues on a netdev: one for tx, one for rx. */ +#define RX_QUEUE_IDX 0 +#define TX_QUEUE_IDX 1 + #define NUM_QUEUES (TX_QUEUE_IDX + 1) #define QUEUE_DEPTH 128 @@ -50,68 +54,68 @@ static void net_release_queue(struct virtio_dev *dev, int queue_idx) lkl_host_ops.mutex_unlock(netdev_of(dev)->queue_locks[queue_idx]); } -/* The buffers passed through "req" from the virtio_net driver always - * starts with a vnet_hdr. We need to check the backend device if it - * expects vnet_hdr and adjust buffer offset accordingly. +/* + * The buffers passed through "req" from the virtio_net driver always start + * with a vnet_hdr. We need to check whether the backend device expects a + * vnet_hdr and adjust the buffer offset accordingly.
*/ -static int net_enqueue(struct virtio_dev *dev, struct virtio_req *req) +static int net_enqueue(struct virtio_dev *dev, int q, struct virtio_req *req) { struct lkl_virtio_net_hdr_v1 *header; struct virtio_net_dev *net_dev; - int ret, len, i; - struct lkl_dev_buf *iov; + struct iovec *iov; + int ret; - header = req->buf[0].addr; + header = req->buf[0].iov_base; net_dev = netdev_of(dev); + /* + * If the backend device does not expect a vnet_hdr, adjust buf + * accordingly. (We make the adjustment to req->buf so it can be used + * directly for the tx/rx call, but remember to undo the change after + * the call.) Note that it's ok to pass an iov with an entry's len==0; + * the caller will skip to the next entry correctly. + */ if (!net_dev->nd->has_vnet_hdr) { - /* The backend device does not expect a vnet_hdr so adjust - * buf accordingly. (We make adjustment to req->buf so it - * can be used directly for the tx/rx call but remember to - * undo the change after the call. - * Note that it's ok to pass iov with entry's len==0. - * The caller will skip to the next entry correctly. - */ - req->buf[0].addr += sizeof(*header); - req->buf[0].len -= sizeof(*header); + req->buf[0].iov_base += sizeof(*header); + req->buf[0].iov_len -= sizeof(*header); } iov = req->buf; /* Pick which virtqueue to send the buffer(s) to */ - if (is_tx_queue(dev, req->q)) { + if (q == TX_QUEUE_IDX) { ret = net_dev->nd->ops->tx(net_dev->nd, iov, req->buf_count); if (ret < 0) return -1; - i = 1; - } else if (is_rx_queue(dev, req->q)) { + } else if (q == RX_QUEUE_IDX) { + int i, len; + ret = net_dev->nd->ops->rx(net_dev->nd, iov, req->buf_count); if (ret < 0) return -1; if (net_dev->nd->has_vnet_hdr) { - - /* if the number of bytes returned exactly matches - * the total space in the iov then there is a good - * chance we did not supply a large enough buffer for - * the whole pkt, i.e., pkt has been truncated. - * This is only likely to happen under mergeable RX - * buffer mode. + /* + * If the number of bytes returned exactly matches the + * total space in the iov then there is a good chance we + * did not supply a large enough buffer for the whole + * pkt, i.e., pkt has been truncated. This is only + * likely to happen under mergeable RX buffer mode. */ - if (req->mergeable_rx_len == (unsigned int)ret) + if (req->total_len == (unsigned int)ret) lkl_printf("PKT is likely truncated! len=%d\n", ret); } else { header->flags = 0; header->gso_type = LKL_VIRTIO_NET_HDR_GSO_NONE; } - /* Have to compute how many descriptors we've consumed (really + /* + * Have to compute how many descriptors we've consumed (really * only matters to the the mergeable RX mode) and return it * through "num_buffers". */ for (i = 0, len = ret; len > 0; i++) - len -= req->buf[i].len; - req->buf_count = header->num_buffers = i; - /* Need to set "buf_count" to how many we really used in - * order for virtio_req_complete() to work.
- */ + len -= req->buf[i].iov_len; + header->num_buffers = i; + if (dev->device_features & BIT(LKL_VIRTIO_NET_F_GUEST_CSUM)) header->flags = LKL_VIRTIO_NET_HDR_F_DATA_VALID; } else { @@ -120,12 +124,12 @@ static int net_enqueue(struct virtio_dev *dev, struct virtio_req *req) } if (!net_dev->nd->has_vnet_hdr) { /* Undo the adjustment */ - req->buf[0].addr -= sizeof(*header); - req->buf[0].len += sizeof(*header); + req->buf[0].iov_base -= sizeof(*header); + req->buf[0].iov_len += sizeof(*header); ret += sizeof(struct lkl_virtio_net_hdr_v1); } virtio_req_complete(req, ret); - return i; + return 0; } static struct virtio_dev_ops net_ops = { @@ -232,15 +236,24 @@ int lkl_netdev_add(struct lkl_netdev *nd, struct lkl_netdev_args* args) if (!dev->queue_locks) goto out_free; - /* MUST match the number of queue locks we initialized. We - * could init the queues in virtio_dev_setup to help enforce - * this, but netdevs are the only flavor that need these - * locks, so it's better to do it here. */ + /* + * MUST match the number of queue locks we initialized. We could init + * the queues in virtio_dev_setup to help enforce this, but netdevs are + * the only flavor that needs these locks, so it's better to do it + * here. + */ ret = virtio_dev_setup(&dev->dev, NUM_QUEUES, QUEUE_DEPTH); if (ret) goto out_free; + /* + * We may receive up to a 64KB TSO packet, so collect as many descriptors + * as are available, up to 64KB in total len. + */ + if (dev->dev.device_features & BIT(LKL_VIRTIO_NET_F_MRG_RXBUF)) + virtio_set_queue_max_merge_len(&dev->dev, RX_QUEUE_IDX, 65536); + dev->poll_tid = lkl_host_ops.thread_create(poll_thread, dev); if (dev->poll_tid == 0) goto out_cleanup_dev; diff --git a/tools/lkl/lib/virtio_net_dpdk.c b/tools/lkl/lib/virtio_net_dpdk.c index ab59d51748d8fb..05bb7050e35267 100644 --- a/tools/lkl/lib/virtio_net_dpdk.c +++ b/tools/lkl/lib/virtio_net_dpdk.c @@ -61,13 +61,13 @@ struct lkl_netdev_dpdk { int close; }; -static int net_tx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) +static int net_tx(struct lkl_netdev *nd, struct iovec *iov, int cnt) { void *pkt; struct rte_mbuf *rm; struct lkl_netdev_dpdk *nd_dpdk; - void *data = iov[0].addr; - int len = (int)iov[0].len; + void *data = iov[0].iov_base; + int len = (int)iov[0].iov_len; nd_dpdk = (struct lkl_netdev_dpdk *) nd; @@ -95,12 +95,12 @@ static int net_tx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) * refactor allows us to read in parallel, the buffer (nd_dpdk->rms) shall * be guarded. */ -static int net_rx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) +static int net_rx(struct lkl_netdev *nd, struct iovec *iov, int cnt) { struct lkl_netdev_dpdk *nd_dpdk; int i, nb_rx, read = 0; - void *data = iov[0].addr; - int len = (int)iov[0].len; + void *data = iov[0].iov_base; + int len = (int)iov[0].iov_len; nd_dpdk = (struct lkl_netdev_dpdk *) nd; diff --git a/tools/lkl/lib/virtio_net_fd.c b/tools/lkl/lib/virtio_net_fd.c index 0fcf6ae7742335..6fa09cabde4670 100644 --- a/tools/lkl/lib/virtio_net_fd.c +++ b/tools/lkl/lib/virtio_net_fd.c @@ -41,20 +41,14 @@ struct lkl_netdev_fd { int pipe[2]; }; -/* The following tx() and rx() code assume struct lkl_dev_buf matches - * sruct iovec so we can safely cast iov to (struct iovec *).
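/*
 * Illustrative sketch (not part of the patch): how the mergeable-RX
 * accounting above plays out.  demo_count_buffers() mirrors the loop in
 * net_enqueue() that derives header->num_buffers from the number of bytes
 * the backend actually wrote; the sizes are made-up example values.
 */
#include <sys/uio.h>

static int demo_count_buffers(const struct iovec *iov, int ret_bytes)
{
	int i, len;

	/* Count how many avail entries the returned bytes spill across. */
	for (i = 0, len = ret_bytes; len > 0; i++)
		len -= (int)iov[i].iov_len;
	return i;
}

/*
 * Example: the driver queued four 1500-byte receive buffers and the backend
 * returned a 3000-byte packet, so demo_count_buffers(iov, 3000) == 2 and
 * only two avail entries are marked used.  With the RX queue's
 * max_merge_len set to 65536 in lkl_netdev_add() above, a full 64KB TSO
 * frame can be spread across such buffers in a single request.
 */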
(If - * BUILD_BUG_ON() were supported in LKL, I would have added - * - * "BUILD_BUG_ON(sizeof(struct lkl_dev_buf) == sizeof(struct iovec));" - */ -static int fd_net_tx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) +static int fd_net_tx(struct lkl_netdev *nd, struct iovec *iov, int cnt) { int ret; struct lkl_netdev_fd *nd_fd = container_of(nd, struct lkl_netdev_fd, dev); do { - ret = writev(nd_fd->fd, (struct iovec *)iov, cnt); + ret = writev(nd_fd->fd, iov, cnt); } while (ret == -1 && errno == EINTR); if (ret < 0) { @@ -71,7 +65,7 @@ static int fd_net_tx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) return ret; } -static int fd_net_rx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) +static int fd_net_rx(struct lkl_netdev *nd, struct iovec *iov, int cnt) { int ret; struct lkl_netdev_fd *nd_fd = diff --git a/tools/lkl/lib/virtio_net_vde.c b/tools/lkl/lib/virtio_net_vde.c index 95fadaa01950cb..5b793da952e681 100644 --- a/tools/lkl/lib/virtio_net_vde.c +++ b/tools/lkl/lib/virtio_net_vde.c @@ -18,8 +18,8 @@ struct lkl_netdev_vde { }; struct lkl_netdev *nuse_vif_vde_create(char *switch_path); -static int net_vde_tx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt); -static int net_vde_rx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt); +static int net_vde_tx(struct lkl_netdev *nd, struct iovec *iov, int cnt); +static int net_vde_rx(struct lkl_netdev *nd, struct iovec *iov, int cnt); static int net_vde_poll_with_timeout(struct lkl_netdev *nd, int timeout); static int net_vde_poll(struct lkl_netdev *nd); static void net_vde_poll_hup(struct lkl_netdev *nd); @@ -33,13 +33,13 @@ struct lkl_dev_net_ops vde_net_ops = { .free = net_vde_free, }; -int net_vde_tx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) +int net_vde_tx(struct lkl_netdev *nd, struct iovec *iov, int cnt) { int ret; struct lkl_netdev_vde *nd_vde = container_of(nd, struct lkl_netdev_vde, dev); - void *data = iov[0].addr; - int len = (int)iov[0].len; + void *data = iov[0].iov_base; + int len = (int)iov[0].iov_len; ret = vde_send(nd_vde->conn, data, len, 0); if (ret <= 0 && errno == EAGAIN) @@ -47,13 +47,13 @@ int net_vde_tx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) return ret; } -int net_vde_rx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) +int net_vde_rx(struct lkl_netdev *nd, struct iovec *iov, int cnt) { int ret; struct lkl_netdev_vde *nd_vde = container_of(nd, struct lkl_netdev_vde, dev); - void *data = iov[0].addr; - int len = (int)iov[0].len; + void *data = iov[0].iov_base; + int len = (int)iov[0].iov_len; /* * Due to a bug in libvdeplug we have to first poll to make sure