diff --git a/tools/lkl/Makefile b/tools/lkl/Makefile index 0ed1c5ce892466..bc71ff06dbe14d 100644 --- a/tools/lkl/Makefile +++ b/tools/lkl/Makefile @@ -48,6 +48,7 @@ ifneq (,$(filter $(OUTPUT_FORMAT),elf64-x86-64 elf32-i386 elf64-x86-64-freebsd e LDLIBS += -lrt -lpthread endif export CONFIG_AUTO_LKL_POSIX_HOST=y + CFLAGS += -DCONFIG_AUTO_LKL_POSIX_HOST # Intel DPDK configuration ifeq ($(dpdk),yes) @@ -71,6 +72,7 @@ else ifneq (,$(filter $(OUTPUT_FORMAT),pe-i386)) EXESUF := .exe SOSUF := .dll export CONFIG_AUTO_LKL_NT_HOST=y + CFLAGS += -DCONFIG_AUTO_LKL_NT_HOST else $(error Unrecognized platform: $(OUTPUT_FORMAT)) endif diff --git a/tools/lkl/include/lkl_host.h b/tools/lkl/include/lkl_host.h index 03ebb04c50fb6f..07208494af3279 100644 --- a/tools/lkl/include/lkl_host.h +++ b/tools/lkl/include/lkl_host.h @@ -19,10 +19,14 @@ int lkl_printf(const char *fmt, ...); extern char lkl_virtio_devs[256]; -struct lkl_dev_buf { - void *addr; - size_t len; +#ifdef CONFIG_AUTO_LKL_POSIX_HOST +#include +#else +struct iovec { + void *iov_base; + size_t iov_len; }; +#endif extern struct lkl_dev_blk_ops lkl_dev_blk_ops; @@ -35,7 +39,7 @@ struct lkl_blk_req { unsigned int type; unsigned int prio; unsigned long long sector; - struct lkl_dev_buf *buf; + struct iovec *buf; int count; }; @@ -63,7 +67,7 @@ struct lkl_dev_net_ops { * @cnt - # of vectors in iov. * @returns number of bytes transmitted */ - int (*tx)(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt); + int (*tx)(struct lkl_netdev *nd, struct iovec *iov, int cnt); /* * Reads a packet from the net device. @@ -78,7 +82,7 @@ struct lkl_dev_net_ops { * @cnt - # of vectors in iov. * @returns number of bytes read for success or < 0 if error */ - int (*rx)(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt); + int (*rx)(struct lkl_netdev *nd, struct iovec *iov, int cnt); #define LKL_DEV_NET_POLL_RX 1 #define LKL_DEV_NET_POLL_TX 2 diff --git a/tools/lkl/lib/nt-host.c b/tools/lkl/lib/nt-host.c index aa1c90d79dd6ed..7e09a4bfe291df 100644 --- a/tools/lkl/lib/nt-host.c +++ b/tools/lkl/lib/nt-host.c @@ -262,16 +262,17 @@ static int blk_request(struct lkl_disk disk, struct lkl_blk_req *req) for (i = 0; i < req->count; i++) { DWORD res; + struct iovec *buf = &req->buf[i]; ov.Offset = offset & 0xffffffff; ov.OffsetHigh = offset >> 32; if (req->type == LKL_DEV_BLK_TYPE_READ) - ret = ReadFile(disk.handle, req->buf[i].addr, - req->buf[i].len, &res, &ov); + ret = ReadFile(disk.handle, buf->iov_base, + buf->iov_len, &res, &ov); else - ret = WriteFile(disk.handle, req->buf[i].addr, - req->buf[i].len, &res, &ov); + ret = WriteFile(disk.handle, buf->iov_base, + buf->iov_len, &res, &ov); if (!ret) { lkl_printf("%s: I/O error: %d\n", __func__, GetLastError()); @@ -279,14 +280,14 @@ static int blk_request(struct lkl_disk disk, struct lkl_blk_req *req) goto out; } - if (res != req->buf[i].len) { + if (res != buf->iov_len) { lkl_printf("%s: I/O error: short: %d %d\n", - res, req->buf[i].len); + res, buf->iov_len); err = -1; goto out; } - offset += req->buf[i].len; + offset += buf->iov_len; } break; } diff --git a/tools/lkl/lib/posix-host.c b/tools/lkl/lib/posix-host.c index 92b5c05e6d151b..ee4fb3a6c8c040 100644 --- a/tools/lkl/lib/posix-host.c +++ b/tools/lkl/lib/posix-host.c @@ -336,8 +336,8 @@ static int do_rw(ssize_t (*fn)(), struct lkl_disk disk, struct lkl_blk_req *req) for (i = 0; i < req->count; i++) { - addr = req->buf[i].addr; - len = req->buf[i].len; + addr = req->buf[i].iov_base; + len = req->buf[i].iov_len; do { ret = fn(disk.fd, addr, len, off); diff --git 
a/tools/lkl/lib/virtio.c b/tools/lkl/lib/virtio.c index 289aef511837d6..977f86d25fb6ea 100644 --- a/tools/lkl/lib/virtio.c +++ b/tools/lkl/lib/virtio.c @@ -39,14 +39,31 @@ #define BIT(x) (1ULL << x) -#ifdef DEBUG -#define bad_driver(msg) do { \ - lkl_printf("LKL virtio error: %s\n", msg); \ - lkl_host_ops.panic(); \ +#define virtio_panic(msg, ...) do { \ + lkl_printf("LKL virtio error" msg, ##__VA_ARGS__); \ + lkl_host_ops.panic(); \ } while (0) -#else -#define bad_driver(msg) do { } while (0) -#endif /* DEBUG */ + +struct virtio_queue { + uint32_t num_max; + uint32_t num; + uint32_t ready; + uint32_t max_merge_len; + + struct lkl_vring_desc *desc; + struct lkl_vring_avail *avail; + struct lkl_vring_used *used; + uint16_t last_avail_idx; + uint16_t last_used_idx_signaled; +}; + +struct _virtio_req { + struct virtio_req req; + struct virtio_dev *dev; + struct virtio_queue *q; + uint16_t idx; +}; + static inline uint16_t virtio_get_used_event(struct virtio_queue *q) { @@ -66,55 +83,73 @@ static inline void virtio_deliver_irq(struct virtio_dev *dev) lkl_trigger_irq(dev->irq); } +static inline uint16_t virtio_get_used_idx(struct virtio_queue *q) +{ + return le16toh(q->used->idx); +} + +static inline void virtio_add_used(struct virtio_queue *q, uint16_t used_idx, + uint16_t avail_idx, uint16_t len) +{ + uint16_t desc_idx = q->avail->ring[avail_idx & (q->num - 1)]; + + used_idx = used_idx & (q->num - 1); + q->used->ring[used_idx].id = desc_idx; + q->used->ring[used_idx].len = htole16(len); +} + +/* + * Make sure all memory writes done before this point are visible to the + * driver before updating the idx. We need it here even if we already have one + * in virtio_deliver_irq() because there might already be a driver thread + * reading the idx and dequeuing used buffers. + */ +static inline void virtio_sync_used_idx(struct virtio_queue *q, uint16_t idx) +{ + __sync_synchronize(); + q->used->idx = htole16(idx); +} + +#define min_len(a, b) (a < b ? a : b) + void virtio_req_complete(struct virtio_req *req, uint32_t len) { - struct virtio_queue *q = req->q; - struct virtio_dev *dev = req->dev; - uint16_t idx = le16toh(q->used->idx) & (q->num - 1); - uint16_t new; int send_irq = 0; - int avail_used; + struct _virtio_req *_req = container_of(req, struct _virtio_req, req); + struct virtio_queue *q = _req->q; + uint16_t avail_idx = _req->idx; + uint16_t used_idx = virtio_get_used_idx(_req->q); + int i; - q->used->ring[idx].id = htole16(req->idx); - if (req->mergeable_rx_len == 0) { - new = le16toh(q->used->idx) + 1; - avail_used = 1; - } else { - /* we've potentially used up multiple (non-chained) - * descriptors and have to create one "used" entry for - * each descr we've consumed. - */ - int i = 0, last_idx = q->last_avail_idx, req_idx; - - avail_used = req->buf_count; - new = le16toh(q->used->idx) + req->buf_count; - while (i < req->buf_count-1) { - q->used->ring[idx].len = htole16(req->buf[i].len); - len -= req->buf[i].len; - idx++; i++; last_idx++; - idx &= (q->num - 1); - req_idx = q->avail->ring[last_idx & (q->num - 1)]; - q->used->ring[idx].id = htole16(req_idx); - } - } - q->used->ring[idx].len = htole16(len); - /* Make sure all memory writes before are visible to the driver before - * updating the idx. - * We need it here even we already have one in virtio_deliver_irq() - * because there might already be an driver thread reading the idx and - * dequeuing used buffers. + /* + * We've potentially used up multiple (non-chained) descriptors and have + * to create one "used" entry for each descriptor we've consumed.
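/*
 * Illustrative sketch (not part of the patch): the device-side publish
 * pattern that virtio_add_used()/virtio_sync_used_idx() above implement.
 * The structures below are simplified stand-ins for the LKL vring types;
 * only the ordering matters: fill in the used ring entries first, issue a
 * full barrier, and only then expose the new used->idx to the driver.
 */
#include <stdint.h>

struct demo_used_elem { uint32_t id; uint32_t len; };
struct demo_used_ring {
	uint16_t idx;                  /* published to the driver */
	struct demo_used_elem ring[4]; /* ring size 4, a power of two */
};

static void demo_publish_used(struct demo_used_ring *u, uint16_t slot,
			      uint32_t desc_id, uint32_t len)
{
	u->ring[slot & 3].id = desc_id;
	u->ring[slot & 3].len = len;
	/*
	 * Without this barrier the driver could observe the new idx before
	 * the ring entry contents, and consume a stale id/len pair.
	 */
	__sync_synchronize();
	u->idx = slot + 1;
}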
*/ - __sync_synchronize(); - q->used->idx = htole16(new); + for (i = 0; i < req->buf_count; i++) { + uint16_t used_len; + + if (!q->max_merge_len) + used_len = len; + else + used_len = min_len(len, req->buf[i].iov_len); + + virtio_add_used(q, used_idx++, avail_idx++, used_len); + + len -= used_len; + if (!len) + break; + } + virtio_sync_used_idx(q, used_idx); + q->last_avail_idx = avail_idx; - /* Triggers the irq whenever there is no available buffer. - * q->last_avail_idx is incremented after calling virtio_req_complete(), - * so here we need to add avail_used to it. + /* + * Triggers the irq whenever there is no available buffer. */ - if (q->last_avail_idx + avail_used == le16toh(q->avail->idx)) + if (q->last_avail_idx == le16toh(q->avail->idx)) send_irq = 1; - /* There are two rings: q->avail and q->used for each of the rx and tx + /* + * There are two rings: q->avail and q->used for each of the rx and tx * queues that are used to pass buffers between kernel driver and the * virtio device implementation. * @@ -145,30 +180,66 @@ void virtio_req_complete(struct virtio_req *req, uint32_t len) * case when those numbers wrap up. */ if (send_irq || lkl_vring_need_event(le16toh(virtio_get_used_event(q)), - new, q->last_used_idx_signaled)) { - q->last_used_idx_signaled = new; - virtio_deliver_irq(dev); + virtio_get_used_idx(q), + q->last_used_idx_signaled)) { + q->last_used_idx_signaled = virtio_get_used_idx(q); + virtio_deliver_irq(_req->dev); } } -/* Grab the vring_desc from the queue at the appropriate index in the +/* + * Grab the vring_desc from the queue at the appropriate index in the * queue's circular buffer, converting from little-endian to - * the host's endianness. */ -static inline struct lkl_vring_desc *vring_desc_at_le_idx(struct virtio_queue *q, - __lkl__virtio16 le_idx) + * the host's endianness. + */ +static inline +struct lkl_vring_desc *vring_desc_at_le_idx(struct virtio_queue *q, + __lkl__virtio16 le_idx) { return &q->desc[le16toh(le_idx) & (q->num -1)]; } +static inline +struct lkl_vring_desc *vring_desc_at_avail_idx(struct virtio_queue *q, + uint16_t idx) +{ + uint16_t desc_idx = q->avail->ring[idx & (q->num - 1)]; + + return vring_desc_at_le_idx(q, desc_idx); +} + /* Initialize buf to hold the same info as the vring_desc */ -static void init_dev_buf_from_vring_desc(struct lkl_dev_buf *buf, +static void add_dev_buf_from_vring_desc(struct virtio_req *req, struct lkl_vring_desc *vring_desc) { - buf->addr = (void *)(uintptr_t)le64toh(vring_desc->addr); - buf->len = le32toh(vring_desc->len); + struct iovec *buf = &req->buf[req->buf_count++]; + + buf->iov_base = (void *)(uintptr_t)le64toh(vring_desc->addr); + buf->iov_len = le32toh(vring_desc->len); - if (!(buf->addr && buf->len)) - bad_driver("bad vring_desc\n"); + if (!(buf->iov_base && buf->iov_len)) + virtio_panic("bad vring_desc: %p %d\n", + buf->iov_base, buf->iov_len); + + req->total_len += buf->iov_len; +} + +static struct lkl_vring_desc *get_next_desc(struct virtio_queue *q, + struct lkl_vring_desc *desc, + uint16_t *idx) +{ + uint16_t desc_idx; + + if (q->max_merge_len) { + if (++(*idx) == le16toh(q->avail->idx)) + return NULL; + desc_idx = q->avail->ring[*idx & (q->num - 1)]; + return vring_desc_at_le_idx(q, desc_idx); + } + + if (!(le16toh(desc->flags) & LKL_VRING_DESC_F_NEXT)) + return NULL; + return vring_desc_at_le_idx(q, desc->next); } /* @@ -182,58 +253,29 @@ static void init_dev_buf_from_vring_desc(struct lkl_dev_buf *buf, * The mode is entered when the VIRTIO_NET_F_MRG_RXBUF device feature * is enabled. 
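/*
 * Illustrative sketch (not part of the patch): the event-index test that
 * lkl_vring_need_event() is assumed to implement, following the standard
 * virtio rule.  The device signals only when the used index moves past the
 * used_event value published by the driver, which is what lets
 * virtio_req_complete() above skip most interrupts.
 */
#include <stdint.h>

static inline int demo_vring_need_event(uint16_t event_idx, uint16_t new_idx,
					uint16_t old_idx)
{
	/* All arithmetic is mod 2^16, so index wrap-around is handled for free. */
	return (uint16_t)(new_idx - event_idx - 1) <
	       (uint16_t)(new_idx - old_idx);
}

/*
 * Worked example (made-up numbers): the device last signaled at used idx 10
 * (old_idx), the driver asked to be notified once entry 12 is consumed
 * (event_idx == 12), and used->idx has now advanced to 13 (new_idx).
 * 13 - 12 - 1 == 0 < 13 - 10 == 3, so an interrupt is delivered; had
 * new_idx only reached 12, 12 - 12 - 1 wraps to 65535 and no interrupt
 * would be sent yet.
 */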
*/ -static int virtio_process_one(struct virtio_dev *dev, struct virtio_queue *q, - int idx, bool is_mergeable_rx) +static int virtio_process_one(struct virtio_dev *dev, int qidx) { - int q_buf_cnt = 0, ret = -1; - struct virtio_req req = { + struct virtio_queue *q = &dev->queue[qidx]; + uint16_t idx = q->last_avail_idx; + struct _virtio_req _req = { .dev = dev, .q = q, - .idx = q->avail->ring[idx & (q->num - 1)], - .mergeable_rx_len = 0, + .idx = idx, }; - uint16_t prev_flags = LKL_VRING_DESC_F_NEXT; - struct lkl_vring_desc *curr_vring_desc = vring_desc_at_le_idx(q, req.idx); + struct virtio_req *req = &_req.req; + struct lkl_vring_desc *desc = vring_desc_at_avail_idx(q, _req.idx); - if (is_mergeable_rx) { - int len = 0, desc_idx; + do { + add_dev_buf_from_vring_desc(req, desc); + if (q->max_merge_len && req->total_len > q->max_merge_len) + break; + desc = get_next_desc(q, desc, &idx); + } while (desc && req->buf_count < VIRTIO_REQ_MAX_BUFS); - /* We may receive upto 64KB TSO packet so collect as many - * descriptors as there are available upto 64KB in total len. - */ - while ((len < 65535) && (q_buf_cnt < VIRTIO_REQ_MAX_BUFS)) { - init_dev_buf_from_vring_desc( - &req.buf[q_buf_cnt], curr_vring_desc); - len += req.buf[q_buf_cnt++].len; - if (++idx == le16toh(q->avail->idx)) - break; - desc_idx = q->avail->ring[idx & (q->num - 1)]; - curr_vring_desc = vring_desc_at_le_idx(q, desc_idx); - } - req.mergeable_rx_len = len; - } else { - while ((prev_flags & LKL_VRING_DESC_F_NEXT) && - (q_buf_cnt < VIRTIO_REQ_MAX_BUFS)) { - prev_flags = le16toh(curr_vring_desc->flags); - init_dev_buf_from_vring_desc( - &req.buf[q_buf_cnt++], curr_vring_desc); - curr_vring_desc = - vring_desc_at_le_idx(q, curr_vring_desc->next); - } - /* Somehow we've built a request too long to fit our device */ - if (q_buf_cnt == VIRTIO_REQ_MAX_BUFS && - (prev_flags & LKL_VRING_DESC_F_NEXT)) - bad_driver("enqueued too many request bufs"); - } - req.buf_count = q_buf_cnt; - ret = dev->ops->enqueue(dev, &req); - if (ret < 0) - return ret; - if (is_mergeable_rx) - q->last_avail_idx += ret; - else - q->last_avail_idx++; - return 0; + if (desc && le16toh(desc->flags) & LKL_VRING_DESC_F_NEXT) + virtio_panic("too many chained bufs"); + + return dev->ops->enqueue(dev, qidx, req); } /* NB: we can enter this function two different ways in the case of @@ -256,7 +298,6 @@ static int virtio_process_one(struct virtio_dev *dev, struct virtio_queue *q, void virtio_process_queue(struct virtio_dev *dev, uint32_t qidx) { struct virtio_queue *q = &dev->queue[qidx]; - bool is_mergeable_rx; if (!q->ready) return; @@ -264,16 +305,13 @@ void virtio_process_queue(struct virtio_dev *dev, uint32_t qidx) if (dev->ops->acquire_queue) dev->ops->acquire_queue(dev, qidx); - is_mergeable_rx = ((dev->device_id == LKL_VIRTIO_ID_NET) && - is_rx_queue(dev, q) && - (dev->device_features & BIT(LKL_VIRTIO_NET_F_MRG_RXBUF))); - while (q->last_avail_idx != le16toh(q->avail->idx)) { - /* Make sure following loads happens after loading q->avail->idx. + /* + * Make sure the following loads happen after loading + * q->avail->idx.
*/ __sync_synchronize(); - if (virtio_process_one(dev, q, q->last_avail_idx, - is_mergeable_rx) < 0) + if (virtio_process_one(dev, qidx) < 0) break; if (q->last_avail_idx == le16toh(q->avail->idx)) virtio_set_avail_event(q, q->avail->idx); @@ -473,6 +511,11 @@ static const struct lkl_iomem_ops virtio_ops = { char lkl_virtio_devs[256]; static char *devs = lkl_virtio_devs; +void virtio_set_queue_max_merge_len(struct virtio_dev *dev, int q, int len) +{ + dev->queue[q].max_merge_len = len; +} + int virtio_dev_setup(struct virtio_dev *dev, int queues, int num_max) { int qsize = queues * sizeof(*dev->queue); diff --git a/tools/lkl/lib/virtio.h b/tools/lkl/lib/virtio.h index e4e239feaaf10b..c4f7c2a329e94a 100644 --- a/tools/lkl/lib/virtio.h +++ b/tools/lkl/lib/virtio.h @@ -15,50 +15,42 @@ #define VIRTIO_REQ_MAX_BUFS (MAX_SKB_FRAGS + 2) -/* We always have 2 queues on a netdev: one for tx, one for rx. */ -#define RX_QUEUE_IDX 0 -#define TX_QUEUE_IDX 1 - struct virtio_req { - struct virtio_dev *dev; - struct virtio_queue *q; - uint16_t idx; uint16_t buf_count; - struct lkl_dev_buf buf[VIRTIO_REQ_MAX_BUFS]; - uint32_t mergeable_rx_len; + struct iovec buf[VIRTIO_REQ_MAX_BUFS]; + uint32_t total_len; }; +struct virtio_dev; + struct virtio_dev_ops { int (*check_features)(struct virtio_dev *dev); + /** + * enqueue - queues the request for processing + * + * Note that the current implementation assumes that the requests are + * processed synchronously and, as such, @virtio_req_complete must be + * called from this function. + * + * @dev - virtio device + * @q - queue index + * + * @returns a negative value if the request has not been queued for + * processing, in which case the virtio device is responsible for + * restarting the queue processing by calling @virtio_process_queue at a + * later time; 0 or a positive value means that the request has been + * queued for processing + */ + int (*enqueue)(struct virtio_dev *dev, int q, struct virtio_req *req); /* - * Return a negative value to stop the queue processing. In this case - * the current request is not consumed from the queue and the host - * device is resposible for restaring the queue processing by calling - * virtio_process_queue at a later time. - * A special case exists if a netdev is in mergeable RX buffer mode - * where more than one "avail" slots may be consumed. In this case - * it will return how many avail idx to advance. + * Acquire/release a lock on the specified queue. Only implemented by + * netdevs, all other devices have NULL acquire/release function + * pointers. */ - int (*enqueue)(struct virtio_dev *dev, struct virtio_req *req); - /* Acquire/release a lock on the specified queue. Only - * implemented by netdevs, all other devices have NULL - * acquire/release function pointers.
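/*
 * Illustrative sketch (not part of the patch): the shape of a backend
 * enqueue() callback under the contract documented above.  discard_enqueue()
 * is a hypothetical device that simply consumes every buffer; the real
 * devices (virtio_blk.c and virtio_net.c below) parse the buffers instead.
 * Requests are handled synchronously, so virtio_req_complete() is called
 * before returning; returning a negative value would leave the request on
 * the queue and make the device responsible for calling
 * virtio_process_queue() again later.  Assumes the patched virtio.h (and the
 * headers it pulls in) is available.
 */
#include <stdint.h>
#include "virtio.h"

static int discard_enqueue(struct virtio_dev *dev, int q,
			   struct virtio_req *req)
{
	uint32_t len = 0;
	int i;

	/* Consume every buffer of the request and report the total length. */
	for (i = 0; i < req->buf_count; i++)
		len += req->buf[i].iov_len;

	virtio_req_complete(req, len);
	return 0;
}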
*/ void (*acquire_queue)(struct virtio_dev *dev, int queue_idx); void (*release_queue)(struct virtio_dev *dev, int queue_idx); }; -struct virtio_queue { - uint32_t num_max; - uint32_t num; - uint32_t ready; - - struct lkl_vring_desc *desc; - struct lkl_vring_avail *avail; - struct lkl_vring_used *used; - uint16_t last_avail_idx; - uint16_t last_used_idx_signaled; -}; - struct virtio_dev { uint32_t device_id; uint32_t vendor_id; @@ -82,23 +74,17 @@ struct virtio_dev { int virtio_dev_setup(struct virtio_dev *dev, int queues, int num_max); void virtio_dev_cleanup(struct virtio_dev *dev); +/** + * virtio_req_complete - complete a virtio request + * + * @req - the request to be completed + * @len - the total size in bytes of the completed request + */ void virtio_req_complete(struct virtio_req *req, uint32_t len); void virtio_process_queue(struct virtio_dev *dev, uint32_t qidx); +void virtio_set_queue_max_merge_len(struct virtio_dev *dev, int q, int len); #define container_of(ptr, type, member) \ (type *)((char *)(ptr) - __builtin_offsetof(type, member)) - -static inline int is_rx_queue(struct virtio_dev *dev, - struct virtio_queue *queue) -{ - return &dev->queue[RX_QUEUE_IDX] == queue; -} - -static inline int is_tx_queue(struct virtio_dev *dev, - struct virtio_queue *queue) -{ - return &dev->queue[TX_QUEUE_IDX] == queue; -} - #endif /* _LKL_LIB_VIRTIO_H */ diff --git a/tools/lkl/lib/virtio_blk.c b/tools/lkl/lib/virtio_blk.c index 023d0b435296cc..2c037cfd52490f 100644 --- a/tools/lkl/lib/virtio_blk.c +++ b/tools/lkl/lib/virtio_blk.c @@ -21,7 +21,7 @@ static int blk_check_features(struct virtio_dev *dev) return -LKL_EINVAL; } -static int blk_enqueue(struct virtio_dev *dev, struct virtio_req *req) +static int blk_enqueue(struct virtio_dev *dev, int q, struct virtio_req *req) { struct virtio_blk_dev *blk_dev; struct lkl_virtio_blk_outhdr *h; @@ -33,18 +33,18 @@ static int blk_enqueue(struct virtio_dev *dev, struct virtio_req *req) goto out; } - h = req->buf[0].addr; - t = req->buf[req->buf_count - 1].addr; + h = req->buf[0].iov_base; + t = req->buf[req->buf_count - 1].iov_base; blk_dev = container_of(dev, struct virtio_blk_dev, dev); t->status = LKL_DEV_BLK_STATUS_IOERR; - if (req->buf[0].len != sizeof(*h)) { + if (req->buf[0].iov_len != sizeof(*h)) { lkl_printf("virtio_blk: bad header buf\n"); goto out; } - if (req->buf[req->buf_count - 1].len != sizeof(*t)) { + if (req->buf[req->buf_count - 1].iov_len != sizeof(*t)) { lkl_printf("virtio_blk: bad status buf\n"); goto out; } diff --git a/tools/lkl/lib/virtio_net.c b/tools/lkl/lib/virtio_net.c index 7e670aab69359f..3fd9e94565cd13 100644 --- a/tools/lkl/lib/virtio_net.c +++ b/tools/lkl/lib/virtio_net.c @@ -8,6 +8,10 @@ #define netdev_of(x) (container_of(x, struct virtio_net_dev, dev)) #define BIT(x) (1ULL << x) +/* We always have 2 queues on a netdev: one for tx, one for rx. */ +#define RX_QUEUE_IDX 0 +#define TX_QUEUE_IDX 1 + #define NUM_QUEUES (TX_QUEUE_IDX + 1) #define QUEUE_DEPTH 128 @@ -50,68 +54,68 @@ static void net_release_queue(struct virtio_dev *dev, int queue_idx) lkl_host_ops.mutex_unlock(netdev_of(dev)->queue_locks[queue_idx]); } -/* The buffers passed through "req" from the virtio_net driver always - * starts with a vnet_hdr. We need to check the backend device if it - * expects vnet_hdr and adjust buffer offset accordingly. +/* + * The buffers passed through "req" from the virtio_net driver always start + * with a vnet_hdr. We need to check whether the backend device expects a + * vnet_hdr and adjust the buffer offset accordingly.
*/ -static int net_enqueue(struct virtio_dev *dev, struct virtio_req *req) +static int net_enqueue(struct virtio_dev *dev, int q, struct virtio_req *req) { struct lkl_virtio_net_hdr_v1 *header; struct virtio_net_dev *net_dev; - int ret, len, i; - struct lkl_dev_buf *iov; + struct iovec *iov; + int ret; - header = req->buf[0].addr; + header = req->buf[0].iov_base; net_dev = netdev_of(dev); + /* + * If the backend device does not expect a vnet_hdr, adjust buf + * accordingly. (We make the adjustment to req->buf so it can be used + * directly for the tx/rx call, but remember to undo the change after + * the call.) Note that it's ok to pass an iov with an entry's len==0; + * the caller will skip to the next entry correctly. + */ if (!net_dev->nd->has_vnet_hdr) { - /* The backend device does not expect a vnet_hdr so adjust - * buf accordingly. (We make adjustment to req->buf so it - * can be used directly for the tx/rx call but remember to - * undo the change after the call. - * Note that it's ok to pass iov with entry's len==0. - * The caller will skip to the next entry correctly. - */ - req->buf[0].addr += sizeof(*header); - req->buf[0].len -= sizeof(*header); + req->buf[0].iov_base += sizeof(*header); + req->buf[0].iov_len -= sizeof(*header); } iov = req->buf; /* Pick which virtqueue to send the buffer(s) to */ - if (is_tx_queue(dev, req->q)) { + if (q == TX_QUEUE_IDX) { ret = net_dev->nd->ops->tx(net_dev->nd, iov, req->buf_count); if (ret < 0) return -1; - i = 1; - } else if (is_rx_queue(dev, req->q)) { + } else if (q == RX_QUEUE_IDX) { + int i, len; + ret = net_dev->nd->ops->rx(net_dev->nd, iov, req->buf_count); if (ret < 0) return -1; if (net_dev->nd->has_vnet_hdr) { - - /* if the number of bytes returned exactly matches - * the total space in the iov then there is a good - * chance we did not supply a large enough buffer for - * the whole pkt, i.e., pkt has been truncated. - * This is only likely to happen under mergeable RX - * buffer mode. + /* + * If the number of bytes returned exactly matches the + * total space in the iov then there is a good chance we + * did not supply a large enough buffer for the whole + * pkt, i.e., pkt has been truncated. This is only + * likely to happen under mergeable RX buffer mode. */ - if (req->mergeable_rx_len == (unsigned int)ret) + if (req->total_len == (unsigned int)ret) lkl_printf("PKT is likely truncated! len=%d\n", ret); } else { header->flags = 0; header->gso_type = LKL_VIRTIO_NET_HDR_GSO_NONE; } - /* Have to compute how many descriptors we've consumed (really + /* + * Have to compute how many descriptors we've consumed (really * only matters to the the mergeable RX mode) and return it * through "num_buffers". */ for (i = 0, len = ret; len > 0; i++) - len -= req->buf[i].len; - req->buf_count = header->num_buffers = i; - /* Need to set "buf_count" to how many we really used in - * order for virtio_req_complete() to work.
- */ + len -= req->buf[i].iov_len; + header->num_buffers = i; + if (dev->device_features & BIT(LKL_VIRTIO_NET_F_GUEST_CSUM)) header->flags = LKL_VIRTIO_NET_HDR_F_DATA_VALID; } else { @@ -120,12 +124,12 @@ static int net_enqueue(struct virtio_dev *dev, struct virtio_req *req) } if (!net_dev->nd->has_vnet_hdr) { /* Undo the adjustment */ - req->buf[0].addr -= sizeof(*header); - req->buf[0].len += sizeof(*header); + req->buf[0].iov_base -= sizeof(*header); + req->buf[0].iov_len += sizeof(*header); ret += sizeof(struct lkl_virtio_net_hdr_v1); } virtio_req_complete(req, ret); - return i; + return 0; } static struct virtio_dev_ops net_ops = { @@ -232,15 +236,24 @@ int lkl_netdev_add(struct lkl_netdev *nd, struct lkl_netdev_args* args) if (!dev->queue_locks) goto out_free; - /* MUST match the number of queue locks we initialized. We - * could init the queues in virtio_dev_setup to help enforce - * this, but netdevs are the only flavor that need these - * locks, so it's better to do it here. */ + /* + * MUST match the number of queue locks we initialized. We could init + * the queues in virtio_dev_setup to help enforce this, but netdevs are + * the only flavor that needs these locks, so it's better to do it + * here. + */ ret = virtio_dev_setup(&dev->dev, NUM_QUEUES, QUEUE_DEPTH); if (ret) goto out_free; + /* + * We may receive up to a 64KB TSO packet, so collect as many descriptors + * as are available, up to 64KB in total len. + */ + if (dev->dev.device_features & BIT(LKL_VIRTIO_NET_F_MRG_RXBUF)) + virtio_set_queue_max_merge_len(&dev->dev, RX_QUEUE_IDX, 65536); + dev->poll_tid = lkl_host_ops.thread_create(poll_thread, dev); if (dev->poll_tid == 0) goto out_cleanup_dev; diff --git a/tools/lkl/lib/virtio_net_dpdk.c b/tools/lkl/lib/virtio_net_dpdk.c index ab59d51748d8fb..05bb7050e35267 100644 --- a/tools/lkl/lib/virtio_net_dpdk.c +++ b/tools/lkl/lib/virtio_net_dpdk.c @@ -61,13 +61,13 @@ struct lkl_netdev_dpdk { int close; }; -static int net_tx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) +static int net_tx(struct lkl_netdev *nd, struct iovec *iov, int cnt) { void *pkt; struct rte_mbuf *rm; struct lkl_netdev_dpdk *nd_dpdk; - void *data = iov[0].addr; - int len = (int)iov[0].len; + void *data = iov[0].iov_base; + int len = (int)iov[0].iov_len; nd_dpdk = (struct lkl_netdev_dpdk *) nd; @@ -95,12 +95,12 @@ static int net_tx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) * refactor allows us to read in parallel, the buffer (nd_dpdk->rms) shall * be guarded. */ -static int net_rx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) +static int net_rx(struct lkl_netdev *nd, struct iovec *iov, int cnt) { struct lkl_netdev_dpdk *nd_dpdk; int i, nb_rx, read = 0; - void *data = iov[0].addr; - int len = (int)iov[0].len; + void *data = iov[0].iov_base; + int len = (int)iov[0].iov_len; nd_dpdk = (struct lkl_netdev_dpdk *) nd; diff --git a/tools/lkl/lib/virtio_net_fd.c b/tools/lkl/lib/virtio_net_fd.c index 0fcf6ae7742335..6fa09cabde4670 100644 --- a/tools/lkl/lib/virtio_net_fd.c +++ b/tools/lkl/lib/virtio_net_fd.c @@ -41,20 +41,14 @@ struct lkl_netdev_fd { int pipe[2]; }; -/* The following tx() and rx() code assume struct lkl_dev_buf matches - * sruct iovec so we can safely cast iov to (struct iovec *).
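/*
 * Illustrative sketch (not part of the patch): how the mergeable-RX
 * accounting above plays out.  demo_count_buffers() mirrors the loop in
 * net_enqueue() that derives header->num_buffers from the number of bytes
 * the backend actually wrote; the sizes are made-up example values.
 */
#include <sys/uio.h>

static int demo_count_buffers(const struct iovec *iov, int ret_bytes)
{
	int i, len;

	/* Count how many avail entries the returned bytes spill across. */
	for (i = 0, len = ret_bytes; len > 0; i++)
		len -= (int)iov[i].iov_len;
	return i;
}

/*
 * Example: the driver queued four 1500-byte receive buffers and the backend
 * returned a 3000-byte packet, so demo_count_buffers(iov, 3000) == 2 and
 * only two avail entries are marked used.  With the RX queue's
 * max_merge_len set to 65536 in lkl_netdev_add() above, a full 64KB TSO
 * frame can be spread across such buffers in a single request.
 */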
(If - * BUILD_BUG_ON() were supported in LKL, I would have added - * - * "BUILD_BUG_ON(sizeof(struct lkl_dev_buf) == sizeof(struct iovec));" - */ -static int fd_net_tx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) +static int fd_net_tx(struct lkl_netdev *nd, struct iovec *iov, int cnt) { int ret; struct lkl_netdev_fd *nd_fd = container_of(nd, struct lkl_netdev_fd, dev); do { - ret = writev(nd_fd->fd, (struct iovec *)iov, cnt); + ret = writev(nd_fd->fd, iov, cnt); } while (ret == -1 && errno == EINTR); if (ret < 0) { @@ -71,7 +65,7 @@ static int fd_net_tx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) return ret; } -static int fd_net_rx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) +static int fd_net_rx(struct lkl_netdev *nd, struct iovec *iov, int cnt) { int ret; struct lkl_netdev_fd *nd_fd = diff --git a/tools/lkl/lib/virtio_net_vde.c b/tools/lkl/lib/virtio_net_vde.c index 95fadaa01950cb..5b793da952e681 100644 --- a/tools/lkl/lib/virtio_net_vde.c +++ b/tools/lkl/lib/virtio_net_vde.c @@ -18,8 +18,8 @@ struct lkl_netdev_vde { }; struct lkl_netdev *nuse_vif_vde_create(char *switch_path); -static int net_vde_tx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt); -static int net_vde_rx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt); +static int net_vde_tx(struct lkl_netdev *nd, struct iovec *iov, int cnt); +static int net_vde_rx(struct lkl_netdev *nd, struct iovec *iov, int cnt); static int net_vde_poll_with_timeout(struct lkl_netdev *nd, int timeout); static int net_vde_poll(struct lkl_netdev *nd); static void net_vde_poll_hup(struct lkl_netdev *nd); @@ -33,13 +33,13 @@ struct lkl_dev_net_ops vde_net_ops = { .free = net_vde_free, }; -int net_vde_tx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) +int net_vde_tx(struct lkl_netdev *nd, struct iovec *iov, int cnt) { int ret; struct lkl_netdev_vde *nd_vde = container_of(nd, struct lkl_netdev_vde, dev); - void *data = iov[0].addr; - int len = (int)iov[0].len; + void *data = iov[0].iov_base; + int len = (int)iov[0].iov_len; ret = vde_send(nd_vde->conn, data, len, 0); if (ret <= 0 && errno == EAGAIN) @@ -47,13 +47,13 @@ int net_vde_tx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) return ret; } -int net_vde_rx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) +int net_vde_rx(struct lkl_netdev *nd, struct iovec *iov, int cnt) { int ret; struct lkl_netdev_vde *nd_vde = container_of(nd, struct lkl_netdev_vde, dev); - void *data = iov[0].addr; - int len = (int)iov[0].len; + void *data = iov[0].iov_base; + int len = (int)iov[0].iov_len; /* * Due to a bug in libvdeplug we have to first poll to make sure