Skip to content

Commit

Permalink
lkl: introduce raw socket based netdev backend
Browse files Browse the repository at this point in the history
This patch introduces a new backend for virtio net, which uses an AF_PACKET
socket (a.k.a. raw socket) to bypass the host kernel's network stack and use
the LKL network stack instead.  It is convenient since we don't have to add
an additional net_device (e.g., tap) for LKL, and it is possibly faster than
tuntap with the PACKET_QDISC_BYPASS socket option (available since Linux
3.14).  One drawback is that it requires root privilege (sudo or the suid bit
set) to use it.

Example usage:

sudo LKL_HIJACK_NET_IFTYPE=raw LKL_HIJACK_NET_IFPARAMS=docker0 \
LKL_HIJACK_NET_IP=172.17.0.39 LKL_HIJACK_NET_NETMASK_LEN=24 \
./bin/lkl-hijack.sh ping  172.17.0.2

Some benchmarks with netperf:

- TCP_RR
 raw(QDISC_BYPASS): 9519.31 Trans/sec
 tap:               9486.03 Trans/sec
- TCP_STREAM
 raw(QDISC_BYPASS): 2184.79 Mbps
 tap:               2130.39 Mbps
- UDP_STREAM
 raw(QDISC_BYPASS): 3654.32 Mbps
 tap:               3108.10 Mbps

Signed-off-by: Hajime Tazaki <thehajime@gmail.com>
  • Loading branch information
thehajime committed Jul 5, 2016
1 parent f12c823 commit a28b355
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 4 deletions.
8 changes: 8 additions & 0 deletions tools/lkl/include/lkl.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,14 @@ struct lkl_netdev *lkl_netdev_dpdk_create(const char *ifname);
*/
struct lkl_netdev *lkl_netdev_vde_create(const char *switch_path);

/**
* lkl_netdev_raw_create - create a raw socket (AF_PACKET) based net_device
* for the virtio net backend
*
* @ifname - name of the host interface the raw socket is bound to.
* @returns the newly created net_device, or NULL on failure.
*/
struct lkl_netdev *lkl_netdev_raw_create(const char *ifname);

/*
* lkl_register_dbg_handler- register a signal handler that loads a debug lib.
*
Expand Down
1 change: 1 addition & 0 deletions tools/lkl/lib/Build
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,6 @@ lkl-y += dbg_handler.o
lkl-$(CONFIG_AUTO_LKL_POSIX_HOST) += virtio_net.o
lkl-$(CONFIG_AUTO_LKL_POSIX_HOST) += virtio_net_linux_fdnet.o
lkl-$(CONFIG_AUTO_LKL_POSIX_HOST) += virtio_net_tap.o
lkl-$(CONFIG_AUTO_LKL_POSIX_HOST) += virtio_net_raw.o
lkl-$(CONFIG_AUTO_LKL_VIRTIO_NET_DPDK) += virtio_net_dpdk.o
lkl-$(CONFIG_AUTO_LKL_VIRTIO_NET_VDE) += virtio_net_vde.o
2 changes: 1 addition & 1 deletion tools/lkl/lib/hijack/hijack.c
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ HOST_CALL(socket);
int socket(int domain, int type, int protocol)
{
CHECK_HOST_CALL(socket);
if (domain == AF_UNIX)
if (domain == AF_UNIX || domain == PF_PACKET)
return host_socket(domain, type, protocol);

return lkl_call(__lkl__NR_socket, 3, domain, type, protocol);
Expand Down
2 changes: 2 additions & 0 deletions tools/lkl/lib/hijack/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,8 @@ hijack_init(void)
nd = lkl_netdev_dpdk_create(ifparams);
else if (strcmp(iftype, "vde") == 0)
nd = lkl_netdev_vde_create(ifparams);
else if (strcmp(iftype, "raw") == 0)
nd = lkl_netdev_raw_create(ifparams);
}

if (nd) {
Expand Down
69 changes: 69 additions & 0 deletions tools/lkl/lib/virtio_net_raw.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* raw socket based virtual network interface feature for LKL
* Copyright (c) 2015,2016 Ryo Nakamura, Hajime Tazaki
*
* Author: Ryo Nakamura <upa@wide.ad.jp>
* Hajime Tazaki <thehajime@gmail.com>
*
* Current implementation is linux-specific.
*/

#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <net/if.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <arpa/inet.h>
#include <fcntl.h>

#include "virtio.h"
#include "virtio_net_linux_fdnet.h"

/* since Linux 3.14 (man 7 packet) */
#ifndef PACKET_QDISC_BYPASS
#define PACKET_QDISC_BYPASS 20
#endif

/**
 * lkl_netdev_raw_create - create a virtio net backend on top of a raw socket
 *
 * Opens a PF_PACKET/SOCK_RAW socket for all protocols (ETH_P_ALL), binds it
 * to the host interface @ifname, tries to enable PACKET_QDISC_BYPASS (best
 * effort), switches the socket to non-blocking mode and finally hands it to
 * lkl_register_netdev_linux_fdnet().
 *
 * @ifname - name of the host interface to bind the raw socket to.
 *
 * Returns the registered net device, or NULL on failure. The socket is
 * closed on every failure path.
 */
struct lkl_netdev *lkl_netdev_raw_create(const char *ifname)
{
	struct lkl_netdev_linux_fdnet *nd;
	struct sockaddr_ll ll;
	unsigned int ifindex;
	int fd, fd_flags, val;

	if (!ifname) {
		fprintf(stderr, "raw: no interface name given\n");
		return NULL;
	}

	/*
	 * Resolve the interface before opening the socket: an ifindex of 0
	 * would make bind() attach to *every* interface instead of failing,
	 * so an unknown name must be rejected explicitly.
	 */
	ifindex = if_nametoindex(ifname);
	if (ifindex == 0) {
		fprintf(stderr, "raw: unknown interface %s: %s\n", ifname,
			strerror(errno));
		return NULL;
	}

	fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (fd < 0) {
		perror("socket");
		return NULL;
	}

	memset(&ll, 0, sizeof(ll));
	ll.sll_family = AF_PACKET;
	ll.sll_ifindex = (int)ifindex;
	ll.sll_protocol = htons(ETH_P_ALL);
	if (bind(fd, (struct sockaddr *)&ll, sizeof(ll))) {
		perror("bind");
		goto err_close;
	}

	/* Optional optimisation (Linux >= 3.14); carry on if unsupported. */
	val = 1;
	if (setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS, &val,
		       sizeof(val)))
		perror("PACKET_QDISC_BYPASS, ignoring");

	/*
	 * O_NONBLOCK is a file *status* flag, so the current value has to be
	 * read with F_GETFL (not F_GETFD, which returns descriptor flags such
	 * as FD_CLOEXEC).
	 */
	fd_flags = fcntl(fd, F_GETFL);
	if (fd_flags < 0) {
		perror("fcntl(F_GETFL)");
		goto err_close;
	}
	if (fcntl(fd, F_SETFL, fd_flags | O_NONBLOCK) < 0) {
		perror("fcntl(F_SETFL)");
		goto err_close;
	}

	nd = lkl_register_netdev_linux_fdnet(fd);
	if (!nd) {
		fprintf(stderr, "raw: failed to register netdev\n");
		/*
		 * NOTE(review): assumes the helper does not close fd itself
		 * when it fails - confirm against its implementation.
		 */
		goto err_close;
	}

	return (struct lkl_netdev *)nd;

err_close:
	close(fd);
	return NULL;
}
8 changes: 5 additions & 3 deletions tools/lkl/tests/net-test.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ static int test_icmp(char *str, int len)
return TEST_FAILURE;
}

ret = lkl_sys_recv(sock, buf, sizeof(buf), 0);
ret = lkl_sys_recv(sock, buf, sizeof(buf), MSG_DONTWAIT);
if (ret < 0) {
snprintf(str, len, "recv error (%s)", strerror(errno));
return TEST_FAILURE;
Expand All @@ -116,7 +116,7 @@ static int test_net_init(int argc, char **argv)
struct lkl_netdev *nd = NULL;

if (argc < 6) {
printf("usage %s <iftype: tap|dpdk> <ifname> <v4addr> <v4mask> <dstaddr> [gateway]\n", argv[0]);
printf("usage %s <iftype: tap|dpdk|raw> <ifname> <v4addr> <v4mask> <dstaddr> [gateway]\n", argv[0]);
exit(0);
}

Expand All @@ -127,14 +127,16 @@ static int test_net_init(int argc, char **argv)
dst = argv[5];

if (argc == 7)
gateway = argv[5];
gateway = argv[6];

if (iftype && ifname && (strncmp(iftype, "tap", 3) == 0))
nd = lkl_netdev_tap_create(ifname);
#ifdef CONFIG_AUTO_LKL_VIRTIO_NET_DPDK
else if (iftype && ifname && (strncmp(iftype, "dpdk", 4) == 0))
nd = lkl_netdev_dpdk_create(ifname);
#endif /* CONFIG_AUTO_LKL_VIRTIO_NET_DPDK */
else if (iftype && ifname && (strncmp(iftype, "raw", 3) == 0))
nd = lkl_netdev_raw_create(ifname);

if (!nd) {
fprintf(stderr, "init netdev failed\n");
Expand Down
14 changes: 14 additions & 0 deletions tools/lkl/tests/net.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
#!/bin/bash -e

IFNAME=`ip route |grep default | awk '{print $5}'`
GW=`ip route |grep default | awk '{print $3}'`
IPADDR=`echo $GW | sed -r "s/([0-9]+\.[0-9]+\.[0-9]+\.)([0-9]+)$/\1\`expr \2 + 10\`/"`
PLEN=`ip rou |grep ${IFNAME} | grep "scope link" | awk '{print $1}' | sed "s/.*\/\(.*\)/\1/"`

echo "== TAP (LKL net) tests =="
if [ -c /dev/net/tun ]; then
sudo ip link set dev lkl_ptt1 down || true
Expand All @@ -14,6 +19,15 @@ if [ -c /dev/net/tun ]; then
sudo ip tuntap del dev lkl_ptt1 mode tap
fi

echo "== RAW socket (LKL net) tests =="
# not currently supported on mingw
if [ -n "`printenv CONFIG_AUTO_LKL_POSIX_HOST`" ] ; then
sudo ip link set dev ${IFNAME} promisc on
# this won't work if IFNAME is wifi since it rewrites the src macaddr
sudo ./net-test raw ${IFNAME} ${IPADDR} ${PLEN} 8.8.8.8 ${GW}
sudo ip link set dev ${IFNAME} promisc off
fi

# we disabled this DPDK test because it's unlikely possible to describe
# a generic set of commands for all environments to test with DPDK. users
# may customize those test commands for your host.
Expand Down

0 comments on commit a28b355

Please sign in to comment.