From 9799c43e83f9c321dc23d35f7e680395b54f1285 Mon Sep 17 00:00:00 2001 From: "James O. D. Hunt" Date: Tue, 16 Jan 2018 16:55:58 +0000 Subject: [PATCH] vendor: Re-vendor virtcontainers Update virtcontainers specifically for commit fb1eecd which fixes the issue of lingering bind-mounts after a container has been killed. Shortlog of virtcontainers changes: fb1eecd mount: Fix unmount of dangling bind-mounts d7462c7 pkg/oci: Clarify resource calculation comment 027aab8 qemu: adjust QMP naming to avoid non-unique truncation 0c4064e capabilities: Pass capabilities to hyperstart. e20ba9d oci: Add support for capabilities 6776dd9 shim: Correct kata debug flag b307c08 qemu: refactor/simplify addDevice function 747d364 vhost-user: rewrite to use interfaces/embedded types cc67fb0 vhost-user: enabling for vhost-user network devices f5587cf device: make a more generic function for hypervisor args d6f0600 gitignore: Add new shim binary to gitignore list bf8359f gitignore: Add new shim binary to gitignore list c30fd9a ci: Install missing dep tool d1bb792 kata_agent: Signal the kata shim 08c96c2 shim: Generalize stopShim Fixes #924. Signed-off-by: James O. D. 
Hunt --- Gopkg.lock | 116 +++++++++-- Gopkg.toml | 2 +- .../containers/virtcontainers/.gitignore | 2 + .../containers/virtcontainers/container.go | 4 +- .../containers/virtcontainers/device.go | 195 ++++++++++++++++-- .../containers/virtcontainers/device_test.go | 74 +++++++ .../virtcontainers/hyperstart_agent.go | 8 + .../containers/virtcontainers/hypervisor.go | 13 ++ .../containers/virtcontainers/kata_agent.go | 8 +- .../containers/virtcontainers/kata_shim.go | 2 +- .../containers/virtcontainers/mount.go | 12 +- .../containers/virtcontainers/network.go | 106 +++++++++- .../containers/virtcontainers/network_test.go | 31 +++ .../virtcontainers/pkg/hyperstart/types.go | 16 ++ .../virtcontainers/pkg/oci/utils.go | 73 ++++++- .../virtcontainers/pkg/oci/utils_test.go | 50 +++++ .../containers/virtcontainers/pod.go | 20 +- .../containers/virtcontainers/qemu.go | 58 ++++-- .../containers/virtcontainers/qemu_test.go | 29 ++- .../containers/virtcontainers/shim.go | 18 +- 20 files changed, 772 insertions(+), 65 deletions(-) diff --git a/Gopkg.lock b/Gopkg.lock index aabeaabe..8ee73533 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -8,12 +8,22 @@ [[projects]] name = "github.com/clearcontainers/proxy" - packages = ["api","client"] + packages = [ + "api", + "client" + ] revision = "1d2a6a3ea132a86abd0731408b7dc34f2fc17d55" [[projects]] name = "github.com/containernetworking/cni" - packages = ["libcni","pkg/invoke","pkg/types","pkg/types/020","pkg/types/current","pkg/version"] + packages = [ + "libcni", + "pkg/invoke", + "pkg/types", + "pkg/types/020", + "pkg/types/current", + "pkg/version" + ] revision = "384d8c0b5288c25b9f1da901c66ea5155e6c567d" [[projects]] @@ -23,8 +33,17 @@ [[projects]] name = "github.com/containers/virtcontainers" - packages = [".","pkg/annotations","pkg/cni","pkg/ethtool","pkg/hyperstart","pkg/oci","pkg/uuid","pkg/vcMock"] - revision = "387abeb4466d86aef1171b054f64ee8c636aa05f" + packages = [ + ".", + "pkg/annotations", + "pkg/cni", + "pkg/ethtool", + 
"pkg/hyperstart", + "pkg/oci", + "pkg/uuid", + "pkg/vcMock" + ] + revision = "3a6bd62b25e130dc142ba430cf3b45c975d1878e" [[projects]] name = "github.com/davecgh/go-spew" @@ -44,13 +63,25 @@ [[projects]] name = "github.com/gogo/protobuf" - packages = ["gogoproto","proto","protoc-gen-gogo/descriptor","sortkeys","types"] + packages = [ + "gogoproto", + "proto", + "protoc-gen-gogo/descriptor", + "sortkeys", + "types" + ] revision = "342cbe0a04158f6dcb03ca0079991a51a4248c02" [[projects]] branch = "master" name = "github.com/golang/protobuf" - packages = ["proto","ptypes","ptypes/any","ptypes/duration","ptypes/timestamp"] + packages = [ + "proto", + "ptypes", + "ptypes/any", + "ptypes/duration", + "ptypes/timestamp" + ] revision = "1e59b77b52bf8e4b449a57e6f79f21226d571845" [[projects]] @@ -60,7 +91,10 @@ [[projects]] name = "github.com/kata-containers/agent" - packages = ["protocols/client","protocols/grpc"] + packages = [ + "protocols/client", + "protocols/grpc" + ] revision = "306ee20ff47628fe370012d7e3f9316480e43c2e" [[projects]] @@ -96,7 +130,10 @@ [[projects]] name = "github.com/sirupsen/logrus" - packages = [".","hooks/syslog"] + packages = [ + ".", + "hooks/syslog" + ] revision = "89742aefa4b206dcf400792f3bd35b542998eb3b" [[projects]] @@ -111,7 +148,10 @@ [[projects]] name = "github.com/vishvananda/netlink" - packages = [".","nl"] + packages = [ + ".", + "nl" + ] revision = "c2a3de3b38bd00f07290c3c5e12b4dbc04ec8666" [[projects]] @@ -126,18 +166,44 @@ [[projects]] name = "golang.org/x/net" - packages = ["context","http2","http2/hpack","idna","internal/timeseries","lex/httplex","trace"] + packages = [ + "context", + "http2", + "http2/hpack", + "idna", + "internal/timeseries", + "lex/httplex", + "trace" + ] revision = "a8b9294777976932365dabb6640cf1468d95c70f" [[projects]] name = "golang.org/x/sys" - packages = ["unix","windows"] + packages = [ + "unix", + "windows" + ] revision = "1d2aa6dbdea45adaaebb9905d0666e4537563829" [[projects]] branch = "master" name = 
"golang.org/x/text" - packages = ["collate","collate/build","internal/colltab","internal/gen","internal/tag","internal/triegen","internal/ucd","language","secure/bidirule","transform","unicode/bidi","unicode/cldr","unicode/norm","unicode/rangetable"] + packages = [ + "collate", + "collate/build", + "internal/colltab", + "internal/gen", + "internal/tag", + "internal/triegen", + "internal/ucd", + "language", + "secure/bidirule", + "transform", + "unicode/bidi", + "unicode/cldr", + "unicode/norm", + "unicode/rangetable" + ] revision = "e19ae1496984b1c655b8044a65c0300a3c878dd3" [[projects]] @@ -148,12 +214,34 @@ [[projects]] name = "google.golang.org/grpc" - packages = [".","balancer","balancer/roundrobin","codes","connectivity","credentials","encoding","grpclb/grpc_lb_v1/messages","grpclog","internal","keepalive","metadata","naming","peer","resolver","resolver/dns","resolver/passthrough","stats","status","tap","transport"] + packages = [ + ".", + "balancer", + "balancer/roundrobin", + "codes", + "connectivity", + "credentials", + "encoding", + "grpclb/grpc_lb_v1/messages", + "grpclog", + "internal", + "keepalive", + "metadata", + "naming", + "peer", + "resolver", + "resolver/dns", + "resolver/passthrough", + "stats", + "status", + "tap", + "transport" + ] revision = "5a9f7b402fe85096d2e1d0383435ee1876e863d0" [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = "2d0f5ec123ad4d9ee3a784a7ae3c9962a1ac547079589968ec722ca922f472a2" + inputs-digest = "9fb7085c26eb892865b021b6f8b8b81f43e5669f9a9c71b9c21d68b349da468e" solver-name = "gps-cdcl" solver-version = 1 diff --git a/Gopkg.toml b/Gopkg.toml index 724d63b2..a6d39c12 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -72,4 +72,4 @@ [[constraint]] name = "github.com/containers/virtcontainers" - revision = "387abeb4466d86aef1171b054f64ee8c636aa05f" + revision = "3a6bd62b25e130dc142ba430cf3b45c975d1878e" diff --git a/vendor/github.com/containers/virtcontainers/.gitignore 
b/vendor/github.com/containers/virtcontainers/.gitignore index d3ed8ca6..d5fb1b0d 100644 --- a/vendor/github.com/containers/virtcontainers/.gitignore +++ b/vendor/github.com/containers/virtcontainers/.gitignore @@ -4,4 +4,6 @@ /hack/virtc/virtc /hook/mock/hook /shim/mock/shim +/shim/mock/cc-shim/cc-shim +/shim/mock/kata-shim/kata-shim profile.cov diff --git a/vendor/github.com/containers/virtcontainers/container.go b/vendor/github.com/containers/virtcontainers/container.go index 61522796..66f14be9 100644 --- a/vendor/github.com/containers/virtcontainers/container.go +++ b/vendor/github.com/containers/virtcontainers/container.go @@ -834,7 +834,7 @@ func (c *Container) hotplugDrive() error { }).Info("Block device detected") // Add drive with id as container id - devID := makeBlockDevIDForHypervisor(c.id) + devID := makeNameID("drive", c.id) drive := Drive{ File: devicePath, Format: "raw", @@ -870,7 +870,7 @@ func (c *Container) removeDrive() (err error) { if c.isDriveUsed() && c.state.HotpluggedDrive { c.Logger().Info("unplugging block device") - devID := makeBlockDevIDForHypervisor(c.id) + devID := makeNameID("drive", c.id) drive := Drive{ ID: devID, } diff --git a/vendor/github.com/containers/virtcontainers/device.go b/vendor/github.com/containers/virtcontainers/device.go index 9f4b48b4..b322714e 100644 --- a/vendor/github.com/containers/virtcontainers/device.go +++ b/vendor/github.com/containers/virtcontainers/device.go @@ -100,6 +100,10 @@ type DeviceInfo struct { ID string } +func deviceLogger() *logrus.Entry { + return virtLog.WithField("subsystem", "device") +} + // VFIODevice is a vfio device meant to be passed to the hypervisor // to be used by the Virtual Machine. 
type VFIODevice struct { @@ -108,10 +112,6 @@ type VFIODevice struct { BDF string } -func deviceLogger() *logrus.Entry { - return virtLog.WithField("subsystem", "device") -} - func newVFIODevice(devInfo DeviceInfo) *VFIODevice { return &VFIODevice{ DeviceType: DeviceVFIO, @@ -161,6 +161,180 @@ func (device *VFIODevice) deviceType() string { return device.DeviceType } +// VhostUserDeviceType - represents a vhost-user device type +// Currently support just VhostUserNet +type VhostUserDeviceType string + +const ( + //VhostUserSCSI - SCSI based vhost-user type + VhostUserSCSI = "vhost-user-scsi-pci" + //VhostUserNet - net based vhost-user type + VhostUserNet = "virtio-net-pci" + //VhostUserBlk represents a block vhostuser device type + VhostUserBlk = "vhost-user-blk-pci" +) + +// VhostUserDevice represents a vhost-user device. Shared +// attributes of a vhost-user device can be retrieved using +// the Attrs() method. Unique data can be obtained by casting +// the object to the proper type. +type VhostUserDevice interface { + Attrs() *VhostUserDeviceAttrs + Type() string +} + +// VhostUserDeviceAttrs represents data shared by most vhost-user devices +type VhostUserDeviceAttrs struct { + DeviceType string + DeviceInfo DeviceInfo + SocketPath string + ID string +} + +// VhostUserNetDevice is a network vhost-user based device +type VhostUserNetDevice struct { + VhostUserDeviceAttrs + MacAddress string +} + +// Attrs returns the VhostUserDeviceAttrs associated with the vhost-user device +func (vhostUserNetDevice *VhostUserNetDevice) Attrs() *VhostUserDeviceAttrs { + return &vhostUserNetDevice.VhostUserDeviceAttrs +} + +// Type returns the type associated with the vhost-user device +func (vhostUserNetDevice *VhostUserNetDevice) Type() string { + return VhostUserNet +} + +// VhostUserSCSIDevice is a SCSI vhost-user based device +type VhostUserSCSIDevice struct { + VhostUserDeviceAttrs +} + +// Attrs returns the VhostUserDeviceAttrs associated with the vhost-user device +func 
(vhostUserSCSIDevice *VhostUserSCSIDevice) Attrs() *VhostUserDeviceAttrs { + return &vhostUserSCSIDevice.VhostUserDeviceAttrs +} + +// Type returns the type associated with the vhost-user device +func (vhostUserSCSIDevice *VhostUserSCSIDevice) Type() string { + return VhostUserSCSI +} + +// VhostUserBlkDevice is a block vhost-user based device +type VhostUserBlkDevice struct { + VhostUserDeviceAttrs +} + +// Attrs returns the VhostUserDeviceAttrs associated with the vhost-user device +func (vhostUserBlkDevice *VhostUserBlkDevice) Attrs() *VhostUserDeviceAttrs { + return &vhostUserBlkDevice.VhostUserDeviceAttrs +} + +// Type returns the type associated with the vhost-user device +func (vhostUserBlkDevice *VhostUserBlkDevice) Type() string { + return VhostUserBlk +} + +// vhostUserAttach handles the common logic among all of the vhost-user device's +// attach functions +func vhostUserAttach(device VhostUserDevice, h hypervisor, c *Container) (err error) { + // generate a unique ID to be used for hypervisor commandline fields + randBytes, err := generateRandomBytes(8) + if err != nil { + return err + } + id := hex.EncodeToString(randBytes) + + device.Attrs().ID = id + + return h.addDevice(device, vhostuserDev) +} + +// +// VhostUserNetDevice's implementation of the device interface: +// +func (vhostUserNetDevice *VhostUserNetDevice) attach(h hypervisor, c *Container) (err error) { + return vhostUserAttach(vhostUserNetDevice, h, c) +} + +func (vhostUserNetDevice *VhostUserNetDevice) detach(h hypervisor) error { + return nil +} + +func (vhostUserNetDevice *VhostUserNetDevice) deviceType() string { + return vhostUserNetDevice.DeviceType +} + +// +// VhostUserBlkDevice's implementation of the device interface: +// +func (vhostUserBlkDevice *VhostUserBlkDevice) attach(h hypervisor, c *Container) (err error) { + return vhostUserAttach(vhostUserBlkDevice, h, c) +} + +func (vhostUserBlkDevice *VhostUserBlkDevice) detach(h hypervisor) error { + return nil +} + +func 
(vhostUserBlkDevice *VhostUserBlkDevice) deviceType() string { + return vhostUserBlkDevice.DeviceType +} + +// +// VhostUserSCSIDevice's implementation of the device interface: +// +func (vhostUserSCSIDevice *VhostUserSCSIDevice) attach(h hypervisor, c *Container) (err error) { + return vhostUserAttach(vhostUserSCSIDevice, h, c) +} + +func (vhostUserSCSIDevice *VhostUserSCSIDevice) detach(h hypervisor) error { + return nil +} + +func (vhostUserSCSIDevice *VhostUserSCSIDevice) deviceType() string { + return vhostUserSCSIDevice.DeviceType +} + +// Long term, this should be made more configurable. For now matching path +// provided by CNM VPP and OVS-DPDK plugins, available at github.com/clearcontainers/vpp and +// github.com/clearcontainers/ovsdpdk. The plugins create the socket on the host system +// using this path. +const hostSocketSearchPath = "/tmp/vhostuser_%s/vhu.sock" + +// findVhostUserNetSocketPath checks if an interface is a dummy placeholder +// for a vhost-user socket, and if it is it returns the path to the socket +func findVhostUserNetSocketPath(netInfo NetworkInfo) (string, error) { + if netInfo.Iface.Name == "lo" { + return "", nil + } + + // check for socket file existence at known location. + for _, addr := range netInfo.Addrs { + socketPath := fmt.Sprintf(hostSocketSearchPath, addr.IPNet.IP) + if _, err := os.Stat(socketPath); err == nil { + return socketPath, nil + } + } + + return "", nil +} + +// vhostUserSocketPath returns the path of the socket discovered. This discovery +// will vary depending on the type of vhost-user socket. +// Today only VhostUserNetDevice is supported. +func vhostUserSocketPath(info interface{}) (string, error) { + + switch v := info.(type) { + case NetworkInfo: + return findVhostUserNetSocketPath(v) + default: + return "", nil + } + +} + // BlockDevice refers to a block storage device implementation. 
type BlockDevice struct { DeviceType string @@ -177,15 +351,6 @@ func newBlockDevice(devInfo DeviceInfo) *BlockDevice { } } -func makeBlockDevIDForHypervisor(deviceID string) string { - devID := fmt.Sprintf("drive-%s", deviceID) - if len(devID) > maxDevIDSize { - devID = string(devID[:maxDevIDSize]) - } - - return devID -} - func (device *BlockDevice) attach(h hypervisor, c *Container) (err error) { // If VM has not been launched yet, return immediately. // This is because we always want to hotplug block devices. @@ -206,7 +371,7 @@ func (device *BlockDevice) attach(h hypervisor, c *Container) (err error) { drive := Drive{ File: device.DeviceInfo.HostPath, Format: "raw", - ID: makeBlockDevIDForHypervisor(device.DeviceInfo.ID), + ID: makeNameID("drive", device.DeviceInfo.ID), } // Increment the block index for the pod. This is used to determine the name @@ -246,7 +411,7 @@ func (device BlockDevice) detach(h hypervisor) error { deviceLogger().WithField("device", device.DeviceInfo.HostPath).Info("Unplugging block device") drive := Drive{ - ID: makeBlockDevIDForHypervisor(device.DeviceInfo.ID), + ID: makeNameID("drive", device.DeviceInfo.ID), } if err := h.hotplugRemoveDevice(drive, blockDev); err != nil { diff --git a/vendor/github.com/containers/virtcontainers/device_test.go b/vendor/github.com/containers/virtcontainers/device_test.go index 41ced2e9..4c4312dc 100644 --- a/vendor/github.com/containers/virtcontainers/device_test.go +++ b/vendor/github.com/containers/virtcontainers/device_test.go @@ -17,17 +17,91 @@ package virtcontainers import ( + "fmt" "io/ioutil" + "net" "os" "path/filepath" "strconv" "testing" "github.com/stretchr/testify/assert" + "github.com/vishvananda/netlink" ) const fileMode0640 = os.FileMode(0640) +func TestVhostUserSocketPath(t *testing.T) { + + // First test case: search for existing: + addresses := []netlink.Addr{ + { + IPNet: &net.IPNet{ + IP: net.IPv4(192, 168, 0, 2), + Mask: net.IPv4Mask(192, 168, 0, 2), + }, + }, + { + IPNet: 
&net.IPNet{ + IP: net.IPv4(192, 168, 0, 1), + Mask: net.IPv4Mask(192, 168, 0, 1), + }, + }, + } + + expectedPath := "/tmp/vhostuser_192.168.0.1" + expectedFileName := "vhu.sock" + expectedResult := fmt.Sprintf("%s/%s", expectedPath, expectedFileName) + + err := os.Mkdir(expectedPath, 0777) + if err != nil { + t.Fatal(err) + } + + _, err = os.Create(expectedResult) + if err != nil { + t.Fatal(err) + } + netinfo := NetworkInfo{ + Addrs: addresses, + } + + path, _ := vhostUserSocketPath(netinfo) + + if path != expectedResult { + t.Fatalf("Got %+v\nExpecting %+v", path, expectedResult) + } + + // Second test case: search doesn't include matching vsock: + addressesFalse := []netlink.Addr{ + { + IPNet: &net.IPNet{ + IP: net.IPv4(192, 168, 0, 4), + Mask: net.IPv4Mask(192, 168, 0, 4), + }, + }, + } + netinfoFail := NetworkInfo{ + Addrs: addressesFalse, + } + + path, _ = vhostUserSocketPath(netinfoFail) + if path != "" { + t.Fatalf("Got %+v\nExpecting %+v", path, "") + } + + err = os.Remove(expectedResult) + if err != nil { + t.Fatal(err) + } + + err = os.Remove(expectedPath) + if err != nil { + t.Fatal(err) + } + +} + func TestIsVFIO(t *testing.T) { type testData struct { path string diff --git a/vendor/github.com/containers/virtcontainers/hyperstart_agent.go b/vendor/github.com/containers/virtcontainers/hyperstart_agent.go index 2f22e499..a1f937a2 100644 --- a/vendor/github.com/containers/virtcontainers/hyperstart_agent.go +++ b/vendor/github.com/containers/virtcontainers/hyperstart_agent.go @@ -123,6 +123,14 @@ func (h *hyper) buildHyperContainerProcess(cmd Cmd) (*hyperstart.Process, error) NoNewPrivileges: cmd.NoNewPrivileges, } + process.Capabilities = hyperstart.Capabilities{ + Bounding: cmd.Capabilities.Bounding, + Effective: cmd.Capabilities.Effective, + Inheritable: cmd.Capabilities.Inheritable, + Permitted: cmd.Capabilities.Permitted, + Ambient: cmd.Capabilities.Ambient, + } + return process, nil } diff --git 
a/vendor/github.com/containers/virtcontainers/hypervisor.go b/vendor/github.com/containers/virtcontainers/hypervisor.go index 9ebdc62c..df4e283b 100644 --- a/vendor/github.com/containers/virtcontainers/hypervisor.go +++ b/vendor/github.com/containers/virtcontainers/hypervisor.go @@ -75,6 +75,9 @@ const ( // VFIODevice is VFIO device type vfioDev + + // vhostuserDev is a Vhost-user device type + vhostuserDev ) // Set sets an hypervisor type based on the input string. @@ -115,6 +118,16 @@ func newHypervisor(hType HypervisorType) (hypervisor, error) { } } +//Generic function for creating a named-id for passing on the hypervisor commandline +func makeNameID(namedType string, id string) string { + nameID := fmt.Sprintf("%s-%s", namedType, id) + if len(nameID) > maxDevIDSize { + nameID = string(nameID[:maxDevIDSize]) + } + + return nameID +} + // Param is a key/value representation for hypervisor and kernel parameters. type Param struct { Key string diff --git a/vendor/github.com/containers/virtcontainers/kata_agent.go b/vendor/github.com/containers/virtcontainers/kata_agent.go index 7bfe87cc..471585dd 100644 --- a/vendor/github.com/containers/virtcontainers/kata_agent.go +++ b/vendor/github.com/containers/virtcontainers/kata_agent.go @@ -437,7 +437,13 @@ func (k *kataAgent) startContainer(pod Pod, c Container) error { } _, err := k.client.StartContainer(context.Background(), req) - return err + if err != nil { + return err + } + + // The Kata shim wants to be signaled when the init container + // is created. Sending the signal for all containers is harmless. 
+ return signalShim(c.process.Pid, syscall.SIGUSR1) } func (k *kataAgent) stopContainer(pod Pod, c Container) error { diff --git a/vendor/github.com/containers/virtcontainers/kata_shim.go b/vendor/github.com/containers/virtcontainers/kata_shim.go index 323629d6..3315aa5f 100644 --- a/vendor/github.com/containers/virtcontainers/kata_shim.go +++ b/vendor/github.com/containers/virtcontainers/kata_shim.go @@ -60,7 +60,7 @@ func (s *kataShim) start(pod Pod, params ShimParams) (int, error) { args := []string{config.Path, "-agent", params.URL, "-container", params.Container, "-exec-id", params.Token} if config.Debug { - args = append(args, "-d") + args = append(args, "-log", "debug") } return startShim(args, params) diff --git a/vendor/github.com/containers/virtcontainers/mount.go b/vendor/github.com/containers/virtcontainers/mount.go index fd501221..9aa11fb2 100644 --- a/vendor/github.com/containers/virtcontainers/mount.go +++ b/vendor/github.com/containers/virtcontainers/mount.go @@ -373,11 +373,13 @@ func bindUnmountContainerMounts(mounts []Mount) error { for _, m := range mounts { if !isSystemMount(m.Destination) && m.Type == "bind" { err := syscall.Unmount(m.HostPath, 0) - mountLogger().WithFields(logrus.Fields{ - "host-path": m.HostPath, - "error": err, - }).Warn("Could not umount") - return err + if err != nil { + mountLogger().WithFields(logrus.Fields{ + "host-path": m.HostPath, + "error": err, + }).Warn("Could not umount") + return err + } } } return nil diff --git a/vendor/github.com/containers/virtcontainers/network.go b/vendor/github.com/containers/virtcontainers/network.go index d0b16c74..3ea3e7fc 100644 --- a/vendor/github.com/containers/virtcontainers/network.go +++ b/vendor/github.com/containers/virtcontainers/network.go @@ -17,6 +17,7 @@ package virtcontainers import ( + "encoding/hex" "encoding/json" "fmt" "io/ioutil" @@ -151,6 +152,17 @@ type PhysicalEndpoint struct { VendorDeviceID string } +// VhostUserEndpoint represents a vhost-user socket based 
network interface +type VhostUserEndpoint struct { + // Path to the vhost-user socket on the host system + SocketPath string + // MAC address of the interface + HardAddr string + IfaceName string + EndpointProperties NetworkInfo + EndpointType EndpointType +} + // Properties returns properties for the veth interface in the network pair. func (endpoint *VirtualEndpoint) Properties() NetworkInfo { return endpoint.EndpointProperties @@ -200,6 +212,69 @@ func (endpoint *VirtualEndpoint) Detach() error { return xconnectVMNetwork(&(endpoint.NetPair), false) } +// Properties returns the properties of the interface. +func (endpoint *VhostUserEndpoint) Properties() NetworkInfo { + return endpoint.EndpointProperties +} + +// Name returns name of the interface. +func (endpoint *VhostUserEndpoint) Name() string { + return endpoint.IfaceName +} + +// HardwareAddr returns the mac address of the vhostuser network interface +func (endpoint *VhostUserEndpoint) HardwareAddr() string { + return endpoint.HardAddr +} + +// Type indentifies the endpoint as a vhostuser endpoint. +func (endpoint *VhostUserEndpoint) Type() EndpointType { + return endpoint.EndpointType +} + +// SetProperties sets the properties of the endpoint. 
+func (endpoint *VhostUserEndpoint) SetProperties(properties NetworkInfo) { + endpoint.EndpointProperties = properties +} + +// Attach for vhostuser endpoint +func (endpoint *VhostUserEndpoint) Attach(h hypervisor) error { + networkLogger().Info("Attaching vhostuser based endpoint") + + // generate a unique ID to be used for hypervisor commandline fields + randBytes, err := generateRandomBytes(8) + if err != nil { + return err + } + id := hex.EncodeToString(randBytes) + + d := VhostUserNetDevice{ + MacAddress: endpoint.HardAddr, + } + d.SocketPath = endpoint.SocketPath + d.ID = id + + return h.addDevice(d, vhostuserDev) +} + +// Detach for vhostuser endpoint +func (endpoint *VhostUserEndpoint) Detach() error { + networkLogger().Info("Detaching vhostuser based endpoint") + return nil +} + +// Create a vhostuser endpoint +func createVhostUserEndpoint(netInfo NetworkInfo, socket string) (*VhostUserEndpoint, error) { + + vhostUserEndpoint := &VhostUserEndpoint{ + SocketPath: socket, + HardAddr: netInfo.Iface.HardwareAddr.String(), + IfaceName: netInfo.Iface.Name, + EndpointType: VhostUserEndpointType, + } + return vhostUserEndpoint, nil +} + // Properties returns the properties of the physical interface. func (endpoint *PhysicalEndpoint) Properties() NetworkInfo { return endpoint.EndpointProperties @@ -260,6 +335,9 @@ const ( // VirtualEndpointType is the virtual network interface. VirtualEndpointType EndpointType = "virtual" + + // VhostUserEndpointType is the vhostuser network interface. + VhostUserEndpointType EndpointType = "vhost-user" ) // Set sets an endpoint type based on the input string. 
@@ -271,6 +349,9 @@ func (endpointType *EndpointType) Set(value string) error { case "virtual": *endpointType = VirtualEndpointType return nil + case "vhost-user": + *endpointType = VhostUserEndpointType + return nil default: return fmt.Errorf("Unknown endpoint type %s", value) } @@ -283,6 +364,8 @@ func (endpointType *EndpointType) String() string { return string(PhysicalEndpointType) case VirtualEndpointType: return string(VirtualEndpointType) + case VhostUserEndpointType: + return string(VhostUserEndpointType) default: return "" } @@ -382,6 +465,16 @@ func (n *NetworkNamespace) UnmarshalJSON(b []byte) error { endpoints = append(endpoints, &endpoint) virtLog.Infof("Virtual endpoint unmarshalled [%v]", endpoint) + case VhostUserEndpointType: + var endpoint VhostUserEndpoint + err := json.Unmarshal(e.Data, &endpoint) + if err != nil { + return err + } + + endpoints = append(endpoints, &endpoint) + virtLog.Infof("VhostUser endpoint unmarshalled [%v]", endpoint) + default: virtLog.Errorf("Unknown endpoint type received %s\n", e.Type) } @@ -1141,7 +1234,18 @@ func createEndpointsFromScan(networkNSPath string) ([]Endpoint, error) { cnmLogger().WithField("interface", netInfo.Iface.Name).Info("Physical network interface found") endpoint, err = createPhysicalEndpoint(netInfo) } else { - endpoint, err = createVirtualNetworkEndpoint(idx, netInfo.Iface.Name) + // Check if this is a dummy interface which has a vhost-user socket associated with it + socketPath, err := vhostUserSocketPath(netInfo) + if err != nil { + return err + } + + if socketPath != "" { + cnmLogger().WithField("interface", netInfo.Iface.Name).Info("VhostUser network interface found") + endpoint, err = createVhostUserEndpoint(netInfo, socketPath) + } else { + endpoint, err = createVirtualNetworkEndpoint(idx, netInfo.Iface.Name) + } } return err diff --git a/vendor/github.com/containers/virtcontainers/network_test.go b/vendor/github.com/containers/virtcontainers/network_test.go index d9070828..c89657d7 100644 
--- a/vendor/github.com/containers/virtcontainers/network_test.go +++ b/vendor/github.com/containers/virtcontainers/network_test.go @@ -193,6 +193,37 @@ func TestIncorrectEndpointTypeString(t *testing.T) { testEndpointTypeString(t, &endpointType, "") } +func TestCreateVhostUserEndpoint(t *testing.T) { + macAddr := net.HardwareAddr{0x02, 0x00, 0xCA, 0xFE, 0x00, 0x48} + ifcName := "vhost-deadbeef" + socket := "/tmp/vhu_192.168.0.1" + + netinfo := NetworkInfo{ + Iface: NetlinkIface{ + LinkAttrs: netlink.LinkAttrs{ + HardwareAddr: macAddr, + Name: ifcName, + }, + }, + } + + expected := &VhostUserEndpoint{ + SocketPath: socket, + HardAddr: macAddr.String(), + IfaceName: ifcName, + EndpointType: VhostUserEndpointType, + } + + result, err := createVhostUserEndpoint(netinfo, socket) + if err != nil { + t.Fatal(err) + } + + if reflect.DeepEqual(result, expected) == false { + t.Fatalf("\n\tGot %v\n\tExpecting %v", result, expected) + } +} + func TestCreateVirtualNetworkEndpoint(t *testing.T) { macAddr := net.HardwareAddr{0x02, 0x00, 0xCA, 0xFE, 0x00, 0x04} diff --git a/vendor/github.com/containers/virtcontainers/pkg/hyperstart/types.go b/vendor/github.com/containers/virtcontainers/pkg/hyperstart/types.go index c24bdd2c..877b2f97 100644 --- a/vendor/github.com/containers/virtcontainers/pkg/hyperstart/types.go +++ b/vendor/github.com/containers/virtcontainers/pkg/hyperstart/types.go @@ -151,6 +151,20 @@ type Rlimit struct { Soft uint64 `json:"soft"` } +// Capabilities specify the capabilities to keep when executing the process inside the container. +type Capabilities struct { + // Bounding is the set of capabilities checked by the kernel. + Bounding []string `json:"bounding"` + // Effective is the set of capabilities checked by the kernel. + Effective []string `json:"effective"` + // Inheritable is the capabilities preserved across execve. + Inheritable []string `json:"inheritable"` + // Permitted is the limiting superset for effective capabilities. 
+ Permitted []string `json:"permitted"` + // Ambient is the ambient set of capabilities that are kept. + Ambient []string `json:"ambient"` +} + // Process describes a process running on a container inside a pod. type Process struct { User string `json:"user,omitempty"` @@ -173,6 +187,8 @@ type Process struct { Rlimits []Rlimit `json:"rlimits,omitempty"` // NoNewPrivileges indicates that the process should not gain any additional privileges NoNewPrivileges bool `json:"noNewPrivileges"` + // Capabilities specifies the sets of capabilities for the process(es) inside the container. + Capabilities Capabilities `json:"capabilities"` } // SystemMountsInfo describes additional information for system mounts that the agent diff --git a/vendor/github.com/containers/virtcontainers/pkg/oci/utils.go b/vendor/github.com/containers/virtcontainers/pkg/oci/utils.go index 13faa6c5..69bb108f 100644 --- a/vendor/github.com/containers/virtcontainers/pkg/oci/utils.go +++ b/vendor/github.com/containers/virtcontainers/pkg/oci/utils.go @@ -246,6 +246,64 @@ func containerDeviceInfos(spec CompatOCISpec) ([]vc.DeviceInfo, error) { return devices, nil } +func containerCapabilities(s CompatOCISpec) (vc.LinuxCapabilities, error) { + capabilities := s.Process.Capabilities + var c vc.LinuxCapabilities + + // In spec v1.0.0-rc4, capabilities was a list of strings. This was changed + // to an object with v1.0.0-rc5. + // Check for the interface type to support both the versions. 
+ switch caps := capabilities.(type) { + case map[string]interface{}: + for key, value := range caps { + switch val := value.(type) { + case []interface{}: + var list []string + + for _, str := range val { + list = append(list, str.(string)) + } + + switch key { + case "bounding": + c.Bounding = list + case "effective": + c.Effective = list + case "inheritable": + c.Inheritable = list + case "ambient": + c.Ambient = list + case "permitted": + c.Permitted = list + } + + default: + return c, fmt.Errorf("Unexpected format for capabilities: %v", caps) + } + } + case []interface{}: + var list []string + for _, str := range caps { + list = append(list, str.(string)) + } + + c = vc.LinuxCapabilities{ + Bounding: list, + Effective: list, + Inheritable: list, + Ambient: list, + Permitted: list, + } + case nil: + ociLog.Debug("Empty capabilities have been passed") + return c, nil + default: + return c, fmt.Errorf("Unexpected format for capabilities: %v", caps) + } + + return c, nil +} + func networkConfig(ocispec CompatOCISpec) (vc.NetworkConfig, error) { linux := ocispec.Linux if linux == nil { @@ -354,7 +412,8 @@ func vmConfig(ocispec CompatOCISpec, config RuntimeConfig) (vc.Resources, error) if memBytes <= 0 { return vc.Resources{}, fmt.Errorf("Invalid OCI memory limit %d", memBytes) } - // round up memory to 1MB + // Use some math magic to round up to the nearest Mb. + // This has the side effect that we can never have <1Mb assigned. resources.Memory = uint((memBytes + (1024*1024 - 1)) / (1024 * 1024)) } @@ -372,7 +431,12 @@ func vmConfig(ocispec CompatOCISpec, config RuntimeConfig) (vc.Resources, error) return vc.Resources{}, fmt.Errorf("Invalid OCI cpu period %d", period) } - // round up to 1 CPU + // Use some math magic to round up to the nearest whole vCPU + // (that is, a partial part of a quota request ends up assigning + // a whole vCPU, for instance, a request of 1.5 'cpu quotas' + // will give 2 vCPUs). 
+ // This also has the side effect that we will always allocate + // at least 1 vCPU. resources.VCPUs = uint((uint64(quota) + (period - 1)) / period) } @@ -495,6 +559,11 @@ func ContainerConfig(ocispec CompatOCISpec, bundlePath, cid, console string, det return vc.ContainerConfig{}, err } + cmd.Capabilities, err = containerCapabilities(ocispec) + if err != nil { + return vc.ContainerConfig{}, err + } + containerConfig := vc.ContainerConfig{ ID: cid, RootFs: rootfs, diff --git a/vendor/github.com/containers/virtcontainers/pkg/oci/utils_test.go b/vendor/github.com/containers/virtcontainers/pkg/oci/utils_test.go index 0618c099..71f2fdb6 100644 --- a/vendor/github.com/containers/virtcontainers/pkg/oci/utils_test.go +++ b/vendor/github.com/containers/virtcontainers/pkg/oci/utils_test.go @@ -76,6 +76,8 @@ func TestMinimalPodConfig(t *testing.T) { Console: consolePath, } + capList := []string{"CAP_AUDIT_WRITE", "CAP_KILL", "CAP_NET_BIND_SERVICE"} + expectedCmd := vc.Cmd{ Args: []string{"sh"}, Envs: []vc.EnvVar{ @@ -95,6 +97,13 @@ func TestMinimalPodConfig(t *testing.T) { Interactive: true, Console: consolePath, NoNewPrivileges: true, + Capabilities: vc.LinuxCapabilities{ + Bounding: capList, + Effective: capList, + Inheritable: capList, + Permitted: capList, + Ambient: capList, + }, } expectedMounts := []vc.Mount{ @@ -739,6 +748,47 @@ func TestDevicePathEmpty(t *testing.T) { assert.NotNil(t, err, "This test should fail as path cannot be empty for device") } +func TestContainerCapabilities(t *testing.T) { + var ociSpec CompatOCISpec + + ociSpec.Process = &CompatOCIProcess{} + ociSpec.Process.Capabilities = map[string]interface{}{ + "bounding": []interface{}{"CAP_KILL"}, + "effective": []interface{}{"CAP_KILL", "CAP_LEASE"}, + "permitted": []interface{}{"CAP_SETUID"}, + "inheritable": []interface{}{"CAP_KILL", "CAP_LEASE", "CAP_SYS_ADMIN"}, + "ambient": []interface{}{""}, + } + + c, err := containerCapabilities(ociSpec) + assert.Nil(t, err) + assert.Equal(t, c.Bounding, 
[]string{"CAP_KILL"}) + assert.Equal(t, c.Effective, []string{"CAP_KILL", "CAP_LEASE"}) + assert.Equal(t, c.Permitted, []string{"CAP_SETUID"}) + assert.Equal(t, c.Inheritable, []string{"CAP_KILL", "CAP_LEASE", "CAP_SYS_ADMIN"}) + assert.Equal(t, c.Ambient, []string{""}) + + ociSpec.Process.Capabilities = []interface{}{"CAP_LEASE", "CAP_SETUID"} + + c, err = containerCapabilities(ociSpec) + assert.Nil(t, err) + assert.Equal(t, c.Bounding, []string{"CAP_LEASE", "CAP_SETUID"}) + assert.Equal(t, c.Effective, []string{"CAP_LEASE", "CAP_SETUID"}) + assert.Equal(t, c.Permitted, []string{"CAP_LEASE", "CAP_SETUID"}) + assert.Equal(t, c.Inheritable, []string{"CAP_LEASE", "CAP_SETUID"}) + assert.Equal(t, c.Ambient, []string{"CAP_LEASE", "CAP_SETUID"}) + + ociSpec.Process.Capabilities = nil + + c, err = containerCapabilities(ociSpec) + assert.Nil(t, err) + assert.Equal(t, c.Bounding, []string(nil)) + assert.Equal(t, c.Effective, []string(nil)) + assert.Equal(t, c.Permitted, []string(nil)) + assert.Equal(t, c.Inheritable, []string(nil)) + assert.Equal(t, c.Ambient, []string(nil)) +} + func TestMain(m *testing.M) { /* Create temp bundle directory if necessary */ err := os.MkdirAll(tempBundlePath, dirMode) diff --git a/vendor/github.com/containers/virtcontainers/pod.go b/vendor/github.com/containers/virtcontainers/pod.go index 27ed9d8c..ccf281a4 100644 --- a/vendor/github.com/containers/virtcontainers/pod.go +++ b/vendor/github.com/containers/virtcontainers/pod.go @@ -30,13 +30,13 @@ import ( // controlSocket is the pod control socket. // It is an hypervisor resource, and for example qemu's control // socket is the QMP one. -const controlSocket = "ctrl.sock" +const controlSocket = "ctl" // monitorSocket is the pod monitoring socket. // It is an hypervisor resource, and is a qmp socket in the qemu case. // This is a socket that any monitoring entity will listen to in order // to understand if the VM is still alive or not. 
-const monitorSocket = "monitor.sock" +const monitorSocket = "mon" // vmStartTimeout represents the time in seconds a pod can wait before // to consider the VM starting operation failed. @@ -258,6 +258,21 @@ type EnvVar struct { Value string } +// LinuxCapabilities specify the capabilities to keep when executing +// the process inside the container. +type LinuxCapabilities struct { + // Bounding is the set of capabilities checked by the kernel. + Bounding []string + // Effective is the set of capabilities checked by the kernel. + Effective []string + // Inheritable is the capabilities preserved across execve. + Inheritable []string + // Permitted is the limiting superset for effective capabilities. + Permitted []string + // Ambient is the ambient set of capabilities that are kept. + Ambient []string +} + // Cmd represents a command to execute in a running container. type Cmd struct { Args []string @@ -294,6 +309,7 @@ type Cmd struct { Console string Detach bool NoNewPrivileges bool + Capabilities LinuxCapabilities } // Resources describes VM resources configuration. 
diff --git a/vendor/github.com/containers/virtcontainers/qemu.go b/vendor/github.com/containers/virtcontainers/qemu.go index eb527102..e39dcbde 100644 --- a/vendor/github.com/containers/virtcontainers/qemu.go +++ b/vendor/github.com/containers/virtcontainers/qemu.go @@ -287,6 +287,27 @@ func (q *qemu) appendBlockDevice(devices []govmmQemu.Device, drive Drive) []govm return devices } +func (q *qemu) appendVhostUserDevice(devices []govmmQemu.Device, vhostUserDevice VhostUserDevice) []govmmQemu.Device { + + qemuVhostUserDevice := govmmQemu.VhostUserDevice{} + + switch vhostUserDevice := vhostUserDevice.(type) { + case *VhostUserNetDevice: + qemuVhostUserDevice.TypeDevID = makeNameID("net", vhostUserDevice.ID) + qemuVhostUserDevice.Address = vhostUserDevice.MacAddress + case *VhostUserSCSIDevice: + qemuVhostUserDevice.TypeDevID = makeNameID("scsi", vhostUserDevice.ID) + case *VhostUserBlkDevice: + } + + qemuVhostUserDevice.VhostUserType = govmmQemu.VhostUserDeviceType(vhostUserDevice.Type()) + qemuVhostUserDevice.SocketPath = vhostUserDevice.Attrs().SocketPath + qemuVhostUserDevice.CharDevID = makeNameID("char", vhostUserDevice.Attrs().ID) + + devices = append(devices, qemuVhostUserDevice) + + return devices +} func (q *qemu) appendVFIODevice(devices []govmmQemu.Device, vfDevice VFIODevice) []govmmQemu.Device { if vfDevice.BDF == "" { return devices @@ -594,7 +615,7 @@ func (q *qemu) qmpSocketPath(socketName string) (string, error) { parentDirPath, len(parentDirPath)) } - path := fmt.Sprintf("%s/%s-%s", parentDirPath, q.state.UUID, socketName) + path := fmt.Sprintf("%s/%s-%s", parentDirPath, socketName, q.state.UUID) if len(path) > qmpSockPathSizeLimit { return path[:qmpSockPathSizeLimit], nil @@ -997,22 +1018,25 @@ func (q *qemu) resumePod() error { // addDevice will add extra devices to Qemu command line. 
func (q *qemu) addDevice(devInfo interface{}, devType deviceType) error { - switch devType { - case fsDev: - volume := devInfo.(Volume) - q.qemuConfig.Devices = q.appendVolume(q.qemuConfig.Devices, volume) - case serialPortDev: - socket := devInfo.(Socket) - q.qemuConfig.Devices = q.appendSocket(q.qemuConfig.Devices, socket) - case netDev: - endpoint := devInfo.(Endpoint) - q.qemuConfig.Devices = q.appendNetwork(q.qemuConfig.Devices, endpoint) - case blockDev: - drive := devInfo.(Drive) - q.qemuConfig.Devices = q.appendBlockDevice(q.qemuConfig.Devices, drive) - case vfioDev: - vfDevice := devInfo.(VFIODevice) - q.qemuConfig.Devices = q.appendVFIODevice(q.qemuConfig.Devices, vfDevice) + switch v := devInfo.(type) { + case Volume: + q.qemuConfig.Devices = q.appendVolume(q.qemuConfig.Devices, v) + case Socket: + q.qemuConfig.Devices = q.appendSocket(q.qemuConfig.Devices, v) + case Endpoint: + q.qemuConfig.Devices = q.appendNetwork(q.qemuConfig.Devices, v) + case Drive: + q.qemuConfig.Devices = q.appendBlockDevice(q.qemuConfig.Devices, v) + + //vhostUserDevice is an interface, hence the pointer for Net, SCSI and Blk: + case VhostUserNetDevice: + q.qemuConfig.Devices = q.appendVhostUserDevice(q.qemuConfig.Devices, &v) + case VhostUserSCSIDevice: + q.qemuConfig.Devices = q.appendVhostUserDevice(q.qemuConfig.Devices, &v) + case VhostUserBlkDevice: + q.qemuConfig.Devices = q.appendVhostUserDevice(q.qemuConfig.Devices, &v) + case VFIODevice: + q.qemuConfig.Devices = q.appendVFIODevice(q.qemuConfig.Devices, v) default: break } diff --git a/vendor/github.com/containers/virtcontainers/qemu_test.go b/vendor/github.com/containers/virtcontainers/qemu_test.go index cd94e268..3f80852a 100644 --- a/vendor/github.com/containers/virtcontainers/qemu_test.go +++ b/vendor/github.com/containers/virtcontainers/qemu_test.go @@ -121,10 +121,12 @@ func testQemuAppend(t *testing.T, structure interface{}, expected []govmmQemu.De devices = q.appendBlockDevice(devices, s) case VFIODevice: devices 
= q.appendVFIODevice(devices, s) + case VhostUserNetDevice: + devices = q.appendVhostUserDevice(devices, &s) } if reflect.DeepEqual(devices, expected) == false { - t.Fatalf("Got %v\nExpecting %v", devices, expected) + t.Fatalf("\n\tGot %v\n\tExpecting %v", devices, expected) } } @@ -225,6 +227,31 @@ func TestQemuAppendVFIODevice(t *testing.T) { testQemuAppend(t, vfDevice, expectedOut, -1, nestedVM) } +func TestQemuAppendVhostUserDevice(t *testing.T) { + nestedVM := true + socketPath := "nonexistentpath.sock" + macAddress := "00:11:22:33:44:55:66" + id := "deadbeef" + + expectedOut := []govmmQemu.Device{ + govmmQemu.VhostUserDevice{ + SocketPath: socketPath, + CharDevID: fmt.Sprintf("char-%s", id), + TypeDevID: fmt.Sprintf("net-%s", id), + Address: macAddress, + VhostUserType: VhostUserNet, + }, + } + + vhostUserDevice := VhostUserNetDevice{ + MacAddress: macAddress, + } + vhostUserDevice.ID = id + vhostUserDevice.SocketPath = socketPath + + testQemuAppend(t, vhostUserDevice, expectedOut, -1, nestedVM) +} + func TestQemuAppendFSDevices(t *testing.T) { podID := "testPodID" contID := "testContID" diff --git a/vendor/github.com/containers/virtcontainers/shim.go b/vendor/github.com/containers/virtcontainers/shim.go index 8c41cb90..6410ea46 100644 --- a/vendor/github.com/containers/virtcontainers/shim.go +++ b/vendor/github.com/containers/virtcontainers/shim.go @@ -128,14 +128,26 @@ func shimLogger() *logrus.Entry { return virtLog.WithField("subsystem", "shim") } -func stopShim(pid int) error { +func signalShim(pid int, sig syscall.Signal) error { if pid <= 0 { return nil } - shimLogger().WithField("shim-pid", pid).Info("Stopping shim") + shimLogger().WithFields( + logrus.Fields{ + "shim-pid": pid, + "shim-signal": sig, + }).Info("Signalling shim") + + if err := syscall.Kill(pid, sig); err != nil { + return err + } + + return nil +} - if err := syscall.Kill(pid, syscall.SIGKILL); err != nil && err != syscall.ESRCH { +func stopShim(pid int) error { + if err := 
signalShim(pid, syscall.SIGKILL); err != nil && err != syscall.ESRCH { return err }