From 274b033daeeeda109744be3227a74a7868fcbee7 Mon Sep 17 00:00:00 2001 From: William Zhao Date: Wed, 10 Aug 2022 15:47:44 -0400 Subject: [PATCH] Send IPv4 GARP and IPv6 Unsolicited NA in "cmdAdd" In "cmdAdd", SRIOV-CNI now constructs and sends an IPv4 Gratuitous ARP and/or an IPv6 Unsolicited Neighbor Advertisement, depending on the IP addresses configured by IPAM. This change is needed for the scenario where an IP address is reused by IPAM on a different interface (with a different link-layer address). This can occur when pods are deleted and created. Sending a GARP and/or Unsolicited NA is a performance optimization: it updates stale ARP/Neighbor cache entries on other neighbors/nodes. We also set IPv4 arp_notify and IPv6 ndisc_notify under /proc/sys for each interface, so that the kernel sends a GARP and/or Unsolicited NA when the interface is brought up or its link-layer address changes. This is useful in cases where an application re-enables the interface or the MAC address configuration is changed. Some new packages were added, so go.mod and go.sum were modified accordingly. PciUtils is mocked in the sriov tests, since sriov.go now calls PciUtils to enable IPv4 ARP Notify and IPv6 Neighbor Discovery Notify. 
Fixes k8snetworkplumbingwg#177 Signed-off-by: William Zhao --- cmd/sriov/main.go | 49 +++++++++++++ go.mod | 3 +- go.sum | 10 ++- pkg/sriov/sriov.go | 12 +++- pkg/sriov/sriov_test.go | 83 ++++++++++++++++++++- pkg/utils/utils.go | 155 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 306 insertions(+), 6 deletions(-) diff --git a/cmd/sriov/main.go b/cmd/sriov/main.go index 12e592581..a6999ee4c 100644 --- a/cmd/sriov/main.go +++ b/cmd/sriov/main.go @@ -3,6 +3,7 @@ package main import ( "errors" "fmt" + "net" "runtime" "github.com/containernetworking/cni/pkg/skel" @@ -144,6 +145,54 @@ func cmdAdd(args *skel.CmdArgs) error { result = newResult } + /* After IPAM configuration is done, the following needs to handle the case of an IP address being reused by a different pods. + * This is achieved by sending Gratuitous ARPs and/or Unsolicited Neighbor Advertisements unconditionally. + * Although we set arp_notify and ndisc_notify unconditionally on the interface (please see EnableArpAndNdiscNotify()), the kernel + * only sends GARPs/Unsolicited NA when the interface goes from down to up, or when the link-layer address changes on the interfaces. + * These scenarios are perfectly valid and recommended to be enabled for optimal network performance. + * However for our specific case, which the kernel is unaware of, is the reuse of IP addresses across pods where each pod has a different + * link-layer address for it's SRIOV interface. The ARP/Neighbor cache residing in neighbors would be invalid if an IP address is reused. + * In order to update the cache, the GARP/Unsolicited NA packets should be sent for performance reasons. Otherwise, the neighbors + * may be sending packets with the incorrect link-layer address. Eventually, most network stacks would send ARPs and/or Neighbor + * Solicitation packets when the connection is unreachable. This would correct the invalid cache; however this may take a significant + * amount of time to complete. 
+ */ + if !netConf.DPDKMode { + err = netns.Do(func(_ ns.NetNS) error { + // Retrieve the interface name in the container. + contVeth, err := net.InterfaceByName(args.IfName) + if err != nil { + return fmt.Errorf("failed to look up interface %q: %v", args.IfName, err) + } + + // Check that we have a valid hardware MAC address. + if len(contVeth.HardwareAddr) != 6 { + return fmt.Errorf("error invalid Ethernet MAC address: %q", contVeth.HardwareAddr) + } + + // For all the IP addresses assigned by IPAM, we will sent either a GARP (IPv4) or Unsolicited NA (IPv6). + for _, ipc := range result.IPs { + var err error + if ipc.Address.IP.To4() == nil { // IPv6 + /* As per RFC 4861, sending unsolicited neighbor advertisements should be considered as a performance + * optimization. It does not reliably update caches in all nodes. The Neighbor Unreachability Detection + * algorithm is more reliable although it may take slightly longer to update. + */ + err = utils.SendUnsolicitedNeighborAdvertisement(ipc.Address.IP, *contVeth) + } else { // IPv4 + err = utils.SendGratuitousArp(ipc.Address.IP, *contVeth) + } + if err != nil { + return fmt.Errorf("error sending messages for ip %s on interface %q: %v", ipc.Address.IP.String(), args.IfName, err) + } + } + return nil + }) + if err != nil { + return err + } + } + // Cache NetConf for CmdDel if err = utils.SaveNetConf(args.ContainerID, config.DefaultCNIDir, args.IfName, netConf); err != nil { return fmt.Errorf("error saving NetConf %q", err) diff --git a/go.mod b/go.mod index 93a6087b1..f283e0c91 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/onsi/gomega v0.0.0-20151007035656-2152b45fa28a github.com/stretchr/testify v1.4.0 github.com/vishvananda/netlink v1.0.1-0.20190924205540-07ace697bea4 + golang.org/x/net v0.0.0-20220802222814-0bcc04d9c69b ) require ( @@ -18,6 +19,6 @@ require ( github.com/safchain/ethtool v0.0.0-20190326074333-42ed695e3de8 // indirect github.com/stretchr/objx v0.1.0 // indirect 
github.com/vishvananda/netns v0.0.0-20180720170159-13995c7128cc // indirect - golang.org/x/sys v0.0.0-20210510120138-977fb7262007 // indirect + golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10 // indirect gopkg.in/yaml.v2 v2.2.2 // indirect ) diff --git a/go.sum b/go.sum index 02e732001..a685f4b77 100644 --- a/go.sum +++ b/go.sum @@ -43,9 +43,15 @@ github.com/vishvananda/netns v0.0.0-20180720170159-13995c7128cc h1:R83G5ikgLMxrB github.com/vishvananda/netns v0.0.0-20180720170159-13995c7128cc/go.mod h1:ZjcWmFBXmLKZu9Nxj3WKYEafiSqer2rnvPr0en9UNpI= golang.org/x/crypto v0.0.0-20181009213950-7c1a557ab941/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/net v0.0.0-20181011144130-49bb7cea24b1/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20220802222814-0bcc04d9c69b h1:3ogNYyK4oIQdIKzTu68hQrr4iuVxF3AxKl9Aj/eDrw0= +golang.org/x/net v0.0.0-20220802222814-0bcc04d9c69b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= golang.org/x/sys v0.0.0-20190616124812-15dcb6c0061f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210510120138-977fb7262007 h1:gG67DSER+11cZvqIMb8S8bt0vZtiN6xWYARwirrOSfE= -golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10 h1:WIoqL4EROvwiPdUtaip4VcDdpZ4kha7wBWZrbVKCIZg= +golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= gopkg.in/airbrake/gobrake.v2 v2.0.9/go.mod 
h1:/h5ZAUhDkGaJfjzjKLSjv6zCL6O0LLBxU4K+aSYdM/U= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/pkg/sriov/sriov.go b/pkg/sriov/sriov.go index 3ea101e5f..033ed40a7 100644 --- a/pkg/sriov/sriov.go +++ b/pkg/sriov/sriov.go @@ -105,6 +105,7 @@ type pciUtils interface { getSriovNumVfs(ifName string) (int, error) getVFLinkNamesFromVFID(pfName string, vfID int) ([]string, error) getPciAddress(ifName string, vf int) (string, error) + enableArpAndNdiscNotify(ifName string) error } type pciUtilsImpl struct{} @@ -121,6 +122,10 @@ func (p *pciUtilsImpl) getPciAddress(ifName string, vf int) (string, error) { return utils.GetPciAddress(ifName, vf) } +func (p *pciUtilsImpl) enableArpAndNdiscNotify(ifName string) error { + return utils.EnableArpAndNdiscNotify(ifName) +} + // Manager provides interface invoke sriov nic related operations type Manager interface { SetupVF(conf *sriovtypes.NetConf, podifName string, cid string, netns ns.NetNS) (string, error) @@ -192,7 +197,12 @@ func (s *sriovManager) SetupVF(conf *sriovtypes.NetConf, podifName string, cid s return fmt.Errorf("error setting container interface name %s for %s", linkName, tempName) } - // 6. Bring IF up in Pod netns + // 6. Enable IPv4 ARP notify and IPv6 Network Discovery notify + if err := s.utils.enableArpAndNdiscNotify(podifName); err != nil { + return fmt.Errorf("failed to enable arp_notify for interface name: %s, %q", podifName, err) + } + + // 7. 
Bring IF up in Pod netns if err := s.nLink.LinkSetUp(linkObj); err != nil { return fmt.Errorf("error bringing interface up in container ns: %q", err) } diff --git a/pkg/sriov/sriov_test.go b/pkg/sriov/sriov_test.go index b3581d832..cbf19b74f 100644 --- a/pkg/sriov/sriov_test.go +++ b/pkg/sriov/sriov_test.go @@ -209,6 +209,81 @@ func (_m *MockNetlinkManager) LinkSetVfState(_a0 netlink.Link, _a1 int, _a2 uint return r0 } +// mockPciUtils is an autogenerated mock type for the mockPciUtils type +type mockPciUtils struct { + mock.Mock +} + +// enableArpAndNdiscNotify provides a mock function with given fields: ifName +func (_m *mockPciUtils) enableArpAndNdiscNotify(ifName string) error { + ret := _m.Called(ifName) + + var r0 error + if rf, ok := ret.Get(0).(func(string) error); ok { + r0 = rf(ifName) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// getPciAddress provides a mock function with given fields: ifName, vf +func (_m *mockPciUtils) getPciAddress(ifName string, vf int) (string, error) { + ret := _m.Called(ifName, vf) + var r0 string + if rf, ok := ret.Get(0).(func(string, int) string); ok { + r0 = rf(ifName, vf) + } else { + r0 = ret.Get(0).(string) + } + var r1 error + if rf, ok := ret.Get(1).(func(string, int) error); ok { + r1 = rf(ifName, vf) + } else { + r1 = ret.Error(1) + } + return r0, r1 +} + +// getSriovNumVfs provides a mock function with given fields: ifName +func (_m *mockPciUtils) getSriovNumVfs(ifName string) (int, error) { + ret := _m.Called(ifName) + var r0 int + if rf, ok := ret.Get(0).(func(string) int); ok { + r0 = rf(ifName) + } else { + r0 = ret.Get(0).(int) + } + var r1 error + if rf, ok := ret.Get(1).(func(string) error); ok { + r1 = rf(ifName) + } else { + r1 = ret.Error(1) + } + return r0, r1 +} + +// getVFLinkNamesFromVFID provides a mock function with given fields: pfName, vfID +func (_m *mockPciUtils) getVFLinkNamesFromVFID(pfName string, vfID int) ([]string, error) { + ret := _m.Called(pfName, vfID) + var r0 []string + if 
rf, ok := ret.Get(0).(func(string, int) []string); ok { + r0 = rf(pfName, vfID) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]string) + } + } + var r1 error + if rf, ok := ret.Get(1).(func(string, int) error); ok { + r1 = rf(pfName, vfID) + } else { + r1 = ret.Error(1) + } + return r0, r1 +} + // FakeLink is a dummy netlink struct used during testing type FakeLink struct { netlink.LinkAttrs @@ -266,6 +341,7 @@ var _ = Describe("Sriov", func() { }() Expect(err).NotTo(HaveOccurred()) mocked := &MockNetlinkManager{} + mockedPciUtils := &mockPciUtils{} fakeMac, err := net.ParseMAC("6e:16:06:0e:b7:e9") Expect(err).NotTo(HaveOccurred()) @@ -283,7 +359,8 @@ var _ = Describe("Sriov", func() { mocked.On("LinkSetUp", fakeLink).Return(nil) mocked.On("LinkSetVfVlan", mock.Anything, mock.AnythingOfType("int"), mock.AnythingOfType("int")).Return(nil) mocked.On("LinkSetVfVlanQos", mock.Anything, mock.AnythingOfType("int"), mock.AnythingOfType("int"), mock.AnythingOfType("int")).Return(nil) - sm := sriovManager{nLink: mocked} + mockedPciUtils.On("enableArpAndNdiscNotify", mock.AnythingOfType("string")).Return(nil) + sm := sriovManager{nLink: mocked, utils: mockedPciUtils} macAddr, err := sm.SetupVF(netconf, podifName, contID, targetNetNS) Expect(err).NotTo(HaveOccurred()) Expect(macAddr).To(Equal("6e:16:06:0e:b7:e9")) @@ -298,6 +375,7 @@ var _ = Describe("Sriov", func() { }() Expect(err).NotTo(HaveOccurred()) mocked := &MockNetlinkManager{} + mockedPciUtils := &mockPciUtils{} fakeMac, err := net.ParseMAC("6e:16:06:0e:b7:e9") Expect(err).NotTo(HaveOccurred()) @@ -317,7 +395,8 @@ var _ = Describe("Sriov", func() { mocked.On("LinkSetHardwareAddr", fakeLink, expMac).Return(nil) mocked.On("LinkSetNsFd", fakeLink, mock.AnythingOfType("int")).Return(nil) mocked.On("LinkSetUp", fakeLink).Return(nil) - sm := sriovManager{nLink: mocked} + mockedPciUtils.On("enableArpAndNdiscNotify", mock.AnythingOfType("string")).Return(nil) + sm := sriovManager{nLink: mocked, utils: 
mockedPciUtils} macAddr, err := sm.SetupVF(netconf, podifName, contID, targetNetNS) Expect(err).NotTo(HaveOccurred()) Expect(macAddr).To(Equal(netconf.MAC)) diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index 30704283d..c9a1914e6 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -1,13 +1,20 @@ package utils import ( + "bytes" + "encoding/binary" "encoding/json" "fmt" "io/ioutil" + "net" "os" "path/filepath" "strconv" "strings" + "syscall" + + "golang.org/x/net/icmp" + "golang.org/x/net/ipv6" ) var ( @@ -16,10 +23,37 @@ var ( NetDirectory = "/sys/class/net" // SysBusPci is sysfs pci device directory SysBusPci = "/sys/bus/pci/devices" + // SysV4ArpNotify is the sysfs IPv4 ARP Notify directory + SysV4ArpNotify = "/proc/sys/net/ipv4/conf/" + // SysV6NdiscNotify is the sysfs IPv6 Neighbor Discovery Notify directory + SysV6NdiscNotify = "/proc/sys/net/ipv6/conf/" // UserspaceDrivers is a list of driver names that don't have netlink representation for their devices UserspaceDrivers = []string{"vfio-pci", "uio_pci_generic", "igb_uio"} ) +// EnableArpAndNdiscNotify enables IPv4 arp_notify and IPv6 ndisc_notify for VF +func EnableArpAndNdiscNotify(ifName string) error { + /* For arp_notify, when a value of "1" is set then a Gratuitous ARP request will be sent + * when the network device is brought up or if the link-layer address changes. + * For ndsic_notify, when a value of "1" is set then a Unsolicited Neighbor Advertisement + * will be sent when the network device is brought up or if the link-layer address changes. + * Both of these being enabled would be useful in the case when an application reenables + * an interface or if the MAC address configuration is changed. The kernel is responsible + * for sending of these packets when the conditions are met. 
+ */ + v4ArpNotifyPath := filepath.Join(SysV4ArpNotify, ifName, "arp_notify") + err := ioutil.WriteFile(v4ArpNotifyPath, []byte("1"), os.ModeAppend) + if err != nil { + return fmt.Errorf("failed to write arp_notify=1 for interface %s: %v", ifName, err) + } + v6NdiscNotifyPath := filepath.Join(SysV6NdiscNotify, ifName, "ndisc_notify") + err = ioutil.WriteFile(v6NdiscNotifyPath, []byte("1"), os.ModeAppend) + if err != nil { + return fmt.Errorf("failed to write ndisc_notify=1 for interface %s: %v", ifName, err) + } + return nil +} + // GetSriovNumVfs takes in a PF name(ifName) as string and returns number of VF configured as int func GetSriovNumVfs(ifName string) (int, error) { var vfTotal int @@ -258,3 +292,124 @@ func CleanCachedNetConf(cRefPath string) error { } return nil } + +// htons converts an uint16 from host to network byte order. +func htons(i uint16) uint16 { + return (i<<8)&0xff00 | i>>8 +} + +// SendGratuitousArp sends a gratuitous ARP packet with the provided source IP over the provided interface. +func SendGratuitousArp(srcIP net.IP, iface net.Interface) error { + /* As per RFC 5944 section 4.6, a gratuitous ARP packet can be sent by a node in order to spontaneously cause other nodes to update + * an entry in their ARP cache. In the case of SRIOV-CNI, an address can be reused for different pods. Each pod could likely have a + * different link-layer address in this scenario, which makes the ARP cache entries residing in the other nodes to be an invalid. + * The gratuitous ARP packet should update the link-layer address accordingly for the invalid ARP cache. + */ + + // Construct the ARP packet following RFC 5944 section 4.6. 
+ arpPacket := new(bytes.Buffer) + _ = binary.Write(arpPacket, binary.BigEndian, uint16(1)) // Hardware Type: 1 is Ethernet + _ = binary.Write(arpPacket, binary.BigEndian, uint16(syscall.ETH_P_IP)) // Protocol Type: 0x0800 is IPv4 + _ = binary.Write(arpPacket, binary.BigEndian, uint8(6)) // Hardware address Length: 6 bytes for MAC address + _ = binary.Write(arpPacket, binary.BigEndian, uint8(4)) // Protocol address length: 4 bytes for IPv4 address + _ = binary.Write(arpPacket, binary.BigEndian, uint16(1)) // Operation: 1 is request, 2 is response + if _, writeErr := arpPacket.Write(iface.HardwareAddr); writeErr != nil { // Sender hardware address + return fmt.Errorf("failed to write the hardware address in the ARP packet: %v", writeErr) + } + if _, writeErr := arpPacket.Write(srcIP.To4()); writeErr != nil { // Sender protocol address + return fmt.Errorf("failed to write the sender protocol address in the ARP packet: %v", writeErr) + } + _, _ = arpPacket.Write([]byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff}) // Target hardware address is the Broadcast MAC. + if _, writeErr := arpPacket.Write(srcIP.To4()); writeErr != nil { // Target protocol address + return fmt.Errorf("failed to write the target protocol address in the ARP packet: %v", writeErr) + } + + sockAddr := syscall.SockaddrLinklayer{ + Protocol: htons(syscall.ETH_P_ARP), // Ethertype of ARP (0x0806) + Ifindex: iface.Index, // Interface Index + Hatype: 1, // Hardware Type: 1 is Ethernet + Pkttype: 0, // Packet Type. + Halen: 6, // Hardware address Length: 6 bytes for MAC address + Addr: [8]byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, // Address is the broadcast MAC address. + } + + // Create a socket such that the Ethernet header would constructed by the OS. The arpPacket only contains the ARP payload. 
+ soc, err := syscall.Socket(syscall.AF_PACKET, syscall.SOCK_DGRAM, int(htons(syscall.ETH_P_ARP))) + if err != nil { + return fmt.Errorf("failed to create AF_PACKET datagram socket: %v", err) + } + defer syscall.Close(soc) + + if err := syscall.Sendto(soc, arpPacket.Bytes(), 0, &sockAddr); err != nil { + return fmt.Errorf("failed to send Gratuitous ARP for IPv4 %s on Interface %s: %v", srcIP.String(), iface.Name, err) + } + + return nil +} + +// SendUnsolicitedNeighborAdvertisement sends an unsolicited neighbor advertisement packet with the provided source IP over the provided interface. +func SendUnsolicitedNeighborAdvertisement(srcIP net.IP, iface net.Interface) error { + /* As per RFC 4861, a link-layer address change can multicast a few unsolicited neighbor advertisements to all nodes to quickly + * update the cached link-layer addresses that have become invalid. In the case of SRIOV-CNI, an address can be reused for + * different pods. Each pod could likely have a different link-layer address in this scenario, which makes the Neighbor Cache + * entries residing in the neighbors to be an invalid. The unsolicited neighbor advertisement should update the link-layer address + * accordingly for the IPv6 entry. + * However if any of these conditions are true: + * - The IPv6 address was not reused for the new pod. + * - No prior established communication with the neighbor. + * Then the neighbor receiving this unsolicited neighbor advertisement would be silently discard. This behavior is described + * in RFC 4861 section 7.2.5. This is acceptable behavior since the purpose of sending an unsolicited neighbor advertisement + * is not to create a new entry but rather update already existing invalid entries. + */ + + // Construct the ICMPv6 Neighbor Advertisement packet following RFC 4861. 
+ payload := new(bytes.Buffer) + // ICMPv6 Flags: As per RFC 4861, the solicited flag must not be set and the override flag should be set (to + // override existing cache entry) for unsolicited advertisements. + _ = binary.Write(payload, binary.BigEndian, uint32(0x20000000)) + if _, writeErr := payload.Write(srcIP.To16()); writeErr != nil { // ICMPv6 Target IPv6 Address. + return fmt.Errorf("failed to write the target IPv6 address in the ICMPv6 packet: %v", writeErr) + } + _ = binary.Write(payload, binary.BigEndian, uint8(2)) // ICMPv6 Option Type: 2 is target link-layer address. + _ = binary.Write(payload, binary.BigEndian, uint8(1)) // ICMPv6 Option length. Units of 8 bytes. + if _, writeErr := payload.Write(iface.HardwareAddr); writeErr != nil { // ICMPv6 Option Link-layer address. + return fmt.Errorf("failed to write the link-layer address in the ICMPv6 packet: %v", writeErr) + } + + icmpv6Msg := icmp.Message{ + Type: ipv6.ICMPTypeNeighborAdvertisement, // ICMPv6 type is neighbor advertisement. + Code: 0, // ICMPv6 Code: As per RFC 4861 section 7.1.2, the code is always 0. + Checksum: 0, // Checksum is calculated later. + Body: &icmp.RawBody{ + Data: payload.Bytes(), + }, + } + + // Get the byte array of the ICMPv6 Message. + icmpv6Bytes, err := icmpv6Msg.Marshal(nil) + if err != nil { + return fmt.Errorf("failed to Marshal ICMPv6 Message: %v", err) + } + + // Create a socket such that the Ethernet header and IPv6 header would constructed by the OS. + soc, err := syscall.Socket(syscall.AF_INET6, syscall.SOCK_RAW, syscall.IPPROTO_ICMPV6) + if err != nil { + return fmt.Errorf("failed to create AF_INET6 raw socket: %v", err) + } + defer syscall.Close(soc) + + // As per RFC 4861 section 7.1.2, the IPv6 hop limit is always 255. 
+ if err := syscall.SetsockoptInt(soc, syscall.IPPROTO_IPV6, syscall.IPV6_MULTICAST_HOPS, 255); err != nil { + return fmt.Errorf("failed to set IPv6 multicast hops to 255: %v", err) + } + + // Set the destination IPv6 address to the IPv6 link-local all nodes multicast address (ff02::1). + var r [16]byte + copy(r[:], net.IPv6linklocalallnodes.To16()) + sockAddr := syscall.SockaddrInet6{Addr: r} + if err := syscall.Sendto(soc, icmpv6Bytes, 0, &sockAddr); err != nil { + return fmt.Errorf("failed to send Unsolicited Neighbor Advertisement for IPv6 %s on Interface %s: %v", srcIP.String(), iface.Name, err) + } + + return nil +}