diff --git a/.version b/.version index ca54cd8e0c..16ff8e1a24 100644 --- a/.version +++ b/.version @@ -1 +1 @@ -2019.01 +2019.06.01 diff --git a/lib/ljsyscall/syscall/linux/syscalls.lua b/lib/ljsyscall/syscall/linux/syscalls.lua index fb6da2ea06..843e9e713e 100644 --- a/lib/ljsyscall/syscall/linux/syscalls.lua +++ b/lib/ljsyscall/syscall/linux/syscalls.lua @@ -483,33 +483,30 @@ local function get_maxnumnodes() return math.floor(((#line+1)/9)*32) end end + -- If we don't know, guess that the system has a max of 1024 nodes. + return 1024 +end + +local function ensure_bitmask(mask, size) + if ffi.istype(t.bitmask, mask) then return mask end + return t.bitmask(mask, size or get_maxnumnodes()) end function S.get_mempolicy(mode, mask, addr, flags) mode = mode or t.int1() - local size - if ffi.istype(t.bitmask, mask) then - -- if mask was provided by the caller, then use its size - -- and let the syscall error if it's too small - size = ffi.cast("uint64_t", tonumber(mask.size)) - else - local mask_for_size = t.bitmask(mask) - -- Size should be at least equals to maxnumnodes. 
- size = ffi.cast("uint64_t", math.max(tonumber(mask_for_size.size), get_maxnumnodes())) - mask = t.bitmask(mask, tonumber(size)) - end - local ret, err = C.get_mempolicy(mode, mask.mask, size, addr or 0, c.MPOL_FLAG[flags]) + mask = ensure_bitmask(mask); + local ret, err = C.get_mempolicy(mode, mask.mask, mask.size, addr or 0, c.MPOL_FLAG[flags]) if ret == -1 then return nil, t.error(err or errno()) end return { mode=mode[0], mask=mask } end function S.set_mempolicy(mode, mask) - mask = mktype(t.bitmask, mask) + mask = ensure_bitmask(mask); return retbool(C.set_mempolicy(c.MPOL_MODE[mode], mask.mask, mask.size)) end function S.migrate_pages(pid, from, to) - from = mktype(t.bitmask, from) - to = mktype(t.bitmask, to) + from = ensure_bitmask(from); + to = ensure_bitmask(to, from.size) assert(from.size == to.size, "incompatible nodemask sizes") return retbool(C.migrate_pages(pid or 0, from.size, from.mask, to.mask)) end diff --git a/src/Makefile b/src/Makefile index 2a9ecafc11..4fc6913c24 100644 --- a/src/Makefile +++ b/src/Makefile @@ -45,7 +45,7 @@ LUAJIT_A := ../lib/luajit/src/raptorjit.a # for each module that has a top-level selftest () function. TESTMODS = $(shell find . -regex '[^\#]*\.\(lua\|dasl\)' -printf '%P ' | \ xargs grep -s -l '^function selftest *[[:punct:]]' | \ - sed -e 's_\.lua__' -e 's_\.dasl__' -e 's_/_._g') + sed -e 's_\.lua__' -e 's_\.dasl__' -e 's_/_._g' -e 's/-/_/g') # TESTSCRIPTS expands to: # lib/watchdog/selftest.sh ... 
diff --git a/src/apps/intel_mp/intel_mp.lua b/src/apps/intel_mp/intel_mp.lua index c9a7670135..941c001bd4 100644 --- a/src/apps/intel_mp/intel_mp.lua +++ b/src/apps/intel_mp/intel_mp.lua @@ -133,12 +133,14 @@ EEC 0x10010 - RW EEPROM/Flash Control Register EIMC 0x00888 - RW Extended Interrupt Mask Clear ERRBC 0x04008 - RC Error Byte Count FCCFG 0x03D00 - RW Flow Control Configuration +FCOERPDC 0x0241C - RC Rx Packets Dropped Count FCTRL 0x05080 - RW Filter Control HLREG0 0x04240 - RW MAC Core Control 0 ILLERRC 0x04004 - RC Illegal Byte Error Count LINKS 0x042A4 - RO Link Status Register MAXFRS 0x04268 - RW Max Frame Size MFLCN 0x04294 - RW MAC Flow Control Register +MNGPDC 0x040B8 - RO Management Packets Dropped Count MRQC 0x0EC80 - RW Multiple Receive Queues Command Register MTQC 0x08120 - RW Multiple Transmit Queues Command Register PFVTCTL 0x051B0 - RW PF Virtual Control Register @@ -1336,7 +1338,11 @@ function Intel82599:promisc () return band(self.r.FCTRL(), lshift(1, 9)) ~= 0ULL end function Intel82599:rxbytes () return self.r.GORC64() end -function Intel82599:rxdrop () return self.r.QPRDC[0]() end +function Intel82599:rxdrop () + local rxdrop = self.r.MNGPDC() + self.r.FCOERPDC() + for i=0,15 do rxdrop = rxdrop + self.r.QPRDC[i]() end + return rxdrop +end function Intel82599:rxerrors () return self.r.CRCERRS() + self.r.ILLERRC() + self.r.ERRBC() + self.r.RUC() + self.r.RFC() + self.r.ROC() + self.r.RJC() diff --git a/src/apps/lwaftr/binding_table.lua b/src/apps/lwaftr/binding_table.lua index d6b10257d0..977606f4b5 100644 --- a/src/apps/lwaftr/binding_table.lua +++ b/src/apps/lwaftr/binding_table.lua @@ -118,7 +118,6 @@ function BTLookupQueue:get_lookup(n) local streamer = self.streamer local pkt, b4_ipv6, br_ipv6 pkt = self.packet_queue[n] - self.packet_queue[n] = nil if not streamer:is_empty(n) then b4_ipv6 = streamer.entries[n].value.b4_ipv6 br_ipv6 = streamer.entries[n].value.br_address diff --git a/src/apps/lwaftr/rangemap.lua 
b/src/apps/lwaftr/rangemap.lua index ae343c148b..3fac77669f 100644 --- a/src/apps/lwaftr/rangemap.lua +++ b/src/apps/lwaftr/rangemap.lua @@ -27,6 +27,14 @@ local function make_entry_type(value_type) value_type) end +local entry_type_cache = {} +local function get_entry_type(value_type) + if not entry_type_cache[value_type] then + entry_type_cache[value_type] = make_entry_type(value_type) + end + return entry_type_cache[value_type] +end + local function make_entries_type(entry_type) return ffi.typeof('$[?]', entry_type) end @@ -63,7 +71,7 @@ end function RangeMapBuilder.new(value_type) local builder = {} builder.value_type = value_type - builder.entry_type = make_entry_type(builder.value_type) + builder.entry_type = get_entry_type(builder.value_type) builder.type = make_entries_type(builder.entry_type) builder.equal_fn = make_equal_fn(builder.value_type) builder.entries = {} diff --git a/src/lib/README.numa.md b/src/lib/README.numa.md index aabc2e7874..608903bbb8 100644 --- a/src/lib/README.numa.md +++ b/src/lib/README.numa.md @@ -12,11 +12,17 @@ for some reason the current process is not bound to a NUMA node. See [../doc/performance-tuning.md] for more notes on getting good performance out of your Snabb program. -— Function **bind_to_cpu** *cpu* +— Function **bind_to_cpu** *cpu* *skip_perf_checks* Bind the current process to *cpu*, arranging for it to only ever be run on that CPU. Additionally, call **bind_to_numa_node** on the NUMA node corresponding to *cpu*. +Unless the optional argument *skip_perf_checks* is true, also run some +basic checks to verify that the given core is suitable for processing +low-latency network traffic: that the CPU has the `performance` scaling +governor, that it has been reserved from the kernel scheduler, and so +on, printing out any problems to `stderr`. + — Function **bind_to_numa_node** *node* Bind the current process to NUMA node *node*, arranging for it to only ever allocate memory local to that NUMA node. 
Additionally, migrate @@ -55,3 +61,18 @@ node bound by **bind_to_numa_node**, if present, and in any case that all *addrs* are on the same NUMA node. If *require_affinity* is true (not the default), then error if a problem is detected, otherwise just print a warning to the console. + +— Function **parse_cpuset** *cpus* +A helper function to parse a CPU set from a string. A CPU set is either +the number of a CPU, a range of CPUs, or two or more CPU sets joined by +commas. The result is a table whose keys are the CPUs and whose values +are true (a set). For example, `parse_cpuset("1-3,5")` will return a +table with keys 1, 2, 3, and 5 bound to `true`. + +— Function **node_cpus** *node* +Return a set of CPUs belonging to NUMA node *node*, in the same format +as in **parse_cpuset**. + +— Function **isolated_cpus** +Return a set of CPUs that have been "isolated" away from the kernel at +boot via the `isolcpus` kernel boot parameter. diff --git a/src/lib/binary_search.dasl b/src/lib/binary_search.dasl index 214b254736..cc2b9a99e2 100644 --- a/src/lib/binary_search.dasl +++ b/src/lib/binary_search.dasl @@ -35,6 +35,7 @@ local function assemble (name, prototype, generator) return ffi.cast(prototype, mcode) end +local gencache = {} -- Cache for generated variants (reuse if possible.) function gen(count, entry_type) local function gen_binary_search(Dst) if count == 1 then @@ -80,9 +81,19 @@ function gen(count, entry_type) | mov rax, rdi | ret end - return assemble("binary_search_"..count, - ffi.typeof("$*(*)($*, uint32_t)", entry_type, entry_type), - gen_binary_search) + -- Assemble binary search variant and cache it, unless it has already + -- been generated. + if not gencache[entry_type] then + gencache[entry_type] = {} + end + if not gencache[entry_type][count] then + gencache[entry_type][count] = + assemble("binary_search_"..count, + ffi.typeof("$*(*)($*, uint32_t)", entry_type, entry_type), + gen_binary_search) + end + -- Return (now) cached routine.
+ return gencache[entry_type][count] end function selftest () diff --git a/src/lib/cpuset.lua b/src/lib/cpuset.lua index 0adaaca54d..0905b9d83e 100644 --- a/src/lib/cpuset.lua +++ b/src/lib/cpuset.lua @@ -19,53 +19,7 @@ do end end -local function trim (str) - return str:gsub("^%s", ""):gsub("%s$", "") -end - -local function parse_cpulist (cpus) - local ret = {} - cpus = trim(cpus) - if #cpus == 0 then return ret end - for range in cpus:split(',') do - local lo, hi = range:match("^%s*([^%-]*)%s*-%s*([^%-%s]*)%s*$") - if lo == nil then lo = range:match("^%s*([^%-]*)%s*$") end - assert(lo ~= nil, 'invalid range: '..range) - lo = assert(tonumber(lo), 'invalid range begin: '..lo) - assert(lo == math.floor(lo), 'invalid range begin: '..lo) - if hi ~= nil then - hi = assert(tonumber(hi), 'invalid range end: '..hi) - assert(hi == math.floor(hi), 'invalid range end: '..hi) - assert(lo < hi, 'invalid range: '..range) - else - hi = lo - end - for cpu=lo,hi do table.insert(ret, cpu) end - end - return ret -end - -local function parse_cpulist_from_file (path) - local fd = assert(io.open(path)) - if not fd then return {} end - local ret = parse_cpulist(fd:read("*all")) - fd:close() - return ret -end - local function available_cpus (node) - local function set (t) - local ret = {} - for _,v in pairs(t) do ret[tostring(v)] = true end - return ret - end - local function cpus_in_node (node) - local node_path = '/sys/devices/system/node/node'..node - return set(parse_cpulist_from_file(node_path..'/cpulist')) - end - local function isolated_cpus () - return set(parse_cpulist_from_file('/sys/devices/system/cpu/isolated')) - end local function subtract (s, t) local ret = {} for k,_ in pairs(s) do @@ -75,7 +29,7 @@ local function available_cpus (node) return ret end -- XXX: Add sched_getaffinity cpus. 
- return subtract(cpus_in_node(node), isolated_cpus()) + return subtract(numa.node_cpus(node), numa.isolated_cpus()) end function CPUSet:bind_to_numa_node() @@ -87,7 +41,7 @@ function CPUSet:bind_to_numa_node() numa.bind_to_numa_node(nodes[1]) local cpus = available_cpus(nodes[1]) assert(#cpus > 0, 'Not available CPUs') - numa.bind_to_cpu(cpus) + numa.bind_to_cpu(cpus, 'skip-perf-checks') print(("Bound main process to NUMA node: %s (CPU %s)"):format(nodes[1], cpus[1])) else print("CPUs available from multiple NUMA nodes: "..table.concat(nodes, ",")) @@ -96,7 +50,7 @@ function CPUSet:bind_to_numa_node() end function CPUSet:add_from_string(cpus) - for _, cpu in ipairs(parse_cpulist(cpus)) do + for cpu,_ in pairs(numa.parse_cpuset(cpus)) do self:add(cpu) end end @@ -157,14 +111,3 @@ function CPUSet:release(cpu) end error('CPU not found on NUMA node: '..cpu..', '..node) end - -function selftest () - print('selftest: cpuset') - local cpus = parse_cpulist("0-5,7") - assert(#cpus == 7 and cpus[6] == 5 and cpus[7] == 7) - cpus = parse_cpulist("1") - assert(#cpus == 1 and cpus[1] == 1) - assert(#parse_cpulist("\n") == 0) - assert(#parse_cpulist("") == 0) - print('selftest: ok') -end diff --git a/src/lib/ctable.lua b/src/lib/ctable.lua index 7cf282e093..6397f649c4 100644 --- a/src/lib/ctable.lua +++ b/src/lib/ctable.lua @@ -4,13 +4,10 @@ local ffi = require("ffi") local C = ffi.C local S = require("syscall") local lib = require("core.lib") -local util = require("lib.yang.util") local binary_search = require("lib.binary_search") local multi_copy = require("lib.multi_copy") local siphash = require("lib.hash.siphash") --- TODO: Move to core/lib.lua. 
-local memoize = util.memoize local min, max, floor, ceil = math.min, math.max, math.floor, math.ceil CTable = {} @@ -430,21 +427,6 @@ function CTable:remove(key, missing_allowed) return true end -local function generate_multi_copy(width, size) - return multi_copy.gen(width, size) -end -generate_multi_copy = memoize(generate_multi_copy) - -local function generate_multi_hash(self, width) - return self.make_multi_hash_fn(width) -end -generate_multi_hash = memoize(generate_multi_hash) - -local function generate_binary_search(entries_per_lookup, entry_type) - return binary_search.gen(entries_per_lookup, entry_type) -end -generate_binary_search = memoize(generate_binary_search) - function CTable:make_lookup_streamer(width) assert(width > 0 and width <= 262144, "Width value out of range: "..width) local res = { @@ -463,6 +445,9 @@ function CTable:make_lookup_streamer(width) -- more entry. stream_entries = self.type(width * (self.max_displacement + 1) + 1) } + -- Pointer to first entry key (cache to avoid cdata allocation.) + local key_offset = 4 -- Skip past uint32_t hash. + res.keys = ffi.cast('uint8_t*', res.entries) + key_offset -- Give res.pointers sensible default values in case the first lookup -- doesn't fill the pointers vector. for i = 0, width-1 do res.pointers[i] = self.entries end @@ -475,9 +460,9 @@ function CTable:make_lookup_streamer(width) -- Compile multi-copy and binary-search procedures that are -- specialized for this table and this width. 
local entry_size = ffi.sizeof(self.entry_type) - res.multi_copy = generate_multi_copy(width, res.entries_per_lookup * entry_size) - res.multi_hash = generate_multi_hash(self, width) - res.binary_search = generate_binary_search(res.entries_per_lookup, self.entry_type) + res.multi_copy = multi_copy.gen(width, res.entries_per_lookup * entry_size) + res.multi_hash = self.make_multi_hash_fn(width) + res.binary_search = binary_search.gen(res.entries_per_lookup, self.entry_type) return setmetatable(res, { __index = LookupStreamer }) end @@ -485,13 +470,13 @@ end function LookupStreamer:stream() local width = self.width local entries = self.entries + local keys = self.keys local pointers = self.pointers local stream_entries = self.stream_entries local entries_per_lookup = self.entries_per_lookup local equal_fn = self.equal_fn - local key_offset = 4 -- Skip past uint32_t hash. - self.multi_hash(ffi.cast('uint8_t*', entries) + key_offset, self.hashes) + self.multi_hash(self.keys, self.hashes) for i=0,width-1 do local hash = self.hashes[i] diff --git a/src/lib/fibers/timer.lua b/src/lib/fibers/timer.lua index aa19ecef65..761156fe04 100644 --- a/src/lib/fibers/timer.lua +++ b/src/lib/fibers/timer.lua @@ -129,8 +129,8 @@ local function tick_outer(inner, outer) local ent = pop_node(head) local idx = math.floor((ent.time - outer.now) * inner.rate) -- Because of floating-point imprecision it's possible to get an - -- index that is too large by 1. - idx = math.min(idx, WHEEL_SLOTS-1) + -- index that falls just outside [0,WHEEL_SLOTS-1]. + idx = math.max(math.min(idx, WHEEL_SLOTS-1), 0) push_node(ent, inner.slots[idx]) end outer.cur = band(outer.cur + 1, SLOT_INDEX_MASK) diff --git a/src/lib/gauge.lua b/src/lib/gauge.lua new file mode 100644 index 0000000000..53256dc31f --- /dev/null +++ b/src/lib/gauge.lua @@ -0,0 +1,48 @@ +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. 
+ +module(..., package.seeall) + +local ffi = require("ffi") +local shm = require("core.shm") + +-- SHM object type for gauges (double precision float values). + +type = shm.register('gauge', getfenv()) + +local gauge_t = ffi.typeof("struct { double g; }") + +function create (name, initval) + local gauge = shm.create(name, gauge_t) + set(gauge, initval or 0) + return gauge +end + +function open (name) + return shm.open(name, gauge_t, 'readonly') +end + +function set (gauge, value) gauge.g = value end +function read (gauge) return gauge.g end + +ffi.metatype(gauge_t, + {__tostring = + function (gauge) return ("%f"):format(read(gauge)) end}) + +function selftest () + print('selftest: lib.gauge') + local a = create("lib.gauge/gauge/a", 1.42) + local a2 = open("lib.gauge/gauge/a") + local b = create("lib.gauge/gauge/b") + assert(read(a) == 1.42) + assert(read(a2) == read(a)) + assert(read(b) == 0) + assert(read(a) ~= read(b)) + set(a, 0.1234) + assert(read(a) == 0.1234) + assert(read(a2) == read(a)) + shm.unmap(a) + shm.unmap(a2) + shm.unmap(b) + shm.unlink("lib.gauge") + print('selftest: ok') +end diff --git a/src/lib/hash/siphash.dasl b/src/lib/hash/siphash.dasl index 778595eb86..86c7a9a9c8 100644 --- a/src/lib/hash/siphash.dasl +++ b/src/lib/hash/siphash.dasl @@ -427,6 +427,7 @@ local sip_hash_config = { size={required=true}, stride={}, key={default=false}, c={default=2}, d={default=4}, as_specified={default=false}, width={default=1} } +local sip_hash_cache = {} -- Cache for generated variants (reuse if possible.) local function make_sip_hash(assembler, opts) function siphash(asm) -- Arguments: @@ -513,8 +514,33 @@ local function make_sip_hash(assembler, opts) opts = lib.parse(opts, sip_hash_config) if not opts.stride then opts.stride = opts.size end + -- Assemble siphash variant and cache it, unless it has already + -- been generated.
+ sip_hash_cache[assembler] = sip_hash_cache[assembler] or {} + for conf, cached in pairs(sip_hash_cache[assembler]) do + if lib.equal(conf, opts) then return cached end + end local asm = assembler(opts.stride) - return asm:assemble("siphash_"..opts.c.."_"..opts.d, siphash) + local sip_hash = asm:assemble("siphash_"..opts.c.."_"..opts.d, siphash) + sip_hash_cache[assembler][opts] = sip_hash + return sip_hash +end + +-- Immediate value x86-64 backend for the SipHash implementation. +local function ImmX86_64() + local asm = X86_64() + function asm.load_u64_and_advance(dst) + asm.copy_argument(dst, 1) + end + function asm.finish(name, Dst) + return finish(name.."_u64", + ffi.typeof("uint32_t (*)(uint64_t)"), + Dst) + end + asm.load_u32_and_advance = error + asm.load_u16_and_advance = error + asm.load_u8_and_advance = error + return asm end -- A special implementation to hash immediate values; requires our @@ -523,21 +549,6 @@ function make_u64_hash(opts) local opts = lib.deepcopy(opts) opts.size = 8 assert(not opts.as_specified) - local function ImmX86_64() - local asm = X86_64() - function asm.load_u64_and_advance(dst) - asm.copy_argument(dst, 1) - end - function asm.finish(name, Dst) - return finish(name.."_u64", - ffi.typeof("uint32_t (*)(uint64_t)"), - Dst) - end - asm.load_u32_and_advance = error - asm.load_u16_and_advance = error - asm.load_u8_and_advance = error - return asm - end return make_sip_hash(ImmX86_64, opts) end diff --git a/src/lib/maxpc.lua b/src/lib/maxpc.lua index c5dec94fb2..7967efd96a 100644 --- a/src/lib/maxpc.lua +++ b/src/lib/maxpc.lua @@ -265,6 +265,17 @@ function capture.integer_number (radix) end +-- string parsing + +function match.string (s) + local chars = {} + for i = 1, #s do + chars[i] = match.equal(s:sub(i,i)) + end + return match.seq(unpack(chars)) +end + + -- backtracking combinators function match.plus (a, b) diff --git a/src/lib/multi_copy.dasl b/src/lib/multi_copy.dasl index 09051eb526..dd669c18dd 100644 --- 
a/src/lib/multi_copy.dasl +++ b/src/lib/multi_copy.dasl @@ -40,6 +40,7 @@ local function assemble (name, prototype, generator) return ffi.cast(prototype, mcode) end +local gencache = {} -- Cache for generated variants (reuse if possible.) function gen(count, size) local function gen_multi_copy(Dst) -- dst in rdi @@ -115,10 +116,13 @@ function gen(count, size) | pop r12 | ret end - - return assemble("multi_copy_"..size, - "void(*)(void*, void*)", - gen_multi_copy) + local name = "multi_copy_"..size.."_"..count + -- Assemble multi copy variant and cache it, unless it has already + -- been generated. + if not gencache[name] then + gencache[name] = assemble(name, "void(*)(void*, void*)", gen_multi_copy) + end + return gencache[name] end function selftest () diff --git a/src/lib/numa.lua b/src/lib/numa.lua index 6c7f6518ba..55b510be24 100644 --- a/src/lib/numa.lua +++ b/src/lib/numa.lua @@ -18,6 +18,57 @@ local bound_numa_node local node_path = '/sys/devices/system/node/node' local MAX_CPU = 1023 +local function warn(fmt, ...) + io.stderr:write(string.format("Warning: ".. fmt .. "\n", ...)) + io.stderr:flush() +end + +local function die(fmt, ...)
+ error(string.format(fmt, ...)) +end + +local function trim (str) + return str:gsub("^%s", ""):gsub("%s$", "") +end + +function parse_cpuset (cpus) + local ret = {} + cpus = trim(cpus) + if #cpus == 0 then return ret end + for range in cpus:split(',') do + local lo, hi = range:match("^%s*([^%-]*)%s*-%s*([^%-%s]*)%s*$") + if lo == nil then lo = range:match("^%s*([^%-]*)%s*$") end + assert(lo ~= nil, 'invalid range: '..range) + lo = assert(tonumber(lo), 'invalid range begin: '..lo) + assert(lo == math.floor(lo), 'invalid range begin: '..lo) + if hi ~= nil then + hi = assert(tonumber(hi), 'invalid range end: '..hi) + assert(hi == math.floor(hi), 'invalid range end: '..hi) + assert(lo < hi, 'invalid range: '..range) + else + hi = lo + end + for cpu=lo,hi do table.insert(ret, cpu) end + end + return lib.set(unpack(ret)) +end + +local function parse_cpuset_from_file (path) + local fd = assert(io.open(path)) + if not fd then return {} end + local ret = parse_cpuset(fd:read("*all")) + fd:close() + return ret +end + +function node_cpus (node) + return parse_cpuset_from_file(node_path..node..'/cpulist') +end + +function isolated_cpus (node) + return parse_cpuset_from_file('/sys/devices/system/cpu/isolated') +end + function cpu_get_numa_node (cpu) local node = 0 while true do @@ -62,10 +113,10 @@ function choose_numa_node_for_pci_addresses (addrs, require_affinity) chosen_node = node chosen_because_of_addr = addr else - local msg = string.format( - "PCI devices %s and %s have different NUMA node affinities", - chosen_because_of_addr, addr) - if require_affinity then error(msg) else print('Warning: '..msg) end + local warn = warn + if require_affinity then warn = die end + warn("PCI devices %s and %s have different NUMA node affinities", + chosen_because_of_addr, addr) end end return chosen_node @@ -75,20 +126,54 @@ function check_affinity_for_pci_addresses (addrs) local policy = S.get_mempolicy() if policy.mode == S.c.MPOL_MODE['default'] then if has_numa() then - 
print('Warning: No NUMA memory affinity.') - print('Pass --cpu to bind to a CPU and its NUMA node.') + warn('No NUMA memory affinity.\n'.. + 'Pass --cpu to bind to a CPU and its NUMA node.') end - elseif policy.mode ~= S.c.MPOL_MODE['bind'] then - print("Warning: NUMA memory policy already in effect, but it's not --membind.") + elseif (policy.mode ~= S.c.MPOL_MODE['bind'] and + policy.mode ~= S.c.MPOL_MODE['preferred']) then + warn("NUMA memory policy already in effect, but it's not --membind or --preferred.") else local node = S.getcpu().node local node_for_pci = choose_numa_node_for_pci_addresses(addrs) if node_for_pci and node ~= node_for_pci then - print("Warning: Bound NUMA node does not have affinity with PCI devices.") + warn("Bound NUMA node does not have affinity with PCI devices.") end end end +local irqbalanced_checked = false +local function assert_irqbalanced_disabled (warn) + if irqbalanced_checked then return end + irqbalanced_checked = true + for path in os.getenv('PATH'):split(':') do + if S.stat(path..'/irqbalance') then + if S.stat('/etc/default/irqbalance') then + for line in io.lines('/etc/default/irqbalance') do + if line:match('^ENABLED=0') then return end + end + end + warn('Irqbalanced detected; this will hurt performance! 
%s', + 'Consider uninstalling via "sudo apt-get remove irqbalance" and rebooting.') + end + end +end + +local function check_cpu_performance_tuning (cpu, strict) + local warn = warn + if strict then warn = die end + assert_irqbalanced_disabled(warn) + local path = '/sys/devices/system/cpu/cpu'..cpu..'/cpufreq/scaling_governor' + local gov = assert(io.open(path)):read() + if not gov:match('performance') then + warn('Expected performance scaling governor for CPU %s, but got "%s"', + cpu, gov) + end + + if not isolated_cpus()[cpu] then + warn('Expected dedicated core, but CPU %s is not in isolcpus set', cpu) + end +end + function unbind_cpu () local cpu_set = S.sched_getaffinity() cpu_set:zero() @@ -97,58 +182,7 @@ function unbind_cpu () bound_cpu = nil end -local blacklisted_kernels = { - '>=4.15', -} -local function sys_kernel () - return lib.readfile('/proc/sys/kernel/osrelease', '*all'):gsub('%s$', '') -end -local function parse_version_number (str) - local t = {} - for each in str:gmatch("([^.]+)") do - table.insert(t, tonumber(each) or 0) - end - return t -end -local function equals (v1, v2) - for i, p1 in ipairs(v1) do - local p2 = v2[i] or 0 - if p1 ~= p2 then return false end - end - return true -end -local function greater_or_equals (v1, v2) - for i, p1 in ipairs(v1) do - local p2 = v2[i] or 0 - if p2 > p1 then return false end - end - return true -end -local function greater (v1, v2) - return greater_or_equals(v1, v2) and not equals(v1, v2) -end -function is_blacklisted_kernel (v) - for _, each in ipairs(blacklisted_kernels) do - -- Greater or equal. - if each:sub(1, 2) == '>=' then - each = each:sub(3, #each) - local v1, v2 = parse_version_number(v), parse_version_number(each) - if greater_or_equals(v1, v2) then return true end - -- Greater than. - elseif each:sub(1, 1) == '>' then - each = each:sub(2, #each) - local v1, v2 = parse_version_number(v), parse_version_number(each) - if greater(v1, v2) then return true end - -- Equals. 
- else - local v1, v2 = parse_version_number(v), parse_version_number(each) - if equals(v1, v2) then return true end - end - end - return false -end - -function bind_to_cpu (cpu) +function bind_to_cpu (cpu, skip_perf_checks) local function contains (t, e) for k,v in ipairs(t) do if tonumber(v) == tonumber(e) then return true end @@ -167,6 +201,8 @@ function bind_to_cpu (cpu) bound_cpu = cpu_and_node.cpu bind_to_numa_node (cpu_and_node.node) + + if not skip_perf_checks then check_cpu_performance_tuning(bound_cpu) end end function unbind_numa_node () @@ -177,11 +213,6 @@ function unbind_numa_node () end function bind_to_numa_node (node, policy) - local kernel = sys_kernel() - if is_blacklisted_kernel(kernel) then - print(("WARNING: Buggy kernel '%s'. Not binding CPU to NUMA node."):format(kernel)) - return - end if node == bound_numa_node then return end if not node then return unbind_numa_node() end assert(not bound_numa_node, "already bound") @@ -193,9 +224,8 @@ function bind_to_numa_node (node, policy) local from_mask = assert(S.get_mempolicy(nil, nil, nil, 'mems_allowed')).mask local ok, err = S.migrate_pages(0, from_mask, node) if not ok then - io.stderr:write( - string.format("Warning: Failed to migrate pages to NUMA node %d: %s\n", - node, tostring(err))) + warn("Failed to migrate pages to NUMA node %d: %s", + node, tostring(err)) end end @@ -209,9 +239,20 @@ end function selftest () + local cpus = parse_cpuset("0-5,7") + for i=0,5 do assert(cpus[i]) end + assert(not cpus[6]) + assert(cpus[7]) + do + local count = 0 + for k,v in pairs(cpus) do count = count + 1 end + assert(count == 7) + end + assert(parse_cpuset("1")[1]) + function test_cpu(cpu) local node = cpu_get_numa_node(cpu) - bind_to_cpu(cpu) + bind_to_cpu(cpu, 'skip-perf-checks') assert(bound_cpu == cpu) assert(bound_numa_node == node) assert(S.getcpu().cpu == cpu) @@ -247,9 +288,5 @@ function selftest () test_pci_affinity(pciaddr) end - assert(greater(parse_version_number('4.15'),
parse_version_number('4.4.80'))) - assert(greater_or_equals(parse_version_number('4.15'), parse_version_number('4.15'))) - assert(not greater(parse_version_number('4.14'), parse_version_number('4.15'))) - print('selftest: numa: ok') end diff --git a/src/lib/ptree/action_codec.lua b/src/lib/ptree/action_codec.lua index 305be9ed91..c483d9a930 100644 --- a/src/lib/ptree/action_codec.lua +++ b/src/lib/ptree/action_codec.lua @@ -179,7 +179,10 @@ local function decoder(buf, len) return assert(require(require_path)[name]) end function decoder:config() - return binary.load_compiled_data_file(self:string()).data + local filename = self:string() + local data = binary.load_compiled_data_file(filename).data + S.unlink(filename) + return data end function decoder:finish(...) return { ... } diff --git a/src/lib/ptree/ptree.lua b/src/lib/ptree/ptree.lua index e9540b5a5b..749634ea54 100644 --- a/src/lib/ptree/ptree.lua +++ b/src/lib/ptree/ptree.lua @@ -36,6 +36,7 @@ local queue = require('lib.fibers.queue') local fiber_sleep = require('lib.fibers.sleep').sleep local inotify = require("lib.ptree.inotify") local counter = require("core.counter") +local gauge = require("lib.gauge") local cond = require("lib.fibers.cond") local call_with_output_string = mem.call_with_output_string @@ -92,6 +93,8 @@ function new_manager (conf) ret.state_change_listeners = {} -- name->{aggregated=counter, active=pid->counter, archived=uint64[1]} ret.counters = {} + -- name->{aggregated=gauge, active=pid->gauge} + ret.gauges = {} if conf.rpc_trace_file then ret:info("Logging RPCs to %s", conf.rpc_trace_file) @@ -177,7 +180,7 @@ function Manager:start () fiber.spawn(function () self:accept_rpc_peers() end) fiber.spawn(function () self:accept_notification_peers() end) fiber.spawn(function () self:notification_poller() end) - fiber.spawn(function () self:sample_active_counters() end) + fiber.spawn(function () self:sample_active_stats() end) end function Manager:call_with_cleanup(closeable, f, ...) 
@@ -316,10 +319,11 @@ local function strip_suffix(a, b) return a:sub(1,-(#b+1)) end -function Manager:make_rrd(counter_name) - local name = strip_suffix(counter_name, ".counter")..'.rrd' +function Manager:make_rrd(counter_name, typ) + typ = typ or 'counter' + local name = strip_suffix(counter_name, "."..typ)..'.rrd' return rrd.create_shm(name, { - sources={{name='value', type='counter'}}, + sources={{name='value', type=typ}}, -- NOTE: The default heartbeat interval is 1s, so relax -- base_interval to 2s as we're only polling every 1s (and we'll -- be slightly late). Also note that these settings correspond to @@ -339,41 +343,59 @@ local function blacklisted (name) return blacklisted_counters[strip_suffix(lib.basename(name), '.counter')] end -function Manager:monitor_worker_counters(id) +function Manager:monitor_worker_stats(id) local worker = self.workers[id] if not worker then return end -- Worker was removed before monitor started. local pid, cancel = worker.pid, worker.cancel:wait_operation() local dir = shm.root..'/'..pid local events = inotify.recursive_directory_inventory_events(dir, cancel) for ev in events.get, events do - if has_suffix(ev.name, '.counter') then + if has_prefix(ev.name, dir..'/') then local name = strip_prefix(ev.name, dir..'/') local qualified_name = '/'..pid..'/'..name - local counters = self.counters[name] - if blacklisted(name) then + if has_suffix(ev.name, '.counter') then + local counters = self.counters[name] + if blacklisted(name) then -- Pass. 
- elseif ev.kind == 'creat' then - if not counters then - counters = { aggregated=counter.create(name), active={}, - rrd={}, aggregated_rrd=self:make_rrd(name), - archived=ffi.new('uint64_t[1]') } - self.counters[name] = counters + elseif ev.kind == 'creat' then + if not counters then + counters = { aggregated=counter.create(name), active={}, + rrd={}, aggregated_rrd=self:make_rrd(name), + archived=ffi.new('uint64_t[1]') } + self.counters[name] = counters + end + counters.active[pid] = counter.open(qualified_name) + counters.rrd[pid] = self:make_rrd(qualified_name) + elseif ev.kind == 'rm' then + local val = counter.read(assert(counters.active[pid])) + counters.active[pid] = nil + counters.rrd[pid] = nil + counters.archived[0] = counters.archived[0] + val + counter.delete(qualified_name) + S.unlink(strip_suffix(qualified_name, ".counter")..".rrd") + end + elseif has_suffix(ev.name, '.gauge') then + local gauges = self.gauges[name] + if ev.kind == 'creat' then + if not gauges then + gauges = { aggregated=gauge.create(name), active={}, + rrd={}, aggregated_rrd=self:make_rrd(name, 'gauge') } + self.gauges[name] = gauges + end + gauges.active[pid] = gauge.open(qualified_name) + gauges.rrd[pid] = self:make_rrd(qualified_name, 'gauge') + elseif ev.kind == 'rm' then + shm.unmap(gauges.active[pid]) + gauges.active[pid] = nil + gauges.rrd[pid] = nil + S.unlink(strip_suffix(qualified_name, ".gauge")..".rrd") end - counters.active[pid] = counter.open(qualified_name) - counters.rrd[pid] = self:make_rrd(qualified_name) - elseif ev.kind == 'rm' then - local val = counter.read(assert(counters.active[pid])) - counters.active[pid] = nil - counters.rrd[pid] = nil - counters.archived[0] = counters.archived[0] + val - counter.delete(qualified_name) - S.unlink(strip_suffix(qualified_name, ".counter")..".rrd") end end end end -function Manager:sample_active_counters() +function Manager:sample_active_stats() while true do local now = rrd.now() for name, counters in pairs(self.counters) 
do @@ -387,6 +409,16 @@ function Manager:sample_active_counters() counter.set(counters.aggregated, sum) end counter.commit() + for name, gauges in pairs(self.gauges) do + local sum = 0 + for pid, active in pairs(gauges.active) do + local v = gauge.read(active) + gauges.rrd[pid]:add({value=v}, now) + sum = sum + v + end + gauges.aggregated_rrd:add({value=sum}, now) + gauge.set(gauges.aggregated, sum) + end fiber_sleep(1) end end @@ -403,7 +435,7 @@ function Manager:start_worker_for_graph(id, graph) local actions = self.support.compute_config_actions( app_graph.new(), self.workers[id].graph, {}, 'load') self:enqueue_config_actions_for_worker(id, actions) - fiber.spawn(function () self:monitor_worker_counters(id) end) + fiber.spawn(function () self:monitor_worker_stats(id) end) return self.workers[id] end @@ -582,7 +614,8 @@ function Manager:foreign_rpc_get_state (schema_name, path, format, print_default) path = path_mod.normalize_path(path) local translate = self:get_translator(schema_name) - local foreign_state = translate.get_state(self:get_native_state()) + local foreign_state = translate.get_state(self:get_native_state(), + self.current_configuration) local printer = path_data.printer_for_schema_by_name( schema_name, path, false, format, print_default) return { state = call_with_output_string(printer, foreign_state) } diff --git a/src/lib/ptree/support/snabb-softwire-v2.lua b/src/lib/ptree/support/snabb-softwire-v2.lua index 49b8cd1746..0eb88ce844 100644 --- a/src/lib/ptree/support/snabb-softwire-v2.lua +++ b/src/lib/ptree/support/snabb-softwire-v2.lua @@ -11,36 +11,25 @@ local ctable = require('lib.ctable') local cltable = require('lib.cltable') local data = require('lib.yang.data') local state = require('lib.yang.state') -local ipv4_ntop = require('lib.yang.util').ipv4_ntop +local yang_util = require('lib.yang.util') +local ipv4_ntop = yang_util.ipv4_ntop local yang = require('lib.yang.yang') local path_mod = require('lib.yang.path') local path_data = 
require('lib.yang.path_data') local generic = require('lib.ptree.support').generic_schema_config_support local binding_table = require("apps.lwaftr.binding_table") -local binding_table_instance -local function get_binding_table_instance(conf) - if binding_table_instance == nil then - binding_table_instance = binding_table.load(conf) - end - return binding_table_instance -end - -- Packs snabb-softwire-v2 softwire entry into softwire and PSID blob -- -- The data plane stores a separate table of psid maps and softwires. It -- requires that we give it a blob it can quickly add. These look rather -- similar to snabb-softwire-v1 structures however it maintains the br-address -- on the softwire so are subtly different. -local function pack_softwire(app_graph, entry) +local function pack_softwire(app_graph, bt, entry) assert(app_graph.apps['lwaftr']) assert(entry.value.port_set, "Softwire lacks port-set definition") local key, value = entry.key, entry.value - -- Get the binding table - local bt_conf = app_graph.apps.lwaftr.arg.softwire_config.binding_table - bt = get_binding_table_instance(bt_conf) - local softwire_t = bt.softwires.entry_type() psid_map_t = bt.psid_map.entry_type() @@ -60,13 +49,10 @@ local function pack_softwire(app_graph, entry) return packed_softwire, packed_psid_map end -local function add_softwire_entry_actions(app_graph, entries) - assert(app_graph.apps['lwaftr']) - local bt_conf = app_graph.apps.lwaftr.arg.softwire_config.binding_table - local bt = get_binding_table_instance(bt_conf) +local function add_softwire_entry_actions(app_graph, bt, entries) local ret = {} for entry in entries:iterate() do - local psoftwire, ppsid = pack_softwire(app_graph, entry) + local psoftwire, ppsid = pack_softwire(app_graph, bt, entry) assert(bt:is_managed_ipv4_address(psoftwire.key.ipv4)) local softwire_args = {'lwaftr', 'add_softwire_entry', psoftwire} @@ -100,7 +86,8 @@ local function remove_softwire_entry_actions(app_graph, path) return 
{{'call_app_method_with_blob', args}, {'commit', {}}} end -local function compute_config_actions(old_graph, new_graph, to_restart, +local function compute_config_actions(get_binding_table_instance, + old_graph, new_graph, to_restart, verb, path, arg) -- If the binding cable changes, remove our cached version. if path ~= nil and path:match("^/softwire%-config/binding%-table") then @@ -109,7 +96,10 @@ local function compute_config_actions(old_graph, new_graph, to_restart, if verb == 'add' and path == '/softwire-config/binding-table/softwire' then if to_restart == false then - return add_softwire_entry_actions(new_graph, arg) + assert(new_graph.apps['lwaftr']) + local bt_conf = app_graph.apps.lwaftr.arg.softwire_config.binding_table + local bt = get_binding_table_instance(bt_conf) + return add_softwire_entry_actions(new_graph, bt, arg) end elseif (verb == 'remove' and path:match('^/softwire%-config/binding%-table/softwire')) then @@ -135,6 +125,7 @@ local function update_mutable_objects_embedded_in_app_initargs( end local function compute_apps_to_restart_after_configuration_update( + get_binding_table_instance, schema_name, configuration, verb, path, in_place_dependencies, arg) if verb == 'add' and path == '/softwire-config/binding-table/softwire' then -- We need to check if the softwire defines a new port-set, if so we need to @@ -172,24 +163,24 @@ local function cltable_for_grammar(grammar) return cltable.new({key_type=key_t}), key_t end -local ietf_br_instance_grammar -local function get_ietf_br_instance_grammar() - if not ietf_br_instance_grammar then +local ietf_bind_instance_grammar +local function get_ietf_bind_instance_grammar() + if not ietf_bind_instance_grammar then local schema = yang.load_schema_by_name('ietf-softwire-br') local grammar = data.config_grammar_from_schema(schema) grammar = assert(grammar.members['br-instances']) grammar = assert(grammar.members['br-type']) grammar = assert(grammar.choices['binding'].binding) - grammar = 
assert(grammar.members['br-instance']) - ietf_br_instance_grammar = grammar + grammar = assert(grammar.members['bind-instance']) + ietf_bind_instance_grammar = grammar end - return ietf_br_instance_grammar + return ietf_bind_instance_grammar end local ietf_softwire_grammar local function get_ietf_softwire_grammar() if not ietf_softwire_grammar then - local grammar = get_ietf_br_instance_grammar() + local grammar = get_ietf_bind_instance_grammar() grammar = assert(grammar.values['binding-table']) grammar = assert(grammar.members['binding-entry']) ietf_softwire_grammar = grammar @@ -199,18 +190,35 @@ end local function ietf_binding_table_from_native(bt) local ret, key_t = cltable_for_grammar(get_ietf_softwire_grammar()) + local warn_lossy = false for softwire in bt.softwire:iterate() do local k = key_t({ binding_ipv6info = softwire.value.b4_ipv6 }) - local v = { - binding_ipv4_addr = softwire.key.ipv4, - port_set = { - psid_offset = softwire.value.port_set.reserved_ports_bit_count, - psid_len = softwire.value.port_set.psid_length, - psid = softwire.key.psid - }, - br_ipv6_addr = softwire.value.br_address, - } - ret[k] = v + if ret[k] ~= nil then + -- If two entries in the native softwire table have the same key in + -- the ietf-softwire-br schema, we omit the duplicate entry and print + -- a load warning to inform the user of this issue. + warn_lossy = warn_lossy or ret[k] + else + local v = { + binding_ipv4_addr = softwire.key.ipv4, + port_set = { + psid_offset = softwire.value.port_set.reserved_ports_bit_count, + psid_len = softwire.value.port_set.psid_length, + psid = softwire.key.psid + }, + br_ipv6_addr = softwire.value.br_address, + } + ret[k] = v + end + end + if warn_lossy then + io.stderr:write( + "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n".. + "WARNING: the native configuration has softwires with non-unique\n".. + "values for b4-ipv6. The binding-table returned through the\n".. + "ietf-softwire-br schema is incomplete!\n".. 
+ "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n" + ) end return ret end @@ -233,19 +241,21 @@ local function native_binding_table_from_ietf(ietf) local _, softwire_grammar = snabb_softwire_getter('/softwire-config/binding-table/softwire') local softwire_key_t = data.typeof(softwire_grammar.key_ctype) - local softwire = cltable.new({key_type=softwire_key_t}) + local softwire_value_t = data.typeof(softwire_grammar.value_ctype) + local softwire = ctable.new({key_type=softwire_key_t, + value_type=softwire_value_t}) for k,v in cltable.pairs(ietf) do local softwire_key = softwire_key_t({ipv4=v.binding_ipv4_addr, psid=v.port_set.psid}) - local softwire_value = { + local softwire_value = softwire_value_t({ br_address=v.br_ipv6_addr, b4_ipv6=k.binding_ipv6info, port_set={ psid_length=v.port_set.psid_len, reserved_ports_bit_count=v.port_set.psid_offset } - } - cltable.set(softwire, softwire_key, softwire_value) + }) + softwire:add(softwire_key, softwire_value) end return {softwire=softwire} end @@ -262,138 +272,191 @@ function ipv6_equals(a, b) return x[0] == y[0] and x[1] == y[1] end -local function ietf_softwire_br_translator () - local ret = {} - local instance_id_map = {} - local cached_config - local function instance_id_by_device(device) - local last - for id, pciaddr in ipairs(instance_id_map) do - if pciaddr == device then return id end - last = id +local function instance_name (config) + return config.softwire_config.name or 'unnamed' +end + +local function path_has_query(path, component, query) + if query then + for k, v in pairs(query) do + if path[component].query[k] ~= v then + return false + end end - if last == nil then - last = 1 - else - last = last + 1 + return true + else + for k,v in pairs(path[component].query) do + return true + end + return false + end +end + +local function path_match(path, ...) 
+ local patterns = {...} + local idx = 1 + for _, pattern in ipairs(patterns) do + for _, component in ipairs(pattern) do + if not path[idx] or path[idx].name ~= component then + return false + end + idx = idx + 1 end - instance_id_map[last] = device - return last end + return true +end + +local function ietf_softwire_br_translator () + local ret = {} + local cached_config function ret.get_config(native_config) if cached_config ~= nil then return cached_config end local int = native_config.softwire_config.internal_interface local int_err = int.error_rate_limiting local ext = native_config.softwire_config.external_interface - local br_instance, br_instance_key_t = - cltable_for_grammar(get_ietf_br_instance_grammar()) - for device, instance in pairs(native_config.softwire_config.instance) do - br_instance[br_instance_key_t({id=instance_id_by_device(device)})] = { - name = native_config.softwire_config.name, - tunnel_payload_mtu = int.mtu, - tunnel_path_mru = ext.mtu, - -- FIXME: There's no equivalent of softwire-num-threshold in - -- snabb-softwire-v1. - softwire_num_threshold = 0xffffffff, - enable_hairpinning = int.hairpinning, - binding_table = { - binding_entry = ietf_binding_table_from_native( - native_config.softwire_config.binding_table) - }, - icmp_policy = { - icmpv4_errors = { - allow_incoming_icmpv4 = ext.allow_incoming_icmp, - generate_icmpv4_errors = ext.generate_icmp_errors - }, - icmpv6_errors = { - generate_icmpv6_errors = int.generate_icmp_errors, - icmpv6_errors_rate = - math.floor(int_err.packets / int_err.period) - } + local ext_err = ext.error_rate_limiting + local instance = { + softwire_payload_mtu = int.mtu, + softwire_path_mru = ext.mtu, + -- FIXME: There's no equivalent of softwire-num-max in + -- snabb-softwire-v2. 
+ softwire_num_max = 0xffffffff, + enable_hairpinning = int.hairpinning, + binding_table = { + binding_entry = ietf_binding_table_from_native( + native_config.softwire_config.binding_table) + }, + icmp_policy = { + icmpv4_errors = { + allow_incoming_icmpv4 = ext.allow_incoming_icmp, + generate_icmpv4_errors = ext.generate_icmp_errors, + icmpv4_rate = math.floor(ext_err.packets / ext_err.period) + }, + icmpv6_errors = { + generate_icmpv6_errors = int.generate_icmp_errors, + icmpv6_rate = math.floor(int_err.packets / int_err.period) } - } - end + } + } cached_config = { br_instances = { - binding = { br_instance = br_instance } + binding = { + bind_instance = { + [instance_name(native_config)] = instance + } + } } } return cached_config end - function ret.get_state(native_state) - -- Even though this is a different br-instance node, it is a - -- cltable with the same key type, so just re-use the key here. - local br_instance, br_instance_key_t = - cltable_for_grammar(get_ietf_br_instance_grammar()) - for device, instance in pairs(native_state.softwire_config.instance) do - local c = instance.softwire_state - br_instance[br_instance_key_t({id=instance_id_by_device(device)})] = { - traffic_stat = { - sent_ipv4_packet = c.out_ipv4_packets, - sent_ipv4_byte = c.out_ipv4_bytes, - sent_ipv6_packet = c.out_ipv6_packets, - sent_ipv6_byte = c.out_ipv6_bytes, - rcvd_ipv4_packet = c.in_ipv4_packets, - rcvd_ipv4_byte = c.in_ipv4_bytes, - rcvd_ipv6_packet = c.in_ipv6_packets, - rcvd_ipv6_byte = c.in_ipv6_bytes, - dropped_ipv4_packet = c.drop_all_ipv4_iface_packets, - dropped_ipv4_byte = c.drop_all_ipv4_iface_bytes, - dropped_ipv6_packet = c.drop_all_ipv6_iface_packets, - dropped_ipv6_byte = c.drop_all_ipv6_iface_bytes, - dropped_ipv4_fragments = 0, -- FIXME - dropped_ipv4_bytes = 0, -- FIXME - ipv6_fragments_reassembled = c.in_ipv6_frag_reassembled, - ipv6_fragments_bytes_reassembled = 0, -- FIXME - out_icmpv4_error_packets = c.out_icmpv4_error_packets, - out_icmpv6_error_packets 
= c.out_icmpv6_error_packets, - hairpin_ipv4_bytes = c.hairpin_ipv4_bytes, - hairpin_ipv4_packets = c.hairpin_ipv4_packets, - active_softwire_num = 0, -- FIXME - } - } - end + function ret.get_state(native_state, native_config) + local c = native_state.softwire_state + local traffic_stat = { + discontinuity_time = c.discontinuity_time, + sent_ipv4_packets = c.out_ipv4_packets, + sent_ipv4_bytes = c.out_ipv4_bytes, + sent_ipv6_packets = c.out_ipv6_packets, + sent_ipv6_bytes = c.out_ipv6_bytes, + rcvd_ipv4_packets = c.in_ipv4_packets, + rcvd_ipv4_bytes = c.in_ipv4_bytes, + rcvd_ipv6_packets = c.in_ipv6_packets, + rcvd_ipv6_bytes = c.in_ipv6_bytes, + dropped_ipv4_packets = c.drop_all_ipv4_iface_packets, + dropped_ipv4_bytes = c.drop_all_ipv4_iface_bytes, + dropped_ipv6_packets = c.drop_all_ipv6_iface_packets, + dropped_ipv6_bytes = c.drop_all_ipv6_iface_bytes, + dropped_ipv4_fragments = 0, -- FIXME + dropped_ipv4_bytes = 0, -- FIXME + ipv6_fragments_reassembled = c.in_ipv6_frag_reassembled, + ipv6_fragments_bytes_reassembled = 0, -- FIXME + out_icmpv4_error_packets = c.out_icmpv4_packets, + out_icmpv4_error_bytes = c.out_icmpv4_bytes, + out_icmpv6_error_packets = c.out_icmpv6_packets, + out_icmpv6_error_bytes = c.out_icmpv6_bytes, + dropped_icmpv4_packets = c.drop_in_by_policy_icmpv4_packets, + dropped_icmpv4_bytes = c.drop_in_by_policy_icmpv4_bytes, + hairpin_ipv4_bytes = c.hairpin_ipv4_bytes, + hairpin_ipv4_packets = c.hairpin_ipv4_packets, + active_softwire_num = 0, -- FIXME + } return { br_instances = { - binding = { br_instance = br_instance } + binding = { + bind_instance = { + [instance_name(native_config)] = { + traffic_stat = traffic_stat + } + } + } } } end - local function sets_whole_table(path, count) - if #path > count then return false end - if #path == count then - for k,v in pairs(path[#path].query) do return false end - end - return true - end function ret.set_config(native_config, path_str, arg) path = path_mod.parse_path(path_str) - local 
br_instance_paths = {'br-instances', 'binding', 'br-instance'} - local bt_paths = {'binding-table', 'binding-entry'} - - -- Can't actually set the instance itself. - if #path <= #br_instance_paths then - error("Unspported path: "..path_str) + local bind_instance_path = {'br-instances', 'binding', 'bind-instance'} + local binding_entry_path = {'binding-table', 'binding-entry'} + + if #path >= #bind_instance_path and + not path_has_query(path, #bind_instance_path, + {name=instance_name(native_config)}) + then + error(("Instance name does not match this instance (%s): %s") + :format(instance_name(native_config), + path[#bind_instance_path].query.name)) end - -- Handle special br attributes (tunnel-payload-mtu, tunnel-path-mru, softwire-num-threshold). - if #path > #br_instance_paths then - local maybe_leaf = path[#path].name + -- Handle special br attributes (softwire-payload-mtu, + -- softwire-path-mru, ..., icmp-policy). + if #path == #bind_instance_path+1 and + path_match(path, bind_instance_path) + then + local leaf = path[#path].name local path_tails = { - ['tunnel-payload-mtu'] = 'internal-interface/mtu', - ['tunnel-path-mtu'] = 'external-interface/mtu', + ['softwire-payload-mtu'] = 'internal-interface/mtu', + ['softwire-path-mru'] = 'external-interface/mtu', ['name'] = 'name', - ['enable-hairpinning'] = 'internal-interface/hairpinning', + ['enable-hairpinning'] = 'internal-interface/hairpinning' + } + local path_tail = path_tails[leaf] + if path_tail then + return {{'set', {schema='snabb-softwire-v2', + path='/softwire-config/'..path_tail, + config=tostring(arg)}}} + else + error('unrecognized leaf: '..leaf) + end + elseif #path == #bind_instance_path+3 and + path_match(path, bind_instance_path, {'icmp-policy', 'icmpv4-errors'}) + then + local leaf = path[#path].name + local path_tails = { ['allow-incoming-icmpv4'] = 'external-interface/allow-incoming-icmp', ['generate-icmpv4-errors'] = 'external-interface/generate-icmp-errors', - ['generate-icmpv6-errors'] = 
'internal-interface/generate-icmp-errors' } - local path_tail = path_tails[maybe_leaf] + local path_tail = path_tails[leaf] if path_tail then return {{'set', {schema='snabb-softwire-v2', path='/softwire-config/'..path_tail, config=tostring(arg)}}} - elseif maybe_leaf == 'icmpv6-errors-rate' then + elseif leaf == 'icmpv4-rate' then + local head = '/softwire-config/external-interface/error-rate-limiting' + return { + {'set', {schema='snabb-softwire-v2', path=head..'/packets', + config=tostring(arg * 2)}}, + {'set', {schema='snabb-softwire-v2', path=head..'/period', + config='2'}}} + else + error('unrecognized leaf: '..leaf) + end + elseif #path == #bind_instance_path+3 and + path_match(path, bind_instance_path, {'icmp-policy', 'icmpv6-errors'}) + then + local leaf = path[#path].name + if leaf == 'generate-icmpv6-errors' then + return {{'set', {schema='snabb-softwire-v2', + path='/softwire-config/internal-interface/generate-icmp-errors', + config=tostring(arg)}}} + elseif leaf == 'icmpv6-rate' then local head = '/softwire-config/internal-interface/error-rate-limiting' return { {'set', {schema='snabb-softwire-v2', path=head..'/packets', @@ -401,117 +464,117 @@ local function ietf_softwire_br_translator () {'set', {schema='snabb-softwire-v2', path=head..'/period', config='2'}}} else - error('unrecognized leaf: '..maybe_leaf) + error('unrecognized leaf: '..leaf) end end -- Two kinds of updates: setting the whole binding table, or -- updating one entry. - if sets_whole_table(path, #br_instance_paths + #bt_paths) then + if path_match(path, bind_instance_path, binding_entry_path) then -- Setting the whole binding table. 
- if sets_whole_table(path, #br_instance_paths) then - for i=#path+1,#br_instance_paths do - arg = arg[data.normalize_id(br_instance_paths[i])] - end - local instance - for k,v in cltable.pairs(arg) do - if instance then error('multiple instances in config') end - if k.id ~= 1 then error('instance id not 1: '..tostring(k.id)) end - instance = v - end - if not instance then error('no instances in config') end - arg = instance - end - for i=math.max(#path-#br_instance_paths,0)+1,#bt_paths do - arg = arg[data.normalize_id(bt_paths[i])] - end - local bt = native_binding_table_from_ietf(arg) - return {{'set', {schema='snabb-softwire-v2', - path='/softwire-config/binding-table', - config=serialize_binding_table(bt)}}} - else - -- An update to an existing entry. First, get the existing entry. - local config = ret.get_config(native_config) - local entry_path = path_str - local entry_path_len = #br_instance_paths + #bt_paths - for i=entry_path_len+1, #path do - entry_path = dirname(entry_path) - end - local old = ietf_softwire_br_getter(entry_path)(config) - -- Now figure out what the new entry should look like. - local new - if #path == entry_path_len then - new = arg + if #path == #bind_instance_path + #binding_entry_path and + not path_has_query(path, #path) + then + local bt = native_binding_table_from_ietf(arg) + return {{'set', {schema='snabb-softwire-v2', + path='/softwire-config/binding-table', + config=serialize_binding_table(bt)}}} else - new = { - port_set = { - psid_offset = old.port_set.psid_offset, - psid_len = old.port_set.psid_len, - psid = old.port_set.psid - }, - binding_ipv4_addr = old.binding_ipv4_addr, - br_ipv6_addr = old.br_ipv6_addr - } - if path[entry_path_len + 1].name == 'port-set' then - if #path == entry_path_len + 1 then - new.port_set = arg + -- An update to an existing entry. First, get the existing entry. 
+ local config = ret.get_config(native_config) + local entry_path = path_str + local entry_path_len = #bind_instance_path + #binding_entry_path + for i=entry_path_len+1, #path do + entry_path = dirname(entry_path) + end + local old = ietf_softwire_br_getter(entry_path)(config) + -- Now figure out what the new entry should look like. + local new + if #path == entry_path_len then + new = arg + else + new = { + port_set = { + psid_offset = old.port_set.psid_offset, + psid_len = old.port_set.psid_len, + psid = old.port_set.psid + }, + binding_ipv4_addr = old.binding_ipv4_addr, + br_ipv6_addr = old.br_ipv6_addr + } + if path[entry_path_len + 1].name == 'port-set' then + if #path == entry_path_len + 1 then + new.port_set = arg + else + local k = data.normalize_id(path[#path].name) + new.port_set[k] = arg + end + elseif path[#path].name == 'binding-ipv4-addr' then + new.binding_ipv4_addr = arg + elseif path[#path].name == 'br-ipv6-addr' then + new.br_ipv6_addr = arg else - local k = data.normalize_id(path[#path].name) - new.port_set[k] = arg + error('bad path element: '..path[#path].name) end - elseif path[#path].name == 'binding-ipv4-addr' then - new.binding_ipv4_addr = arg - elseif path[#path].name == 'br-ipv6-addr' then - new.br_ipv6_addr = arg - else - error('bad path element: '..path[#path].name) end + -- Apply changes. Ensure that the port-set + -- changes are compatible with the existing configuration. + local updates = {} + local softwire_path = '/softwire-config/binding-table/softwire' + + -- Lets remove this softwire entry and add a new one. + local function q(ipv4, psid) + return string.format('[ipv4=%s][psid=%s]', ipv4_ntop(ipv4), psid) + end + local old_query = q(old.binding_ipv4_addr, old.port_set.psid) + -- FIXME: This remove will succeed but the add could fail if + -- there's already a softwire with this IPv4 and PSID. We need + -- to add a check here that the IPv4/PSID is not present in the + -- binding table. 
+ table.insert(updates, + {'remove', {schema='snabb-softwire-v2', + path=softwire_path..old_query}}) + + local config_str = string.format([[{ + ipv4 %s; + psid %s; + br-address %s; + b4-ipv6 %s; + port-set { + psid-length %s; + reserved-ports-bit-count %s; + } + }]], ipv4_ntop(new.binding_ipv4_addr), new.port_set.psid, + ipv6:ntop(new.br_ipv6_addr), + path[entry_path_len].query['binding-ipv6info'], + new.port_set.psid_len, new.port_set.psid_offset) + table.insert(updates, + {'add', {schema='snabb-softwire-v2', + path=softwire_path, + config=config_str}}) + return updates end - -- Apply changes. Ensure that the port-set - -- changes are compatible with the existing configuration. - local updates = {} - local softwire_path = '/softwire-config/binding-table/softwire' - - -- Lets remove this softwire entry and add a new one. - local function q(ipv4, psid) - return string.format('[ipv4=%s][psid=%s]', ipv4_ntop(ipv4), psid) - end - local old_query = q(old.binding_ipv4_addr, old.port_set.psid) - -- FIXME: This remove will succeed but the add could fail if - -- there's already a softwire with this IPv4 and PSID. We need - -- to add a check here that the IPv4/PSID is not present in the - -- binding table. - table.insert(updates, - {'remove', {schema='snabb-softwire-v2', - path=softwire_path..old_query}}) - - local config_str = string.format([[{ - ipv4 %s; - psid %s; - br-address %s; - b4-ipv6 %s; - port-set { - psid-length %s; - reserved-ports-bit-count %s; - } - }]], ipv4_ntop(new.binding_ipv4_addr), new.port_set.psid, - ipv6:ntop(new.br_ipv6_addr), - path[entry_path_len].query['binding-ipv6info'], - new.port_set.psid_len, new.port_set.psid_offset) - table.insert(updates, - {'add', {schema='snabb-softwire-v2', - path=softwire_path, - config=config_str}}) - return updates end + + -- Can't actually set the instance itself. + -- (Or other paths not implemented above.) 
+ error("Unspported path: "..path_str) end function ret.add_config(native_config, path_str, data) - local binding_entry_path = {'br-instances', 'binding', 'br-instance', - 'binding-table', 'binding-entry'} + local bind_instance_path = {'br-instances', 'binding', 'bind-instance'} + local binding_entry_path = {'binding-table', 'binding-entry'} local path = path_mod.parse_path(path_str) - if #path ~= #binding_entry_path then - error('unsupported path: '..path) + if #path ~= #bind_instance_path + #binding_entry_path or + not path_match(path, bind_instance_path, binding_entry_path) + then + error('unsupported path: '..path_str) + end + if not path_has_query(path, #bind_instance_path, + {name=instance_name(native_config)}) then + error(("Instance name does not match this instance (%s): %s") + :format(instance_name(native_config), + path[#bind_instance_path].query.name)) end local config = ret.get_config(native_config) local ietf_bt = ietf_softwire_br_getter(path_str)(config) @@ -526,7 +589,7 @@ local function ietf_softwire_br_translator () local key, value = entry.key, entry.value if old_bt.softwire:lookup_ptr(key) ~= nil then error('softwire already present in table: '.. 
- inet_ntop(key.ipv4)..'/'..key.psid) + ipv4_ntop(key.ipv4)..'/'..key.psid) end local config_str = string.format([[{ ipv4 %s; @@ -552,36 +615,25 @@ local function ietf_softwire_br_translator () return updates end function ret.remove_config(native_config, path_str) + local bind_instance_path = {'br-instances', 'binding', 'bind-instance'} + local binding_entry_path = {'binding-table', 'binding-entry'} local path = path_mod.parse_path(path_str) - local ietf_binding_table_path = {'softwire-config', 'binding', 'br', - 'br-instances', 'br-instance', 'binding-table'} - local ietf_instance_path = {'softwire-config', 'binding', 'br', - 'br-instances', 'br-instance'} - - if #path == #ietf_instance_path then - -- Remove appropriate instance - local ietf_instance_id = tonumber(assert(path[5].query).id) - local instance_path = "/softwire-config/instance" - - -- If it's not been populated in instance_id_map this is meaningless - -- and dangerous as they have no mapping from snabb's "device". - local function q(device) return - string.format("[device=%s]", device) + + if #path == #bind_instance_path + #binding_entry_path and + path_match(path, bind_instance_path, binding_entry_path) + then + if not path_has_query(path, #bind_instance_path, + {name=instance_name(native_config)}) then + error(("Instance name does not match this instance (%s): %s") + :format(instance_name(native_config), + path[#bind_instance_path].query.name)) end - local device = instance_id_map[ietf_instance_id] - if device then - return {{'remove', {schema='snabb-softwire-v2', - path=instance_path..q(device)}}} - else - error(string.format( - "Could not find '%s' in ietf instance mapping", ietf_instance_id - )) + if not path_has_query(path, #path) then + error('unsupported path: '..path_str) end - elseif #path == #ietf_binding_table_path then local softwire_path = '/softwire-config/binding-table/softwire' - if path:sub(-1) ~= ']' then error('unsupported path: '..path_str) end local config = 
ret.get_config(native_config) - local entry = ietf_softwire_getter(path_str)(config) + local entry = ietf_softwire_br_getter(path_str)(config) local function q(ipv4, psid) return string.format('[ipv4=%s][psid=%s]', ipv4_ntop(ipv4), psid) end @@ -594,40 +646,8 @@ local function ietf_softwire_br_translator () end function ret.pre_update(native_config, verb, path, data) -- Given the notification that the native config is about to be - -- updated, make our cached config follow along if possible (and - -- if we have one). Otherwise throw away our cached config; we'll - -- re-make it next time. - if cached_config == nil then return end - local br_instance = cached_config.br_instances.binding.br_instance - if (verb == 'remove' and - path:match('^/softwire%-config/binding%-table/softwire')) then - -- Remove a softwire. - local value = snabb_softwire_getter(path)(native_config) - for _,instance in cltable.pairs(br_instance) do - local grammar = get_ietf_softwire_grammar() - local key = path_data.prepare_table_lookup( - grammar.keys, grammar.key_ctype, {['binding-ipv6info']='::'}) - key.binding_ipv6info = value.b4_ipv6 - assert(instance.binding_table.binding_entry[key] ~= nil) - instance.binding_table.binding_entry[key] = nil - end - elseif (verb == 'add' and - path == '/softwire-config/binding-table/softwire') then - local bt = native_config.softwire_config.binding_table - for k,v in cltable.pairs( - ietf_binding_table_from_native({softwire = data})) do - for _,instance in cltable.pairs(br_instance) do - instance.binding_table.binding_entry[k] = v - end - end - elseif (verb == 'set' and path == "/softwire-config/name") then - local br = cached_config.softwire_config.binding.br - for _, instance in cltable.pairs(br_instance) do - instance.name = data - end - else - cached_config = nil - end + -- updated, throw away our cached config; we'll re-make it next time. 
+ cached_config = nil end return ret end @@ -664,7 +684,7 @@ local function compute_state_reader(schema_name) end end -local function process_states(states) +local function process_states(discontinuity_time, states) -- We need to create a summation of all the states as well as adding all the -- instance specific state data to create a total in software-state. @@ -685,6 +705,9 @@ local function process_states(states) local name, instance = next(inst_config.softwire_config.instance) unified.softwire_config.instance[name] = instance + unified.softwire_state.discontinuity_time = + yang_util.format_date_as_iso_8601(discontinuity_time) + for name, value in pairs(instance.softwire_state) do unified.softwire_state[name] = total_counter( name, unified.softwire_state, value) @@ -696,15 +719,55 @@ end function get_config_support() + -- Binding table instance cache. + local binding_table_instance + local function get_binding_table_instance(conf) + if binding_table_instance == nil then + binding_table_instance = binding_table.load(conf) + end + return binding_table_instance + end + -- Configuration discontinuity-time: this is set on startup and whenever the + -- configuration changes. + local discontinuity_time = os.time() + -- Wrap some stateless support functions with isolated, stateful effects. + local function compute_config_actions1 (...) + -- Set discontinuity-time. + discontinuity_time = os.time() + return compute_config_actions(get_binding_table_instance, ...) + end + local function compute_apps_to_restart_after_configuration_update1 (...) + return compute_apps_to_restart_after_configuration_update( + get_binding_table_instance, ...) + end + local function process_states1 (...) + return process_states(discontinuity_time, ...) 
+ end return { - compute_config_actions = compute_config_actions, + compute_config_actions = compute_config_actions1, update_mutable_objects_embedded_in_app_initargs = update_mutable_objects_embedded_in_app_initargs, compute_apps_to_restart_after_configuration_update = - compute_apps_to_restart_after_configuration_update, + compute_apps_to_restart_after_configuration_update1, compute_state_reader = compute_state_reader, - process_states = process_states, + process_states = process_states1, configuration_for_worker = configuration_for_worker, translators = { ['ietf-softwire-br'] = ietf_softwire_br_translator () } } end + +function selftest () + local bind_instance_path = {'br-instances', 'binding', 'bind-instance'} + local binding_entry_path = {'binding-table', 'binding-entry'} + local path = path_mod.parse_path( + "/br-instances/binding/bind-instance[name=test]/binding-table/binding-entry" + ) + assert(#path == #bind_instance_path + #binding_entry_path) + assert(path_match(path, bind_instance_path, binding_entry_path)) + assert(path_match(path, bind_instance_path)) + assert(not path_match(path, binding_entry_path)) + assert(path_has_query(path, #bind_instance_path)) + assert(path_has_query(path, #bind_instance_path, {name="test"})) + assert(not path_has_query(path, #bind_instance_path, {name="foo"})) + assert(not path_has_query(path, 1)) +end diff --git a/src/lib/ptree/worker.lua b/src/lib/ptree/worker.lua index 1f3ecc4ca8..971213f230 100644 --- a/src/lib/ptree/worker.lua +++ b/src/lib/ptree/worker.lua @@ -9,6 +9,7 @@ local counter = require("core.counter") local histogram = require('core.histogram') local lib = require('core.lib') local timer = require('core.timer') +local memory_info = require("lib.timers.memory_info") local alarms = require("lib.yang.alarms") local channel = require("lib.ptree.channel") local action_codec = require("lib.ptree.action_codec") @@ -19,6 +20,7 @@ local Worker = {} local worker_config_spec = { duration = {}, measure_latency = 
{default=true}, + measure_memory = {default=true}, no_report = {default=false}, report = {default={showapps=true,showlinks=true}}, Hz = {default=1000}, @@ -41,6 +43,9 @@ function new_worker (conf) local latency = histogram.create('engine/latency.histogram', 1e-6, 1e0) ret.breathe = latency:wrap_thunk(ret.breathe, engine.now) end + if conf.measure_memory then + timer.activate(memory_info.HeapSizeMonitor.new():timer()) + end return ret end diff --git a/src/lib/rrd.lua b/src/lib/rrd.lua index 75408ca57a..8e9e12b6fc 100644 --- a/src/lib/rrd.lua +++ b/src/lib/rrd.lua @@ -400,8 +400,9 @@ end local function compute_pdp_value(pdp, secs_per_pdp, dt, heartbeat_interval) -- This condition comes from upstream rrdtool. - if dt < heartbeat_interval and pdp.unknown_count < secs_per_pdp/2 then - return pdp.diff / tonumber(secs_per_pdp - pdp.unknown_count) + local unknown = tonumber(pdp.unknown_count) + if dt < heartbeat_interval and unknown < secs_per_pdp/2 then + return pdp.diff / tonumber(secs_per_pdp - unknown) else return 0/0 end diff --git a/src/lib/timers/ingress_drop_monitor.lua b/src/lib/timers/ingress_drop_monitor.lua index 5e96e573af..c1268fc04a 100644 --- a/src/lib/timers/ingress_drop_monitor.lua +++ b/src/lib/timers/ingress_drop_monitor.lua @@ -25,7 +25,7 @@ function new(args) grace_period = args.grace_period or 10, action = args.action or 'flush', tips_url = args.tips_url or default_tips_url, - last_flush = 0, + last_flush = now(), -- Start in the grace period. 
last_value = ffi.new('uint64_t[1]'), current_value = ffi.new('uint64_t[1]'), } @@ -57,8 +57,8 @@ function IngressDropMonitor:sample () sum[0] = 0 for i = 1, #app_array do local app = app_array[i] - if app.rxdrop and not app.dead then - sum[0] = sum[0] + app:rxdrop() + if app.get_rxstats and not app.dead then + sum[0] = sum[0] + app:get_rxstats().dropped end end if self.counter then diff --git a/src/lib/timers/memory_info.lua b/src/lib/timers/memory_info.lua new file mode 100644 index 0000000000..322c449dcb --- /dev/null +++ b/src/lib/timers/memory_info.lua @@ -0,0 +1,48 @@ +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. + +module(...,package.seeall) + +-- HeapSizeMonitor: periodically update a gauge to reflect the current +-- memory use of the executing process. + +local gauge = require("lib.gauge") +local lib = require("core.lib") +local shm = require("core.shm") + +local params = { + path={default='engine/memory_gc_heap_bytes'}, +} + +HeapSizeMonitor = {} + +-- Once per second. +local default_interval = 1e9 + +function HeapSizeMonitor.new(args) + args = lib.parse(args, params); + if not args.path:match(".gauge") then + args.path = args.path..".gauge" + end + + local self = {} + if not shm.exists(args.path) then + self.heap_size = gauge.create(args.path) + else + self.heap_size = gauge.open(args.path) + end + + return setmetatable(self, {__index=HeapSizeMonitor}) +end + +function HeapSizeMonitor:sample () + -- collectgarbage('count') returns a value in kilobytes; convert to + -- bytes. + gauge.set(self.heap_size, collectgarbage('count') * 1024) +end + +function HeapSizeMonitor:timer(interval) + return timer.new("heap size monitor", + function () self:sample() end, + interval or default_interval, + "repeating") +end diff --git a/src/lib/yang/binary.lua b/src/lib/yang/binary.lua index 6a27300b7f..a5cf8c7b94 100644 --- a/src/lib/yang/binary.lua +++ b/src/lib/yang/binary.lua @@ -2,6 +2,7 @@ -- COPYING. 
module(..., package.seeall) +local S = require("syscall") local ffi = require("ffi") local lib = require("core.lib") local shm = require("core.shm") @@ -301,7 +302,7 @@ end function data_compiler_from_schema(schema, is_config) local grammar = data.data_grammar_from_schema(schema, is_config) return data_compiler_from_grammar(data_emitter(grammar), - schema.id, schema.revision_date) + schema.id, schema.last_revision) end function config_compiler_from_schema(schema) @@ -474,10 +475,14 @@ end function data_copier_from_grammar(production) local compile = data_compiler_from_grammar(data_emitter(production), '') return function(data) - local basename = 'copy-'..lib.random_printable_string(160) - local tmp = shm.root..'/'..shm.resolve(basename) - compile(data, tmp) - return function() return load_compiled_data_file(tmp).data end + return function() + local basename = 'copy-'..lib.random_printable_string(160) + local tmp = shm.root..'/'..shm.resolve(basename) + compile(data, tmp) + local copy = load_compiled_data_file(tmp).data + S.unlink(tmp) + return copy + end end end diff --git a/src/lib/yang/ietf-softwire-br.yang b/src/lib/yang/ietf-softwire-br.yang index 3b7f773925..2fa0b84e96 100644 --- a/src/lib/yang/ietf-softwire-br.yang +++ b/src/lib/yang/ietf-softwire-br.yang @@ -1,304 +1,493 @@ module ietf-softwire-br { yang-version 1.1; namespace "urn:ietf:params:xml:ns:yang:ietf-softwire-br"; - prefix "softwire-br"; + prefix softwire-br; - import ietf-inet-types {prefix inet; } - import ietf-yang-types {prefix yang; } - import ietf-softwire-common {prefix softwire-common; } - - organization "Softwire Working Group"; + import ietf-inet-types { + prefix inet; + reference "Section 4 of RFC 6991"; + } + import ietf-yang-types { + prefix yang; + reference "Section 3 of RFC 6991"; + } + import ietf-softwire-common { + prefix softwire-common; + reference + "RFC XXXX: YANG Modules for IPv4-in-IPv6 Address plus Port + Softwires"; + } + organization + "IETF Softwire Working Group"; 
contact - " - Qi Sun - Hao Wang - Yong Cui - Ian - Sladjana Zoric - Mohamed Boucadair - Rajiv - "; + "WG Web: + WG List: - description - "This document defines a YANG data module for the configuration and - management of A+P Softwire Border Routers. It covers Lightweight 4over6, - MAP-E, and MAP-T mechanisms. + Author: Qi Sun + - Copyright (c) 2017 IETF Trust and the persons identified - as authors of the code. All rights reserved. - This version of this YANG module is part of RFC XXX; see the RFC - itself for full legal notices."; + Author: Linhui Sun + - revision 2017-10-19 { - description - "Update..."; - reference "-02"; - } + Author: Yong Cui + - revision 2017-06-14 { - description - "Monolithic version of ietf-softwire divided into separate CE and BR - models. Added icmp handling and improved counters."; - reference "-06"; - } - revision 2016-06-04 { - description - "Version-05: Combined MAP-E/MAP-T into a single tree. Added binding - table/algorthm versioning"; - reference "-05"; - } + Editor: Ian Farrer + - revision 2015-09-30 { - description - "Version-04: Fix YANG syntax; Add flags to map-rule; Remove - the map-rule-type element. "; - reference "-04"; - } + Author: Sladjana Zoric + - revision 2015-04-07 { - description - "Version-03: Integrate lw4over6; Updata state nodes; Correct - grammar errors; Reuse groupings; Update descriptions. - Simplify the model."; - reference "-03"; - } + Editor: Mohamed Boucadair + - revision 2015-02-10 { + Author: Rajiv Asati + "; + + description + "This document defines a YANG module for the configuration and + management of A+P Softwire Border Routers. It covers Lightweight + 4over6, MAP-E, and MAP-T mechanisms. + + Copyright (c) 2019 IETF Trust and the persons identified as + authors of the code. All rights reserved. 
+ + Redistribution and use in source and binary forms, with or + without modification, is permitted pursuant to, and subject + to the license terms contained in, the Simplified BSD License + set forth in Section 4.c of the IETF Trust's Legal Provisions + Relating to IETF Documents + (http://trustee.ietf.org/license-info). + + This version of this YANG module is part of RFC XXXX; see + the RFC itself for full legal notices."; + + revision 2019-01-11 { description - "Version-02: Add notifications."; - reference "-02"; + "Initial revision."; + reference + "RFC XXXX: YANG Modules for IPv4-in-IPv6 Address plus Port + Softwires"; } + /* + * Groupings + */ - revision 2015-02-06 { + grouping port-set { description - "Version-01: Correct grammar errors; Reuse groupings; Update - descriptions."; - reference "-01"; + "Describes a set of layer 4 port numbers. + + This may be a simple port range, or use the Port Set + Identifier (PSID) algorithm to represent a range of transport + layer ports which will be used by a NAPT."; + leaf psid-offset { + type uint8 { + range "0..16"; + } + description + "The number of offset bits. In Lightweight 4over6, + the default value is 0 for assigning one contiguous + port range. In MAP-E/T, the default value is 6, + which means the system ports (0-1023) are excluded by + default and the assigned port ranges are distributed across + the entire port space, depending on either psid-len or the + number of contiguous ports."; + } + leaf psid-len { + type uint8 { + range "0..15"; + } + mandatory true; + description + "The length of PSID, representing the sharing + ratio for an IPv4 address. 
This, along with ea-len, can + be used to calculate the number of contiguous ports per + port range"; + } + leaf psid { + type uint16; + mandatory true; + description + "Port Set Identifier (PSID) value, which + identifies a set of ports algorithmically."; + } } - revision 2015-02-02 { + grouping binding-entry { description - "Initial revision."; - reference "-00"; + "The binding BR maintains an address binding table that + contains the binding between the CE's IPv6 address, + the allocated IPv4 address and restricted port-set."; + leaf binding-ipv6info { + type union { + type inet:ipv6-address; + type inet:ipv6-prefix; + } + description + "The IPv6 information for a CE binding entry. + When the IPv6 prefix type is used, + the IPv6 source address of the CE is constructed + according to the description in RFC7596; + if the IPv6 address type is used, the CE can use + any valid /128 address from a prefix assigned to + the CE."; + reference + "RFC7596: Lightweight 4over6: An Extension to the Dual-Stack + Lite Architecture"; + } + leaf binding-ipv4-addr { + type inet:ipv4-address; + description + "The IPv4 address assigned to the binding CE, + which is used as the IPv4 external address + for binding CE local NAPT44."; + } + container port-set { + description + "For Lightweight 4over6, the default value + for offset should be 0, to configure one contiguous + port range."; + uses port-set { + refine "psid-offset" { + default "0"; + } + } + } + leaf br-ipv6-addr { + type inet:ipv6-address; + description + "The IPv6 address for binding BR."; + } } /* * Features */ - feature binding { + feature binding-mode { description - "Binding is used for configuring Lightweight 4over6 mechanism. + "Binding is used for configuring the Lightweight 4over6 mechanism. - Binding softwire mechanisms are IPv4-over-IPv6 tunnelling transition mechanisms specifically for complete independence between IPv6 subnet prefix (and /128 IPv6 address) and IPv4 address with or without IPv4 address sharing. 
+ Binding based softwire mechanisms are IPv4-over-IPv6 tunnelling + transition mechanisms specifically intended for complete + independence between the IPv6 subnet prefix (and IPv6 address) + and IPv4 address, with or without IPv4 address sharing. - This is accomplished by maintaining state for each softwire (per-subscriber state) in the central Border Relay (BR) and a hub-and-spoke forwarding architecture. In order to delegate the NAPT function and achieve IPv4 address sharing, port-restricted IPv4 addresses needs to be allocated to CEs."; + This is accomplished by maintaining state for each softwire + (per-subscriber state) in the central Border Relay (BR) and using + a hub-and-spoke forwarding architecture. In order to delegate the + NAPT function and achieve IPv4 address sharing, port-restricted + IPv4 addresses need to be allocated to CEs. + This feature indicates that the network element can function as + one or more binding based softwire instances."; reference - "RFC7596, RFC7597 & RFC7599"; + "RFC7596: Lightweight 4over6: An Extension to the Dual-Stack Lite + Architecture + RFC7597: Mapping of Address and Port with Encapsulation (MAP-E) + RFC7599: Mapping of Address and Port using Translation (MAP-T)"; } - feature algorithm { + feature map-e { description "MAP-E is an IPv6 transition mechanism for transporting IPv4 - packets across an IPv6 network using IP encapsulation. MAP-E - allows for a reduction of the amount of centralized state using - rules to express IPv4/IPv6 address mappings. This introduces an - algorithmic relationship between the IPv6 subnet and IPv4 address. - - MAP-T is an IPv6 transition mechanism for transporting IPv4 packets across an IPv6 network using IP translation. It leverages double stateless NAT64 based solution as well as the stateless algorithmic address & transport layer port mapping algorithm defined for MAP-E.
- - This feature indicates the instance functions as a MAP-E or - MAP-T instance."; + packets across an IPv6 network using IP encapsulation. MAP-E + allows for a reduction of the amount of centralized state using + rules to express IPv4/IPv6 address mappings. This introduces an + algorithmic relationship between the IPv6 subnet and IPv4 + address. + + This feature indicates that the network element can function as + one or more MAP-E softwire instances."; + reference + "RFC7597: Mapping of Address and Port with Encapsulation (MAP-E)"; + } + feature map-t { + description + "MAP-T is an IPv6 transition mechanism for transporting IPv4 + packets across an IPv6 network using IP translation. It leverages + a double stateless NAT64 based solution as well as the stateless + algorithmic address & transport layer port mapping algorithm + defined for MAP-E. + + This feature indicates that the network element can function as + one or more MAP-T softwire instances."; reference - "RFC7597 & RFC7599"; + "RFC7599: Mapping of Address and Port using Translation (MAP-T)"; } container br-instances { description - "BR Instances"; - + "BR instances enabled in a network element."; choice br-type { description "Select binding or algorithmic BR functionality."; case binding { - if-feature binding; + if-feature "binding-mode"; container binding { - if-feature binding; description - "lw4over6 (binding table) configuration."; - list br-instance { - key "id"; + "binding mechanism (binding table) configuration."; + list bind-instance { + key "name"; description - "A set of lwAFTRs to be configured."; + "A set of binding instances to be configured."; + leaf name { + type string; + mandatory true; + description + "The name for the binding BR. 
It is used to uniquely + distinguish a binding instance by its name."; + } container binding-table-versioning { - description "binding table's version"; - leaf version{ + description + "binding table's version"; + leaf version { type uint64; - description "Incremental version number of the binding - table"; + description + "Version number for this binding table."; } leaf date { type yang:date-and-time; - description "Timestamp of the binding - table"; + description + "Timestamp when the binding table was activated. + + A binding instance may be provided with binding + entries that may change in time (e.g., increase + the size of the port set). When a party who is the + victim of abuse presents an external IP address/port, + the version of the binding table is important + because depending on the version, a distinct customer + may be identified. + + The timestamp is used as a key to find the + appropriate binding table that was put into effect + when an abuse occurred."; + reference + "RFC7422: Deterministic Address Mapping to Reduce + Logging in Carrier-Grade NAT Deployments"; } } - leaf id { - type uint32; - mandatory true; - description "An instance identifier."; - } - leaf name { - type string; - description "The name for the lwaftr."; - } - leaf softwire-num-threshold { - type uint32; + leaf softwire-num-max { + type uint32 { + range "1..max"; + } mandatory true; description - "The maximum number of softwires that can be created on - the lwAFTR."; + "The maximum number of softwires that can be created + on the binding BR."; } - leaf softwires-payload-mtu { + leaf softwire-payload-mtu { type uint16; - units bytes; + units "bytes"; mandatory true; description - "The payload MTU for Lightweight 4over6 softwire."; + "The payload IPv4 MTU for binding softwire."; } leaf softwire-path-mru { type uint16; - units bytes; + units "bytes"; mandatory true; description - "The path MRU for Lightweight 4over6 softwire."; + "The path MRU for binding softwire."; + reference + 
"RFC4213: Basic Transition Mechanisms for IPv6 Hosts + and Routers"; } leaf enable-hairpinning { type boolean; - default true; + default "true"; description "Enables/disables support for locally forwarding - (hairpinning) traffic between two CEs (RFC7596 - Section 6.2)"; + (hairpinning) traffic between two CEs."; + reference "Section 6.2 of RFC7596"; } container binding-table { - description "binding table"; + description + "binding table"; list binding-entry { key "binding-ipv6info"; - description "binding entry"; - uses softwire-common:binding-entry; + description + "binding entry"; + uses binding-entry; } } container icmp-policy { description - "The lwAFTR can be configured to process or drop incoming ICMP - messages, and to generate outgoing ICMP error messages or - not."; - + "The binding BR can be configured to process or drop + incoming ICMP messages, and to generate outgoing ICMP + error messages."; container icmpv4-errors { description "ICMPv4 error processing configuration"; leaf allow-incoming-icmpv4 { type boolean; - default true; + default "true"; description - "Whether to allow processing of incoming ICMPv4 packets. 
- (RFC7596 )"; + "Enables the processing of incoming ICMPv4 + packets."; + reference + "RFC7596: Lightweight 4over6: An Extension to + the Dual-Stack Lite Architecture"; + } + leaf icmpv4-rate { + type uint32; + description + "Rate limit threshold in messages per-second + for processing incoming ICMPv4 error messages"; } - leaf generate-icmpv4-errors { type boolean; - default true; + default "true"; description - "Whether to generate outgoing ICMP error messages on - receipt of an inbound IPv4 packet with no matching - binding table entry (RFC7596 Seciton 5.2)."; + "Enables the generation of outgoing ICMPv4 error + messages on receipt of an inbound IPv4 packet + with no matching binding table entry."; + reference "Section 5.2 of RFC7596."; } } - container icmpv6-errors { description "ICMPv6 error processing configuration"; leaf generate-icmpv6-errors { type boolean; - default true; + default "true"; description - "Whether to generate ICMPv6 errors messages if no - matching binding table entry is found (RFC7596 - Section 6.2)"; + "Enables the generation of ICMPv6 error messages if + no matching binding table entry is found for a + received packet."; + reference "Section 6.2 of RFC7596."; } - leaf icmpv6-errors-rate { - type uint16; + leaf icmpv6-rate { + type uint32; description "Rate limit threshold in messages per-second - for sending ICMPv6 errors messages (RFC7596 - Section 9.)"; + for sending ICMPv6 error messages"; + reference "Section 9 of RFC7596."; } } } - container traffic-stat { config false; description - "traffic-stat"; + "Traffic statistics information for the BR."; + leaf discontinuity-time { + type yang:date-and-time; + mandatory true; + description + "The time of the most recent occasion on which the BR + instance suffered a discontinuity.
This must be + initialized when the BR instance is configured + or rebooted."; + } uses softwire-common:traffic-stat; - - leaf hairpin-ipv4-bytes { + leaf dropped-icmpv4-packets { type yang:zero-based-counter64; - description "IPv4 packets locally routed between two CEs - (hairpinned)."; + description + "ICMPv4 packets that are dropped as a result + of the ICMP policy. Typically, this can be any + incoming ICMPv4 packets if ICMPv4 processing is + disabled or incoming ICMPv4 packets that exceed + the ICMPv4 rate-limit threshold. + + Discontinuities in the value of this counter can + occur at re-initialization of the management + system, and at other times as indicated by + the value of 'discontinuity-time'."; + } + leaf dropped-icmpv4-bytes { + type yang:zero-based-counter64; + description + "ICMPv4 messages, in bytes, that are dropped as + a result of the ICMP policy. Typically, it + can be any incoming ICMPv4 packets if ICMPv4 + processing is disabled or incoming ICMPv4 + packets that exceed the ICMPv4 rate-limit + threshold. + + Discontinuities in the value of this counter can + occur at re-initialization of the management + system, and at other times as indicated by + the value of 'discontinuity-time'."; } - leaf hairpin-ipv4-packets { type yang:zero-based-counter64; - description "IPv4 bytes locally routed between two CEs - (hairpinned)."; + description + "IPv4 packets locally routed between two CEs + (hairpinned). + + Discontinuities in the value of this counter can + occur at re-initialization of the management + system, and at other times as indicated by + the value of 'discontinuity-time'."; } + leaf hairpin-ipv4-bytes { + type yang:zero-based-counter64; + description + "IPv4 bytes locally routed between two CEs + (hairpinned). 
+ Discontinuities in the value of this counter can + occur at re-initialization of the management + system, and at other times as indicated by + the value of 'discontinuity-time'."; + } leaf active-softwire-num { type uint32; + config false; description "The number of currently active softwires on the - lw4over6 (binding) instance."; + binding instance. + + Discontinuities in the value of this counter can + occur at re-initialization of the management + system, and at other times as indicated by + the value of 'discontinuity-time'."; } } } } } - case algorithm { - if-feature algorithm; + case algo { + if-feature "map-e or map-t"; container algorithm { - if-feature algorithm; description - "Indicate the instances support the MAP-E and MAP-T function. - The instances advertise the map-e/map-t feature through the - capability exchange mechanism when a NETCONF session is - established."; + " A set of parameters used for MAP-E/MAP-T."; list algo-instance { - key "id"; - description "Instances of algorithm"; - leaf id { - type uint32; - mandatory true; - description "id"; - } + key "name"; + description + "Instances of algorithm"; leaf name { type string; - description "The MAP instance name."; - } - uses softwire-common:algorithm { - augment "algo-instances/algo-instance"{ - description "Augments the port-set group for the algorithm."; - uses softwire-common:port-set; + mandatory true; + description + "The name is used to uniquely identify an algorithm + instance. + + This name can be automatically assigned + or explicitly configured."; } + uses softwire-common:algorithm-instance; + container port-set { + description + "Indicates a set of ports."; + uses port-set; } container traffic-stat { + config false; description - "traffic-stat"; + "Traffic statistics information for the BR."; + leaf discontinuity-time { + type yang:date-and-time; + mandatory true; + description + "The time of the most recent occasion on which the BR + instance suffered a discontinuity. 
This must be + reset to the current date-and-time when the BR + instance is configured or rebooted."; + } uses softwire-common:traffic-stat; } } @@ -310,79 +499,84 @@ module ietf-softwire-br { /* * Notifications */ - notification softwire-br-event { - if-feature binding; - description "Notifications for BR."; - leaf br-id { + + notification softwire-binding-instance-event { + if-feature "binding-mode"; + description + "Notifications for binding instance when an entry is + added, modified, or is not valid anymore."; + leaf bind-name { type leafref { - path - "/br-instances/binding/" - + "br-instance/id"; + path "/br-instances/binding/bind-instance/name"; } - description "..."; + description + "The name of the binding-instance that + generated the notification."; } leaf-list invalid-entry { type leafref { path "/br-instances/binding/" - + "br-instance[id=current()/../br-id]/" + + "bind-instance[name=current()/../bind-name]/" + "binding-table/binding-entry/binding-ipv6info"; } description - "Notify the client that a specific binding entry has been - expired/invalid. The binding-ipv6info identifies an entry."; + "Notify the client that a specific binding entry has + expired or is invalid. The binding-ipv6info identifies + an entry."; } leaf-list added-entry { type inet:ipv6-address; description "Notify the client that a binding entry has been added. - The ipv6 address of that entry is the index. The client - get other information from the lwaftr about the entry - indexed by that ipv6 address. - "; + The ipv6 address of that entry is the index. 
The client + gets other information from the binding BR about the entry + indexed by that ipv6 address."; } leaf-list modified-entry { type leafref { path "/br-instances/binding/" - + "br-instance[id=current()/../br-id]/" + + "bind-instance[name=current()/../bind-name]/" + "binding-table/binding-entry/binding-ipv6info"; } - description "..."; + description + "The binding-table entry that has been modified."; } } - notification softwire-algorithm-instance-event { - if-feature algorithm; - description "Notifications for MAP-E or MAP-T."; - leaf algo-id { + if-feature "map-e or map-t"; + description + "Notifications for algorithm instance when an entry is + added, modified, or is not valid anymore."; + leaf algo-name { type leafref { - path - "/br-instances/algorithm/algo-instance/id"; + path "/br-instances/algorithm/algo-instance/name"; } mandatory true; - description "MAP-E or MAP-T event."; + description + "algorithmic instance event."; } - leaf-list invalid-entry-id { + leaf-list invalid-entry { type leafref { - path - "/br-instances/algorithm/algo-instance/id"; + path "/br-instances/algorithm/algo-instance/name"; } - description "Invalid entry event."; + description + "Invalid entry event."; } leaf-list added-entry { type leafref { - path - "/br-instances/algorithm/algo-instance/id"; + path "/br-instances/algorithm/algo-instance/name"; } - description "Added entry."; + description + "Added entry."; } leaf-list modified-entry { type leafref { - path - "/br-instances/algorithm/algo-instance/id"; + path "/br-instances/algorithm/algo-instance/name"; } - description "Modified entry."; + description + "Modified entry."; } } } diff --git a/src/lib/yang/ietf-softwire-common.yang b/src/lib/yang/ietf-softwire-common.yang index b375717627..c563fe55c2 100644 --- a/src/lib/yang/ietf-softwire-common.yang +++ b/src/lib/yang/ietf-softwire-common.yang @@ -1,298 +1,400 @@ module ietf-softwire-common { yang-version 1.1; namespace "urn:ietf:params:xml:ns:yang:ietf-softwire-common"; 
- prefix "softwire-common"; + prefix softwire-common; - import ietf-inet-types { prefix inet; } - import ietf-yang-types { prefix yang; } - - organization "Softwire Working Group"; + import ietf-inet-types { + prefix inet; + reference "Section 4 of RFC 6991"; + } + import ietf-yang-types { + prefix yang; + reference "Section 3 of RFC 6991"; + } + organization + "IETF Softwire Working Group"; contact - " - Qi Sun - Hao Wang - Yong Cui - Ian - Sladjana Zoric - Mohamed Boucadair - Rajiv - "; + "WG Web: + WG List: + + Author: Qi Sun + + + Author: Linhui Sun + + + Author: Yong Cui + + + Editor: Ian Farrer + + Author: Sladjana Zoric + + + Editor: Mohamed Boucadair + + Author: Rajiv Asati + "; description - "This document defines a YANG data model for the configuration and - management of A+P Softwire Customer Premises Equipment (CEs). It - covers Lightweight 4over6, MAP-E and MAP-T mechanisms. + "This document defines a YANG module defining types + common to all A+P modules. - Copyright (c) 2017 IETF Trust and the persons identified - as authors of the code. All rights reserved. - This version of this YANG module is part of RFC XXX; see the RFC - itself for full legal notices."; + Copyright (c) 2019 IETF Trust and the persons identified as + authors of the code. All rights reserved. - revision 2017-10-19 { + Redistribution and use in source and binary forms, with or + without modification, is permitted pursuant to, and subject + to the license terms contained in, the Simplified BSD License + set forth in Section 4.c of the IETF Trust's Legal Provisions + Relating to IETF Documents + (http://trustee.ietf.org/license-info). 
+ + This version of this YANG module is part of RFC XXXX; see + the RFC itself for full legal notices."; + + revision 2019-01-11 { description - "Initial version of containing a model for common softwire elements."; - reference "-02"; + "Initial revision."; + reference + "RFC XXXX: YANG Modules for IPv4-in-IPv6 Address plus Port + Softwires"; } - /* - * Grouping - */ - - grouping port-set { + feature map-e { description - "Indicates a set of ports. - It may be a simple port range, or use the PSID algorithm - to represent a range of transport layer ports which will - be used by a NAPT."; - - leaf psid-offset { - type uint8 { - range 0..16; - } - description - "The number of offset bits. In Lightweight 4over6, - the default value is 0 for assigning one contiguous - port range. In MAP-E/T, the default value is 6, - which means the system ports (0-1023) are excluded by - default and assigns port ranges distributed across the - entire port space, depending on either psid-len or the - number of contiguous ports."; - } + "MAP-E is an IPv6 transition mechanism for transporting IPv4 + packets across an IPv6 network using IP encapsulation. MAP-E + allows for a reduction of the amount of centralized state using + rules to express IPv4/IPv6 address mappings. This introduces an + algorithmic relationship between the IPv6 subnet and IPv4 + address. - leaf psid-len { - type uint8 { - range 0..15; - } - mandatory true; - description - "The length of PSID, representing the sharing - ratio for an IPv4 address. 
This, along with ea-len, also - helps to calculate the number of contiguous ports per - port range"; - } + This feature indicates that the network element can function as + one or more MAP-E softwire instances."; + reference + "RFC7597: Mapping of Address and Port with Encapsulation (MAP-E)"; + } - leaf psid { - type uint16; - mandatory true; - description - "Port Set Identifier (PSID) value, which - identifies a set of ports algorithmically."; - } + feature map-t { + description + "MAP-T is an IPv6 transition mechanism for transporting IPv4 + packets across an IPv6 network using IP translation. It leverages + a double stateless NAT64 based solution as well as the stateless + algorithmic address & transport layer port mapping algorithm + defined for MAP-E. + + This feature indicates that the network element can function as + one or more MAP-T softwire instances."; + reference + "RFC7599: Mapping of Address and Port using Translation (MAP-T)"; } - grouping binding-entry { + /* + * Groupings + */ + + grouping algorithm-instance { description - "The lwAFTR maintains an address binding table that contains - the binding between the lwB4's IPv6 address, the allocated IPv4 - address and restricted port-set."; - leaf binding-ipv6info { - type union { - type inet:ipv6-address; - type inet:ipv6-prefix; - } - description - "The IPv6 information for a binding entry. - If this is an IPv6 prefix, it indicates that - the IPv6 source address of the CE is constructed - according to the description in RFC7596; - if it is an IPv6 address, it means the CE uses - any /128 address from the assigned CE prefix. 
- "; - } - leaf binding-ipv4-addr { - type inet:ipv4-address; + "A collection of parameters that is used for MAP-E/MAP-T."; + leaf enable { + type boolean; description - "The IPv4 address assigned to the lwB4, which is - used as the IPv4 external address - for lwB4 local NAPT44."; + "Enable/disable an individual MAP-E or MAP-T rule."; } - container port-set { + container algo-versioning { description - "For Lightweight 4over6, the default value - of offset should be 0, to configure one contiguous - port range."; - uses port-set { - refine "psid-offset" { - default "0"; - } + "Version number for this algorithm instance"; + leaf version { + type uint64; + description + "A version number for the mapping algorithm + rules provided to the algorithm instance"; } - } - leaf br-ipv6-addr { - type inet:ipv6-address; - description - "The IPv6 address for lwaftr."; - } - } + leaf date { + type yang:date-and-time; + description + "Timestamp when the algorithm instance was activated. - grouping algorithm { - description - "Indicate the instances support the MAP-E and MAP-T function. - The instances advertise the map-e feature through the - capability exchange mechanism when a NETCONF session is - established."; - container algo-instances { + An algorithm instance may be provided with mapping + rules that may change in time (for example, increase + the size of the port set). When a party who is the victim + of abuse presents an external IP address/port, the version + of the algorithm is important because depending on + the version, a distinct customer may be identified. + + The timestamp is used as a key to find the appropriate + algorithm that was put into effect when an abuse + occurred. "; + reference + "RFC7422: Deterministic Address Mapping to Reduce + Logging in Carrier-Grade NAT Deployments"; + } + } + choice data-plane { description - "A set of MAP-E or MAP-T instances to be configured, - applying to BRs and CEs. 
A MAP-E/T instance defines a MAP - domain comprising one or more MAP-CE and MAP-BR"; - list algo-instance { - key "id"; - description "MAP forwarding rule instance for MAP-E/MAP-T"; - leaf enable { - type boolean; - description - "Enable/disable individual MAP-E or MAP-T rule."; - } - container algo-versioning { - description "algorithm's version"; - leaf version { - type uint64; - description "Incremental version number for the algorithm"; - } - leaf date { - type yang:date-and-time; - description "Timestamp to the algorithm"; - } - } - leaf id { - type uint32; - mandatory true; - description "Algorithm Instance ID"; - } - leaf name { - type string; - description "The name for the instance."; - } - choice data-plane { - description "Selects MAP-E (encapsulation) or MAP-T (translation)"; - case encapsulation { - description "encapsulation for MAP-E"; - leaf br-ipv6-addr { - type inet:ipv6-address; - mandatory true; - description - "The IPv6 address of the MAP-E BR."; - } - } - case translation { - description "translation for MAP-T"; - leaf dmr-ipv6-prefix { - type inet:ipv6-prefix; - description - "The IPv6 prefix of the MAP-T BR. "; - } - } - } - leaf ea-len { - type uint8; + "Selects MAP-E (encapsulation) or MAP-T + (translation)"; + case encapsulation { + if-feature "map-e"; + description + "encapsulation for MAP-E"; + leaf br-ipv6-addr { + type inet:ipv6-address; mandatory true; description - "Embedded Address (EA) bits are the IPv4 EA-bits in the IPv6 - address identify an IPv4 prefix/address (or part thereof) or - a shared IPv4 address (or part thereof) and a port-set identifier. 
- The length of the EA-bits is defined as part of a MAP rule for a - MAP domain."; + "The IPv6 address of the MAP-E BR."; } - leaf rule-ipv6-prefix { + } + case translation { + if-feature "map-t"; + description + "translation for MAP-T"; + leaf dmr-ipv6-prefix { type inet:ipv6-prefix; - mandatory true; - description - "The Rule IPv6 prefix defined in the mapping rule."; - } - leaf rule-ipv4-prefix { - type inet:ipv4-prefix; - mandatory true; description - "The Rule IPv4 prefix defined in the mapping rule."; - } - leaf forwarding { - type boolean; - mandatory true; - description - "This parameter specifies whether the rule may be used for - forwarding (FMR). If set, this rule is used as an FMR; - if not set, this rule is a BMR only and must not be used - for forwarding."; + "The IPv6 prefix of the MAP-T BR."; } } } + leaf ea-len { + type uint8; + mandatory true; + description + "Embedded Address (EA) bits are the IPv4 EA-bits in the IPv6 + address identifying an IPv4 prefix/address (or part thereof) + or a shared IPv4 address (or part thereof) and a port-set + identifier. The length of the EA-bits is defined as part of + a MAP rule for a MAP domain."; + } + leaf rule-ipv6-prefix { + type inet:ipv6-prefix; + mandatory true; + description + "The Rule IPv6 prefix defined in the mapping rule."; + } + leaf rule-ipv4-prefix { + type inet:ipv4-prefix; + mandatory true; + description + "The Rule IPv4 prefix defined in the mapping rule."; + } + leaf forwarding { + type boolean; + mandatory true; + description + "This parameter specifies whether the rule may be used for + forwarding (FMR). 
If set, this rule is used as an FMR; + if not set, this rule is a Basic Mapping Rule (BMR) only + and must not be used for forwarding."; + } } grouping traffic-stat { - description "Traffic statistics"; - leaf sent-ipv4-packet { + description + "Traffic statistics"; + leaf sent-ipv4-packets { type yang:zero-based-counter64; - description "Number of decapsulated/translated IPv4 packets sent."; + description + "Number of decapsulated and forwarded IPv4 packets. + + Discontinuities in the value of this counter can occur + at re-initialization of the management system, and at + other times as indicated by the value of + 'discontinuity-time'."; } - leaf sent-ipv4-byte { + leaf sent-ipv4-bytes { type yang:zero-based-counter64; - description "Decapsulated/translated IPv4 traffic sent, in bytes"; + description + "Decapsulated/translated IPv4 traffic sent, in bytes + + Discontinuities in the value of this counter can occur + at re-initialization of the management system, and at + other times as indicated by the value of + 'discontinuity-time'."; } - leaf sent-ipv6-packet { + leaf sent-ipv6-packets { type yang:zero-based-counter64; - description "Number of encapsulated/translated IPv6 packets sent."; + description + "Number of encapsulated IPv6 packets sent. 
+ + Discontinuities in the value of this counter can occur + at re-initialization of the management system, and at + other times as indicated by the value of + 'discontinuity-time'."; } - leaf sent-ipv6-byte { + leaf sent-ipv6-bytes { type yang:zero-based-counter64; - description "Encapsulated/translated IPv6 traffic sent, in bytes"; + description + "Encapsulated IPv6 traffic sent, in bytes + + Discontinuities in the value of this counter can occur + at re-initialization of the management system, and at + other times as indicated by the value of + 'discontinuity-time'."; } - leaf rcvd-ipv4-packet { + leaf rcvd-ipv4-packets { type yang:zero-based-counter64; - description "Number of IPv4 packets received for processing."; + description + "Number of IPv4 packets received. + + Discontinuities in the value of this counter can occur + at re-initialization of the management system, and at + other times as indicated by the value of + 'discontinuity-time'."; } - leaf rcvd-ipv4-byte { + leaf rcvd-ipv4-bytes { type yang:zero-based-counter64; - description "IPv4 traffic received for processing, in bytes"; + description + "IPv4 traffic received, in bytes. + + Discontinuities in the value of this counter can occur + at re-initialization of the management system, and at + other times as indicated by the value of + 'discontinuity-time'."; } - leaf rcvd-ipv6-packet { + leaf rcvd-ipv6-packets { type yang:zero-based-counter64; - description "Number of IPv6 packets received for processing."; + description + "Number of IPv4-in-IPv6 packets received. + + Discontinuities in the value of this counter can occur + at re-initialization of the management system, and at + other times as indicated by the value of + 'discontinuity-time'."; } - leaf rcvd-ipv6-byte { + leaf rcvd-ipv6-bytes { type yang:zero-based-counter64; - config false; - description "IPv6 traffic received for processing, in bytes"; + description + "IPv4-in-IPv6 traffic received, in bytes. 
+ + Discontinuities in the value of this counter can occur + at re-initialization of the management system, and at + other times as indicated by the value of + 'discontinuity-time'."; } - leaf dropped-ipv4-packet { + leaf dropped-ipv4-packets { type yang:zero-based-counter64; - description "Number of IPv4 packets dropped."; + description + "Number of IPv4 packets dropped at the + Internet-facing interface. + + Discontinuities in the value of this counter can occur + at re-initialization of the management system, and at + other times as indicated by the value of + 'discontinuity-time'."; } - leaf dropped-ipv4-byte { + leaf dropped-ipv4-bytes { type yang:zero-based-counter64; - description "IPv4traffic dropped, in bytes"; + description + "IPv4 traffic dropped at the Internet-facing + interface, in bytes. + + Discontinuities in the value of this counter can occur + at re-initialization of the management system, and at + other times as indicated by the value of + 'discontinuity-time'."; } - leaf dropped-ipv6-packet { + leaf dropped-ipv6-packets { type yang:zero-based-counter64; - description "Number of IPv4 packets dropped."; + description + "Number of IPv4-in-IPv6 packets dropped. + + Discontinuities in the value of this counter can occur + at re-initialization of the management system, and at + other times as indicated by the value of + 'discontinuity-time'."; } - leaf dropped-ipv6-byte { + leaf dropped-ipv6-bytes { type yang:zero-based-counter64; - description "IPv4 traffic dropped, in bytes"; + description + "IPv4-in-IPv6 traffic dropped, in bytes. + + Discontinuities in the value of this counter can occur + at re-initialization of the management system, and at + other times as indicated by the value of + 'discontinuity-time'."; } leaf dropped-ipv4-fragments { type yang:zero-based-counter64; - description "Number of fragmented IPv4 packets dropped"; + description + "Number of fragmented IPv4 packets dropped. 
+ + Discontinuities in the value of this counter can occur + at re-initialization of the management system, and at + other times as indicated by the value of + 'discontinuity-time'."; } - leaf dropped-ipv4-bytes { + leaf dropped-ipv4-fragment-bytes { type yang:zero-based-counter64; - description "Fragmented IPv4 traffic dropped, in bytes"; + description + "Fragmented IPv4 traffic dropped, in bytes. + + Discontinuities in the value of this counter can occur + at re-initialization of the management system, and at + other times as indicated by the value of + 'discontinuity-time'."; } leaf ipv6-fragments-reassembled { type yang:zero-based-counter64; - description "Number of IPv6 fragments successfully reassembled"; + description + "Number of IPv6 fragments successfully reassembled. + + Discontinuities in the value of this counter can occur + at re-initialization of the management system, and at + other times as indicated by the value of + 'discontinuity-time'."; } leaf ipv6-fragments-bytes-reassembled { type yang:zero-based-counter64; - description "IPv6 fragments successfully reassembled, in bytes"; + description + "IPv6 fragments successfully reassembled, in bytes. + + Discontinuities in the value of this counter can occur + at re-initialization of the management system, and at + other times as indicated by the value of + 'discontinuity-time'."; } leaf out-icmpv4-error-packets { type yang:zero-based-counter64; - description "Internally generated ICMPv4 error packets."; + description + "Internally generated ICMPv4 error packets. + + Discontinuities in the value of this counter can occur + at re-initialization of the management system, and at + other times as indicated by the value of + 'discontinuity-time'."; + } + leaf out-icmpv4-error-bytes { + type yang:zero-based-counter64; + description + "Internally generated ICMPv4 error messages, in bytes. 
+ + Discontinuities in the value of this counter can occur + at re-initialization of the management system, and at + other times as indicated by the value of + 'discontinuity-time'."; } leaf out-icmpv6-error-packets { type yang:zero-based-counter64; - description "Internally generted ICMPv6 error packets."; + description + "Internally generated ICMPv6 error packets. + + Discontinuities in the value of this counter can occur + at re-initialization of the management system, and at + other times as indicated by the value of + 'discontinuity-time'."; + } + leaf out-icmpv6-error-bytes { + type yang:zero-based-counter64; + description + "Internally generated ICMPv6 error messages, in bytes. + + Discontinuities in the value of this counter can occur + at re-initialization of the management system, and at + other times as indicated by the value of + 'discontinuity-time'."; } } } diff --git a/src/lib/yang/schema.lua b/src/lib/yang/schema.lua index ac57cdbf8f..3bdca43bbc 100644 --- a/src/lib/yang/schema.lua +++ b/src/lib/yang/schema.lua @@ -5,6 +5,7 @@ local lib = require("core.lib") local mem = require("lib.stream.mem") local parser = require("lib.yang.parser") local util = require("lib.yang.util") +local maxpc = require("lib.maxpc") local function error_with_loc(loc, msg, ...) 
error(string.format("%s: "..msg, loc, ...)) @@ -446,7 +447,7 @@ local function init_refine(node, loc, argument, children) end local function init_revision(node, loc, argument, children) -- TODO: parse date - node.value = require_argument(loc, argument) + node.date = require_argument(loc, argument) node.description = maybe_child_property(loc, children, 'description', 'value') node.reference = maybe_child_property(loc, children, 'reference', 'value') end @@ -640,6 +641,110 @@ function set_default_capabilities(capabilities) end end +-- Parse/interpret YANG 1.1 if-feature expressions +-- https://tools.ietf.org/html/rfc7950#section-7.20.2 +local if_feature_expr_parser = (function () + local match, capture, combine = maxpc.import() + local refs = {} + local function ref (s) return function (...) return refs[s](...) end end + local function wsp_lf() + return combine._or(match.equal(' '), match.equal('\t'), + match.equal('\n'), match.equal('\r')) + end + local function sep() return combine.some(wsp_lf()) end + local function optsep() return combine.any(wsp_lf()) end + local function keyword(s) return match.string(s) end + local function identifier() + -- [a-zA-Z_][a-zA-Z0-9_-.:]+ + local alpha_ = match.satisfies(function (x) + return ("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_") + :find(x, 1, true) + end) + local digit_punct = match.satisfies(function (x) + return ("0123456789-."):find(x, 1, true) + end) + return capture.subseq( + match.seq(alpha_, combine.any(combine._or(alpha_, digit_punct))) + ) + end + local function identifier_ref() + local idref = capture.seq( + identifier(), + combine.maybe(match.equal(":")), combine.maybe(identifier()) + ) + local function ast_idref (mod_or_id, _, id) + return {'feature', id or mod_or_id, id and mod_or_id or nil} + end + return capture.unpack(idref, ast_idref) + end + local function if_feature_not() + local not_feature = capture.seq( + keyword'not', sep(), ref'if_feature_factor' + ) + local function ast_not (_, _, fact) 
return {'not', fact} end + return capture.unpack(not_feature, ast_not) + end + local function if_feature_subexpr() + local subexpr = capture.seq( + match.equal("("), optsep(), ref'if_feature_expr', optsep(), match.equal(")") + ) + local function ast_subexpr (_, _, expr) return {'subexpr', expr} end + return capture.unpack(subexpr, ast_subexpr) + end + local function if_feature_factor () + return combine._or( + if_feature_not(), if_feature_subexpr(), identifier_ref() + ) + end + refs.if_feature_factor = if_feature_factor() + local function if_feature_and() + local and_feature = capture.seq( + if_feature_factor(), sep(), keyword'and', sep(), ref'if_feature_term' + ) + local function ast_and (a, _, _, _, b) return {'and', a, b} end + return capture.unpack(and_feature, ast_and) + end + local function if_feature_term() + return combine._or(if_feature_and(), if_feature_factor()) + end + refs.if_feature_term = if_feature_term() + local function if_feature_or() + local or_feature = capture.seq( + if_feature_term(), sep(), keyword'or', sep(), ref'if_feature_expr' + ) + local function ast_or (a, _, _, _, b) return {'or', a, b} end + return capture.unpack(or_feature, ast_or) + end + local function if_feature_expr() + return combine._or(if_feature_or(), if_feature_term()) + end + refs.if_feature_expr = if_feature_expr() + return refs.if_feature_expr +end)() + +local function parse_if_feature_expr(expr) + local ast, success, eof = maxpc.parse(expr, if_feature_expr_parser) + assert(success and eof, "Error parsing if-feature-expression: "..expr) + return ast +end + +local function interpret_if_feature(expr, has_feature_p) + local function interpret (ast) + local op, a, b = unpack(ast) + if op == 'feature' then + return has_feature_p(a, b) + elseif op == 'or' then + if interpret(a) then return true + else return interpret(b) end + elseif op == 'and' then + return interpret(a) and interpret(b) + elseif op == 'subexpr' then + return interpret(a) + end + end + return 
interpret(parse_if_feature_expr(expr)) +end + -- Inline "grouping" into "uses". -- Inline "submodule" into "include". -- Inline "imports" into "module". @@ -771,6 +876,13 @@ function resolve(schema, features) node[prop] = shallow_copy(node[prop]) for k,v in pairs(node[prop]) do node[prop][k] = visit(v, env) end end + local last_revision = nil + for _,revision in ipairs(node.revisions) do + if last_revision == nil or last_revision < revision.date then + last_revision = revision.date + end + end + node.last_revision = last_revision end if node.kind == 'rpc' then if node.input then node.input = visit(node.input, env) end @@ -787,15 +899,22 @@ function resolve(schema, features) node.unavailable = true end end - for _,feature in ipairs(pop_prop(node, 'if_features') or {}) do - local feature_node = lookup_lazy(env, 'features', feature) - if node.kind == 'feature' then - -- This is a feature that depends on a feature. These we - -- keep in the environment but if the feature is - -- unavailable, we mark it as such. - local mod, id = feature_node.module_id, feature_node.id - if not (features[mod] or {})[id] then node.unavailable = true end - elseif feature_node.unavailable then + for _,expr in ipairs(pop_prop(node, 'if_features') or {}) do + local function resolve_feature (feature, mod) + assert(not mod, "NYI: module qualified features in if-feature expression") + local feature_node = lookup_lazy(env, 'features', feature) + if node.kind == 'feature' then + -- This is a feature that depends on a feature. These we + -- keep in the environment but if the feature is + -- unavailable, we mark it as such. 
+ local mod, id = feature_node.module_id, feature_node.id + if (features[mod] or {})[id] then return true + else node.unavailable = true end + elseif not feature_node.unavailable then + return true + end + end + if not interpret_if_feature(expr, resolve_feature) then return nil, env end end @@ -1094,12 +1213,13 @@ function selftest() assert(schema.contact == "John Smith fake@person.tld") assert(schema.organization == "Fruit Inc.") assert(schema.description == "Module to test YANG schema lib") + assert(schema.last_revision == "2016-05-28") -- Check all revisions are accounted for. assert(schema.revisions[1].description == "Revision 1") - assert(schema.revisions[1].value == "2016-05-27") + assert(schema.revisions[1].date == "2016-05-27") assert(schema.revisions[2].description == "Revision 2") - assert(schema.revisions[2].value == "2016-05-28") + assert(schema.revisions[2].date == "2016-05-28") -- Check that the feature statements are in the exports interface -- but not the schema itself. @@ -1150,7 +1270,7 @@ function selftest() -- capabilities, for now, assuming tests are run independently from -- programs. 
local caps = get_default_capabilities() - local new_caps = { ['ietf-softwire-br'] = {feature={'binding'}} } + local new_caps = { ['ietf-softwire-br'] = {feature={'binding-mode'}} } for mod_name, mod_caps in pairs(new_caps) do if not caps[mod_name] then caps[mod_name] = {feature={}} end for _,feature in ipairs(mod_caps.feature) do @@ -1166,10 +1286,12 @@ function selftest() local br = load_schema_by_name('ietf-softwire-br') local binding = br.body['br-instances'].body['br-type'].body['binding'] assert(binding) - local bt = binding.body['binding'].body['br-instance'].body['binding-table'] + local bt = binding.body['binding'].body['bind-instance'].body['binding-table'] assert(bt) local ps = bt.body['binding-entry'].body['port-set'] assert(ps) + local alg = br.body['br-instances'].body['br-type'].body['algorithm'] + assert(not alg) -- The binding-entry grouping is defined in ietf-softwire-common and -- imported by ietf-softwire-br, but with a refinement that the -- default is 0. Test that the refinement was applied. @@ -1221,5 +1343,16 @@ function selftest() -- Test Range with explicit value. 
assert(lib.equal(parse_range_or_length_arg(nil, nil, "42"), {{42, 42}})) + -- Parsing/interpreting if-feature expressions + local function test_features (i, m) + local f = { b_99 = { ["c.d"] = true }, + [0] = { bar = true } } + return f[m or 0][i] + end + local expr = "baz and foo or bar and (a or b_99:c.d)" + assert(interpret_if_feature(expr, test_features)) + assert(not interpret_if_feature("boo", test_features)) + assert(not interpret_if_feature("baz or foo", test_features)) + print('selftest: ok') end diff --git a/src/lib/yang/snabb-softwire-v2.yang b/src/lib/yang/snabb-softwire-v2.yang index 5341b4c3d2..bd14a9e760 100644 --- a/src/lib/yang/snabb-softwire-v2.yang +++ b/src/lib/yang/snabb-softwire-v2.yang @@ -11,6 +11,16 @@ module snabb-softwire-v2 { description "Configuration for the Snabb Switch lwAFTR."; + revision 2019-09-17 { + description + "Add discontinuity time to softwire-state."; + } + + revision 2018-10-13 { + description + "Add flow-label setting."; + } + revision 2017-04-17 { description "Removal of br-address leaf-list and br leaf. It adds the @@ -40,6 +50,15 @@ module snabb-softwire-v2 { description "State data about interface."; config false; + leaf discontinuity-time { + type yang:date-and-time; + mandatory true; + description + "The time of the most recent occasion on which the lwaftr instance + suffered a discontinuity. 
This is set to the current time whenever + the lwaftr instance is started or configured."; + } + leaf drop-all-ipv4-iface-bytes { type yang:zero-based-counter64; description diff --git a/src/lib/yang/yang.lua b/src/lib/yang/yang.lua index d15839b336..353f1db272 100644 --- a/src/lib/yang/yang.lua +++ b/src/lib/yang/yang.lua @@ -80,15 +80,18 @@ function load_configuration(filename, opts) return end local compiled = result - if opts.schema_name and opts.schema_name ~= compiled.schema_name then + local expected_revision = opts.revision_date or + load_schema_by_name(opts.schema_name).last_revision + if opts.schema_name ~= compiled.schema_name then log('expected schema name %s in compiled file, but got %s', opts.schema_name, compiled.schema_name) return end - if opts.revision_date and opts.revision_date ~= schema.revision_date then - log('expected schema revision date %s in compiled file, but got %s', - opts.revision_date, compiled.revision_date) + if expected_revision ~= compiled.revision_date then + log('expected schema revision date %s in compiled file, but got "%s"', + expected_revision, compiled.revision_date) return + else end if source_mtime then if (source_mtime.sec == compiled.source_mtime.sec and @@ -105,7 +108,10 @@ function load_configuration(filename, opts) end local source = assert(file.open(filename)) - if binary.has_magic(source) then return load_compiled(source) end + if binary.has_magic(source) then + local conf = load_compiled(source) + if conf then return conf end + end -- If the file doesn't have the magic, assume it's a source file. 
-- First, see if we compiled it previously and saved a compiled file diff --git a/src/program/lwaftr/doc/CHANGELOG.md b/src/program/lwaftr/doc/CHANGELOG.md index eb0d28e844..6635173ce4 100644 --- a/src/program/lwaftr/doc/CHANGELOG.md +++ b/src/program/lwaftr/doc/CHANGELOG.md @@ -1,12 +1,91 @@ # Change Log +## [2019.06.01] + +### Notable changes + + * Improve stability of receive-side scaling (RSS), in which multiple + CPU cores can service traffic on the same NIC. Previously, the + lwAFTR had a pathology whereby a transient error condition that could + cause one core to drop packets could then cause another core to + attempt to perform self-healing by re-optimizing its code, which + could then ping-pong back and cause the other core to try to + self-heal, and on and on forever. See + https://github.com/Igalia/snabb/pull/1229 and + https://github.com/snabbco/snabb/pull/1443 for more details. + + * Fix a problem whereby `snabb config add` would cause the lwAFTR to + crash after a few thousand softwire additions. See + https://github.com/Igalia/snabb/pull/1228. + + * Update the `ietf-softwire` compatibility layer for the native + `snabb-softwire-v2` Yang module, corresponding to the latest changes in + the Internet Draft, + [`draft-ietf-softwire-yang-16`](https://datatracker.ietf.org/doc/draft-ietf-softwire-yang/16/). + + * Add counters and historical data records for how much memory a lwAFTR + process uses over time, for use in on-line and post-mortem system + diagnostics. See https://github.com/Igalia/snabb/pull/1228 for + details. + + * Add `snabb rrdcat` tool that can be used to identify when packet + drops occurred in the past. See + https://github.com/Igalia/snabb/pull/1225 for details. + + * Incorporate changes from the upstream [Snabb 2019.06 + "Deltadromeus"](https://github.com/snabbco/snabb/releases/tag/v2019.01) + release. 
This finally includes a switch over to RaptorJIT, which + adds a number of on-line diagnostic tools that can be useful for + troubleshooting performance problems in production. + +## [2018.09.03] + +### Features + + * Add new "revision" declaration to snabb-softwire-v2 YANG module, + corresponding to addition of flow-label nodes back in version + 2018.09.01. No changes to the schema otherwise. + + * Add new performance diagnostics that will print warnings for common + system misconfigurations, such as missing `isolcpus` declarations or + the use of power-saving CPU frequency scaling strategies. These + warnings detect conditions which are described in the performance + tuning document. + + https://github.com/Igalia/snabb/pull/1212 + https://github.com/snabbco/snabb/blob/master/src/doc/performance-tuning.md + + * Improve `snabb lwaftr run --help` output. Try it out! + +### Bug fixes + + * Ingress drop monitor treats startup as part of grace period (10 + seconds by default), postponing the start of dropped packet detection + until after the system has settled down. + + https://github.com/Igalia/snabb/issues/1216 + https://github.com/Igalia/snabb/pull/1217 + + * Fix PCI/NUMA affinity diagnostics. + + https://github.com/Igalia/snabb/pull/1211 + + * New YANG schema revisions cause Snabb to recompile configurations. + + https://github.com/Igalia/snabb/pull/1209 + + * Re-enable NUMA binding on newer kernels (including the kernel used by + Ubuntu 18.04). + + https://github.com/Igalia/snabb/pull/1207 + ## [2018.09.02] ### Features * Add benchmarking test for 2 instances each with 2 queues (total of 4 queues). - + https://github.com/Igalia/snabb/pull/1206 ### Bug fixes diff --git a/src/program/lwaftr/run/README b/src/program/lwaftr/run/README index c5105c881c..62d0eef32e 100644 --- a/src/program/lwaftr/run/README +++ b/src/program/lwaftr/run/README @@ -1,42 +1,69 @@ Usage: run --help - run --conf --v4 --v4 [OPTION...] - run --conf --on-a-stick [OPTION...] 
+ run --conf CONF [OPTION...] Required arguments: - --conf Sets configuration policy table - --v4 PCI device number for the INET-side NIC - --v6 PCI device number for the B4-side NIC - --on-a-stick One single NIC for INET-side and B4-side + -c CONF, --conf CONF Use configuration from the file CONF. + See the snabb-softwire-v2 YANG module + for full documentation. Optional arguments: - --virtio Use virtio-net interfaces instead of Intel 82599 - --ring-buffer-size Set Intel 82599 receive buffer size - --cpu Run dataplane processes on the given CPUs - --real-time Enable real-time SCHED_FIFO scheduler - --mirror Copies matching packets to TAP interface. Matching - address set by "lwaftr monitor". - -D Duration in seconds - -v Verbose (repeat for more verbosity) - -y, --hydra - Hydra mode: emit CSV data in the format expected - by the Hydra reports. For instance: + -n NAME, --name NAME Record NAME as the identifier of this + network function instance. NAME can + then be passed to "snabb config" to + query or modify the current state or + configuration of the network function at + run-time. NAME must be unique among + running Snabb programs. + --cpu CPUSET Run data-plane processes on CPUSET, a + list of CPU ranges. For example + "3-5,7-9", or "3,4,5,7,8,9" both allow + the lwAFTR to run data planes on the + given CPUs. + --v4 PCI-ADDR Use the NIC at PCI-ADDR for IPv4 + traffic, instead of the NIC specified in + CONF. + --v6 PCI-ADDR Like --v4, but for IPv6 traffic. + --on-a-stick PCI-ADDR Use the NIC at PCI-ADDR for both IPv4 + and IPv6 traffic, instead of the NICs + specified in CONF. + -i, --virtio Interpret PCI addresses as referring to + virtio-net interfaces instead of + auto-detecting the appropriate driver. + -r SIZE, --ring-buffer-size SIZE + Set NIC receive buffer size. The + default is driver-dependent. See + performance tuning documentation, for + full details. + --real-time Enable real-time SCHED_FIFO scheduler. 
+ --mirror TAP Copies matching packets to TAP + interface. Matching address set by + "lwaftr monitor". + --ingress-drop-monitor=HANDLER + When excessive packet loss is detected, + respond according to HANDLER. HANDLER + can be one of "off" (to do nothing), + "warn" (to print a warning), or "flush" + (to warn and also cause Snabb to + re-optimize itself). The default is + "flush". - benchmark,id,score,unit - - rather than the default: - - Time (s),Decap. MPPS,Decap. Gbps,Encap. MPPS,Encap. Gbps +Optional arguments for debugging and profiling: + -v Verbose (repeat for more verbosity). + -t FILE, --trace FILE Record a trace of any run-time "snabb + config" commands to FILE. + -jv, -jv=FILE Print out when traces are recorded. + -jp, -jp=MODE,FILE Profile the system by method. + -jtprof Profile the system by trace. -b FILENAME, --bench-file FILENAME - The file or path name where benchmark data is to - be written. A simple filename or relative pathname - will be based on the current directory. - -n NAME, --name NAME Sets the name as the identifier of this program. - This must be unique amongst other snab programs. - -When the -v option is used at least once, packets on the network interfaces are -counted and recorded, and the corresponding incoming and outgoing packet rates -are written to stdout in CSV format, suitable for passing to a graphing program. -If bench-file is set, output is written to a file instead of stdout. + Write any benchmarking data to FILENAME. + -D SECONDS Stop after SECONDS, for debugging + purposes. + -y, --hydra Emit CSV data in the format expected + by the automated Hydra benchmarks. + -h, --help Show this help message. -CPUSET is a list of CPU ranges. For example "3-5,7-9", or "3,4,5,7,8,9" -both allow the lwAFTR to run data planes on the given CPUs. 
+When the -v option is used at least once, packets on the network +interfaces are counted and recorded, and the corresponding incoming and +outgoing packet rates are written to stdout in CSV format, suitable for +passing to a graphing program. If a --bench-file is set, output is +written to a file instead of stdout. diff --git a/src/program/lwaftr/run/run.lua b/src/program/lwaftr/run/run.lua index 41bb5b90b8..cdf11b9f72 100644 --- a/src/program/lwaftr/run/run.lua +++ b/src/program/lwaftr/run/run.lua @@ -100,18 +100,14 @@ function parse_args(args) .." (valid values: flush, warn, off)") end end - function handlers.reconfigurable() - io.stderr:write("Warning: the --reconfigurable flag has been deprecated") - io.stderr:write(" as the lwaftr is now always reconfigurable.\n") - end function handlers.j(arg) scheduling.j = arg end function handlers.h() show_usage(0) end lib.dogetopt(args, handlers, "b:c:vD:yhir:n:j:t:", - { conf = "c", v4 = 1, v6 = 1, ["v4-pci"] = 1, ["v6-pci"] = 1, - verbose = "v", duration = "D", help = "h", virtio = "i", cpu = 1, - ["ring-buffer-size"] = "r", ["real-time"] = 0, ["bench-file"] = "b", - ["ingress-drop-monitor"] = 1, ["on-a-stick"] = 1, mirror = 1, - hydra = "y", reconfigurable = 0, name = "n", trace = "t" }) + { conf = "c", name = "n", cpu = 1, v4 = 1, v6 = 1, + ["on-a-stick"] = 1, virtio = "i", ["ring-buffer-size"] = "r", + ["real-time"] = 0, mirror = 1, ["ingress-drop-monitor"] = 1, + verbose = "v", trace = "t", ["bench-file"] = "b", + duration = "D", hydra = "y", help = "h" }) if ring_buffer_size ~= nil then if opts.virtio_net then fatal("setting --ring-buffer-size does not work with --virtio") diff --git a/src/program/lwaftr/setup.lua b/src/program/lwaftr/setup.lua index 373b7a8496..7cc1421743 100644 --- a/src/program/lwaftr/setup.lua +++ b/src/program/lwaftr/setup.lua @@ -34,7 +34,7 @@ local alarms = require("lib.yang.alarms") local alarm_notification = false local capabilities = { - ['ietf-softwire-br']={feature={'binding'}}, + 
['ietf-softwire-br']={feature={'binding-mode'}}, ['ietf-alarms']={feature={'operator-actions', 'alarm-shelving', 'alarm-history'}}, } require('lib.yang.schema').set_default_capabilities(capabilities) diff --git a/src/program/lwaftr/tests/benchdata/ipv4-0054.pcap b/src/program/lwaftr/tests/benchdata/ipv4-0054.pcap deleted file mode 100644 index d6d3d5fee3..0000000000 Binary files a/src/program/lwaftr/tests/benchdata/ipv4-0054.pcap and /dev/null differ diff --git a/src/program/lwaftr/tests/propbased/genyang.lua b/src/program/lwaftr/tests/propbased/genyang.lua index 4f2614fe1e..ec22103cdb 100644 --- a/src/program/lwaftr/tests/propbased/genyang.lua +++ b/src/program/lwaftr/tests/propbased/genyang.lua @@ -385,7 +385,7 @@ local function path_generator_from_grammar(production, generate_invalid) table.sort(members) return function () local head = keyword or '' - if math.random() < 0.1 then return head end + if #members == 0 or math.random() < 0.1 then return head end if head ~= '' then head = head..'/' end local k = choose(members) return head..gen_tail[k]() diff --git a/src/program/lwaftr/tests/release-benchmarks/release-benchmarks.sh b/src/program/lwaftr/tests/release-benchmarks/release-benchmarks.sh index d2670dd050..b0cbbab518 100755 --- a/src/program/lwaftr/tests/release-benchmarks/release-benchmarks.sh +++ b/src/program/lwaftr/tests/release-benchmarks/release-benchmarks.sh @@ -17,8 +17,8 @@ # NICs: $PCI1 (required, CPU0), $PCI3 (CPU0), $PCI5 (CPU1), $PCI7 (CPIU1) if [ ! $SNABB_LWAFTR_CPU0 ]; then - echo ">> SNABB_LWAFTR_CPU0 not set, defaulting to 0" - SNABB_LWAFTR_CPU0=0 + echo ">> SNABB_LWAFTR_CPU0 must be set" + exit 1 fi if [ ! $SNABB_PCI0 ] || [ ! $SNABB_PCI1 ]; then @@ -78,17 +78,10 @@ function run_benchmark { lwaftr_log=`mktemp -p $TMPDIR` # Only supply the CPU argument only if it's not already specified. 
- if [[ *"--cpu"* == "$lwaftr_args" ]]; then - $SNABB lwaftr run \ - --name lwaftr-release-benchmarks \ - --conf $dataset/$config $lwaftr_args > $lwaftr_log & - lwaftr_pid=$! - else - $SNABB lwaftr run --cpu $cpu \ - --name lwaftr-release-benchmarks \ - --conf $dataset/$config $lwaftr_args > $lwaftr_log & - lwaftr_pid=$! - fi + $SNABB lwaftr run \ + --name lwaftr-release-benchmarks \ + --conf $dataset/$config $lwaftr_args > $lwaftr_log & + lwaftr_pid=$! # wait briefly to let lwaftr start up sleep 1 @@ -146,14 +139,14 @@ done if [ ! $ON_A_STICK_ONLY ]; then run_benchmark "1 instance, 2 NIC interface" \ "lwaftr.conf" \ - "--v4 $SNABB_PCI0 --v6 $SNABB_PCI2" \ + "--v4 $SNABB_PCI0 --v6 $SNABB_PCI2 --cpu $SNABB_LWAFTR_CPU0" \ "--cpu $SNABB_LOADTEST_CPU0 \ $FROM_INET_PCAP NIC0 NIC1 $SNABB_PCI1 \ $FROM_B4_PCAP NIC1 NIC0 $SNABB_PCI3" run_benchmark "1 instance, 2 NIC interfaces (from config)" \ "lwaftr2.conf" \ - "--v4 $SNABB_PCI0 --v6 $SNABB_PCI2" \ + "--v4 $SNABB_PCI0 --v6 $SNABB_PCI2 --cpu $SNABB_LWAFTR_CPU0" \ "--cpu $SNABB_LOADTEST_CPU0 \ $FROM_INET_PCAP NIC0 NIC1 $SNABB_PCI1 \ $FROM_B4_PCAP NIC1 NIC0 $SNABB_PCI3" @@ -161,18 +154,18 @@ fi run_benchmark "1 instance, 1 NIC (on a stick)" \ "lwaftr.conf" \ - "--on-a-stick $SNABB_PCI0" \ + "--on-a-stick $SNABB_PCI0 --cpu $SNABB_LWAFTR_CPU0" \ "--cpu $SNABB_LOADTEST_CPU0 $FROM_INET_AND_B4_PCAP NIC0 NIC0 $SNABB_PCI1" run_benchmark "1 instance, 1 NIC (on-a-stick, from config file)" \ "lwaftr3.conf" \ - "" \ + "--cpu $SNABB_LWAFTR_CPU0" \ "--cpu $SNABB_LOADTEST_CPU0 $FROM_INET_AND_B4_PCAP NIC0 NIC0 $SNABB_PCI1" if [ ! $ONE_INSTANCE_ONLY ]; then run_benchmark "2 instances, 2 NICs (from config)" \ "lwaftr4.conf" \ - "" \ + "--cpu $SNABB_LWAFTR_CPU0,$SNABB_LWAFTR_CPU1" \ "--cpu $SNABB_LOADTEST_CPU0 $FROM_INET_PCAP NIC0 NIC1 $SNABB_PCI1 \ $FROM_B4_PCAP NIC1 NIC0 $SNABB_PCI3" \ "--cpu $SNABB_LOADTEST_CPU1 $FROM_INET_PCAP NIC0 NIC1 $SNABB_PCI5 \ @@ -180,7 +173,7 @@ if [ ! 
$ONE_INSTANCE_ONLY ]; then run_benchmark "2 instances, 1 NIC (on a stick, from config)" \ "lwaftr5.conf" \ - "" \ + "--cpu $SNABB_LWAFTR_CPU0,$SNABB_LWAFTR_CPU1" \ "--cpu $SNABB_LOADTEST_CPU0 $FROM_INET_AND_B4_PCAP NIC0 NIC0 $SNABB_PCI1" \ "--cpu $SNABB_LOADTEST_CPU1 $FROM_INET_AND_B4_PCAP NIC0 NIC0 $SNABB_PCI5" fi @@ -191,7 +184,7 @@ if [ ! $SNABB_LWAFTR_CPU1 ]; then else run_benchmark "1 instance, 1 NIC, 2 queues" \ "lwaftr6.conf" \ - "" \ + "--cpu $SNABB_LWAFTR_CPU0,$SNABB_LWAFTR_CPU1" \ "--cpu $SNABB_LOADTEST_CPU0 $FROM_INET_AND_B4_PCAP NIC0 NIC0 $SNABB_PCI1" \ "" \ "$SNABB_LWAFTR_CPU0,$SNABB_LWAFTR_CPU1" diff --git a/src/program/lwaftr/tests/subcommands/config_test.py b/src/program/lwaftr/tests/subcommands/config_test.py index 725de8adf7..8c7d999cfb 100644 --- a/src/program/lwaftr/tests/subcommands/config_test.py +++ b/src/program/lwaftr/tests/subcommands/config_test.py @@ -113,7 +113,7 @@ def test_get_ietf_path(self): DAEMON_PROC_NAME, # Implicit string concatenation, do not add commas. '/br-instances/binding/' - 'br-instance[id=1]/binding-table/binding-entry' + 'bind-instance[name=config-test-daemon]/binding-table/binding-entry' '[binding-ipv6info=127:22:33:44:55:66:77:128]/binding-ipv4-addr', )) output = self.run_cmd(cmd_args) @@ -284,6 +284,9 @@ def test_snabb_get_state_summation(self): continue [cname, cvalue] = line.split(" ") cname = os.path.basename(cname) + if cname == "discontinuity-time": + cvalue = str(cvalue) + continue cvalue = int(cvalue) if line.startswith("/softwire-config"): @@ -420,6 +423,31 @@ def test_add(self): output.strip(), b'::2', '\n'.join(('OUTPUT', str(output, ENC)))) + def test_add_ietf(self): + add_args = self.get_cmd_args('add')[:-1] + add_args[3] = '--schema=ietf-softwire-br' + add_args.extend(( + DAEMON_PROC_NAME, + # Implicit string concatenation, no summing needed. 
+ '/br-instances/binding/bind-instance[name=config-test-daemon]/' + 'binding-table/binding-entry', + '{ binding-ipv6info ::123; binding-ipv4-addr 8.8.8.8;' + 'br-ipv6-addr 2001:db8::; port-set { psid 8; psid-len 15; }}', + )) + self.run_cmd(add_args) + get_args = self.get_cmd_args('get')[:-1] + get_args[3] = '--schema=ietf-softwire-br' + get_args.extend(( + DAEMON_PROC_NAME, + '/br-instances/binding/bind-instance[name=config-test-daemon]/' + 'binding-table/binding-entry[binding-ipv6info=::123]/port-set/psid', + )) + output = self.run_cmd(get_args) + # run_cmd checks the exit code and fails the test if it is not zero. + self.assertEqual( + output.strip(), b'8', + '\n'.join(('OUTPUT', str(output, ENC)))) + def test_get_state(self): get_state_args = self.get_cmd_args('get-state') # Select a few at random which should have non-zero results. @@ -437,6 +465,20 @@ def test_get_state(self): self.run_cmd(get_state_args) # run_cmd checks the exit code and fails the test if it is not zero. + def test_get_state_ietf(self): + get_args = self.get_cmd_args('get-state')[:-1] + get_args[3] = '--schema=ietf-softwire-br' + get_args.extend(( + DAEMON_PROC_NAME, + '/br-instances/binding/bind-instance[name=config-test-daemon]/' + 'traffic-stat/rcvd-ipv4-bytes', + )) + output = self.run_cmd(get_args) + # run_cmd checks the exit code and fails the test if it is not zero. + self.assertNotEqual( + output.strip(), b'0', + '\n'.join(('OUTPUT', str(output, ENC)))) + def test_remove(self): # Verify that the thing we want to remove actually exists. get_args = self.get_cmd_args('get') @@ -450,11 +492,31 @@ def test_remove(self): # Remove it. remove_args = list(get_args) remove_args[2] = 'remove' - self.run_cmd(get_args) + self.run_cmd(remove_args) # run_cmd checks the exit code and fails the test if it is not zero. # Verify we cannot find it anymore. + self.run_cmd(get_args, 1) + # run_cmd checks the exit code and fails the test if it is not 1. 
+ + def test_remove_ietf(self): + # Verify that the thing we want to remove actually exists. + get_args = self.get_cmd_args('get')[:-1] + get_args[3] = '--schema=ietf-softwire-br' + get_args.extend(( + DAEMON_PROC_NAME, + '/br-instances/binding/bind-instance[name=config-test-daemon]/' + 'binding-table/binding-entry[binding-ipv6info=::123]', + )) self.run_cmd(get_args) # run_cmd checks the exit code and fails the test if it is not zero. + # Remove it. + remove_args = list(get_args) + remove_args[2] = 'remove' + self.run_cmd(remove_args) + # run_cmd checks the exit code and fails the test if it is not zero. + # Verify we cannot find it anymore. + self.run_cmd(get_args, 1) + # run_cmd checks the exit code and fails the test if it is not 1. def test_set(self): """ @@ -501,7 +563,7 @@ def test_set(self): DAEMON_PROC_NAME, # Implicit string concatenation, no summing needed. '/br-instances/binding/' - 'br-instance[id=1]/binding-table/binding-entry' + 'bind-instance[name=config-test-daemon]/binding-table/binding-entry' '[binding-ipv6info=::1]/binding-ipv4-addr', )) output = self.run_cmd(get_args) @@ -515,13 +577,177 @@ def test_set(self): get_args.extend(( DAEMON_PROC_NAME, # Implicit string concatenation, no summing needed. - '/br-instances/binding/br-instance[id=1]/' + '/br-instances/binding/bind-instance[name=config-test-daemon]/' 'binding-table/binding-entry[binding-ipv6info=::1]/port-set/psid', )) output = self.run_cmd(get_args) self.assertEqual(output.strip(), bytes(test_psid, ENC), '\n'.join(('OUTPUT', str(output, ENC)))) + def test_set_ietf(self): + """ + Set binding table, update an entry, check for validity via get. + """ + set_args = self.get_cmd_args('set')[:-1] + set_args[3] = '--schema=ietf-softwire-br' + set_args.extend(( + DAEMON_PROC_NAME, + # Implicit string concatenation, no summing needed. 
+ '/br-instances/binding/bind-instance[name=config-test-daemon]/' + 'binding-table/binding-entry', + '{ binding-ipv6info ::124; binding-ipv4-addr 8.8.8.8;' + 'br-ipv6-addr 2001:db8::; port-set { psid 8; psid-len 15; }}', + )) + self.run_cmd(set_args) + set_args = self.get_cmd_args('set')[:-1] + set_args[3] = '--schema=ietf-softwire-br' + set_args.extend(( + DAEMON_PROC_NAME, + # Implicit string concatenation, no summing needed. + '/br-instances/binding/bind-instance[name=config-test-daemon]/' + 'binding-table/binding-entry[binding-ipv6info=::124]/' + 'binding-ipv4-addr', + '9.9.9.9', + )) + self.run_cmd(set_args) + get_args = self.get_cmd_args('get') + get_args.append( + '/softwire-config/binding-table/softwire[ipv4=9.9.9.9][psid=8]' + '/b4-ipv6') + output = self.run_cmd(get_args) + # run_cmd checks the exit code and fails the test if it is not zero. + self.assertEqual( + output.strip(), b'::124', + '\n'.join(('OUTPUT', str(output, ENC)))) + + def test_set_ietf_special(self): + """ + Test handling of special br attributes. + """ + set_args = self.get_cmd_args('set')[:-1] + set_args[3] = '--schema=ietf-softwire-br' + set_args.extend(( + DAEMON_PROC_NAME, + # Implicit string concatenation, no summing needed. + '/br-instances/binding/bind-instance[name=config-test-daemon]/' + 'softwire-path-mru', + '542', + )) + self.run_cmd(set_args) + get_args = self.get_cmd_args('get')[:-1] + get_args[3] = '--schema=ietf-softwire-br' + get_args.extend(( + DAEMON_PROC_NAME, + '/br-instances/binding/bind-instance[name=config-test-daemon]/' + 'softwire-path-mru', + )) + output = self.run_cmd(get_args) + # run_cmd checks the exit code and fails the test if it is not zero. + self.assertEqual( + output.strip(), b'542', + '\n'.join(('OUTPUT', str(output, ENC)))) + ##### + set_args = self.get_cmd_args('set')[:-1] + set_args[3] = '--schema=ietf-softwire-br' + set_args.extend(( + DAEMON_PROC_NAME, + # Implicit string concatenation, no summing needed. 
+ '/br-instances/binding/bind-instance[name=config-test-daemon]/' + 'icmp-policy/icmpv6-errors/generate-icmpv6-errors', + 'false', + )) + self.run_cmd(set_args) + get_args = self.get_cmd_args('get')[:-1] + get_args[3] = '--schema=ietf-softwire-br' + get_args.extend(( + DAEMON_PROC_NAME, + '/br-instances/binding/bind-instance[name=config-test-daemon]/' + 'icmp-policy/icmpv6-errors/generate-icmpv6-errors', + )) + output = self.run_cmd(get_args) + # run_cmd checks the exit code and fails the test if it is not zero. + self.assertEqual( + output.strip(), b'false', + '\n'.join(('OUTPUT', str(output, ENC)))) + ##### + set_args = self.get_cmd_args('set')[:-1] + set_args[3] = '--schema=ietf-softwire-br' + set_args.extend(( + DAEMON_PROC_NAME, + # Implicit string concatenation, no summing needed. + '/br-instances/binding/bind-instance[name=config-test-daemon]/' + 'icmp-policy/icmpv4-errors/icmpv4-rate', + '1001', + )) + self.run_cmd(set_args) + get_args = self.get_cmd_args('get')[:-1] + get_args[3] = '--schema=ietf-softwire-br' + get_args.extend(( + DAEMON_PROC_NAME, + '/br-instances/binding/bind-instance[name=config-test-daemon]/' + 'icmp-policy/icmpv4-errors/icmpv4-rate', + )) + output = self.run_cmd(get_args) + # run_cmd checks the exit code and fails the test if it is not zero. + self.assertEqual( + output.strip(), b'1001', + '\n'.join(('OUTPUT', str(output, ENC)))) + + def test_wrong_instance_ietf(self): + # Check for failure when querying wrong instance + remove_args = self.get_cmd_args('remove')[:-1] + remove_args[3] = '--schema=ietf-softwire-br' + remove_args.extend(( + DAEMON_PROC_NAME, + # Implicit string concatenation, no summing needed. 
+ '/br-instances/binding/bind-instance[name=nosuchinstance]' + 'binding-table/binding-entry[binding-ipv6info=::123]', + )) + output = str(self.run_cmd(remove_args, 1), ENC) + self.assertRegex(output, 'name does not match', + '\n'.join(('OUTPUT', output))) + #### + set_args = self.get_cmd_args('set')[:-1] + set_args[3] = '--schema=ietf-softwire-br' + set_args.extend(( + DAEMON_PROC_NAME, + # Implicit string concatenation, no summing needed. + '/br-instances/binding/bind-instance[name=nosuchinstance]/' + 'binding-table/binding-entry', + '{ binding-ipv6info ::124; binding-ipv4-addr 8.8.8.8;' + 'br-ipv6-addr 2001:db8::; port-set { psid 8; psid-len 15; }}', + )) + output = str(self.run_cmd(set_args, 1), ENC) + self.assertRegex(output, 'name does not match', + '\n'.join(('OUTPUT', output))) + #### + set_args = self.get_cmd_args('set')[:-1] + set_args[3] = '--schema=ietf-softwire-br' + set_args.extend(( + DAEMON_PROC_NAME, + # Implicit string concatenation, no summing needed. + '/br-instances/binding/bind-instance[name=nosuchinstance]/' + 'binding-table/binding-entry[binding-ipv6info=::124]/' + 'binding-ipv4-addr', + '9.9.9.9', + )) + output = str(self.run_cmd(set_args, 1), ENC) + self.assertRegex(output, 'name does not match', + '\n'.join(('OUTPUT', output))) + #### + set_args = self.get_cmd_args('set')[:-1] + set_args[3] = '--schema=ietf-softwire-br' + set_args.extend(( + DAEMON_PROC_NAME, + # Implicit string concatenation, no summing needed. 
+ '/br-instances/binding/bind-instance[name=nosuchinstance]/' + 'icmp-policy/icmpv4-errors/icmpv4-rate', + '1001', + )) + output = str(self.run_cmd(set_args, 1), ENC) + self.assertRegex(output, 'name does not match', + '\n'.join(('OUTPUT', output))) + if __name__ == '__main__': unittest.main() diff --git a/src/program/lwaftr/tests/test_env.py b/src/program/lwaftr/tests/test_env.py index 6283f7420d..cbf620871e 100644 --- a/src/program/lwaftr/tests/test_env.py +++ b/src/program/lwaftr/tests/test_env.py @@ -76,7 +76,7 @@ def reportAndFail(cls, msg, ret_code): cls.daemon.stderr.close() cls.fail(cls, '\n'.join(msg_lines)) - def run_cmd(self, args): + def run_cmd(self, args, ret=0): proc = Popen(args, stdout=PIPE, stderr=PIPE) try: output, errput = proc.communicate(timeout=COMMAND_TIMEOUT) @@ -86,7 +86,7 @@ def run_cmd(self, args): print('\nTimeout running command, trying to kill PID %s' % proc.pid) proc.kill() raise - if proc.returncode != 0: + if proc.returncode != ret: msg_lines = ( 'Error running command:', " ".join(args), 'Daemon Command:', " ".join(self.daemon_args), diff --git a/src/program/packetblaster/lwaftr/README b/src/program/packetblaster/lwaftr/README index 4c340c98c7..45e5610761 100644 --- a/src/program/packetblaster/lwaftr/README +++ b/src/program/packetblaster/lwaftr/README @@ -14,12 +14,18 @@ Usage: packetblaster lwaftr [OPTIONS] --sock Socket name for virtio interface - --vlan VLANID VLAN tag traffic with VLANID if set + --vlan4 VLANID Encapsulate IPv4 traffic with IEEE 802.1Q with the given VLANID + --vlan6 VLANID Encapsulate IPv6 traffic with IEEE 802.1Q with the given VLANID + --vlan VLANID Same as --vlan4 VLANID --vlan6 VLANID - --src_mac SOURCE Source MAC-Address + --src_mac4 SOURCE Local MAC Address for IPv4 traffic + --src_mac6 SOURCE Local MAC Address for IPv6 traffic + --src_mac SOURCE Same as --src_mac4 SOURCE --src_mac6 SOURCE Default: 00:00:00:00:00:00 - --dst_mac DESTINATION Destination MAC-Address + --dst_mac4 DEST Remote MAC Address for 
IPv4 traffic + --dst_mac6 DEST Remote MAC Address for IPv6 traffic + --dst_mac DESTINATION Same as --dst_mac4 DEST --dst_mac6 DEST Default: 00:00:00:00:00:00 --size SIZES A comma separated list of numbers. Send packets whose @@ -28,21 +34,22 @@ Usage: packetblaster lwaftr [OPTIONS] headers, and additionally a 4-byte CRC that is written and read by the NIC. - Note that the minimum ethernet frame size is 64 bytes. - While it's technically possible to make smaller frames - and we do allow it, the NIC will pad it up to the + Note that the minimum ethernet frame size is 64 + bytes. While it's technically possible to make + smaller frames, the NIC will pad it up to the minimum before sending, so it's a bit pointless. - Since Snabb does not see the CRC in the packet, that - means that from Snabb's perspective the minimum useful - packet size is 60 bytes. - - The smallest allowed frame size is 46 bytes, - comprising 14 bytes for the ethernet header, 20 for - the IPv4 header, 8 for the UDP header, and 4 - additional bytes for the ethernet checksum. If the - packet has at least 8 bytes of payload, the generated - packets will include a unique identifier in the - payload as well. + We signal an error if the user requests a packet + size that's smaller than 64 bytes, to avoid + misinterpreted benchmarks. + + Packets will consist of 14 bytes for the + ethernet header, 4 additional bytes for the + ethernet checksum, a possible 4 bytes for a VLAN + tag, 20 for the IPv4 header, 8 for the UDP + header, and then a payload. The payload + includes a unique identifier in the first 8 + bytes. IPv6 packets include an additional 40 + bytes for the IPv6 header. 
Default: 64,64,64,64,64,64,64,594,594,594,1500 diff --git a/src/program/packetblaster/lwaftr/lib.lua b/src/program/packetblaster/lwaftr/lib.lua index 9960fa9783..b672cd6cc2 100644 --- a/src/program/packetblaster/lwaftr/lib.lua +++ b/src/program/packetblaster/lwaftr/lib.lua @@ -10,14 +10,13 @@ local ipv6 = require("lib.protocol.ipv6") local ipsum = require("lib.checksum").ipsum local ffi = require("ffi") -local C = ffi.C local cast = ffi.cast -local copy = ffi.copy +local htons, ntohs = lib.htons, lib.ntohs +local htonl, ntohl = lib.htonl, lib.ntohl local PROTO_IPV4_ENCAPSULATION = 0x4 -local PROTO_VLAN = C.htons(0x8100) -local PROTO_IPV4 = C.htons(0x0800) -local PROTO_IPV6 = C.htons(0x86DD) +local PROTO_IPV4 = htons(0x0800) +local PROTO_IPV6 = htons(0x86DD) local DEFAULT_TTL = 255 local MAGIC = 0xaffeface @@ -30,23 +29,13 @@ struct { } __attribute__((packed)) ]] local ether_header_ptr_type = ffi.typeof("$*", ether_header_t) -local ethernet_header_size = ffi.sizeof(ether_header_t) -local OFFSET_ETHERTYPE = 12 +local ether_header_size = ffi.sizeof(ether_header_t) +local ether_min_frame_size = 64 -- The ethernet CRC field is not included in the packet as seen by -- Snabb, but it is part of the frame and therefore a contributor to the -- frame size. 
-local ethernet_crc_size = 4 - -local ether_vlan_header_type = ffi.typeof([[ -struct { - uint16_t tag; - uint16_t ether_type; -} -]]) -ether_vlan_header_ptr_type = ffi.typeof("$*", ether_vlan_header_type) -ether_vlan_header_size = ffi.sizeof(ether_vlan_header_type) -local OFFSET_ETHERTYPE_VLAN = OFFSET_ETHERTYPE + ether_vlan_header_size +local ether_crc_size = 4 local ipv4hdr_t = ffi.typeof[[ struct { @@ -97,19 +86,6 @@ struct { local payload_ptr_type = ffi.typeof("$*", payload_t) local payload_size = ffi.sizeof(payload_t) -local uint16_ptr_t = ffi.typeof("uint16_t*") -local uint32_ptr_t = ffi.typeof("uint32_t*") - -local n_cache_src_ipv6 = ipv6:pton("::") - -local function rd32(offset) - return cast(uint32_ptr_t, offset)[0] -end - -local function wr32(offset, val) - cast(uint32_ptr_t, offset)[0] = val -end - local function inc_ipv6(ipv6) for i=15,0,-1 do if ipv6[i] == 255 then @@ -122,202 +98,133 @@ local function inc_ipv6(ipv6) return ipv6 end -Lwaftrgen = { +local function inc_ipv4(ipv4) + ipv4 = cast("uint32_t*", ipv4) + ipv4[0] = htonl(ntohl(ipv4[0]) + 1) +end + +local function printf(fmt, ...) 
+ print(string.format(fmt, ...)) +end + +local receive, transmit = link.receive, link.transmit + +B4Gen = { config = { sizes = {required=true}, - dst_mac = {required=true}, - src_mac = {required=true}, rate = {required=true}, - vlan = {}, - b4_ipv6 = {}, - b4_ipv4 = {}, - public_ipv4 = {}, - aftr_ipv6 = {}, - ipv6_only = {}, - ipv4_only = {}, - b4_port = {}, - protocol = {}, - count = {}, - single_pass = {} + count = {default=1}, + single_pass = {default=false}, + b4_ipv6 = {required=true}, + aftr_ipv6 = {required=true}, + b4_ipv4 = {required=true}, + b4_port = {required=true}, + public_ipv4 = {required=true}, + frame_overhead = {default=0} } } -local receive, transmit = link.receive, link.transmit - -function Lwaftrgen:new(conf) - local dst_mac = ethernet:pton(conf.dst_mac) - local src_mac = ethernet:pton(conf.src_mac) - local vlan = conf.vlan - local b4_ipv6 = conf.b4_ipv6 and ipv6:pton(conf.b4_ipv6) - local b4_ipv4 = conf.b4_ipv4 and ipv4:pton(conf.b4_ipv4) - local public_ipv4 = conf.public_ipv4 and ipv4:pton(conf.public_ipv4) - local aftr_ipv6 = conf.aftr_ipv6 and ipv6:pton(conf.aftr_ipv6) - - local ipv4_pkt = packet.allocate() - ffi.fill(ipv4_pkt.data, packet.max_payload) - local eth_hdr = cast(ether_header_ptr_type, ipv4_pkt.data) - eth_hdr.ether_dhost, eth_hdr.ether_shost = dst_mac, src_mac - - local ipv4_hdr, udp_offset - if vlan then - udp_offset = 38 - eth_hdr.ether_type = PROTO_VLAN - local vlan_hdr = cast(ether_vlan_header_ptr_type, ipv4_pkt.data + ethernet_header_size) - vlan_hdr.ether_type = PROTO_IPV4 - vlan_hdr.tag = C.htons(vlan) - ipv4_hdr = cast(ipv4_header_ptr_type, ipv4_pkt.data + ethernet_header_size + ether_vlan_header_size) - else - udp_offset = 34 - eth_hdr.ether_type = PROTO_IPV4 - ipv4_hdr = cast(ipv4_header_ptr_type, ipv4_pkt.data + ethernet_header_size) +function B4Gen:new(conf) + local b4_ipv6 = ipv6:pton(conf.b4_ipv6) + local b4_ipv4 = ipv4:pton(conf.b4_ipv4) + local public_ipv4 = ipv4:pton(conf.public_ipv4) + local aftr_ipv6 = 
ipv6:pton(conf.aftr_ipv6) + + -- Template IPv4 in IPv6 packet + local pkt = packet.allocate() + ffi.fill(pkt.data, packet.max_payload) + local function h(ptr_type, offset, size) + return cast(ptr_type, pkt.data + offset), offset + size end + local eth_hdr, ipv6_offset = h(ether_header_ptr_type, 0, ether_header_size) + local ipv6_hdr, ipv4_offset = h(ipv6_header_ptr_type, ipv6_offset, ipv6_header_size) + local ipv4_hdr, udp_offset = h(ipv4_header_ptr_type, ipv4_offset, ipv4_header_size) + local udp_hdr, payload_offset = h(udp_header_ptr_type, udp_offset, udp_header_size) + local payload, min_length = h(payload_ptr_type, payload_offset, payload_size) - ipv4_hdr.src_ip = public_ipv4 - ipv4_hdr.dst_ip = b4_ipv4 - ipv4_hdr.ttl = 15 - ipv4_hdr.ihl_v_tos = C.htons(0x4500) -- v4 - ipv4_hdr.id = 0 - ipv4_hdr.frag_off = 0 + -- The offset in returned packets where we expect to find the payload. + local rx_payload_offset = payload_offset - ipv6_header_size - local ipv4_udp_hdr, ipv4_payload - - ipv4_hdr.protocol = 17 -- UDP(17) - ipv4_udp_hdr = cast(udp_header_ptr_type, ipv4_pkt.data + udp_offset) - ipv4_udp_hdr.src_port = C.htons(12345) - ipv4_udp_hdr.checksum = 0 - ipv4_payload = cast(payload_ptr_type, ipv4_pkt.data + udp_offset + udp_header_size) - ipv4_payload.magic = MAGIC - ipv4_payload.number = 0 - - -- IPv4 in IPv6 packet - copy(n_cache_src_ipv6, b4_ipv6, 16) - local ipv6_pkt = packet.allocate() - ffi.fill(ipv6_pkt.data, packet.max_payload) - local eth_hdr = cast(ether_header_ptr_type, ipv6_pkt.data) - eth_hdr.ether_dhost, eth_hdr.ether_shost = dst_mac, src_mac - - - local ipv6_hdr, ipv6_ipv4_hdr - if vlan then - eth_hdr.ether_type = PROTO_VLAN - local vlan_hdr = cast(ether_vlan_header_ptr_type, ipv6_pkt.data + ethernet_header_size) - vlan_hdr.ether_type = PROTO_IPV6 - vlan_hdr.tag = C.htons(vlan) - ipv6_hdr = cast(ipv6_header_ptr_type, ipv6_pkt.data + ethernet_header_size + ether_vlan_header_size) - ipv6_ipv4_hdr = cast(ipv4_header_ptr_type, ipv6_pkt.data + 
ethernet_header_size + ether_vlan_header_size + ipv6_header_size) - else - eth_hdr.ether_type = PROTO_IPV6 - ipv6_hdr = cast(ipv6_header_ptr_type, ipv6_pkt.data + ethernet_header_size) - ipv6_ipv4_hdr = cast(ipv4_header_ptr_type, ipv6_pkt.data + ethernet_header_size + ipv6_header_size) - end + eth_hdr.ether_type = PROTO_IPV6 lib.bitfield(32, ipv6_hdr, 'v_tc_fl', 0, 4, 6) -- IPv6 Version lib.bitfield(32, ipv6_hdr, 'v_tc_fl', 4, 8, 1) -- Traffic class ipv6_hdr.next_header = PROTO_IPV4_ENCAPSULATION ipv6_hdr.hop_limit = DEFAULT_TTL + ipv6_hdr.src_ip = b4_ipv6 ipv6_hdr.dst_ip = aftr_ipv6 - ipv6_ipv4_hdr.dst_ip = public_ipv4 - ipv6_ipv4_hdr.ttl = 15 - ipv6_ipv4_hdr.ihl_v_tos = C.htons(0x4500) -- v4 - ipv6_ipv4_hdr.id = 0 - ipv6_ipv4_hdr.frag_off = 0 + ipv4_hdr.src_ip = b4_ipv4 + ipv4_hdr.dst_ip = public_ipv4 + ipv4_hdr.ttl = 15 + ipv4_hdr.ihl_v_tos = htons(0x4500) -- v4 + ipv4_hdr.id = 0 + ipv4_hdr.frag_off = 0 + ipv4_hdr.protocol = 17 -- UDP + + udp_hdr.src_port = htons(conf.b4_port) + udp_hdr.dst_port = htons(12345) + udp_hdr.checksum = 0 - local ipv6_ipv4_udp_hdr, ipv6_payload + payload.magic = MAGIC + payload.number = 0 - local total_packet_count = 0 + -- The sizes are frame sizes, including the 4-byte ethernet CRC + -- that we don't see in Snabb. 
+ local sizes = {} for _,size in ipairs(conf.sizes) do - -- count for IPv4 and IPv6 packets (40 bytes IPv6 encap header) - if conf.ipv4_only or conf.ipv6_only then - total_packet_count = total_packet_count + 1 - else - total_packet_count = total_packet_count + 2 - end + assert(size >= ether_min_frame_size) + table.insert(sizes, size - ether_crc_size - conf.frame_overhead) end - ipv6_ipv4_hdr.protocol = 17 -- UDP(17) - ipv6_ipv4_udp_hdr = cast(udp_header_ptr_type, ipv6_pkt.data + udp_offset + ipv6_header_size) - ipv6_ipv4_udp_hdr.dst_port = C.htons(12345) - ipv6_ipv4_udp_hdr.checksum = 0 - ipv6_payload = cast(payload_ptr_type, ipv6_pkt.data + udp_offset + ipv6_header_size + udp_header_size) - ipv6_payload.magic = MAGIC - ipv6_payload.number = 0 - local o = { b4_ipv6 = b4_ipv6, b4_ipv4 = b4_ipv4, b4_port = conf.b4_port, - current_port = conf.b4_port, - b4_ipv4_offset = 0, - ipv6_address = n_cache_src_ipv6, - count = conf.count, + softwire_idx = 0, + softwire_count = conf.count, single_pass = conf.single_pass, - current_count = 0, - ipv4_pkt = ipv4_pkt, - ipv4_hdr = ipv4_hdr, - ipv4_payload = ipv4_payload, + template_pkt = pkt, ipv6_hdr = ipv6_hdr, - ipv6_pkt = ipv6_pkt, - ipv6_payload = ipv6_payload, - ipv6_ipv4_hdr = ipv6_ipv4_hdr, - ipv4_udp_hdr = ipv4_udp_hdr, - ipv6_ipv4_udp_hdr = ipv6_ipv4_udp_hdr, - ipv4_only = conf.ipv4_only, - ipv6_only = conf.ipv6_only, - vlan = vlan, - udp_offset = udp_offset, - protocol = conf.protocol, + ipv4_hdr = ipv4_hdr, + udp_hdr = udp_hdr, + payload = payload, + rx_payload_offset = rx_payload_offset, rate = conf.rate, - sizes = conf.sizes, - total_packet_count = total_packet_count, + sizes = sizes, bucket_content = conf.rate * 1e6, - ipv4_packets = 0, ipv4_bytes = 0, - ipv6_packets = 0, ipv6_bytes = 0, - ipv4_packet_number = 0, ipv6_packet_number = 0, - last_rx_ipv4_packet_number = 0, last_rx_ipv6_packet_number = 0, + rx_packets = 0, rx_bytes = 0, + tx_packet_number = 0, rx_packet_number = 0, lost_packets = 0 } - return 
setmetatable(o, {__index=Lwaftrgen}) + return setmetatable(o, {__index=B4Gen}) end -function Lwaftrgen:pull () +function B4Gen:done() return self.stopping end + +function B4Gen:pull () + + if self.stopping then return end local output = self.output.output local input = self.input.input - local ipv6_packets = self.ipv6_packets - local ipv6_bytes = self.ipv6_bytes - local ipv4_packets = self.ipv4_packets - local ipv4_bytes = self.ipv4_bytes + local rx_packets = self.rx_packets + local rx_bytes = self.rx_bytes local lost_packets = self.lost_packets - local udp_offset = self.udp_offset - local o_ethertype = self.vlan and OFFSET_ETHERTYPE_VLAN or OFFSET_ETHERTYPE - - if self.current == 0 then - main.exit(0) - end + local rx_payload_offset = self.rx_payload_offset - -- count and trash incoming packets + -- Count and trash incoming packets. for _=1,link.nreadable(input) do local pkt = receive(input) - if cast(uint16_ptr_t, pkt.data + o_ethertype)[0] == PROTO_IPV6 then - ipv6_bytes = ipv6_bytes + pkt.length - ipv6_packets = ipv6_packets + 1 - local payload = cast(payload_ptr_type, pkt.data + udp_offset + ipv6_header_size + udp_header_size) - if payload.magic == MAGIC then - if self.last_rx_ipv6_packet_number > 0 then - lost_packets = lost_packets + payload.number - self.last_rx_ipv6_packet_number - 1 - end - self.last_rx_ipv6_packet_number = payload.number - end - else - ipv4_bytes = ipv4_bytes + pkt.length - ipv4_packets = ipv4_packets + 1 - local payload = cast(payload_ptr_type, pkt.data + udp_offset + udp_header_size) + if cast(ether_header_ptr_type, pkt.data).ether_type == PROTO_IPV4 then + rx_bytes = rx_bytes + pkt.length + rx_packets = rx_packets + 1 + local payload = cast(payload_ptr_type, pkt.data + rx_payload_offset) if payload.magic == MAGIC then - if self.last_rx_ipv4_packet_number > 0 then - lost_packets = lost_packets + payload.number - self.last_rx_ipv4_packet_number - 1 + if self.last_rx_packet_number and self.last_rx_packet_number > 0 then + lost_packets = 
lost_packets + payload.number - self.last_rx_packet_number - 1 end - self.last_rx_ipv4_packet_number = payload.number + self.last_rx_packet_number = payload.number end end packet.free(pkt) @@ -327,116 +234,262 @@ function Lwaftrgen:pull () self.period_start = self.period_start or cur_now local elapsed = cur_now - self.period_start if elapsed > 1 then - local ipv6_packet_rate = ipv6_packets / elapsed / 1e6 - local ipv4_packet_rate = ipv4_packets / elapsed / 1e6 - local ipv6_octet_rate = ipv6_bytes * 8 / 1e9 / elapsed - local ipv4_octet_rate = ipv4_bytes * 8 / 1e9 / elapsed - local lost_rate = math.abs(lost_packets / (ipv6_octet_rate + ipv4_octet_rate) / 10000) - print(string.format('v6+v4: %.3f+%.3f = %.6f MPPS, %.3f+%.3f = %.6f Gbps, lost %.3f%%', - ipv6_packet_rate, ipv4_packet_rate, ipv6_packet_rate + ipv4_packet_rate, - ipv6_octet_rate, ipv4_octet_rate, ipv6_octet_rate + ipv4_octet_rate, lost_rate)) + printf('v4 rx: %.6f MPPS, %.6f Gbps, lost %.3f%%', + rx_packets / elapsed / 1e6, + rx_bytes * 8 / 1e9 / elapsed, + lost_packets / (rx_packets + lost_packets) * 100) self.period_start = cur_now - self.ipv6_bytes, self.ipv6_packets = 0, 0 - self.ipv4_bytes, self.ipv4_packets = 0, 0 - self.lost_packets = 0 - else - self.ipv4_bytes, self.ipv4_packets = ipv4_bytes, ipv4_packets - self.ipv6_bytes, self.ipv6_packets = ipv6_bytes, ipv6_packets - self.lost_packets = lost_packets + rx_packets, rx_bytes, lost_packets = 0, 0, 0 end + self.rx_packets = rx_packets + self.rx_bytes = rx_bytes + self.lost_packets = lost_packets - local ipv4_hdr = self.ipv4_hdr local ipv6_hdr = self.ipv6_hdr - local ipv6_ipv4_hdr = self.ipv6_ipv4_hdr - local ipv4_udp_hdr = self.ipv4_udp_hdr - local ipv6_ipv4_udp_hdr = self.ipv6_ipv4_udp_hdr + local ipv4_hdr = self.ipv4_hdr + local udp_hdr = self.udp_hdr + local payload = self.payload local cur_now = tonumber(app.now()) local last_time = self.last_time or cur_now self.bucket_content = self.bucket_content + self.rate * 1e6 * (cur_now - last_time) 
self.last_time = cur_now - local limit = engine.pull_npackets - while limit > self.total_packet_count and - self.total_packet_count <= self.bucket_content do - limit = limit - 1 - self.bucket_content = self.bucket_content - self.total_packet_count + for _=1, math.min(engine.pull_npackets, self.bucket_content) do + if #self.sizes > self.bucket_content then break end + self.bucket_content = self.bucket_content - #self.sizes - ipv4_hdr.dst_ip = self.b4_ipv4 - ipv6_ipv4_hdr.src_ip = self.b4_ipv4 - ipv6_hdr.src_ip = self.b4_ipv6 - local ipdst = C.ntohl(rd32(ipv4_hdr.dst_ip)) - ipdst = C.htonl(ipdst + self.b4_ipv4_offset) - wr32(ipv4_hdr.dst_ip, ipdst) - wr32(ipv6_ipv4_hdr.src_ip, ipdst) + for _,size in ipairs(self.sizes) do + local ipv4_len = size - ether_header_size + local udp_len = ipv4_len - ipv4_header_size + -- Expectation from callers is to make packets that are SIZE + -- bytes big, *plus* the IPv6 header. + ipv6_hdr.payload_length = htons(ipv4_len) + ipv4_hdr.total_length = htons(ipv4_len) + ipv4_hdr.checksum = 0 + ipv4_hdr.checksum = htons(ipsum(cast("char*", ipv4_hdr), ipv4_header_size, 0)) + udp_hdr.len = htons(udp_len) + self.template_pkt.length = size + ipv6_header_size + payload.number = self.tx_packet_number; + self.tx_packet_number = self.tx_packet_number + 1 + transmit(output, packet.clone(self.template_pkt)) + end - ipv4_udp_hdr.dst_port = C.htons(self.current_port) - ipv6_ipv4_udp_hdr.src_port = C.htons(self.current_port) + -- Next softwire. + inc_ipv6(ipv6_hdr.src_ip) + local next_port = ntohs(udp_hdr.src_port) + self.b4_port + if next_port >= 2^16 then + inc_ipv4(ipv4_hdr.src_ip) + next_port = self.b4_port + end + udp_hdr.src_port = htons(next_port) - -- The sizes are frame sizes, including the 4-byte ethernet CRC - -- that we don't see in Snabb. 
+ self.softwire_idx = self.softwire_idx + 1 + if self.softwire_idx >= self.softwire_count then + if self.single_pass then + printf("generated %d packets for each of %d softwires", + #self.sizes, self.softwire_count) + self.stopping = true + break + end - local vlan_size = self.vlan and ether_vlan_header_size or 0 - local ethernet_total_size = ethernet_header_size + vlan_size - local minimum_size = ethernet_total_size + ipv4_header_size + - udp_header_size + ethernet_crc_size + -- Reset to initial softwire. + self.softwire_idx = 0 + ipv6_hdr.src_ip = self.b4_ipv6 + ipv4_hdr.src_ip = self.b4_ipv4 + udp_hdr.src_port = htons(self.b4_port) + end + end +end - for _,size in ipairs(self.sizes) do - assert(size >= minimum_size) - local packet_len = size - ethernet_crc_size - local ipv4_len = packet_len - ethernet_total_size - local udp_len = ipv4_len - ipv4_header_size - if not self.ipv6_only then - ipv4_hdr.total_length = C.htons(ipv4_len) - ipv4_udp_hdr.len = C.htons(udp_len) - self.ipv4_pkt.length = packet_len - ipv4_hdr.checksum = 0 - ipv4_hdr.checksum = C.htons(ipsum(self.ipv4_pkt.data + ethernet_total_size, 20, 0)) - if size >= minimum_size + payload_size then - self.ipv4_payload.number = self.ipv4_packet_number; - self.ipv4_packet_number = self.ipv4_packet_number + 1 - end - local ipv4_pkt = packet.clone(self.ipv4_pkt) - transmit(output, ipv4_pkt) - end +InetGen = { + config = { + sizes = {required=true}, + rate = {required=true}, + b4_ipv4 = {required=true}, + public_ipv4 = {required=true}, + b4_port = {required=true}, + count = {}, + single_pass = {}, + frame_overhead = {default=0} + } +} + +function InetGen:new(conf) + local b4_ipv4 = ipv4:pton(conf.b4_ipv4) + local public_ipv4 = ipv4:pton(conf.public_ipv4) + + -- Template IPv4 packet + local pkt = packet.allocate() + ffi.fill(pkt.data, packet.max_payload) + local function h(ptr_type, offset, size) + return cast(ptr_type, pkt.data + offset), offset + size + end + local eth_hdr, ipv4_offset = 
h(ether_header_ptr_type, 0, ether_header_size) + local ipv4_hdr, udp_offset = h(ipv4_header_ptr_type, ipv4_offset, ipv4_header_size) + local udp_hdr, payload_offset = h(udp_header_ptr_type, udp_offset, udp_header_size) + local payload, min_length = h(payload_ptr_type, payload_offset, payload_size) - if not self.ipv4_only then - -- Expectation from callers is to make packets that are SIZE - -- bytes big, *plus* the IPv6 header. - ipv6_hdr.payload_length = C.htons(ipv4_len) - ipv6_ipv4_hdr.total_length = C.htons(ipv4_len) - ipv6_ipv4_udp_hdr.len = C.htons(udp_len) - self.ipv6_pkt.length = packet_len + ipv6_header_size - if size >= minimum_size + payload_size then - self.ipv6_payload.number = self.ipv6_packet_number; - self.ipv6_packet_number = self.ipv6_packet_number + 1 + -- The offset in returned packets where we expect to find the payload. + local rx_payload_offset = payload_offset + ipv6_header_size + + eth_hdr.ether_type = PROTO_IPV4 + + ipv4_hdr.src_ip = public_ipv4 + ipv4_hdr.dst_ip = b4_ipv4 + ipv4_hdr.ttl = 15 + ipv4_hdr.ihl_v_tos = htons(0x4500) -- v4 + ipv4_hdr.id = 0 + ipv4_hdr.frag_off = 0 + ipv4_hdr.protocol = 17 -- UDP + + udp_hdr.src_port = htons(12345) + udp_hdr.dst_port = htons(conf.b4_port) + udp_hdr.checksum = 0 + + payload.magic = MAGIC + payload.number = 0 + + -- The sizes are frame sizes, including the 4-byte ethernet CRC + -- that we don't see in Snabb. 
+ local sizes = {} + for _,size in ipairs(conf.sizes) do + assert(size >= ether_min_frame_size) + table.insert(sizes, size - ether_crc_size - conf.frame_overhead) + end + + local o = { + b4_ipv4 = b4_ipv4, + b4_port = conf.b4_port, + softwire_idx = 0, + softwire_count = conf.count, + single_pass = conf.single_pass, + template_pkt = pkt, + ipv4_hdr = ipv4_hdr, + udp_hdr = udp_hdr, + payload = payload, + rx_payload_offset = rx_payload_offset, + rate = conf.rate, + sizes = sizes, + bucket_content = conf.rate * 1e6, + rx_packets = 0, rx_bytes = 0, + tx_packet_number = 0, rx_packet_number = 0, + lost_packets = 0 + } + return setmetatable(o, {__index=InetGen}) +end + +function InetGen:done() return self.stopping end + +function InetGen:pull () + + if self.stopping then return end + + local output = self.output.output + local input = self.input.input + local rx_packets = self.rx_packets + local rx_bytes = self.rx_bytes + local lost_packets = self.lost_packets + local rx_payload_offset = self.rx_payload_offset + + -- Count and trash incoming packets. 
+ for _=1,link.nreadable(input) do + local pkt = receive(input) + if cast(ether_header_ptr_type, pkt.data).ether_type == PROTO_IPV6 then + rx_bytes = rx_bytes + pkt.length + rx_packets = rx_packets + 1 + local payload = cast(payload_ptr_type, pkt.data + rx_payload_offset) + if payload.magic == MAGIC then + if self.last_rx_packet_number and self.last_rx_packet_number > 0 then + lost_packets = lost_packets + payload.number - self.last_rx_packet_number - 1 end - local ipv6_pkt = packet.clone(self.ipv6_pkt) - transmit(output, ipv6_pkt) + self.last_rx_packet_number = payload.number end + end + packet.free(pkt) + end - end + local cur_now = tonumber(app.now()) + self.period_start = self.period_start or cur_now + local elapsed = cur_now - self.period_start + if elapsed > 1 then + printf('v6 rx: %.6f MPPS, %.6f Gbps, lost %.3f%%', + rx_packets / elapsed / 1e6, + rx_bytes * 8 / 1e9 / elapsed, + lost_packets / (rx_packets + lost_packets) * 100) + self.period_start = cur_now + rx_packets, rx_bytes, lost_packets = 0, 0, 0 + end + self.rx_packets = rx_packets + self.rx_bytes = rx_bytes + self.lost_packets = lost_packets + + local ipv4_hdr = self.ipv4_hdr + local udp_hdr = self.udp_hdr + local payload = self.payload + + local cur_now = tonumber(app.now()) + local last_time = self.last_time or cur_now + self.bucket_content = self.bucket_content + self.rate * 1e6 * (cur_now - last_time) + self.last_time = cur_now - self.b4_ipv6 = inc_ipv6(self.b4_ipv6) - self.current_port = self.current_port + self.b4_port - if self.current_port > 65535 then - self.current_port = self.b4_port - self.b4_ipv4_offset = self.b4_ipv4_offset + 1 - end + for _=1, math.min(engine.pull_npackets, self.bucket_content) do + if #self.sizes > self.bucket_content then break end + self.bucket_content = self.bucket_content - #self.sizes - self.current_count = self.current_count + 1 - if self.current_count >= self.count then + for _,size in ipairs(self.sizes) do + local ipv4_len = size - ether_header_size + local 
udp_len = ipv4_len - ipv4_header_size + ipv4_hdr.total_length = htons(ipv4_len) + ipv4_hdr.checksum = 0 + ipv4_hdr.checksum = htons(ipsum(cast("char*", ipv4_hdr), ipv4_header_size, 0)) + udp_hdr.len = htons(udp_len) + self.template_pkt.length = size + payload.number = self.tx_packet_number; + self.tx_packet_number = self.tx_packet_number + 1 + transmit(output, packet.clone(self.template_pkt)) + end + + -- Next softwire. + local next_port = ntohs(udp_hdr.dst_port) + self.b4_port + if next_port >= 2^16 then + inc_ipv4(ipv4_hdr.dst_ip) + next_port = self.b4_port + end + udp_hdr.dst_port = htons(next_port) + + self.softwire_idx = self.softwire_idx + 1 + if self.softwire_idx >= self.softwire_count then if self.single_pass then - print(string.format("generated %d packets", self.current_count)) - -- make sure we won't generate more packets in the same breath, then exit - self.current = 0 - self.bucket_content = 0 + printf("generated %d packets for each of %d softwires", + #self.sizes, self.softwire_count) + self.stopping = true + break end - self.current_count = 0 - self.current_port = self.b4_port - self.b4_ipv4_offset = 0 - copy(self.b4_ipv6, self.ipv6_address, 16) - end - end + + -- Reset to initial softwire. 
+ self.softwire_idx = 0 + ipv4_hdr.dst_ip = self.b4_ipv4 + udp_hdr.dst_port = htons(self.b4_port) + end + end +end + +Interleave = {} + +function Interleave:new() + return setmetatable({}, {__index=Interleave}) end +function Interleave:push () + local continue = true + while continue do + continue = false + for _, inport in ipairs(self.input) do + if not link.empty(inport) then + transmit(self.output.output, receive(inport)) + continue = true + end + end + end +end diff --git a/src/program/packetblaster/lwaftr/lwaftr.lua b/src/program/packetblaster/lwaftr/lwaftr.lua index 4fa73b0268..ed4fabc7b4 100644 --- a/src/program/packetblaster/lwaftr/lwaftr.lua +++ b/src/program/packetblaster/lwaftr/lwaftr.lua @@ -6,10 +6,19 @@ local engine = require("core.app") local config = require("core.config") local timer = require("core.timer") local pci = require("lib.hardware.pci") +local ethernet = require("lib.protocol.ethernet") +local ipv4 = require("lib.protocol.ipv4") +local ipv6 = require("lib.protocol.ipv6") local main = require("core.main") local S = require("syscall") -local Lwaftrgen = require("program.packetblaster.lwaftr.lib").Lwaftrgen +local B4Gen = require("program.packetblaster.lwaftr.lib").B4Gen +local InetGen = require("program.packetblaster.lwaftr.lib").InetGen +local Interleave = require("program.packetblaster.lwaftr.lib").Interleave local Tap = require("apps.tap.tap").Tap +local vlan = require("apps.vlan.vlan") +local arp = require("apps.ipv4.arp") +local ndp = require("apps.lwaftr.ndp") +local V4V6 = require("apps.lwaftr.V4V6") local raw = require("apps.socket.raw") local pcap = require("apps.pcap.pcap") local VhostUser = require("apps.vhost.vhost_user").VhostUser @@ -28,7 +37,13 @@ local long_opts = { size = "S", -- frame size list (defaults to IMIX) src_mac = "s", -- source ethernet address dst_mac = "d", -- destination ethernet address + src_mac4 = 1, -- source ethernet address for IPv4 traffic + dst_mac4 = 1, -- destination ethernet address for IPv4 traffic 
+ src_mac6 = 1, -- source ethernet address for IPv6 traffic + dst_mac6 = 1, -- destination ethernet address for IPv6 traffic vlan = "v", -- VLAN id + vlan4 = 1, -- VLAN id for IPv4 traffic + vlan6 = 1, -- VLAN id for IPv6 traffic b4 = "b", -- B4 start IPv6_address,IPv4_address,port aftr = "a", -- fix AFTR public IPv6_address ipv4 = "I", -- fix public IPv4 address @@ -71,11 +86,17 @@ function run (args) end end - local src_mac = "00:00:00:00:00:00" - function opt.s (arg) src_mac = arg end + local v4_src_mac = "00:00:00:00:00:00" + function opt.src_mac4 (arg) v4_src_mac = arg end + local v6_src_mac = "00:00:00:00:00:00" + function opt.src_mac6 (arg) v6_src_mac = arg end + function opt.s (arg) opt.src_mac4(arg); opt.src_mac6(arg) end - local dst_mac = "00:00:00:00:00:00" - function opt.d (arg) dst_mac = arg end + local v4_dst_mac = "00:00:00:00:00:00" + function opt.dst_mac4 (arg) v4_dst_mac = arg end + local v6_dst_mac = "00:00:00:00:00:00" + function opt.dst_mac6 (arg) v6_dst_mac = arg end + function opt.d (arg) opt.dst_mac4(arg); opt.dst_mac6(arg) end local b4_ipv6, b4_ipv4, b4_port = "2001:db8::", "10.0.0.0", 1024 function opt.b (arg) @@ -131,30 +152,36 @@ function run (args) target = sock_interface end - local ipv4_only = false - function opt.v4 () ipv4_only = true end + local v4, v6 = true, true + + function opt.v4 () v6 = false end opt["4"] = opt.v4 - local ipv6_only = false - function opt.v6 () ipv6_only = true end + function opt.v6 () v4 = false end opt["6"] = opt.v6 - local vlan = nil - function opt.v (arg) - vlan = assert(tonumber(arg), "duration is not a number!") + local v4_vlan + function opt.vlan4 (arg) + v4_vlan = assert(tonumber(arg), "vlan is not a number!") + end + local v6_vlan + function opt.vlan6 (arg) + v6_vlan = assert(tonumber(arg), "vlan is not a number!") end + function opt.v (arg) opt.vlan4(arg); opt.vlan6(arg) end - local pcap_file, single_pass + local pcap_file, single_pass = nil, false function opt.o (arg) pcap_file = arg target = 
pcap_file single_pass = true + rate = 1/0 end args = lib.dogetopt(args, opt, "VD:hS:s:a:d:b:iI:c:r:46p:v:o:t:i:k:", long_opts) for _,s in ipairs(sizes) do - if s < 18 + (vlan and 4 or 0) + 20 + 8 then + if s < 18 + (v4_vlan and v6_vlan and 4 or 0) + 20 + 8 then error("Minimum frame size is 46 bytes (18 ethernet+CRC, 20 IPv4, and 8 UDP)") end end @@ -167,68 +194,184 @@ function run (args) print(string.format("packetblaster lwaftr: Sending %d clients at %.3f MPPS to %s", count, rate, target)) print() - if not ipv4_only then - print(string.format("IPv6: %s > %s: %s:%d > %s:12345", b4_ipv6, aftr_ipv6, b4_ipv4, b4_port, public_ipv4)) - print(" source IPv6 and source IPv4/Port adjusted per client") - local sizes_ipv6 = {} - for i,size in ipairs(sizes) do sizes_ipv6[i] = size + 40 end - print("IPv6 frame sizes: " .. table.concat(sizes_ipv6,",")) + if not (v4 or v6) then + -- Assume that -4 -6 means both instead of neither. + v4, v6 = true, true end - if not ipv6_only then - print() - print(string.format("IPv4: %s:12345 > %s:%d", public_ipv4, b4_ipv4, b4_port)) - print(" destination IPv4 and Port adjusted per client") - print("IPv4 frame sizes: " .. table.concat(sizes,",")) + local v4_input, v4_output, v6_input, v6_output + + local function finish_vlan(input, output, tag) + if not tag then return input, output end + + -- Add and remove the common vlan tag. + config.app(c, "untag", vlan.Untagger, {tag=tag}) + config.app(c, "tag", vlan.Tagger, {tag=tag}) + config.link(c, "tag.output -> " .. input) + config.link(c, input .. " -> untag.input") + return 'tag.input', 'untag.output' end - if ipv4_only and ipv6_only then - print("Remove options v4only and v6only to generate both") - main.exit(1) + local function finish_v4(input, output) + assert(v4) + -- Stamp output with the MAC and make an ARP responder. + local tester_ip = ipv4:pton('1.2.3.4') + local next_ip = nil -- Assume we have a static dst mac. 
+ config.app(c, "arp", arp.ARP, + { self_ip = tester_ip, + self_mac = ethernet:pton(v4_src_mac), + next_mac = ethernet:pton(v4_dst_mac), + next_ip = next_ip }) + config.link(c, output .. ' -> arp.south') + config.link(c, 'arp.south -> ' .. input) + return 'arp.north', 'arp.north' + end + + local function finish_v6(input, output) + assert(v6) + -- Stamp output with the MAC and make an NDP responder. + local tester_ip = ipv6:pton('2001:DB8::1') + local next_ip = nil -- Assume we have a static dst mac. + config.app(c, "ndp", ndp.NDP, + { self_ip = tester_ip, + self_mac = ethernet:pton(v6_src_mac), + next_mac = ethernet:pton(v6_dst_mac), + next_ip = next_ip }) + config.link(c, output .. ' -> ndp.south') + config.link(c, 'ndp.south -> ' .. input) + return 'ndp.north', 'ndp.north' end - config.app(c, "generator", Lwaftrgen, { - sizes = sizes, count = count, aftr_ipv6 = aftr_ipv6, rate = rate, - src_mac = src_mac, dst_mac = dst_mac, vlan = vlan, - b4_ipv6 = b4_ipv6, b4_ipv4 = b4_ipv4, b4_port = b4_port, - public_ipv4 = public_ipv4, single_pass = single_pass, - ipv4_only = ipv4_only, ipv6_only = ipv6_only }) + local function split(input, output) + assert(v4 and v6) + if v4_vlan ~= v6_vlan then + -- Split based on vlan. + config.app(c, "vmux", vlan.VlanMux, {}) + config.link(c, output .. ' -> vmux.trunk') + config.link(c, 'vmux.trunk -> ' .. input) + local v4_link = v4_vlan and 'vmux.vlan'..v4_vlan or 'vmux.native' + v4_input, v4_output = finish_v4(v4_link, v4_link) + local v6_link = v6_vlan and 'vmux.vlan'..v6_vlan or 'vmux.native' + v6_input, v6_output = finish_v6(v6_link, v6_link) + else + input, output = finish_vlan(input, output, v4_vlan) + + -- Split based on ethertype. + config.app(c, "mux", V4V6.V4V6, {}) + config.app(c, "join", Interleave, {}) + v4_input, v4_output = finish_v4('join.v4', 'mux.v4') + v6_input, v6_output = finish_v6('join.v6', 'mux.v6') + config.link(c, output .. " -> mux.input") + config.link(c, "join.output -> " .. 
input) + end + end - local input, output + local function maybe_split(input, output) + if v4 and v6 then + split(input, output) + elseif v4 then + input, output = finish_vlan(input, output, v4_vlan) + v4_input, v4_output = finish_v4(input, output) + else + input, output = finish_vlan(input, output, v6_vlan) + v6_input, v6_output = finish_v6(input, output) + end + end if tap_interface then if dir_exists(("/sys/devices/virtual/net/%s"):format(tap_interface)) then config.app(c, "tap", Tap, tap_interface) - input, output = "tap.input", "tap.output" else print(string.format("tap interface %s doesn't exist", tap_interface)) main.exit(1) end + maybe_split("tap.input", "tap.output") elseif pciaddr then local device_info = pci.device_info(pciaddr) - if vlan then - print(string.format("vlan set to %d", vlan)) + if v4_vlan then + print(string.format("IPv4 vlan set to %d", v4_vlan)) + end + if v6_vlan then + print(string.format("IPv6 vlan set to %d", v6_vlan)) end - if device_info then + if not device_info then + fatal(("Couldn't find device info for PCI or tap device %s"):format(pciaddr)) + end + if v4 and v6 then + if v4_vlan == v6_vlan and v4_src_mac == v6_src_mac then + config.app(c, "nic", require(device_info.driver).driver, + {pciaddr = pciaddr, vmdq = true, macaddr = v4_src_mac, + mtu = 9500, vlan = v4_vlan}) + maybe_split("nic."..device_info.rx, "nic."..device_info.tx) + else + config.app(c, "v4nic", require(device_info.driver).driver, + {pciaddr = pciaddr, vmdq = true, macaddr = v4_src_mac, + mtu = 9500, vlan = v4_vlan}) + v4_input, v4_output = finish_v4("v4nic."..device_info.rx, + "v4nic."..device_info.tx) + config.app(c, "v6nic", require(device_info.driver).driver, + {pciaddr = pciaddr, vmdq = true, macaddr = v6_src_mac, + mtu = 9500, vlan = v6_vlan}) + v6_input, v6_output = finish_v6("v6nic."..device_info.rx, + "v6nic."..device_info.tx) + end + elseif v4 then config.app(c, "nic", require(device_info.driver).driver, - {pciaddr = pciaddr, vmdq = true, macaddr = 
src_mac, mtu = 9500}) - input, output = "nic."..device_info.rx, "nic."..device_info.tx + {pciaddr = pciaddr, vmdq = true, macaddr = v4_src_mac, + mtu = 9500, vlan = v4_vlan}) + v4_input, v4_output = finish_v4("nic."..device_info.rx, + "nic."..device_info.tx) else - fatal(("Couldn't find device info for PCI or tap device %s"):format(pciaddr)) + config.app(c, "nic", require(device_info.driver).driver, + {pciaddr = pciaddr, vmdq = true, macaddr = v6_src_mac, + mtu = 9500, vlan = v6_vlan}) + v6_input, v6_output = finish_v6("nic."..device_info.rx, + "nic."..device_info.tx) end elseif int_interface then config.app(c, "int", raw.RawSocket, int_interface) - input, output = "int.rx", "int.tx" + maybe_split("int.rx", "int.tx") elseif sock_interface then config.app(c, "virtio", VhostUser, { socket_path=sock_interface } ) - input, output = "virtio.rx", "virtio.tx" + maybe_split("virtio.rx", "virtio.tx") else config.app(c, "pcap", pcap.PcapWriter, pcap_file) - input, output = "pcap.input", "pcap.output" + maybe_split("pcap.input", "pcap.output") end - config.link(c, output .. " -> generator.input") - config.link(c, "generator.output -> " .. input) + if v4 then + print() + print(string.format("IPv4: %s:12345 > %s:%d", public_ipv4, b4_ipv4, b4_port)) + print(" destination IPv4 and Port adjusted per client") + print("IPv4 frame sizes: " .. table.concat(sizes,",")) + local rate = v6 and rate/2 or rate + config.app(c, "inetgen", InetGen, { + sizes = sizes, rate = rate, count = count, single_pass = single_pass, + b4_ipv4 = b4_ipv4, b4_port = b4_port, public_ipv4 = public_ipv4, + frame_overhead = v4_vlan and 4 or 0}) + if v6_output then + config.link(c, v6_output .. " -> inetgen.input") + end + config.link(c, "inetgen.output -> " .. 
v4_input) + end + if v6 then + print() + print(string.format("IPv6: %s > %s: %s:%d > %s:12345", b4_ipv6, aftr_ipv6, b4_ipv4, b4_port, public_ipv4)) + print(" source IPv6 and source IPv4/Port adjusted per client") + local sizes_ipv6 = {} + for i,size in ipairs(sizes) do sizes_ipv6[i] = size + 40 end + print("IPv6 frame sizes: " .. table.concat(sizes_ipv6,",")) + local rate = v4 and rate/2 or rate + config.app(c, "b4gen", B4Gen, { + sizes = sizes, rate = rate, count = count, single_pass = single_pass, + b4_ipv6 = b4_ipv6, aftr_ipv6 = aftr_ipv6, + b4_ipv4 = b4_ipv4, b4_port = b4_port, public_ipv4 = public_ipv4, + frame_overhead = v6_vlan and 4 or 0}) + if v4_output then + config.link(c, v4_output .. " -> b4gen.input") + end + config.link(c, "b4gen.output -> " .. v6_input) + end engine.busywait = true engine.configure(c) @@ -243,6 +386,17 @@ function run (args) timer.activate(t) end - if duration then engine.main({duration=duration}) - else engine.main() end + local done + if duration then + done = lib.timeout(duration) + else + local b4gen = engine.app_table.b4gen + local inetgen = engine.app_table.inetgen + print (b4gen, inetgen) + function done() + return ((not b4gen) or b4gen:done()) and ((not inetgen) or inetgen:done()) + end + end + + engine.main({done=done}) end diff --git a/src/program/packetblaster/lwaftr/test_lwaftr_1.pcap b/src/program/packetblaster/lwaftr/test_lwaftr_1.pcap index a3b001e7fb..e90854031d 100644 Binary files a/src/program/packetblaster/lwaftr/test_lwaftr_1.pcap and b/src/program/packetblaster/lwaftr/test_lwaftr_1.pcap differ diff --git a/src/program/packetblaster/lwaftr/test_lwaftr_2.pcap b/src/program/packetblaster/lwaftr/test_lwaftr_2.pcap index 287dd1b8fb..cafd393cbe 100644 Binary files a/src/program/packetblaster/lwaftr/test_lwaftr_2.pcap and b/src/program/packetblaster/lwaftr/test_lwaftr_2.pcap differ diff --git a/src/program/packetblaster/selftest.sh b/src/program/packetblaster/selftest.sh index aa6a1eaf78..ebd7bbba77 100755 --- 
a/src/program/packetblaster/selftest.sh +++ b/src/program/packetblaster/selftest.sh @@ -16,17 +16,25 @@ function test_lwaftr_pcap { rm $TEMP_PCAP exit 1 fi + if ! which tcpdump; then + echo "Error: no tcpdump to compare packets" + rm $TEMP_PCAP + exit 43 + fi cmp $TEMP_PCAP $PCAP - status=$? + tcpdump -venr $TEMP_PCAP | sort > $TEMP_PCAP.txt rm $TEMP_PCAP - if [ $status != 0 ]; then - echo "Error: lwaftr generated pcap differs from ${PCAP}" + diffies=$(tcpdump -venr $PCAP | sort | diff -u /dev/stdin $TEMP_PCAP.txt) + rm $TEMP_PCAP.txt + if test -n "$diffies"; then + echo "Error: lwaftr generated pcap differs from ${PCAP}:" + echo "$diffies" exit 1 fi } test_lwaftr_pcap program/packetblaster/lwaftr/test_lwaftr_1.pcap --count 1 -test_lwaftr_pcap program/packetblaster/lwaftr/test_lwaftr_2.pcap --count 2 --vlan 100 --size 50 +test_lwaftr_pcap program/packetblaster/lwaftr/test_lwaftr_2.pcap --count 2 --vlan 100 --size 64 # lwaftr tap test sudo ip netns add snabbtest || exit $TEST_SKIPPED diff --git a/src/program/rrdcat/README b/src/program/rrdcat/README new file mode 100644 index 0000000000..99f1757574 --- /dev/null +++ b/src/program/rrdcat/README @@ -0,0 +1,39 @@ +Usage: + rrdcat [OPTIONS] <file> + + -h, --help + Print usage information. + -s, --source <source> + Select source. + -i, --interval <interval> + Select interval. + -l, --list-contents + List contents of <file>. + +Summarize the data recorded in RRD <file> with a normalized ASCII plot. Skips +(with indication) over zero or unknown data rows. Select CDP source and +interval with --source and --interval respectively; list the available sources +and intervals and other metadata with --list-contents. + +Legend: + /---(timestamp) + v + Mon Sep 23 18:41:30 2019 ? <---(unknown data) + ? + ? + ... <---(discontinuity) + Tue Sep 24 17:48:00 2019 [ min:- avg:- max:- <---(zero data) + [ min:- avg:- max:- + [ min:- avg:- max:- + [a min:- avg:1K max:4K <---(abs. 
values) + Tue Sep 24 17:48:30 2019 [ na--m min:9K avg:21K max:42K + [ nam min:20K avg:28K max:40K + [n-a---m min:3K avg:19K max:53K + ^ ^ ^ + | | \---(maximum) + | \---(average) + \---(minimum) + +Examples: + snabb rrdcat --interval 30 rxdrop.rrd + snabb rrdcat --list-contents rxdrop.rrd diff --git a/src/program/rrdcat/README.inc b/src/program/rrdcat/README.inc new file mode 120000 index 0000000000..100b93820a --- /dev/null +++ b/src/program/rrdcat/README.inc @@ -0,0 +1 @@ +README \ No newline at end of file diff --git a/src/program/rrdcat/rrdcat.lua b/src/program/rrdcat/rrdcat.lua new file mode 100644 index 0000000000..e534fb01ef --- /dev/null +++ b/src/program/rrdcat/rrdcat.lua @@ -0,0 +1,278 @@ +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. + +module(..., package.seeall) + +-- snabb rrdcat: summarize the data recorded in an RRD database with a +-- normalized ASCII plot. Skips (with indication) over zero or unknown data +-- rows. + +local rrd = require("lib.rrd") +local lib = require("core.lib") + +local usage = require("program.rrdcat.README_inc") + +local long_opts = { + help = "h", + source = "s", + interval = "i", + ["list-contents"] = "l" +} + +function run (args) + local opt = {} + local source, interval, list_contents + function opt.h () print(usage) main.exit(0) end + function opt.s (arg) source = arg end + function opt.i (arg) interval = tonumber(arg) end + function opt.l () list_contents = true end + args = lib.dogetopt(args, opt, "hs:i:l", long_opts) + + if #args ~= 1 then print(usage) main.exit(1) end + local file = args[1] + + local ok, db = pcall(rrd.open_file, file) + if not ok then + print("Could not open: "..file) + print(db) + main.exit(1) + end + + local sources, default_source = {} + for _, source, typ, heartbeat, min, max in db:isources() do + sources[source] = {typ=typ, heartbeat=heartbeat, min=min, max=max} + default_source = default_source or source + end + + local function list_sources () + 
print("Available sources:") + for source, s in pairs(sources) do + print('', ("%s (type: %s, heartbeat: %d, min: %.2f, max: %.2f)") + :format(source, s.typ, s.heartbeat, + isnan(s.min) and -1/0 or s.min, + isnan(s.max) and 1/0 or s.max)) + end + end + + local intervals, default_interval = {}, 0 + for _, cf, _, window in db:iarchives() do + local interval = window * tonumber(db.fixed.seconds_per_pdp) + intervals[interval] = intervals[interval] or {} + table.insert(intervals[interval], cf) + default_interval = math.max(default_interval, interval) + end + + local function list_intervals () + print("Available intervals:") + for interval, cfs in pairs(intervals) do + print('', ("%d (%s)"):format(interval, table.concat(cfs, ' '))) + end + end + + if list_contents then + list_sources() + list_intervals() + print("Last updated:") + print('', os.date("%c", db:last_update())) + main.exit(0) + end + + if source and not sources[source] then + print("No such source: "..source) + list_sources() + main.exit(1) + end + source = source or default_source + + if interval and not intervals[interval] then + print("Interval not available: "..interval) + list_intervals() + main.exit(1) + end + interval = interval or default_interval + + -- Compile CDPs for source at intervals. + -- (Including unknown (NaN) CDPs.) + local ts = {} -- { t, t-1, t-2, ...} + local rows = {} -- { [t] = { [cf] = { val=x } } } + -- Return row matching source and interval from ref (if any is available.) + local function select_row (ref) + for name, src in pairs(ref) do + if name == source then + local row + for cf, values in pairs(src.cf) do + for _,x in ipairs(values) do + if x.interval == interval then + row = row or {} + row[cf] = {val=x.value} + end + end + end + return row + end + end + end + -- Collect rows. 
-- Walk back in time from the last update (rounded up to a whole minute) in
-- steps of the selected interval, keeping every timestamp for which a row
-- matching source and interval is available.
do
   -- Read once; the value is only used as a fixed reference point here.
   local last_update = db:last_update()
   -- A zero step would make the numeric for loop spin forever (interval is
   -- zero when no archive contributed a default), so skip collection then.
   if interval > 0 then
      for t = math.ceil(last_update/60)*60, 0, -interval do
         local row = select_row(db:ref(t))
         if row then
            ts[#ts+1] = t
            rows[t] = row
         elseif t < last_update then
            -- No row and t is before last update:
            -- looks like end of data.
            break
         end
      end
   end
end
-- Sort timestamps for data points chronologically.
table.sort(ts)

-- Select any CDP in row (preference order: max, average, last, min).
local function any (row)
   return row.max or row.average or row.last or row.min
end

-- Compute the maximum known value in the selected CDPs (never below zero).
-- Stays nil if every CDP is unknown (NaN).
local maxval
for _, row in pairs(rows) do
   for _, cdp in pairs(row) do
      if not isnan(cdp.val) then
         maxval = math.max(maxval or 0, cdp.val)
      end
   end
end

-- Compute width-relative value (0..width columns) for each known CDP.
-- Unknown CDPs get no rel field.
local width = 20
for _, row in pairs(rows) do
   for _, cdp in pairs(row) do
      if not isnan(cdp.val) then
         if maxval > 0 then
            -- Clamp at zero so that negative values cannot produce a
            -- negative column and throw off the padding in plot.
            cdp.rel = math.max(0, math.ceil((cdp.val/maxval) * width))
         else
            -- All known values are zero: avoid 0/0 (NaN) positions.
            cdp.rel = 0
         end
      end
   end
end

-- Format timestamp label every four rows: write the date for t to out,
-- then blank padding of the same length for the next tl_delta rows.
local tl_delta = 3
local tl_delta_ctr = 0
local date
local function tl (out, t)
   if tl_delta_ctr == 0 then
      date = os.date("%c", t)
      out:write(date)
      tl_delta_ctr = tl_delta
   else
      out:write((" "):rep(#date))
      tl_delta_ctr = tl_delta_ctr - 1
   end
end

-- Plot a width-relative distribution for row to out, using the markers
-- n (min), a (average) or l (last), and m (max), padded to width columns.
local function plot (out, row)
   local fill = 0 -- columns written so far
   -- Pad with bar up to the column before cdp's position, then write glyph
   -- unless that column is already taken. CDPs that are absent or unknown
   -- (no rel) are skipped instead of raising an arithmetic error.
   local function mark (cdp, bar, glyph)
      if not (cdp and cdp.rel) then return end
      out:write(bar:rep(math.max(0, cdp.rel-fill-1)))
      out:write(glyph:rep(math.min(1, cdp.rel-fill)))
      fill = cdp.rel
   end
   if isnan(any(row).val) then
      -- Unknown data in row.
      out:write(" ?")
   else
      -- Plot row.
      out:write(" [")
      mark(row.min, " ", "n")
      -- Connect min and max with a dash line only if both are present.
      local bar = (row.min and row.max) and "-" or " "
      if row.average then
         mark(row.average, bar, "a")
      else
         mark(row.last, bar, "l")
      end
      mark(row.max, "-", "m")
   end
   out:write((" "):rep(width-fill))
end

-- Pretty-print numeric value: "?" for unknown (NaN), "-" for zero, two
-- decimals below 100, an integer below 1000, then K and M suffixes.
local function pp (val)
   local function round (n)
      -- round to nearest integer
      return math.floor(n+.5)
   end
   if isnan(val) then
      return "?"
   elseif val == 0 then
      return "-"
   elseif val < 1e2 then
      return ("%.2f"):format(val)
   elseif val < 1e3 then
      return tostring(round(val))
   elseif val < 1e6 then
      return ("%dK"):format(round(val/1e3))
   else
      return ("%dM"):format(round(val/1e6))
   end
end

-- Format value summary for row: one labelled value per CDP present, in
-- the order min, average, last, max.
local summary = {
   { "min",     " min:%s" },
   { "average", " avg:%s" },
   { "last",    " lst:%s" },
   { "max",     " max:%s" },
}
local function vals (out, row)
   if isnan(any(row).val) then
      -- Unknown data in row, do not try to summarize.
      return
   end
   for _, item in ipairs(summary) do
      local cdp = row[item[1]]
      if cdp then
         out:write(item[2]:format(pp(cdp.val)))
      end
   end
end

-- Snip after three consecutive zero or unknown data rows.
-- snipz(val) decides whether the row carrying val should be left out.
-- A positive, known value re-arms the allowance of snipz_after rows; any
-- other value uses up one unit of it, and once it is used up snipz returns
-- true (snip this row).
local snipz_after = 3
local snipz_thr = snipz_after
local function snipz (val)
   if val > 0 and not isnan(val) then
      snipz_thr = snipz_after
      return
   end
   if snipz_thr == 0 then
      return true
   end
   snipz_thr = snipz_thr - 1
end

-- Print and plot non-zero row clusters; each snipped run of rows is
-- replaced by a single "..." line.
local out = io.stdout
local snipped = false
for i = 1, #ts do
   local t = ts[i]
   local row = rows[t]
   if snipz(any(row).val) then
      if not snipped then
         out:write("...\n")
         snipped = true
         -- Make the next printed row start with a timestamp label again.
         tl_delta_ctr = 0
      end
   else
      tl(out, t)
      plot(out, row)
      vals(out, row)
      out:write("\n")
      snipped = false
   end
end

-- fin
main.exit(0)
end -- closes the enclosing function, whose header precedes this chunk

-- NaN values indicate unknown data.
-- NaN is the only number that does not compare equal to itself.
function isnan (x)
   return not (x == x)
end