From 5100c5db7fa79e409bb0bfae101d32e98e41f23f Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Tue, 25 Jul 2017 16:52:12 +0200 Subject: [PATCH 01/72] wip: MCRingBuffer for packets. --- src/apps/inter/mcp_ring.lua | 86 +++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 src/apps/inter/mcp_ring.lua diff --git a/src/apps/inter/mcp_ring.lua b/src/apps/inter/mcp_ring.lua new file mode 100644 index 0000000000..a3503dece0 --- /dev/null +++ b/src/apps/inter/mcp_ring.lua @@ -0,0 +1,86 @@ +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. + +module(...,package.seeall) + +local shm = require("core.shm") +local ffi = require("ffi") +local band = require("bit").band + +function mcp_t (size) + local cacheline = 64 -- XXX - make dynamic + local int = ffi.sizeof("int") + return ffi.typeof([[struct { + char pad0[]]..cacheline..[[]; + int read, write; + char pad1[]]..cacheline-2*int..[[]; + int lwrite, nread, rbatch; + char pad2[]]..cacheline-3*int..[[]; + int lread, nwrite, wbatch; + char pad3[]]..cacheline-3*int..[[]; + int max, batch; + char pad4[]]..cacheline-2*int..[[]; + struct packet *packets[]]..size..[[]; + }]]) +end + +function create_mcp (size, batch, name) + assert(band(size, size-1) == 0, "size is not a power of two") + assert(batch <= size, "batch is greater than size") + local r = shm.create(name, mcp_t(size)) + r.max = size-1 + r.batch = batch + return r +end + +local function NEXT (r, i) + return band(i + 1, r.max) +end + +function mcp_insert (r, p) + local after_nwrite = NEXT(r, r.nwrite) + if after_nwrite == r.lread then + if after_nwrite == r.read then + return false + end + r.lread = r.read + end + r.packets[r.nwrite] = p + r.nwrite = after_nwrite + r.wbatch = r.wbatch + 1 + if r.wbatch >= r.batch then + r.write = r.nwrite + r.wbatch = 0 + end + return true +end + +function mcp_push (r) + if r.wbatch > 0 then + r.write = r.nwrite + r.wbatch = 0 + end +end + +function mcp_extract (r) + if 
r.nread == r.lwrite then + if r.nread == r.write then + return nil + end + r.lwrite = r.write + end + local p = r.packets[r.nread] + r.nread = NEXT(r, r.nread) + r.rbatch = r.rbatch + 1 + if r.rbatch > r.batch then + r.read = r.nread + r.rbatch = 0 + end + return p +end + +function mcp_pull (r) + if r.rbatch > 0 then + r.read = r.nread + r.rbatch = 0 + end +end From 3ddb3f317f4e8aa12de4cd41901f220a2d63c0e0 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Tue, 25 Jul 2017 18:22:53 +0200 Subject: [PATCH 02/72] manual batching --- src/apps/inter/mcp_ring.lua | 57 ++++++++++++++----------------------- 1 file changed, 22 insertions(+), 35 deletions(-) diff --git a/src/apps/inter/mcp_ring.lua b/src/apps/inter/mcp_ring.lua index a3503dece0..1791455c74 100644 --- a/src/apps/inter/mcp_ring.lua +++ b/src/apps/inter/mcp_ring.lua @@ -13,22 +13,20 @@ function mcp_t (size) char pad0[]]..cacheline..[[]; int read, write; char pad1[]]..cacheline-2*int..[[]; - int lwrite, nread, rbatch; - char pad2[]]..cacheline-3*int..[[]; - int lread, nwrite, wbatch; - char pad3[]]..cacheline-3*int..[[]; - int max, batch; - char pad4[]]..cacheline-2*int..[[]; + int lwrite, nread; + char pad2[]]..cacheline-2*int..[[]; + int lread, nwrite; + char pad3[]]..cacheline-2*int..[[]; + int max; + char pad4[]]..cacheline-1*int..[[]; struct packet *packets[]]..size..[[]; }]]) end -function create_mcp (size, batch, name) +function create (size, name) assert(band(size, size-1) == 0, "size is not a power of two") - assert(batch <= size, "batch is greater than size") local r = shm.create(name, mcp_t(size)) r.max = size-1 - r.batch = batch return r end @@ -36,51 +34,40 @@ local function NEXT (r, i) return band(i + 1, r.max) end -function mcp_insert (r, p) +function full (r) local after_nwrite = NEXT(r, r.nwrite) if after_nwrite == r.lread then if after_nwrite == r.read then - return false + return true end r.lread = r.read end +end + +function insert (r, p) r.packets[r.nwrite] = p - r.nwrite = after_nwrite - 
r.wbatch = r.wbatch + 1 - if r.wbatch >= r.batch then - r.write = r.nwrite - r.wbatch = 0 - end - return true + r.nwrite = NEXT(r, r.nwrite) end -function mcp_push (r) - if r.wbatch > 0 then - r.write = r.nwrite - r.wbatch = 0 - end +function push (r) + r.write = r.nwrite end -function mcp_extract (r) +function empty (r) if r.nread == r.lwrite then if r.nread == r.write then - return nil + return true end r.lwrite = r.write end +end + +function extract (r) local p = r.packets[r.nread] r.nread = NEXT(r, r.nread) - r.rbatch = r.rbatch + 1 - if r.rbatch > r.batch then - r.read = r.nread - r.rbatch = 0 - end return p end -function mcp_pull (r) - if r.rbatch > 0 then - r.read = r.nread - r.rbatch = 0 - end +function pull (r) + r.read = r.nread end From 86a6ee975a1ac6c08ce4b6659b0af6d56bfd30e6 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Tue, 25 Jul 2017 19:08:46 +0200 Subject: [PATCH 03/72] balance packets --- src/apps/inter/mcp_ring.lua | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/apps/inter/mcp_ring.lua b/src/apps/inter/mcp_ring.lua index 1791455c74..bafa33e1bc 100644 --- a/src/apps/inter/mcp_ring.lua +++ b/src/apps/inter/mcp_ring.lua @@ -26,10 +26,20 @@ end function create (size, name) assert(band(size, size-1) == 0, "size is not a power of two") local r = shm.create(name, mcp_t(size)) - r.max = size-1 + r.max = size - 1 + r.nwrite = r.max -- “full” until initlaized return r end +function init (r) -- initialization must be performed by consumer + assert(full(r) and empty(r)) -- only satisfied if uninitialized + repeat + r.packets[r.nwrite] = packet.allocate() + r.nwrite = r.nwrite - 1 + until r.nwrite == 0 + r.packets[r.nwrite] = packet.allocate() +end + local function NEXT (r, i) return band(i + 1, r.max) end @@ -45,6 +55,7 @@ function full (r) end function insert (r, p) + packet.free(r.packets[r.nwrite]) r.packets[r.nwrite] = p r.nwrite = NEXT(r, r.nwrite) end @@ -64,6 +75,7 @@ end function extract (r) local p = 
r.packets[r.nread] + r.packets[r.nread] = packet.allocate() r.nread = NEXT(r, r.nread) return p end From 293892b85220de7c9613913cb3027d414e00153c Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Tue, 25 Jul 2017 19:56:06 +0200 Subject: [PATCH 04/72] apps/test --- src/apps/inter/receiver.lua | 44 +++++++++++++++++++++++++++++ src/apps/inter/receiver_test.lua | 13 +++++++++ src/apps/inter/transmitter.lua | 42 +++++++++++++++++++++++++++ src/apps/inter/transmitter_test.lua | 13 +++++++++ 4 files changed, 112 insertions(+) create mode 100644 src/apps/inter/receiver.lua create mode 100644 src/apps/inter/receiver_test.lua create mode 100644 src/apps/inter/transmitter.lua create mode 100644 src/apps/inter/transmitter_test.lua diff --git a/src/apps/inter/receiver.lua b/src/apps/inter/receiver.lua new file mode 100644 index 0000000000..1fa2d7fe80 --- /dev/null +++ b/src/apps/inter/receiver.lua @@ -0,0 +1,44 @@ +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. + +module(...,package.seeall) + +local shm = require("core.shm") +local ring = require("apps.inter.mcp_ring") + +Receiver = { + config = { + name = {required=true}, + create = {default=false}, + size = {default=link.max+1} + } +} + +function Receiver:new (conf) + local self = {} + if conf.create then + self.ring = ring.create(conf.size, conf.name) + self.destroy = conf.name + else + self.ring = shm.open(conf.name, ring.mcp_t(conf.size)) + end + ring.init(self.ring) + return setmetatable(self, {__index=Receiver}) +end + +function Receiver:pull () + local o, r, n = self.output.output, self.ring, 0 + while not ring.empty(r) and n < engine.pull_npackets do + link.transmit(o, ring.extract(r)) + n = n + 1 + end + ring.pull(r) +end + +function Receiver:stop () + shm.unmap(self.ring) + if self.destroy then + shm.unlink(self.destroy) + end +end + +return Receiver diff --git a/src/apps/inter/receiver_test.lua b/src/apps/inter/receiver_test.lua new file mode 100644 index 0000000000..4a05c93d06 
--- /dev/null +++ b/src/apps/inter/receiver_test.lua @@ -0,0 +1,13 @@ +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. + +local Receiver = require("apps.inter.receiver") +local Sink = require("apps.basic.basic_apps").Sink + +local c = config.new() + +config.app(c, "rx", Receiver, {name="/inter_test.mcp", create=true}) +config.app(c, "sink", Sink) +config.link(c, "rx.output->sink.input") + +engine.configure(c) +engine.main({duration=10, report={showlinks=true}}) diff --git a/src/apps/inter/transmitter.lua b/src/apps/inter/transmitter.lua new file mode 100644 index 0000000000..13522a2a19 --- /dev/null +++ b/src/apps/inter/transmitter.lua @@ -0,0 +1,42 @@ +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. + +module(...,package.seeall) + +local shm = require("core.shm") +local ring = require("apps.inter.mcp_ring") + +Transmitter = { + config = { + name = {required=true}, + create = {default=false}, + size = {default=link.max+1} + } +} + +function Transmitter:new (conf) + local self = {} + if conf.create then + self.ring = ring.create(conf.size, conf.name) + self.destroy = conf.name + else + self.ring = shm.open(conf.name, ring.mcp_t(conf.size)) + end + return setmetatable(self, {__index=Transmitter}) +end + +function Transmitter:push () + local i, r = self.input.input, self.ring + while not (ring.full(r) or link.empty(i)) do + ring.insert(r, link.receive(i)) + end + ring.push(r) +end + +function Transmitter:stop () + shm.unmap(self.ring) + if self.destroy then + shm.unlink(self.destroy) + end +end + +return Transmitter diff --git a/src/apps/inter/transmitter_test.lua b/src/apps/inter/transmitter_test.lua new file mode 100644 index 0000000000..1619d60688 --- /dev/null +++ b/src/apps/inter/transmitter_test.lua @@ -0,0 +1,13 @@ +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. 
+ +local Transmitter = require("apps.inter.transmitter") +local Source = require("apps.basic.basic_apps").Source + +local c = config.new() + +config.app(c, "tx", Transmitter, {name="/inter_test.mcp"}) +config.app(c, "source", Source) +config.link(c, "source.output->tx.input") + +engine.configure(c) +engine.main({duration=10, report={showlinks=true}}) From 9cdd697edb7ce62d9e8088fc87aab17a02fe0613 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Tue, 25 Jul 2017 23:01:00 +0200 Subject: [PATCH 05/72] worker --- src/apps/inter/inter_test.lua | 26 ++++++++++++++++++++++++++ src/apps/inter/receiver_test.lua | 13 ------------- src/apps/inter/test_source.lua | 15 +++++++++++++++ src/apps/inter/transmitter_test.lua | 13 ------------- 4 files changed, 41 insertions(+), 26 deletions(-) create mode 100644 src/apps/inter/inter_test.lua delete mode 100644 src/apps/inter/receiver_test.lua create mode 100644 src/apps/inter/test_source.lua delete mode 100644 src/apps/inter/transmitter_test.lua diff --git a/src/apps/inter/inter_test.lua b/src/apps/inter/inter_test.lua new file mode 100644 index 0000000000..36e3bcd319 --- /dev/null +++ b/src/apps/inter/inter_test.lua @@ -0,0 +1,26 @@ +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. 
+ +local worker = require("core.worker") +local ring = require("apps.inter.mcp_ring") +local Receiver = require("apps.inter.receiver") +local Sink = require("apps.basic.basic_apps").Sink + +ring.create(link.max+1, "group/test.mcp") + +worker.start("source", + [[require("apps.inter.test_source").start("group/test.mcp")]]) + +local c = config.new() + +config.app(c, "rx", Receiver, {name="group/test.mcp"}) +config.app(c, "sink", Sink) +config.link(c, "rx.output->sink.input") + +engine.configure(c) +engine.main({duration=10, report={showlinks=true}}) +for w, s in pairs(worker.status()) do + print((" worker %s: pid=%s alive=%s"):format( + w, s.pid, s.alive)) +end +local stats = link.stats(engine.app_table["sink"].input.input) +print(stats.txpackets / 1e6 / 10 .. " Mpps") diff --git a/src/apps/inter/receiver_test.lua b/src/apps/inter/receiver_test.lua deleted file mode 100644 index 4a05c93d06..0000000000 --- a/src/apps/inter/receiver_test.lua +++ /dev/null @@ -1,13 +0,0 @@ --- Use of this source code is governed by the Apache 2.0 license; see COPYING. - -local Receiver = require("apps.inter.receiver") -local Sink = require("apps.basic.basic_apps").Sink - -local c = config.new() - -config.app(c, "rx", Receiver, {name="/inter_test.mcp", create=true}) -config.app(c, "sink", Sink) -config.link(c, "rx.output->sink.input") - -engine.configure(c) -engine.main({duration=10, report={showlinks=true}}) diff --git a/src/apps/inter/test_source.lua b/src/apps/inter/test_source.lua new file mode 100644 index 0000000000..4a3ebec35b --- /dev/null +++ b/src/apps/inter/test_source.lua @@ -0,0 +1,15 @@ +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. 
+ +module(...,package.seeall) + +local Transmitter = require("apps.inter.transmitter") +local Source = require("apps.basic.basic_apps").Source + +function start (ring_name) + local c = config.new() + config.app(c, "tx", Transmitter, {name=ring_name}) + config.app(c, "source", Source) + config.link(c, "source.output->tx.input") + engine.configure(c) + engine.main() +end diff --git a/src/apps/inter/transmitter_test.lua b/src/apps/inter/transmitter_test.lua deleted file mode 100644 index 1619d60688..0000000000 --- a/src/apps/inter/transmitter_test.lua +++ /dev/null @@ -1,13 +0,0 @@ --- Use of this source code is governed by the Apache 2.0 license; see COPYING. - -local Transmitter = require("apps.inter.transmitter") -local Source = require("apps.basic.basic_apps").Source - -local c = config.new() - -config.app(c, "tx", Transmitter, {name="/inter_test.mcp"}) -config.app(c, "source", Source) -config.link(c, "source.output->tx.input") - -engine.configure(c) -engine.main({duration=10, report={showlinks=true}}) From 322b61ccfa3653b1fa26df0bcc6b9dd5fd3ded86 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Tue, 25 Jul 2017 23:16:00 +0200 Subject: [PATCH 06/72] factor --- src/apps/inter/inter_test.lua | 5 ++-- src/apps/inter/mcp_ring.lua | 43 ++++++++++++++++++---------------- src/apps/inter/receiver.lua | 9 ++++--- src/apps/inter/transmitter.lua | 9 ++++--- 4 files changed, 34 insertions(+), 32 deletions(-) diff --git a/src/apps/inter/inter_test.lua b/src/apps/inter/inter_test.lua index 36e3bcd319..18a8019160 100644 --- a/src/apps/inter/inter_test.lua +++ b/src/apps/inter/inter_test.lua @@ -5,7 +5,7 @@ local ring = require("apps.inter.mcp_ring") local Receiver = require("apps.inter.receiver") local Sink = require("apps.basic.basic_apps").Sink -ring.create(link.max+1, "group/test.mcp") +ring.create("group/test.mcp") worker.start("source", [[require("apps.inter.test_source").start("group/test.mcp")]]) @@ -18,8 +18,9 @@ config.link(c, "rx.output->sink.input") 
engine.configure(c) engine.main({duration=10, report={showlinks=true}}) + for w, s in pairs(worker.status()) do - print((" worker %s: pid=%s alive=%s"):format( + print(("worker %s: pid=%s alive=%s"):format( w, s.pid, s.alive)) end local stats = link.stats(engine.app_table["sink"].input.input) diff --git a/src/apps/inter/mcp_ring.lua b/src/apps/inter/mcp_ring.lua index bafa33e1bc..ef5e9d26b1 100644 --- a/src/apps/inter/mcp_ring.lua +++ b/src/apps/inter/mcp_ring.lua @@ -2,31 +2,34 @@ module(...,package.seeall) +-- Based on MCRingBuffer, see +-- http://www.cse.cuhk.edu.hk/%7Epclee/www/pubs/ipdps10.pdf + local shm = require("core.shm") local ffi = require("ffi") local band = require("bit").band -function mcp_t (size) - local cacheline = 64 -- XXX - make dynamic - local int = ffi.sizeof("int") - return ffi.typeof([[struct { - char pad0[]]..cacheline..[[]; - int read, write; - char pad1[]]..cacheline-2*int..[[]; - int lwrite, nread; - char pad2[]]..cacheline-2*int..[[]; - int lread, nwrite; - char pad3[]]..cacheline-2*int..[[]; - int max; - char pad4[]]..cacheline-1*int..[[]; - struct packet *packets[]]..size..[[]; - }]]) -end +local SIZE = link.max + 1 +local CACHELINE = 64 -- XXX - make dynamic +local INT = ffi.sizeof("int") + +mcp_t = ffi.typeof([[struct { + char pad0[]]..CACHELINE..[[]; + int read, write; + char pad1[]]..CACHELINE-2*INT..[[]; + int lwrite, nread; + char pad2[]]..CACHELINE-2*INT..[[]; + int lread, nwrite; + char pad3[]]..CACHELINE-2*INT..[[]; + int max; + char pad4[]]..CACHELINE-1*INT..[[]; + struct packet *packets[]]..SIZE..[[]; +}]]) -function create (size, name) - assert(band(size, size-1) == 0, "size is not a power of two") - local r = shm.create(name, mcp_t(size)) - r.max = size - 1 +function create (name) + assert(band(SIZE, SIZE-1) == 0, "SIZE is not a power of two") + local r = shm.create(name, mcp_t) + r.max = SIZE - 1 r.nwrite = r.max -- “full” until initlaized return r end diff --git a/src/apps/inter/receiver.lua 
b/src/apps/inter/receiver.lua index 1fa2d7fe80..8604f8b958 100644 --- a/src/apps/inter/receiver.lua +++ b/src/apps/inter/receiver.lua @@ -5,21 +5,20 @@ module(...,package.seeall) local shm = require("core.shm") local ring = require("apps.inter.mcp_ring") -Receiver = { +local Receiver = { config = { name = {required=true}, - create = {default=false}, - size = {default=link.max+1} + create = {default=false} } } function Receiver:new (conf) local self = {} if conf.create then - self.ring = ring.create(conf.size, conf.name) + self.ring = ring.create(conf.name) self.destroy = conf.name else - self.ring = shm.open(conf.name, ring.mcp_t(conf.size)) + self.ring = shm.open(conf.name, ring.mcp_t) end ring.init(self.ring) return setmetatable(self, {__index=Receiver}) diff --git a/src/apps/inter/transmitter.lua b/src/apps/inter/transmitter.lua index 13522a2a19..9bf2ace57e 100644 --- a/src/apps/inter/transmitter.lua +++ b/src/apps/inter/transmitter.lua @@ -5,21 +5,20 @@ module(...,package.seeall) local shm = require("core.shm") local ring = require("apps.inter.mcp_ring") -Transmitter = { +local Transmitter = { config = { name = {required=true}, - create = {default=false}, - size = {default=link.max+1} + create = {default=false} } } function Transmitter:new (conf) local self = {} if conf.create then - self.ring = ring.create(conf.size, conf.name) + self.ring = ring.create(conf.name) self.destroy = conf.name else - self.ring = shm.open(conf.name, ring.mcp_t(conf.size)) + self.ring = shm.open(conf.name, ring.mcp_t) end return setmetatable(self, {__index=Transmitter}) end From 6306ed7d03f0795494ddf10ac55291c58db343db Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 26 Jul 2017 00:06:58 +0200 Subject: [PATCH 07/72] core.worker: include si_status in status --- src/apps/inter/inter_test.lua | 4 ++-- src/core/worker.lua | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/apps/inter/inter_test.lua b/src/apps/inter/inter_test.lua index 
18a8019160..67c0f4cd82 100644 --- a/src/apps/inter/inter_test.lua +++ b/src/apps/inter/inter_test.lua @@ -20,8 +20,8 @@ engine.configure(c) engine.main({duration=10, report={showlinks=true}}) for w, s in pairs(worker.status()) do - print(("worker %s: pid=%s alive=%s"):format( - w, s.pid, s.alive)) + print(("worker %s: pid=%s alive=%s status=%s"):format( + w, s.pid, s.alive, s.status)) end local stats = link.stats(engine.app_table["sink"].input.input) print(stats.txpackets / 1e6 / 10 .. " Mpps") diff --git a/src/core/worker.lua b/src/core/worker.lua index 33dd9e3f41..3ebd81fbd5 100644 --- a/src/core/worker.lua +++ b/src/core/worker.lua @@ -63,7 +63,8 @@ function status () local infop = S.waitid("pid", info.pid, "nohang, exited") status[name] = { pid = info.pid, - alive = infop and infop.code == 0 or false + alive = infop and infop.code == 0 or false, + status = infop and infop.status } end return status From 4e538cf1da25a783de99eb33e180353d496d9d5e Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 26 Jul 2017 01:15:05 +0200 Subject: [PATCH 08/72] fix init race --- src/apps/inter/mcp_ring.lua | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/apps/inter/mcp_ring.lua b/src/apps/inter/mcp_ring.lua index ef5e9d26b1..780aa7730c 100644 --- a/src/apps/inter/mcp_ring.lua +++ b/src/apps/inter/mcp_ring.lua @@ -36,11 +36,10 @@ end function init (r) -- initialization must be performed by consumer assert(full(r) and empty(r)) -- only satisfied if uninitialized - repeat - r.packets[r.nwrite] = packet.allocate() - r.nwrite = r.nwrite - 1 - until r.nwrite == 0 - r.packets[r.nwrite] = packet.allocate() + for i = 0, r.max do + r.packets[i] = packet.allocate() + end + r.nwrite = 0 end local function NEXT (r, i) From 626c8b6e4b3cede9e2e4e13db5261685ac6336fc Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 26 Jul 2017 01:49:13 +0200 Subject: [PATCH 09/72] reorg --- src/apps/{inter => interlink}/receiver.lua | 18 +++++++++--------- 
.../inter_test.lua => interlink/test.lua} | 8 ++++---- src/apps/{inter => interlink}/test_source.lua | 6 +++--- src/apps/{inter => interlink}/transmitter.lua | 16 ++++++++-------- .../inter/mcp_ring.lua => lib/interlink.lua} | 4 ++-- 5 files changed, 26 insertions(+), 26 deletions(-) rename src/apps/{inter => interlink}/receiver.lua (58%) rename src/apps/{inter/inter_test.lua => interlink/test.lua} (75%) rename src/apps/{inter => interlink}/test_source.lua (69%) rename src/apps/{inter => interlink}/transmitter.lua (62%) rename src/{apps/inter/mcp_ring.lua => lib/interlink.lua} (95%) diff --git a/src/apps/inter/receiver.lua b/src/apps/interlink/receiver.lua similarity index 58% rename from src/apps/inter/receiver.lua rename to src/apps/interlink/receiver.lua index 8604f8b958..80721f8a01 100644 --- a/src/apps/inter/receiver.lua +++ b/src/apps/interlink/receiver.lua @@ -3,7 +3,7 @@ module(...,package.seeall) local shm = require("core.shm") -local ring = require("apps.inter.mcp_ring") +local interlink = require("lib.interlink") local Receiver = { config = { @@ -15,26 +15,26 @@ local Receiver = { function Receiver:new (conf) local self = {} if conf.create then - self.ring = ring.create(conf.name) + self.interlink = interlink.create(conf.name) self.destroy = conf.name else - self.ring = shm.open(conf.name, ring.mcp_t) + self.interlink = shm.open(conf.name, "struct interlink") end - ring.init(self.ring) + interlink.init(self.interlink) return setmetatable(self, {__index=Receiver}) end function Receiver:pull () - local o, r, n = self.output.output, self.ring, 0 - while not ring.empty(r) and n < engine.pull_npackets do - link.transmit(o, ring.extract(r)) + local o, r, n = self.output.output, self.interlink, 0 + while not interlink.empty(r) and n < engine.pull_npackets do + link.transmit(o, interlink.extract(r)) n = n + 1 end - ring.pull(r) + interlink.pull(r) end function Receiver:stop () - shm.unmap(self.ring) + shm.unmap(self.interlink) if self.destroy then 
shm.unlink(self.destroy) end diff --git a/src/apps/inter/inter_test.lua b/src/apps/interlink/test.lua similarity index 75% rename from src/apps/inter/inter_test.lua rename to src/apps/interlink/test.lua index 67c0f4cd82..c6beae57c6 100644 --- a/src/apps/inter/inter_test.lua +++ b/src/apps/interlink/test.lua @@ -1,14 +1,14 @@ -- Use of this source code is governed by the Apache 2.0 license; see COPYING. local worker = require("core.worker") -local ring = require("apps.inter.mcp_ring") -local Receiver = require("apps.inter.receiver") +local interlink = require("lib.interlink") +local Receiver = require("apps.interlink.receiver") local Sink = require("apps.basic.basic_apps").Sink -ring.create("group/test.mcp") +interlink.create("group/test.mcp") worker.start("source", - [[require("apps.inter.test_source").start("group/test.mcp")]]) + [[require("apps.interlink.test_source").start("group/test.mcp")]]) local c = config.new() diff --git a/src/apps/inter/test_source.lua b/src/apps/interlink/test_source.lua similarity index 69% rename from src/apps/inter/test_source.lua rename to src/apps/interlink/test_source.lua index 4a3ebec35b..86a057c6d3 100644 --- a/src/apps/inter/test_source.lua +++ b/src/apps/interlink/test_source.lua @@ -2,12 +2,12 @@ module(...,package.seeall) -local Transmitter = require("apps.inter.transmitter") +local Transmitter = require("apps.interlink.transmitter") local Source = require("apps.basic.basic_apps").Source -function start (ring_name) +function start (link_name) local c = config.new() - config.app(c, "tx", Transmitter, {name=ring_name}) + config.app(c, "tx", Transmitter, {name=link_name}) config.app(c, "source", Source) config.link(c, "source.output->tx.input") engine.configure(c) diff --git a/src/apps/inter/transmitter.lua b/src/apps/interlink/transmitter.lua similarity index 62% rename from src/apps/inter/transmitter.lua rename to src/apps/interlink/transmitter.lua index 9bf2ace57e..881a08a348 100644 --- a/src/apps/inter/transmitter.lua +++ 
b/src/apps/interlink/transmitter.lua @@ -3,7 +3,7 @@ module(...,package.seeall) local shm = require("core.shm") -local ring = require("apps.inter.mcp_ring") +local interlink = require("lib.interlink") local Transmitter = { config = { @@ -15,24 +15,24 @@ local Transmitter = { function Transmitter:new (conf) local self = {} if conf.create then - self.ring = ring.create(conf.name) + self.interlink = interlink.create(conf.name) self.destroy = conf.name else - self.ring = shm.open(conf.name, ring.mcp_t) + self.interlink = shm.open(conf.name, "struct interlink") end return setmetatable(self, {__index=Transmitter}) end function Transmitter:push () - local i, r = self.input.input, self.ring - while not (ring.full(r) or link.empty(i)) do - ring.insert(r, link.receive(i)) + local i, r = self.input.input, self.interlink + while not (interlink.full(r) or link.empty(i)) do + interlink.insert(r, link.receive(i)) end - ring.push(r) + interlink.push(r) end function Transmitter:stop () - shm.unmap(self.ring) + shm.unmap(self.interlink) if self.destroy then shm.unlink(self.destroy) end diff --git a/src/apps/inter/mcp_ring.lua b/src/lib/interlink.lua similarity index 95% rename from src/apps/inter/mcp_ring.lua rename to src/lib/interlink.lua index 780aa7730c..baa317e9d9 100644 --- a/src/apps/inter/mcp_ring.lua +++ b/src/lib/interlink.lua @@ -13,7 +13,7 @@ local SIZE = link.max + 1 local CACHELINE = 64 -- XXX - make dynamic local INT = ffi.sizeof("int") -mcp_t = ffi.typeof([[struct { +ffi.cdef([[ struct interlink { char pad0[]]..CACHELINE..[[]; int read, write; char pad1[]]..CACHELINE-2*INT..[[]; @@ -28,7 +28,7 @@ mcp_t = ffi.typeof([[struct { function create (name) assert(band(SIZE, SIZE-1) == 0, "SIZE is not a power of two") - local r = shm.create(name, mcp_t) + local r = shm.create(name, "struct interlink") r.max = SIZE - 1 r.nwrite = r.max -- “full” until initlaized return r From ac886badfea790f2b2c80ff68f6e8460ce73889d Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 26 
Jul 2017 02:01:55 +0200 Subject: [PATCH 10/72] global max --- src/lib/interlink.lua | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/lib/interlink.lua b/src/lib/interlink.lua index baa317e9d9..d239b14428 100644 --- a/src/lib/interlink.lua +++ b/src/lib/interlink.lua @@ -13,6 +13,8 @@ local SIZE = link.max + 1 local CACHELINE = 64 -- XXX - make dynamic local INT = ffi.sizeof("int") +assert(band(SIZE, SIZE-1) == 0, "SIZE is not a power of two") + ffi.cdef([[ struct interlink { char pad0[]]..CACHELINE..[[]; int read, write; @@ -21,29 +23,25 @@ ffi.cdef([[ struct interlink { char pad2[]]..CACHELINE-2*INT..[[]; int lread, nwrite; char pad3[]]..CACHELINE-2*INT..[[]; - int max; - char pad4[]]..CACHELINE-1*INT..[[]; struct packet *packets[]]..SIZE..[[]; }]]) function create (name) - assert(band(SIZE, SIZE-1) == 0, "SIZE is not a power of two") local r = shm.create(name, "struct interlink") - r.max = SIZE - 1 - r.nwrite = r.max -- “full” until initlaized + r.nwrite = link.max -- “full” until initlaized return r end function init (r) -- initialization must be performed by consumer assert(full(r) and empty(r)) -- only satisfied if uninitialized - for i = 0, r.max do + for i = 0, link.max do r.packets[i] = packet.allocate() end r.nwrite = 0 end local function NEXT (r, i) - return band(i + 1, r.max) + return band(i + 1, link.max) end function full (r) From 06053f695c3ebe2bc7a8b9228b01fb5887aeb9ed Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Tue, 1 Aug 2017 15:11:24 +0200 Subject: [PATCH 11/72] lib.interlink: fix initialization race. 
--- src/lib/interlink.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/interlink.lua b/src/lib/interlink.lua index d239b14428..cd2e12412c 100644 --- a/src/lib/interlink.lua +++ b/src/lib/interlink.lua @@ -33,7 +33,7 @@ function create (name) end function init (r) -- initialization must be performed by consumer - assert(full(r) and empty(r)) -- only satisfied if uninitialized + assert(r.packets[0] == ffi.new("void *")) -- only satisfied if uninitialized for i = 0, link.max do r.packets[i] = packet.allocate() end From 2e32f8387dc8f5bab281aeef43ee5609124adc94 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 16 Aug 2017 13:54:41 +0200 Subject: [PATCH 12/72] core.worker: include si_status in status # Conflicts: # src/apps/inter/inter_test.lua --- src/core/worker.lua | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core/worker.lua b/src/core/worker.lua index 33dd9e3f41..3ebd81fbd5 100644 --- a/src/core/worker.lua +++ b/src/core/worker.lua @@ -63,7 +63,8 @@ function status () local infop = S.waitid("pid", info.pid, "nohang, exited") status[name] = { pid = info.pid, - alive = infop and infop.code == 0 or false + alive = infop and infop.code == 0 or false, + status = infop and infop.status } end return status From 47d87924e46302d262b4a171cb780b572a85d399 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Tue, 25 Jul 2017 16:52:12 +0200 Subject: [PATCH 13/72] lib.interlink: specialized inter-process links --- src/apps/interlink/receiver.lua | 43 +++++++++++++++ src/apps/interlink/test.lua | 27 ++++++++++ src/apps/interlink/test_source.lua | 15 ++++++ src/apps/interlink/transmitter.lua | 41 ++++++++++++++ src/lib/interlink.lua | 85 ++++++++++++++++++++++++++++++ 5 files changed, 211 insertions(+) create mode 100644 src/apps/interlink/receiver.lua create mode 100644 src/apps/interlink/test.lua create mode 100644 src/apps/interlink/test_source.lua create mode 100644 src/apps/interlink/transmitter.lua create mode 
100644 src/lib/interlink.lua diff --git a/src/apps/interlink/receiver.lua b/src/apps/interlink/receiver.lua new file mode 100644 index 0000000000..80721f8a01 --- /dev/null +++ b/src/apps/interlink/receiver.lua @@ -0,0 +1,43 @@ +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. + +module(...,package.seeall) + +local shm = require("core.shm") +local interlink = require("lib.interlink") + +local Receiver = { + config = { + name = {required=true}, + create = {default=false} + } +} + +function Receiver:new (conf) + local self = {} + if conf.create then + self.interlink = interlink.create(conf.name) + self.destroy = conf.name + else + self.interlink = shm.open(conf.name, "struct interlink") + end + interlink.init(self.interlink) + return setmetatable(self, {__index=Receiver}) +end + +function Receiver:pull () + local o, r, n = self.output.output, self.interlink, 0 + while not interlink.empty(r) and n < engine.pull_npackets do + link.transmit(o, interlink.extract(r)) + n = n + 1 + end + interlink.pull(r) +end + +function Receiver:stop () + shm.unmap(self.interlink) + if self.destroy then + shm.unlink(self.destroy) + end +end + +return Receiver diff --git a/src/apps/interlink/test.lua b/src/apps/interlink/test.lua new file mode 100644 index 0000000000..c6beae57c6 --- /dev/null +++ b/src/apps/interlink/test.lua @@ -0,0 +1,27 @@ +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. 
+ +local worker = require("core.worker") +local interlink = require("lib.interlink") +local Receiver = require("apps.interlink.receiver") +local Sink = require("apps.basic.basic_apps").Sink + +interlink.create("group/test.mcp") + +worker.start("source", + [[require("apps.interlink.test_source").start("group/test.mcp")]]) + +local c = config.new() + +config.app(c, "rx", Receiver, {name="group/test.mcp"}) +config.app(c, "sink", Sink) +config.link(c, "rx.output->sink.input") + +engine.configure(c) +engine.main({duration=10, report={showlinks=true}}) + +for w, s in pairs(worker.status()) do + print(("worker %s: pid=%s alive=%s status=%s"):format( + w, s.pid, s.alive, s.status)) +end +local stats = link.stats(engine.app_table["sink"].input.input) +print(stats.txpackets / 1e6 / 10 .. " Mpps") diff --git a/src/apps/interlink/test_source.lua b/src/apps/interlink/test_source.lua new file mode 100644 index 0000000000..86a057c6d3 --- /dev/null +++ b/src/apps/interlink/test_source.lua @@ -0,0 +1,15 @@ +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. + +module(...,package.seeall) + +local Transmitter = require("apps.interlink.transmitter") +local Source = require("apps.basic.basic_apps").Source + +function start (link_name) + local c = config.new() + config.app(c, "tx", Transmitter, {name=link_name}) + config.app(c, "source", Source) + config.link(c, "source.output->tx.input") + engine.configure(c) + engine.main() +end diff --git a/src/apps/interlink/transmitter.lua b/src/apps/interlink/transmitter.lua new file mode 100644 index 0000000000..881a08a348 --- /dev/null +++ b/src/apps/interlink/transmitter.lua @@ -0,0 +1,41 @@ +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. 
+ +module(...,package.seeall) + +local shm = require("core.shm") +local interlink = require("lib.interlink") + +local Transmitter = { + config = { + name = {required=true}, + create = {default=false} + } +} + +function Transmitter:new (conf) + local self = {} + if conf.create then + self.interlink = interlink.create(conf.name) + self.destroy = conf.name + else + self.interlink = shm.open(conf.name, "struct interlink") + end + return setmetatable(self, {__index=Transmitter}) +end + +function Transmitter:push () + local i, r = self.input.input, self.interlink + while not (interlink.full(r) or link.empty(i)) do + interlink.insert(r, link.receive(i)) + end + interlink.push(r) +end + +function Transmitter:stop () + shm.unmap(self.interlink) + if self.destroy then + shm.unlink(self.destroy) + end +end + +return Transmitter diff --git a/src/lib/interlink.lua b/src/lib/interlink.lua new file mode 100644 index 0000000000..cd2e12412c --- /dev/null +++ b/src/lib/interlink.lua @@ -0,0 +1,85 @@ +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. 
+ +module(...,package.seeall) + +-- Based on MCRingBuffer, see +-- http://www.cse.cuhk.edu.hk/%7Epclee/www/pubs/ipdps10.pdf + +local shm = require("core.shm") +local ffi = require("ffi") +local band = require("bit").band + +local SIZE = link.max + 1 +local CACHELINE = 64 -- XXX - make dynamic +local INT = ffi.sizeof("int") + +assert(band(SIZE, SIZE-1) == 0, "SIZE is not a power of two") + +ffi.cdef([[ struct interlink { + char pad0[]]..CACHELINE..[[]; + int read, write; + char pad1[]]..CACHELINE-2*INT..[[]; + int lwrite, nread; + char pad2[]]..CACHELINE-2*INT..[[]; + int lread, nwrite; + char pad3[]]..CACHELINE-2*INT..[[]; + struct packet *packets[]]..SIZE..[[]; +}]]) + +function create (name) + local r = shm.create(name, "struct interlink") + r.nwrite = link.max -- “full” until initlaized + return r +end + +function init (r) -- initialization must be performed by consumer + assert(r.packets[0] == ffi.new("void *")) -- only satisfied if uninitialized + for i = 0, link.max do + r.packets[i] = packet.allocate() + end + r.nwrite = 0 +end + +local function NEXT (r, i) + return band(i + 1, link.max) +end + +function full (r) + local after_nwrite = NEXT(r, r.nwrite) + if after_nwrite == r.lread then + if after_nwrite == r.read then + return true + end + r.lread = r.read + end +end + +function insert (r, p) + packet.free(r.packets[r.nwrite]) + r.packets[r.nwrite] = p + r.nwrite = NEXT(r, r.nwrite) +end + +function push (r) + r.write = r.nwrite +end + +function empty (r) + if r.nread == r.lwrite then + if r.nread == r.write then + return true + end + r.lwrite = r.write + end +end + +function extract (r) + local p = r.packets[r.nread] + r.packets[r.nread] = packet.allocate() + r.nread = NEXT(r, r.nread) + return p +end + +function pull (r) + r.read = r.nread +end From 7318cc0a9624b36c5a74bfc1cc0701aafa32293b Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 18 Aug 2017 00:48:27 +0200 Subject: [PATCH 14/72] apps.interlink: name apps --- 
src/apps/interlink/receiver.lua | 1 + src/apps/interlink/transmitter.lua | 1 + 2 files changed, 2 insertions(+) diff --git a/src/apps/interlink/receiver.lua b/src/apps/interlink/receiver.lua index 80721f8a01..d93efd538c 100644 --- a/src/apps/interlink/receiver.lua +++ b/src/apps/interlink/receiver.lua @@ -6,6 +6,7 @@ local shm = require("core.shm") local interlink = require("lib.interlink") local Receiver = { + name = "apps.interlink.Receiver", config = { name = {required=true}, create = {default=false} diff --git a/src/apps/interlink/transmitter.lua b/src/apps/interlink/transmitter.lua index 881a08a348..63d6b5109b 100644 --- a/src/apps/interlink/transmitter.lua +++ b/src/apps/interlink/transmitter.lua @@ -6,6 +6,7 @@ local shm = require("core.shm") local interlink = require("lib.interlink") local Transmitter = { + name = "apps.interlink.Transmitter", config = { name = {required=true}, create = {default=false} From 8dff93bfda1f9be7046db2a4a8400b71eeeb0c0b Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Mon, 21 Aug 2017 19:51:47 +0200 Subject: [PATCH 15/72] apps.interlink: use robust setup/teardown routine, add sensible memory barriers --- src/apps/interlink/receiver.lua | 7 +++-- src/apps/interlink/test.lua | 4 +++ src/apps/interlink/transmitter.lua | 6 ++-- src/lib/interlink.lua | 48 ++++++++++++++++++++++-------- 4 files changed, 47 insertions(+), 18 deletions(-) diff --git a/src/apps/interlink/receiver.lua b/src/apps/interlink/receiver.lua index d93efd538c..a9d7273040 100644 --- a/src/apps/interlink/receiver.lua +++ b/src/apps/interlink/receiver.lua @@ -19,9 +19,8 @@ function Receiver:new (conf) self.interlink = interlink.create(conf.name) self.destroy = conf.name else - self.interlink = shm.open(conf.name, "struct interlink") + self.interlink = interlink.open(conf.name) end - interlink.init(self.interlink) return setmetatable(self, {__index=Receiver}) end @@ -35,9 +34,11 @@ function Receiver:pull () end function Receiver:stop () - 
shm.unmap(self.interlink) if self.destroy then + interlink.free(self.interlink) shm.unlink(self.destroy) + else + shm.unmap(self.interlink) end end diff --git a/src/apps/interlink/test.lua b/src/apps/interlink/test.lua index c6beae57c6..0d19ae16e1 100644 --- a/src/apps/interlink/test.lua +++ b/src/apps/interlink/test.lua @@ -25,3 +25,7 @@ for w, s in pairs(worker.status()) do end local stats = link.stats(engine.app_table["sink"].input.input) print(stats.txpackets / 1e6 / 10 .. " Mpps") + +-- test teardown +interlink.free(interlink.open("group/test.mcp")) +engine.main({duration=1}) diff --git a/src/apps/interlink/transmitter.lua b/src/apps/interlink/transmitter.lua index 63d6b5109b..838096e3b6 100644 --- a/src/apps/interlink/transmitter.lua +++ b/src/apps/interlink/transmitter.lua @@ -19,7 +19,7 @@ function Transmitter:new (conf) self.interlink = interlink.create(conf.name) self.destroy = conf.name else - self.interlink = shm.open(conf.name, "struct interlink") + self.interlink = interlink.open(conf.name) end return setmetatable(self, {__index=Transmitter}) end @@ -33,9 +33,11 @@ function Transmitter:push () end function Transmitter:stop () - shm.unmap(self.interlink) if self.destroy then + interlink.free(self.interlink) shm.unlink(self.destroy) + else + shm.unmap(self.interlink) end end diff --git a/src/lib/interlink.lua b/src/lib/interlink.lua index cd2e12412c..a58c0422d8 100644 --- a/src/lib/interlink.lua +++ b/src/lib/interlink.lua @@ -8,6 +8,8 @@ module(...,package.seeall) local shm = require("core.shm") local ffi = require("ffi") local band = require("bit").band +local waitfor = require("core.lib").waitfor +local full_memory_barrier = ffi.C.full_memory_barrier local SIZE = link.max + 1 local CACHELINE = 64 -- XXX - make dynamic @@ -15,10 +17,12 @@ local INT = ffi.sizeof("int") assert(band(SIZE, SIZE-1) == 0, "SIZE is not a power of two") +local status = { Locked = 0, Unlocked = 1 } + ffi.cdef([[ struct interlink { char pad0[]]..CACHELINE..[[]; - int read, 
write; - char pad1[]]..CACHELINE-2*INT..[[]; + int read, write, lock; + char pad1[]]..CACHELINE-3*INT..[[]; int lwrite, nread; char pad2[]]..CACHELINE-2*INT..[[]; int lread, nwrite; @@ -28,26 +32,42 @@ ffi.cdef([[ struct interlink { function create (name) local r = shm.create(name, "struct interlink") - r.nwrite = link.max -- “full” until initlaized + for i = 0, link.max do + r.packets[i] = packet.allocate() + end + full_memory_barrier() + r.lock = status.Unlocked return r end -function init (r) -- initialization must be performed by consumer - assert(r.packets[0] == ffi.new("void *")) -- only satisfied if uninitialized +function free (r) + r.lock = status.Locked + full_memory_barrier() + local function ring_consistent () + return r.write == r.nwrite and r.read == r.nread + end + waitfor(ring_consistent) for i = 0, link.max do - r.packets[i] = packet.allocate() + packet.free(r.packets[i]) end - r.nwrite = 0 + shm.unmap(r) +end + +function open (name) + local r = shm.open(name, "struct interlink") + waitfor(function () return r.lock == status.Unlocked end) + full_memory_barrier() + return r end -local function NEXT (r, i) +local function NEXT (i) return band(i + 1, link.max) end function full (r) - local after_nwrite = NEXT(r, r.nwrite) + local after_nwrite = NEXT(r.nwrite) if after_nwrite == r.lread then - if after_nwrite == r.read then + if after_nwrite == r.read or r.lock == status.Locked then return true end r.lread = r.read @@ -57,16 +77,17 @@ end function insert (r, p) packet.free(r.packets[r.nwrite]) r.packets[r.nwrite] = p - r.nwrite = NEXT(r, r.nwrite) + r.nwrite = NEXT(r.nwrite) end function push (r) + full_memory_barrier() r.write = r.nwrite end function empty (r) if r.nread == r.lwrite then - if r.nread == r.write then + if r.nread == r.write or r.lock == status.Locked then return true end r.lwrite = r.write @@ -76,10 +97,11 @@ end function extract (r) local p = r.packets[r.nread] r.packets[r.nread] = packet.allocate() - r.nread = NEXT(r, r.nread) + 
r.nread = NEXT(r.nread) return p end function pull (r) + full_memory_barrier() r.read = r.nread end From 4486728f43d59709ac2cca3e2e99fbab04a450aa Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Mon, 21 Aug 2017 17:36:48 +0200 Subject: [PATCH 16/72] =?UTF-8?q?apps.interlink.receiver:=20don=E2=80=99t?= =?UTF-8?q?=20forward=20packets=20until=20connected?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/apps/interlink/receiver.lua | 1 + 1 file changed, 1 insertion(+) diff --git a/src/apps/interlink/receiver.lua b/src/apps/interlink/receiver.lua index a9d7273040..a019218172 100644 --- a/src/apps/interlink/receiver.lua +++ b/src/apps/interlink/receiver.lua @@ -26,6 +26,7 @@ end function Receiver:pull () local o, r, n = self.output.output, self.interlink, 0 + if not o then return end -- don’t forward packets until connected while not interlink.empty(r) and n < engine.pull_npackets do link.transmit(o, interlink.extract(r)) n = n + 1 From db61756ecbb1b0715df92191044852f52e6cd3bd Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Mon, 30 Oct 2017 15:37:13 +0100 Subject: [PATCH 17/72] extract lib.print_object from lib.store_conf --- src/core/lib.lua | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/core/lib.lua b/src/core/lib.lua index b7d75678d3..248d925c05 100644 --- a/src/core/lib.lua +++ b/src/core/lib.lua @@ -126,7 +126,8 @@ function load_conf (file) end -- Store Lua representation of value in file. 
-function store_conf (file, value) +function print_object (value, stream) + stream = stream or io.stdout local indent = 0 local function print_indent (stream) for i = 1, indent do stream:write(" ") end @@ -159,10 +160,13 @@ function store_conf (file, value) stream:write(("%s"):format(value)) end end - local stream = assert(io.open(file, "w")) - stream:write("return ") print_value(value, stream) stream:write("\n") +end +function store_conf (file, value) + local stream = assert(io.open(file, "w")) + stream:write("return ") + print_object(value, stream) stream:close() end From a60a42b1a7b5f164650fda71f1251887e3c9815d Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Mon, 30 Oct 2017 15:37:35 +0100 Subject: [PATCH 18/72] lib.xsd_regexp: port MaxPC to lib.maxpc, implement regular expression parser --- src/lib/maxpc.lua | 375 +++++++++++++++++++++++++++++++ src/lib/yang/xsd_types_regex.lua | 262 +++++++++++++++++++++ 2 files changed, 637 insertions(+) create mode 100644 src/lib/maxpc.lua create mode 100644 src/lib/yang/xsd_types_regex.lua diff --git a/src/lib/maxpc.lua b/src/lib/maxpc.lua new file mode 100644 index 0000000000..46a7eb2676 --- /dev/null +++ b/src/lib/maxpc.lua @@ -0,0 +1,375 @@ +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. 
+ +-- Max’s parser combinators (for Lua) +module(..., package.seeall) + + +-- interface + +-- use like this: +-- local match, capture, combine = require("lib.maxpc").import() +function import () + local l_match, l_capture, l_combine = {}, {}, {} + for key, value in pairs(match) do + l_match[key] = value + end + for key, value in pairs(capture) do + l_capture[key] = value + end + for key, value in pairs(combine) do + l_combine[key] = value + end + return l_match, l_capture, l_combine +end + +-- parse(str, parser) => result_value, was_successful, has_reached_eof +function parse (str, parser) + local rest, value = parser(input.new(str)) + return value, rest and true, #str == 0 or (rest and input.empty(rest)) +end + + +-- input protocol + +input = {} + +function input.new (str) + return { idx = 1, str = str } +end + +function input.empty (s) + return s.idx > #s.str +end + +function input.first (s, n) + return s.str:sub(s.idx, s.idx + (n or 1) - 1) +end + +function input.rest (s) + return { idx = s.idx + 1, str = s.str } +end + +function input.position (s) + return s.idx +end + + +-- primitives + +capture, match, combine = {}, {}, {} + +function match.eof () + return function (s) + if input.empty(s) then + return s + end + end +end + +function capture.element () + return function (s) + if not input.empty(s) then + return input.rest(s), input.first(s), true + end + end +end + +function match.fail (handler) + return function (s) + if handler then + handler(input.position(s)) + end + end +end + +function match.satisfies (test, parser) + parser = parser or capture.element() + return function (s) + local rest, value = parser(s) + if rest and test(value) then + return rest + end + end +end + +function capture.subseq (parser) + return function (s) + local rest = parser(s) + if rest then + local diff = input.position(rest) - input.position(s) + return rest, input.first(s, diff), true + end + end +end + +function match.seq (...) 
+ local parsers = {...} + return function (s) + for _, parser in ipairs(parsers) do + s = parser(s) + if not s then + return + end + end + return s + end +end + +function capture.seq (...) + local parsers = {...} + return function (s) + local seq = {} + for _, parser in ipairs(parsers) do + local rest, value = parser(s) + if rest then + table.insert(seq, value or false) + s = rest + else + return + end + end + return s, seq, true + end +end + +function combine.any (parser) + return function (s) + local seq = {} + while true do + local rest, value, present = parser(s) + if rest then + s = rest + else + local value + if #seq > 0 then + value = seq + end + return s, value, value ~= nil + end + if present then + table.insert(seq, value or false) + end + end + end +end + +function combine._or (...) + local parsers = {...} + return function (s) + for _, parser in ipairs(parsers) do + local rest, value, present = parser(s) + if rest then + return rest, value, present + end + end + end +end + +function combine._and (...) + local parsers = {...} + return function (s) + local rest, value, present + for _, parser in ipairs(parsers) do + rest, value, present = parser(s) + if not rest then + return + end + end + return rest, value, present + end +end + +function combine.diff (parser, ...) + local punion = combine._or(...) 
+ return function (s) + if not punion(s) then + return parser(s) + end + end +end + +function capture.transform (parser, transform) + return function (s) + local rest, value = parser(s) + if rest then + return rest, transform(value), true + end + end +end + + +-- built-in combinators + +function combine.maybe (parser) + return combine._or(parser, match.seq()) +end + +function match._not (parser) + local function constantly_nil () + return nil + end + return combine.diff( + capture.transform(capture.element(), constantly_nil), + parser + ) +end + +function combine.some (parser) + return combine._and(parser, combine.any(parser)) +end + +function match.equal (x, parser) + local function is_equal_to_x (y) + return x == y + end + return match.satisfies(is_equal_to_x, parser) +end + +function capture.unpack (parser, f) + local function destructure (seq) + return f(unpack(seq)) + end + return capture.transform(parser, destructure) +end + + +-- tests + +function selftest () + local lib = require("core.lib") + + -- match.eof + local result, matched, eof = parse("", match.eof()) + assert(not result) assert(matched) assert(eof) + local result, matched, eof = parse("f", match.eof()) + assert(not result) assert(not matched) assert(not eof) + + -- match.fail + local result, matched, eof = parse("f", match.fail()) + assert(not result) assert(not matched) assert(not eof) + local result, matched, eof = parse("f", combine.maybe(match.fail())) + assert(not result) assert(matched) assert(not eof) + local success, err = pcall(parse, "", match.fail( + function (pos) + error(pos .. 
": fail") + end + )) + assert(not success) assert(err:find("1: fail", 1, true)) + + -- capture.element + local result, matched, eof = parse("foo", capture.element()) + assert(result == "f") assert(matched) assert(not eof) + local result, matched, eof = parse("", capture.element()) + assert(not result) assert(not matched) assert(eof) + + -- match.satisfies + local function is_digit (x) + return ("01234567890"):find(x, 1, true) + end + local result, matched, eof = + parse("123", capture.subseq(match.satisfies(is_digit))) + assert(result == "1") assert(matched) assert(not eof) + local result, matched, eof = parse("foo", match.satisfies(is_digit)) + assert(not result) assert(not matched) assert(not eof) + + -- match.seq + local result, matched, eof = parse("fo", match.seq(capture.element(), + capture.element(), + match.eof())) + assert(not result) assert(matched) assert(eof) + local result, matched, eof = parse("foo", match.seq(capture.element(), + capture.element(), + match.eof())) + assert(not result) assert(not matched) assert(not eof) + local result, matched, eof = + parse("fo", match.seq(match.seq(match.equal("f"), capture.element()), + match.eof())) + assert(not result) assert(matched) assert(eof) + local result, matched, eof = parse("", match.seq()) + assert(not result) assert(matched) assert(eof) + + -- capture.seq + local result, matched, eof = parse("fo", capture.seq(capture.element(), + capture.element(), + match.eof())) + assert(lib.equal(result, {"f", "o", false})) assert(matched) assert(eof) + local result, matched, eof = parse("foo", capture.seq(capture.element(), + capture.element(), + match.eof())) + assert(not result) assert(not matched) assert(not eof) + local result, matched, eof = + parse("fo", capture.seq(match.seq(match.equal("f"), capture.element()), + match.eof())) + assert(result) assert(matched) assert(eof) + local result, matched, eof = parse("", capture.seq()) + assert(result) assert(matched) assert(eof) + + -- combine.any + local result, 
matched, eof = parse("", combine.any(capture.element())) + assert(not result) assert(matched) assert(eof) + local result, matched, eof = + parse("123foo", capture.subseq(combine.any(match.satisfies(is_digit)))) + assert(result == "123") assert(matched) assert(not eof) + local result, matched, eof = parse("", combine.some(capture.element())) + assert(not result) assert(not matched) assert(eof) + local result, matched, eof = + parse("foo", capture.seq(combine.some(capture.element()), match.eof())) + assert(lib.equal(result, {{"f","o","o"},false})) assert(matched) assert(eof) + + -- combine._or + local fo = combine._or(match.equal("f"), match.equal("o")) + local result, matched, eof = parse("fo", capture.seq(fo, fo, match.eof())) + assert(result) assert(matched) assert(eof) + local result, matched, eof = parse("x", fo) + assert(not result) assert(not matched) assert(not eof) + local result, matched, eof = parse("", fo) + assert(not result) assert(not matched) assert(eof) + + -- combine._and + local function is_alphanumeric (x) + return ("01234567890abcdefghijklmnopqrstuvwxyz"):find(x, 1, true) + end + local d = combine._and(match.satisfies(is_alphanumeric), + match.satisfies(is_digit)) + local result, matched, eof = parse("12", capture.seq(d, d, match.eof())) + assert(result) assert(matched) assert(eof) + local result, matched, eof = parse("f", capture.seq(d, match.eof())) + assert(not result) assert(not matched) assert(not eof) + local result, matched, eof = parse("x1", capture.seq(d, d)) + assert(not result) assert(not matched) assert(not eof) + + -- combine.diff + local ins = combine.diff(match.satisfies(is_alphanumeric), match.equal("c")) + local result, matched, eof = parse("fo", capture.seq(ins, ins, match.eof())) + assert(result) assert(matched) assert(eof) + local result, matched, eof = parse("c", capture.seq(ins)) + assert(not result) assert(not matched) assert(not eof) + local result, matched, eof = parse("ac", capture.seq(ins, ins)) + assert(not result) 
assert(not matched) assert(not eof) + local result, matched, eof = + parse("f", capture.seq(match._not(match.eof()), match.eof())) + assert(result) assert(matched) assert(eof) + + -- capture.transform + parse("foo", capture.transform(match.fail(), error)) + local function constantly_true () return true end + local result, matched, eof = + parse("", capture.transform(match.eof(), constantly_true)) + assert(result) assert(matched) assert(eof) + parse("_abce", capture.unpack(combine.any(capture.element()), + function (_, a, b, c) + assert(a == "a") + assert(b == "b") + assert(c == "c") + end + )) + parse(":a:b", capture.unpack(capture.seq(match.equal("_"), + capture.element(), + match.equal("_"), + capture.element()), + function (_, a, _, b) + assert(a == "a") + assert(b == "b") + end + )) +end diff --git a/src/lib/yang/xsd_types_regex.lua b/src/lib/yang/xsd_types_regex.lua new file mode 100644 index 0000000000..18dbbcc715 --- /dev/null +++ b/src/lib/yang/xsd_types_regex.lua @@ -0,0 +1,262 @@ +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. 
+ +-- “XSD types” regular expression implementation (ASCII only), see: +-- https://www.w3.org/TR/xmlschema11-2/#regexs +module(..., package.seeall) + +local maxpc = require("lib.maxpc") +local match, capture, combine = maxpc.import() + +function capture.regExp () + return capture.unpack( + capture.seq(capture.branch(), combine.any(capture.otherBranch())), + function (branch, otherBranches) + local branches = {branch} + for _, branch in ipairs(otherBranches or {}) do + table.insert(branches, branch) + end + return {branches=branches} + end + ) +end + +function capture.branch () + return capture.transform(combine.any(capture.piece()), + function (pieces) return {pieces=pieces} end) +end + +function capture.otherBranch () + return capture.unpack( + capture.seq(match.equal("|"), capture.branch()), + function (_, branch) return branch end + ) +end + +function capture.piece () + return capture.unpack( + capture.seq(capture.atom(), combine.maybe(capture.quantifier())), + function (atom, quantifier) + return {atom=atom, quantifier=quantifier or nil} + end + ) +end + +function capture.quantifier () + return combine._or( + capture.subseq(match.equal("?")), + capture.subseq(match.equal("*")), + capture.subseq(match.equal("+")), + capture.unpack( + capture.seq(match.equal("{"), capture.quantity(), match.equal("}")), + function (_, quantity, _) return quantity end + ) + ) +end + +function match.digit (s) + return match.satisfies( + function (s) + return ("0123456789"):find(s, 1, true) + end + ) +end + +function capture.quantity () + return combine._or( + capture.quantRange(), + capture.quantMin(), + capture.transform(capture.quantExact(), + function (n) return {exactly=n} end) + ) +end + +function capture.quantRange () + return capture.unpack( + capture.seq(capture.quantExact(), + match.equal(","), + capture.quantExact()), + function (min, _, max) return {min=min, max=max} end + ) +end + +function capture.quantMin () + return capture.unpack( + capture.seq(capture.quantExact(), 
match.equal(",")), + function (min, _) return {min=min} end + ) +end + +function capture.quantExact () + return capture.transform( + capture.subseq(combine.some(match.digit())), + tonumber + ) +end + +function capture.atom () + return combine._or( + capture.NormalChar(), + capture.charClass(), + capture.subExp() + ) +end + +local regExp_parser -- forward definition +local function regExp_binding (s) return regExp_parser(s) end + +function capture.subExp () + return capture.unpack( + capture.seq(match.equal('('), regExp_binding, match.equal(')')), + function (_, expression, _) return expression end + ) +end + +function match.MetaChar () + return match.satisfies( + function (s) + return (".\\?*+{}()|[]"):find(s, 1, true) + end + ) +end + +function match.NormalChar (s) + return match._not(match.MetaChar()) +end + +function capture.NormalChar () + return capture.subseq(match.NormalChar()) +end + +function capture.charClass () + return combine._or( + capture.SingleCharEsc(), + capture.charClassEsc(), + capture.charClassExpr(), + capture.WildcardEsc() + ) +end + +function capture.charClassExpr () + return capture.unpack( + capture.seq(match.equal("["), capture.charGroup(), match.equal("]")), + function (_, charGroup, _) return charGroup end + ) +end + +function capture.charGroup () + return capture.unpack( + capture.seq( + combine._or(capture.negCharGroup(), capture.posCharGroup()), + combine.maybe(capture.charClassSubtraction()) + ), + function (group, subtract) + return {class=group, subtract=subtract or nil} + end + ) +end + +local charClassExpr_parser -- forward declaration +local function charClassExpr_binding (s) + return charClassExpr_parser(s) +end + +function capture.charClassSubtraction () + return capture.unpack( + capture.seq(match.equal("-"), charClassExpr_binding), + function (_, charClassExpr, _) return charClassExpr end + ) +end + +function capture.posCharGroup () + return capture.transform( + combine.some(capture.charGroupPart()), + function (parts) 
return {include=parts} end + ) +end + +function capture.negCharGroup () + return capture.unpack( + capture.seq(match.equal("^"), capture.posCharGroup()), + function (_, group) return {exclude=group.include} end + ) +end + +function capture.charGroupPart () + return combine._or( + capture.charClassEsc(), + capture.charRange(), + capture.singleChar() + ) +end + +function capture.singleChar () + return combine._or(capture.SingleCharEsc(), capture.singleCharNoEsc()) +end + +function capture.charRange () + local rangeChar = combine.diff(capture.singleChar(), match.equal("-")) + return capture.unpack( + capture.seq(rangeChar, match.equal("-"), rangeChar), + function (from, _, to) return {range={from,to}} end + ) +end + +function capture.singleCharNoEsc () + local function is_singleCharNoEsc (s) + return not ("[]"):find(s, 1, true) + end + return combine.diff( + capture.subseq(match.satisfies(is_singleCharNoEsc)), + -- don’t match the "-" leading a character class subtraction + match.seq(match.equal("-"), match.equal("[")) + ) +end + +function capture.charClassEsc () + return combine._or( + capture.MultiCharEsc() --, capture.catEsc(), capture.complEsc() + ) +end + +function capture.SingleCharEsc () + local function is_SingleCharEsc (s) + return ("nrt\\|.?*+(){}-[]^"):find(s, 1, true) + end + return capture.unpack( + capture.seq( + match.equal("\\"), + capture.subseq(match.satisfies(is_SingleCharEsc)) + ), + function (_, char) return {escape=char} end + ) +end + +-- NYI: catEsc, complEsc + +function capture.MultiCharEsc () + local function is_multiCharEsc (s) + return ("sSiIcCdDwW"):find(s, 1, true) + end + return capture.unpack( + capture.seq( + match.equal("\\"), + capture.subseq(match.satisfies(is_multiCharEsc)) + ), + function (_, char) return {escape=char} end + ) +end + +function capture.WildcardEsc () + return capture.transform( + match.equal("."), + function (_) return {escape="."} end + ) +end + +regExp_parser = capture.regExp() +charClassExpr_parser = 
capture.charClassExpr() + +function parse (expr) + local result, success, is_eof = maxpc.parse(expr, regExp_parser) + return (success and is_eof and result) or nil +end From 204f21e277a9fbbc8dd2a55fbd7616b142480668 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 2 Nov 2017 16:56:25 +0100 Subject: [PATCH 19/72] lib.xsd_regexp: implement basic regexp compiler --- src/lib/xsd_regexp.lua | 503 +++++++++++++++++++++++++++++++ src/lib/yang/xsd_types_regex.lua | 262 ---------------- 2 files changed, 503 insertions(+), 262 deletions(-) create mode 100644 src/lib/xsd_regexp.lua delete mode 100644 src/lib/yang/xsd_types_regex.lua diff --git a/src/lib/xsd_regexp.lua b/src/lib/xsd_regexp.lua new file mode 100644 index 0000000000..db1990af8c --- /dev/null +++ b/src/lib/xsd_regexp.lua @@ -0,0 +1,503 @@ +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. + +module(..., package.seeall) +local maxpc = require("lib.maxpc") +local match, capture, combine = maxpc.import() + +-- Implementation of regular expressions (ASCII only) as defined in Appendix G +-- of "W3C XML Schema Definition Language (XSD) 1.1 Part 2: Datatypes", see: +-- +-- https://www.w3.org/TR/xmlschema11-2/#regexs +-- +-- The main entry function `regexp.compile' accepts a regular expression +-- string, and returns a predicate function that tests whether a string is part +-- of the language defined by the expression. +-- +-- Example: +-- local is_identifier = regexp.compile("[a-zA-Z][a-zA-Z0-9]*") +-- is_identifier("Foo3") -> true +-- is_identifier("7up") -> false +-- +-- It uses a combinatory parsing library (MaxPC) to parse a regular expression +-- in the format defined by the specification referenced above, and compiles +-- the denoted regular language to a MaxPC grammar. +-- +-- NYI: any Unicode support (i.e. 
currently a character is a single byte and no +-- category escapes are implemented) + +function compile (expr) + local ast = parse(expr) + local parser = compile_branches(ast.branches) + return function (str) + local _, success, is_eof = maxpc.parse(str, parser) + return success and is_eof + end +end + +local regExp_parser -- forward definition + +function parse (expr) + local result, success, is_eof = maxpc.parse(expr, regExp_parser) + if not (success and is_eof) then + error("Unable to parse regular expression: " .. expr) + else + return result + end +end + + +-- Parser rules: string -> AST + +function capture.regExp () + return capture.unpack( + capture.seq(capture.branch(), combine.any(capture.otherBranch())), + function (branch, otherBranches) + local branches = {branch} + for _, branch in ipairs(otherBranches or {}) do + table.insert(branches, branch) + end + return {branches=branches} + end + ) +end + +function capture.branch () + return capture.transform(combine.any(capture.piece()), + function (pieces) return {pieces=pieces} end) +end + +function capture.otherBranch () + return capture.unpack( + capture.seq(match.equal("|"), capture.branch()), + function (_, branch) return branch end + ) +end + +function capture.piece () + return capture.unpack( + capture.seq(capture.atom(), combine.maybe(capture.quantifier())), + function (atom, quantifier) + return {atom=atom, quantifier=quantifier or nil} + end + ) +end + +function capture.quantifier () + return combine._or( + capture.subseq(match.equal("?")), + capture.subseq(match.equal("*")), + capture.subseq(match.equal("+")), + capture.unpack( + capture.seq(match.equal("{"), capture.quantity(), match.equal("}")), + function (_, quantity, _) return quantity end + ) + ) +end + +function match.digit (s) + return match.satisfies( + function (s) + return ("0123456789"):find(s, 1, true) + end + ) +end + +function capture.quantity () + return combine._or( + capture.quantRange(), + capture.quantMin(), + 
capture.transform(capture.quantExact(), + function (n) return {exactly=n} end) + ) +end + +function capture.quantRange () + return capture.unpack( + capture.seq(capture.quantExact(), + match.equal(","), + capture.quantExact()), + function (min, _, max) return {min=min, max=max} end + ) +end + +function capture.quantMin () + return capture.unpack( + capture.seq(capture.quantExact(), match.equal(",")), + function (min, _) return {min=min} end + ) +end + +function capture.quantExact () + return capture.transform( + capture.subseq(combine.some(match.digit())), + tonumber + ) +end + +function capture.atom () + return combine._or( + capture.NormalChar(), + capture.charClass(), + capture.subExp() + ) +end + +local function regExp_binding (s) return regExp_parser(s) end + +function capture.subExp () + return capture.unpack( + capture.seq(match.equal('('), regExp_binding, match.equal(')')), + function (_, expression, _) return expression end + ) +end + +function match.MetaChar () + return match.satisfies( + function (s) + return (".\\?*+{}()|[]"):find(s, 1, true) + end + ) +end + +function match.NormalChar (s) + return match._not(match.MetaChar()) +end + +function capture.NormalChar () + return capture.subseq(match.NormalChar()) +end + +function capture.charClass () + return combine._or( + capture.SingleCharEsc(), + capture.charClassEsc(), + capture.charClassExpr(), + capture.WildcardEsc() + ) +end + +function capture.charClassExpr () + return capture.unpack( + capture.seq(match.equal("["), capture.charGroup(), match.equal("]")), + function (_, charGroup, _) return charGroup end + ) +end + +function capture.charGroup () + return capture.unpack( + capture.seq( + combine._or(capture.negCharGroup(), capture.posCharGroup()), + combine.maybe(capture.charClassSubtraction()) + ), + function (group, subtract) + return {group=group, subtract=subtract or nil} + end + ) +end + +local charClassExpr_parser -- forward declaration +local function charClassExpr_binding (s) + return 
charClassExpr_parser(s) +end + +function capture.charClassSubtraction () + return capture.unpack( + capture.seq(match.equal("-"), charClassExpr_binding), + function (_, charClassExpr, _) return charClassExpr end + ) +end + +function capture.posCharGroup () + return capture.transform( + combine.some(capture.charGroupPart()), + function (parts) return {include=parts} end + ) +end + +function capture.negCharGroup () + return capture.unpack( + capture.seq(match.equal("^"), capture.posCharGroup()), + function (_, group) return {exclude=group.include} end + ) +end + +function capture.charGroupPart () + return combine._or( + capture.charClassEsc(), + capture.charRange(), + capture.singleChar() + ) +end + +function capture.singleChar () + return combine._or(capture.SingleCharEsc(), capture.singleCharNoEsc()) +end + +function capture.charRange () + local rangeChar = combine.diff(capture.singleChar(), match.equal("-")) + return capture.unpack( + capture.seq(rangeChar, match.equal("-"), rangeChar), + function (from, _, to) return {range={from,to}} end + ) +end + +function capture.singleCharNoEsc () + local function is_singleCharNoEsc (s) + return not ("[]"):find(s, 1, true) + end + return combine.diff( + capture.subseq(match.satisfies(is_singleCharNoEsc)), + -- don’t match the "-" leading a character class subtraction + match.seq(match.equal("-"), match.equal("[")) + ) +end + +function capture.charClassEsc () + return combine._or( + capture.MultiCharEsc() --, capture.catEsc(), capture.complEsc() + ) +end + +function capture.SingleCharEsc () + local function is_SingleCharEsc (s) + return ("nrt\\|.?*+(){}-[]^"):find(s, 1, true) + end + return capture.unpack( + capture.seq( + match.equal("\\"), + capture.subseq(match.satisfies(is_SingleCharEsc)) + ), + function (_, char) return {escape=char} end + ) +end + +-- NYI: catEsc, complEsc + +function capture.MultiCharEsc () + local function is_multiCharEsc (s) + return ("sSiIcCdDwW"):find(s, 1, true) + end + return capture.unpack( + 
capture.seq( + match.equal("\\"), + capture.subseq(match.satisfies(is_multiCharEsc)) + ), + function (_, char) return {escape=char} end + ) +end + +function capture.WildcardEsc () + return capture.transform( + match.equal("."), + function (_) return {escape="."} end + ) +end + +regExp_parser = capture.regExp() +charClassExpr_parser = capture.charClassExpr() + + +-- Compiler rules: AST -> MaxPC parser + +function compile_branches (branches) + local parsers = {} + for _, branch in ipairs(branches) do + if branch.pieces then + table.insert(parsers, compile_pieces(branch.pieces)) + end + end + if #parsers == 0 then return match.eof() + elseif #parsers == 1 then return parsers[1] + elseif #parsers > 1 then return combine._or(unpack(parsers)) end +end + +function compile_pieces (pieces) + local parsers = {} + for _, piece in ipairs(pieces) do + local atom_parser = compile_atom(piece.atom) + if piece.quantifier then + local quanitify = compile_quantifier(piece.quantifier) + table.insert(parsers, quanitify(atom_parser)) + else + table.insert(parsers, atom_parser) + end + end + return match.seq(unpack(parsers)) +end + +function compile_quantifier (quantifier) + if quantifier == "?" then return combine.maybe + elseif quantifier == "*" then return combine.any + elseif quantifier == "+" then return combine.some + elseif quantifier.min and quantifier.max then + -- [min * parser] .. 
[max * maybe(parser)] + return function (parser) + local parsers = {} + for n = 1, quantifier.min do + table.insert(parsers, parser) + end + for n = 1, quantifier.max - quantifier.min do + table.insert(parsers, combine.maybe(parser)) + end + return match.seq(unpack(parsers)) + end + elseif quantifier.min then + -- [min * parser] any(parser) + return function (parser) + local parsers = {} + for n = 1, quantifier.min do + table.insert(parsers, parser) + end + table.insert(parsers, combine.any(parser)) + return match.seq(unpack(parsers)) + end + elseif quantifier.exactly then + -- [exactly * parser] + return function (parser) + local parsers = {} + for n = 1, quantifier.exactly do + table.insert(parsers, parser) + end + return match.seq(unpack(parsers)) + end + else + error("Invalid quantifier") + end +end + +function compile_atom (atom) + -- NYI: \i, \I, \c, \C + local function memberTest (set) + return function (s) return set:find(s, 1, true) end + end + local is_special_escape = memberTest("\\|.-^?*+{}()[]") + local match_wildcard = function (x) return not memberTest("\n\r")(x) end + local is_space = memberTest(" \t\n\r") + local is_digit = memberTest("0123456789") + local is_word = memberTest("0123456789abcdefghijklmnopqrstiuvwxyzABCDEFGHIJKLMNOPQRSTIUVWXYZ") + if type(atom) == 'string' then return match.equal(atom) + elseif atom.escape == "n" then return match.equal("\n") + elseif atom.escape == "r" then return match.equal("\r") + elseif atom.escape == "t" then return match.equal("\t") + elseif atom.escape and is_special_escape(atom.escape) then + return match.equal(atom.escape) + elseif atom.escape == "." 
then + return match.satisfies(match_wildcard) + elseif atom.escape == "s" then + return match.satisfies(is_space) + elseif atom.escape == "S" then + return match._not(match.satisfies(is_space)) + elseif atom.escape == "d" then + return match.satisfies(is_digit) + elseif atom.escape == "D" then + return match._not(match.satisfies(is_digit)) + elseif atom.escape == "w" then + return match.satisfies(is_word) + elseif atom.escape == "W" then + return match._not(match.satisfies(is_word)) + elseif atom.group then + return compile_class(atom.group, atom.subtract) + elseif atom.range then + return compile_range(unpack(atom.range)) + elseif atom.branches then + return compile_branches(atom.branches) + else + error("Invalid atom") + end +end + +function compile_class (group, subtract) + if not subtract then + return compile_group(group) + else + return combine.diff( + compile_group(group), + compile_class(subtract.group, subtract.subtract) + ) + end +end + +function compile_group (group) + local function compile_group_atoms (atoms) + local parsers = {} + for _, atom in ipairs(atoms) do + table.insert(parsers, compile_atom(atom)) + end + return combine._or(unpack(parsers)) + end + if group.include then + return compile_group_atoms(group.include) + elseif group.exclude then + return match._not(compile_group_atoms(group.exclude)) + else + error("Invalid group") + end +end + +function compile_range (start, stop) + start, stop = start:byte(), stop:byte() + local function in_range (s) + s = s:byte() + return start <= s and s <= stop + end + return match.satisfies(in_range) +end + + +-- Tests + +local function test (o) + local match = compile(o.regexp) + for _, input in ipairs(o.accept) do + assert(match(input), o.regexp .. " should match " .. input) + end + for _, input in ipairs(o.reject) do + assert(not match(input), o.regexp .. " should not match " .. 
input) + end +end + +function selftest () + test {regexp="[a-zA-Z][a-zA-Z0-9]*", + accept={"Foo3", "baz"}, + reject={"7Up", "123", "äöü", ""}} + + test {regexp="", + accept={""}, + reject={"foo"}} + + test {regexp="abc", + accept={"abc"}, + reject={"abcd", "0abc", ""}} + + test {regexp="a[bc]", + accept={"ab", "ac"}, + reject={"abcd", "0abc", "aa", ""}} + + test {regexp="\\n+", + accept={"\n", "\n\n\n"}, + reject={"", "\n\n\t", "\naa"}} + + test {regexp="(foo|bar)?", + accept={"foo", "bar", ""}, + reject={"foobar"}} + + test {regexp="foo|bar|baz", + accept={"foo", "bar", "baz"}, + reject={"", "fo"}} + + test {regexp="\\]", + accept={"]"}, + reject={"", "\\]"}} + + test {regexp="\\d{3,}", + accept={"123", "45678910"}, + reject={"", "12", "foo"}} + + test {regexp="[^\\d]{3,5}", + accept={"foo", "....", ".-.-."}, + reject={"", "foobar", "123", "4567", "45678"}} + + test {regexp="[abc-[ab]]{3}", + accept={"ccc"}, + reject={"", "abc"}} +end diff --git a/src/lib/yang/xsd_types_regex.lua b/src/lib/yang/xsd_types_regex.lua deleted file mode 100644 index 18dbbcc715..0000000000 --- a/src/lib/yang/xsd_types_regex.lua +++ /dev/null @@ -1,262 +0,0 @@ --- Use of this source code is governed by the Apache 2.0 license; see COPYING. 
- --- “XSD types” regular expression implementation (ASCII only), see: --- https://www.w3.org/TR/xmlschema11-2/#regexs -module(..., package.seeall) - -local maxpc = require("lib.maxpc") -local match, capture, combine = maxpc.import() - -function capture.regExp () - return capture.unpack( - capture.seq(capture.branch(), combine.any(capture.otherBranch())), - function (branch, otherBranches) - local branches = {branch} - for _, branch in ipairs(otherBranches or {}) do - table.insert(branches, branch) - end - return {branches=branches} - end - ) -end - -function capture.branch () - return capture.transform(combine.any(capture.piece()), - function (pieces) return {pieces=pieces} end) -end - -function capture.otherBranch () - return capture.unpack( - capture.seq(match.equal("|"), capture.branch()), - function (_, branch) return branch end - ) -end - -function capture.piece () - return capture.unpack( - capture.seq(capture.atom(), combine.maybe(capture.quantifier())), - function (atom, quantifier) - return {atom=atom, quantifier=quantifier or nil} - end - ) -end - -function capture.quantifier () - return combine._or( - capture.subseq(match.equal("?")), - capture.subseq(match.equal("*")), - capture.subseq(match.equal("+")), - capture.unpack( - capture.seq(match.equal("{"), capture.quantity(), match.equal("}")), - function (_, quantity, _) return quantity end - ) - ) -end - -function match.digit (s) - return match.satisfies( - function (s) - return ("0123456789"):find(s, 1, true) - end - ) -end - -function capture.quantity () - return combine._or( - capture.quantRange(), - capture.quantMin(), - capture.transform(capture.quantExact(), - function (n) return {exactly=n} end) - ) -end - -function capture.quantRange () - return capture.unpack( - capture.seq(capture.quantExact(), - match.equal(","), - capture.quantExact()), - function (min, _, max) return {min=min, max=max} end - ) -end - -function capture.quantMin () - return capture.unpack( - capture.seq(capture.quantExact(), 
match.equal(",")), - function (min, _) return {min=min} end - ) -end - -function capture.quantExact () - return capture.transform( - capture.subseq(combine.some(match.digit())), - tonumber - ) -end - -function capture.atom () - return combine._or( - capture.NormalChar(), - capture.charClass(), - capture.subExp() - ) -end - -local regExp_parser -- forward definition -local function regExp_binding (s) return regExp_parser(s) end - -function capture.subExp () - return capture.unpack( - capture.seq(match.equal('('), regExp_binding, match.equal(')')), - function (_, expression, _) return expression end - ) -end - -function match.MetaChar () - return match.satisfies( - function (s) - return (".\\?*+{}()|[]"):find(s, 1, true) - end - ) -end - -function match.NormalChar (s) - return match._not(match.MetaChar()) -end - -function capture.NormalChar () - return capture.subseq(match.NormalChar()) -end - -function capture.charClass () - return combine._or( - capture.SingleCharEsc(), - capture.charClassEsc(), - capture.charClassExpr(), - capture.WildcardEsc() - ) -end - -function capture.charClassExpr () - return capture.unpack( - capture.seq(match.equal("["), capture.charGroup(), match.equal("]")), - function (_, charGroup, _) return charGroup end - ) -end - -function capture.charGroup () - return capture.unpack( - capture.seq( - combine._or(capture.negCharGroup(), capture.posCharGroup()), - combine.maybe(capture.charClassSubtraction()) - ), - function (group, subtract) - return {class=group, subtract=subtract or nil} - end - ) -end - -local charClassExpr_parser -- forward declaration -local function charClassExpr_binding (s) - return charClassExpr_parser(s) -end - -function capture.charClassSubtraction () - return capture.unpack( - capture.seq(match.equal("-"), charClassExpr_binding), - function (_, charClassExpr, _) return charClassExpr end - ) -end - -function capture.posCharGroup () - return capture.transform( - combine.some(capture.charGroupPart()), - function (parts) 
return {include=parts} end - ) -end - -function capture.negCharGroup () - return capture.unpack( - capture.seq(match.equal("^"), capture.posCharGroup()), - function (_, group) return {exclude=group.include} end - ) -end - -function capture.charGroupPart () - return combine._or( - capture.charClassEsc(), - capture.charRange(), - capture.singleChar() - ) -end - -function capture.singleChar () - return combine._or(capture.SingleCharEsc(), capture.singleCharNoEsc()) -end - -function capture.charRange () - local rangeChar = combine.diff(capture.singleChar(), match.equal("-")) - return capture.unpack( - capture.seq(rangeChar, match.equal("-"), rangeChar), - function (from, _, to) return {range={from,to}} end - ) -end - -function capture.singleCharNoEsc () - local function is_singleCharNoEsc (s) - return not ("[]"):find(s, 1, true) - end - return combine.diff( - capture.subseq(match.satisfies(is_singleCharNoEsc)), - -- don’t match the "-" leading a character class subtraction - match.seq(match.equal("-"), match.equal("[")) - ) -end - -function capture.charClassEsc () - return combine._or( - capture.MultiCharEsc() --, capture.catEsc(), capture.complEsc() - ) -end - -function capture.SingleCharEsc () - local function is_SingleCharEsc (s) - return ("nrt\\|.?*+(){}-[]^"):find(s, 1, true) - end - return capture.unpack( - capture.seq( - match.equal("\\"), - capture.subseq(match.satisfies(is_SingleCharEsc)) - ), - function (_, char) return {escape=char} end - ) -end - --- NYI: catEsc, complEsc - -function capture.MultiCharEsc () - local function is_multiCharEsc (s) - return ("sSiIcCdDwW"):find(s, 1, true) - end - return capture.unpack( - capture.seq( - match.equal("\\"), - capture.subseq(match.satisfies(is_multiCharEsc)) - ), - function (_, char) return {escape=char} end - ) -end - -function capture.WildcardEsc () - return capture.transform( - match.equal("."), - function (_) return {escape="."} end - ) -end - -regExp_parser = capture.regExp() -charClassExpr_parser = 
capture.charClassExpr() - -function parse (expr) - local result, success, is_eof = maxpc.parse(expr, regExp_parser) - return (success and is_eof and result) or nil -end From 6f419662fd0e039cf2b6128fc35d27952022f470 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 3 Nov 2017 22:49:09 +0100 Subject: [PATCH 20/72] lib.maxpc: add input_class argument to parse / use classes for input objects --- src/lib/maxpc.lua | 99 ++++++++++++++++++++++++----------------------- 1 file changed, 50 insertions(+), 49 deletions(-) diff --git a/src/lib/maxpc.lua b/src/lib/maxpc.lua index 46a7eb2676..4f95966f38 100644 --- a/src/lib/maxpc.lua +++ b/src/lib/maxpc.lua @@ -22,10 +22,11 @@ function import () return l_match, l_capture, l_combine end --- parse(str, parser) => result_value, was_successful, has_reached_eof -function parse (str, parser) - local rest, value = parser(input.new(str)) - return value, rest and true, #str == 0 or (rest and input.empty(rest)) +-- str, parser, [input_class] => result_value, was_successful, has_reached_eof +function parse (str, parser, input_class) + input_class = input_class or input + local rest, value = parser(input_class:new(str)) + return value, rest and true, #str == 0 or (rest and rest:empty()) end @@ -33,24 +34,24 @@ end input = {} -function input.new (str) - return { idx = 1, str = str } +function input:new (str) + return setmetatable({idx = 1, str = str}, {__index=input}) end -function input.empty (s) - return s.idx > #s.str +function input:empty () + return self.idx > #self.str end -function input.first (s, n) - return s.str:sub(s.idx, s.idx + (n or 1) - 1) +function input:first (n) + return self.str:sub(self.idx, self.idx + (n or 1) - 1) end -function input.rest (s) - return { idx = s.idx + 1, str = s.str } +function input:rest () + return setmetatable({idx = self.idx + 1, str = self.str}, {__index=input}) end -function input.position (s) - return s.idx +function input:position () + return self.idx end @@ -59,33 +60,33 @@ end capture, 
match, combine = {}, {}, {} function match.eof () - return function (s) - if input.empty(s) then - return s + return function (input) + if input:empty() then + return input end end end function capture.element () - return function (s) - if not input.empty(s) then - return input.rest(s), input.first(s), true + return function (input) + if not input:empty() then + return input:rest(), input:first(), true end end end function match.fail (handler) - return function (s) + return function (input) if handler then - handler(input.position(s)) + handler(input:position()) end end end function match.satisfies (test, parser) parser = parser or capture.element() - return function (s) - local rest, value = parser(s) + return function (input) + local rest, value = parser(input) if rest and test(value) then return rest end @@ -93,58 +94,58 @@ function match.satisfies (test, parser) end function capture.subseq (parser) - return function (s) - local rest = parser(s) + return function (input) + local rest = parser(input) if rest then - local diff = input.position(rest) - input.position(s) - return rest, input.first(s, diff), true + local diff = rest:position() - input:position() + return rest, input:first(diff), true end end end function match.seq (...) local parsers = {...} - return function (s) + return function (input) for _, parser in ipairs(parsers) do - s = parser(s) - if not s then + input = parser(input) + if not input then return end end - return s + return input end end function capture.seq (...) 
local parsers = {...} - return function (s) + return function (input) local seq = {} for _, parser in ipairs(parsers) do - local rest, value = parser(s) + local rest, value = parser(input) if rest then table.insert(seq, value or false) - s = rest + input = rest else return end end - return s, seq, true + return input, seq, true end end function combine.any (parser) - return function (s) + return function (input) local seq = {} while true do - local rest, value, present = parser(s) + local rest, value, present = parser(input) if rest then - s = rest + input = rest else local value if #seq > 0 then value = seq end - return s, value, value ~= nil + return input, value, value ~= nil end if present then table.insert(seq, value or false) @@ -155,9 +156,9 @@ end function combine._or (...) local parsers = {...} - return function (s) + return function (input) for _, parser in ipairs(parsers) do - local rest, value, present = parser(s) + local rest, value, present = parser(input) if rest then return rest, value, present end @@ -167,10 +168,10 @@ end function combine._and (...) local parsers = {...} - return function (s) + return function (input) local rest, value, present for _, parser in ipairs(parsers) do - rest, value, present = parser(s) + rest, value, present = parser(input) if not rest then return end @@ -181,16 +182,16 @@ end function combine.diff (parser, ...) local punion = combine._or(...) 
- return function (s) - if not punion(s) then - return parser(s) + return function (input) + if not punion(input) then + return parser(input) end end end function capture.transform (parser, transform) - return function (s) - local rest, value = parser(s) + return function (input) + local rest, value = parser(input) if rest then return rest, transform(value), true end From 660ae79c468bbba1e9fa1e7e4fa0936136f65c0f Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 3 Nov 2017 22:50:38 +0100 Subject: [PATCH 21/72] Revert "lib.maxpc: add input_class argument to parse / use classes [...]" This reverts commit 958c37ddbc5d270ef82401925d20daaab6262dc6. --- src/lib/maxpc.lua | 99 +++++++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 50 deletions(-) diff --git a/src/lib/maxpc.lua b/src/lib/maxpc.lua index 4f95966f38..46a7eb2676 100644 --- a/src/lib/maxpc.lua +++ b/src/lib/maxpc.lua @@ -22,11 +22,10 @@ function import () return l_match, l_capture, l_combine end --- str, parser, [input_class] => result_value, was_successful, has_reached_eof -function parse (str, parser, input_class) - input_class = input_class or input - local rest, value = parser(input_class:new(str)) - return value, rest and true, #str == 0 or (rest and rest:empty()) +-- parse(str, parser) => result_value, was_successful, has_reached_eof +function parse (str, parser) + local rest, value = parser(input.new(str)) + return value, rest and true, #str == 0 or (rest and input.empty(rest)) end @@ -34,24 +33,24 @@ end input = {} -function input:new (str) - return setmetatable({idx = 1, str = str}, {__index=input}) +function input.new (str) + return { idx = 1, str = str } end -function input:empty () - return self.idx > #self.str +function input.empty (s) + return s.idx > #s.str end -function input:first (n) - return self.str:sub(self.idx, self.idx + (n or 1) - 1) +function input.first (s, n) + return s.str:sub(s.idx, s.idx + (n or 1) - 1) end -function input:rest () - return 
setmetatable({idx = self.idx + 1, str = self.str}, {__index=input}) +function input.rest (s) + return { idx = s.idx + 1, str = s.str } end -function input:position () - return self.idx +function input.position (s) + return s.idx end @@ -60,33 +59,33 @@ end capture, match, combine = {}, {}, {} function match.eof () - return function (input) - if input:empty() then - return input + return function (s) + if input.empty(s) then + return s end end end function capture.element () - return function (input) - if not input:empty() then - return input:rest(), input:first(), true + return function (s) + if not input.empty(s) then + return input.rest(s), input.first(s), true end end end function match.fail (handler) - return function (input) + return function (s) if handler then - handler(input:position()) + handler(input.position(s)) end end end function match.satisfies (test, parser) parser = parser or capture.element() - return function (input) - local rest, value = parser(input) + return function (s) + local rest, value = parser(s) if rest and test(value) then return rest end @@ -94,58 +93,58 @@ function match.satisfies (test, parser) end function capture.subseq (parser) - return function (input) - local rest = parser(input) + return function (s) + local rest = parser(s) if rest then - local diff = rest:position() - input:position() - return rest, input:first(diff), true + local diff = input.position(rest) - input.position(s) + return rest, input.first(s, diff), true end end end function match.seq (...) local parsers = {...} - return function (input) + return function (s) for _, parser in ipairs(parsers) do - input = parser(input) - if not input then + s = parser(s) + if not s then return end end - return input + return s end end function capture.seq (...) 
local parsers = {...} - return function (input) + return function (s) local seq = {} for _, parser in ipairs(parsers) do - local rest, value = parser(input) + local rest, value = parser(s) if rest then table.insert(seq, value or false) - input = rest + s = rest else return end end - return input, seq, true + return s, seq, true end end function combine.any (parser) - return function (input) + return function (s) local seq = {} while true do - local rest, value, present = parser(input) + local rest, value, present = parser(s) if rest then - input = rest + s = rest else local value if #seq > 0 then value = seq end - return input, value, value ~= nil + return s, value, value ~= nil end if present then table.insert(seq, value or false) @@ -156,9 +155,9 @@ end function combine._or (...) local parsers = {...} - return function (input) + return function (s) for _, parser in ipairs(parsers) do - local rest, value, present = parser(input) + local rest, value, present = parser(s) if rest then return rest, value, present end @@ -168,10 +167,10 @@ end function combine._and (...) local parsers = {...} - return function (input) + return function (s) local rest, value, present for _, parser in ipairs(parsers) do - rest, value, present = parser(input) + rest, value, present = parser(s) if not rest then return end @@ -182,16 +181,16 @@ end function combine.diff (parser, ...) local punion = combine._or(...) 
- return function (input) - if not punion(input) then - return parser(input) + return function (s) + if not punion(s) then + return parser(s) end end end function capture.transform (parser, transform) - return function (input) - local rest, value = parser(input) + return function (s) + local rest, value = parser(s) if rest then return rest, transform(value), true end From ba37cd82d02d89b3d2ad6d9a4ee72cfd3b48a0ea Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Sat, 4 Nov 2017 00:09:17 +0100 Subject: [PATCH 22/72] lib.maxpc: decode UTF-8 input --- src/lib/maxpc.lua | 57 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/src/lib/maxpc.lua b/src/lib/maxpc.lua index 46a7eb2676..8d55267a44 100644 --- a/src/lib/maxpc.lua +++ b/src/lib/maxpc.lua @@ -31,22 +31,27 @@ end -- input protocol +-- NB: its trivial to support *both* octet and UTF-8 input, see +-- commit 085a5813473f1fa64502b480cc00122bef0fb32a + input = {} function input.new (str) - return { idx = 1, str = str } + return { pos = 1, idx = 1, str = str } end function input.empty (s) return s.idx > #s.str end -function input.first (s, n) - return s.str:sub(s.idx, s.idx + (n or 1) - 1) +function input.first (s, n) n = n or 1 + local to = utf8next(s.str, s.idx) + while n > 1 do n, to = n - 1, utf8next(s.str, to) end + return s.str:sub(s.idx, to - 1) end function input.rest (s) - return { idx = s.idx + 1, str = s.str } + return { pos = s.pos + 1, idx = utf8next(s.str, s.idx), str = s.str } end function input.position (s) @@ -233,6 +238,46 @@ function capture.unpack (parser, f) end +-- UTF-8 decoding (see http://nullprogram.com/blog/2017/10/06/) + +local bit = require("bit") +local lshift, rshift, band, bor = bit.lshift, bit.rshift, bit.band, bit.bor + +function utf8length (str, idx) idx = idx or 1 + local lengths = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0 + } + return lengths[rshift(str:byte(idx), 
3) + 1] +end + +function utf8next (str, idx) idx = idx or 1 + return idx + math.max(utf8length(str, idx), 1) -- advance even on error +end + +function codepoint (str, idx) idx = idx or 1 + local length = utf8length(str, idx) + local point + if length == 1 then point = str:byte(idx) + elseif length == 2 then point = bor(lshift(band(str:byte(idx), 0x1f), 6), + band(str:byte(idx+1), 0x3f)) + elseif length == 3 then point = bor(lshift(band(str:byte(idx), 0x0f), 12), + lshift(band(str:byte(idx+1), 0x3f), 6), + band(str:byte(idx+2), 0x3f)) + elseif length == 4 then point = bor(lshift(band(str:byte(idx), 0x07), 18), + lshift(band(str:byte(idx+1), 0x3f), 12), + lshift(band(str:byte(idx+2), 0x3f), 6), + band(str:byte(idx+3), 0x3f)) + else + point = -1 -- invalid + end + if point >= 0xd800 and point <= 0xdfff then + point = -1 -- surrogate half + end + return point +end + + -- tests function selftest () @@ -372,4 +417,8 @@ function selftest () assert(b == "b") end )) + + -- test UTF-8 input + local result, matched, eof = parse("λ", capture.element()) + assert(result == "λ") assert(matched) assert(eof) end From d3cacc66a91e40f0314ed29df6547e81028d7e16 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Sun, 5 Nov 2017 20:20:21 +0100 Subject: [PATCH 23/72] lib.maxpc: fix bug in match._not --- src/lib/maxpc.lua | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/lib/maxpc.lua b/src/lib/maxpc.lua index 8d55267a44..22d12a61ce 100644 --- a/src/lib/maxpc.lua +++ b/src/lib/maxpc.lua @@ -210,13 +210,8 @@ function combine.maybe (parser) end function match._not (parser) - local function constantly_nil () - return nil - end - return combine.diff( - capture.transform(capture.element(), constantly_nil), - parser - ) + local function constantly_true () return true end + return combine.diff(match.satisfies(constantly_true), parser) end function combine.some (parser) @@ -394,6 +389,9 @@ function selftest () local result, matched, eof = parse("f", 
capture.seq(match._not(match.eof()), match.eof())) assert(result) assert(matched) assert(eof) + local result, matched, eof = + parse("foo", combine.any(match._not(match.eof()))) + assert(not result) assert(matched) assert(eof) -- capture.transform parse("foo", capture.transform(match.fail(), error)) From b6cc1a66d6246e66b3f22ce95c377919743b9ba8 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Sun, 5 Nov 2017 20:15:35 +0100 Subject: [PATCH 24/72] lib.maxpc: port digit/number parsers --- src/lib/maxpc.lua | 56 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/src/lib/maxpc.lua b/src/lib/maxpc.lua index 22d12a61ce..cceeeeec02 100644 --- a/src/lib/maxpc.lua +++ b/src/lib/maxpc.lua @@ -233,6 +233,43 @@ function capture.unpack (parser, f) end +-- Digit parsing + +function match.digit (radix) + radix = radix or 10 + local digits = "0123456789abcdefghijklmnopqrstuvwxyz" + assert(radix >= 2 and radix <= 36) + return match.satisfies( + function (s) + return digits:sub(1, radix):find(s:lower(), 1, true) + end + ) +end + +function capture.natural_number (radix) + return capture.transform( + capture.subseq(combine.some(match.digit(radix))), + function (s) return tonumber(s, radix) end + ) +end + +function capture.sign () + local function is_sign (s) return s == "+" or s == "-" end + return combine._and(match.satisfies(is_sign), capture.element()) +end + +function capture.integer_number (radix) + return capture.unpack( + capture.seq(combine.maybe(capture.sign()), + capture.natural_number(radix)), + function (sign, number) + if sign == "-" then number = -number end + return number + end + ) +end + + -- UTF-8 decoding (see http://nullprogram.com/blog/2017/10/06/) local bit = require("bit") @@ -406,9 +443,9 @@ function selftest () assert(c == "c") end )) - parse(":a:b", capture.unpack(capture.seq(match.equal("_"), + parse(":a:b", capture.unpack(capture.seq(match.equal(":"), capture.element(), - match.equal("_"), + 
match.equal(":"), capture.element()), function (_, a, _, b) assert(a == "a") @@ -416,6 +453,21 @@ function selftest () end )) + local result, matched, eof = parse("f", match.digit(16)) + assert(not result) assert(matched) assert(eof) + local result, matched, eof = parse("f423", capture.natural_number(16)) + assert(result == 0xf423) assert(matched) assert(eof) + local result, matched, eof = parse("f423", capture.integer_number(16)) + assert(result == 0xf423) assert(matched) assert(eof) + local result, matched, eof = parse("+f423", capture.integer_number(16)) + assert(result == 0xf423) assert(matched) assert(eof) + local result, matched, eof = parse("-f423", capture.integer_number(16)) + assert(result == -0xf423) assert(matched) assert(eof) + local result, matched, eof = parse("a1234", capture.integer_number()) + assert(not result) assert(not matched) assert(not eof) + local result, matched, eof = parse("1234a", capture.integer_number()) + assert(result == 1234) assert(matched) assert(not eof) + -- test UTF-8 input local result, matched, eof = parse("λ", capture.element()) assert(result == "λ") assert(matched) assert(eof) From 5fd0198857a0bd41e64690b7c18e374b6d4f38e4 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 8 Nov 2017 17:12:03 +0100 Subject: [PATCH 25/72] lib.xsd_regexp: implement unicode ranges --- src/lib/xsd_regexp.lua | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/lib/xsd_regexp.lua b/src/lib/xsd_regexp.lua index db1990af8c..14ccbcab76 100644 --- a/src/lib/xsd_regexp.lua +++ b/src/lib/xsd_regexp.lua @@ -3,6 +3,7 @@ module(..., package.seeall) local maxpc = require("lib.maxpc") local match, capture, combine = maxpc.import() +local codepoint = maxpc.codepoint -- Implementation of regular expressions (ASCII only) as defined in Appendix G -- of "W3C XML Schema Definition Language (XSD) 1.1 Part 2: Datatypes", see: @@ -435,9 +436,9 @@ function compile_group (group) end function compile_range (start, stop) - start, stop = 
start:byte(), stop:byte() + start, stop = codepoint(start), codepoint(stop) local function in_range (s) - s = s:byte() + s = codepoint(s) return start <= s and s <= stop end return match.satisfies(in_range) From 7de7fd36391668dc880d61d6ac9af3e2434582b4 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 8 Nov 2017 17:18:50 +0100 Subject: [PATCH 26/72] lib.ucd: supporting Unicode database for lib.xsd_regexp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit General categories are only implemented for the ASCII range, the predicates return false for all codepoints outside of ASCII. For lib.xsd_regexp that means it can’t match category escapes on non-ASCII input, i.e. it can’t fully handle Unicode input. It can however apply all valid regular expressions to ASCII input. Caveat: users of category escapes must restrict their input to ASCII. In order to fix this, we would need (access to) a full Unicode database in which we could look up codepoints and their attributes. --- src/lib/ucd.lua | 514 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 514 insertions(+) create mode 100644 src/lib/ucd.lua diff --git a/src/lib/ucd.lua b/src/lib/ucd.lua new file mode 100644 index 0000000000..71b94671f4 --- /dev/null +++ b/src/lib/ucd.lua @@ -0,0 +1,514 @@ +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. 
module(..., package.seeall)

local lib = require("core.lib")
local maxpc = require("lib.maxpc")
local codepoint = maxpc.codepoint
local match, capture, combine = maxpc.import()

-- Highest codepoint for which general category predicates are generated.
local ascii_max = 127

-- Parse a Unicode Character Database text file (e.g. Blocks.txt).
--
-- Recognizes lines of the form
--
--    <hex>[..<hex>] ; <value>
--
-- where each entry must be preceded by a newline, and <value> extends up to
-- (but not including) optional spaces followed by "#" or a newline. Anything
-- that is not such an entry (comments, blank lines) is skipped.
--
-- Returns the values returned by maxpc.parse; the first is an array of
-- {start=<number>, stop=<number or nil>, value=<string>} tables. `stop' is
-- expected to be nil for entries that denote a single codepoint (callers
-- below test for `not entry.stop').
function load_ucd (txt)
   local parser = capture.unpack(
      capture.seq(
         match.equal("\n"),
         capture.natural_number(16),
         -- Optional "..<hex>" range end.
         combine.maybe(
            capture.unpack(
               capture.seq(
                  match.equal("."), match.equal("."),
                  capture.natural_number(16)
               ),
               function (_, _, stop) return stop end
            )
         ),
         combine.any(match.equal(" ")),
         match.equal(";"),
         combine.any(match.equal(" ")),
         -- Value: everything up to trailing spaces + comment/newline.
         capture.subseq(
            combine.some(
               match._not(
                  match.seq(combine.any(match.equal(" ")),
                            combine._or(match.equal("#"),
                                        match.equal("\n")))
               )
            )
         )
      ),
      function (_, start, stop, _, _, _, value)
         return {start=start, stop=stop, value=value}
      end
   )
   return maxpc.parse(
      lib.readfile(txt, "*a"),
      combine.any(
         capture.unpack(
            -- Skip input until the next entry, then keep only the entry.
            capture.seq(combine.any(match._not(parser)), parser),
            function (_, mapping) return mapping end
         )
      )
   )
end

-- Normalize a block name to the identifier used as key in `block': spaces,
-- underscores and hyphens are removed and the result is lower-cased.
function block_name (name)
   return name:gsub("[ _-]", ""):lower()
end

-- Print Lua source code for the `block' predicate table, one predicate per
-- entry in <ucd_path>/Blocks.txt.
function compile_block_predicates (ucd_path)
   print("block = {}")
   for _, block in ipairs(load_ucd(ucd_path.."/Blocks.txt")) do
      if not block.stop then
         print(("function block.%s (c) return c == %d end")
               :format(block_name(block.value), block.start))
      else
         print(("function block.%s (c) return %d <= c and c <= %d end")
               :format(block_name(block.value), block.start, block.stop))
      end
   end
end

-- Return the keys of table `t' as a sorted array, so that generated code is
-- emitted in a reproducible order (pairs() order is unspecified).
local function sorted_keys (t)
   local keys = {}
   for key in pairs(t) do keys[#keys+1] = key end
   table.sort(keys)
   return keys
end

-- Return copies of the entries that intersect the ASCII range (0..127).
--
-- Ranges that straddle the ASCII boundary are clipped to it instead of being
-- dropped: e.g. "007F..009F ; Cc" must still contribute U+007F (DEL) to the
-- Cc predicate. Entries that lie entirely within ASCII are passed through
-- with unchanged start/stop.
--
-- NB: generated predicates that were produced before this clipping was
-- introduced lack such boundary codepoints and need to be regenerated.
local function restrict_to_ascii (entries)
   local for_ascii = {}
   for _, entry in ipairs(entries) do
      if entry.start <= ascii_max then
         local stop = entry.stop
         if stop and stop > ascii_max then
            -- Clip; collapse to a single-codepoint entry if nothing but the
            -- start codepoint remains.
            stop = (entry.start < ascii_max) and ascii_max or nil
         end
         table.insert(for_ascii,
                      {start=entry.start, stop=stop, value=entry.value})
      end
   end
   return for_ascii
end

-- Print Lua source code for the `category' predicate table from
-- <ucd_path>/extracted/DerivedGeneralCategory.txt.
--
-- A predicate is emitted for every general category found in the file, but
-- only codepoints in the ASCII range are encoded; categories without ASCII
-- members get a predicate that always returns false. Additionally, a
-- predicate is emitted for each super category (first letter of the category
-- name) which is the disjunction of its member categories.
function compile_category_predicates (ucd_path)
   print("category = {}")
   local categories = {}
   local entries = load_ucd(ucd_path.."/extracted/DerivedGeneralCategory.txt")
   local ascii_entries = restrict_to_ascii(entries)
   -- Make sure every category gets a predicate, even if empty for ASCII.
   for _, entry in ipairs(entries) do
      if not categories[entry.value] then categories[entry.value] = {} end
   end
   -- Compile predicates for ASCII only
   for _, entry in ipairs(ascii_entries) do
      table.insert(categories[entry.value], entry)
   end
   local category_names = sorted_keys(categories)
   for _, cat in ipairs(category_names) do
      print(("function category.%s (c)"):format(cat))
      for _, entry in ipairs(categories[cat]) do
         if not entry.stop then
            print((" if c == %d then return true end")
                  :format(entry.start))
         else
            print((" if %d <= c and c <= %d then return true end"):format(
                  entry.start, entry.stop))
         end
      end
      print(" return false end")
   end
   -- Compile super categories
   local super_categories = {}
   for _, cat in ipairs(category_names) do
      local super = cat:sub(1,1)
      if not super_categories[super] then super_categories[super] = {} end
      table.insert(super_categories[super], "category."..cat)
   end
   for _, super in ipairs(sorted_keys(super_categories)) do
      print(("function category.%s (c)"):format(super))
      print((" return %s(c) end")
            :format(table.concat(super_categories[super], "(c) or ")))
   end
end

-- Regenerate the predicates below and print them to standard output.
-- Requires the environment variable SNABB_UCD_PATH to point to a copy of the
-- Unicode Character Database; the test is skipped otherwise.
function selftest ()
   local ucd_path = os.getenv("SNABB_UCD_PATH")
   if not ucd_path then main.exit(engine.test_skipped_code) end

   compile_block_predicates(ucd_path)
   print()
   compile_category_predicates(ucd_path)
end


-- Code below here is automatically generated (for Unicode 10.0.0) by the
-- functions above.
+ +block = {} +function block.basiclatin (c) return 0 <= c and c <= 127 end +function block.latin1supplement (c) return 128 <= c and c <= 255 end +function block.latinextendeda (c) return 256 <= c and c <= 383 end +function block.latinextendedb (c) return 384 <= c and c <= 591 end +function block.ipaextensions (c) return 592 <= c and c <= 687 end +function block.spacingmodifierletters (c) return 688 <= c and c <= 767 end +function block.combiningdiacriticalmarks (c) return 768 <= c and c <= 879 end +function block.greekandcoptic (c) return 880 <= c and c <= 1023 end +function block.cyrillic (c) return 1024 <= c and c <= 1279 end +function block.cyrillicsupplement (c) return 1280 <= c and c <= 1327 end +function block.armenian (c) return 1328 <= c and c <= 1423 end +function block.hebrew (c) return 1424 <= c and c <= 1535 end +function block.arabic (c) return 1536 <= c and c <= 1791 end +function block.syriac (c) return 1792 <= c and c <= 1871 end +function block.arabicsupplement (c) return 1872 <= c and c <= 1919 end +function block.thaana (c) return 1920 <= c and c <= 1983 end +function block.nko (c) return 1984 <= c and c <= 2047 end +function block.samaritan (c) return 2048 <= c and c <= 2111 end +function block.mandaic (c) return 2112 <= c and c <= 2143 end +function block.syriacsupplement (c) return 2144 <= c and c <= 2159 end +function block.arabicextendeda (c) return 2208 <= c and c <= 2303 end +function block.devanagari (c) return 2304 <= c and c <= 2431 end +function block.bengali (c) return 2432 <= c and c <= 2559 end +function block.gurmukhi (c) return 2560 <= c and c <= 2687 end +function block.gujarati (c) return 2688 <= c and c <= 2815 end +function block.oriya (c) return 2816 <= c and c <= 2943 end +function block.tamil (c) return 2944 <= c and c <= 3071 end +function block.telugu (c) return 3072 <= c and c <= 3199 end +function block.kannada (c) return 3200 <= c and c <= 3327 end +function block.malayalam (c) return 3328 <= c and c <= 3455 end 
+function block.sinhala (c) return 3456 <= c and c <= 3583 end +function block.thai (c) return 3584 <= c and c <= 3711 end +function block.lao (c) return 3712 <= c and c <= 3839 end +function block.tibetan (c) return 3840 <= c and c <= 4095 end +function block.myanmar (c) return 4096 <= c and c <= 4255 end +function block.georgian (c) return 4256 <= c and c <= 4351 end +function block.hanguljamo (c) return 4352 <= c and c <= 4607 end +function block.ethiopic (c) return 4608 <= c and c <= 4991 end +function block.ethiopicsupplement (c) return 4992 <= c and c <= 5023 end +function block.cherokee (c) return 5024 <= c and c <= 5119 end +function block.unifiedcanadianaboriginalsyllabics (c) return 5120 <= c and c <= 5759 end +function block.ogham (c) return 5760 <= c and c <= 5791 end +function block.runic (c) return 5792 <= c and c <= 5887 end +function block.tagalog (c) return 5888 <= c and c <= 5919 end +function block.hanunoo (c) return 5920 <= c and c <= 5951 end +function block.buhid (c) return 5952 <= c and c <= 5983 end +function block.tagbanwa (c) return 5984 <= c and c <= 6015 end +function block.khmer (c) return 6016 <= c and c <= 6143 end +function block.mongolian (c) return 6144 <= c and c <= 6319 end +function block.unifiedcanadianaboriginalsyllabicsextended (c) return 6320 <= c and c <= 6399 end +function block.limbu (c) return 6400 <= c and c <= 6479 end +function block.taile (c) return 6480 <= c and c <= 6527 end +function block.newtailue (c) return 6528 <= c and c <= 6623 end +function block.khmersymbols (c) return 6624 <= c and c <= 6655 end +function block.buginese (c) return 6656 <= c and c <= 6687 end +function block.taitham (c) return 6688 <= c and c <= 6831 end +function block.combiningdiacriticalmarksextended (c) return 6832 <= c and c <= 6911 end +function block.balinese (c) return 6912 <= c and c <= 7039 end +function block.sundanese (c) return 7040 <= c and c <= 7103 end +function block.batak (c) return 7104 <= c and c <= 7167 end +function 
block.lepcha (c) return 7168 <= c and c <= 7247 end +function block.olchiki (c) return 7248 <= c and c <= 7295 end +function block.cyrillicextendedc (c) return 7296 <= c and c <= 7311 end +function block.sundanesesupplement (c) return 7360 <= c and c <= 7375 end +function block.vedicextensions (c) return 7376 <= c and c <= 7423 end +function block.phoneticextensions (c) return 7424 <= c and c <= 7551 end +function block.phoneticextensionssupplement (c) return 7552 <= c and c <= 7615 end +function block.combiningdiacriticalmarkssupplement (c) return 7616 <= c and c <= 7679 end +function block.latinextendedadditional (c) return 7680 <= c and c <= 7935 end +function block.greekextended (c) return 7936 <= c and c <= 8191 end +function block.generalpunctuation (c) return 8192 <= c and c <= 8303 end +function block.superscriptsandsubscripts (c) return 8304 <= c and c <= 8351 end +function block.currencysymbols (c) return 8352 <= c and c <= 8399 end +function block.combiningdiacriticalmarksforsymbols (c) return 8400 <= c and c <= 8447 end +function block.letterlikesymbols (c) return 8448 <= c and c <= 8527 end +function block.numberforms (c) return 8528 <= c and c <= 8591 end +function block.arrows (c) return 8592 <= c and c <= 8703 end +function block.mathematicaloperators (c) return 8704 <= c and c <= 8959 end +function block.miscellaneoustechnical (c) return 8960 <= c and c <= 9215 end +function block.controlpictures (c) return 9216 <= c and c <= 9279 end +function block.opticalcharacterrecognition (c) return 9280 <= c and c <= 9311 end +function block.enclosedalphanumerics (c) return 9312 <= c and c <= 9471 end +function block.boxdrawing (c) return 9472 <= c and c <= 9599 end +function block.blockelements (c) return 9600 <= c and c <= 9631 end +function block.geometricshapes (c) return 9632 <= c and c <= 9727 end +function block.miscellaneoussymbols (c) return 9728 <= c and c <= 9983 end +function block.dingbats (c) return 9984 <= c and c <= 10175 end +function 
block.miscellaneousmathematicalsymbolsa (c) return 10176 <= c and c <= 10223 end +function block.supplementalarrowsa (c) return 10224 <= c and c <= 10239 end +function block.braillepatterns (c) return 10240 <= c and c <= 10495 end +function block.supplementalarrowsb (c) return 10496 <= c and c <= 10623 end +function block.miscellaneousmathematicalsymbolsb (c) return 10624 <= c and c <= 10751 end +function block.supplementalmathematicaloperators (c) return 10752 <= c and c <= 11007 end +function block.miscellaneoussymbolsandarrows (c) return 11008 <= c and c <= 11263 end +function block.glagolitic (c) return 11264 <= c and c <= 11359 end +function block.latinextendedc (c) return 11360 <= c and c <= 11391 end +function block.coptic (c) return 11392 <= c and c <= 11519 end +function block.georgiansupplement (c) return 11520 <= c and c <= 11567 end +function block.tifinagh (c) return 11568 <= c and c <= 11647 end +function block.ethiopicextended (c) return 11648 <= c and c <= 11743 end +function block.cyrillicextendeda (c) return 11744 <= c and c <= 11775 end +function block.supplementalpunctuation (c) return 11776 <= c and c <= 11903 end +function block.cjkradicalssupplement (c) return 11904 <= c and c <= 12031 end +function block.kangxiradicals (c) return 12032 <= c and c <= 12255 end +function block.ideographicdescriptioncharacters (c) return 12272 <= c and c <= 12287 end +function block.cjksymbolsandpunctuation (c) return 12288 <= c and c <= 12351 end +function block.hiragana (c) return 12352 <= c and c <= 12447 end +function block.katakana (c) return 12448 <= c and c <= 12543 end +function block.bopomofo (c) return 12544 <= c and c <= 12591 end +function block.hangulcompatibilityjamo (c) return 12592 <= c and c <= 12687 end +function block.kanbun (c) return 12688 <= c and c <= 12703 end +function block.bopomofoextended (c) return 12704 <= c and c <= 12735 end +function block.cjkstrokes (c) return 12736 <= c and c <= 12783 end +function 
block.katakanaphoneticextensions (c) return 12784 <= c and c <= 12799 end +function block.enclosedcjklettersandmonths (c) return 12800 <= c and c <= 13055 end +function block.cjkcompatibility (c) return 13056 <= c and c <= 13311 end +function block.cjkunifiedideographsextensiona (c) return 13312 <= c and c <= 19903 end +function block.yijinghexagramsymbols (c) return 19904 <= c and c <= 19967 end +function block.cjkunifiedideographs (c) return 19968 <= c and c <= 40959 end +function block.yisyllables (c) return 40960 <= c and c <= 42127 end +function block.yiradicals (c) return 42128 <= c and c <= 42191 end +function block.lisu (c) return 42192 <= c and c <= 42239 end +function block.vai (c) return 42240 <= c and c <= 42559 end +function block.cyrillicextendedb (c) return 42560 <= c and c <= 42655 end +function block.bamum (c) return 42656 <= c and c <= 42751 end +function block.modifiertoneletters (c) return 42752 <= c and c <= 42783 end +function block.latinextendedd (c) return 42784 <= c and c <= 43007 end +function block.sylotinagri (c) return 43008 <= c and c <= 43055 end +function block.commonindicnumberforms (c) return 43056 <= c and c <= 43071 end +function block.phagspa (c) return 43072 <= c and c <= 43135 end +function block.saurashtra (c) return 43136 <= c and c <= 43231 end +function block.devanagariextended (c) return 43232 <= c and c <= 43263 end +function block.kayahli (c) return 43264 <= c and c <= 43311 end +function block.rejang (c) return 43312 <= c and c <= 43359 end +function block.hanguljamoextendeda (c) return 43360 <= c and c <= 43391 end +function block.javanese (c) return 43392 <= c and c <= 43487 end +function block.myanmarextendedb (c) return 43488 <= c and c <= 43519 end +function block.cham (c) return 43520 <= c and c <= 43615 end +function block.myanmarextendeda (c) return 43616 <= c and c <= 43647 end +function block.taiviet (c) return 43648 <= c and c <= 43743 end +function block.meeteimayekextensions (c) return 43744 <= c and c <= 
43775 end +function block.ethiopicextendeda (c) return 43776 <= c and c <= 43823 end +function block.latinextendede (c) return 43824 <= c and c <= 43887 end +function block.cherokeesupplement (c) return 43888 <= c and c <= 43967 end +function block.meeteimayek (c) return 43968 <= c and c <= 44031 end +function block.hangulsyllables (c) return 44032 <= c and c <= 55215 end +function block.hanguljamoextendedb (c) return 55216 <= c and c <= 55295 end +function block.highsurrogates (c) return 55296 <= c and c <= 56191 end +function block.highprivateusesurrogates (c) return 56192 <= c and c <= 56319 end +function block.lowsurrogates (c) return 56320 <= c and c <= 57343 end +function block.privateusearea (c) return 57344 <= c and c <= 63743 end +function block.cjkcompatibilityideographs (c) return 63744 <= c and c <= 64255 end +function block.alphabeticpresentationforms (c) return 64256 <= c and c <= 64335 end +function block.arabicpresentationformsa (c) return 64336 <= c and c <= 65023 end +function block.variationselectors (c) return 65024 <= c and c <= 65039 end +function block.verticalforms (c) return 65040 <= c and c <= 65055 end +function block.combininghalfmarks (c) return 65056 <= c and c <= 65071 end +function block.cjkcompatibilityforms (c) return 65072 <= c and c <= 65103 end +function block.smallformvariants (c) return 65104 <= c and c <= 65135 end +function block.arabicpresentationformsb (c) return 65136 <= c and c <= 65279 end +function block.halfwidthandfullwidthforms (c) return 65280 <= c and c <= 65519 end +function block.specials (c) return 65520 <= c and c <= 65535 end +function block.linearbsyllabary (c) return 65536 <= c and c <= 65663 end +function block.linearbideograms (c) return 65664 <= c and c <= 65791 end +function block.aegeannumbers (c) return 65792 <= c and c <= 65855 end +function block.ancientgreeknumbers (c) return 65856 <= c and c <= 65935 end +function block.ancientsymbols (c) return 65936 <= c and c <= 65999 end +function 
block.phaistosdisc (c) return 66000 <= c and c <= 66047 end +function block.lycian (c) return 66176 <= c and c <= 66207 end +function block.carian (c) return 66208 <= c and c <= 66271 end +function block.copticepactnumbers (c) return 66272 <= c and c <= 66303 end +function block.olditalic (c) return 66304 <= c and c <= 66351 end +function block.gothic (c) return 66352 <= c and c <= 66383 end +function block.oldpermic (c) return 66384 <= c and c <= 66431 end +function block.ugaritic (c) return 66432 <= c and c <= 66463 end +function block.oldpersian (c) return 66464 <= c and c <= 66527 end +function block.deseret (c) return 66560 <= c and c <= 66639 end +function block.shavian (c) return 66640 <= c and c <= 66687 end +function block.osmanya (c) return 66688 <= c and c <= 66735 end +function block.osage (c) return 66736 <= c and c <= 66815 end +function block.elbasan (c) return 66816 <= c and c <= 66863 end +function block.caucasianalbanian (c) return 66864 <= c and c <= 66927 end +function block.lineara (c) return 67072 <= c and c <= 67455 end +function block.cypriotsyllabary (c) return 67584 <= c and c <= 67647 end +function block.imperialaramaic (c) return 67648 <= c and c <= 67679 end +function block.palmyrene (c) return 67680 <= c and c <= 67711 end +function block.nabataean (c) return 67712 <= c and c <= 67759 end +function block.hatran (c) return 67808 <= c and c <= 67839 end +function block.phoenician (c) return 67840 <= c and c <= 67871 end +function block.lydian (c) return 67872 <= c and c <= 67903 end +function block.meroitichieroglyphs (c) return 67968 <= c and c <= 67999 end +function block.meroiticcursive (c) return 68000 <= c and c <= 68095 end +function block.kharoshthi (c) return 68096 <= c and c <= 68191 end +function block.oldsoutharabian (c) return 68192 <= c and c <= 68223 end +function block.oldnortharabian (c) return 68224 <= c and c <= 68255 end +function block.manichaean (c) return 68288 <= c and c <= 68351 end +function block.avestan (c) 
return 68352 <= c and c <= 68415 end +function block.inscriptionalparthian (c) return 68416 <= c and c <= 68447 end +function block.inscriptionalpahlavi (c) return 68448 <= c and c <= 68479 end +function block.psalterpahlavi (c) return 68480 <= c and c <= 68527 end +function block.oldturkic (c) return 68608 <= c and c <= 68687 end +function block.oldhungarian (c) return 68736 <= c and c <= 68863 end +function block.ruminumeralsymbols (c) return 69216 <= c and c <= 69247 end +function block.brahmi (c) return 69632 <= c and c <= 69759 end +function block.kaithi (c) return 69760 <= c and c <= 69839 end +function block.sorasompeng (c) return 69840 <= c and c <= 69887 end +function block.chakma (c) return 69888 <= c and c <= 69967 end +function block.mahajani (c) return 69968 <= c and c <= 70015 end +function block.sharada (c) return 70016 <= c and c <= 70111 end +function block.sinhalaarchaicnumbers (c) return 70112 <= c and c <= 70143 end +function block.khojki (c) return 70144 <= c and c <= 70223 end +function block.multani (c) return 70272 <= c and c <= 70319 end +function block.khudawadi (c) return 70320 <= c and c <= 70399 end +function block.grantha (c) return 70400 <= c and c <= 70527 end +function block.newa (c) return 70656 <= c and c <= 70783 end +function block.tirhuta (c) return 70784 <= c and c <= 70879 end +function block.siddham (c) return 71040 <= c and c <= 71167 end +function block.modi (c) return 71168 <= c and c <= 71263 end +function block.mongoliansupplement (c) return 71264 <= c and c <= 71295 end +function block.takri (c) return 71296 <= c and c <= 71375 end +function block.ahom (c) return 71424 <= c and c <= 71487 end +function block.warangciti (c) return 71840 <= c and c <= 71935 end +function block.zanabazarsquare (c) return 72192 <= c and c <= 72271 end +function block.soyombo (c) return 72272 <= c and c <= 72367 end +function block.paucinhau (c) return 72384 <= c and c <= 72447 end +function block.bhaiksuki (c) return 72704 <= c and c <= 
72815 end +function block.marchen (c) return 72816 <= c and c <= 72895 end +function block.masaramgondi (c) return 72960 <= c and c <= 73055 end +function block.cuneiform (c) return 73728 <= c and c <= 74751 end +function block.cuneiformnumbersandpunctuation (c) return 74752 <= c and c <= 74879 end +function block.earlydynasticcuneiform (c) return 74880 <= c and c <= 75087 end +function block.egyptianhieroglyphs (c) return 77824 <= c and c <= 78895 end +function block.anatolianhieroglyphs (c) return 82944 <= c and c <= 83583 end +function block.bamumsupplement (c) return 92160 <= c and c <= 92735 end +function block.mro (c) return 92736 <= c and c <= 92783 end +function block.bassavah (c) return 92880 <= c and c <= 92927 end +function block.pahawhhmong (c) return 92928 <= c and c <= 93071 end +function block.miao (c) return 93952 <= c and c <= 94111 end +function block.ideographicsymbolsandpunctuation (c) return 94176 <= c and c <= 94207 end +function block.tangut (c) return 94208 <= c and c <= 100351 end +function block.tangutcomponents (c) return 100352 <= c and c <= 101119 end +function block.kanasupplement (c) return 110592 <= c and c <= 110847 end +function block.kanaextendeda (c) return 110848 <= c and c <= 110895 end +function block.nushu (c) return 110960 <= c and c <= 111359 end +function block.duployan (c) return 113664 <= c and c <= 113823 end +function block.shorthandformatcontrols (c) return 113824 <= c and c <= 113839 end +function block.byzantinemusicalsymbols (c) return 118784 <= c and c <= 119039 end +function block.musicalsymbols (c) return 119040 <= c and c <= 119295 end +function block.ancientgreekmusicalnotation (c) return 119296 <= c and c <= 119375 end +function block.taixuanjingsymbols (c) return 119552 <= c and c <= 119647 end +function block.countingrodnumerals (c) return 119648 <= c and c <= 119679 end +function block.mathematicalalphanumericsymbols (c) return 119808 <= c and c <= 120831 end +function block.suttonsignwriting (c) return 
120832 <= c and c <= 121519 end +function block.glagoliticsupplement (c) return 122880 <= c and c <= 122927 end +function block.mendekikakui (c) return 124928 <= c and c <= 125151 end +function block.adlam (c) return 125184 <= c and c <= 125279 end +function block.arabicmathematicalalphabeticsymbols (c) return 126464 <= c and c <= 126719 end +function block.mahjongtiles (c) return 126976 <= c and c <= 127023 end +function block.dominotiles (c) return 127024 <= c and c <= 127135 end +function block.playingcards (c) return 127136 <= c and c <= 127231 end +function block.enclosedalphanumericsupplement (c) return 127232 <= c and c <= 127487 end +function block.enclosedideographicsupplement (c) return 127488 <= c and c <= 127743 end +function block.miscellaneoussymbolsandpictographs (c) return 127744 <= c and c <= 128511 end +function block.emoticons (c) return 128512 <= c and c <= 128591 end +function block.ornamentaldingbats (c) return 128592 <= c and c <= 128639 end +function block.transportandmapsymbols (c) return 128640 <= c and c <= 128767 end +function block.alchemicalsymbols (c) return 128768 <= c and c <= 128895 end +function block.geometricshapesextended (c) return 128896 <= c and c <= 129023 end +function block.supplementalarrowsc (c) return 129024 <= c and c <= 129279 end +function block.supplementalsymbolsandpictographs (c) return 129280 <= c and c <= 129535 end +function block.cjkunifiedideographsextensionb (c) return 131072 <= c and c <= 173791 end +function block.cjkunifiedideographsextensionc (c) return 173824 <= c and c <= 177983 end +function block.cjkunifiedideographsextensiond (c) return 177984 <= c and c <= 178207 end +function block.cjkunifiedideographsextensione (c) return 178208 <= c and c <= 183983 end +function block.cjkunifiedideographsextensionf (c) return 183984 <= c and c <= 191471 end +function block.cjkcompatibilityideographssupplement (c) return 194560 <= c and c <= 195103 end +function block.tags (c) return 917504 <= c and c <= 917631 
end +function block.variationselectorssupplement (c) return 917760 <= c and c <= 917999 end +function block.supplementaryprivateuseareaa (c) return 983040 <= c and c <= 1048575 end +function block.supplementaryprivateuseareab (c) return 1048576 <= c and c <= 1114111 end + +category = {} +function category.Lm (c) + return false end +function category.Zs (c) + if c == 32 then return true end + return false end +function category.Nd (c) + if 48 <= c and c <= 57 then return true end + return false end +function category.Co (c) + return false end +function category.Mc (c) + return false end +function category.Pc (c) + if c == 95 then return true end + return false end +function category.No (c) + return false end +function category.Pi (c) + return false end +function category.Lo (c) + return false end +function category.So (c) + return false end +function category.Cs (c) + return false end +function category.Sk (c) + if c == 94 then return true end + if c == 96 then return true end + return false end +function category.Pd (c) + if c == 45 then return true end + return false end +function category.Sc (c) + if c == 36 then return true end + return false end +function category.Mn (c) + return false end +function category.Po (c) + if 33 <= c and c <= 35 then return true end + if 37 <= c and c <= 39 then return true end + if c == 42 then return true end + if c == 44 then return true end + if 46 <= c and c <= 47 then return true end + if 58 <= c and c <= 59 then return true end + if 63 <= c and c <= 64 then return true end + if c == 92 then return true end + return false end +function category.Cn (c) + return false end +function category.Pe (c) + if c == 41 then return true end + if c == 93 then return true end + if c == 125 then return true end + return false end +function category.Cf (c) + return false end +function category.Me (c) + return false end +function category.Lt (c) + return false end +function category.Zp (c) + return false end +function category.Cc (c) + if 0 <= 
c and c <= 31 then return true end + return false end +function category.Pf (c) + return false end +function category.Lu (c) + if 65 <= c and c <= 90 then return true end + return false end +function category.Ps (c) + if c == 40 then return true end + if c == 91 then return true end + if c == 123 then return true end + return false end +function category.Ll (c) + if 97 <= c and c <= 122 then return true end + return false end +function category.Sm (c) + if c == 43 then return true end + if 60 <= c and c <= 62 then return true end + if c == 124 then return true end + if c == 126 then return true end + return false end +function category.Nl (c) + return false end +function category.Zl (c) + return false end +function category.M (c) + return category.Mc(c) or category.Mn(c) or category.Me(c) end +function category.S (c) + return category.So(c) or category.Sk(c) or category.Sc(c) or category.Sm(c) end +function category.N (c) + return category.Nd(c) or category.No(c) or category.Nl(c) end +function category.Z (c) + return category.Zs(c) or category.Zp(c) or category.Zl(c) end +function category.L (c) + return category.Lm(c) or category.Lo(c) or category.Lt(c) or category.Lu(c) or category.Ll(c) end +function category.C (c) + return category.Co(c) or category.Cs(c) or category.Cn(c) or category.Cf(c) or category.Cc(c) end +function category.P (c) + return category.Pc(c) or category.Pi(c) or category.Pd(c) or category.Po(c) or category.Pe(c) or category.Pf(c) or category.Ps(c) end From f62c95a7718fbc9c13738caca55b4806d4358c3d Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 9 Nov 2017 11:56:12 +0100 Subject: [PATCH 27/72] lib.xsd_regexp: implement limited category escapes --- src/lib/xsd_regexp.lua | 67 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/src/lib/xsd_regexp.lua b/src/lib/xsd_regexp.lua index 14ccbcab76..fe8b10a7b6 100644 --- a/src/lib/xsd_regexp.lua +++ b/src/lib/xsd_regexp.lua @@ -4,6 +4,7 @@ 
module(..., package.seeall) local maxpc = require("lib.maxpc") local match, capture, combine = maxpc.import() local codepoint = maxpc.codepoint +local ucd = require("lib.ucd") -- Implementation of regular expressions (ASCII only) as defined in Appendix G -- of "W3C XML Schema Definition Language (XSD) 1.1 Part 2: Datatypes", see: @@ -255,7 +256,7 @@ end function capture.charClassEsc () return combine._or( - capture.MultiCharEsc() --, capture.catEsc(), capture.complEsc() + capture.MultiCharEsc(), capture.catEsc(), capture.complEsc() ) end @@ -272,7 +273,44 @@ function capture.SingleCharEsc () ) end --- NYI: catEsc, complEsc +function capture.catEsc () + return capture.unpack( + capture.seq(match.equal("\\"), match.equal("p"), match.equal("{"), + capture.charProp(), + match.equal("}")), + function (_, _, _, charProp, _) return {property=charProp} end + ) +end + +function capture.complEsc () + return capture.unpack( + capture.seq(match.equal("\\"), match.equal("P"), match.equal("{"), + capture.charProp(), + match.equal("}")), + function (_, _, _, charProp, _) return {complement=charProp} end + ) +end + +function capture.charProp () + local function is_name (s) + return + ("-0123456789abcdefghijklmnopqrstiuvwxyzABCDEFGHIJKLMNOPQRSTIUVWXYZ") + :find(s, 1, true) + end + return combine._or( + capture.unpack( + capture.seq( + match.equal("I"), match.equal("s"), + capture.subseq(combine.some(match.satisfies(is_name))) + ), + function (_, _, block) return {block=block} end + ), + capture.transform( + capture.subseq(combine.some(match.satisfies(is_name))), + function (category) return {category=category} end + ) + ) +end function capture.MultiCharEsc () local function is_multiCharEsc (s) @@ -400,6 +438,10 @@ function compile_atom (atom) return compile_class(atom.group, atom.subtract) elseif atom.range then return compile_range(unpack(atom.range)) + elseif atom.property then + return match.satisfies(propertyPredicate(atom.property)) + elseif atom.complement then + return 
match._not(match.satisfies(propertyPredicate(atom.complement))) elseif atom.branches then return compile_branches(atom.branches) else @@ -444,6 +486,21 @@ function compile_range (start, stop) return match.satisfies(in_range) end +function propertyPredicate (property) + local predicate + if property.category then + predicate = assert(ucd.category[property.category], + "Invalid category: "..property.category) + predicate = function (c) return c <= 127 and predicate(c) end + elseif property.block then + predicate = assert(ucd.block[ucd.block_name(property.block)], + "Invalid block: "..property.block) + else + error("Invalid property.") + end + return function (s) return predicate(codepoint(s)) end +end + -- Tests @@ -501,4 +558,10 @@ function selftest () test {regexp="[abc-[ab]]{3}", accept={"ccc"}, reject={"", "abc"}} + + require("core.lib").print_object(parse("[\\p{L}]")) + + test {regexp="[\\p{L}]", + accept={"A", "b", "y", "Z"}, + reject={"0", "-", " "}} end From 5eaf4ac9f36f4d145cdaee4bf5eea90bc55875d0 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 9 Nov 2017 15:19:21 +0100 Subject: [PATCH 28/72] lib.xsd_regexp: remove support for block escapes, copy in GC predicates --- src/lib/xsd_regexp.lua | 167 ++++++++++++++++++++++++++--------------- 1 file changed, 107 insertions(+), 60 deletions(-) diff --git a/src/lib/xsd_regexp.lua b/src/lib/xsd_regexp.lua index fe8b10a7b6..16681ec328 100644 --- a/src/lib/xsd_regexp.lua +++ b/src/lib/xsd_regexp.lua @@ -4,10 +4,9 @@ module(..., package.seeall) local maxpc = require("lib.maxpc") local match, capture, combine = maxpc.import() local codepoint = maxpc.codepoint -local ucd = require("lib.ucd") --- Implementation of regular expressions (ASCII only) as defined in Appendix G --- of "W3C XML Schema Definition Language (XSD) 1.1 Part 2: Datatypes", see: +-- Implementation of regular expressions as defined in Appendix G of "W3C XML +-- Schema Definition Language (XSD) 1.1 Part 2: Datatypes", see: -- -- 
https://www.w3.org/TR/xmlschema11-2/#regexs -- @@ -24,8 +23,10 @@ local ucd = require("lib.ucd") -- in the format defined by the specification referenced above, and compiles -- the denoted regular language to a MaxPC grammar. -- --- NYI: any Unicode support (i.e. currently a character is a single byte and no --- category escapes are implemented) +-- NYI: Block escapes and Unicode support for category escapes are not +-- implemented. Category escapes and complements only match codepoints in the +-- Basic Latin block (ASCII). Users of category escapes and complements need to +-- ensure their input is ASCII-only. function compile (expr) local ast = parse(expr) @@ -97,11 +98,7 @@ function capture.quantifier () end function match.digit (s) - return match.satisfies( - function (s) - return ("0123456789"):find(s, 1, true) - end - ) + return match.satisfies(function (s) return member(s, "0123456789") end) end function capture.quantity () @@ -154,11 +151,7 @@ function capture.subExp () end function match.MetaChar () - return match.satisfies( - function (s) - return (".\\?*+{}()|[]"):find(s, 1, true) - end - ) + return match.satisfies(function (s) return member(s, ".\\?*+{}()|[]") end) end function match.NormalChar (s) @@ -245,7 +238,7 @@ end function capture.singleCharNoEsc () local function is_singleCharNoEsc (s) - return not ("[]"):find(s, 1, true) + return not member(s, "[]") end return combine.diff( capture.subseq(match.satisfies(is_singleCharNoEsc)), @@ -262,7 +255,7 @@ end function capture.SingleCharEsc () local function is_SingleCharEsc (s) - return ("nrt\\|.?*+(){}-[]^"):find(s, 1, true) + return member(s, "nrt\\|.?*+(){}-[]^") end return capture.unpack( capture.seq( @@ -292,29 +285,14 @@ function capture.complEsc () end function capture.charProp () - local function is_name (s) - return - ("-0123456789abcdefghijklmnopqrstiuvwxyzABCDEFGHIJKLMNOPQRSTIUVWXYZ") - :find(s, 1, true) - end - return combine._or( - capture.unpack( - capture.seq( - match.equal("I"), 
match.equal("s"), - capture.subseq(combine.some(match.satisfies(is_name))) - ), - function (_, _, block) return {block=block} end - ), - capture.transform( - capture.subseq(combine.some(match.satisfies(is_name))), - function (category) return {category=category} end - ) - ) + local nameChars = "-0123456789abcdefghijklmnopqrstiuvwxyzABCDEFGHIJKLMNOPQRSTIUVWXYZ" + local function is_name (s) return member(s, nameChars) end + return capture.subseq(combine.some(match.satisfies(is_name))) end function capture.MultiCharEsc () local function is_multiCharEsc (s) - return ("sSiIcCdDwW"):find(s, 1, true) + return member(s, "sSiIcCdDwW") end return capture.unpack( capture.seq( @@ -406,14 +384,22 @@ end function compile_atom (atom) -- NYI: \i, \I, \c, \C - local function memberTest (set) - return function (s) return set:find(s, 1, true) end + local function is_special_escape (s) + return member(s, "\\|.-^?*+{}()[]") + end + local function match_wildcard (s) + return not member(s, "\n\r") + end + local function is_space (s) + return member(s, " \t\n\r") + end + local function is_digit (s) + return GC.Nd(codepoint(s)) + end + local function is_word (s) + s = codepoint(s) + return not (GC.P(s) or GC.Z(s) or GC.C(s)) end - local is_special_escape = memberTest("\\|.-^?*+{}()[]") - local match_wildcard = function (x) return not memberTest("\n\r") end - local is_space = memberTest(" \t\n\r") - local is_digit = memberTest("0123456789") - local is_word = memberTest("0123456789abcdefghijklmnopqrstiuvwxyzABCDEFGHIJKLMNOPQRSTIUVWXYZ") if type(atom) == 'string' then return match.equal(atom) elseif atom.escape == "n" then return match.equal("\n") elseif atom.escape == "r" then return match.equal("\r") @@ -439,9 +425,9 @@ function compile_atom (atom) elseif atom.range then return compile_range(unpack(atom.range)) elseif atom.property then - return match.satisfies(propertyPredicate(atom.property)) + return compile_category(atom.property) elseif atom.complement then - return 
match._not(match.satisfies(propertyPredicate(atom.complement))) + return match._not(compile_category(atom.complement)) elseif atom.branches then return compile_branches(atom.branches) else @@ -486,19 +472,74 @@ function compile_range (start, stop) return match.satisfies(in_range) end -function propertyPredicate (property) - local predicate - if property.category then - predicate = assert(ucd.category[property.category], - "Invalid category: "..property.category) - predicate = function (c) return c <= 127 and predicate(c) end - elseif property.block then - predicate = assert(ucd.block[ucd.block_name(property.block)], - "Invalid block: "..property.block) - else - error("Invalid property.") - end - return function (s) return predicate(codepoint(s)) end +function compile_category (name) + local predicate = assert(GC[name], "Invalid category: "..name) + return match.satisfies(function (s) return predicate(codepoint(s)) end) +end + + +-- General category predicates for ASCII + +local function empty_category (c) return false end + +GC = {} +GC.Lu = function (c) return 65 <= c and c <= 90 end +GC.Ll = function (c) return 97 <= c and c <= 122 end +GC.Lt = empty_category +GC.Lm = empty_category +GC.Lo = empty_category +GC.L = function (c) return GC.Lu(c) or GC.Ll(c) end +GC.Mn = empty_category +GC.Mc = empty_category +GC.Me = empty_category +GC.M = empty_category +GC.Nd = function (c) return 48 <= c and c <= 57 end +GC.Nl = empty_category +GC.No = empty_category +GC.N = GC.Nd +GC.Pc = function (c) return c == 95 end +GC.Pd = function (c) return c == 45 end +GC.Ps = function (c) return c == 40 or c == 91 or c == 123 end +GC.Pe = function (c) return c == 41 or c == 93 or c == 125 end +GC.Pi = empty_category +GC.Pf = empty_category +GC.Po = function (c) return (33 <= c and c <= 35) + or (37 <= c and c <= 39) + or c == 42 + or c == 44 + or (46 <= c and c <= 47) + or (58 <= c and c <= 59) + or (63 <= c and c <= 64) + or c == 92 end +GC.P = function (c) return GC.Pc(c) + or 
GC.Pd(c) + or GC.Ps(c) + or GC.Pe(c) + or GC.Po(c) end +GC.Sm = function (c) return c == 43 + or (60 <= c and c <= 62) + or c == 124 + or c == 126 end +GC.Sc = function (c) return c == 36 end +GC.Sk = function (c) return c == 94 or c == 96 end +GC.So = empty_category +GC.S = function (c) return GC.Sm(c) or GC.Sc(c) end +GC.Zs = function (c) return c == 32 end +GC.Zl = empty_category +GC.Zp = empty_category +GC.Z = GC.Zs +GC.Cc = function (c) return 0 <= c and c <= 31 end +GC.Cf = empty_category +GC.Cs = empty_category +GC.Co = empty_category +GC.Cn = empty_category +GC.C = GC.Cc + + +-- Utilities + +function member (element, set) + return set:find(element, 1, true) end @@ -559,9 +600,15 @@ function selftest () accept={"ccc"}, reject={"", "abc"}} - require("core.lib").print_object(parse("[\\p{L}]")) - test {regexp="[\\p{L}]", accept={"A", "b", "y", "Z"}, reject={"0", "-", " "}} + + test {regexp="[\\P{L}]", + accept={"0", "-", " "}, + reject={"A", "b", "y", "Z"}} + + test {regexp="\\P{Ps}", + accept={"}", "]", ")", "A", "b", "y", "Z", "0", "-", " "}, + reject={"(", "[", "{"}} end From ba292f0ccb40e2d7b6c324945b5c56f193ee1dbb Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 9 Nov 2017 15:24:31 +0100 Subject: [PATCH 29/72] lib.xsd_regexp: remove obsolete match.digit --- src/lib/xsd_regexp.lua | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/lib/xsd_regexp.lua b/src/lib/xsd_regexp.lua index 16681ec328..16e814eac3 100644 --- a/src/lib/xsd_regexp.lua +++ b/src/lib/xsd_regexp.lua @@ -97,10 +97,6 @@ function capture.quantifier () ) end -function match.digit (s) - return match.satisfies(function (s) return member(s, "0123456789") end) -end - function capture.quantity () return combine._or( capture.quantRange(), From ba818b9c495b11146f85c3bba5c520a89f180591 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 9 Nov 2017 15:25:13 +0100 Subject: [PATCH 30/72] Revert "lib.ucd: supporting Unicode database for lib.xsd_regexp" This reverts commit 
76acfd3d8e4db2e2b3e745da577c56cf3f3cf517. --- src/lib/ucd.lua | 514 ------------------------------------------------ 1 file changed, 514 deletions(-) delete mode 100644 src/lib/ucd.lua diff --git a/src/lib/ucd.lua b/src/lib/ucd.lua deleted file mode 100644 index 71b94671f4..0000000000 --- a/src/lib/ucd.lua +++ /dev/null @@ -1,514 +0,0 @@ --- Use of this source code is governed by the Apache 2.0 license; see COPYING. - -module(..., package.seeall) - -local lib = require("core.lib") -local maxpc = require("lib.maxpc") -local codepoint = maxpc.codepoint -local match, capture, combine = maxpc.import() - -function load_ucd (txt) - local parser = capture.unpack( - capture.seq( - match.equal("\n"), - capture.natural_number(16), - combine.maybe( - capture.unpack( - capture.seq( - match.equal("."), match.equal("."), - capture.natural_number(16) - ), - function (_, _, stop) return stop end - ) - ), - combine.any(match.equal(" ")), - match.equal(";"), - combine.any(match.equal(" ")), - capture.subseq( - combine.some( - match._not( - match.seq(combine.any(match.equal(" ")), - combine._or(match.equal("#"), - match.equal("\n"))) - ) - ) - ) - ), - function (_, start, stop, _, _, _, value) - return {start=start, stop=stop, value=value} - end - ) - return maxpc.parse( - lib.readfile(txt, "*a"), - combine.any( - capture.unpack( - capture.seq(combine.any(match._not(parser)), parser), - function (_, mapping) return mapping end - ) - ) - ) -end - -function block_name (name) - return name:gsub("[ _-]", ""):lower() -end - -function compile_block_predicates (ucd_path) - print("block = {}") - for _, block in ipairs(load_ucd(ucd_path.."/Blocks.txt")) do - if not block.stop then - print(("function block.%s (c) return c == %d end") - :format(block_name(block.value), block.start)) - else - print(("function block.%s (c) return %d <= c and c <= %d end") - :format(block_name(block.value), block.start, block.stop)) - end - end -end - -local function restrict_to_ascii (entries) - local for_ascii = 
{} - for _, entry in ipairs(entries) do - if entry.start <= 127 and (entry.stop or 0) <= 127 then - table.insert(for_ascii, entry) - end - end - return for_ascii -end - -function compile_category_predicates (ucd_path) - print("category = {}") - local categories = {} - local entries = load_ucd(ucd_path.."/extracted/DerivedGeneralCategory.txt") - local ascii_entries = restrict_to_ascii(entries) - for _, entry in ipairs(entries) do - if not categories[entry.value] then categories[entry.value] = {} end - end - -- Compile predicates for ASCII only - for _, entry in ipairs(ascii_entries) do - table.insert(categories[entry.value], entry) - end - for cat, entries in pairs(categories) do - print(("function category.%s (c)"):format(cat)) - for _, entry in ipairs(entries) do - if not entry.stop then - print((" if c == %d then return true end") - :format(entry.start)) - else - print((" if %d <= c and c <= %d then return true end"):format( - entry.start, entry.stop)) - end - end - print(" return false end") - end - -- Compile super categories - local super_categories = {} - for cat, _ in pairs(categories) do - local super = cat:sub(1,1) - if not super_categories[super] then super_categories[super] = {} end - table.insert(super_categories[super], "category."..cat) - end - for super, cats in pairs(super_categories) do - print(("function category.%s (c)"):format(super)) - print((" return %s(c) end"):format(table.concat(cats, "(c) or "))) - end -end - -function selftest () - local ucd_path = os.getenv("SNABB_UCD_PATH") - if not ucd_path then main.exit(engine.test_skipped_code) end - - compile_block_predicates(ucd_path) - print() - compile_category_predicates(ucd_path) -end - - --- Code below here is automatically generated (for Unicode 10.0.0) by the --- functions above. 
- -block = {} -function block.basiclatin (c) return 0 <= c and c <= 127 end -function block.latin1supplement (c) return 128 <= c and c <= 255 end -function block.latinextendeda (c) return 256 <= c and c <= 383 end -function block.latinextendedb (c) return 384 <= c and c <= 591 end -function block.ipaextensions (c) return 592 <= c and c <= 687 end -function block.spacingmodifierletters (c) return 688 <= c and c <= 767 end -function block.combiningdiacriticalmarks (c) return 768 <= c and c <= 879 end -function block.greekandcoptic (c) return 880 <= c and c <= 1023 end -function block.cyrillic (c) return 1024 <= c and c <= 1279 end -function block.cyrillicsupplement (c) return 1280 <= c and c <= 1327 end -function block.armenian (c) return 1328 <= c and c <= 1423 end -function block.hebrew (c) return 1424 <= c and c <= 1535 end -function block.arabic (c) return 1536 <= c and c <= 1791 end -function block.syriac (c) return 1792 <= c and c <= 1871 end -function block.arabicsupplement (c) return 1872 <= c and c <= 1919 end -function block.thaana (c) return 1920 <= c and c <= 1983 end -function block.nko (c) return 1984 <= c and c <= 2047 end -function block.samaritan (c) return 2048 <= c and c <= 2111 end -function block.mandaic (c) return 2112 <= c and c <= 2143 end -function block.syriacsupplement (c) return 2144 <= c and c <= 2159 end -function block.arabicextendeda (c) return 2208 <= c and c <= 2303 end -function block.devanagari (c) return 2304 <= c and c <= 2431 end -function block.bengali (c) return 2432 <= c and c <= 2559 end -function block.gurmukhi (c) return 2560 <= c and c <= 2687 end -function block.gujarati (c) return 2688 <= c and c <= 2815 end -function block.oriya (c) return 2816 <= c and c <= 2943 end -function block.tamil (c) return 2944 <= c and c <= 3071 end -function block.telugu (c) return 3072 <= c and c <= 3199 end -function block.kannada (c) return 3200 <= c and c <= 3327 end -function block.malayalam (c) return 3328 <= c and c <= 3455 end 
-function block.sinhala (c) return 3456 <= c and c <= 3583 end -function block.thai (c) return 3584 <= c and c <= 3711 end -function block.lao (c) return 3712 <= c and c <= 3839 end -function block.tibetan (c) return 3840 <= c and c <= 4095 end -function block.myanmar (c) return 4096 <= c and c <= 4255 end -function block.georgian (c) return 4256 <= c and c <= 4351 end -function block.hanguljamo (c) return 4352 <= c and c <= 4607 end -function block.ethiopic (c) return 4608 <= c and c <= 4991 end -function block.ethiopicsupplement (c) return 4992 <= c and c <= 5023 end -function block.cherokee (c) return 5024 <= c and c <= 5119 end -function block.unifiedcanadianaboriginalsyllabics (c) return 5120 <= c and c <= 5759 end -function block.ogham (c) return 5760 <= c and c <= 5791 end -function block.runic (c) return 5792 <= c and c <= 5887 end -function block.tagalog (c) return 5888 <= c and c <= 5919 end -function block.hanunoo (c) return 5920 <= c and c <= 5951 end -function block.buhid (c) return 5952 <= c and c <= 5983 end -function block.tagbanwa (c) return 5984 <= c and c <= 6015 end -function block.khmer (c) return 6016 <= c and c <= 6143 end -function block.mongolian (c) return 6144 <= c and c <= 6319 end -function block.unifiedcanadianaboriginalsyllabicsextended (c) return 6320 <= c and c <= 6399 end -function block.limbu (c) return 6400 <= c and c <= 6479 end -function block.taile (c) return 6480 <= c and c <= 6527 end -function block.newtailue (c) return 6528 <= c and c <= 6623 end -function block.khmersymbols (c) return 6624 <= c and c <= 6655 end -function block.buginese (c) return 6656 <= c and c <= 6687 end -function block.taitham (c) return 6688 <= c and c <= 6831 end -function block.combiningdiacriticalmarksextended (c) return 6832 <= c and c <= 6911 end -function block.balinese (c) return 6912 <= c and c <= 7039 end -function block.sundanese (c) return 7040 <= c and c <= 7103 end -function block.batak (c) return 7104 <= c and c <= 7167 end -function 
block.lepcha (c) return 7168 <= c and c <= 7247 end -function block.olchiki (c) return 7248 <= c and c <= 7295 end -function block.cyrillicextendedc (c) return 7296 <= c and c <= 7311 end -function block.sundanesesupplement (c) return 7360 <= c and c <= 7375 end -function block.vedicextensions (c) return 7376 <= c and c <= 7423 end -function block.phoneticextensions (c) return 7424 <= c and c <= 7551 end -function block.phoneticextensionssupplement (c) return 7552 <= c and c <= 7615 end -function block.combiningdiacriticalmarkssupplement (c) return 7616 <= c and c <= 7679 end -function block.latinextendedadditional (c) return 7680 <= c and c <= 7935 end -function block.greekextended (c) return 7936 <= c and c <= 8191 end -function block.generalpunctuation (c) return 8192 <= c and c <= 8303 end -function block.superscriptsandsubscripts (c) return 8304 <= c and c <= 8351 end -function block.currencysymbols (c) return 8352 <= c and c <= 8399 end -function block.combiningdiacriticalmarksforsymbols (c) return 8400 <= c and c <= 8447 end -function block.letterlikesymbols (c) return 8448 <= c and c <= 8527 end -function block.numberforms (c) return 8528 <= c and c <= 8591 end -function block.arrows (c) return 8592 <= c and c <= 8703 end -function block.mathematicaloperators (c) return 8704 <= c and c <= 8959 end -function block.miscellaneoustechnical (c) return 8960 <= c and c <= 9215 end -function block.controlpictures (c) return 9216 <= c and c <= 9279 end -function block.opticalcharacterrecognition (c) return 9280 <= c and c <= 9311 end -function block.enclosedalphanumerics (c) return 9312 <= c and c <= 9471 end -function block.boxdrawing (c) return 9472 <= c and c <= 9599 end -function block.blockelements (c) return 9600 <= c and c <= 9631 end -function block.geometricshapes (c) return 9632 <= c and c <= 9727 end -function block.miscellaneoussymbols (c) return 9728 <= c and c <= 9983 end -function block.dingbats (c) return 9984 <= c and c <= 10175 end -function 
block.miscellaneousmathematicalsymbolsa (c) return 10176 <= c and c <= 10223 end -function block.supplementalarrowsa (c) return 10224 <= c and c <= 10239 end -function block.braillepatterns (c) return 10240 <= c and c <= 10495 end -function block.supplementalarrowsb (c) return 10496 <= c and c <= 10623 end -function block.miscellaneousmathematicalsymbolsb (c) return 10624 <= c and c <= 10751 end -function block.supplementalmathematicaloperators (c) return 10752 <= c and c <= 11007 end -function block.miscellaneoussymbolsandarrows (c) return 11008 <= c and c <= 11263 end -function block.glagolitic (c) return 11264 <= c and c <= 11359 end -function block.latinextendedc (c) return 11360 <= c and c <= 11391 end -function block.coptic (c) return 11392 <= c and c <= 11519 end -function block.georgiansupplement (c) return 11520 <= c and c <= 11567 end -function block.tifinagh (c) return 11568 <= c and c <= 11647 end -function block.ethiopicextended (c) return 11648 <= c and c <= 11743 end -function block.cyrillicextendeda (c) return 11744 <= c and c <= 11775 end -function block.supplementalpunctuation (c) return 11776 <= c and c <= 11903 end -function block.cjkradicalssupplement (c) return 11904 <= c and c <= 12031 end -function block.kangxiradicals (c) return 12032 <= c and c <= 12255 end -function block.ideographicdescriptioncharacters (c) return 12272 <= c and c <= 12287 end -function block.cjksymbolsandpunctuation (c) return 12288 <= c and c <= 12351 end -function block.hiragana (c) return 12352 <= c and c <= 12447 end -function block.katakana (c) return 12448 <= c and c <= 12543 end -function block.bopomofo (c) return 12544 <= c and c <= 12591 end -function block.hangulcompatibilityjamo (c) return 12592 <= c and c <= 12687 end -function block.kanbun (c) return 12688 <= c and c <= 12703 end -function block.bopomofoextended (c) return 12704 <= c and c <= 12735 end -function block.cjkstrokes (c) return 12736 <= c and c <= 12783 end -function 
block.katakanaphoneticextensions (c) return 12784 <= c and c <= 12799 end -function block.enclosedcjklettersandmonths (c) return 12800 <= c and c <= 13055 end -function block.cjkcompatibility (c) return 13056 <= c and c <= 13311 end -function block.cjkunifiedideographsextensiona (c) return 13312 <= c and c <= 19903 end -function block.yijinghexagramsymbols (c) return 19904 <= c and c <= 19967 end -function block.cjkunifiedideographs (c) return 19968 <= c and c <= 40959 end -function block.yisyllables (c) return 40960 <= c and c <= 42127 end -function block.yiradicals (c) return 42128 <= c and c <= 42191 end -function block.lisu (c) return 42192 <= c and c <= 42239 end -function block.vai (c) return 42240 <= c and c <= 42559 end -function block.cyrillicextendedb (c) return 42560 <= c and c <= 42655 end -function block.bamum (c) return 42656 <= c and c <= 42751 end -function block.modifiertoneletters (c) return 42752 <= c and c <= 42783 end -function block.latinextendedd (c) return 42784 <= c and c <= 43007 end -function block.sylotinagri (c) return 43008 <= c and c <= 43055 end -function block.commonindicnumberforms (c) return 43056 <= c and c <= 43071 end -function block.phagspa (c) return 43072 <= c and c <= 43135 end -function block.saurashtra (c) return 43136 <= c and c <= 43231 end -function block.devanagariextended (c) return 43232 <= c and c <= 43263 end -function block.kayahli (c) return 43264 <= c and c <= 43311 end -function block.rejang (c) return 43312 <= c and c <= 43359 end -function block.hanguljamoextendeda (c) return 43360 <= c and c <= 43391 end -function block.javanese (c) return 43392 <= c and c <= 43487 end -function block.myanmarextendedb (c) return 43488 <= c and c <= 43519 end -function block.cham (c) return 43520 <= c and c <= 43615 end -function block.myanmarextendeda (c) return 43616 <= c and c <= 43647 end -function block.taiviet (c) return 43648 <= c and c <= 43743 end -function block.meeteimayekextensions (c) return 43744 <= c and c <= 
43775 end -function block.ethiopicextendeda (c) return 43776 <= c and c <= 43823 end -function block.latinextendede (c) return 43824 <= c and c <= 43887 end -function block.cherokeesupplement (c) return 43888 <= c and c <= 43967 end -function block.meeteimayek (c) return 43968 <= c and c <= 44031 end -function block.hangulsyllables (c) return 44032 <= c and c <= 55215 end -function block.hanguljamoextendedb (c) return 55216 <= c and c <= 55295 end -function block.highsurrogates (c) return 55296 <= c and c <= 56191 end -function block.highprivateusesurrogates (c) return 56192 <= c and c <= 56319 end -function block.lowsurrogates (c) return 56320 <= c and c <= 57343 end -function block.privateusearea (c) return 57344 <= c and c <= 63743 end -function block.cjkcompatibilityideographs (c) return 63744 <= c and c <= 64255 end -function block.alphabeticpresentationforms (c) return 64256 <= c and c <= 64335 end -function block.arabicpresentationformsa (c) return 64336 <= c and c <= 65023 end -function block.variationselectors (c) return 65024 <= c and c <= 65039 end -function block.verticalforms (c) return 65040 <= c and c <= 65055 end -function block.combininghalfmarks (c) return 65056 <= c and c <= 65071 end -function block.cjkcompatibilityforms (c) return 65072 <= c and c <= 65103 end -function block.smallformvariants (c) return 65104 <= c and c <= 65135 end -function block.arabicpresentationformsb (c) return 65136 <= c and c <= 65279 end -function block.halfwidthandfullwidthforms (c) return 65280 <= c and c <= 65519 end -function block.specials (c) return 65520 <= c and c <= 65535 end -function block.linearbsyllabary (c) return 65536 <= c and c <= 65663 end -function block.linearbideograms (c) return 65664 <= c and c <= 65791 end -function block.aegeannumbers (c) return 65792 <= c and c <= 65855 end -function block.ancientgreeknumbers (c) return 65856 <= c and c <= 65935 end -function block.ancientsymbols (c) return 65936 <= c and c <= 65999 end -function 
block.phaistosdisc (c) return 66000 <= c and c <= 66047 end -function block.lycian (c) return 66176 <= c and c <= 66207 end -function block.carian (c) return 66208 <= c and c <= 66271 end -function block.copticepactnumbers (c) return 66272 <= c and c <= 66303 end -function block.olditalic (c) return 66304 <= c and c <= 66351 end -function block.gothic (c) return 66352 <= c and c <= 66383 end -function block.oldpermic (c) return 66384 <= c and c <= 66431 end -function block.ugaritic (c) return 66432 <= c and c <= 66463 end -function block.oldpersian (c) return 66464 <= c and c <= 66527 end -function block.deseret (c) return 66560 <= c and c <= 66639 end -function block.shavian (c) return 66640 <= c and c <= 66687 end -function block.osmanya (c) return 66688 <= c and c <= 66735 end -function block.osage (c) return 66736 <= c and c <= 66815 end -function block.elbasan (c) return 66816 <= c and c <= 66863 end -function block.caucasianalbanian (c) return 66864 <= c and c <= 66927 end -function block.lineara (c) return 67072 <= c and c <= 67455 end -function block.cypriotsyllabary (c) return 67584 <= c and c <= 67647 end -function block.imperialaramaic (c) return 67648 <= c and c <= 67679 end -function block.palmyrene (c) return 67680 <= c and c <= 67711 end -function block.nabataean (c) return 67712 <= c and c <= 67759 end -function block.hatran (c) return 67808 <= c and c <= 67839 end -function block.phoenician (c) return 67840 <= c and c <= 67871 end -function block.lydian (c) return 67872 <= c and c <= 67903 end -function block.meroitichieroglyphs (c) return 67968 <= c and c <= 67999 end -function block.meroiticcursive (c) return 68000 <= c and c <= 68095 end -function block.kharoshthi (c) return 68096 <= c and c <= 68191 end -function block.oldsoutharabian (c) return 68192 <= c and c <= 68223 end -function block.oldnortharabian (c) return 68224 <= c and c <= 68255 end -function block.manichaean (c) return 68288 <= c and c <= 68351 end -function block.avestan (c) 
return 68352 <= c and c <= 68415 end -function block.inscriptionalparthian (c) return 68416 <= c and c <= 68447 end -function block.inscriptionalpahlavi (c) return 68448 <= c and c <= 68479 end -function block.psalterpahlavi (c) return 68480 <= c and c <= 68527 end -function block.oldturkic (c) return 68608 <= c and c <= 68687 end -function block.oldhungarian (c) return 68736 <= c and c <= 68863 end -function block.ruminumeralsymbols (c) return 69216 <= c and c <= 69247 end -function block.brahmi (c) return 69632 <= c and c <= 69759 end -function block.kaithi (c) return 69760 <= c and c <= 69839 end -function block.sorasompeng (c) return 69840 <= c and c <= 69887 end -function block.chakma (c) return 69888 <= c and c <= 69967 end -function block.mahajani (c) return 69968 <= c and c <= 70015 end -function block.sharada (c) return 70016 <= c and c <= 70111 end -function block.sinhalaarchaicnumbers (c) return 70112 <= c and c <= 70143 end -function block.khojki (c) return 70144 <= c and c <= 70223 end -function block.multani (c) return 70272 <= c and c <= 70319 end -function block.khudawadi (c) return 70320 <= c and c <= 70399 end -function block.grantha (c) return 70400 <= c and c <= 70527 end -function block.newa (c) return 70656 <= c and c <= 70783 end -function block.tirhuta (c) return 70784 <= c and c <= 70879 end -function block.siddham (c) return 71040 <= c and c <= 71167 end -function block.modi (c) return 71168 <= c and c <= 71263 end -function block.mongoliansupplement (c) return 71264 <= c and c <= 71295 end -function block.takri (c) return 71296 <= c and c <= 71375 end -function block.ahom (c) return 71424 <= c and c <= 71487 end -function block.warangciti (c) return 71840 <= c and c <= 71935 end -function block.zanabazarsquare (c) return 72192 <= c and c <= 72271 end -function block.soyombo (c) return 72272 <= c and c <= 72367 end -function block.paucinhau (c) return 72384 <= c and c <= 72447 end -function block.bhaiksuki (c) return 72704 <= c and c <= 
72815 end -function block.marchen (c) return 72816 <= c and c <= 72895 end -function block.masaramgondi (c) return 72960 <= c and c <= 73055 end -function block.cuneiform (c) return 73728 <= c and c <= 74751 end -function block.cuneiformnumbersandpunctuation (c) return 74752 <= c and c <= 74879 end -function block.earlydynasticcuneiform (c) return 74880 <= c and c <= 75087 end -function block.egyptianhieroglyphs (c) return 77824 <= c and c <= 78895 end -function block.anatolianhieroglyphs (c) return 82944 <= c and c <= 83583 end -function block.bamumsupplement (c) return 92160 <= c and c <= 92735 end -function block.mro (c) return 92736 <= c and c <= 92783 end -function block.bassavah (c) return 92880 <= c and c <= 92927 end -function block.pahawhhmong (c) return 92928 <= c and c <= 93071 end -function block.miao (c) return 93952 <= c and c <= 94111 end -function block.ideographicsymbolsandpunctuation (c) return 94176 <= c and c <= 94207 end -function block.tangut (c) return 94208 <= c and c <= 100351 end -function block.tangutcomponents (c) return 100352 <= c and c <= 101119 end -function block.kanasupplement (c) return 110592 <= c and c <= 110847 end -function block.kanaextendeda (c) return 110848 <= c and c <= 110895 end -function block.nushu (c) return 110960 <= c and c <= 111359 end -function block.duployan (c) return 113664 <= c and c <= 113823 end -function block.shorthandformatcontrols (c) return 113824 <= c and c <= 113839 end -function block.byzantinemusicalsymbols (c) return 118784 <= c and c <= 119039 end -function block.musicalsymbols (c) return 119040 <= c and c <= 119295 end -function block.ancientgreekmusicalnotation (c) return 119296 <= c and c <= 119375 end -function block.taixuanjingsymbols (c) return 119552 <= c and c <= 119647 end -function block.countingrodnumerals (c) return 119648 <= c and c <= 119679 end -function block.mathematicalalphanumericsymbols (c) return 119808 <= c and c <= 120831 end -function block.suttonsignwriting (c) return 
120832 <= c and c <= 121519 end -function block.glagoliticsupplement (c) return 122880 <= c and c <= 122927 end -function block.mendekikakui (c) return 124928 <= c and c <= 125151 end -function block.adlam (c) return 125184 <= c and c <= 125279 end -function block.arabicmathematicalalphabeticsymbols (c) return 126464 <= c and c <= 126719 end -function block.mahjongtiles (c) return 126976 <= c and c <= 127023 end -function block.dominotiles (c) return 127024 <= c and c <= 127135 end -function block.playingcards (c) return 127136 <= c and c <= 127231 end -function block.enclosedalphanumericsupplement (c) return 127232 <= c and c <= 127487 end -function block.enclosedideographicsupplement (c) return 127488 <= c and c <= 127743 end -function block.miscellaneoussymbolsandpictographs (c) return 127744 <= c and c <= 128511 end -function block.emoticons (c) return 128512 <= c and c <= 128591 end -function block.ornamentaldingbats (c) return 128592 <= c and c <= 128639 end -function block.transportandmapsymbols (c) return 128640 <= c and c <= 128767 end -function block.alchemicalsymbols (c) return 128768 <= c and c <= 128895 end -function block.geometricshapesextended (c) return 128896 <= c and c <= 129023 end -function block.supplementalarrowsc (c) return 129024 <= c and c <= 129279 end -function block.supplementalsymbolsandpictographs (c) return 129280 <= c and c <= 129535 end -function block.cjkunifiedideographsextensionb (c) return 131072 <= c and c <= 173791 end -function block.cjkunifiedideographsextensionc (c) return 173824 <= c and c <= 177983 end -function block.cjkunifiedideographsextensiond (c) return 177984 <= c and c <= 178207 end -function block.cjkunifiedideographsextensione (c) return 178208 <= c and c <= 183983 end -function block.cjkunifiedideographsextensionf (c) return 183984 <= c and c <= 191471 end -function block.cjkcompatibilityideographssupplement (c) return 194560 <= c and c <= 195103 end -function block.tags (c) return 917504 <= c and c <= 917631 
end -function block.variationselectorssupplement (c) return 917760 <= c and c <= 917999 end -function block.supplementaryprivateuseareaa (c) return 983040 <= c and c <= 1048575 end -function block.supplementaryprivateuseareab (c) return 1048576 <= c and c <= 1114111 end - -category = {} -function category.Lm (c) - return false end -function category.Zs (c) - if c == 32 then return true end - return false end -function category.Nd (c) - if 48 <= c and c <= 57 then return true end - return false end -function category.Co (c) - return false end -function category.Mc (c) - return false end -function category.Pc (c) - if c == 95 then return true end - return false end -function category.No (c) - return false end -function category.Pi (c) - return false end -function category.Lo (c) - return false end -function category.So (c) - return false end -function category.Cs (c) - return false end -function category.Sk (c) - if c == 94 then return true end - if c == 96 then return true end - return false end -function category.Pd (c) - if c == 45 then return true end - return false end -function category.Sc (c) - if c == 36 then return true end - return false end -function category.Mn (c) - return false end -function category.Po (c) - if 33 <= c and c <= 35 then return true end - if 37 <= c and c <= 39 then return true end - if c == 42 then return true end - if c == 44 then return true end - if 46 <= c and c <= 47 then return true end - if 58 <= c and c <= 59 then return true end - if 63 <= c and c <= 64 then return true end - if c == 92 then return true end - return false end -function category.Cn (c) - return false end -function category.Pe (c) - if c == 41 then return true end - if c == 93 then return true end - if c == 125 then return true end - return false end -function category.Cf (c) - return false end -function category.Me (c) - return false end -function category.Lt (c) - return false end -function category.Zp (c) - return false end -function category.Cc (c) - if 0 <= 
c and c <= 31 then return true end - return false end -function category.Pf (c) - return false end -function category.Lu (c) - if 65 <= c and c <= 90 then return true end - return false end -function category.Ps (c) - if c == 40 then return true end - if c == 91 then return true end - if c == 123 then return true end - return false end -function category.Ll (c) - if 97 <= c and c <= 122 then return true end - return false end -function category.Sm (c) - if c == 43 then return true end - if 60 <= c and c <= 62 then return true end - if c == 124 then return true end - if c == 126 then return true end - return false end -function category.Nl (c) - return false end -function category.Zl (c) - return false end -function category.M (c) - return category.Mc(c) or category.Mn(c) or category.Me(c) end -function category.S (c) - return category.So(c) or category.Sk(c) or category.Sc(c) or category.Sm(c) end -function category.N (c) - return category.Nd(c) or category.No(c) or category.Nl(c) end -function category.Z (c) - return category.Zs(c) or category.Zp(c) or category.Zl(c) end -function category.L (c) - return category.Lm(c) or category.Lo(c) or category.Lt(c) or category.Lu(c) or category.Ll(c) end -function category.C (c) - return category.Co(c) or category.Cs(c) or category.Cn(c) or category.Cf(c) or category.Cc(c) end -function category.P (c) - return category.Pc(c) or category.Pi(c) or category.Pd(c) or category.Po(c) or category.Pe(c) or category.Pf(c) or category.Ps(c) end From b26d5fcc9ca9f950c37661b4ed71cf52e2eb17c7 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 9 Nov 2017 15:25:57 +0100 Subject: [PATCH 31/72] Revert "lib.xsd_regexp: implement unicode ranges" This reverts commit ea0978a16cb8529624daa8f2fcd82c4193d704d9. 
--- src/lib/xsd_regexp.lua | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/lib/xsd_regexp.lua b/src/lib/xsd_regexp.lua index 16e814eac3..b981b3a80e 100644 --- a/src/lib/xsd_regexp.lua +++ b/src/lib/xsd_regexp.lua @@ -3,7 +3,6 @@ module(..., package.seeall) local maxpc = require("lib.maxpc") local match, capture, combine = maxpc.import() -local codepoint = maxpc.codepoint -- Implementation of regular expressions as defined in Appendix G of "W3C XML -- Schema Definition Language (XSD) 1.1 Part 2: Datatypes", see: @@ -460,9 +459,9 @@ function compile_group (group) end function compile_range (start, stop) - start, stop = codepoint(start), codepoint(stop) + start, stop = start:byte(), stop:byte() local function in_range (s) - s = codepoint(s) + s = s:byte() return start <= s and s <= stop end return match.satisfies(in_range) From 6fcebb897722d51dcfd18e59914482dc10c1fdc5 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 9 Nov 2017 15:28:30 +0100 Subject: [PATCH 32/72] Revert "lib.maxpc: decode UTF-8 input", revert lib.xsd_regexp to ASCII-only This reverts commit c3ef6591e45b89c6aabd907941d891a9f266a8b8. 
--- src/lib/maxpc.lua | 59 ++++-------------------------------------- src/lib/xsd_regexp.lua | 19 +++++--------- 2 files changed, 12 insertions(+), 66 deletions(-) diff --git a/src/lib/maxpc.lua b/src/lib/maxpc.lua index cceeeeec02..8f7eb00696 100644 --- a/src/lib/maxpc.lua +++ b/src/lib/maxpc.lua @@ -31,27 +31,22 @@ end -- input protocol --- NB: its trivial to support *both* octet and UTF-8 input, see --- commit 085a5813473f1fa64502b480cc00122bef0fb32a - input = {} function input.new (str) - return { pos = 1, idx = 1, str = str } + return { idx = 1, str = str } end function input.empty (s) return s.idx > #s.str end -function input.first (s, n) n = n or 1 - local to = utf8next(s.str, s.idx) - while n > 1 do n, to = n - 1, utf8next(s.str, to) end - return s.str:sub(s.idx, to - 1) +function input.first (s, n) + return s.str:sub(s.idx, s.idx + (n or 1) - 1) end function input.rest (s) - return { pos = s.pos + 1, idx = utf8next(s.str, s.idx), str = s.str } + return { idx = s.idx + 1, str = s.str } end function input.position (s) @@ -233,7 +228,7 @@ function capture.unpack (parser, f) end --- Digit parsing +-- digit parsing function match.digit (radix) radix = radix or 10 @@ -270,46 +265,6 @@ function capture.integer_number (radix) end --- UTF-8 decoding (see http://nullprogram.com/blog/2017/10/06/) - -local bit = require("bit") -local lshift, rshift, band, bor = bit.lshift, bit.rshift, bit.band, bit.bor - -function utf8length (str, idx) idx = idx or 1 - local lengths = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0 - } - return lengths[rshift(str:byte(idx), 3) + 1] -end - -function utf8next (str, idx) idx = idx or 1 - return idx + math.max(utf8length(str, idx), 1) -- advance even on error -end - -function codepoint (str, idx) idx = idx or 1 - local length = utf8length(str, idx) - local point - if length == 1 then point = str:byte(idx) - elseif length == 2 then point = bor(lshift(band(str:byte(idx), 0x1f), 6), - 
band(str:byte(idx+1), 0x3f)) - elseif length == 3 then point = bor(lshift(band(str:byte(idx), 0x0f), 12), - lshift(band(str:byte(idx+1), 0x3f), 6), - band(str:byte(idx+2), 0x3f)) - elseif length == 4 then point = bor(lshift(band(str:byte(idx), 0x07), 18), - lshift(band(str:byte(idx+1), 0x3f), 12), - lshift(band(str:byte(idx+2), 0x3f), 6), - band(str:byte(idx+3), 0x3f)) - else - point = -1 -- invalid - end - if point >= 0xd800 and point <= 0xdfff then - point = -1 -- surrogate half - end - return point -end - - -- tests function selftest () @@ -467,8 +422,4 @@ function selftest () assert(not result) assert(not matched) assert(not eof) local result, matched, eof = parse("1234a", capture.integer_number()) assert(result == 1234) assert(matched) assert(not eof) - - -- test UTF-8 input - local result, matched, eof = parse("λ", capture.element()) - assert(result == "λ") assert(matched) assert(eof) end diff --git a/src/lib/xsd_regexp.lua b/src/lib/xsd_regexp.lua index b981b3a80e..c83e215995 100644 --- a/src/lib/xsd_regexp.lua +++ b/src/lib/xsd_regexp.lua @@ -4,8 +4,8 @@ module(..., package.seeall) local maxpc = require("lib.maxpc") local match, capture, combine = maxpc.import() --- Implementation of regular expressions as defined in Appendix G of "W3C XML --- Schema Definition Language (XSD) 1.1 Part 2: Datatypes", see: +-- ASCII only implementation of regular expressions as defined in Appendix G of +-- "W3C XML Schema Definition Language (XSD) 1.1 Part 2: Datatypes", see: -- -- https://www.w3.org/TR/xmlschema11-2/#regexs -- @@ -22,10 +22,7 @@ local match, capture, combine = maxpc.import() -- in the format defined by the specification referenced above, and compiles -- the denoted regular language to a MaxPC grammar. -- --- NYI: Block escapes and Unicode support for category escapes are not --- implemented. Category escapes and complements only match codepoints in the --- Basic Latin block (ASCII). 
Users of category escapes and complements need to --- ensure their input is ASCII-only. +-- NYI: Block escapes, Unicode handling. function compile (expr) local ast = parse(expr) @@ -389,11 +386,10 @@ function compile_atom (atom) return member(s, " \t\n\r") end local function is_digit (s) - return GC.Nd(codepoint(s)) + return GC.Nd(s:byte()) end local function is_word (s) - s = codepoint(s) - return not (GC.P(s) or GC.Z(s) or GC.C(s)) + return not (GC.P(s:byte()) or GC.Z(s:byte()) or GC.C(s:byte())) end if type(atom) == 'string' then return match.equal(atom) elseif atom.escape == "n" then return match.equal("\n") @@ -461,15 +457,14 @@ end function compile_range (start, stop) start, stop = start:byte(), stop:byte() local function in_range (s) - s = s:byte() - return start <= s and s <= stop + return start <= s:byte() and s:byte() <= stop end return match.satisfies(in_range) end function compile_category (name) local predicate = assert(GC[name], "Invalid category: "..name) - return match.satisfies(function (s) return predicate(codepoint(s)) end) + return match.satisfies(function (s) return predicate(s:byte()) end) end From 1187fc73cff45b274c1d4c51d06ae69a1dcd2c33 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 9 Nov 2017 16:24:10 +0100 Subject: [PATCH 33/72] lib.xsd_regexp: implement \i, \I, \c, and \C --- src/lib/xsd_regexp.lua | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/src/lib/xsd_regexp.lua b/src/lib/xsd_regexp.lua index c83e215995..720c2b45cf 100644 --- a/src/lib/xsd_regexp.lua +++ b/src/lib/xsd_regexp.lua @@ -375,7 +375,6 @@ function compile_quantifier (quantifier) end function compile_atom (atom) - -- NYI: \i, \I, \c, \C local function is_special_escape (s) return member(s, "\\|.-^?*+{}()[]") end @@ -385,6 +384,12 @@ function compile_atom (atom) local function is_space (s) return member(s, " \t\n\r") end + local function is_NameStartChar (s) + return GC.L(s:byte()) or member(s, ":_") + end + local 
function is_NameChar (s) + return is_NameStartChar(s) or GC.Nd(s:byte()) or member(s, "-.") + end local function is_digit (s) return GC.Nd(s:byte()) end @@ -403,6 +408,14 @@ function compile_atom (atom) return match.satisfies(is_space) elseif atom.escape == "S" then return match._not(match.satisfies(is_space)) + elseif atom.escape == "i" then + return match.satisfies(is_NameStartChar) + elseif atom.escape == "I" then + return match._not(match.satisfies(is_NameStartChar)) + elseif atom.escape == "c" then + return match.satisfies(is_NameChar) + elseif atom.escape == "C" then + return match._not(match.satisfies(is_NameChar)) elseif atom.escape == "d" then return match.satisfies(is_digit) elseif atom.escape == "D" then @@ -601,4 +614,20 @@ function selftest () test {regexp="\\P{Ps}", accept={"}", "]", ")", "A", "b", "y", "Z", "0", "-", " "}, reject={"(", "[", "{"}} + + test {regexp="\\P{Ps}", + accept={"}", "]", ")", "A", "b", "y", "Z", "0", "-", " "}, + reject={"(", "[", "{"}} + + test {regexp="\\w", + accept={"F", "0", "a", "~"}, + reject={"-", " ", ".", "\t"}} + + test {regexp="\\i", + accept={"a", "B", "_", ":"}, + reject={"-", "1", " ", "."}} + + test {regexp="\\C", + accept={"~", " ", "\t", "\n"}, + reject={"a", "B", "1", ".", "_", ":"}} end From 074ff0c4fafbc587564208ccdcde806bf867f10d Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 10 Nov 2017 10:15:45 +0100 Subject: [PATCH 34/72] lib.xsd_regexp: implement backtracking lib.maxpc: add backtracking combinators --- src/lib/maxpc.lua | 162 ++++++++++++++++++++++++++++++++++++++++- src/lib/xsd_regexp.lua | 79 ++++++++++---------- 2 files changed, 202 insertions(+), 39 deletions(-) diff --git a/src/lib/maxpc.lua b/src/lib/maxpc.lua index 8f7eb00696..d544691a4a 100644 --- a/src/lib/maxpc.lua +++ b/src/lib/maxpc.lua @@ -265,6 +265,110 @@ function capture.integer_number (radix) end +-- backtracking combinators + +function match.plus (a, b) + return function (s) + local a_more, b_more, more + a_more = function 
() return a(s) end + more = function () + if b_more then + local rest + rest, _, _, b_more = b_more() + if rest then + return rest, nil, nil, more + else + return more() + end + elseif a_more then + local suffix + suffix, _, _, a_more = a_more() + if suffix then + b_more = function () return b(suffix) end + return more() + end + end + end + return more() + end +end + +function match.alternate (x, y) + return function (s) + local x_more, more + x_more = function () + return x(s) + end + more = function () + local rest + if x_more then + rest, _, _, x_more = x_more() + end + if rest then + return rest, nil, nil, more + else + return y(s) + end + end + return more() + end +end + +function match.optional (parser) + return match.alternate(parser, match.seq()) +end + +function match.range (parser, min, max) + return function (s) + local rests = {} + while s and (not max or #rests <= max) do + table.insert(rests, s) + s = parser(s) + end + local more + more = function () + local rest = table.remove(rests) + if rest and (not min or #rests >= min) then + return rest, nil, nil, more + end + end + return more() + end +end + +function match.all (parser) + return match.range(parser, 0) +end + +function match.one_or_more (parser) + return match.plus(parser, match.all(parser)) +end + +local function make_reducer (combinator, sentinel) + local reduce + reduce = function (parsers) + if #parsers == 0 then + return sentinel + elseif #parsers == 1 then + return parsers[1] + else + local head = table.remove(parsers, 1) + local tail = reduce(parsers) + return combinator(head, tail) + end + end + return function (...) + return reduce({...}) + end +end + +local function identity (...) return ... 
end +match.path = make_reducer(match.plus, identity) + +local function constantly_nil () end +match.either = make_reducer(match.alternate, constantly_nil) + + -- tests function selftest () @@ -294,10 +398,11 @@ function selftest () local result, matched, eof = parse("", capture.element()) assert(not result) assert(not matched) assert(eof) - -- match.satisfied local function is_digit (x) return ("01234567890"):find(x, 1, true) end + + -- match.satisfied local result, matched, eof = parse("123", capture.subseq(match.satisfies(is_digit))) assert(result == "1") assert(matched) assert(not eof) @@ -357,10 +462,11 @@ function selftest () local result, matched, eof = parse("", fo) assert(not result) assert(not matched) assert(eof) - -- combine._and local function is_alphanumeric (x) return ("01234567890abcdefghijklmnopqrstuvwxyz"):find(x, 1, true) end + + -- combine._and local d = combine._and(match.satisfies(is_alphanumeric), match.satisfies(is_digit)) local result, matched, eof = parse("12", capture.seq(d, d, match.eof())) @@ -408,6 +514,7 @@ function selftest () end )) + -- digits local result, matched, eof = parse("f", match.digit(16)) assert(not result) assert(matched) assert(eof) local result, matched, eof = parse("f423", capture.natural_number(16)) @@ -422,4 +529,55 @@ function selftest () assert(not result) assert(not matched) assert(not eof) local result, matched, eof = parse("1234a", capture.integer_number()) assert(result == 1234) assert(matched) assert(not eof) + + -- backtracking + local result, matched, eof = parse( + "0aaaaaaaa1", + match.path(match.equal("0"), + match.all(match.satisfies(is_alphanumeric)), + match.equal("1")) + ) + assert(not result) assert(matched) assert(eof) + local result, matched, eof = + parse("a", match.either(match.equal("a"), match.equal("b"))) + assert(not result) assert(matched) assert(eof) + local result, matched, eof = + parse("b", match.either(match.equal("a"), match.equal("b"))) + assert(not result) assert(matched) 
assert(eof) + local result, matched, eof = parse(".", match.optional(match.equal("."))) + assert(not result) assert(matched) + local result, matched, eof = parse("", match.optional(match.equal("."))) + assert(not result) assert(matched) assert(eof) + local domain_like = match.either( + match.path( + match.path( + match.all(match.path(match.all(match.satisfies(is_alphanumeric)), + combine.diff(match.satisfies(is_alphanumeric), + match.satisfies(is_digit)), + match.equal("."))) + ), + match.path(match.all(match.satisfies(is_alphanumeric)), + combine.diff(match.satisfies(is_alphanumeric), + match.satisfies(is_digit)), + match.optional(match.equal("."))), + match.eof() + ), + match.seq(match.equal("."), match.eof()) + ) + local result, matched, eof = parse(".", domain_like) + assert(not result) assert(matched) assert(eof) + local result, matched, eof = parse("foo.", domain_like) + assert(not result) assert(matched) assert(eof) + local result, matched, eof = parse("1foo.bar", domain_like) + assert(not result) assert(matched) assert(eof) + local result, matched, eof = parse("foo.b2ar.baz", domain_like) + assert(not result) assert(matched) assert(eof) + local result, matched, eof = parse("foo.bar.2baz.", domain_like) + assert(not result) assert(matched) assert(eof) + local result, matched, eof = parse("foo2", domain_like) + assert(not result) assert(not matched) assert(not eof) + local result, matched, eof = parse("..", domain_like) + assert(not result) assert(not matched) assert(not eof) + local result, matched, eof = parse("123.456", domain_like) + assert(not result) assert(not matched) assert(not eof) end diff --git a/src/lib/xsd_regexp.lua b/src/lib/xsd_regexp.lua index 720c2b45cf..4a6f187803 100644 --- a/src/lib/xsd_regexp.lua +++ b/src/lib/xsd_regexp.lua @@ -26,10 +26,10 @@ local match, capture, combine = maxpc.import() function compile (expr) local ast = parse(expr) - local parser = compile_branches(ast.branches) + local parser = compile_branches(ast.branches, 
'toplevel') return function (str) - local _, success, is_eof = maxpc.parse(str, parser) - return success and is_eof + local _, success, eof = maxpc.parse(str, parser) + return success and eof end end @@ -308,16 +308,20 @@ charClassExpr_parser = capture.charClassExpr() -- Compiler rules: AST -> MaxPC parser -function compile_branches (branches) +function compile_branches (branches, is_toplevel) local parsers = {} for _, branch in ipairs(branches) do if branch.pieces then - table.insert(parsers, compile_pieces(branch.pieces)) + local parser = compile_pieces(branch.pieces) + if is_toplevel then + parser = match.path(parser, match.eof()) + end + table.insert(parsers, parser) end end if #parsers == 0 then return match.eof() elseif #parsers == 1 then return parsers[1] - elseif #parsers > 1 then return combine._or(unpack(parsers)) end + elseif #parsers > 1 then return match.either(unpack(parsers)) end end function compile_pieces (pieces) @@ -331,43 +335,20 @@ function compile_pieces (pieces) table.insert(parsers, atom_parser) end end - return match.seq(unpack(parsers)) + return match.path(unpack(parsers)) end function compile_quantifier (quantifier) - if quantifier == "?" then return combine.maybe - elseif quantifier == "*" then return combine.any - elseif quantifier == "+" then return combine.some - elseif quantifier.min and quantifier.max then - -- [min * parser] .. [max * maybe(parser)] - return function (parser) - local parsers = {} - for n = 1, quantifier.min do - table.insert(parsers, parser) - end - for n = 1, quantifier.max - quantifier.min do - table.insert(parsers, combine.maybe(parser)) - end - return match.seq(unpack(parsers)) - end - elseif quantifier.min then - -- [min * parser] any(parser) + if quantifier == "?" 
then return match.optional + elseif quantifier == "*" then return match.all + elseif quantifier == "+" then return match.one_or_more + elseif quantifier.min or quantifier.max then return function (parser) - local parsers = {} - for n = 1, quantifier.min do - table.insert(parsers, parser) - end - table.insert(parsers, combine.any(parser)) - return match.seq(unpack(parsers)) + return match.range(parser, quantifier.min, quantifier.max) end elseif quantifier.exactly then - -- [exactly * parser] return function (parser) - local parsers = {} - for n = 1, quantifier.exactly do - table.insert(parsers, parser) - end - return match.seq(unpack(parsers)) + return match.range(parser, quantifier.exactly, quantifier.exactly) end else error("Invalid quantifier") @@ -456,7 +437,7 @@ function compile_group (group) for _, atom in ipairs(atoms) do table.insert(parsers, compile_atom(atom)) end - return combine._or(unpack(parsers)) + return match.either(unpack(parsers)) end if group.include then return compile_group_atoms(group.include) @@ -630,4 +611,28 @@ function selftest () test {regexp="\\C", accept={"~", " ", "\t", "\n"}, reject={"a", "B", "1", ".", "_", ":"}} + + test {regexp="a|aa", + accept={"a", "aa"}, + reject={"ab", ""}} + + test{regexp="([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])", + accept={"0","12", "123", "192","168","178",}, + reject={"a.a.a.", ""}} + + local ipv4_address = + "(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}" + .. "([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])" + .. "(%[\\p{N}\\p{L}]+)?" + test {regexp=ipv4_address, + accept={"192.168.0.1", "8.8.8.8%eth0"}, + reject={"1.256.8.8", "1.2.3%foo", "1.1.1.1%~"}} + + local domain_name = + "((([a-zA-Z0-9_]([a-zA-Z0-9\\-_]){0,61})?[a-zA-Z0-9]\\.)*" + .. "([a-zA-Z0-9_]([a-zA-Z0-9\\-_]){0,61})?[a-zA-Z0-9]\\.?)" + .. "|\\." 
+ test {regexp=domain_name, + accept={"hello", "foo-z.bar.de", "123.com", "."}, + reject={"___.com", "foo-.baz.de", ".."}} end From 1be3a55d8d62187482c9bb2a4a068f7cf1928c32 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Sun, 12 Nov 2017 23:02:23 +0100 Subject: [PATCH 35/72] lib.yang.schema: fix parse_range --- src/lib/yang/schema.lua | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/lib/yang/schema.lua b/src/lib/yang/schema.lua index ebeb2babcf..77e4e61213 100644 --- a/src/lib/yang/schema.lua +++ b/src/lib/yang/schema.lua @@ -50,17 +50,20 @@ end local function parse_range(loc, range) local function parse_part(part) local l, r = part:match("^%s*([^%.]*)%s*%.%.%s*([^%s]*)%s*$") + if not l then + l = part:match("^%s*([^%.]*)%s*$") + l = (l ~= 'min') and l + end assert_with_loc(l, loc, 'bad range component: %s', part) if l ~= 'min' then l = util.tointeger(l) end - if r ~= 'max' then r = util.tointeger(r) end + if r ~= 'max' then r = r and util.tointeger(r) or l end return { l, r } end local parts = range:split("|") - local res = {'or'} - for part in range:split("|") do table.insert(res, parse_part(part)) end - if #res == 1 then error_with_loc(loc, "empty range", range) - elseif #res == 2 then return res[2] - else return res end + local parts = {} + for part in range:split("|") do table.insert(parts, parse_part(part)) end + if #parts == 0 then error_with_loc(loc, "empty range", range) end + return parts end local function collect_children(children, kinds) @@ -319,8 +322,7 @@ local function init_leaf_list(node, loc, argument, children) node.reference = maybe_child_property(loc, children, 'reference', 'value') end local function init_length(node, loc, argument, children) - -- TODO: parse length arg str - node.value = require_argument(loc, argument) + node.value = parse_range(loc, require_argument(loc, argument)) node.description = maybe_child_property(loc, children, 'description', 'value') node.reference = 
maybe_child_property(loc, children, 'reference', 'value') end From de468cb223d4ce3f03a5deed1be119e680e93665 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Sun, 12 Nov 2017 23:03:00 +0100 Subject: [PATCH 36/72] lib.yang.data: implement range and pattern restrictions --- src/lib/yang/data.lua | 81 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 17 deletions(-) diff --git a/src/lib/yang/data.lua b/src/lib/yang/data.lua index 7b1e596b2d..8a9a191a31 100644 --- a/src/lib/yang/data.lua +++ b/src/lib/yang/data.lua @@ -9,6 +9,7 @@ local value = require("lib.yang.value") local ffi = require("ffi") local ctable = require('lib.ctable') local cltable = require('lib.cltable') +local regexp = require("lib.xsd_regexp") function normalize_id(id) return id:gsub('[^%w_]', '_') @@ -150,26 +151,67 @@ local function integer_type(min, max) end end --- FIXME :) -local function range_validator(range, f) return f end -local function length_validator(range, f) return f end -local function pattern_validator(range, f) return f end -local function bit_validator(range, f) return f end -local function enum_validator(range, f) return f end +-- TODO: length, bit and enum validators (: +local validators = {} +function validators.range(range, f) + if not range then return end + local rangestr + for _, part in ipairs(range.value) do + rangestr = (rangestr and rangestr.." 
") or "" + local min, max = unpack(part) + if min == max then + rangestr = rangestr..min + else + rangestr = rangestr..min..".."..max + end + end + return function (val, P) + if f then f(val, P) end + if not val then return end + for _, part in ipairs(range.value) do + local min, max = unpack(part) + if (min == 'min' or val >= min) + and (max == 'max' or val <= max) then + return + end + end + P:error("value not in range\n"..rangestr.."\n"..val) + end +end +function validators.length(length, f) return f end +function validators.patterns(patterns, f) + if not patterns then return end + local compiled = {} + for _, pattern in ipairs(patterns) do + compiled[pattern.value] = regexp.compile(pattern.value) + end + return function (val, P) + if f then f(val, P) end + if not val then return end + for pattern, match in pairs(compiled) do + if not match(val) then + P:error("pattern mismatch\n"..pattern.."\n"..val) + end + end + end +end +function validators.bit(bit, f) return f end +function validators.enum(enum, f) return f end local function value_parser(typ) local prim = typ.primitive_type local parse = assert(value.types[prim], prim).parse local validate - validate = range_validator(typ.range, validate) - validate = length_validator(typ.length, validate) - validate = pattern_validator(typ.pattern, validate) - validate = bit_validator(typ.bit, validate) - validate = enum_validator(typ.enums, validate) + for restriction, validator in pairs(validators) do + validate = validator(typ[restriction], validate) + if typ.base_type then + validate = validator(typ.base_type.type[restriction], validate) + end + end -- TODO: union, require-instance. 
- return function(str, k) + return function(str, k, P) local val = parse(str, k) - if validate then validate(val) end + if validate then validate(val, P) end return val end end @@ -228,7 +270,7 @@ local function array_parser(keyword, element_type, ctype) local str = P:parse_string() P:skip_whitespace() P:consume(";") - return parsev(str, keyword) + return parsev(str, keyword, P) end local function parse(P, out) table.insert(out, parse1(P)) @@ -246,6 +288,9 @@ local function array_parser(keyword, element_type, ctype) return {init=init, parse=parse, finish=finish} end +local default_parser = {} +function default_parser:error (...) error(...) end + local function scalar_parser(keyword, argument_type, default, mandatory) local function init() return nil end local parsev = value_parser(argument_type) @@ -257,7 +302,7 @@ local function scalar_parser(keyword, argument_type, default, mandatory) end P:skip_whitespace() P:consume(";") - return parsev(maybe_str, keyword) + return parsev(maybe_str, keyword, P) end local function parse(P, out) if out ~= nil then P:error('duplicate parameter: '..keyword) end @@ -265,7 +310,7 @@ local function scalar_parser(keyword, argument_type, default, mandatory) end local function finish(out) if out ~= nil then return out end - if default then return parsev(default, keyword) end + if default then return parsev(default, keyword, default_parser) end if mandatory then error('missing scalar value: '..keyword) end end return {init=init, parse=parse, finish=finish} @@ -463,7 +508,9 @@ function data_parser_from_grammar(production) function top_parsers.scalar(production) local parse = value_parser(production.argument_type) return function(str, filename) - return parse(parser_mod.parse_string(str, filename), '[bare scalar]') + return parse(parser_mod.parse_string(str, filename), + '[bare scalar]', + default_parser) end end return assert(top_parsers[production.type])(production) From e54b5f65841a6c6222c0f4be62d4f4c1ace135f3 Mon Sep 17 00:00:00 2001 
From: Max Rottenkolber Date: Fri, 12 Jan 2018 12:48:27 +0100 Subject: [PATCH 37/72] lib.yang.data: do not compile pattern validator for empty set of patterns --- src/lib/yang/data.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/yang/data.lua b/src/lib/yang/data.lua index 8a9a191a31..3cf38465e9 100644 --- a/src/lib/yang/data.lua +++ b/src/lib/yang/data.lua @@ -180,7 +180,7 @@ function validators.range(range, f) end function validators.length(length, f) return f end function validators.patterns(patterns, f) - if not patterns then return end + if not patterns or #patterns == 0 then return end local compiled = {} for _, pattern in ipairs(patterns) do compiled[pattern.value] = regexp.compile(pattern.value) From 88cbeb043145798f3f5137530cf863a58e976a75 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 12 Jan 2018 12:57:16 +0100 Subject: [PATCH 38/72] lib.yang.data: only run pattern validator on string values This extra guard is necessary because some intrinsic lib.yang extensions will parse/validate some types natively (e.g. IP addresses using lib.protocol.ipvX) circumventing the YANG validation stack. 
--- src/lib/yang/data.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/yang/data.lua b/src/lib/yang/data.lua index 3cf38465e9..f66d6dad0f 100644 --- a/src/lib/yang/data.lua +++ b/src/lib/yang/data.lua @@ -187,7 +187,7 @@ function validators.patterns(patterns, f) end return function (val, P) if f then f(val, P) end - if not val then return end + if not val or type(val) ~= 'string' then return end for pattern, match in pairs(compiled) do if not match(val) then P:error("pattern mismatch\n"..pattern.."\n"..val) From d1a7409593f0707e25be8bb8398efd2b6755d98b Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 12 Jan 2018 14:27:34 +0100 Subject: [PATCH 39/72] lib.yang.schema: fix bug in selftest (range can have multiple intervals) --- src/lib/yang/schema.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib/yang/schema.lua b/src/lib/yang/schema.lua index 77e4e61213..cce3872002 100644 --- a/src/lib/yang/schema.lua +++ b/src/lib/yang/schema.lua @@ -982,8 +982,8 @@ function selftest() assert(contents.body["name"].description == "Name of fruit.") assert(contents.body["score"].type.id == "uint8") assert(contents.body["score"].mandatory == true) - assert(contents.body["score"].type.range.value[1] == 0) - assert(contents.body["score"].type.range.value[2] == 10) + assert(contents.body["score"].type.range.value[1][1] == 0) + assert(contents.body["score"].type.range.value[1][2] == 10) -- Check the container has a leaf called "description" local desc = schema.body["fruit-bowl"].body['description'] From e577721b417011e06b5a0c073fcfba5e1d047256 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 12 Jan 2018 16:59:36 +0100 Subject: [PATCH 40/72] program.lwaftr.tests.propbased.genyang: adapt to new range format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When hacking the range validator I had misunderstood the format used to express YANG range restrictions (I thought it 
didn’t feature multiple intervals), and changed it along the way. I think the new format is a bit simpler though (more regular) and requires less special cases, so I decided to keep it. --- src/program/lwaftr/tests/propbased/genyang.lua | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/program/lwaftr/tests/propbased/genyang.lua b/src/program/lwaftr/tests/propbased/genyang.lua index e2a532afd0..41145c8434 100644 --- a/src/program/lwaftr/tests/propbased/genyang.lua +++ b/src/program/lwaftr/tests/propbased/genyang.lua @@ -156,17 +156,8 @@ local function choose_range(rng, lo, hi) if #rng == 0 or (generate_invalid and r < 0.1) then return choose_bounded(lo, hi) - elseif rng[1] == "or" then - local intervals = {} - local num_intervals = (#rng - 1) / 2 - - for i=1, num_intervals do - intervals[i] = { rng[2*i], rng[2*i+1] } - end - - return choose_range(choose(intervals), lo, hi) else - local lo_rng, hi_rng = rng[1], rng[2] + local lo_rng, hi_rng = unpack(choose(rng)) if lo_rng == "min" then lo_rng = lo @@ -526,11 +517,11 @@ function selftest() -- check some int types with range statements for i=1, 100 do local val1 = value_from_type({ primitive_type="uint8", - range={ value = {1, 16} } }) + range={ value = {{1, 16}} } }) local val2 = value_from_type({ primitive_type="uint8", - range={ value = {"or", 1, 16, 18, 32} } }) + range={ value = {{1, 16}, {18, 32}} } }) local val3 = value_from_type({ primitive_type="uint8", - range={ value = {"or", "min", 10, 250, "max"} } }) + range={ value = {{'min', 10}, {250, 'max'}} } }) assert(val1 >= 1 and val1 <= 16, string.format("test value: %d", val1)) assert(val2 >= 1 and val2 <= 32 and val2 ~= 17, string.format("test value: %d", val2)) From 090d6179d7ad1ec9016325ff957da43ca558d305 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 26 Jan 2018 17:49:47 +0100 Subject: [PATCH 41/72] lib.numa: skip selftest if running on a non-NUMA machine --- src/lib/numa.lua | 3 +++ 1 file changed, 3 
insertions(+) diff --git a/src/lib/numa.lua b/src/lib/numa.lua index 03534b8cd4..1530ec9fee 100644 --- a/src/lib/numa.lua +++ b/src/lib/numa.lua @@ -128,6 +128,9 @@ function prevent_preemption(priority) end function selftest () + if not has_numa() then + os.exit(engine.test_skipped_code) + end function test_cpu(cpu) local node = cpu_get_numa_node(cpu) From 2ece0c1c4d8f31431b94860d9a9afeeb7da851f9 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Mon, 29 Jan 2018 19:52:08 +0100 Subject: [PATCH 42/72] lib.numa: more fine grained approach to 090d6179d MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of skipping the selftest on non-NUMA systems, simply do not attempt to do NUMA things on system where it isn’t available. --- src/lib/numa.lua | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/lib/numa.lua b/src/lib/numa.lua index 1530ec9fee..4a8d1e0bcb 100644 --- a/src/lib/numa.lua +++ b/src/lib/numa.lua @@ -109,6 +109,7 @@ function unbind_numa_node () end function bind_to_numa_node (node) + if not has_numa() then return end if node == bound_numa_node then return end if not node then return unbind_numa_node() end assert(not bound_numa_node, "already bound") @@ -128,9 +129,6 @@ function prevent_preemption(priority) end function selftest () - if not has_numa() then - os.exit(engine.test_skipped_code) - end function test_cpu(cpu) local node = cpu_get_numa_node(cpu) @@ -138,11 +136,11 @@ function selftest () assert(bound_cpu == cpu) assert(bound_numa_node == node) assert(S.getcpu().cpu == cpu) - assert(S.getcpu().node == node) + assert(not has_numa() or S.getcpu().node == node) bind_to_cpu(nil) assert(bound_cpu == nil) assert(bound_numa_node == node) - assert(S.getcpu().node == node) + assert(not has_numa() or S.getcpu().node == node) bind_to_numa_node(nil) assert(bound_cpu == nil) assert(bound_numa_node == nil) From 6783dac1b9e622420c31a581b6908062ee3c5210 Mon Sep 17 00:00:00 2001 From: Max 
Rottenkolber Date: Thu, 25 Jan 2018 22:07:19 +0100 Subject: [PATCH 43/72] lib.interlink: use __attribute__((packed, aligned(64))) to align s --- src/lib/interlink.lua | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/lib/interlink.lua b/src/lib/interlink.lua index a58c0422d8..1062e0bf38 100644 --- a/src/lib/interlink.lua +++ b/src/lib/interlink.lua @@ -20,7 +20,6 @@ assert(band(SIZE, SIZE-1) == 0, "SIZE is not a power of two") local status = { Locked = 0, Unlocked = 1 } ffi.cdef([[ struct interlink { - char pad0[]]..CACHELINE..[[]; int read, write, lock; char pad1[]]..CACHELINE-3*INT..[[]; int lwrite, nread; @@ -28,7 +27,7 @@ ffi.cdef([[ struct interlink { int lread, nwrite; char pad3[]]..CACHELINE-2*INT..[[]; struct packet *packets[]]..SIZE..[[]; -}]]) +} __attribute__((packed, aligned(]]..CACHELINE..[[)))]]) function create (name) local r = shm.create(name, "struct interlink") From cae255473e40ba21518f152e1fdacedf4f3f6e98 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 9 Feb 2018 20:41:05 +0100 Subject: [PATCH 44/72] apps/interlink: turn test.lua into Snabb script selftest.snabb (These should really be run by make test, no?) --- src/Makefile | 4 ++-- src/apps/interlink/{test.lua => selftest.snabb} | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) rename src/apps/interlink/{test.lua => selftest.snabb} (98%) mode change 100644 => 100755 diff --git a/src/Makefile b/src/Makefile index 453f59c30c..46860bfa65 100644 --- a/src/Makefile +++ b/src/Makefile @@ -51,8 +51,8 @@ TESTMODS = $(shell find . -regex '[^\#]*\.\(lua\|dasl\)' -printf '%P ' | \ # TESTSCRIPTS expands to: # lib/watchdog/selftest.sh ... -# for each executable selftext.sh script in src. -TESTSCRIPTS = $(shell find . -name "selftest.sh" -executable | xargs) +# for each executable selftext.* script in src. +TESTSCRIPTS = $(shell find .
-name "selftest.*" -executable | xargs) PATH := ../lib/luajit/usr/local/bin:$(PATH) diff --git a/src/apps/interlink/test.lua b/src/apps/interlink/selftest.snabb old mode 100644 new mode 100755 similarity index 98% rename from src/apps/interlink/test.lua rename to src/apps/interlink/selftest.snabb index 0d19ae16e1..e3a96ad445 --- a/src/apps/interlink/test.lua +++ b/src/apps/interlink/selftest.snabb @@ -1,3 +1,5 @@ +#!snabb snsh + -- Use of this source code is governed by the Apache 2.0 license; see COPYING. local worker = require("core.worker") From 022ab0c92fc876e85c711c097701fc0db54ab2f0 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 9 Feb 2018 20:46:13 +0100 Subject: [PATCH 45/72] apps.interlink: add duration parameter to selftest.snabb --- src/apps/interlink/selftest.snabb | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/apps/interlink/selftest.snabb b/src/apps/interlink/selftest.snabb index e3a96ad445..01798acc47 100755 --- a/src/apps/interlink/selftest.snabb +++ b/src/apps/interlink/selftest.snabb @@ -7,6 +7,9 @@ local interlink = require("lib.interlink") local Receiver = require("apps.interlink.receiver") local Sink = require("apps.basic.basic_apps").Sink +-- Synopsis: selftest.snabb [duration] +local DURATION = tonumber(main.parameters[1]) or 10 + interlink.create("group/test.mcp") worker.start("source", @@ -19,15 +22,15 @@ config.app(c, "sink", Sink) config.link(c, "rx.output->sink.input") engine.configure(c) -engine.main({duration=10, report={showlinks=true}}) +engine.main({duration=DURATION, report={showlinks=true}}) for w, s in pairs(worker.status()) do print(("worker %s: pid=%s alive=%s status=%s"):format( w, s.pid, s.alive, s.status)) end local stats = link.stats(engine.app_table["sink"].input.input) -print(stats.txpackets / 1e6 / 10 .. " Mpps") +print(stats.txpackets / 1e6 / DURATION .. 
" Mpps") -- test teardown interlink.free(interlink.open("group/test.mcp")) -engine.main({duration=1}) +engine.main({duration=0.1}) From c888a869e9a6e5c92c1e574a3c75db7b28585e0e Mon Sep 17 00:00:00 2001 From: "R. Matthew Emerson" Date: Mon, 22 Feb 2016 16:30:59 -0500 Subject: [PATCH 46/72] Simple spinlock implementation based on xchg instruction. --- src/core/spinlock.dasl | 46 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 src/core/spinlock.dasl diff --git a/src/core/spinlock.dasl b/src/core/spinlock.dasl new file mode 100644 index 0000000000..01936658d6 --- /dev/null +++ b/src/core/spinlock.dasl @@ -0,0 +1,46 @@ +module(...,package.seeall) + +local dasm = require("dasm") +local ffi = require("ffi") + +| .arch x64 +| .actionlist actions +| .globalnames globalnames + +local function generate(Dst) + Dst:growpc(16) + + | .align 16 + |->lock: + -- attempt to acquire + | mov eax, 1 + | xchg eax, dword [rdi] + | test eax, eax -- was it 0 (unlocked)? + | jnz >1 -- no, go spin + | ret + -- spin + |1: + | pause + | cmp dword [rdi], 1 -- does it look locked? + | je <1 -- spin if it does + | jmp ->lock -- otherwise try to acquire + + | .align 16 + |->unlock: + | mov dword [rdi], 0 + | ret +end + +local Dst, globals = dasm.new(actions, nil, nil, 1 + #globalnames) +generate(Dst) +local code, size = Dst:build() + +if nil then + dasm.dump(code, size) +end + +local entry = dasm.globals(globals, globalnames) + +return setmetatable ({ lock = ffi.cast("void (*)(int32_t *)", entry.lock), + unlock = ffi.cast("void (*)(int32_t *)", entry.unlock) + }, {_anchor = code}) From d1adbcb153a48a17a8c11d03781f4c2fa0449a22 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 7 Feb 2018 17:12:36 +0100 Subject: [PATCH 47/72] process group: rebalance packets exchanged across process boundaries This adds a shared (among the process group that is) packet freelist mapped to group/packets.freelist as a shared memory object. 
Multiple Snabb processes compete for access to this shared freelist by means of a spinlock in order to rebalance the free packets in their individual per-process freelists. --- src/core/app.lua | 7 ++++-- src/core/packet.lua | 57 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 55 insertions(+), 9 deletions(-) diff --git a/src/core/app.lua b/src/core/app.lua index 9ea416797c..e9eb0e0238 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -517,8 +517,11 @@ function breathe () end end counter.add(breaths) - -- Commit counters at a reasonable frequency - if counter.read(breaths) % 100 == 0 then counter.commit() end + -- Commit counters and rebalance freelists at a reasonable frequency + if counter.read(breaths) % 100 == 0 then + counter.commit() + packet.rebalance_freelists() + end running = false end diff --git a/src/core/packet.lua b/src/core/packet.lua index 828b3aa7fe..cf8e6eb041 100644 --- a/src/core/packet.lua +++ b/src/core/packet.lua @@ -10,7 +10,9 @@ local C = ffi.C local lib = require("core.lib") local memory = require("core.memory") +local shm = require("core.shm") local counter = require("core.counter") +local spinlock = require("core.spinlock") require("core.packet_h") @@ -45,18 +47,25 @@ end -- Freelist containing empty packets ready for use. 
-ffi.cdef[[ +local max_packets = 1e6 + +ffi.cdef([[ struct freelist { + int32_t lock[1]; uint64_t nfree; uint64_t max; - struct packet *list[?]; + struct packet *list[]]..max_packets..[[]; }; -]] +]]) + +local function freelist_full(freelist) + return freelist.nfree == freelist.max +end local function freelist_add(freelist, element) -- Safety check if _G.developer_debug then - assert(freelist.nfree < freelist.max, "freelist overflow") + assert(not freelist_full(freelist), "freelist overflow") end freelist.list[freelist.nfree] = element freelist.nfree = freelist.nfree + 1 @@ -75,15 +84,49 @@ local function freelist_nfree(freelist) return freelist.nfree end -local max_packets = 1e6 +local function freelist_lock(freelist) + spinlock.lock(freelist.lock) +end + +local function freelist_unlock(freelist) + spinlock.unlock(freelist.lock) +end + local packet_allocation_step = 1000 local packets_allocated = 0 -local packets_fl = ffi.new("struct freelist", max_packets, 0, max_packets) +local packets_fl = ffi.new("struct freelist") + packets_fl.max = max_packets +local group_fl +if not shm.exists("group/packets.freelist") then + group_fl = shm.create("group/packets.freelist", "struct freelist") + group_fl.max = max_packets +else + group_fl = shm.open("group/packets.freelist", "struct freelist") +end + +function rebalance_freelists () + if freelist_nfree(packets_fl) > packets_allocated then + freelist_lock(group_fl) + while freelist_nfree(packets_fl) > packets_allocated + and not freelist_full(group_fl) do + freelist_add(group_fl, freelist_remove(packets_fl)) + end + freelist_unlock(group_fl) + end +end -- Return an empty packet. 
function allocate () if freelist_nfree(packets_fl) == 0 then - preallocate_step() + freelist_lock(group_fl) + while freelist_nfree(group_fl) > 0 + and freelist_nfree(packets_fl) < packets_allocated do + free(freelist_remove(group_fl)) + end + freelist_unlock(group_fl) + if freelist_nfree(packets_fl) == 0 then + preallocate_step() + end end return freelist_remove(packets_fl) end From 9f0694fb9a50e5302928ebaa33a1fe468ae15dfc Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 9 Feb 2018 13:13:20 +0100 Subject: [PATCH 48/72] core.packet: extract packet.account_free from packet.free Extract a function packet.account_free from packet.free to expose the accounting aspects of freeing packets (engine.{frees,freebytes,freebits}) to apps that might need it (e.g., an app that transfers packets across process boundaries.) This is needed because the engine paces its breathing (or rather, estimates load) based on the counter engine.frees. --- src/core/packet.lua | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/core/packet.lua b/src/core/packet.lua index cf8e6eb041..50f052f6b2 100644 --- a/src/core/packet.lua +++ b/src/core/packet.lua @@ -216,12 +216,16 @@ local function free_internal (p) freelist_add(packets_fl, p) end -function free (p) +function account_free (p) counter.add(engine.frees) counter.add(engine.freebytes, p.length) -- Calculate bits of physical capacity required for packet on 10GbE -- Account for minimum data size and overhead of CRC and inter-packet gap counter.add(engine.freebits, (math.max(p.length, 46) + 4 + 5) * 8) +end + +function free (p) + account_free(p) free_internal(p) end From 756255cedf5de0cde351cc551e5779710185226e Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 18 Jan 2018 01:32:40 +0100 Subject: [PATCH 49/72] DynASM: add CMPXCHG instruction See https://github.com/LuaJIT/LuaJIT/issues/257#issuecomment-267465900 --- src/dasm_x86.lua | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/dasm_x86.lua
b/src/dasm_x86.lua index 0c11f020ec..22fbbc4516 100644 --- a/src/dasm_x86.lua +++ b/src/dasm_x86.lua @@ -1215,6 +1215,8 @@ local map_op = { imul_2 = "rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi", imul_3 = "rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi", + cmpxchg_2 = "mr:0FB1Rm", + movzx_2 = "rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:", movsx_2 = "rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:", From d05da38463ea19e1e30238456250b2b242295e59 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 18 Jan 2018 01:37:25 +0100 Subject: [PATCH 50/72] core.sync: add atomic compare-and-swap primitive based on CMPXCHG The cas function provided in this commit is quite primitive: it expects its arguments to be of type int (no other types supported, unsigned ints will work unless they are truncated during type conversion). Time will show if fancier type support is required or desired. --- src/core/sync.dasl | 60 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 src/core/sync.dasl diff --git a/src/core/sync.dasl b/src/core/sync.dasl new file mode 100644 index 0000000000..bd05cc1710 --- /dev/null +++ b/src/core/sync.dasl @@ -0,0 +1,60 @@ +module(...,package.seeall) + +local dasm = require("dasm") +local ffi = require("ffi") + +| .arch x64 +| .actionlist actions +| .globalnames globalnames + +|.define arg1, edi +|.define arg2, esi +|.define arg3, edx + +-- cas(dst, old, new) -> true|false: Atomic compare-and-swap; compare old with +-- value pointed to by dst. If equal, stores new at dst and returns true. Else, +-- returns false. 
+local cas_t = "bool (*) (int *, int, int)" +local function cas (Dst) + | mov eax, arg2 + | lock; cmpxchg [arg1], arg3 -- compare-and-swap; sets ZF flag on success + | mov eax, 0 -- clear eax for return value + | setz al -- set eax to 1 (true) if ZF is set + | ret +end + +local function generate (Dst) + Dst:growpc(16) + | .align 16 + |->cas: + || cas(Dst) +end + +local Dst, globals = dasm.new(actions, nil, nil, 1 + #globalnames) +generate(Dst) +local code, size = Dst:build() + +if nil then + dasm.dump(code, size) +end + +local entry = dasm.globals(globals, globalnames) + +local sync = { + cas = ffi.cast(cas_t, entry.cas) +} + +sync.selftest = function () + local box = ffi.new( + "struct { unsigned int pad1, state[1], pad2; } __attribute__((packed))" + ) + assert(sync.cas(box.state, 0, 1) and box.state[0] == 1) + assert(not sync.cas(box.state, 0, 2) and box.state[0] == 1) + assert(sync.cas(box.state, 1, 2) and box.state[0] == 2) + assert(sync.cas(box.state, 2, 0x5555555555555555) + and box.state[0] == 2147483648 + and box.pad1 == 0 + and box.pad2 == 0) +end + +return setmetatable(sync, {_anchor = code}) From 9dfa543ea142661da463acadec5696267bd1f192 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 9 Feb 2018 13:16:39 +0100 Subject: [PATCH 51/72] interlink: adapt interlink and related apps to group freelist By making use of behind-the-scenes freelist rebalancing (group-freelist) we can eliminate the need for the explicit packet swapping performed by prior versions of lib.interlink. This in turn enables us to implement a simpler, non-blocking initialization procedure (new/free). 
--- src/apps/interlink/receiver.lua | 20 ++--- src/apps/interlink/selftest.snabb | 4 +- src/apps/interlink/transmitter.lua | 26 ++---- src/lib/interlink.lua | 138 ++++++++++++++++++++++------- 4 files changed, 119 insertions(+), 69 deletions(-) diff --git a/src/apps/interlink/receiver.lua b/src/apps/interlink/receiver.lua index a019218172..4432674024 100644 --- a/src/apps/interlink/receiver.lua +++ b/src/apps/interlink/receiver.lua @@ -9,19 +9,14 @@ local Receiver = { name = "apps.interlink.Receiver", config = { name = {required=true}, - create = {default=false} } } function Receiver:new (conf) - local self = {} - if conf.create then - self.interlink = interlink.create(conf.name) - self.destroy = conf.name - else - self.interlink = interlink.open(conf.name) - end - return setmetatable(self, {__index=Receiver}) + return setmetatable( + {name=conf.name, interlink=interlink.new(conf.name)}, + {__index=Receiver} + ) end function Receiver:pull () @@ -35,12 +30,7 @@ function Receiver:pull () end function Receiver:stop () - if self.destroy then - interlink.free(self.interlink) - shm.unlink(self.destroy) - else - shm.unmap(self.interlink) - end + interlink.free(self.interlink, self.name) end return Receiver diff --git a/src/apps/interlink/selftest.snabb b/src/apps/interlink/selftest.snabb index 01798acc47..823fff41bc 100755 --- a/src/apps/interlink/selftest.snabb +++ b/src/apps/interlink/selftest.snabb @@ -10,8 +10,6 @@ local Sink = require("apps.basic.basic_apps").Sink -- Synopsis: selftest.snabb [duration] local DURATION = tonumber(main.parameters[1]) or 10 -interlink.create("group/test.mcp") - worker.start("source", [[require("apps.interlink.test_source").start("group/test.mcp")]]) @@ -32,5 +30,5 @@ local stats = link.stats(engine.app_table["sink"].input.input) print(stats.txpackets / 1e6 / DURATION .. 
" Mpps") -- test teardown -interlink.free(interlink.open("group/test.mcp")) +engine.configure(config.new()) engine.main({duration=0.1}) diff --git a/src/apps/interlink/transmitter.lua b/src/apps/interlink/transmitter.lua index 838096e3b6..88d6ab4125 100644 --- a/src/apps/interlink/transmitter.lua +++ b/src/apps/interlink/transmitter.lua @@ -8,37 +8,29 @@ local interlink = require("lib.interlink") local Transmitter = { name = "apps.interlink.Transmitter", config = { - name = {required=true}, - create = {default=false} + name = {required=true} } } function Transmitter:new (conf) - local self = {} - if conf.create then - self.interlink = interlink.create(conf.name) - self.destroy = conf.name - else - self.interlink = interlink.open(conf.name) - end - return setmetatable(self, {__index=Transmitter}) + return setmetatable( + {name=conf.name, interlink=interlink.new(conf.name)}, + {__index=Transmitter} + ) end function Transmitter:push () local i, r = self.input.input, self.interlink while not (interlink.full(r) or link.empty(i)) do - interlink.insert(r, link.receive(i)) + local p = link.receive(i) + packet.account_free(p) -- stimulate breathing + interlink.insert(r, p) end interlink.push(r) end function Transmitter:stop () - if self.destroy then - interlink.free(self.interlink) - shm.unlink(self.destroy) - else - shm.unmap(self.interlink) - end + interlink.free(self.interlink, self.name) end return Transmitter diff --git a/src/lib/interlink.lua b/src/lib/interlink.lua index 1062e0bf38..57cb712c28 100644 --- a/src/lib/interlink.lua +++ b/src/lib/interlink.lua @@ -2,14 +2,57 @@ module(...,package.seeall) --- Based on MCRingBuffer, see --- http://www.cse.cuhk.edu.hk/%7Epclee/www/pubs/ipdps10.pdf +-- INTERLINK: packet queue optimized for inter-process links +-- +-- An “interlink” is a thread safe single-producer/single-consumer queue +-- implemented as a ring buffer with a memory layout that is carefully +-- optimized for multi-threaded performance (keyword: “false 
sharing”). +-- +-- The processes at each end of an interlink will both call `new' and `free' in +-- order to create/delete the shared ring buffer. Beyond this, the processes +-- that share an interlink each must restrict themselves to calling either +-- +-- full insert push (transmitting) +-- +-- or +-- +-- empty extract pull (receiving) +-- +-- on the queue. +-- +-- I.e., the transmitting process `insert's new packets into the queue while it +-- is not `full', and makes new packets visible to the receiving process by +-- calling `push'. The receiving process, on the other hand, `extract's packets +-- while the queue is not `empty', and notifies the transmitting process of +-- newly available slots by calling `pull'. +-- +-- new(name) +-- Attaches to and returns a shared memory interlink object by name (a +-- SHM path). If the target name is unavailable (possibly because it is +-- already in use), this operation will block until it becomes available +-- again. +-- +-- free(r, name) +-- Unmaps interlink r and unlinks it from its name. If other end has +-- already freed the interlink, any packets remaining in the queue are +-- freed. +-- +-- full(r) / empty(r) +-- Return true if the interlink r is full / empty. +-- +-- insert(r, p) / extract(r) +-- Insert a packet p into / extract a packet from interlink r. Must not +-- be called if r is full / empty. +-- +-- push(r) / pull(r) +-- Makes subsequent calls to full / empty reflect updates to the queue +-- caused by insert / extract. 
local shm = require("core.shm") local ffi = require("ffi") local band = require("bit").band local waitfor = require("core.lib").waitfor -local full_memory_barrier = ffi.C.full_memory_barrier +local sync = require("core.sync") local SIZE = link.max + 1 local CACHELINE = 64 -- XXX - make dynamic @@ -17,10 +60,11 @@ local INT = ffi.sizeof("int") assert(band(SIZE, SIZE-1) == 0, "SIZE is not a power of two") -local status = { Locked = 0, Unlocked = 1 } +-- Based on MCRingBuffer, see +-- http://www.cse.cuhk.edu.hk/%7Epclee/www/pubs/ipdps10.pdf ffi.cdef([[ struct interlink { - int read, write, lock; + int read, write, state[1]; char pad1[]]..CACHELINE-3*INT..[[]; int lwrite, nread; char pad2[]]..CACHELINE-2*INT..[[]; @@ -29,36 +73,64 @@ ffi.cdef([[ struct interlink { struct packet *packets[]]..SIZE..[[]; } __attribute__((packed, aligned(]]..CACHELINE..[[)))]]) -function create (name) - local r = shm.create(name, "struct interlink") - for i = 0, link.max do - r.packets[i] = packet.allocate() - end - full_memory_barrier() - r.lock = status.Unlocked +-- The life cycle of an interlink is managed using a state machine. This is +-- necessary because we allow receiving and transmitting processes to attach +-- and detach in any order, and even for multiple processes to attempt to +-- attach to the same interlink at the same time. +-- +-- Interlinks can be in one of three states: + +local FREE = 0 -- Implicit initial state due to 0 value. +local UP = 1 -- Other end has attached. +local DOWN = 2 -- Either end has detached; must be re-allocated. + +-- Once either end detaches from an interlink it stays in the DOWN state +-- until it is deallocated. +-- +-- Here are the valid state transitions and when they occur: +-- +-- Change When +-- ------------- -------------------------------------------------------- +-- none -> FREE a process has successfully created the queue. +-- FREE -> UP another process has attached to the queue. +-- UP -> DOWN either process has detached from the queue. 
+-- FREE -> DOWN creator detached before any other process could attach. +-- DOWN -> none the process that detaches last frees the queue (and the +-- packets remaining in it). + +function new (name) + local ok, r + local first_try = true + waitfor( + function () + -- First we try to create the queue. + ok, r = pcall(shm.create, name, "struct interlink") + if ok then return true end + -- If that failed then we try to open (attach to) it. + ok, r = pcall(shm.open, name, "struct interlink") + if ok and sync.cas(r.state, FREE, UP) then return true end + -- We failed; handle error and try again. + if ok then shm.unmap(r) end + if first_try then + print("interlink: waiting for "..name.." to become available...") + first_try = false + end + end + ) return r end -function free (r) - r.lock = status.Locked - full_memory_barrier() - local function ring_consistent () - return r.write == r.nwrite and r.read == r.nread - end - waitfor(ring_consistent) - for i = 0, link.max do - packet.free(r.packets[i]) +function free (r, name) + if sync.cas(r.state, FREE, DOWN) + or not sync.cas(r.state, UP, DOWN) then + while not empty(r) do + packet.free(extract(r)) + end + shm.unlink(name) end shm.unmap(r) end -function open (name) - local r = shm.open(name, "struct interlink") - waitfor(function () return r.lock == status.Unlocked end) - full_memory_barrier() - return r -end - local function NEXT (i) return band(i + 1, link.max) end @@ -66,7 +138,7 @@ end function full (r) local after_nwrite = NEXT(r.nwrite) if after_nwrite == r.lread then - if after_nwrite == r.read or r.lock == status.Locked then + if after_nwrite == r.read then return true end r.lread = r.read @@ -74,19 +146,18 @@ function full (r) end function insert (r, p) - packet.free(r.packets[r.nwrite]) r.packets[r.nwrite] = p r.nwrite = NEXT(r.nwrite) end function push (r) - full_memory_barrier() + -- NB: no need for memory barrier on x86 because of TSO. 
r.write = r.nwrite end function empty (r) if r.nread == r.lwrite then - if r.nread == r.write or r.lock == status.Locked then + if r.nread == r.write then return true end r.lwrite = r.write @@ -95,12 +166,11 @@ end function extract (r) local p = r.packets[r.nread] - r.packets[r.nread] = packet.allocate() r.nread = NEXT(r.nread) return p end function pull (r) - full_memory_barrier() + -- NB: no need for memory barrier on x86 (see push.) r.read = r.nread end From 020a9fd65b8de2dc09759f26b4ac59607c71bcb8 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Sat, 10 Feb 2018 18:55:06 +0100 Subject: [PATCH 52/72] core.sync: consolidate core.spinlock into core.sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The idea is to have in core.sync a general toolbox of multiprocessing synchronization primitives. Not sure if the name sync is a good choice, but my alternative “mp” isn’t particularly convincing either. --- src/core/packet.lua | 6 ++--- src/core/spinlock.dasl | 46 ------------------------------------- src/core/sync.dasl | 51 ++++++++++++++++++++++++++++++++++++++---- 3 files changed, 50 insertions(+), 53 deletions(-) delete mode 100644 src/core/spinlock.dasl diff --git a/src/core/packet.lua b/src/core/packet.lua index 50f052f6b2..e7c992c48a 100644 --- a/src/core/packet.lua +++ b/src/core/packet.lua @@ -12,7 +12,7 @@ local lib = require("core.lib") local memory = require("core.memory") local shm = require("core.shm") local counter = require("core.counter") -local spinlock = require("core.spinlock") +local sync = require("core.sync") require("core.packet_h") @@ -85,11 +85,11 @@ local function freelist_nfree(freelist) end local function freelist_lock(freelist) - spinlock.lock(freelist.lock) + sync.lock(freelist.lock) end local function freelist_unlock(freelist) - spinlock.unlock(freelist.lock) + sync.unlock(freelist.lock) end local packet_allocation_step = 1000 diff --git a/src/core/spinlock.dasl b/src/core/spinlock.dasl
deleted file mode 100644 index 01936658d6..0000000000 --- a/src/core/spinlock.dasl +++ /dev/null @@ -1,46 +0,0 @@ -module(...,package.seeall) - -local dasm = require("dasm") -local ffi = require("ffi") - -| .arch x64 -| .actionlist actions -| .globalnames globalnames - -local function generate(Dst) - Dst:growpc(16) - - | .align 16 - |->lock: - -- attempt to acquire - | mov eax, 1 - | xchg eax, dword [rdi] - | test eax, eax -- was it 0 (unlocked)? - | jnz >1 -- no, go spin - | ret - -- spin - |1: - | pause - | cmp dword [rdi], 1 -- does it look locked? - | je <1 -- spin if it does - | jmp ->lock -- otherwise try to acquire - - | .align 16 - |->unlock: - | mov dword [rdi], 0 - | ret -end - -local Dst, globals = dasm.new(actions, nil, nil, 1 + #globalnames) -generate(Dst) -local code, size = Dst:build() - -if nil then - dasm.dump(code, size) -end - -local entry = dasm.globals(globals, globalnames) - -return setmetatable ({ lock = ffi.cast("void (*)(int32_t *)", entry.lock), - unlock = ffi.cast("void (*)(int32_t *)", entry.unlock) - }, {_anchor = code}) diff --git a/src/core/sync.dasl b/src/core/sync.dasl index bd05cc1710..ac3823c5d6 100644 --- a/src/core/sync.dasl +++ b/src/core/sync.dasl @@ -1,5 +1,7 @@ module(...,package.seeall) +-- core.sync: Multiprocessing synchronization primitives for x86_64. + local dasm = require("dasm") local ffi = require("ffi") @@ -7,13 +9,14 @@ local ffi = require("ffi") | .actionlist actions | .globalnames globalnames +-- This module happens to use 32-bit arguments only. |.define arg1, edi |.define arg2, esi |.define arg3, edx --- cas(dst, old, new) -> true|false: Atomic compare-and-swap; compare old with --- value pointed to by dst. If equal, stores new at dst and returns true. Else, --- returns false. +-- cas(dst, old, new) -> true|false +-- Atomic compare-and-swap; compare old with value pointed to by dst. If +-- equal, stores new at dst and returns true. Else, returns false. 
local cas_t = "bool (*) (int *, int, int)" local function cas (Dst) | mov eax, arg2 @@ -23,11 +26,42 @@ local function cas (Dst) | ret end +-- lock(dst) +-- unlock(dst) +-- Acquire/release spinlock at dst. Acquiring implies busy-waiting until the +-- lock is available. +local lock_t = "void (*) (int *)" +local function lock (Dst) + -- attempt to acquire + | mov eax, 1 + | xchg eax, [arg1] + | test eax, eax -- was it 0 (unlocked)? + | jnz >1 -- no, go spin + | ret + -- spin + |1: + | pause + | cmp dword [arg1], 1 -- does it look locked? + | je <1 -- spin if it does + | jmp ->lock -- otherwise try to acquire +end +local unlock_t = "void (*) (int *)" +local function unlock (Dst) + | mov dword [arg1], 0 + | ret +end + local function generate (Dst) Dst:growpc(16) | .align 16 |->cas: || cas(Dst) + | .align 16 + |->lock: + || lock(Dst) + | .align 16 + |->unlock: + || unlock(Dst) end local Dst, globals = dasm.new(actions, nil, nil, 1 + #globalnames) @@ -41,10 +75,13 @@ end local entry = dasm.globals(globals, globalnames) local sync = { - cas = ffi.cast(cas_t, entry.cas) + cas = ffi.cast(cas_t, entry.cas), + lock = ffi.cast(lock_t, entry.lock), + unlock = ffi.cast(unlock_t, entry.unlock) } sync.selftest = function () + -- cas local box = ffi.new( "struct { unsigned int pad1, state[1], pad2; } __attribute__((packed))" ) @@ -55,6 +92,12 @@ sync.selftest = function () and box.state[0] == 2147483648 and box.pad1 == 0 and box.pad2 == 0) + -- lock / unlock + local spinlock = ffi.new("int[1]") + sync.lock(spinlock) + sync.unlock(spinlock) + sync.lock(spinlock) + sync.unlock(spinlock) end return setmetatable(sync, {_anchor = code}) From 302f9527d76560f0c9ae87b9d6b688ab4221448b Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 15 Feb 2018 23:21:35 +0100 Subject: [PATCH 53/72] core.app: pass app name as second argument to :new in start_app MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During app instantiation the field `appname' is 
set to the app’s name, but this field is not yet visible in constructor method (`new'). We pass the app’s name as the second argument to the constructor so that apps can know their own name during initialization. --- src/core/app.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/app.lua b/src/core/app.lua index e9eb0e0238..6eb06a1245 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -339,7 +339,7 @@ function apply_config_actions (actions) configuration.apps[name] = nil end function ops.start_app (name, class, arg) - local app = class:new(arg) + local app = class:new(arg, name) if type(app) ~= 'table' then error(("bad return value from app '%s' start() method: %s"):format( name, tostring(app))) From 4686cbf93a9c5ce4aca6039c1f80f686b6edcdf8 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 15 Feb 2018 23:26:15 +0100 Subject: [PATCH 54/72] apps.interlink: use app name to derive SHM interlink path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removed configuration parameter `name' in favor of using the app’s name as given for constructing the SHM interlink path. The configuration was somewhat redundant, no? 
--- src/apps/interlink/receiver.lua | 19 +++++++------------ src/apps/interlink/selftest.snabb | 7 +++---- src/apps/interlink/test_source.lua | 6 +++--- src/apps/interlink/transmitter.lua | 19 +++++++------------ 4 files changed, 20 insertions(+), 31 deletions(-) diff --git a/src/apps/interlink/receiver.lua b/src/apps/interlink/receiver.lua index 4432674024..59a0bc30bc 100644 --- a/src/apps/interlink/receiver.lua +++ b/src/apps/interlink/receiver.lua @@ -5,18 +5,13 @@ module(...,package.seeall) local shm = require("core.shm") local interlink = require("lib.interlink") -local Receiver = { - name = "apps.interlink.Receiver", - config = { - name = {required=true}, - } -} +local Receiver = {name="apps.interlink.Receiver"} -function Receiver:new (conf) - return setmetatable( - {name=conf.name, interlink=interlink.new(conf.name)}, - {__index=Receiver} - ) +function Receiver:new (_, name) + local self = {} + self.shm_name = "group/interlink/"..name + self.interlink = interlink.new(self.shm_name) + return setmetatable(self, {__index=Receiver}) end function Receiver:pull () @@ -30,7 +25,7 @@ function Receiver:pull () end function Receiver:stop () - interlink.free(self.interlink, self.name) + interlink.free(self.interlink, self.shm_name) end return Receiver diff --git a/src/apps/interlink/selftest.snabb b/src/apps/interlink/selftest.snabb index 823fff41bc..4c7508eeaa 100755 --- a/src/apps/interlink/selftest.snabb +++ b/src/apps/interlink/selftest.snabb @@ -10,14 +10,13 @@ local Sink = require("apps.basic.basic_apps").Sink -- Synopsis: selftest.snabb [duration] local DURATION = tonumber(main.parameters[1]) or 10 -worker.start("source", - [[require("apps.interlink.test_source").start("group/test.mcp")]]) +worker.start("source", [[require("apps.interlink.test_source").start("test")]]) local c = config.new() -config.app(c, "rx", Receiver, {name="group/test.mcp"}) +config.app(c, "test", Receiver) config.app(c, "sink", Sink) -config.link(c, "rx.output->sink.input") +config.link(c, 
"test.output->sink.input") engine.configure(c) engine.main({duration=DURATION, report={showlinks=true}}) diff --git a/src/apps/interlink/test_source.lua b/src/apps/interlink/test_source.lua index 86a057c6d3..cfa71b7417 100644 --- a/src/apps/interlink/test_source.lua +++ b/src/apps/interlink/test_source.lua @@ -5,11 +5,11 @@ module(...,package.seeall) local Transmitter = require("apps.interlink.transmitter") local Source = require("apps.basic.basic_apps").Source -function start (link_name) +function start (name) local c = config.new() - config.app(c, "tx", Transmitter, {name=link_name}) + config.app(c, name, Transmitter) config.app(c, "source", Source) - config.link(c, "source.output->tx.input") + config.link(c, "source.output -> "..name..".input") engine.configure(c) engine.main() end diff --git a/src/apps/interlink/transmitter.lua b/src/apps/interlink/transmitter.lua index 88d6ab4125..40a1dfef62 100644 --- a/src/apps/interlink/transmitter.lua +++ b/src/apps/interlink/transmitter.lua @@ -5,18 +5,13 @@ module(...,package.seeall) local shm = require("core.shm") local interlink = require("lib.interlink") -local Transmitter = { - name = "apps.interlink.Transmitter", - config = { - name = {required=true} - } -} +local Transmitter = {name="apps.interlink.Transmitter"} -function Transmitter:new (conf) - return setmetatable( - {name=conf.name, interlink=interlink.new(conf.name)}, - {__index=Transmitter} - ) +function Transmitter:new (_, name) + local self = {} + self.shm_name = "group/interlink/"..name + self.interlink = interlink.new(self.shm_name) + return setmetatable(self, {__index=Transmitter}) end function Transmitter:push () @@ -30,7 +25,7 @@ function Transmitter:push () end function Transmitter:stop () - interlink.free(self.interlink, self.name) + interlink.free(self.interlink, self.shm_name) end return Transmitter From cb9250fde54a9b0fc01b00de0306de97c11b98af Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Sun, 18 Feb 2018 12:40:31 +0100 Subject: [PATCH 
55/72] lib.interlink: fix life-time fsm design and implementation bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As it happens, quite a few edge (and not so edge) cases in the state machine that manages an interlink’s life-time were previously overlooked due to overly narrow testing. Prior to this commit, multiple concurrent receivers / transmitters were not handled properly, among other things. This patch fleshes out the state machine in a way that should hopefully live up to the requirements. Externally visible changes include the split of new and free into attach_receiver / attach_transmitter and detach_receiver / detach_transmitter. This is obviously necessary, since lib.interlink has to ensure that an interlink is never used by two receivers or two transmitters at the same time. It should now also be possible to “reuse” interlink queues. I.e., it is possible for processes to detach and reattach without reallocating the queue. This is handy for scenarios in which interlinks are used for hot fail-over, and basic use cases like restarting group processes individually without restarting the process at the other end of the queues. In the future, we should really add a comprehensive test suite for the interlink apps that exercises all these cases! 
--- src/apps/interlink/receiver.lua | 4 +- src/apps/interlink/transmitter.lua | 4 +- src/lib/interlink.lua | 151 +++++++++++++++++++++-------- 3 files changed, 114 insertions(+), 45 deletions(-) diff --git a/src/apps/interlink/receiver.lua b/src/apps/interlink/receiver.lua index 59a0bc30bc..e46b08b05f 100644 --- a/src/apps/interlink/receiver.lua +++ b/src/apps/interlink/receiver.lua @@ -10,7 +10,7 @@ local Receiver = {name="apps.interlink.Receiver"} function Receiver:new (_, name) local self = {} self.shm_name = "group/interlink/"..name - self.interlink = interlink.new(self.shm_name) + self.interlink = interlink.attach_receiver(self.shm_name) return setmetatable(self, {__index=Receiver}) end @@ -25,7 +25,7 @@ function Receiver:pull () end function Receiver:stop () - interlink.free(self.interlink, self.shm_name) + interlink.detach_receiver(self.interlink, self.shm_name) end return Receiver diff --git a/src/apps/interlink/transmitter.lua b/src/apps/interlink/transmitter.lua index 40a1dfef62..cf92e4a099 100644 --- a/src/apps/interlink/transmitter.lua +++ b/src/apps/interlink/transmitter.lua @@ -10,7 +10,7 @@ local Transmitter = {name="apps.interlink.Transmitter"} function Transmitter:new (_, name) local self = {} self.shm_name = "group/interlink/"..name - self.interlink = interlink.new(self.shm_name) + self.interlink = interlink.attach_transmitter(self.shm_name) return setmetatable(self, {__index=Transmitter}) end @@ -25,7 +25,7 @@ function Transmitter:push () end function Transmitter:stop () - interlink.free(self.interlink, self.shm_name) + interlink.detach_transmitter(self.interlink, self.shm_name) end return Transmitter diff --git a/src/lib/interlink.lua b/src/lib/interlink.lua index 57cb712c28..c76923aeef 100644 --- a/src/lib/interlink.lua +++ b/src/lib/interlink.lua @@ -6,33 +6,42 @@ module(...,package.seeall) -- -- An “interlink” is a thread safe single-producer/single-consumer queue -- implemented as a ring buffer with a memory layout that is carefully --- 
optimized for multi-threaded performance (keyword: “false sharing”). +-- optimized for multi-threaded performance (keyword: “false sharing”). It is +-- represented by a struct allocated in shared memory. -- --- The processes at each end of an interlink will both call `new' and `free' in --- order to create/delete the shared ring buffer. Beyond this, the processes --- that share an interlink each must restrict themselves to calling either +-- The processes at each end of an interlink are called the “receiver” and +-- “transmitter” which use disjoint, symmetric subsets of the API on a given +-- queue, as shown below. -- --- full insert push (transmitting) +-- Receiver Transmitter +-- ---------- ------------- +-- attach_receiver(name) attach_transmitter(name) +-- empty(r) full(r) +-- extract(r) insert(r, p) +-- pull(r) push(r) +-- detach_receiver(name) detach_transmitter(name) -- --- or +-- I.e., both receiver and transmitter will attach to a queue object they wish +-- to communicate over, and detach once they cease operations. -- --- empty extract pull (receiving) +-- Meanwhile, the receiver can extract packets from the queue unless it is +-- empty, while the transmitter can insert new packets into the queue unless +-- it is full. -- --- on the queue. +-- Packets inserted by the transmitter only become visible to the receiver once +-- the transmitter calls push. Likewise, queue slots freed from extracting +-- packets only become visible to the transmitter once the receiver calls pull. -- --- I.e., the transmitting process `insert's new packets into the queue while it --- is not `full', and makes new packets visible to the receiving process by --- calling `push'. The receiving process, on the other hand, `extract's packets --- while the queue is not `empty', and notifies the transmitting process of --- newly available slots by calling `pull'. 
+-- API +-- ---- -- --- new(name) +-- attach_receiver(name), attach_transmitter(name) -- Attaches to and returns a shared memory interlink object by name (a -- SHM path). If the target name is unavailable (possibly because it is -- already in use), this operation will block until it becomes available -- again. -- --- free(r, name) +-- detach_receiver(r, name), detach_transmitter(r, name) -- Unmaps interlink r and unlinks it from its name. If other end has -- already freed the interlink, any packets remaining in the queue are -- freed. @@ -78,59 +87,119 @@ ffi.cdef([[ struct interlink { -- and detach in any order, and even for multiple processes to attempt to -- attach to the same interlink at the same time. -- --- Interlinks can be in one of three states: +-- Interlinks can be in one of five states: local FREE = 0 -- Implicit initial state due to 0 value. -local UP = 1 -- Other end has attached. -local DOWN = 2 -- Either end has detached; must be re-allocated. +local RXUP = 1 -- Receiver has attached. +local TXUP = 2 -- Transmitter has attached. +local DXUP = 3 -- Both ends have attached. +local DOWN = 4 -- Either end has detached; must be re-allocated. -- Once either end detaches from an interlink it stays in the DOWN state -- until it is deallocated. -- -- Here are the valid state transitions and when they occur: -- --- Change When --- ------------- -------------------------------------------------------- --- none -> FREE a process has successfully created the queue. --- FREE -> UP another process has attached to the queue. --- UP -> DOWN either process has detached from the queue. --- FREE -> DOWN creator detached before any other process could attach. --- DOWN -> none the process that detaches last frees the queue (and the --- packets remaining in it). - -function new (name) +-- Who Change Why +-- ------ ------------- --------------------------------------------------- +-- (any) none -> FREE A process creates the queue (initial state). +-- recv. 
FREE -> RXUP Receiver attaches to free queue. +-- recv. TXUP -> DXUP Receiver attaches to queue with ready transmitter. +-- recv. DXUP -> TXUP Receiver detaches from queue. +-- recv. RXUP -> DOWN Receiver deallocates queue. +-- trans. FREE -> TXUP Transmitter attaches to free queue. +-- trans. RXUP -> DXUP Transmitter attaches to queue with ready receiver. +-- trans. DXUP -> RXUP Transmitter detaches from queue. +-- trans. TXUP -> DOWN Transmitter deallocates queue. +-- +-- These state transitions are *PROHIBITED* for important reasons: +-- +-- Who Change Why *PROHIBITED* +-- ------ ----------- -------------------------------------------------------- +-- (any) FREE->DEAD Cannot shutdown before having attached. +-- (any) *->FREE Cannot transition to FREE except by reallocating. +-- recv. TXUP->DEAD Receiver cannot mutate queue after it has detached. +-- recv. DXUP->RXUP Receiver cannot detach Transmitter. +-- trans. RXUP->DEAD Transmitter cannot mutate queue after it has detached. +-- trans. DXUP->TXUP Transmitter cannot detach receiver. +-- (any) DXUP->DOWN Cannot shutdown queue while it is in use. +-- (any) DOWN->* Cannot transition from DOWN (must create new queue.) + +local function attach (name, initialize) local ok, r local first_try = true waitfor( function () - -- First we try to create the queue. - ok, r = pcall(shm.create, name, "struct interlink") - if ok then return true end - -- If that failed then we try to open (attach to) it. + -- Try to open the queue. ok, r = pcall(shm.open, name, "struct interlink") - if ok and sync.cas(r.state, FREE, UP) then return true end + -- If that failed then we try to create it. + if not ok then ok, r = pcall(shm.create, name, "struct interlink") end + -- Return if we could map the queue and succeed to initialize it. + if ok and initialize(r) then return true end -- We failed; handle error and try again. 
- if ok then shm.unmap(r) end + if ok then shm.unmap(r); ok, r = nil end if first_try then print("interlink: waiting for "..name.." to become available...") first_try = false end end ) + -- Ready for action :) return r end -function free (r, name) - if sync.cas(r.state, FREE, DOWN) - or not sync.cas(r.state, UP, DOWN) then - while not empty(r) do - packet.free(extract(r)) +function attach_receiver (name) + return attach(name, + -- Attach to free queue as receiver (FREE -> RXUP) + -- or queue with ready transmitter (TXUP -> DXUP.) + function (r) return sync.cas(r.state, FREE, RXUP) + or sync.cas(r.state, TXUP, DXUP) end) +end + +function attach_transmitter (name) + return attach(name, + -- Attach to free queue as transmitter (FREE -> TXUP) + -- or queue with ready receiver (RXUP -> DXUP.) + function (r) return sync.cas(r.state, FREE, TXUP) + or sync.cas(r.state, RXUP, DXUP) end) +end + +local function detach (r, name, reset, shutdown) + waitfor( + function () + -- Try to detach from queue and leave it for reuse (soft reset). + if reset(r) then return true + -- Alternatively, attempt to shutdown and deallocate queue. + elseif shutdown(r) then + while not empty(r) do + packet.free(extract(r)) + end + shm.unlink(name) + return true + end end - shm.unlink(name) - end + ) shm.unmap(r) end +function detach_receiver (r, name) + detach(r, name, + -- Reset: detach from queue with active transmitter (DXUP -> TXUP.) + function () return sync.cas(r.state, DXUP, TXUP) end, + -- Shutdown: deallocate no longer used (RXUP -> DOWN.) + function () return sync.cas(r.state, RXUP, DOWN) end) +end + +function detach_transmitter (r, name) + detach(r, name, + -- Reset: detach from queue with ready receiver (DXUP -> RXUP.) + function () return sync.cas(r.state, DXUP, RXUP) end, + -- Shutdown: deallocate no longer used queue (TXUP -> DOWN.) + function () return sync.cas(r.state, TXUP, DOWN) end) +end + +-- Queue operations follow below. 
+ local function NEXT (i) return band(i + 1, link.max) end From 274ea4b63e6c189cc83bda03c66ed7474eaa8be9 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Mon, 19 Feb 2018 15:48:14 +0100 Subject: [PATCH 56/72] core.packet: do not count packets reclaimed from group_fl as freed There was a bug in packet.allocate that counted packets reclaimed via the group freelist as freed, subsequently leading to bugs free statistics. This change appears to also positively impact performance. --- src/core/packet.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/packet.lua b/src/core/packet.lua index e7c992c48a..2ea1e51482 100644 --- a/src/core/packet.lua +++ b/src/core/packet.lua @@ -121,7 +121,7 @@ function allocate () freelist_lock(group_fl) while freelist_nfree(group_fl) > 0 and freelist_nfree(packets_fl) < packets_allocated do - free(freelist_remove(group_fl)) + freelist_add(packets_fl, freelist_remove(group_fl)) end freelist_unlock(group_fl) if freelist_nfree(packets_fl) == 0 then From 59caa02ca5d101bf8ae2b61d3c2b1536d86c3730 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Mon, 19 Feb 2018 23:10:17 +0100 Subject: [PATCH 57/72] apps.interlink: shutdown handler to release stale attachments Add calls to shutdown routines for the interlink receiver and transmitter apps in cleanup handler of core.main (run by the supervisor.) This is to ensure the release of inter-process links attached to by Snabb processes that exit uncleanly. 
--- src/apps/interlink/receiver.lua | 18 ++++++++++++++++++ src/apps/interlink/transmitter.lua | 18 ++++++++++++++++++ src/core/main.lua | 15 +++++++++------ src/lib/interlink.lua | 12 ++++++++---- 4 files changed, 53 insertions(+), 10 deletions(-) diff --git a/src/apps/interlink/receiver.lua b/src/apps/interlink/receiver.lua index e46b08b05f..675bf461a4 100644 --- a/src/apps/interlink/receiver.lua +++ b/src/apps/interlink/receiver.lua @@ -10,7 +10,9 @@ local Receiver = {name="apps.interlink.Receiver"} function Receiver:new (_, name) local self = {} self.shm_name = "group/interlink/"..name + self.backlink = "interlink/receiver/"..name self.interlink = interlink.attach_receiver(self.shm_name) + shm.alias(self.backlink, self.shm_name) return setmetatable(self, {__index=Receiver}) end @@ -26,6 +28,22 @@ end function Receiver:stop () interlink.detach_receiver(self.interlink, self.shm_name) + shm.unlink(self.backlink) +end + +-- Detach receivers to prevent leaking interlinks opened by pid. +-- +-- This is an internal API function provided for cleanup during +-- process termination. +function Receiver.shutdown (pid) + for _, name in ipairs(shm.children("/"..pid.."/interlink/receiver")) do + local backlink = "/"..pid.."/interlink/receiver/"..name + local shm_name = "/"..pid.."/group/interlink/"..name + -- Call protected in case //group is already unlinked. 
+ local ok, r = pcall(interlink.open, shm_name) + if ok then interlink.detach_receiver(r, shm_name) end + shm.unlink(backlink) + end end return Receiver diff --git a/src/apps/interlink/transmitter.lua b/src/apps/interlink/transmitter.lua index cf92e4a099..55e3cae934 100644 --- a/src/apps/interlink/transmitter.lua +++ b/src/apps/interlink/transmitter.lua @@ -10,7 +10,9 @@ local Transmitter = {name="apps.interlink.Transmitter"} function Transmitter:new (_, name) local self = {} self.shm_name = "group/interlink/"..name + self.backlink = "interlink/transmitter/"..name self.interlink = interlink.attach_transmitter(self.shm_name) + shm.alias(self.backlink, self.shm_name) return setmetatable(self, {__index=Transmitter}) end @@ -26,6 +28,22 @@ end function Transmitter:stop () interlink.detach_transmitter(self.interlink, self.shm_name) + shm.unlink(self.backlink) +end + +-- Detach transmitters to prevent leaking interlinks opened by pid. +-- +-- This is an internal API function provided for cleanup during +-- process termination. +function Transmitter.shutdown (pid) + for _, name in ipairs(shm.children("/"..pid.."/interlink/transmitter")) do + local backlink = "/"..pid.."/interlink/transmitter/"..name + local shm_name = "/"..pid.."/group/interlink/"..name + -- Call protected in case //group is already unlinked. + local ok, r = pcall(interlink.open, shm_name) + if ok then interlink.detach_transmitter(r, shm_name) end + shm.unlink(backlink) + end end return Transmitter diff --git a/src/core/main.lua b/src/core/main.lua index 9bf260ccd5..6ea0531604 100644 --- a/src/core/main.lua +++ b/src/core/main.lua @@ -172,17 +172,19 @@ end -- Cleanup after Snabb process. 
function shutdown (pid) + -- simple pcall helper to print error and continue + local function safely (f) + local ok, err = pcall(f) + if not ok then print(err) end + end + -- Run cleanup hooks + safely(function () require("apps.interlink.receiver").shutdown(pid) end) + safely(function () require("apps.interlink.transmitter").shutdown(pid) end) -- Parent process performs additional cleanup steps. -- (Parent is the process whose 'group' folder is not a symlink.) local st, err = S.lstat(shm.root.."/"..pid.."/group") local is_parent = st and st.isdir if is_parent then - -- simple pcall helper to print error and continue - local function safely (f) - local ok, err = pcall(f) - if not ok then print(err) end - end - -- Run cleanup hooks safely(function () require("lib.hardware.pci").shutdown(pid) end) safely(function () require("core.memory").shutdown(pid) end) end @@ -229,6 +231,7 @@ if assert(S.fork()) ~= 0 then -- parent process: run snabb xpcall(main, handler) else + initialize() -- child process: supervise parent & perform cleanup -- Subscribe to SIGHUP on parent death S.prctl("set_name", "[snabb sup]") diff --git a/src/lib/interlink.lua b/src/lib/interlink.lua index c76923aeef..7bdc5f3832 100644 --- a/src/lib/interlink.lua +++ b/src/lib/interlink.lua @@ -185,17 +185,17 @@ end function detach_receiver (r, name) detach(r, name, -- Reset: detach from queue with active transmitter (DXUP -> TXUP.) - function () return sync.cas(r.state, DXUP, TXUP) end, + function (r) return sync.cas(r.state, DXUP, TXUP) end, -- Shutdown: deallocate no longer used (RXUP -> DOWN.) - function () return sync.cas(r.state, RXUP, DOWN) end) + function (r) return sync.cas(r.state, RXUP, DOWN) end) end function detach_transmitter (r, name) detach(r, name, -- Reset: detach from queue with ready receiver (DXUP -> RXUP.) - function () return sync.cas(r.state, DXUP, RXUP) end, + function (r) return sync.cas(r.state, DXUP, RXUP) end, -- Shutdown: deallocate no longer used queue (TXUP -> DOWN.) 
- function () return sync.cas(r.state, TXUP, DOWN) end) + function (r) return sync.cas(r.state, TXUP, DOWN) end) end -- Queue operations follow below. @@ -243,3 +243,7 @@ function pull (r) -- NB: no need for memory barrier on x86 (see push.) r.read = r.nread end + +function open (name, readonly) + return shm.open(name, "struct interlink", readonly) +end From b1472e82d1822eec36e0d67137ff78b227945f82 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Tue, 20 Feb 2018 14:04:08 +0100 Subject: [PATCH 58/72] lib.interlink: integrate with shm.open_frame for snabb top MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds auxiliary code to help tools like `snabb top -l' print meaningful descriptions of shared memory interlink objects. I.e., in order to get some useful debugging information about inter-process links used by a Snabb process one could do $ snabb top -l group/interlink some_interlink 0/1024 (waiting for transmitter) … which shows the occupancy (/) and status of the queue. --- src/apps/interlink/receiver.lua | 8 ++++---- src/apps/interlink/transmitter.lua | 8 ++++---- src/lib/interlink.lua | 26 ++++++++++++++++++++++++++ src/program/top/top.lua | 4 ++++ 4 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/apps/interlink/receiver.lua b/src/apps/interlink/receiver.lua index 675bf461a4..206a5ea6f0 100644 --- a/src/apps/interlink/receiver.lua +++ b/src/apps/interlink/receiver.lua @@ -9,8 +9,8 @@ local Receiver = {name="apps.interlink.Receiver"} function Receiver:new (_, name) local self = {} - self.shm_name = "group/interlink/"..name - self.backlink = "interlink/receiver/"..name + self.shm_name = "group/interlink/"..name..".interlink" + self.backlink = "interlink/receiver/"..name..".interlink" self.interlink = interlink.attach_receiver(self.shm_name) shm.alias(self.backlink, self.shm_name) return setmetatable(self, {__index=Receiver}) @@ -37,8 +37,8 @@ end -- process termination. 
function Receiver.shutdown (pid) for _, name in ipairs(shm.children("/"..pid.."/interlink/receiver")) do - local backlink = "/"..pid.."/interlink/receiver/"..name - local shm_name = "/"..pid.."/group/interlink/"..name + local backlink = "/"..pid.."/interlink/receiver/"..name..".interlink" + local shm_name = "/"..pid.."/group/interlink/"..name..".interlink" -- Call protected in case //group is already unlinked. local ok, r = pcall(interlink.open, shm_name) if ok then interlink.detach_receiver(r, shm_name) end diff --git a/src/apps/interlink/transmitter.lua b/src/apps/interlink/transmitter.lua index 55e3cae934..21fc67787f 100644 --- a/src/apps/interlink/transmitter.lua +++ b/src/apps/interlink/transmitter.lua @@ -9,8 +9,8 @@ local Transmitter = {name="apps.interlink.Transmitter"} function Transmitter:new (_, name) local self = {} - self.shm_name = "group/interlink/"..name - self.backlink = "interlink/transmitter/"..name + self.shm_name = "group/interlink/"..name..".interlink" + self.backlink = "interlink/transmitter/"..name..".interlink" self.interlink = interlink.attach_transmitter(self.shm_name) shm.alias(self.backlink, self.shm_name) return setmetatable(self, {__index=Transmitter}) @@ -37,8 +37,8 @@ end -- process termination. function Transmitter.shutdown (pid) for _, name in ipairs(shm.children("/"..pid.."/interlink/transmitter")) do - local backlink = "/"..pid.."/interlink/transmitter/"..name - local shm_name = "/"..pid.."/group/interlink/"..name + local backlink = "/"..pid.."/interlink/transmitter/"..name..".interlink" + local shm_name = "/"..pid.."/group/interlink/"..name..".interlink" -- Call protected in case //group is already unlinked. 
local ok, r = pcall(interlink.open, shm_name) if ok then interlink.detach_transmitter(r, shm_name) end diff --git a/src/lib/interlink.lua b/src/lib/interlink.lua index 7bdc5f3832..8ed94fde8a 100644 --- a/src/lib/interlink.lua +++ b/src/lib/interlink.lua @@ -244,6 +244,32 @@ function pull (r) r.read = r.nread end +-- The code below registers an abstract SHM object type with core.shm, and +-- implements the minimum API necessary for programs like snabb top to inspect +-- interlink queues (including a tostring meta-method to describe queue +-- objects.) + +shm.register('interlink', getfenv()) + function open (name, readonly) return shm.open(name, "struct interlink", readonly) end + +local function describe (r) + local function queue_fill (r) + local read, write = r.read, r.write + return read > write and write + SIZE - read or write - read + end + local function status (r) + return ({ + [FREE] = "initializing", + [RXUP] = "waiting for transmitter", + [TXUP] = "waiting for receiver", + [DXUP] = "in active use", + [DOWN] = "deallocating" + })[r.state[0]] + end + return ("%d/%d (%s)"):format(queue_fill(r), SIZE, status(r)) +end + +ffi.metatype(ffi.typeof("struct interlink"), {__tostring=describe}) diff --git a/src/program/top/top.lua b/src/program/top/top.lua index c9754dfbda..fafd23a64d 100644 --- a/src/program/top/top.lua +++ b/src/program/top/top.lua @@ -11,6 +11,10 @@ local S = require("syscall") local histogram = require("core.histogram") local usage = require("program.top.README_inc") +-- We must load any modules that register abstract shm types that we may +-- wish to inspect. +require("lib.interlink") + local long_opts = { help = "h", list = "l" } From 5ac340e262b5545591563d8555bed5588010ca42 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Tue, 20 Feb 2018 14:13:34 +0100 Subject: [PATCH 59/72] core.shm: explicitly open shm objects read-only in open_frame In open_frame, add an explicit 'readonly' argument to calls opening SHM objects. 
This allows SHM object type providers to have an open function that opens objects read-write by default without the dangers of open_frame opening them read-write. Existing SHM object type providers (core.counter, core.histogram) ignore this parameter and always open read-only. --- src/core/shm.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/shm.lua b/src/core/shm.lua index 8187eb281d..f2380cee82 100644 --- a/src/core/shm.lua +++ b/src/core/shm.lua @@ -158,7 +158,7 @@ function open_frame (path) local module = types[type] if module then assert(frame[name] == nil, "shm: duplicate name: "..name) - frame[name] = module.open(frame.path..file) + frame[name] = module.open(frame.path..file, 'readonly') frame.specs[name] = {module} end end From 95de7043dfde1306107752ceb65c392fc0eeec1f Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 21 Feb 2018 17:57:57 +0100 Subject: [PATCH 60/72] lib.interlink: fix outdated comments --- src/lib/interlink.lua | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/lib/interlink.lua b/src/lib/interlink.lua index 8ed94fde8a..32bee487f1 100644 --- a/src/lib/interlink.lua +++ b/src/lib/interlink.lua @@ -19,7 +19,7 @@ module(...,package.seeall) -- empty(r) full(r) -- extract(r) insert(r, p) -- pull(r) push(r) --- detach_receiver(name) detach_transmitter(name) +-- detach_receiver(r, name) detach_transmitter(r, name) -- -- I.e., both receiver and transmitter will attach to a queue object they wish -- to communicate over, and detach once they cease operations. @@ -38,13 +38,13 @@ module(...,package.seeall) -- attach_receiver(name), attach_transmitter(name) -- Attaches to and returns a shared memory interlink object by name (a -- SHM path). If the target name is unavailable (possibly because it is --- already in use), this operation will block until it becomes available +-- already in use) this operation will block until it becomes available -- again. 
-- -- detach_receiver(r, name), detach_transmitter(r, name) --- Unmaps interlink r and unlinks it from its name. If other end has --- already freed the interlink, any packets remaining in the queue are --- freed. +-- Unmaps interlink r after detaching from the shared queue. Unless the +-- other end is still attached the shared queue is unlinked from its +-- name, and any packets remaining are freed. -- -- full(r) / empty(r) -- Return true if the interlink r is full / empty. @@ -87,16 +87,21 @@ ffi.cdef([[ struct interlink { -- and detach in any order, and even for multiple processes to attempt to -- attach to the same interlink at the same time. -- +-- Furthermore, more than two processes can attach to and detach from an +-- interlink during its life time. I.e., a new receiver can attach to the queue +-- once the former receiver has detached while the transmitter stays attached +-- throughout, and vice-versa. +-- -- Interlinks can be in one of five states: local FREE = 0 -- Implicit initial state due to 0 value. local RXUP = 1 -- Receiver has attached. local TXUP = 2 -- Transmitter has attached. local DXUP = 3 -- Both ends have attached. -local DOWN = 4 -- Either end has detached; must be re-allocated. +local DOWN = 4 -- Both ends have detached; must be re-allocated. --- Once either end detaches from an interlink it stays in the DOWN state --- until it is deallocated. +-- If at any point both ends have detached from an interlink it stays in the +-- DOWN state until it is deallocated. -- -- Here are the valid state transitions and when they occur: -- From 154d63c111525c8f0ac9400b8d24a107f6a0d7be Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 22 Feb 2018 17:28:43 +0100 Subject: [PATCH 61/72] lib.interlink / core.main: fix shutdown bugs 59caa02ca (apps.interlink: shutdown handler to release stale attachments) introduced a bug where the supervisor processes would create directories under SNABB_SHM_ROOT. 
This happened because lib.interlink depended on core.link and core.packet, and I mistakenly added a call to initialize() to main in order to resolve these dependencies. The dependencies existed because detaching possibly involves freeing any packets remaining on the queue, and the queue size was based on link.max (for no particular reason.) Besides introducing this bug, this approach was wrong for another reason: the supervisor has no packet freelist, and hence there is no point in freeing packets within the shutdown code. This commit removes the unnecessary dependency on core.link, and makes sure that the dependency on core.packet is only exercised when it makes sense to do so. E.g., when detaching from queues in the supervisor no freeing of packets takes place. Finally, the call to initialize() in the supervisor is reverted. --- src/core/main.lua | 1 - src/lib/interlink.lua | 9 ++++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/core/main.lua b/src/core/main.lua index 6ea0531604..b0ece96391 100644 --- a/src/core/main.lua +++ b/src/core/main.lua @@ -231,7 +231,6 @@ if assert(S.fork()) ~= 0 then -- parent process: run snabb xpcall(main, handler) else - initialize() -- child process: supervise parent & perform cleanup -- Subscribe to SIGHUP on parent death S.prctl("set_name", "[snabb sup]") diff --git a/src/lib/interlink.lua b/src/lib/interlink.lua index 32bee487f1..78b8f89eda 100644 --- a/src/lib/interlink.lua +++ b/src/lib/interlink.lua @@ -63,7 +63,7 @@ local band = require("bit").band local waitfor = require("core.lib").waitfor local sync = require("core.sync") -local SIZE = link.max + 1 +local SIZE = 1024 local CACHELINE = 64 -- XXX - make dynamic local INT = ffi.sizeof("int") @@ -176,7 +176,10 @@ local function detach (r, name, reset, shutdown) if reset(r) then return true -- Alternatively, attempt to shutdown and deallocate queue. 
elseif shutdown(r) then - while not empty(r) do + -- If detach is called by the supervisor (due to an abnormal exit) + -- the packet module will not be loaded (and there will be no + -- freelist to put the packets into.) + while packet and not empty(r) do packet.free(extract(r)) end shm.unlink(name) @@ -206,7 +209,7 @@ end -- Queue operations follow below. local function NEXT (i) - return band(i + 1, link.max) + return band(i + 1, SIZE - 1) end function full (r) From 72aadf7471ae109e9afdcb578d150474b53159dc Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 22 Feb 2018 18:24:21 +0100 Subject: [PATCH 62/72] lib.interlink: take advantage of non-excl shm.create semantics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit shm.create does not ask for O_EXCL to open(2), so we don’t actually need to do the pcall dance in attach. Part of me thinks maybe we should ask for O_EXCL in shm.create, but OTOH this way the code is simpler… --- src/lib/interlink.lua | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/lib/interlink.lua b/src/lib/interlink.lua index 78b8f89eda..92cb9431f8 100644 --- a/src/lib/interlink.lua +++ b/src/lib/interlink.lua @@ -131,18 +131,16 @@ local DOWN = 4 -- Both ends have detached; must be re-allocated. -- (any) DOWN->* Cannot transition from DOWN (must create new queue.) local function attach (name, initialize) - local ok, r + local r local first_try = true waitfor( function () - -- Try to open the queue. - ok, r = pcall(shm.open, name, "struct interlink") - -- If that failed then we try to create it. - if not ok then ok, r = pcall(shm.create, name, "struct interlink") end - -- Return if we could map the queue and succeed to initialize it. - if ok and initialize(r) then return true end + -- Create/open the queue. + r = shm.create(name, "struct interlink") + -- Return if we succeed to initialize it. 
+ if initialize(r) then return true end -- We failed; handle error and try again. - if ok then shm.unmap(r); ok, r = nil end + shm.unmap(r) if first_try then print("interlink: waiting for "..name.." to become available...") first_try = false From 44b4cdf1c53bdf7fe0638b9bddd84f182a1894c9 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Sat, 24 Feb 2018 16:10:38 +0100 Subject: [PATCH 63/72] core.packet: use ffi init argument to initialize packet_fl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Didn’t know you could pass tables to initialize structs, cool! --- src/core/packet.lua | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core/packet.lua b/src/core/packet.lua index 2ea1e51482..bed98f7f29 100644 --- a/src/core/packet.lua +++ b/src/core/packet.lua @@ -94,8 +94,7 @@ end local packet_allocation_step = 1000 local packets_allocated = 0 -local packets_fl = ffi.new("struct freelist") - packets_fl.max = max_packets +local packets_fl = ffi.new("struct freelist", {max=max_packets}) local group_fl if not shm.exists("group/packets.freelist") then group_fl = shm.create("group/packets.freelist", "struct freelist") From 5ac6e7baebeef95aa37e402a601c6aae28d3f82c Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Mon, 5 Mar 2018 11:32:57 +0100 Subject: [PATCH 64/72] core.packet: enable group freelist only on demand This adds a function enable_group_freelist() that will enable the group freelist; it is idempotent. The interlink receiver and transmitter apps call it during initialization to ensure the group freelist is present/enabled. Due to JuJITsu the extra branch to check whether the group freelist is enabled is expected to be virtually free.
--- src/apps/interlink/receiver.lua | 1 + src/apps/interlink/transmitter.lua | 1 + src/core/packet.lua | 29 +++++++++++++++++------------ 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/apps/interlink/receiver.lua b/src/apps/interlink/receiver.lua index 206a5ea6f0..ed00222a30 100644 --- a/src/apps/interlink/receiver.lua +++ b/src/apps/interlink/receiver.lua @@ -8,6 +8,7 @@ local interlink = require("lib.interlink") local Receiver = {name="apps.interlink.Receiver"} function Receiver:new (_, name) + packet.enable_group_freelist() local self = {} self.shm_name = "group/interlink/"..name..".interlink" self.backlink = "interlink/receiver/"..name..".interlink" diff --git a/src/apps/interlink/transmitter.lua b/src/apps/interlink/transmitter.lua index 21fc67787f..f0598b5ff8 100644 --- a/src/apps/interlink/transmitter.lua +++ b/src/apps/interlink/transmitter.lua @@ -8,6 +8,7 @@ local interlink = require("lib.interlink") local Transmitter = {name="apps.interlink.Transmitter"} function Transmitter:new (_, name) + packet.enable_group_freelist() local self = {} self.shm_name = "group/interlink/"..name..".interlink" self.backlink = "interlink/transmitter/"..name..".interlink" diff --git a/src/core/packet.lua b/src/core/packet.lua index bed98f7f29..608409e757 100644 --- a/src/core/packet.lua +++ b/src/core/packet.lua @@ -95,16 +95,19 @@ end local packet_allocation_step = 1000 local packets_allocated = 0 local packets_fl = ffi.new("struct freelist", {max=max_packets}) -local group_fl -if not shm.exists("group/packets.freelist") then - group_fl = shm.create("group/packets.freelist", "struct freelist") - group_fl.max = max_packets -else - group_fl = shm.open("group/packets.freelist", "struct freelist") +local group_fl -- Initialized on demand. + +-- Call to ensure group freelist is enabled. 
+function enable_group_freelist () + if not group_fl then + group_fl = shm.create("group/packets.freelist", "struct freelist") + group_fl.max = max_packets + end end +-- Return borrowed packets to group freelist. function rebalance_freelists () - if freelist_nfree(packets_fl) > packets_allocated then + if group_fl and freelist_nfree(packets_fl) > packets_allocated then freelist_lock(group_fl) while freelist_nfree(packets_fl) > packets_allocated and not freelist_full(group_fl) do @@ -117,12 +120,14 @@ end -- Return an empty packet. function allocate () if freelist_nfree(packets_fl) == 0 then - freelist_lock(group_fl) - while freelist_nfree(group_fl) > 0 - and freelist_nfree(packets_fl) < packets_allocated do - freelist_add(packets_fl, freelist_remove(group_fl)) + if group_fl then + freelist_lock(group_fl) + while freelist_nfree(group_fl) > 0 + and freelist_nfree(packets_fl) < packets_allocated do + freelist_add(packets_fl, freelist_remove(group_fl)) + end + freelist_unlock(group_fl) end - freelist_unlock(group_fl) if freelist_nfree(packets_fl) == 0 then preallocate_step() end From 02bdf78b89f4af5a6c82e006bc00397d901d8ba2 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Tue, 6 Mar 2018 11:45:56 +0100 Subject: [PATCH 65/72] apps.interlink.*: add user-facing documentation. --- src/apps/interlink/README.md | 56 ++++++++++++++++++++++++++++++++++++ src/doc/genbook.sh | 2 ++ 2 files changed, 58 insertions(+) create mode 100644 src/apps/interlink/README.md diff --git a/src/apps/interlink/README.md b/src/apps/interlink/README.md new file mode 100644 index 0000000000..a13629905b --- /dev/null +++ b/src/apps/interlink/README.md @@ -0,0 +1,56 @@ +# Inter-process links (apps.interlink.*) + +The “interlink” transmitter and receiver apps allow for efficient exchange +of packets between Snabb processes within the same process group (see +[Multiprocess operation (core.worker)](#multiprocess-operation-core.worker)). 
+ + DIAGRAM: Transmitter and Receiver + +----------------------------+ +-------------------------+ + | | | | + | | | | + input----* apps.interlink.transmitter | | apps.interlink.reciever *---- output + | | | | + | | | | + +----------------------------+ +-------------------------+ + +To make packets from an output port available to other processes, configure a +transmitter app, and link the appropriate output port to its `input` port. + +```lua +local Transmitter = require("apps.interlink.transmitter) + +config.app(c, "interlink", Transmitter) +config.link(c, "myapp.output -> interlink.input") +``` + +Then, in the process that should receive the packets, configure a receiver app +with the same name, and link its `output` port as suitable. + +```lua +local Receiver = require("apps.interlink.receiver) + +config.app(c, "interlink", Receiver) +config.link(c, "interlink.output -> otherapp.input") +``` + +Subsequently, packets transmitted to the transmitter’s `input` port will appear +on the receiver’s `output` port. + +## Configuration + +None, but the configured app names are globally unique within the process +group. + +Starting either the transmitter or receiver app attaches them to a shared +packet queue visible to the process group under the name that was given to the +app. When the queue identified by the name is unavailable, because it is +already in use by a pair of processes within the group, configuration of the +app network will block until the queue becomes available. Once the transmitter +or receiver apps are stopped they detach from the queue. + +Only two processes (one receiver and one transmitter) can be attached to an +interlink queue at the same time, but during the lifetime of the queue (e.g., +from when the first process attached to when the last process detaches) it can +be shared by any number of receivers and transmitters. Meaning, either process +attached to the queue can be restarted or replaced by another process without +packet loss. 
diff --git a/src/doc/genbook.sh b/src/doc/genbook.sh index 308aeff793..e760c13021 100755 --- a/src/doc/genbook.sh +++ b/src/doc/genbook.sh @@ -70,6 +70,8 @@ $(cat $mdroot/apps/test/README.md) $(cat $mdroot/apps/wall/README.md) +$(cat $mdroot/apps/interlink/README.md) + # Libraries $(cat $mdroot/lib/README.checksum.md) From e7f63d64c6e624db2b80b2bf4ad361d720abe1db Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Wed, 4 Apr 2018 08:45:16 +0000 Subject: [PATCH 66/72] doc/branches.md: Add snabbco/raptorjit integration branch --- src/doc/branches.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/doc/branches.md b/src/doc/branches.md index cbe3329711..d4f09c0ecd 100644 --- a/src/doc/branches.md +++ b/src/doc/branches.md @@ -175,3 +175,12 @@ The current state of each branch with respect to master is visible here: Maintainer: Jianbo Liu +#### raptorjit + + BRANCH: raptorjit git://github.com/snabbco/raptorjit + RaptorJIT updates integration and testing branch. + + - Contains new RaptorJIT changes targeted to Snabb. + + Maintainers: Luke Gorrie (@lukego) and Andy Wingo (@wingo) + From be6c657ee1c004a056bdc7558ea6e740400c062a Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 6 Apr 2018 17:28:18 +0200 Subject: [PATCH 67/72] =?UTF-8?q?lib.maxpc:=20fix=20backtracking=20?= =?UTF-8?q?=E2=80=9Czero=20or=20more=E2=80=9D=20combinator=20match.all?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removes the broken match.range and replaces match.all with a simple implementation by means of match.plus. In lib.xsd_regexp implement backtracking match.range combinator by means of match.all. Simplify variable argument variants of match.plus and match.alternate along the way. 
--- src/lib/maxpc.lua | 96 +++++++++++++++++++++--------------------- src/lib/xsd_regexp.lua | 36 +++++++++++++++- 2 files changed, 83 insertions(+), 49 deletions(-) diff --git a/src/lib/maxpc.lua b/src/lib/maxpc.lua index d544691a4a..c5dec94fb2 100644 --- a/src/lib/maxpc.lua +++ b/src/lib/maxpc.lua @@ -318,55 +318,41 @@ function match.optional (parser) return match.alternate(parser, match.seq()) end -function match.range (parser, min, max) - return function (s) - local rests = {} - while s and (not max or #rests <= max) do - table.insert(rests, s) - s = parser(s) - end - local more - more = function () - local rest = table.remove(rests) - if rest and (not min or #rests >= min) then - return rest, nil, nil, more - end - end - return more() - end -end - function match.all (parser) - return match.range(parser, 0) -end - -function match.one_or_more (parser) - return match.plus(parser, match.all(parser)) + return match.optional( + match.plus(parser, function (s) return match.all(parser)(s) end) + ) end -local function make_reducer (combinator, sentinel) - local reduce - reduce = function (parsers) - if #parsers == 0 then - return sentinel - elseif #parsers == 1 then - return parsers[1] - else - local head = table.remove(parsers, 1) - local tail = reduce(parsers) - return combinator(head, tail) - end - end - return function (...) - return reduce({...}) +local function reduce (fun, tab) + local acc + for _, val in ipairs(tab) do + if not acc then acc = val + else acc = fun(acc, val) end end + return acc end local function identity (...) return ... end -match.path = make_reducer(match.plus, identity) - local function constantly_nil () end -match.either = make_reducer(match.alternate, constantly_nil) + +function match.path (...) + local parsers = {...} + if #parsers > 0 then + return reduce(match.plus, parsers) + else + return identity + end +end + +function match.either (...) 
+ local parsers = {...} + if #parsers > 0 then + return reduce(match.alternate, parsers) + else + return constantly_nil + end +end -- tests @@ -531,13 +517,6 @@ function selftest () assert(result == 1234) assert(matched) assert(not eof) -- backtracking - local result, matched, eof = parse( - "0aaaaaaaa1", - match.path(match.equal("0"), - match.all(match.satisfies(is_alphanumeric)), - match.equal("1")) - ) - assert(not result) assert(matched) assert(eof) local result, matched, eof = parse("a", match.either(match.equal("a"), match.equal("b"))) assert(not result) assert(matched) assert(eof) @@ -548,6 +527,27 @@ function selftest () assert(not result) assert(matched) local result, matched, eof = parse("", match.optional(match.equal("."))) assert(not result) assert(matched) assert(eof) + local result, matched, eof = parse( + "0aaaaaaaa1", + match.path(match.equal("0"), + match.all(match.satisfies(is_alphanumeric)), + match.equal("1")) + ) + assert(not result) assert(matched) assert(eof) + local result, matched, eof = parse( + "aaac", + match.path( + match.all( + match.either( + match.seq(match.equal("a"), match.equal("a")), + match.seq(match.equal("a"), match.equal("a"), match.equal("a")), + match.equal("c") + ) + ), + match.eof() + ) + ) + assert(not result) assert(matched) assert(eof) local domain_like = match.either( match.path( match.path( diff --git a/src/lib/xsd_regexp.lua b/src/lib/xsd_regexp.lua index 4a6f187803..305cdfa79e 100644 --- a/src/lib/xsd_regexp.lua +++ b/src/lib/xsd_regexp.lua @@ -348,13 +348,43 @@ function compile_quantifier (quantifier) end elseif quantifier.exactly then return function (parser) - return match.range(parser, quantifier.exactly, quantifier.exactly) + return match.exactly_n(parser, quantifier.exactly) end else error("Invalid quantifier") end end +function match.one_or_more (parser) + return match.path(parser, match.all(parser)) +end + +function match.exactly_n (parser, n) + local ps = {} + for i = 1, n do table.insert(ps, parser) end + 
+ return match.seq(unpack(ps)) +end + +function match.upto_n (parser, n) + local p = match.seq() + for i = 1, n do p = match.optional(match.plus(parser, p)) end + return p +end + +function match.range (parser, min, max) + if min and max then + assert(min <= max, "Invalid quanitity: "..min.."-"..max) + return match.path(match.exactly_n(parser, min), + match.upto_n(parser, max - min)) + elseif min then + return match.path(match.exactly_n(parser, min), match.all(parser)) + elseif max then + return match.upto_n(parser, max) + else + return match.all(parser) + end +end + function compile_atom (atom) local function is_special_escape (s) return member(s, "\\|.-^?*+{}()[]") @@ -620,6 +650,10 @@ function selftest () accept={"0","12", "123", "192","168","178",}, reject={"a.a.a.", ""}} + test{regexp="(aa|aaa|bb)*", + accept={"", "aa", "aaa", "aaaa", "aabb", "aaabb", "bb"}, + reject={"a", "b", "bbb", "aaaab"}} + local ipv4_address = "(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}" .. "([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])" From bf4307d9aa37fff50812c58d45b295a2632862cc Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Wed, 2 May 2018 07:22:50 +0200 Subject: [PATCH 68/72] Make "offset" arg to lseek a signed integer Otherwise, passing a negative seek amount as a normal Lua number will involve a cast from double to uint64. In C it's undefined behavior when a double outside the [0,2^64) range is cast to uint64. In Lua we try to additionally accommodate the range [-2^63, -1], but there is a bug on x86-64 and might be a bug on other platforms: https://github.com/LuaJIT/LuaJIT/pull/415 However it's cheaper to simply target an int64_t when you want to allow negative numbers, as is our case, because you don't have to do assembly heroics for it to do what you want. The `tonumber` call in the return of `linux.lua:lseek` indicates anyway that our range is not the full 64-bit range, so we might as well restrict instead to long rather than ulong.
--- lib/ljsyscall/syscall/linux/c.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ljsyscall/syscall/linux/c.lua b/lib/ljsyscall/syscall/linux/c.lua index 8af630fedd..33c09e1425 100644 --- a/lib/ljsyscall/syscall/linux/c.lua +++ b/lib/ljsyscall/syscall/linux/c.lua @@ -171,7 +171,7 @@ else -- 64 bit function C.fstatfs(fd, buf) return syscall(sys.fstatfs, int(fd), void(buf)) end function C.preadv(fd, iov, iovcnt, offset) return syscall_long(sys.preadv, int(fd), void(iov), long(iovcnt), ulong(offset)) end function C.pwritev(fd, iov, iovcnt, offset) return syscall_long(sys.pwritev, int(fd), void(iov), long(iovcnt), ulong(offset)) end - function C.lseek(fd, offset, whence) return syscall_off(sys.lseek, int(fd), ulong(offset), int(whence)) end + function C.lseek(fd, offset, whence) return syscall_off(sys.lseek, int(fd), long(offset), int(whence)) end function C.sendfile(outfd, infd, offset, count) return syscall_long(sys.sendfile, int(outfd), int(infd), void(offset), ulong(count)) end From c583ac74368c687c1ed6acf2243c91f1724d09d0 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 3 May 2018 15:33:52 +0200 Subject: [PATCH 69/72] apps.interlink: fix documentation bugs --- src/apps/interlink/README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/apps/interlink/README.md b/src/apps/interlink/README.md index a13629905b..b03859f7b0 100644 --- a/src/apps/interlink/README.md +++ b/src/apps/interlink/README.md @@ -5,19 +5,19 @@ of packets between Snabb processes within the same process group (see [Multiprocess operation (core.worker)](#multiprocess-operation-core.worker)). 
DIAGRAM: Transmitter and Receiver - +----------------------------+ +-------------------------+ - | | | | - | | | | - input----* apps.interlink.transmitter | | apps.interlink.reciever *---- output - | | | | - | | | | - +----------------------------+ +-------------------------+ + +-------------+ +-------------+ + | | | | + input | | | | + ----* Transmitter | | Reciever *---- + | | | | output + | | | | + +-------------+ +-------------+ To make packets from an output port available to other processes, configure a transmitter app, and link the appropriate output port to its `input` port. ```lua -local Transmitter = require("apps.interlink.transmitter) +local Transmitter = require("apps.interlink.transmitter") config.app(c, "interlink", Transmitter) config.link(c, "myapp.output -> interlink.input") @@ -27,7 +27,7 @@ Then, in the process that should receive the packets, configure a receiver app with the same name, and link its `output` port as suitable. ```lua -local Receiver = require("apps.interlink.receiver) +local Receiver = require("apps.interlink.receiver") config.app(c, "interlink", Receiver) config.link(c, "interlink.output -> otherapp.input") From 5fbe79ec35f2e985ef98e5b0c83d5cc441db779b Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 3 May 2018 18:06:31 +0200 Subject: [PATCH 70/72] apps.interlink: avoid engine/appname kludge by attaching in :link() By postponing attaching inter-process links to the :link() callback we can avoid the kludge introduced to core.app that passes the app name to :new(). Semantics should be unchanged, although it might be a little less obvious when the initialization happens. 
--- src/apps/interlink/receiver.lua | 25 ++++++++++++++++--------- src/apps/interlink/transmitter.lua | 25 ++++++++++++++++--------- 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/src/apps/interlink/receiver.lua b/src/apps/interlink/receiver.lua index ed00222a30..6bf28b67e8 100644 --- a/src/apps/interlink/receiver.lua +++ b/src/apps/interlink/receiver.lua @@ -7,14 +7,19 @@ local interlink = require("lib.interlink") local Receiver = {name="apps.interlink.Receiver"} -function Receiver:new (_, name) +function Receiver:new () packet.enable_group_freelist() - local self = {} - self.shm_name = "group/interlink/"..name..".interlink" - self.backlink = "interlink/receiver/"..name..".interlink" - self.interlink = interlink.attach_receiver(self.shm_name) - shm.alias(self.backlink, self.shm_name) - return setmetatable(self, {__index=Receiver}) + return setmetatable({attached=false}, {__index=Receiver}) +end + +function Receiver:link () + if not self.attached then + self.shm_name = "group/interlink/"..self.appname..".interlink" + self.backlink = "interlink/receiver/"..self.appname..".interlink" + self.interlink = interlink.attach_receiver(self.shm_name) + shm.alias(self.backlink, self.shm_name) + self.attached = true + end end function Receiver:pull () @@ -28,8 +33,10 @@ function Receiver:pull () end function Receiver:stop () - interlink.detach_receiver(self.interlink, self.shm_name) - shm.unlink(self.backlink) + if self.attached then + interlink.detach_receiver(self.interlink, self.shm_name) + shm.unlink(self.backlink) + end end -- Detach receivers to prevent leaking interlinks opened by pid. 
diff --git a/src/apps/interlink/transmitter.lua b/src/apps/interlink/transmitter.lua index f0598b5ff8..06d8611d7a 100644 --- a/src/apps/interlink/transmitter.lua +++ b/src/apps/interlink/transmitter.lua @@ -7,14 +7,19 @@ local interlink = require("lib.interlink") local Transmitter = {name="apps.interlink.Transmitter"} -function Transmitter:new (_, name) +function Transmitter:new () packet.enable_group_freelist() - local self = {} - self.shm_name = "group/interlink/"..name..".interlink" - self.backlink = "interlink/transmitter/"..name..".interlink" - self.interlink = interlink.attach_transmitter(self.shm_name) - shm.alias(self.backlink, self.shm_name) - return setmetatable(self, {__index=Transmitter}) + return setmetatable({attached=false}, {__index=Transmitter}) +end + +function Transmitter:link () + if not self.attached then + self.shm_name = "group/interlink/"..self.appname..".interlink" + self.backlink = "interlink/transmitter/"..self.appname..".interlink" + self.interlink = interlink.attach_transmitter(self.shm_name) + shm.alias(self.backlink, self.shm_name) + self.attached = true + end end function Transmitter:push () @@ -28,8 +33,10 @@ function Transmitter:push () end function Transmitter:stop () - interlink.detach_transmitter(self.interlink, self.shm_name) - shm.unlink(self.backlink) + if self.attached then + interlink.detach_transmitter(self.interlink, self.shm_name) + shm.unlink(self.backlink) + end end -- Detach transmitters to prevent leaking interlinks opened by pid. From a0834879bb44a158e8daa63cec4c587fc63877a8 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 3 May 2018 15:38:26 +0200 Subject: [PATCH 71/72] Revert "core.app: pass app name as second argument to :new in start_app" This reverts commit 302f9527d76560f0c9ae87b9d6b688ab4221448b. 
--- src/core/app.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/app.lua b/src/core/app.lua index 6eb06a1245..e9eb0e0238 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -339,7 +339,7 @@ function apply_config_actions (actions) configuration.apps[name] = nil end function ops.start_app (name, class, arg) - local app = class:new(arg, name) + local app = class:new(arg) if type(app) ~= 'table' then error(("bad return value from app '%s' start() method: %s"):format( name, tostring(app))) From b699d7e8938bc5a39fed0d4ae2ee84d82b0766c8 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 30 May 2018 18:56:04 +0200 Subject: [PATCH 72/72] apps.interlink: allow specifying queue name per config --- src/apps/interlink/README.md | 13 +++++++++++-- src/apps/interlink/receiver.lua | 15 ++++++++------- src/apps/interlink/transmitter.lua | 15 ++++++++------- 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/src/apps/interlink/README.md b/src/apps/interlink/README.md index b03859f7b0..796f35d914 100644 --- a/src/apps/interlink/README.md +++ b/src/apps/interlink/README.md @@ -36,10 +36,19 @@ config.link(c, "interlink.output -> otherapp.input") Subsequently, packets transmitted to the transmitter’s `input` port will appear on the receiver’s `output` port. +Alternatively, a name can be supplied as a configuration argument to be used +instead of the app’s name: + +```lua +config.app(c, "mylink", Receiver, "interlink") +config.link(c, "mylink.output -> otherapp.input") +``` + ## Configuration -None, but the configured app names are globally unique within the process -group. +The configured app names denote globally unique queues within the process +group. Alternatively, the receiver and transmitter apps can instead be passed +a string that names the shared queue to which to attach.
Starting either the transmitter or receiver app attaches them to a shared packet queue visible to the process group under the name that was given to the diff --git a/src/apps/interlink/receiver.lua b/src/apps/interlink/receiver.lua index 6bf28b67e8..10b2f02ee7 100644 --- a/src/apps/interlink/receiver.lua +++ b/src/apps/interlink/receiver.lua @@ -7,15 +7,16 @@ local interlink = require("lib.interlink") local Receiver = {name="apps.interlink.Receiver"} -function Receiver:new () +function Receiver:new (queue) packet.enable_group_freelist() - return setmetatable({attached=false}, {__index=Receiver}) + return setmetatable({attached=false, queue=queue}, {__index=Receiver}) end function Receiver:link () + local queue = self.queue or self.appname if not self.attached then - self.shm_name = "group/interlink/"..self.appname..".interlink" - self.backlink = "interlink/receiver/"..self.appname..".interlink" + self.shm_name = "group/interlink/"..queue..".interlink" + self.backlink = "interlink/receiver/"..queue..".interlink" self.interlink = interlink.attach_receiver(self.shm_name) shm.alias(self.backlink, self.shm_name) self.attached = true @@ -44,9 +45,9 @@ end -- This is an internal API function provided for cleanup during -- process termination. function Receiver.shutdown (pid) - for _, name in ipairs(shm.children("/"..pid.."/interlink/receiver")) do - local backlink = "/"..pid.."/interlink/receiver/"..name..".interlink" - local shm_name = "/"..pid.."/group/interlink/"..name..".interlink" + for _, queue in ipairs(shm.children("/"..pid.."/interlink/receiver")) do + local backlink = "/"..pid.."/interlink/receiver/"..queue..".interlink" + local shm_name = "/"..pid.."/group/interlink/"..queue..".interlink" -- Call protected in case //group is already unlinked. 
local ok, r = pcall(interlink.open, shm_name) if ok then interlink.detach_receiver(r, shm_name) end diff --git a/src/apps/interlink/transmitter.lua b/src/apps/interlink/transmitter.lua index 06d8611d7a..4ce6afb572 100644 --- a/src/apps/interlink/transmitter.lua +++ b/src/apps/interlink/transmitter.lua @@ -7,15 +7,16 @@ local interlink = require("lib.interlink") local Transmitter = {name="apps.interlink.Transmitter"} -function Transmitter:new () +function Transmitter:new (queue) packet.enable_group_freelist() - return setmetatable({attached=false}, {__index=Transmitter}) + return setmetatable({attached=false, queue=queue}, {__index=Transmitter}) end function Transmitter:link () + local queue = self.queue or self.appname if not self.attached then - self.shm_name = "group/interlink/"..self.appname..".interlink" - self.backlink = "interlink/transmitter/"..self.appname..".interlink" + self.shm_name = "group/interlink/"..queue..".interlink" + self.backlink = "interlink/transmitter/"..queue..".interlink" self.interlink = interlink.attach_transmitter(self.shm_name) shm.alias(self.backlink, self.shm_name) self.attached = true @@ -44,9 +45,9 @@ end -- This is an internal API function provided for cleanup during -- process termination. function Transmitter.shutdown (pid) - for _, name in ipairs(shm.children("/"..pid.."/interlink/transmitter")) do - local backlink = "/"..pid.."/interlink/transmitter/"..name..".interlink" - local shm_name = "/"..pid.."/group/interlink/"..name..".interlink" + for _, queue in ipairs(shm.children("/"..pid.."/interlink/transmitter")) do + local backlink = "/"..pid.."/interlink/transmitter/"..queue..".interlink" + local shm_name = "/"..pid.."/group/interlink/"..queue..".interlink" -- Call protected in case //group is already unlinked. local ok, r = pcall(interlink.open, shm_name) if ok then interlink.detach_transmitter(r, shm_name) end