Refine Lua scripts to handle caret better. Fix schema dependencies of xuma_52p_qshare. Remove unused items in xuma_52p_reverse.dict.
Ace-Who · Apr 12, 2020 · 1196538 · 1196538
1 parent d16d4bf
commit 1196538
Show file tree

Hide file tree

Showing 6 changed files with 2,143 additions and 4,262 deletions.
diff --git a/schema/lua/ace/xuma_52p_precand.lua b/schema/lua/ace/xuma_52p_precand.lua
@@ -16,16 +16,18 @@ end
 local function filter(input, env)
   local context = env.engine.context
   if context:get_option("xuma_52p_precand") and
-      context.caret_pos > 0 and
-      not context.input:find('^;')  -- 本方案中分号引导用于实现其他多个功能
-      then
-    local sel_inp = context.input:sub(1, context.caret_pos)
-    if sel_inp:find('%a$') then  -- 一定要加这个判断，否则会影响符号键的功能
-      sel_inp = sel_inp:match('%a+$')  -- 去掉前置标点部分
+      (context.input:find('%l$') or
+       context.input:find('^;')) then
+    local sel_inp = context.caret_pos == 0 and context.input or
+        context.input:sub(1, context.caret_pos)
+    if sel_inp:find('%l$') then  -- 一定要加这个判断，否则会影响符号键的功能
+      sel_inp = sel_inp:match('%l+$')  -- 去掉前置标点部分
     end
     local detailed = context:get_option("detailed_x52_precand")
     local codes, cand1, cand2, seg1, seg2
-    if sel_inp:len() < 3 then
+    if context.input:find('^;') then -- 本方案中分号引导用于实现其他多个功能
+      -- Do nothing
+    elseif sel_inp:len() < 3 then
       if detailed then
         codes = { sel_inp .. '_1', sel_inp .. '_2', sel_inp .. '_3' }
         cand2 = map(codes, lookup(env.code2cand_rvdb))
@@ -39,29 +41,35 @@ local function filter(input, env)
     end
     -- 这是以候选迭代为基础的，因此要求无空码。
     for cand in input:iter() do
-      if false then
-        cand.preedit = cand.text
+      local rep = ''
+      if context.input:find('^;') then
+        rep = cand.text
       elseif sel_inp:len() < 3 then
         if detailed then
           if cand2[1] == cand.text then
-            cand.preedit = table.concat(cand2, '|')
+            rep = table.concat(cand2, '|')
           else
-            cand.preedit = ('%s>%s'):format(table.concat(cand2, '|'), cand.text)
+            rep = ('%s>%s'):format(table.concat(cand2, '|'), cand.text)
           end
         else
-          cand.preedit = cand.text
+          rep = cand.text
         end
       else
         if detailed then
-          -- cand.preedit = ('%s%s|%s'):format(cand1, cand2[1], cand.text)
-          cand.preedit = ('%s+%s=%s'):format(cand1, table.concat(cand2, '|'), cand.text)
-          -- cand.preedit = ('%s%s%s%s'):format(
+          -- rep = ('%s%s|%s'):format(cand1, cand2[1], cand.text)
+          rep = ('%s+%s=%s'):format(cand1, table.concat(cand2, '|'), cand.text)
+          -- rep = ('%s%s%s%s'):format(
           -- cand1, sel_inp:sub(1,2), cand2[1], sel_inp:sub(3))
         else
-          cand.preedit = ('%s%s'):format(cand1, cand2[1])
+          rep = ('%s%s'):format(cand1, cand2[1])
         end
       end
-      cand.preedit = cand.preedit .. '\t'
+      if cand.preedit:find('\t') then
+        cand.preedit = cand.preedit:gsub('.+\t', rep .. '\t')
+      else
+        cand.preedit = rep .. '\t'
+      end
+      -- simplifier 的候选无法这样修改 preedit，只能用 Candidate() 生成再修改。
       yield(cand)
     end
   else

diff --git a/schema/xuma.dict.yaml b/schema/xuma.dict.yaml
@@ -1,7 +1,7 @@
 # encoding: utf-8
 #
 # 码表根据徐国银提供的《徐码全集码表（+词）》和《徐码高频先GB18030》
-# (2019.08.09) 整合修订而成。
+# (2020.02.15) 整合修订而成。
 # 官方网址：www.chinput.com/xuma
 #
 # 码表去重 by Q
@@ -15,8 +15,8 @@
 
 # 排序目标之一：保证重码时首选永远是单字，简码词组为次选和三选。
 # 《徐码全集码表（+词）》的排序满足这点，定义排序方式为 original 即可。
-# 有个别例外，包括首选为词组的「出来ai」等和次选和三选为单字的「啊o」「们t 」
-# ，而「吗？oa」「呢？od」可视作词组。
+# 有个别例外，包括三选为单字的「啊o」「们t 」，而「吗？oa」「呢？od」可视作词
+# 组。
 
 # 排序目标之二：保证全码词组在单字之后。
 # 目前通过调整全码词组区的位置实现了这点。置于所有单字之后，所有符号之前。
@@ -33,7 +33,7 @@
 
 ---
 name: "xuma"
-version: "2020.03.18"
+version: "2020.04.09"
 sort: original
 import_tables:
   - xuma.extended
@@ -45,6 +45,7 @@ columns:
 encoder:
   exclude_patterns:
     - '^[a-z]$'  # 一简不参与造词
+    - '[^a-z]'
     - '^bg(zs|zx|ys|yx|jc|hx|sx)$'  # 表格
     - '^sz[lyeswlqbj]'  # 数字
     # - '^dl..$'  # 特殊字符和八卦名 "乾兑坎坤巽离艮震" 的符号编码（dl?g）.
@@ -8577,7 +8578,7 @@ encoder:
 浒	zzp	15300000
 谰	zzx	421000
 
-# 字根置换码（特码）区
+# 字根扩展码区（设置方式见《置换码研究》或主文档的「置换码」部分）
 艮	bgo	5590000
 # 予	byhz
 巛	ccj	709000
@@ -36409,7 +36410,7 @@ encoder:
 兀	gw	1550000
 嗀	jqoq	3280
 﨎	ytdw	31500
-﨏	jtto	49400
+﨏	jtuo	49400
 﨑	myob	802000
 﨓	ihti	26500
 﨔	iztr	22700

diff --git a/schema/xuma.extended.dict.yaml b/schema/xuma.extended.dict.yaml
@@ -1,26 +1,12 @@
 ---
 name: "xuma.extended"
-version: "2020.01.24"
+version: "2020.04.12"
 sort: by_weight
 columns:
   - text
   - code
   - weight
   - stem
-encoder:
-  exclude_patterns:
-    - '^[a-z]$'  # 一简不参与造词
-    - '^bg(zs|zx|ys|yx|jc|hx|sx)$'  # 表格
-    - '^sz[lyeswlqbj]'  # 数字
-    # - '^dl..$'  # 特殊字符和八卦名 "乾兑坎坤巽离艮震" 的符号编码（dl?g）.
-      # 误伤太多，取消这条。
-  rules:
-    - length_equal: 2
-      formula: "AaAbBaBb"
-    - length_equal: 3
-      formula: "AaBaCaCb"
-    - length_in_range: [4, 10]
-      formula: "AaBaCaZa"
 ...
 
 # 「#」号开头的行为注释，不生效。

diff --git a/schema/xuma_52p.dict.yaml b/schema/xuma_52p.dict.yaml
@@ -2,7 +2,7 @@
 
 ---
 name: "xuma_52p"
-version: "2020.04.05"
+version: "2020.04.09"
 sort: original
 import_tables:
   # 仅针对扩展词库中定义了一码或二码词条的情况。因字库不全，会导致无编码词组编
@@ -15,9 +15,9 @@ columns:
 encoder:
   exclude_patterns:
     - '^[a-z][^a-z]*$'  # 一简不参与造词
+    - '[^a-z]'
     - '^bg(zs|zx|ys|yx|jc|hx|sx)$'  # 表格
     - '^sz[lyeswlqbj]'  # 数字
-    - '[^a-z]'
     # - '^dl..$'  # 特殊字符和八卦名 "乾兑坎坤巽离艮震" 的符号编码（dl?g）.
       # 误伤太多，取消这条。
   rules:
@@ -5296,7 +5296,7 @@ encoder:
 ㈩	eu
 
 # 符号区2
-Ⓐ	a
+Ⓐ	a
 ⓐ	a
 ⒜	a
 ㈬	as

diff --git a/schema/xuma_52p_qshare.schema.yaml b/schema/xuma_52p_qshare.schema.yaml
@@ -16,6 +16,7 @@ schema:
   dependencies:
     - xuma_qshare
     - xuma_52p_long
+    - xuma_52p_reverse
     - xuma_spelling_pseudo
     - luna_pinyin
     - stroke