Skip to content

Commit

Permalink
Refine lua scripts to handle caret better. Fix schema dependencies of xuma_52p_qshare. Remove unused items in xuma_52p_reverse.dict.
Browse files Browse the repository at this point in the history
  • Loading branch information
Ace-Who committed Apr 12, 2020
1 parent d16d4bf commit 1196538
Show file tree
Hide file tree
Showing 6 changed files with 2,143 additions and 4,262 deletions.
42 changes: 25 additions & 17 deletions schema/lua/ace/xuma_52p_precand.lua
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,18 @@ end
local function filter(input, env)
local context = env.engine.context
if context:get_option("xuma_52p_precand") and
context.caret_pos > 0 and
not context.input:find('^;') -- 本方案中分号引导用于实现其他多个功能
then
local sel_inp = context.input:sub(1, context.caret_pos)
if sel_inp:find('%a$') then -- 一定要加这个判断,否则会影响符号键的功能
sel_inp = sel_inp:match('%a+$') -- 去掉前置标点部分
(context.input:find('%l$') or
context.input:find('^;')) then
local sel_inp = context.caret_pos == 0 and context.input or
context.input:sub(1, context.caret_pos)
if sel_inp:find('%l$') then -- 一定要加这个判断,否则会影响符号键的功能
sel_inp = sel_inp:match('%l+$') -- 去掉前置标点部分
end
local detailed = context:get_option("detailed_x52_precand")
local codes, cand1, cand2, seg1, seg2
if sel_inp:len() < 3 then
if context.input:find('^;') then -- 本方案中分号引导用于实现其他多个功能
-- Do nothing
elseif sel_inp:len() < 3 then
if detailed then
codes = { sel_inp .. '_1', sel_inp .. '_2', sel_inp .. '_3' }
cand2 = map(codes, lookup(env.code2cand_rvdb))
Expand All @@ -39,29 +41,35 @@ local function filter(input, env)
end
-- 这是以候选迭代为基础的,因此要求无空码。
for cand in input:iter() do
if false then
cand.preedit = cand.text
local rep = ''
if context.input:find('^;') then
rep = cand.text
elseif sel_inp:len() < 3 then
if detailed then
if cand2[1] == cand.text then
cand.preedit = table.concat(cand2, '|')
rep = table.concat(cand2, '|')
else
cand.preedit = ('%s>%s'):format(table.concat(cand2, '|'), cand.text)
rep = ('%s>%s'):format(table.concat(cand2, '|'), cand.text)
end
else
cand.preedit = cand.text
rep = cand.text
end
else
if detailed then
-- cand.preedit = ('%s%s|%s'):format(cand1, cand2[1], cand.text)
cand.preedit = ('%s+%s=%s'):format(cand1, table.concat(cand2, '|'), cand.text)
-- cand.preedit = ('%s%s%s%s'):format(
-- rep = ('%s%s|%s'):format(cand1, cand2[1], cand.text)
rep = ('%s+%s=%s'):format(cand1, table.concat(cand2, '|'), cand.text)
-- rep = ('%s%s%s%s'):format(
-- cand1, sel_inp:sub(1,2), cand2[1], sel_inp:sub(3))
else
cand.preedit = ('%s%s'):format(cand1, cand2[1])
rep = ('%s%s'):format(cand1, cand2[1])
end
end
cand.preedit = cand.preedit .. '\t'
if cand.preedit:find('\t') then
cand.preedit = cand.preedit:gsub('.+\t', rep .. '\t')
else
cand.preedit = rep .. '\t'
end
-- simplifier 的候选无法这样修改 preedit,只能用 Candidate() 生成再修改。
yield(cand)
end
else
Expand Down
13 changes: 7 additions & 6 deletions schema/xuma.dict.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# encoding: utf-8
#
# 码表根据徐国银提供的《徐码全集码表(+词)》和《徐码高频先GB18030》
# (2019.08.09) 整合修订而成。
# (2020.02.15) 整合修订而成。
# 官方网址:www.chinput.com/xuma
#
# 码表去重 by Q
Expand All @@ -15,8 +15,8 @@

# 排序目标之一:保证重码时首选永远是单字,简码词组为次选和三选。
# 《徐码全集码表(+词)》的排序满足这点,定义排序方式为 original 即可。
# 有个别例外,包括首选为词组的「出来ai」等和次选和三选为单字的「啊o」「们t 」
# ,而「吗?oa」「呢?od」可视作词组
# 有个别例外,包括三选为单字的「啊o」「们t 」,而「吗?oa」「呢?od」可视作词
#

# 排序目标之二:保证全码词组在单字之后。
# 目前通过调整全码词组区的位置实现了这点。置于所有单字之后,所有符号之前。
Expand All @@ -33,7 +33,7 @@

---
name: "xuma"
version: "2020.03.18"
version: "2020.04.09"
sort: original
import_tables:
- xuma.extended
Expand All @@ -45,6 +45,7 @@ columns:
encoder:
exclude_patterns:
- '^[a-z]$' # 一简不参与造词
- '[^a-z]'
- '^bg(zs|zx|ys|yx|jc|hx|sx)$' # 表格
- '^sz[lyeswlqbj]' # 数字
# - '^dl..$' # 特殊字符和八卦名 "乾兑坎坤巽离艮震" 的符号编码(dl?g).
Expand Down Expand Up @@ -8577,7 +8578,7 @@ encoder:
浒 zzp 15300000
谰 zzx 421000

# 字根置换码(特码)区
# 字根扩展码区(设置方式见《置换码研究》或主文档的「置换码」部分)
艮 bgo 5590000
# 予 byhz
巛 ccj 709000
Expand Down Expand Up @@ -36409,7 +36410,7 @@ encoder:
兀 gw 1550000
嗀 jqoq 3280
﨎 ytdw 31500
jtto 49400
jtuo 49400
﨑 myob 802000
﨓 ihti 26500
﨔 iztr 22700
Expand Down
16 changes: 1 addition & 15 deletions schema/xuma.extended.dict.yaml
Original file line number Diff line number Diff line change
@@ -1,26 +1,12 @@
---
name: "xuma.extended"
version: "2020.01.24"
version: "2020.04.12"
sort: by_weight
columns:
- text
- code
- weight
- stem
encoder:
exclude_patterns:
- '^[a-z]$' # 一简不参与造词
- '^bg(zs|zx|ys|yx|jc|hx|sx)$' # 表格
- '^sz[lyeswlqbj]' # 数字
# - '^dl..$' # 特殊字符和八卦名 "乾兑坎坤巽离艮震" 的符号编码(dl?g).
# 误伤太多,取消这条。
rules:
- length_equal: 2
formula: "AaAbBaBb"
- length_equal: 3
formula: "AaBaCaCb"
- length_in_range: [4, 10]
formula: "AaBaCaZa"
...

# 「#」号开头的行为注释,不生效。
Expand Down
6 changes: 3 additions & 3 deletions schema/xuma_52p.dict.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

---
name: "xuma_52p"
version: "2020.04.05"
version: "2020.04.09"
sort: original
import_tables:
# 仅针对扩展词库中定义了一码或二码词条的情况。因字库不全,会导致无编码词组编
Expand All @@ -15,9 +15,9 @@ columns:
encoder:
exclude_patterns:
- '^[a-z][^a-z]*$' # 一简不参与造词
- '[^a-z]'
- '^bg(zs|zx|ys|yx|jc|hx|sx)$' # 表格
- '^sz[lyeswlqbj]' # 数字
- '[^a-z]'
# - '^dl..$' # 特殊字符和八卦名 "乾兑坎坤巽离艮震" 的符号编码(dl?g).
# 误伤太多,取消这条。
rules:
Expand Down Expand Up @@ -5296,7 +5296,7 @@ encoder:
㈩ eu

# 符号区2
Ⓐ a
Ⓐ a
ⓐ a
⒜ a
㈬ as
Expand Down
1 change: 1 addition & 0 deletions schema/xuma_52p_qshare.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ schema:
dependencies:
- xuma_qshare
- xuma_52p_long
- xuma_52p_reverse
- xuma_spelling_pseudo
- luna_pinyin
- stroke
Expand Down
Loading

0 comments on commit 1196538

Please sign in to comment.