Skip to content

Commit

Permalink
feat: 英文词中数字和标点自动转写 (iDvel#326)
Browse files Browse the repository at this point in the history
  • Loading branch information
mirtlecn authored and ann61c committed Jul 11, 2023
1 parent 0f3713e commit 8472acc
Show file tree
Hide file tree
Showing 2 changed files with 204 additions and 58 deletions.
75 changes: 37 additions & 38 deletions en_dicts/en_ext.dict.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,10 @@ Phoebe Phoebe

# 带权重的系列
iPhone iPhone 999
iPhone 14 iPhone 4
iPhone 14 Plus iPhone 3
iPhone 14 Pro iPhone 2
iPhone 14 Pro Max iPhone 1
iPhone 14 iPhone14 4
iPhone 14 Plus iPhone14Plus 3
iPhone 14 Pro iPhone14Pro 2
iPhone 14 Pro Max iPhone14ProMax 1
iPhone SE iPhoneSE

Mac OS X MacOSX 999
Expand Down Expand Up @@ -185,21 +185,21 @@ Ventura Ventura
Sonoma Sonoma

Windows Windows 999
Windows 95 Windows 1
Windows 98 Windows 2
Windows 95 Windows95 1
Windows 98 Windows98 2
Windows Me Windows 3
Windows 2000 Windows 4
Windows 2000 Windows2000 4
Windows XP Windows 5
Windows XP WindowsXP
Windows Vista Windows 6
Windows 7 Windows 7
Windows 8 Windows 8
Windows 10 Windows 10
Windows 11 Windows 11
Windows Vista WindowsVista 6
Windows 7 Windows7 7
Windows 8 Windows8 8
Windows 10 Windows10 10
Windows 11 Windows11 11

ERC-20 ERC
ERC-721 ERC
ERC-1155 ERC
ERC-20 ERC-20
ERC-721 ERC-721
ERC-1155 ERC-1155


# +_+
Expand Down Expand Up @@ -284,10 +284,10 @@ Secure Shell SSH
Secure Shell SecureShell
ISO ISO
International Organization for Standardization ISO
ISO 8601 ISO
ISO 8601 ISO8601
RFC RFC
Request for Comments RFC
RFC 3339 RFC
RFC 3339 RFC3339
RFCs RFCs
IETF IETF
Internet Engineering Task Force IETF
Expand Down Expand Up @@ -439,12 +439,8 @@ VIP VIP
D.VA DVA
Logo Logo
Telnet Telnet
IPv4 IPv
IPv6 IPv
IPv4 IPvs
IPv6 IPvl
IPv4 IPvsi
IPv6 IPvliu
IPv4 IPv4
IPv6 IPv6
Unix Unix
Rick and Morty RickandMorty
Tokyo Tokyo
Expand Down Expand Up @@ -561,11 +557,9 @@ Structured Query Language StructuredQueryLanguage
SQLite SQLite
Sourcetree Sourcetree
Spiritfarer Spiritfarer
TCP/IP tcpip
TCP/IP TCP/IP
Twitter Twitter
V2EX Ver
V2EX VerEX
V2EX VtoEX
V2EX V2EX
VSCode VSCode
Vue Vue
Vue.js Vuejs
Expand Down Expand Up @@ -767,10 +761,8 @@ MIPS MIPS
Moke Moke
Moneywiz Moneywiz
Moom Moom
MP3 MPs
MP3 MPsan
MP4 MPs
MP4 MPsi
MP3 MP3
MP4 MP4
MplayerX MplayerX
Netflix Netflix
Noizio Noizio
Expand Down Expand Up @@ -1106,7 +1098,7 @@ Shottr Shottr
turbo turbo
Turbo Boost TurboBoost
Turbo Boost Switcher TurboBoostSwitcher
P2P ptop
P2P P2P
OPPO OPPO
vivo vivo
Uno Uno
Expand Down Expand Up @@ -1143,11 +1135,11 @@ Minions Minions
Rush B RushB
Rush A RushA
AK AK
AK47 AK
AK74 AK
AK47 AK47
AK74 AK74
Mastercard Mastercard
COVID COVID
COVID-19 COVID
COVID-19 COVID-19
Wallpaper Engine WallpaperEngine
Erlang Erlang
FAQ FAQ
Expand Down Expand Up @@ -1626,7 +1618,8 @@ DVD DVD
CD CD
eSIM eSIM
UTF UTF
UTF-8 UTF
UTF-8 UTF-8
UTF-16 UTF-16
ASCII ASCII
ANSI ANSI
IEEE IEEE
Expand All @@ -1648,6 +1641,7 @@ ribonucleic acid RNA
mRNA mRNA
messenger RNA mRNA
C++ Cpp
C++ C++
Python Python
Perl Perl
Swift Swift
Expand Down Expand Up @@ -2040,8 +2034,8 @@ WWII WWII
World War II WWII
WWIII WWIII
World War III WWIII
PL/pgSQL PLpgSQL
Procedural Language / PostGres Structured Query Language PLpgSQL
PL/pgSQL PL/pgSQL
Procedural Language / PostGres Structured Query Language PL/pgSQL
IQ IQ
Intelligence quotient IQ
quotient quotient
Expand Down Expand Up @@ -2198,3 +2192,8 @@ Apple Vision AppleVision
Apple Vision Pro AppleVisionPro
Vision Pro VisionPro
shithead shithead
i18n i18n
internationalization i18n
GPT-4 GPT-4
a11y a11y
accessibility a11y
187 changes: 167 additions & 20 deletions melt_eng.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,28 +41,70 @@ engine:
- uniquifier

speller:
alphabet: zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA-_
alphabet: zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA
delimiter: " '"
algebra:
# 为编码派生新的拼写:
# 删除特殊字符
- derive/['\-_+,.]+//
# 全小写
- derive/^.+$/\L$0/
# 全大写
- derive/^.+$/\U$0/
# 首字母大写
- derive/^./\U$0/
# 前 2~10 个字母大写
- derive/^([a-z]{2})/\U$1/
- derive/^([a-z]{3})/\U$1/
- derive/^([a-z]{4})/\U$1/
- derive/^([a-z]{5})/\U$1/
- derive/^([a-z]{6})/\U$1/
- derive/^([a-z]{7})/\U$1/
- derive/^([a-z]{8})/\U$1/
- derive/^([a-z]{9})/\U$1/
- derive/^([a-z]{10})/\U$1/
# Set __include to the rule list that matches the Chinese input scheme in use:
# algebra_rime_ice ; algebra_flypy ; algebra_mspy ; algebra_ziguang ; algebra_double_pinyin
__include: algebra_rime_ice
__append:
# --- Digits spelled out in English ---
# 14, 16, 17, 19 -> "<digit>teen"; the single-digit rules further down then spell
# the remaining digit (14 -> 4teen -> fourteen). 13, 15 and 18 are irregular and
# have explicit rules below, so 15 is left out here to avoid deriving "fiveteen".
# (The class is written without "|": inside [...] a pipe is a literal character.)
- derive/1([4679])/$1teen/
- derive/11/eleven/
- derive/12/twelve/
- derive/13/thirteen/
- derive/15/fifteen/
- derive/18/eighteen/
- derive/0/o/ # e.g. 1000 -> oneooo
- derive/0/O/ # e.g. 1000 -> oneOOO
- derive/0/zero/
- derive/1/one/
- derive/10/ten/
- derive/2/to/ # homophone of "two"
- derive/2/two/
- derive/3/three/
- derive/4/for/ # homophone of "four"
- derive/4/four/
- derive/5/five/
- derive/6/six/
- derive/7/seven/
- derive/8/eight/
- derive/9/nine/
# --- Symbols spelled out in English ---
- derive/\+/plus/
- derive/\./dot/
- derive/@/at/
- derive/-/hyphen/
# NOTE(review): this formula is separated by spaces instead of "/" so that the
# pattern itself can be a literal "/" -- confirm Rime accepts a space separator.
- derive / slash
# --- Spellings with one kind of special character dropped ---
- derive/[.]//
- derive/[+]//
- derive/[@]//
- derive/[-]//
- derive/[_]//
# --- Spelling with every non-alphanumeric character dropped ---
- derive/[^a-zA-Z0-9]//
# Drop codes whose first character is not a letter or digit.
# ("|" is already covered by \W, so the class is simply [\W_].)
- erase/^[\W_].+$/
# Author's intent: with automatic frequency adjustment enabled, keep words that
# start with capitals followed by lower-case letters ranked ahead of the
# all-capital form. The rule derives just the leading run of capitals.
- derive/^([A-Z][A-Z]+)[a-z]+/$1/
# All lower case
- derive/^.+$/\L$0/
# All upper case
- derive/^.+$/\U$0/
# First character upper case
- derive/^./\U$0/
# First 2 to 10 letters upper case
- derive/^([a-z]{2})/\U$1/
- derive/^([a-z]{3})/\U$1/
- derive/^([a-z]{4})/\U$1/
- derive/^([a-z]{5})/\U$1/
- derive/^([a-z]{6})/\U$1/
- derive/^([a-z]{7})/\U$1/
- derive/^([a-z]{8})/\U$1/
- derive/^([a-z]{9})/\U$1/
- derive/^([a-z]{10})/\U$1/

translator:
dictionary: melt_eng
spelling_hints: 9
Expand All @@ -75,3 +117,108 @@ punctuator:

recognizer:
import_preset: default

# Rule list selectable through `__include` under speller/algebra: derives
# spellings in which digits (and ".") are replaced by full-pinyin syllables.
# Rules run top to bottom; each `derive` keeps the existing spelling and adds one.
algebra_rime_ice:
# A stand-alone two-digit number 11-19 (no digit before or after) -> "shi" + units digit;
# the units digit is spelled by the single-digit rules at the end of the list.
- derive/(?<!\d)1([1-9])(?!\d)/shi$1/
# A non-zero digit followed by four / three / two zeros (and no further 0)
# -> digit + wan / qian / bai.
- derive/([1-9])0000(?!0)/$1wan/
- derive/([1-9])000(?!0)/$1qian/
- derive/([1-9])00(?!0)/$1bai/
# Tens 20-90 (not followed by another 0) -> digit + "shi".
- derive/([2-9])0(?!0)/$1shi/
# A stand-alone two-digit number 21-99 with non-zero units -> tens + "shi" + units.
- derive/(?<!\d)([2-9])([1-9])(?!\d)/$1shi$2/
# Decimal point.
- derive/\./dian/
# 10 as a whole, then every single digit; 2 has two readings (er / liang).
- derive/10/shi/
- derive/0/ling/
- derive/1/yi/
- derive/2/er/
- derive/2/liang/
- derive/3/san/
- derive/4/si/
- derive/5/wu/
- derive/6/liu/
- derive/7/qi/
- derive/8/ba/
- derive/9/jiu/

# Alternative rule list selectable through `__include` under speller/algebra.
# Same rule shapes as algebra_rime_ice, but with two-letter replacement codes
# (presumably the flypy double-pinyin codes for shi/wan/qian/bai/dian/... --
# verify against the layout table).
algebra_flypy:
# A stand-alone two-digit number 11-19 -> "ui" + units digit.
- derive/(?<!\d)1([1-9])(?!\d)/ui$1/
# A non-zero digit followed by four / three / two zeros (and no further 0).
- derive/([1-9])0000(?!0)/$1wj/
- derive/([1-9])000(?!0)/$1qm/
- derive/([1-9])00(?!0)/$1bd/
# Tens 20-90 (not followed by another 0).
- derive/([2-9])0(?!0)/$1ui/
# A stand-alone two-digit number 21-99 with non-zero units.
- derive/(?<!\d)([2-9])([1-9])(?!\d)/$1ui$2/
# Decimal point.
- derive/\./dm/
# 10 as a whole, then every single digit; 2 has two codes (er / ll).
- derive/10/ui/
- derive/0/lk/
- derive/1/yi/
- derive/2/er/
- derive/2/ll/
- derive/3/sj/
- derive/4/si/
- derive/5/wu/
- derive/6/lq/
- derive/7/qi/
- derive/8/ba/
- derive/9/jq/

# Alternative rule list selectable through `__include` under speller/algebra.
# Same rule shapes as algebra_rime_ice, but with two-character replacement codes
# (presumably the Microsoft double-pinyin layout -- verify against the layout table).
algebra_mspy:
# A stand-alone two-digit number 11-19 -> "ui" + units digit.
- derive/(?<!\d)1([1-9])(?!\d)/ui$1/
# A non-zero digit followed by four / three / two zeros (and no further 0).
- derive/([1-9])0000(?!0)/$1wj/
- derive/([1-9])000(?!0)/$1qm/
- derive/([1-9])00(?!0)/$1bl/
# Tens 20-90 (not followed by another 0).
- derive/([2-9])0(?!0)/$1ui/
# A stand-alone two-digit number 21-99 with non-zero units.
- derive/(?<!\d)([2-9])([1-9])(?!\d)/$1ui$2/
# Decimal point.
- derive/\./dm/
# 10 as a whole, then every single digit; 2 has two codes (er / ld).
- derive/10/ui/
# NOTE(review): this code contains ";", which is not in the speller alphabet
# declared in this schema -- confirm it is typeable where this list is used.
- derive/0/l;/
- derive/1/yi/
- derive/2/er/
- derive/2/ld/
- derive/3/sj/
- derive/4/si/
- derive/5/wu/
- derive/6/lq/
- derive/7/qi/
- derive/8/ba/
- derive/9/jq/

# Alternative rule list selectable through `__include` under speller/algebra.
# Same rule shapes as algebra_rime_ice, but with two-character replacement codes
# (presumably the Ziguang double-pinyin layout -- verify against the layout table).
algebra_ziguang:
# A stand-alone two-digit number 11-19 -> "ii" + units digit.
- derive/(?<!\d)1([1-9])(?!\d)/ii$1/
# A non-zero digit followed by four / three / two zeros (and no further 0).
- derive/([1-9])0000(?!0)/$1wr/
- derive/([1-9])000(?!0)/$1qf/
- derive/([1-9])00(?!0)/$1bp/
# Tens 20-90 (not followed by another 0).
- derive/([2-9])0(?!0)/$1ii/
# A stand-alone two-digit number 21-99 with non-zero units.
- derive/(?<!\d)([2-9])([1-9])(?!\d)/$1ii$2/
# Decimal point.
- derive/\./df/
# 10 as a whole, then every single digit; 2 has two codes (er / lg).
- derive/10/ii/
# NOTE(review): this code contains ";", which is not in the speller alphabet
# declared in this schema -- confirm it is typeable where this list is used.
- derive/0/l;/
- derive/1/yi/
- derive/2/er/
- derive/2/lg/
- derive/3/sr/
- derive/4/si/
- derive/5/wu/
- derive/6/lj/
- derive/7/qi/
- derive/8/ba/
- derive/9/jj/

# Alternative rule list selectable through `__include` under speller/algebra.
# Same rule shapes as algebra_rime_ice, but with two-letter replacement codes
# (presumably the default double-pinyin layout -- verify against the layout table).
algebra_double_pinyin:
# A stand-alone two-digit number 11-19 -> "ui" + units digit.
- derive/(?<!\d)1([1-9])(?!\d)/ui$1/
# A non-zero digit followed by four / three / two zeros (and no further 0).
- derive/([1-9])0000(?!0)/$1wj/
- derive/([1-9])000(?!0)/$1qm/
- derive/([1-9])00(?!0)/$1bl/
# Tens 20-90 (not followed by another 0).
- derive/([2-9])0(?!0)/$1ui/
# A stand-alone two-digit number 21-99 with non-zero units.
- derive/(?<!\d)([2-9])([1-9])(?!\d)/$1ui$2/
# Decimal point.
- derive/\./dm/
# 10 as a whole, then every single digit; 2 has two codes (er / ld).
- derive/10/ui/
- derive/0/ly/
- derive/1/yi/
- derive/2/er/
- derive/2/ld/
- derive/3/sj/
- derive/4/si/
- derive/5/wu/
- derive/6/lq/
- derive/7/qi/
- derive/8/ba/
- derive/9/jq/

0 comments on commit 8472acc

Please sign in to comment.