-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhananeko.js
132 lines (123 loc) · 3.8 KB
/
hananeko.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
const fs = require("fs");
// Build `dict` from the tab-separated table in tiger-ja.txt (path is relative
// to the current working directory). For every line, the second column with
// any "#..." suffix removed is the Map key, and the first column is appended
// to that key's array, so entries under one key keep their file order.
const dict = new Map();
// Split on both CRLF and LF so the table loads regardless of its line endings.
// The lines are only needed while the loop runs, so no module-level copy of
// the file contents is kept.
for (const line of fs.readFileSync("tiger-ja.txt", "utf-8").split(/\r?\n/)) {
  const [code, rawWord] = line.trim().split("\t");
  // Blank lines (such as the one produced by a trailing newline) and lines
  // without a second column carry no entry; skip them instead of crashing.
  if (rawWord === undefined) continue;
  const word = rawWord.split("#")[0];
  const dictItem = dict.get(word);
  if (dictItem) {
    dictItem.push(code);
  } else {
    dict.set(word, [code]);
  }
}
console.info(dict);
// Most recent buffer resolved by up(); read by xparse().
let lastcode;
// Not referenced by any code visible in this file.
let hzxr = "";
/**
 * Return the `w`-th entry (1-based) stored in `dict` under `word`.
 *
 * @param {string} word - Key to look up in `dict`.
 * @param {number|string} [w=1] - 1-based position of the wanted entry.
 *   Numeric strings are accepted because `up` passes the trailing digit of a
 *   code as a one-character string.
 * @returns {string} The selected entry, or `word` itself when the key is
 *   missing or `w` is not a valid position for that key.
 */
function lookup(word, w = 1) {
  const candidates = dict.get(word);
  const index = Number(w) - 1;
  // Reject position 0, negatives, and non-integers explicitly: indexing with
  // them would yield `undefined` instead of the documented fallback.
  if (
    candidates &&
    Number.isInteger(index) &&
    index >= 0 &&
    index < candidates.length
  ) {
    return candidates[index];
  }
  return word;
}
/**
 * Convert `input`, leaving single-quoted sections untouched.
 *
 * The input is split on `'`. Segments at even positions (outside quotes) are
 * converted with `up`; segments at odd positions (between a pair of quotes)
 * are copied verbatim. The quote characters themselves are removed.
 *
 * @param {string} input - Text to convert.
 * @returns {string} The converted segments and verbatim segments joined
 *   back together in their original order.
 */
function parse(input) {
  return input
    .split("'")
    // Even index = outside quotes, odd index = inside quotes. An input with
    // no quote yields a single even segment, so it is converted as a whole.
    .map((part, index) => (index % 2 === 0 ? up(part) : part))
    .join("");
}
// Convert `input` and return a three-element array: the converted text, the
// value of `lastcode` once conversion has finished, and the formatted
// `lookupAll` result for that value.
let xparse = input => {
  // parse() has to run before `lastcode` is read so that the value reflects
  // any update made during this conversion.
  const converted = parse(input);
  const code = lastcode;
  const candidates = lookupAll(code, true);
  return [converted, code, candidates];
};
/**
 * Return every entry stored in `dict` under a key.
 *
 * @param {string} [code] - Key to look up. When omitted or empty, the current
 *   value of `lastcode` is used instead.
 * @param {boolean} [formatting] - When truthy, join the entries with " / "
 *   and return a string; otherwise return the array.
 * @returns {string|string[]} The entries for the key. A missing key yields
 *   `[""]` (or `""` when formatted). In the unformatted case the array is the
 *   one stored in `dict`, not a copy.
 */
function lookupAll(code, formatting) {
  // Fall back to lastcode only when no code was supplied. The previous guard
  // was inverted: it discarded a supplied code and never applied the default.
  const key = code ? code : lastcode;
  const results = dict.get(key) || [""];
  console.log(results);
  return formatting ? results.join(" / ") : results;
}
/**
 * Convert a string of typed codes into output text.
 *
 * Characters are consumed left to right. Lowercase letters accumulate in a
 * pending buffer; the buffer is resolved through `lookup` and its result is
 * appended to the output when one of the following happens:
 *   - a space arrives: the space is consumed as a delimiter (with an empty
 *     buffer a literal space is emitted instead);
 *   - a digit arrives: it is appended to the buffer and passed to `lookup` as
 *     the 1-based entry position (with an empty buffer the digit is emitted
 *     as-is);
 *   - a punctuation character arrives: `.`, `,`, `!` and `?` emit their mapped
 *     output, every other listed punctuation character is dropped;
 *   - the buffer already holds four characters: the incoming character,
 *     whatever it is, starts the next buffer;
 *   - an uppercase letter arrives: it is consumed as a delimiter (with an
 *     empty buffer the letter is emitted as-is);
 *   - the input ends.
 * Characters that match none of these rules are ignored.
 *
 * Side effects: every resolved buffer is stored in the outer `lastcode`
 * variable, and progress is logged to the console.
 *
 * @param {string} input - Text to convert.
 * @returns {string} The converted text.
 */
function up(input) {
  // U+2403 is appended as an end-of-input sentinel so that the last pending
  // buffer is flushed by the same loop that handles every other character.
  const chars = (input + "␃").split("");
  const output = [];
  let stack = "";

  // Resolve the pending buffer, append the result to the output, remember the
  // buffer in `lastcode`, and reset it. Does nothing for an empty buffer.
  function dealWithStack() {
    console.log("==" + stack);
    if (!stack) return;
    lastcode = stack;
    output.push(
      /[0-9]$/.test(stack)
        ? lookup(stack.slice(0, -1), stack.charAt(stack.length - 1))
        : lookup(stack)
    );
    stack = "";
  }

  // Characters that terminate the pending buffer.
  const punctuations = (",./?<>~!'[{]}" + '"\\').split("");
  // The subset of punctuation that also produces output.
  const punctuationOutput = new Map([
    [".", "。"],
    [",", ","],
    ["!", "!"],
    ["?", "?"],
  ]);

  for (const char of chars) {
    console.log({ char, stack });
    if (char === "␃") {
      if (stack) dealWithStack();
    } else if (char === " ") {
      console.info('空格', { stack });
      if (stack) {
        dealWithStack();
      } else {
        output.push(" ");
      }
    } else if (/^[0-9]$/.test(char)) {
      console.info("带数字的长度 < 4 的码", { char, stack });
      if (stack) {
        stack += char;
        dealWithStack();
      } else {
        // Push rather than `+=`: `output += char` would replace the array with
        // a string and make the final join throw.
        output.push(char);
      }
    } else if (punctuations.includes(char)) {
      dealWithStack();
      if (punctuationOutput.has(char)) {
        output.push(punctuationOutput.get(char));
      }
    } else if (stack.length === 4) {
      // Digits, spaces and punctuation were handled above, so the incoming
      // character can only begin a new buffer here.
      console.log("stack length is 4", { char, stack });
      dealWithStack();
      stack = char;
    } else if (/[a-z]/.test(char)) {
      console.log("char is [a-z]", { char, stack });
      stack += char;
    } else if (/[A-Z]/.test(char)) {
      if (stack) {
        dealWithStack();
      } else {
        output.push(char);
      }
    }
    // Any other character is ignored.
  }
  return output.join("");
}
// Public API of this module: `up`, `parse` and `xparse` are exported;
// `lookup` and `lookupAll` are not.
module.exports = { up, parse, xparse };