-
Notifications
You must be signed in to change notification settings - Fork 354
/
utils_test.go
144 lines (130 loc) · 2.86 KB
/
utils_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
package sego
import (
"fmt"
"testing"
"github.com/issue9/assert"
)
/*
 * Author: 张晓明  Date: 18/6/14
 */
// strs is the ten-element Text fixture shared by the Join and
// textSliceToString test and benchmark bodies in this file.
var strs = []Text{
	Text("one"), Text("two"), Text("three"), Text("four"), Text("five"),
	Text("six"), Text("seven"), Text("eight"), Text("nine"), Text("ten"),
}
// Test_textSliceToString asserts that textSliceToString and Join produce
// equal results for the shared strs fixture.
func Test_textSliceToString(t *testing.T) {
	fromHelper := textSliceToString(strs)
	fromJoin := Join(strs)

	assert.Equal(t, fromHelper, fromJoin)
}
// StringsJoin is a benchmark body that calls Join on the strs fixture b.N
// times. It is not named BenchmarkXxx; Test_Benchmark runs it through
// testing.Benchmark.
func StringsJoin(b *testing.B) {
	for remaining := b.N; remaining > 0; remaining-- {
		Join(strs)
	}
}
// TextSliceToString is a benchmark body that calls textSliceToString on the
// strs fixture b.N times. It is not named BenchmarkXxx; Test_Benchmark runs
// it through testing.Benchmark.
func TextSliceToString(b *testing.B) {
	for remaining := b.N; remaining > 0; remaining-- {
		textSliceToString(strs)
	}
}
// Test_Benchmark runs the two benchmark bodies through testing.Benchmark and
// prints a label followed by the benchmark result for each. It makes no
// assertions.
func Test_Benchmark(t *testing.T) {
	cases := []struct {
		label string
		body  func(*testing.B)
	}{
		{"strings.Join:", StringsJoin},
		{"textSliceToString", TextSliceToString},
	}

	for _, c := range cases {
		// Print the label before running so the output order matches the
		// order in which the benchmarks execute.
		fmt.Println(c.label)
		fmt.Println(testing.Benchmark(c.body))
	}
}
func Test_Token_TextEquals(t *testing.T) {
token := Token{
text: []Text{
[]byte("one"),
[]byte("two"),
},
}
assert.True(t, token.TextEquals("onetwo"))
}
func Test_Token_TextEquals_CN(t *testing.T) {
token := Token{
text: []Text{
[]byte("中国"),
[]byte("文字"),
},
}
assert.True(t, token.TextEquals("中国文字"))
}
func Test_Token_TextNotEquals(t *testing.T) {
token := Token{
text: []Text{
[]byte("one"),
[]byte("two"),
},
}
assert.False(t, token.TextEquals("one-two"))
}
func Test_Token_TextNotEquals_CN(t *testing.T) {
token := Token{
text: []Text{
[]byte("中国"),
[]byte("文字"),
},
}
assert.False(t, token.TextEquals("中国文字1"))
}
func Test_Token_TextNotEquals_CN_B(t *testing.T) {
token := Token{
text: []Text{
[]byte("中国"),
[]byte("文字"),
},
}
assert.False(t, token.TextEquals("中国文"))
}
// Test_Token_Split loads "dictionary.txt", segments a sample product title,
// and prints every segment's text followed by an "= value" line for each
// segment whose text equals one of the attribute values in probMap.
//
// The test makes no assertions; its only product is the printed output.
func Test_Token_Split(t *testing.T) {
	// Attribute name -> attribute value. Only the values are compared against
	// the segment texts below; the keys are never read.
	probMap := map[string]string{
		"衣门襟": "拉链",
		"品牌": "天奕",
		"图案": "纯色 字母",
		"颜色分类": "牛奶白 水粉色 湖水蓝 浅军绿 雅致灰",
		"尺码": "大码XL 大码XXL 大码XXXL 大码XXXXL",
		"组合形式": "单件",
		"面料": "聚酯",
		"领型": "连帽",
		"服饰工艺": "立体裁剪",
		"货号": "YZL-1806052",
		"厚薄": "超薄",
		"年份季节": "2018年夏季",
		"通勤": "韩版",
		"服装款式细节": "不对称",
		"成分含量": "81%(含)-90%(含)",
		"袖型": "常规",
		"风格": "通勤",
		"适用年龄": "18-24周岁",
		"服装版型": "宽松",
		"大码女装分类": "其它特大款式",
		"衣长": "中长款",
		"袖长": "长袖",
		"穿着方式": "开衫",
	}
	word := "卫衣女宽松拉链外套开衫韩版"

	var segmenter Segmenter
	segmenter.LoadDictionary("dictionary.txt")
	segments := segmenter.InternalSegment([]byte(word), true)

	for _, s := range segments {
		fmt.Println(s.token.Text())
	}

	// Iterate the segments in the outer loop: Go randomizes map iteration
	// order, so driving the output from probMap would print the matches in a
	// different order on every run. With this nesting the matches appear in
	// segment order, and the same set of lines is printed as before.
	for _, s := range segments {
		text := s.Token().Text()
		for _, value := range probMap {
			if text == value {
				fmt.Println("=", value)
			}
		}
	}
}