Skip to content

Commit

Permalink
4.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
jimichan committed Jul 7, 2021
1 parent 6647fc3 commit 5333e87
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 19 deletions.
2 changes: 1 addition & 1 deletion README.adoc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
= Mynlp: 高性能、可扩展的中文NLP工具包
:version: 3.3.0
:version: 4.0.0
:icons: font

image:https://img.shields.io/github/license/mayabot/mynlp.svg[]
Expand Down
5 changes: 2 additions & 3 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ plugins {

description = "mynlp是mayabot开源的中文自然语言处理工具集"

val buildVersion = "4.0.0-beta16"
val buildVersion = "4.0.0"
//val buildVersion = "4.0.0-local"
val snapShot = false

Expand Down Expand Up @@ -154,9 +154,8 @@ subprojects {
quiet()
charset("UTF-8")
}


}

tasks.withType<JavaCompile>{
options.encoding = "UTF-8"
options.compilerArgs = options.compilerArgs + listOf("-Xdoclint:none", "-Xlint:none", "-nowarn")
Expand Down
54 changes: 44 additions & 10 deletions mynlp/src/main/java/com/mayabot/nlp/module/pinyin/PinyinResult.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
Expand Down Expand Up @@ -98,6 +97,39 @@ public String asString() {

private static Map<String, String> fuzzyMap = fmap();

private static Map<String, String> fuzzyPinyinMap = fzMap();

/**
 * Builds the lookup table that maps a toneless pinyin string to its fuzzy form.
 *
 * <p>For every {@link Pinyin} value the shengmu and yunmu strings are read,
 * the shengmu {@code zh/ch/sh} is collapsed to {@code z/c/s}, the yunmu
 * {@code eng/ang/ing/iang/uang} is reduced to {@code en/an/in/ian/uan}, and the
 * concatenation of the two is stored under {@code getPinyinWithoutTone()}.
 * Values sharing the same toneless key simply overwrite each other.
 *
 * <p>NOTE(review): the fuzzy value is assembled from the {@code toString()} of
 * the shengmu and yunmu objects; confirm that a syllable without an initial
 * does not contribute a placeholder name to the result.
 *
 * @return a new mutable map from toneless pinyin to fuzzy pinyin
 */
public static Map<String, String> fzMap() {
    Map<String, String> table = new HashMap<>();

    for (Pinyin pinyin : Pinyin.values()) {
        String initial = pinyin.getShengmu().toString();
        String rime = pinyin.getYunmu().toString();

        // Retroflex initials fold into their flat counterparts.
        switch (initial) {
            case "zh":
                initial = "z";
                break;
            case "ch":
                initial = "c";
                break;
            case "sh":
                initial = "s";
                break;
            default:
                break;
        }

        // Back-nasal finals (-ng) fold into their front-nasal counterparts (-n).
        switch (rime) {
            case "eng":
                rime = "en";
                break;
            case "ang":
                rime = "an";
                break;
            case "ing":
                rime = "in";
                break;
            case "iang":
                rime = "ian";
                break;
            case "uang":
                rime = "uan";
                break;
            default:
                break;
        }

        table.put(pinyin.getPinyinWithoutTone(), initial + rime);
    }

    return table;
}

private static Map<String, String> fmap() {
HashMap<String, String> map = new HashMap<>();
map.put("zh", "z");
Expand All @@ -112,6 +144,7 @@ private static Map<String, String> fmap() {
}



public List<String> asList() {
List<String> list = Lists.newArrayListWithCapacity(pinyinList.size());
int i = 0;
Expand All @@ -138,16 +171,17 @@ public List<String> asList() {

} else {
String withoutTone = pinyin.getPinyinWithoutTone();

if (fuzzy) {
Matcher matcher = pattern.matcher(withoutTone);
StringBuffer sb = new StringBuffer();
if (matcher.find()) {
String part = matcher.group();
matcher.appendReplacement(sb, fuzzyMap.get(part));
}
matcher.appendTail(sb);
list.add(sb.toString());

list.add(fuzzyPinyinMap.getOrDefault(withoutTone, withoutTone));
// Matcher matcher = pattern.matcher(withoutTone);
// StringBuffer sb = new StringBuffer();
// if (matcher.find()) {
// String part = matcher.group();
// matcher.appendReplacement(sb, fuzzyMap.get(part));
// }
// matcher.appendTail(sb);
// list.add(sb.toString());
} else {
list.add(withoutTone);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ public CustomDictFillSubword(CustomDictionary dictionary) {
@Override
public void fill(@NotNull Wordnet wordnet, @NotNull Wordpath wordPath) {
DoubleArrayTrieStringIntMap trie = dictionary.getTrie();
if (trie == null) {
return;
}
char[] text = wordnet.getCharArray();
DoubleArrayTrieStringIntMap.DATMapMatcherInt searcher = trie.match(text, 0);

Expand Down
9 changes: 9 additions & 0 deletions mynlp/src/test/java/com/mayabot/nlp/pinyin/PinyinTest.kt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.mayabot.nlp.pinyin

import com.mayabot.nlp.Mynlp
import com.mayabot.nlp.Mynlp.Companion.instance
import org.junit.Assert
import org.junit.Test

Expand All @@ -11,5 +12,13 @@ class PinyinTest {
Assert.assertEquals("[zhao, zhao, mu, mu]", "朝朝暮暮".py())
}

/** Prints the fuzzy pinyin list produced for "转战"; nothing is asserted. */
@Test
fun test2() {
    val fuzzyPinyin = instance()
        .convertPinyin("转战")
        .fuzzy(true)
        .asList()
    println(fuzzyPinyin)
}

/** Converts the receiver with `Mynlp.instance().convertPinyin` and returns the pinyin list's `toString()`. */
private fun String.py(): String {
    val pinyinList = Mynlp.instance().convertPinyin(this).asList()
    return pinyinList.toString()
}
}
10 changes: 5 additions & 5 deletions mynlp/src/test/java/com/mayabot/nlp/segment/CustomDictTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,18 @@ class CustomDictTest {
mem.addWord("固收");
mem.rebuild()

mem.clear()

mem.addWord("固收");
mem.rebuild()

val lexer = Lexers.coreBuilder()
.withCustomDictionary(mem)
.collector()
.smartPickup()
.fillSubwordCustomDict(mem)
.done()
.build()
println("-----")
for (wordTerm in lexer.scan("长江1号")) {
println(wordTerm)
}
println("-----")

println(lexer.scan("ECS固收"))
println("----")
Expand Down

0 comments on commit 5333e87

Please sign in to comment.