This repository has been archived by the owner on Jul 16, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Borrowed from monodict copyright wak by apache-2.0 Signed-off-by: Hiroshi Miura <miurahr@linux.com>
- Loading branch information
Showing
10 changed files
with
1,322 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
package io.github.eb4j.ebview.dictionary; | ||
|
||
import io.github.eb4j.ebview.data.IDictionary; | ||
import io.github.eb4j.ebview.dictionary.pdic.PdicDictionary; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
import java.util.HashSet; | ||
import java.util.Set; | ||
|
||
public class PDic implements IDictionaryFactory { | ||
/** | ||
* Determine whether or not the supplied file is supported by this factory. | ||
* This is intended to be a lightweight check, e.g. looking for a file | ||
* extension. | ||
* | ||
* @param file The file to check | ||
* @return Whether or not the file is supported | ||
*/ | ||
@Override | ||
public boolean isSupportedFile(File file) { | ||
return file.getPath().endsWith(".DIC") || file.getPath().endsWith(".dic"); | ||
} | ||
|
||
/** | ||
* Load the given file and return an {@link IDictionary} that wraps it. | ||
* | ||
* @param file The file to load | ||
* @return An IDictionary file that can read articles from the file | ||
*/ | ||
@Override | ||
public Set<IDictionary> loadDict(File file) { | ||
Set<IDictionary> result = new HashSet<>(); | ||
try { | ||
IDictionary dictionary = new PdicDictionary(file); | ||
result.add(dictionary); | ||
} catch (IOException e) { | ||
e.printStackTrace(); | ||
} | ||
return result; | ||
} | ||
} |
111 changes: 111 additions & 0 deletions
111
src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicDictionary.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
package io.github.eb4j.ebview.dictionary.pdic; | ||
|
||
import io.github.eb4j.ebview.data.DictionaryEntry; | ||
import io.github.eb4j.ebview.data.IDictionary; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import java.io.File; | ||
import java.io.FileInputStream; | ||
import java.io.IOException; | ||
import java.nio.ByteBuffer; | ||
import java.nio.channels.FileChannel; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
public class PdicDictionary implements IDictionary { | ||
|
||
static final Logger LOG = LoggerFactory.getLogger(PdicDictionary.class.getName()); | ||
|
||
private final File srcFile; | ||
private final String cachePath; | ||
private PdicInfo dicInfo; | ||
|
||
public PdicDictionary(final File file) throws IOException { | ||
this.srcFile = file; | ||
cachePath = file.getPath() + ".idx"; | ||
final int headerSize = 256; | ||
PdicHeader header; // ヘッダー | ||
|
||
ByteBuffer headerbuff = ByteBuffer.allocate(headerSize); | ||
try (FileInputStream srcStream = new FileInputStream(srcFile); | ||
FileChannel srcChannel = srcStream.getChannel()) { | ||
int len = srcChannel.read(headerbuff); | ||
srcChannel.close(); | ||
if (len == headerSize) { | ||
header = new PdicHeader(); | ||
if (header.load(headerbuff) != 0) { | ||
// Unicode辞書 かつ ver6以上のみ許容 | ||
if ((header.version & 0xFF00) < 0x0600 || header.os != 0x20) { | ||
LOG.warn("Unsupported dictionary version" + srcFile.getName()); | ||
throw new RuntimeException(); | ||
} | ||
dicInfo = new PdicInfo(srcFile, header.header_size + header.extheader, | ||
header.block_size * header.index_block, header.nindex2, header.index_blkbit, | ||
header.block_size); | ||
if (!dicInfo.readIndexBlock(cachePath)) { | ||
LOG.warn("Failed to load dictionary index of " + srcFile.getName()); | ||
throw new RuntimeException(); | ||
} | ||
dicInfo.SetDicName(file.getName()); | ||
} | ||
} | ||
} | ||
} | ||
|
||
@Override | ||
public String getDictionaryName() { | ||
return dicInfo.GetDicName(); | ||
} | ||
|
||
/** | ||
* Read article's text. | ||
* | ||
* @param word The word to look up in the dictionary | ||
* @return List of entries. May be empty, but cannot be null. | ||
*/ | ||
@Override | ||
public List<DictionaryEntry> readArticles(String word) { | ||
List<DictionaryEntry> lists = new ArrayList<>(); | ||
if (dicInfo.searchWord(word.toLowerCase())) { | ||
PdicResult result = dicInfo.getResult(); | ||
for (int i = 0; i < result.getCount(); i ++) { | ||
String disp = result.getDisp(i); | ||
if (disp.equals("")) { | ||
disp = result.getIndex(i); | ||
} | ||
StringBuilder sb = new StringBuilder(); | ||
String phone = result.getPhone(i); | ||
if (phone != null) { | ||
sb.append(phone).append(" / "); | ||
} | ||
sb.append(result.getTrans(i)).append("<br/>"); | ||
String sample = result.getSample(i); | ||
if (sample != null) { | ||
sb.append(sample); | ||
} | ||
lists.add(new DictionaryEntry(disp, sb.toString(), getDictionaryName())); | ||
} | ||
} | ||
return lists; | ||
} | ||
|
||
/** | ||
* Read article's text. Matching is predictive, so e.g. supplying "term" | ||
* will return articles for "term", "terminology", "termite", etc. | ||
* | ||
* @param word The word to look up in the dictionary | ||
* @return List of entries. May be empty, but cannot be null. | ||
*/ | ||
@Override | ||
public List<DictionaryEntry> readArticlesPredictive(String word) { | ||
return readArticles(word); | ||
} | ||
|
||
/** | ||
* Dispose IDictionary. Default is no action. | ||
*/ | ||
@Override | ||
public void close() throws IOException { | ||
} | ||
} |
18 changes: 18 additions & 0 deletions
18
src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicElement.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
/** | ||
* Copyright (C) 2014 wak (Apache-2.0) | ||
*/ | ||
package io.github.eb4j.ebview.dictionary.pdic; | ||
|
||
/**
 * Plain mutable holder for the parts of one dictionary element.
 * All fields start out at their Java default values ({@code 0} / {@code null}).
 */
final class PdicElement {

    /** Attribute byte of the element. */
    public byte mAttr;

    /** Index string of the element. */
    public String mIndex;

    /** Display string of the element. */
    public String mDisp;

    /** Translation text (presumably; named after "trans" - confirm against the reader). */
    public String mTrans;

    /** Sample text (presumably usage examples - confirm against the reader). */
    public String mSample;

    /** Phonetic text (presumably pronunciation - confirm against the reader). */
    public String mPhone;

    /** Creates an element with every field unset. */
    public PdicElement() {
    }
}
|
156 changes: 156 additions & 0 deletions
156
src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicHeader.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
/** | ||
* Copyright (C) 2014 wak (Apache-2.0) | ||
*/ | ||
package io.github.eb4j.ebview.dictionary.pdic; | ||
|
||
import java.nio.ByteBuffer; | ||
import java.nio.ByteOrder; | ||
import java.nio.charset.Charset; | ||
|
||
/**
 * In-memory representation of a PDIC dictionary file header.
 * <p>
 * {@link #load(ByteBuffer)} decodes the little-endian header block into the
 * public fields below. Two layouts are understood: versions 5/6 and version 4.
 */
final class PdicHeader {
    /** Byte length of the header-name string field. */
    private static final int L_HEADERNAME = 100;
    /** Byte length of the dictionary-title string field. */
    private static final int L_DICTITLE = 40;

    public String headername;    // dictionary header title
    public String dictitle;      // dictionary name
    public short version;        // dictionary version
    public short lword;          // maximum headword length
    public short ljapa;          // maximum translation length
    public short block_size;     // (256) bytes per block, fixed
    public short index_block;    // number of index blocks
    public short header_size;    // header size in bytes
    public short index_size;     // index size in bytes (unused)

    public short empty_block;    // first physical block number of the free blocks (-1 if none)
    public short nindex;         // number of index elements (unused)
    public short nblock;         // number of data blocks in use (unused)
    public int nword;            // number of registered words

    public byte dicorder;        // dictionary order
    public byte dictype;         // dictionary type

    public byte attrlen;         // length of the word attribute
    public byte os;              // OS
    public int olenumber;        // serial number for OLE
    public short lid_word;       // language ID: headwords

    public short lid_japa;       // language ID: translation part
    public short lid_exp;        // language ID: example part
    public short lid_pron;       // language ID: pronunciation symbols
    public short lid_other;      // language ID: other
    public boolean index_blkbit; // false:16bit, true:32bit
    public int extheader;        // extended header size
    public int empty_block2;     // first physical block number of the free blocks
    public int nindex2;          // number of index elements
    public int nblock2;          // number of data blocks in use

    public int update_count;     // dictionary update count (not populated by load)
    public String dicident;      // dictionary identifier (not populated by load)

    /**
     * Constructor.
     */
    public PdicHeader() {
    }

    /**
     * Decode a header block into this object's fields.
     * <p>
     * The buffer is expected in "just written" state: it is flipped here
     * before reading and switched to little-endian byte order.
     *
     * @param header_block the header data
     * @return the major dictionary version ({@code version >> 8}) when the
     *         fixed-part check passes, otherwise {@code 0}
     * @throws RuntimeException if the version is none of 4, 5 or 6
     */
    public int load(ByteBuffer header_block) throws RuntimeException {
        final Charset sjis = Charset.forName("X-SJIS");

        header_block.flip();
        header_block.order(ByteOrder.LITTLE_ENDIAN);

        headername = readFixedString(header_block, L_HEADERNAME, sjis);
        dictitle = readFixedString(header_block, L_DICTITLE, sjis);
        version = header_block.getShort();

        final boolean fixedPartOk;
        switch (version & 0xFF00) {
            case 0x0500:
            case 0x0600:
                readLeadingFields(header_block);
                readTrailingFieldsV5(header_block);
                // fixed-part check
                fixedPartOk = attrlen == 1;
                break;
            case 0x0400:
                readLeadingFields(header_block);
                readTrailingFieldsV4(header_block);
                // fixed-part check
                fixedPartOk = block_size == 0x100
                        && header_size == 0x100
                        && attrlen == 1;
                break;
            default:
                throw new RuntimeException("Unsupported format");
        }
        return fixedPartOk ? version >> 8 : 0;
    }

    /** Consume {@code length} bytes from {@code src} and decode them with {@code charset}. */
    private static String readFixedString(ByteBuffer src, int length, Charset charset) {
        final byte[] raw = new byte[length];
        src.get(raw);
        return charset.decode(ByteBuffer.wrap(raw)).toString();
    }

    /** Read the fields from {@code lword} through {@code attrlen}, laid out identically in both formats. */
    private void readLeadingFields(ByteBuffer src) {
        lword = src.getShort();
        ljapa = src.getShort();

        block_size = src.getShort();
        index_block = src.getShort();
        header_size = src.getShort();
        index_size = src.getShort();
        empty_block = src.getShort();
        nindex = src.getShort();
        nblock = src.getShort();

        nword = src.getInt();

        dicorder = src.get();
        dictype = src.get();
        attrlen = src.get();
    }

    /** Read the fields that follow {@code attrlen} in the version 5/6 layout. */
    private void readTrailingFieldsV5(ByteBuffer src) {
        os = src.get();
        olenumber = src.getInt();

        lid_word = src.getShort();
        lid_japa = src.getShort();
        lid_exp = src.getShort();
        lid_pron = src.getShort();
        lid_other = src.getShort();

        index_blkbit = (src.get() != 0);
        src.get(); // dummy0
        extheader = src.getInt();
        empty_block2 = src.getInt();
        nindex2 = src.getInt();
        nblock2 = src.getInt();
    }

    /** Read the fields that follow {@code attrlen} in the version 4 layout. */
    private void readTrailingFieldsV4(ByteBuffer src) {
        olenumber = src.getInt();
        os = src.get();

        lid_word = src.getShort();
        lid_japa = src.getShort();
        lid_exp = src.getShort();
        lid_pron = src.getShort();
        lid_other = src.getShort();

        extheader = src.getInt();
        empty_block2 = src.getInt();
        nindex2 = src.getInt();
        nblock2 = src.getInt();
        index_blkbit = (src.get() != 0);
    }
}
Oops, something went wrong.