From e5c984472d899e07ac482105f5e9c28620da088f Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Sun, 9 Jan 2022 12:40:16 +0900 Subject: [PATCH] Pdic: use pdic4j library@0.3.2 Signed-off-by: Hiroshi Miura --- build.gradle.kts | 2 +- .../github/eb4j/ebview/dictionary/PDic.java | 4 +- .../eb4j/ebview/dictionary/pdic/PdicDict.java | 95 +++ .../dictionary/pdic/PdicDictionary.java | 115 --- .../ebview/dictionary/pdic/PdicElement.java | 20 - .../ebview/dictionary/pdic/PdicHeader.java | 111 --- .../eb4j/ebview/dictionary/pdic/PdicInfo.java | 699 ------------------ .../ebview/dictionary/pdic/PdicInfoCache.java | 254 ------- .../ebview/dictionary/pdic/PdicResult.java | 41 - 9 files changed, 98 insertions(+), 1243 deletions(-) create mode 100644 src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicDict.java delete mode 100644 src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicDictionary.java delete mode 100644 src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicElement.java delete mode 100644 src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicHeader.java delete mode 100644 src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicInfo.java delete mode 100644 src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicInfoCache.java delete mode 100644 src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicResult.java diff --git a/build.gradle.kts b/build.gradle.kts index 88e00e7..df58cec 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -87,7 +87,7 @@ dependencies { implementation("tokyo.northside:url-protocol-handler:0.1.4") // for pdic - implementation("com.ibm.icu:icu4j-charset:70.1") + implementation("io.github.eb4j:pdic4j:0.3.2") // for stardict implementation("io.github.dictzip:dictzip:0.9.5") diff --git a/src/main/java/io/github/eb4j/ebview/dictionary/PDic.java b/src/main/java/io/github/eb4j/ebview/dictionary/PDic.java index c5423f5..3212e02 100644 --- a/src/main/java/io/github/eb4j/ebview/dictionary/PDic.java +++ b/src/main/java/io/github/eb4j/ebview/dictionary/PDic.java @@ -1,7 +1,7 @@ package io.github.eb4j.ebview.dictionary; import io.github.eb4j.ebview.data.IDictionary; -import io.github.eb4j.ebview.dictionary.pdic.PdicDictionary; +import io.github.eb4j.ebview.dictionary.pdic.PdicDict; import java.io.File; import java.io.IOException; @@ -35,7 +35,7 @@ public boolean isSupportedFile(final File file) { public Set loadDict(final File file) { Set result = new HashSet<>(); try { - IDictionary dictionary = new PdicDictionary(file); + IDictionary dictionary = new PdicDict(file); result.add(dictionary); } catch (IOException e) { e.printStackTrace(); diff --git a/src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicDict.java b/src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicDict.java new file mode 100644 index 0000000..9d1581b --- /dev/null +++ b/src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicDict.java @@ -0,0 +1,95 @@ +package io.github.eb4j.ebview.dictionary.pdic; + +import io.github.eb4j.ebview.data.DictionaryEntry; +import io.github.eb4j.ebview.data.IDictionary; +import io.github.eb4j.pdic.PdicDictionary; +import io.github.eb4j.pdic.PdicElement; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; + +/** + * @author wak (Apache-2.0) + * @author Hiroshi Miura + */ +public class PdicDict implements IDictionary { + + private final PdicDictionary dict; + private final Locale sourceLocale; + private final String dictionaryName; + + /** + * Construct with .dic file. + * It create index cache file with name .dic.idx. + * + * @param file PDIC .dic file. + * @throws IOException when access error occurred. + */ + public PdicDict(final File file) throws IOException { + File cache = new File(file.getPath() + ".idx"); + sourceLocale = Locale.ROOT; + this.dict = PdicDictionary.loadDictionary(file, cache); + dictionaryName = file.getName(); + } + + @Override + public String getDictionaryName() { + return dictionaryName; + } + + /** + * Read article's text. + * + * @param word The word to look up in the dictionary + * @return List of entries. May be empty, but cannot be null. + */ + @Override + public List readArticles(final String word) throws IOException { + return makeDictionaryEntries(dict.getEntries(word.toLowerCase(sourceLocale))); + } + + /** + * Read article's text. Matching is predictive, so e.g. supplying "term" + * will return articles for "term", "terminology", "termite", etc. + * + * @param word The word to look up in the dictionary + * @return List of entries. May be empty, but cannot be null. + */ + @Override + public List readArticlesPredictive(final String word) throws IOException { + return makeDictionaryEntries(dict.getEntriesPredictive(word.toLowerCase(sourceLocale))); + } + + /** + * Dispose IDictionary. Default is no action. + */ + @Override + public void close() { + } + + private List makeDictionaryEntries(final List results) { + List lists = new ArrayList<>(); + for (PdicElement result : results) { + String word = result.getHeadWord(); + if (word.equals("")) { + word = result.getIndexWord(); + } + StringBuilder articleBuilder = new StringBuilder(); + String pronunciation = result.getPronunciation(); + if (pronunciation != null) { + articleBuilder.append(pronunciation).append(" / "); + } + articleBuilder.append(result.getTranslation()).append("
"); + String example = result.getExample(); + if (example != null) { + articleBuilder.append(example); + } + lists.add(new DictionaryEntry(word, articleBuilder.toString(), dictionaryName)); + } + return lists; + } + +} diff --git a/src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicDictionary.java b/src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicDictionary.java deleted file mode 100644 index f2e4882..0000000 --- a/src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicDictionary.java +++ /dev/null @@ -1,115 +0,0 @@ -package io.github.eb4j.ebview.dictionary.pdic; - -import io.github.eb4j.ebview.data.DictionaryEntry; -import io.github.eb4j.ebview.data.IDictionary; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.channels.FileChannel; -import java.util.ArrayList; -import java.util.List; - -/** - * @author wak (Apache-2.0) - * @author Hiroshi Miura - */ -public class PdicDictionary implements IDictionary { - - static final Logger LOG = LoggerFactory.getLogger(PdicDictionary.class.getName()); - - private final File srcFile; - private final String cachePath; - private PdicInfo dicInfo; - - public PdicDictionary(final File file) throws IOException { - this.srcFile = file; - cachePath = file.getPath() + ".idx"; - final int headerSize = 256; - PdicHeader header; // ヘッダー - - ByteBuffer headerbuff = ByteBuffer.allocate(headerSize); - try (FileInputStream srcStream = new FileInputStream(srcFile); - FileChannel srcChannel = srcStream.getChannel()) { - int len = srcChannel.read(headerbuff); - srcChannel.close(); - if (len == headerSize) { - header = new PdicHeader(); - if (header.load(headerbuff) != 0) { - // Unicode辞書 かつ ver5以上のみ許容 - if ((header.version & 0xFF00) < 0x0500 || header.os != 0x20) { - LOG.warn("Unsupported dictionary version" + srcFile.getName()); - throw new RuntimeException(); - } - dicInfo = new PdicInfo(srcFile, header.header_size + header.extheader, - header.block_size * header.index_block, header.nindex2, header.index_blkbit, - header.block_size); - if (!dicInfo.readIndexBlock(cachePath)) { - LOG.warn("Failed to load dictionary index of " + srcFile.getName()); - throw new RuntimeException(); - } - dicInfo.setDicName(file.getName()); - } - } - } - } - - @Override - public String getDictionaryName() { - return dicInfo.getDicName(); - } - - /** - * Read article's text. - * - * @param word The word to look up in the dictionary - * @return List of entries. May be empty, but cannot be null. - */ - @Override - public List readArticles(final String word) { - List lists = new ArrayList<>(); - if (dicInfo.searchWord(word.toLowerCase())) { - PdicResult result = dicInfo.getResult(); - for (int i = 0; i < result.getCount(); i++) { - String disp = result.getDisp(i); - if (disp.equals("")) { - disp = result.getIndex(i); - } - StringBuilder sb = new StringBuilder(); - String phone = result.getPhone(i); - if (phone != null) { - sb.append(phone).append(" / "); - } - sb.append(result.getTrans(i)).append("
"); - String sample = result.getSample(i); - if (sample != null) { - sb.append(sample); - } - lists.add(new DictionaryEntry(disp, sb.toString(), getDictionaryName())); - } - } - return lists; - } - - /** - * Read article's text. Matching is predictive, so e.g. supplying "term" - * will return articles for "term", "terminology", "termite", etc. - * - * @param word The word to look up in the dictionary - * @return List of entries. May be empty, but cannot be null. - */ - @Override - public List readArticlesPredictive(final String word) { - return readArticles(word); - } - - /** - * Dispose IDictionary. Default is no action. - */ - @Override - public void close() { - } -} diff --git a/src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicElement.java b/src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicElement.java deleted file mode 100644 index a688c16..0000000 --- a/src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicElement.java +++ /dev/null @@ -1,20 +0,0 @@ -package io.github.eb4j.ebview.dictionary.pdic; - -/** - * @author wak (Apache-2.0) - * @author Hiroshi Miura - */ -@SuppressWarnings("visibilitymodifier") -final class PdicElement { - public byte mAttr = 0; - public String mIndex = null; - public String mDisp = null; - public String mTrans = null; - public String mSample = null; - public String mPhone = null; - - PdicElement() { - } - -} - diff --git a/src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicHeader.java b/src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicHeader.java deleted file mode 100644 index cac72fc..0000000 --- a/src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicHeader.java +++ /dev/null @@ -1,111 +0,0 @@ -package io.github.eb4j.ebview.dictionary.pdic; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * @author wak (Apache-2.0) - * @author Hiroshi Miura - */ -@SuppressWarnings({"visibilitymodifier", "membername"}) -final class PdicHeader { - private static final int L_HEADERNAME = 100; // ヘッダー部文字列長 - private static final int L_DICTITLE = 40; // 辞書タイトル名長 - - // public String headername; // 辞書ヘッダータイトル - // public String dictitle; // 辞書名 - public short version; // 辞書のバージョン - // public short lword; // 見出語の最大長 - // public short ljapa; // 訳語の最大長 - public short block_size; // (256 ) 1ブロックのバイト数 固定 - public short index_block; // インデックスブロック数 - public short header_size; // ヘッダーのバイト数 - // public short index_size; // ( ) インデックスのバイト数 未使用 - - // public short nindex; // ( ) インデックスの要素の数 未使用 - // public short nblock; // ( ) 使用データブロック数 未使用 - // public int nword; // 登録単語数 - - // public byte dicorder; // 辞書の順番 - // public byte dictype; // 辞書の種別 - - public byte attrlen; // 単語属性の長さ - public byte os; // OS - public boolean index_blkbit; // false:16bit, true:32bit - public int extheader; // 拡張ヘッダーサイズ - public int nindex2; // インデックス要素の数 - // public int nblock2; // 使用データブロック数 - - // public int update_count; // 辞書更新回数 - // public String dicident; // 辞書識別子 - - /** - * コンストラクタ. - */ - PdicHeader() { - } - - /** - * @param headerBlock ヘッダーデータ部分 - * @return 辞書バージョン - */ - public int load(final ByteBuffer headerBlock) throws RuntimeException { - int ret = 0; - // Charset sjisset = Charset.forName("X-SJIS"); - - byte[] headernamebuff = new byte[L_HEADERNAME]; - byte[] dictitlebuff = new byte[L_DICTITLE]; - - headerBlock.flip(); - headerBlock.order(ByteOrder.LITTLE_ENDIAN); - headerBlock.get(headernamebuff); - // headername = sjisset.decode(ByteBuffer.wrap(headernamebuff)).toString(); - headerBlock.get(dictitlebuff); - // dictitle = sjisset.decode(ByteBuffer.wrap(dictitlebuff)).toString(); - version = headerBlock.getShort(); - if ((version & 0xFF00) == 0x0500 || (version & 0xFF00) == 0x0600) { - headerBlock.getShort(); // lword - headerBlock.getShort(); // ljapa - - block_size = headerBlock.getShort(); - index_block = headerBlock.getShort(); - header_size = headerBlock.getShort(); - headerBlock.getShort(); // index_size - headerBlock.getShort(); // empty_block - headerBlock.getShort(); // nindex - headerBlock.getShort(); // nblock - - headerBlock.getInt(); // nword - - headerBlock.get(); // dicorder - headerBlock.get(); // dictype - attrlen = headerBlock.get(); - os = headerBlock.get(); - - headerBlock.getInt(); // ole_number - - // lid_dummy - headerBlock.getShort(); - headerBlock.getShort(); - headerBlock.getShort(); - headerBlock.getShort(); - headerBlock.getShort(); - - index_blkbit = (headerBlock.get() != 0); - headerBlock.get(); // dummy0 - extheader = headerBlock.getInt(); - headerBlock.getInt(); //empty_block2 - nindex2 = headerBlock.getInt(); - headerBlock.getInt(); // nblock2 - - // 固定部分チェック - if (attrlen == 1) { - ret = version >> 8; - } - } else { - throw new RuntimeException("Unsupported format"); - } - return ret; - } - -} diff --git a/src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicInfo.java b/src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicInfo.java deleted file mode 100644 index d00d7d2..0000000 --- a/src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicInfo.java +++ /dev/null @@ -1,699 +0,0 @@ -package io.github.eb4j.ebview.dictionary.pdic; - -import com.ibm.icu.charset.CharsetICU; -import org.apache.commons.lang3.ArrayUtils; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.RandomAccessFile; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.CharBuffer; -import java.nio.charset.Charset; -import java.util.WeakHashMap; - -/** - * @author wak (Apache-2.0) - * @author Hiroshi Miura - */ -@SuppressWarnings("membername") -class PdicInfo { - protected File m_file; - protected int m_bodyptr; - protected PdicResult mSearchResult; - - protected int m_start; - protected int m_size; - protected int m_blockbits; - protected int m_nindex; - protected int m_blocksize; - protected boolean m_match; - protected int m_searchmax; // 最大検索件数 - protected String m_dicname; // 辞書名 - - protected int[] mIndexPtr; - - protected Charset mMainCharset; - protected Charset mPhoneCharset; - protected WeakHashMap mEncodeCache = new WeakHashMap<>(); - - protected AnalyzeBlock mAnalyze; - protected int mLastIndex = 0; - protected PdicInfoCache mPdicInfoCache; - - private RandomAccessFile mSrcStream = null; - - @SuppressWarnings("avoidinlineconditionals") - PdicInfo(final File file, final int start, final int size, final int nindex, final boolean blockbits, - final int blocksize) { - m_file = file; - m_start = start; - m_size = size; - m_nindex = nindex; - m_blockbits = (blockbits) ? 4 : 2; - m_blocksize = blocksize; - m_searchmax = 10; - - mSearchResult = new PdicResult(); - mPhoneCharset = CharsetICU.forNameICU("BOCU-1"); - mMainCharset = CharsetICU.forNameICU("BOCU-1"); - try { - mSrcStream = new RandomAccessFile(m_file, "r"); - mAnalyze = new AnalyzeBlock(); - mPdicInfoCache = new PdicInfoCache(mSrcStream, m_start, m_size); - } catch (FileNotFoundException ignored) { - } - } - - /** - * byte配列の本文文字列をCharBufferに変換する. - */ - static CharBuffer decodetoCharBuffer(final Charset cs, final byte[] array, final int pos, final int len) { - return cs.decode(ByteBuffer.wrap(array, pos, len)); - } - - /** - * 本文の文字列をByteBufferに変換する. - */ - static ByteBuffer encodetoByteBuffer(final Charset cs, final String str) { - return cs.encode(str); - } - - /** - * インデックス領域を検索. - * - * @return index of block - */ - public int searchIndexBlock(final String word) { - int min = 0; - int max = m_nindex - 1; - - ByteBuffer buffer = mEncodeCache.get(word); - if (buffer == null) { - buffer = encodetoByteBuffer(mMainCharset, word); - mEncodeCache.put(word, buffer); - } - int limit = buffer.limit(); - byte[] bytes = new byte[limit]; - System.arraycopy(buffer.array(), 0, bytes, 0, limit); - int wordlen = bytes.length; - - int[] indexPtr = mIndexPtr; - int blockbits = m_blockbits; - PdicInfoCache pdicInfoCache = mPdicInfoCache; - - for (int i = 0; i < 32; i++) { - if ((max - min) <= 1) { - return min; - } - final int look = (int) (((long) min + max) / 2); - final int len = indexPtr[look + 1] - indexPtr[look] - blockbits; - final int comp = pdicInfoCache.compare(bytes, 0, wordlen, indexPtr[look], len); - if (comp < 0) { - max = look; - } else if (comp > 0) { - min = look; - } else { - return look; - } - } - return min; - } - - /** - * Read index blocks. - * - * @return true when successfully read block, otherwise false. - */ - public boolean readIndexBlock(final String indexcache) { - if (mSrcStream != null) { - m_bodyptr = m_start + m_size; // 本体位置=( index開始位置+インデックスのサイズ) - if (indexcache != null) { - try (FileInputStream fis = new FileInputStream(indexcache)) { - byte[] buff = new byte[(m_nindex + 1) * 4]; - int readlen = fis.read(buff); - if (readlen == buff.length) { - final int indexlen = m_nindex; - final int[] indexptr = new int[m_nindex + 1]; - mIndexPtr = indexptr; - int ptr = 0; - for (int i = 0; i <= indexlen; i++) { - int b; - int dat; - b = buff[ptr++]; - b &= 0xFF; - dat = b; - b = buff[ptr++]; - b &= 0xFF; - dat |= (b << 8); - b = buff[ptr++]; - b &= 0xFF; - dat |= (b << 16); - b = buff[ptr++]; - b &= 0xFF; - dat |= (b << 24); - indexptr[i] = dat; - } - return true; - } - } catch (IOException ignored) { - } - } - - // インデックスの先頭から見出し語のポインタを拾っていく - final int nindex = m_nindex; - final int[] indexPtr = new int[nindex + 1]; // インデックスポインタの配列確保 - mIndexPtr = indexPtr; - if (mPdicInfoCache.createIndex(m_blockbits, nindex, indexPtr)) { - byte[] buff = new byte[indexPtr.length * 4]; - int p = 0; - for (int c = 0; c <= nindex; c++) { - int data = indexPtr[c]; - buff[p++] = (byte) (data & 0xFF); - data >>= 8; - buff[p++] = (byte) (data & 0xFF); - data >>= 8; - buff[p++] = (byte) (data & 0xFF); - data >>= 8; - buff[p++] = (byte) (data & 0xFF); - } - if (indexcache != null) { - try (FileOutputStream fos = new FileOutputStream(indexcache)) { - fos.write(buff, 0, buff.length); - } catch (IOException ignored) { - } - } - return true; - } - } - mIndexPtr = null; - return false; - } - - /** - * num個目の見出し語の実体が入っているブロック番号を返す. - */ - public int getBlockNo(final int num) { - int blkptr = mIndexPtr[num] - m_blockbits; - mLastIndex = num; - if (m_blockbits == 4) { - return mPdicInfoCache.getInt(blkptr); - } else { - return mPdicInfoCache.getShort(blkptr); - } - } - - /** - * 次の0までの長さを返す. - * - * @param array target byte array - * @param pos start position - * @return length of index. - */ - static int getLengthToNextZero(final byte[] array, final int pos) { - return ArrayUtils.indexOf(array, (byte) 0, pos) - pos; - // int len = 0; - // while (array[pos + len] != 0) - // len++; - // return len; - } - - boolean isMatch() { - return m_match; - } - - public String getFilename() { - return m_file.getName(); - } - - public int getSearchMax() { - return m_searchmax; - } - - public void setSearchMax(final int m) { - m_searchmax = m; - } - - public void setDicName(final String b) { - m_dicname = b; - } - - public String getDicName() { - return m_dicname; - } - - // 単語を検索する - public boolean searchWord(final String word) { - // 検索結果クリア - int cnt = 0; - mSearchResult.clear(); - - int ret = searchIndexBlock(word); - - boolean match = false; - - boolean searchret = false; - while (true) { - // 最終ブロックは超えない - if (ret < m_nindex) { - // 該当ブロック読み出し - int block = getBlockNo(ret++); - byte[] pblk = readBlockData(block); - if (pblk != null) { - mAnalyze.setBuffer(pblk); - mAnalyze.setSearch(word); - searchret = mAnalyze.searchWord(); - // 未発見でEOBの時のみもう一回、回る - if (!searchret && mAnalyze.mEob) { - continue; - } - } - } - // 基本一回で抜ける - break; - } - if (searchret) { - // 前方一致するものだけ結果に入れる - do { - PdicElement res = mAnalyze.getRecord(); - if (res == null) { - break; - } - // 完全一致するかチェック - if (res.mIndex.compareTo(word) == 0) { - match = true; - } - mSearchResult.add(res); - - cnt++; - // 取得最大件数超えたら打ち切り - } while (cnt < m_searchmax && hasMoreResult(true)); - } - return match; - } - - // 前方一致する単語の有無を返す - boolean searchPrefix(final String word) { - int ret = searchIndexBlock(word); - - for (int blk = 0; blk < 2; blk++) { - // 最終ブロックは超えない - if (ret + blk >= m_nindex) { - break; - } - int block = getBlockNo(ret + blk); - - // 該当ブロック読み出し - byte[] pblk = readBlockData(block); - - if (pblk != null) { - mAnalyze.setBuffer(pblk); - mAnalyze.setSearch(word); - - if (mAnalyze.searchWord()) { - return true; - } - } - } - return false; - } - - PdicResult getResult() { - return mSearchResult; - } - - public PdicResult getMoreResult() { - mSearchResult.clear(); - if (mAnalyze != null) { - int cnt = 0; - // 前方一致するものだけ結果に入れる - while (cnt < m_searchmax && hasMoreResult(true)) { - PdicElement res = mAnalyze.getRecord(); - if (res == null) { - break; - } - mSearchResult.add(res); - cnt++; - } - } - return mSearchResult; - } - - public boolean hasMoreResult(final boolean incrementptr) { - boolean result = mAnalyze.hasMoreResult(incrementptr); - if (!result) { - if (mAnalyze.isEob()) { // EOBなら次のブロック読み出し - int nextindex = mLastIndex + 1; - // 最終ブロックは超えない - if (nextindex < m_nindex) { - int block = getBlockNo(nextindex); - - // 該当ブロック読み出し - byte[] pblk = readBlockData(block); - - if (pblk != null) { - mAnalyze.setBuffer(pblk); - result = mAnalyze.hasMoreResult(incrementptr); - } - } - } - } - return result; - } - - /** - * データブロックを読み込み. - * - * @param blkno - * @return 読み込まれたデータブロック - */ - byte[] readBlockData(final int blkno) { - byte[] buff = new byte[0x200]; - byte[] pbuf = buff; - try { - mSrcStream.seek(m_bodyptr + (long) blkno * m_blocksize); - - // 1ブロック分読込(1セクタ分先読み) - if (mSrcStream.read(pbuf, 0, 0x200) < 0) { - return null; - } - - // 長さ取得 - int len = ((int) (pbuf[0])) & 0xFF; - len |= (((int) (pbuf[1])) & 0xFF) << 8; - - // ブロック長判定 - if ((len & 0x8000) != 0) { // 32bit - len &= 0x7FFF; - } - if (len > 0) { - // ブロック不足分読込 - if (len * m_blocksize > 0x200) { - pbuf = new byte[m_blocksize * len]; - System.arraycopy(buff, 0, pbuf, 0, 0x200); - if (mSrcStream.read(pbuf, 0x200, len * m_blocksize - 0x200) < 0) { - return null; - } - } - } else { - pbuf = null; - } - return pbuf; - } catch (IOException ignored) { - } - return null; - } - - final class AnalyzeBlock { - private byte[] mBuff; - private boolean mLongfield; - private byte[] mWord; - private int mFoundPtr = -1; - private int mNextPtr = -1; - private final byte[] mCompbuff = new byte[1024]; - private int mCompLen = 0; - private boolean mEob = false; - - AnalyzeBlock() { - } - - public void setBuffer(final byte[] buff) { - mBuff = buff; - mLongfield = ((buff[1] & 0x80) != 0); - ByteBuffer mBB = ByteBuffer.wrap(buff); - mBB.order(ByteOrder.LITTLE_ENDIAN); - mNextPtr = 2; - mEob = false; - mCompLen = 0; - } - - public void setSearch(final String word) { - ByteBuffer buffer = encodetoByteBuffer(mMainCharset, word); - mEncodeCache.put(word, buffer); - mWord = new byte[buffer.limit()]; - System.arraycopy(buffer.array(), 0, mWord, 0, buffer.limit()); - } - - public boolean isEob() { - return mEob; - } - - /** - * ブロックデータの中から指定語を探す. - */ - public boolean searchWord() { - final byte[] bytes = mWord; - final byte[] buff = mBuff; - final boolean longfield = mLongfield; - final byte[] compbuff = mCompbuff; - final int wordlen = bytes.length; - - mFoundPtr = -1; - - // 訳語データ読込 - int ptr = mNextPtr; - mNextPtr = -1; - while (true) { - int flen = 0; - int retptr = ptr; - int b; - - b = buff[ptr++]; - flen |= (b & 0xFF); - - b = buff[ptr++]; - b <<= 8; - flen |= (b & 0xFF00); - - if (longfield) { - b = buff[ptr++]; - b <<= 16; - flen |= (b & 0xFF0000); - - b = buff[ptr++]; - b <<= 24; - flen |= (b & 0x7F000000); - } - if (flen == 0) { - mEob = true; - break; - } - int qtr = ptr; - ptr += flen + 1; - ptr++; - - - // 圧縮長 - int complen = (int) buff[qtr++]; - complen &= 0xFF; - - // 見出し語属性 skip - qtr++; - - // 見出し語圧縮位置保存 - while ((compbuff[complen++] = buff[qtr++]) != 0) ; - - // 見出し語の方が短ければ不一致 - if (complen < wordlen) { - continue; - } - - - // 前方一致で比較 - boolean equal = true; - for (int i = 0; i < wordlen; i++) { - - if (compbuff[i] != bytes[i]) { - equal = false; - int cc = compbuff[i]; - cc &= 0xFF; - int cw = bytes[i]; - cw &= 0xFF; - // 超えてたら打ち切る - if (cc > cw) { - return false; - } - break; - } - } - if (equal) { - mFoundPtr = retptr; - mNextPtr = ptr; - mCompLen = complen - 1; - return true; - } - } - return false; - } - - /** - * 最後の検索結果の単語を返す. - * - * @return search result - */ - PdicElement getRecord() { - if (mFoundPtr == -1) { - return null; - } - final PdicElement res = new PdicElement(); - - res.mIndex = decodetoCharBuffer(mMainCharset, mCompbuff, 0, mCompLen).toString(); - // ver6対応 見出し語が、<検索インデックス><表示用文字列>の順に - // 設定されていてるので、分割する。 - // それ以前のverではdispに空文字列を保持させる。 - - final String indexstr = res.mIndex; - final int tab = indexstr.indexOf('\t'); - if (tab == -1) { - res.mDisp = ""; - } else { - res.mIndex = indexstr.substring(0, tab); - res.mDisp = indexstr.substring(tab + 1); - } - - final byte[] buff = mBuff; - final boolean longfield = mLongfield; - byte attr = 0; - - // 訳語データ読込 - int ptr = mFoundPtr; - - if (longfield) { - ptr += 4; - } else { - ptr += 2; - } - int qtr = ptr; - - // 圧縮長 - // int complen = buff[qtr++]; - // complen &= 0xFF; - qtr++; - - // 見出し語属性 skip - attr = buff[qtr++]; - - while (buff[qtr++] != 0) { - // 見出し語 skip - } - - // 訳語 - if ((attr & 0x10) != 0) { // 拡張属性ありの時 - int trnslen = getLengthToNextZero(buff, qtr); - res.mTrans = decodetoCharBuffer(mMainCharset, buff, qtr, trnslen).toString().replace("\r", ""); - qtr += trnslen; // 次のNULLまでスキップ - - // 拡張属性取得 - byte eatr; - while (((eatr = buff[qtr++]) & 0x80) == 0) { - if ((eatr & (0x10 | 0x40)) == 0) { // バイナリOFF&圧縮OFFの場合 - if ((eatr & 0x0F) == 0x01) { // 用例 - int len = getLengthToNextZero(buff, qtr); - res.mSample = decodetoCharBuffer(mMainCharset, buff, qtr, len).toString().replace("\r", ""); - qtr += len; // 次のNULLまでスキップ - } else if ((eatr & 0x0F) == 0x02) { // 発音 - int len = getLengthToNextZero(buff, qtr); - res.mPhone = decodetoCharBuffer(mPhoneCharset, buff, qtr, len).toString(); - qtr += len; // 次のNULLまでスキップ - } - } else { - // バイナリ属性か圧縮属性が来たら打ち切り - break; - } - } - } else { - // 残り全部が訳文 - res.mTrans = decodetoCharBuffer(mMainCharset, buff, qtr, mNextPtr - qtr).toString().replace("\r", ""); - } - return res; - } - - // 次の項目が検索語に前方一致するかチェックする - public boolean hasMoreResult(final boolean incrementptr) { - byte[] word; - final byte[] buff = mBuff; - final boolean longfield = mLongfield; - final byte[] compbuff = mCompbuff; - - // next search - if (mFoundPtr == -1) { - return false; - } - word = mWord; - - int wordlen = word.length; - - // 訳語データ読込 - int ptr = mNextPtr; - - int retptr = ptr; - int flen; - int b; - - b = buff[ptr++]; - flen = (b & 0xFF); - - b = buff[ptr++]; - b <<= 8; - flen |= (b & 0xFF00); - - if (longfield) { - b = buff[ptr++]; - b <<= 16; - flen |= (b & 0xFF0000); - - b = buff[ptr++]; - b <<= 24; - flen |= (b & 0x7F000000); - } - if (flen == 0) { - mEob = true; - return false; - } - int qtr = ptr; - ptr += flen + 1; - ptr++; - - // 圧縮長 - int complen = buff[qtr++]; - complen &= 0xFF; - - // 見出し語属性 skip - qtr++; - - // 見出し語圧縮位置保存 - while ((compbuff[complen++] = buff[qtr++]) != 0) ; - - // 見出し語の方が短ければ不一致 - if (complen < wordlen) { - return false; - } - - // 前方一致で比較 - boolean equal = true; - for (int i = 0; i < wordlen; i++) { - if (compbuff[i] != word[i]) { - equal = false; - int cc = compbuff[i]; - cc &= 0xFF; - int cw = word[i]; - cw &= 0xFF; - // 超えてたら打ち切る - if (cc > cw) { - return false; - } - break; - } - } - if (equal && incrementptr) { - mFoundPtr = retptr; - mNextPtr = ptr; - mCompLen = complen - 1; - } - return equal; - } - } -} diff --git a/src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicInfoCache.java b/src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicInfoCache.java deleted file mode 100644 index 5f4cd90..0000000 --- a/src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicInfoCache.java +++ /dev/null @@ -1,254 +0,0 @@ -package io.github.eb4j.ebview.dictionary.pdic; - -import com.ibm.icu.charset.CharsetICU; - -import java.io.IOException; -import java.io.RandomAccessFile; -import java.lang.ref.WeakReference; -import java.util.WeakHashMap; - -/** - * @author wak (Apache-2.0) - * @author Hiroshi Miura - */ -class PdicInfoCache { - private final boolean mFix; - private final int mBlockSize; - private final RandomAccessFile mFile; - private final int mStart; - private final int mSize; - private final WeakHashMap> mMap = new WeakHashMap<>(); - private byte[] mFixedBuffer; - - PdicInfoCache(final RandomAccessFile file, final int start, final int size) { - mFile = file; - mStart = start; - mSize = size; - if (mSize < 1024 * 512) { - mFix = true; - mBlockSize = mSize; - } else { - mFix = false; - mBlockSize = 1024; - } - } - - byte[] getSegment(final int segment) { - byte[] segmentdata = null; - - if (mFix) { - if (mFixedBuffer == null) { - mFixedBuffer = new byte[mSize]; - try { - mFile.seek(mStart); - if (mFile.read(mFixedBuffer, 0, mSize) >= 0) { - return mFixedBuffer; - } - } catch (IOException ignored) { - } - } - } - - WeakReference ref = mMap.get(segment); - if (ref != null) { - segmentdata = ref.get(); - } - if (segmentdata == null) { - segmentdata = new byte[mBlockSize]; - try { - mFile.seek(mStart + (long) segment * mBlockSize); - int len = mFile.read(segmentdata, 0, mBlockSize); - if (len == mBlockSize || len == mSize % mBlockSize) { - mMap.put(segment, new WeakReference<>(segmentdata)); - } else { - return null; - } - } catch (IOException e) { - return null; - } - } - return segmentdata; - } - - - public int getShort(final int ptr) { - int segment = ptr / mBlockSize; - int address = ptr % mBlockSize; - byte[] segmentdata = getSegment(segment++); - - int dat = 0; - if (segmentdata != null) { - int b = 0; - b = segmentdata[address++]; - b &= 0xFF; - dat |= b; - - if (address >= mBlockSize) { - address %= mBlockSize; - segmentdata = getSegment(segment); - } - b = segmentdata[address]; - b &= 0xFF; - dat |= (b << 8); - } - return dat; - } - - public int getInt(final int ptr) { - int segment = ptr / mBlockSize; - int address = ptr % mBlockSize; - byte[] segmentdata = getSegment(segment++); - - int dat = 0; - if (segmentdata != null) { - int b = 0; - b = segmentdata[address++]; - b &= 0xFF; - dat |= b; - if (address >= mBlockSize) { - address %= mBlockSize; - segmentdata = getSegment(segment++); - } - b = segmentdata[address++]; - b &= 0xFF; - dat |= (b << 8); - if (address >= mBlockSize) { - address %= mBlockSize; - segmentdata = getSegment(segment++); - } - b = segmentdata[address++]; - b &= 0xFF; - dat |= (b << 16); - if (address >= mBlockSize) { - address %= mBlockSize; - segmentdata = getSegment(segment); - } - b = segmentdata[address]; - b &= 0x7F; - dat |= (b << 24); - } - return dat; - } - - @SuppressWarnings("finalparameters") - private static int compareArrayAsUnsigned(byte[] aa, int pa, int la, byte[] ab, int pb, int lb) { - while (la-- > 0) { - short sa = aa[pa++]; - if (lb-- > 0) { - short sb = ab[pb++]; - if (sa != sb) { - sa &= 0xFF; - sb &= 0xFF; - return (sa - sb); - } - } else { - return 1; - } - } - if (lb > 0) { - short sb = ab[pb]; - if (sb == 0x09) { // 比較対象の'\t'は'\0'とみなす - return 0; - } - return -1; - } - return 0; - } - - /** - * - * @param aa - * @param pa - * @param la - * @param ptr - * @param len - * @return - */ - @SuppressWarnings("finalparameters") - public int compare(final byte[] aa, final int pa, final int la, final int ptr, final int len) { - int segment = ptr / mBlockSize; - int address = ptr % mBlockSize; - byte[] segmentdata = getSegment(segment++); - - if (segmentdata == null) { - return -1; - } - - if (len < 0) { - return 1; - } - - if (address + len < mBlockSize) { - PdicInfo.decodetoCharBuffer(CharsetICU.forNameICU("BOCU-1"), segmentdata, address, len); - return compareArrayAsUnsigned(aa, pa, la, segmentdata, address, len); - } else { - int lena = mBlockSize - address; - int leno = Math.min(la, lena); - int ret = compareArrayAsUnsigned(aa, pa, leno, segmentdata, address, lena); - PdicInfo.decodetoCharBuffer(CharsetICU.forNameICU("BOCU-1"), segmentdata, address, lena); - if (ret != 0) { - return ret; - } - if (la < lena) { - return -1; - } - address = 0; - segmentdata = getSegment(segment); - PdicInfo.decodetoCharBuffer(CharsetICU.forNameICU("BOCU-1"), segmentdata, address, len - lena); - return compareArrayAsUnsigned(aa, pa + lena, la - lena, segmentdata, address, len - lena); - } - } - - - /** - * Create index of words. - * @param blockbits - * @param nindex - * @param indexPtr - * @return true when success, otherwise false. - */ - public boolean createIndex(final int blockbits, final int nindex, final int[] indexPtr) { - // インデックスの先頭から見出し語のポインタを拾っていく - int blocksize = 64 * 1024; - int[] params = new int[]{0, 0, nindex, blocksize, blockbits, 1, 0}; - - boolean hasNext = true; - for (int i = 0; hasNext; i++) { - hasNext = countIndexWords(params, getSegment(i), indexPtr); - } - indexPtr[params[0]] = params[1] + blockbits; // ターミネータを入れておく - return true; - } - - private boolean countIndexWords(final int[] params, final byte[] buff, final int[] indexPtr) { - int curidx = params[0]; - int curptr = params[1]; - int max = params[2]; - int buffmax = params[3]; - int blockbits = params[4]; - int found = params[5]; - int ignore = params[6]; - - int i = 0; - - for (; i < buffmax && curidx < max; i++) { - if (ignore > 0) { - ignore--; - } else if (found != 0) { - int ptr = curptr + i + blockbits; // ブロック番号サイズポインタを進める - indexPtr[curidx++] = ptr; // 見出し語部分のポインタを保存 - ignore = blockbits - 1; - found = 0; - } else if (buff[i] == 0) { - found = 1; - } - } - - params[0] = curidx; - params[1] = curptr + i; - params[5] = found; - params[6] = ignore; - return curidx < max; - } - -} diff --git a/src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicResult.java b/src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicResult.java deleted file mode 100644 index 2db71a9..0000000 --- a/src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicResult.java +++ /dev/null @@ -1,41 +0,0 @@ -package io.github.eb4j.ebview.dictionary.pdic; - -import java.util.ArrayList; - -/** - * @author wak (Apache-2.0) - * @author Hiroshi Miura - */ -final class PdicResult extends ArrayList { - - private static final long serialVersionUID = -7784622190169021306L; - - public int getCount() { - return size(); - } - - public String getIndex(final int idx) { - return get(idx).mIndex; - } - - public String getDisp(final int idx) { - return get(idx).mDisp; - } - - public byte getAttr(final int idx) { - return get(idx).mAttr; - } - - public String getTrans(final int idx) { - return get(idx).mTrans; - } - - public String getPhone(final int idx) { - return get(idx).mPhone; - } - - public String getSample(final int idx) { - return get(idx).mSample; - } - -}