Skip to content
This repository has been archived by the owner on Jul 16, 2022. It is now read-only.

Commit

Permalink
Merge pull request #35 from eb4j/topic-support-pdic
Browse files Browse the repository at this point in the history
Support PDIC/Unicode format
  • Loading branch information
miurahr authored Sep 28, 2021
2 parents 672405c + 15643bd commit 6bc8002
Show file tree
Hide file tree
Showing 10 changed files with 1,294 additions and 2 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Very simple dictionary search application supporting;
- LingvoDSL dz compression (.dsl.dz)
- StarDict (.ifo .dict)
- StarDict (.dict.dz)
- PDIC

![Application image](https://raw.githubusercontent.com/eb4j/ebviewer/main/docs/img/screen_image.png)

Expand Down Expand Up @@ -81,6 +82,8 @@ Copyright (C) 2015-2020 Aaron Madlon-Kay

Copyright (C) 2007-2015 Didier Briel

Copyright (C) 2014 wak

Copyright (C) 2008-2010 Alex Buloichik

Copyright (C) 2007 Zoltan Bartko
Expand Down
1 change: 1 addition & 0 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ dependencies {
implementation("commons-io:commons-io:2.11.0")
implementation("org.apache.commons:commons-lang3:3.12.0")
implementation("tokyo.northside:url-protocol-handler:0.1.4")
implementation("com.ibm.icu:icu4j-charset:69.1")

implementation("io.github.dictzip:dictzip:0.9.5")
implementation("com.github.takawitter:trie4j:0.9.8")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,13 @@ public class DictionariesManager {

protected final List<IDictionaryFactory> factories = new ArrayList<>();
protected final List<IDictionary> dictionaries = new ArrayList<>();
private Stemmer stemmer;
private final Stemmer stemmer;

public DictionariesManager() {
factories.add(new EPWING());
factories.add(new LingvoDSL());
factories.add(new StarDict());
factories.add(new PDic());
stemmer = new Stemmer();
}

Expand Down Expand Up @@ -94,7 +95,9 @@ public List<DictionaryEntry> findWord(final String word) {
if (result.size() == 0) {
String[] stemmed = stemmer.doStem(word);
if (stemmed.length > 1) {
result = dictionaries.stream().flatMap(dict -> doPredictiveLookup(dict, stemmed[0]).stream()).collect(Collectors.toList());
result = dictionaries.stream()
.flatMap(dict -> doPredictiveLookup(dict, stemmed[0]).stream())
.collect(Collectors.toList());
}
}
return result;
Expand Down
45 changes: 45 additions & 0 deletions src/main/java/io/github/eb4j/ebview/dictionary/PDic.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package io.github.eb4j.ebview.dictionary;

import io.github.eb4j.ebview.data.IDictionary;
import io.github.eb4j.ebview.dictionary.pdic.PdicDictionary;

import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

/**
 * Dictionary factory for PDIC dictionary files ({@code .dic}).
 *
 * @author Hiroshi Miura
 */
public class PDic implements IDictionaryFactory {
    /** File name suffix handled by this factory; compared ignoring case. */
    private static final String EXTENSION = ".dic";

    /**
     * Determine whether or not the supplied file is supported by this factory.
     * This is a lightweight check that only looks at the file extension; the
     * content is validated later by {@link #loadDict(File)}.
     *
     * @param file The file to check
     * @return Whether or not the file is supported
     */
    @Override
    public boolean isSupportedFile(final File file) {
        String path = file.getPath();
        int suffixStart = path.length() - EXTENSION.length();
        // regionMatches(ignoreCase = true) accepts ".dic", ".DIC" and mixed-case
        // spellings such as ".Dic" without a locale-sensitive case conversion.
        return suffixStart >= 0
                && path.regionMatches(true, suffixStart, EXTENSION, 0, EXTENSION.length());
    }

    /**
     * Load the given file and return an {@link IDictionary} that wraps it.
     *
     * @param file The file to load
     * @return A set holding the loaded dictionary, or an empty set when the
     *         file could not be read or is not a supported PDIC dictionary
     */
    @Override
    public Set<IDictionary> loadDict(final File file) {
        Set<IDictionary> result = new HashSet<>();
        try {
            result.add(new PdicDictionary(file));
        } catch (IOException | RuntimeException e) {
            // PdicDictionary reports unsupported or malformed files with unchecked
            // exceptions. The extension check alone cannot tell a PDIC file from
            // any other ".dic" file, so treat those the same as an I/O failure
            // and skip the file instead of aborting the caller.
            e.printStackTrace();
        }
        return result;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
package io.github.eb4j.ebview.dictionary.pdic;

import io.github.eb4j.ebview.data.DictionaryEntry;
import io.github.eb4j.ebview.data.IDictionary;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.List;

/**
 * {@link IDictionary} implementation backed by a single PDIC dictionary file.
 *
 * @author wak (Apache-2.0)
 * @author Hiroshi Miura
 */
public class PdicDictionary implements IDictionary {

    static final Logger LOG = LoggerFactory.getLogger(PdicDictionary.class.getName());

    /** Number of bytes read from the start of the file and parsed as the header. */
    private static final int HEADER_SIZE = 256;

    private final File srcFile;
    private final String cachePath;
    private final PdicInfo dicInfo;

    /**
     * Open a PDIC dictionary: parse its header and load its index.
     *
     * @param file the dictionary file
     * @throws IOException if the file cannot be read, is shorter than the
     *         header, or its header is not recognized
     * @throws RuntimeException if the dictionary version is unsupported or the
     *         index cannot be loaded
     */
    public PdicDictionary(final File file) throws IOException {
        this.srcFile = file;
        this.cachePath = file.getPath() + ".idx";

        ByteBuffer headerbuff = ByteBuffer.allocate(HEADER_SIZE);
        try (FileInputStream srcStream = new FileInputStream(srcFile);
             FileChannel srcChannel = srcStream.getChannel()) {
            // A single read() is not guaranteed to fill the buffer; keep
            // reading until the header is complete or the file ends.
            while (headerbuff.hasRemaining()) {
                if (srcChannel.read(headerbuff) < 0) {
                    break;
                }
            }
        }
        // Fail here rather than leaving dicInfo unset, which would surface
        // later as a NullPointerException on the first lookup.
        if (headerbuff.hasRemaining()) {
            throw new IOException("Truncated PDIC header: " + srcFile.getName());
        }

        PdicHeader header = new PdicHeader();
        if (header.load(headerbuff) == 0) {
            throw new IOException("Unrecognized PDIC header: " + srcFile.getName());
        }
        // Only Unicode dictionaries of version 6 or later are accepted.
        if ((header.version & 0xFF00) < 0x0600 || header.os != 0x20) {
            LOG.warn("Unsupported dictionary version: {}", srcFile.getName());
            throw new RuntimeException("Unsupported dictionary version: " + srcFile.getName());
        }

        PdicInfo info = new PdicInfo(srcFile, header.header_size + header.extheader,
                header.block_size * header.index_block, header.nindex2, header.index_blkbit,
                header.block_size);
        if (!info.readIndexBlock(cachePath)) {
            LOG.warn("Failed to load dictionary index of {}", srcFile.getName());
            throw new RuntimeException("Failed to load dictionary index of " + srcFile.getName());
        }
        info.setDicName(file.getName());
        this.dicInfo = info;
    }

    /**
     * @return the dictionary name, which is set to the source file name on load
     */
    @Override
    public String getDictionaryName() {
        return dicInfo.getDicName();
    }

    /**
     * Read article's text.
     *
     * @param word The word to look up in the dictionary
     * @return List of entries. May be empty, but cannot be null.
     */
    @Override
    public List<DictionaryEntry> readArticles(final String word) {
        List<DictionaryEntry> lists = new ArrayList<>();
        // Locale.ROOT keeps the lookup key independent of the user's locale
        // (e.g. the Turkish dotless-i mapping of "I").
        if (!dicInfo.searchWord(word.toLowerCase(java.util.Locale.ROOT))) {
            return lists;
        }
        PdicResult result = dicInfo.getResult();
        for (int i = 0; i < result.getCount(); i++) {
            // Fall back to the index word when no display form is provided.
            String disp = result.getDisp(i);
            if (disp == null || disp.isEmpty()) {
                disp = result.getIndex(i);
            }
            // Article layout: "[phone / ]trans<br/>[sample]"
            StringBuilder sb = new StringBuilder();
            String phone = result.getPhone(i);
            if (phone != null) {
                sb.append(phone).append(" / ");
            }
            sb.append(result.getTrans(i)).append("<br/>");
            String sample = result.getSample(i);
            if (sample != null) {
                sb.append(sample);
            }
            lists.add(new DictionaryEntry(disp, sb.toString(), getDictionaryName()));
        }
        return lists;
    }

    /**
     * Read article's text. This implementation simply delegates to
     * {@link #readArticles(String)}, so the result is whatever that lookup
     * returns for {@code word}.
     *
     * @param word The word to look up in the dictionary
     * @return List of entries. May be empty, but cannot be null.
     */
    @Override
    public List<DictionaryEntry> readArticlesPredictive(final String word) {
        return readArticles(word);
    }

    /**
     * Dispose IDictionary. No action is taken here.
     */
    @Override
    public void close() {
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package io.github.eb4j.ebview.dictionary.pdic;

/**
 * Mutable value holder with one attribute byte and five text fields.
 * All fields start out at their Java defaults ({@code 0} / {@code null})
 * and are filled in by the code that creates the element.
 *
 * @author wak (Apache-2.0)
 * @author Hiroshi Miura
 */
@SuppressWarnings("visibilitymodifier")
final class PdicElement {
    /** Attribute byte; {@code 0} until assigned. */
    public byte mAttr;
    /** Index text; {@code null} until assigned. */
    public String mIndex;
    /** Display text; {@code null} until assigned. */
    public String mDisp;
    /** Translation text; {@code null} until assigned. */
    public String mTrans;
    /** Sample text; {@code null} until assigned. */
    public String mSample;
    /** Phonetic text; {@code null} until assigned. */
    public String mPhone;

    /** Creates an element with every field at its default value. */
    PdicElement() {
    }
}

111 changes: 111 additions & 0 deletions src/main/java/io/github/eb4j/ebview/dictionary/pdic/PdicHeader.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
package io.github.eb4j.ebview.dictionary.pdic;

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

/**
 * Subset of the PDIC file header that the rest of the reader needs.
 *
 * <p>{@link #load(ByteBuffer)} walks the header fields in file order, keeps
 * the ones exposed as public fields below and reads past the others.
 *
 * @author wak (Apache-2.0)
 * @author Hiroshi Miura
 */
@SuppressWarnings({"visibilitymodifier", "membername"})
final class PdicHeader {
    /** Length in bytes of the header name string. */
    private static final int L_HEADERNAME = 100;
    /** Length in bytes of the dictionary title. */
    private static final int L_DICTITLE = 40;

    /** Dictionary version. */
    public short version;
    /** Number of bytes in one block. */
    public short block_size;
    /** Number of index blocks. */
    public short index_block;
    /** Number of bytes in the header. */
    public short header_size;
    /** Length of the word attribute. */
    public byte attrlen;
    /** OS code. */
    public byte os;
    /** Index block width: {@code false} = 16 bit, {@code true} = 32 bit. */
    public boolean index_blkbit;
    /** Size of the extended header. */
    public int extheader;
    /** Number of index elements. */
    public int nindex2;

    /**
     * Constructor.
     */
    PdicHeader() {
    }

    /**
     * Parse the header fields out of a freshly filled buffer.
     *
     * <p>The buffer is flipped and switched to little-endian byte order
     * before reading, so it must be positioned at the end of the header
     * data when passed in.
     *
     * @param header_block buffer holding the header bytes
     * @return the major version ({@code version >> 8}) when the attribute
     *         length is 1, otherwise {@code 0}
     * @throws RuntimeException if the major version is neither 5 nor 6
     */
    public int load(final ByteBuffer header_block) throws RuntimeException {
        header_block.flip();
        header_block.order(ByteOrder.LITTLE_ENDIAN);

        discardBytes(header_block, L_HEADERNAME); // headername
        discardBytes(header_block, L_DICTITLE);   // dictitle

        version = header_block.getShort();
        final int major = version & 0xFF00;
        if (major != 0x0500 && major != 0x0600) {
            throw new RuntimeException("Unsupported format");
        }

        discardShorts(header_block, 2); // lword, ljapa

        block_size = header_block.getShort();
        index_block = header_block.getShort();
        header_size = header_block.getShort();

        discardShorts(header_block, 4); // index_size, empty_block, nindex, nblock
        header_block.getInt();          // nword

        header_block.get();             // dicorder
        header_block.get();             // dictype
        attrlen = header_block.get();
        os = header_block.get();

        header_block.getInt();          // ole_number
        discardShorts(header_block, 5); // lid_dummy

        index_blkbit = header_block.get() != 0;
        header_block.get();             // dummy0
        extheader = header_block.getInt();
        header_block.getInt();          // empty_block2
        nindex2 = header_block.getInt();
        header_block.getInt();          // nblock2

        // Fixed-part check: the version is reported only when attrlen is 1.
        return attrlen == 1 ? version >> 8 : 0;
    }

    /** Reads {@code count} bytes from {@code buffer} in one bulk get and drops them. */
    private static void discardBytes(final ByteBuffer buffer, final int count) {
        buffer.get(new byte[count]);
    }

    /** Reads {@code count} consecutive shorts from {@code buffer} and drops them. */
    private static void discardShorts(final ByteBuffer buffer, final int count) {
        for (int i = 0; i < count; i++) {
            buffer.getShort();
        }
    }

}
Loading

0 comments on commit 6bc8002

Please sign in to comment.