Skip to content

Commit

Permalink
Merge pull request #13 from neocl/dev
Browse files Browse the repository at this point in the history
Pump master version to 0.1a6
  • Loading branch information
letuananh authored Jan 8, 2020
2 parents e70124c + d94fd05 commit 6fa5464
Show file tree
Hide file tree
Showing 10 changed files with 224 additions and 5 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ include LICENSE
include requirements*.txt
recursive-include jamdict/data/ *.sql
recursive-include jamdict/data/ *.json
recursive-include jamdict/data/ *.gz
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,5 @@ result = jmd.lookup('食べる')
...
[id#1358280] たべる (食べる) : 1. to eat ((Ichidan verb|transitive verb)) 2. to live on (e.g. a salary)/to live off/to subsist on
```
>>> for c in result.chars:
... print(repr(c))

See `jamdict_demo.py` and `jamdict/tools.py` for more information.
2 changes: 1 addition & 1 deletion jamdict/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@
__url__ = "https://github.com/neocl/jamdict"
__maintainer__ = "Le Tuan Anh"
__version_major__ = "0.1"
__version__ = "{}a5".format(__version_major__)
__version__ = "{}a6".format(__version_major__)
__version_long__ = "{} - Alpha".format(__version_major__)
__status__ = "Prototype"
Binary file added jamdict/data/kradfile-u.gz
Binary file not shown.
Binary file added jamdict/data/radkfile.gz
Binary file not shown.
21 changes: 21 additions & 0 deletions jamdict/kanjidic2.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,17 @@
from lxml import etree

from chirptext import chio
from chirptext.sino import Radical as KangxiRadical

from .krad import KRad

# ------------------------------------------------------------------------------
# Configuration
# ------------------------------------------------------------------------------

# Module-level KRad instance; the `components` property reads its `krad` mapping.
krad = KRad()


def getLogger():
    """Return the logger registered under this module's name."""
    module_logger = logging.getLogger(__name__)
    return module_logger

Expand Down Expand Up @@ -112,6 +117,7 @@ def __init__(self):
self.literal = '' # <!ELEMENT literal (#PCDATA)> The character itself in UTF8 coding.
self.codepoints = [] # <!ELEMENT codepoint (cp_value+)>
self.radicals = [] # <!ELEMENT radical (rad_value+)>
self.__canon_radical = None
self.stroke_count = None # first stroke_count in misc
self.grade = None # <misc>/<grade>
self.stroke_miscounts = [] # <misc>/stroke_count[1:]
Expand All @@ -135,6 +141,21 @@ def __repr__(self):
def __str__(self):
    """Return the character literal as this object's string form."""
    return self.literal

@property
def components(self):
    """Components listed for this character in the module-level ``krad`` map.

    Returns the stored list for ``self.literal``, or a new empty list when
    the literal has no entry.
    """
    return krad.krad.get(self.literal, [])

@property
def radical(self):
    """Kangxi radical object for this character's 'classical' radical entry.

    The result is cached on the instance after the first successful lookup.
    Returns None when no entry in ``self.radicals`` has ``rad_type``
    equal to 'classical'.
    """
    if self.__canon_radical is not None:
        return self.__canon_radical
    for entry in self.radicals:
        if entry.rad_type != 'classical':
            continue
        # No early exit: if several classical entries exist, the last one wins.
        self.__canon_radical = KangxiRadical.kangxi()[entry.value]
    return self.__canon_radical

def to_json(self):
return {'literal': self.literal,
'codepoints': [cp.to_json() for cp in self.codepoints],
Expand Down
108 changes: 108 additions & 0 deletions jamdict/krad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# -*- coding: utf-8 -*-

'''
Module for retrieving kanji components (i.e. radicals)
Latest version can be found at https://github.com/neocl/jamdict
This package uses the RADKFILE/KRADFILE[1] file.
These files are the property of the [Electronic Dictionary Research and Development Group][2], and are used in conformance with the Group's [licence][3].
[1]: http://www.edrdg.org/krad/kradinf.html
[2]: http://www.edrdg.org/
[3]: http://www.edrdg.org/edrdg/licence.html
References:
JMDict website:
http://www.csse.monash.edu.au/~jwb/edict.html
Python documentation:
https://docs.python.org/
PEP 257 - Python Docstring Conventions:
https://www.python.org/dev/peps/pep-0257/
@author: Le Tuan Anh <tuananh.ke@gmail.com>
@license: MIT
'''

# Copyright (c) 2016, Le Tuan Anh <tuananh.ke@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

########################################################################

import os
import logging
import threading
from collections import OrderedDict
from collections import defaultdict as dd
from lxml import etree

from chirptext import chio
from chirptext.sino import Radical

# ------------------------------------------------------------------------------
# Configuration
# ------------------------------------------------------------------------------
# Directory containing this module; bundled data files live in its `data` subfolder.
MY_FOLDER = os.path.dirname(os.path.abspath(__file__))
DATA_FOLDER = os.path.join(MY_FOLDER, 'data')
# Kanji -> components table, parsed by KRad._build_krad_map
KRADFILE = os.path.join(MY_FOLDER, 'data', 'kradfile-u.gz')
# Companion RADKFILE archive (not read by the code in this module)
RADKFILE = os.path.join(MY_FOLDER, 'data', 'radkfile.gz')

logger = logging.getLogger(__name__)


########################################################################

class KRad:
    """Kanji <-> radical (component) lookup backed by KRADFILE.

    The data file is parsed lazily on first access to :attr:`krad` or
    :attr:`radk`. Parsing is guarded by ``self.lock`` and the finished maps
    are published only once they are complete.
    """

    def __init__(self, **kwargs):
        """ Kanji Radical management

        Keyword arguments are accepted for forward compatibility and are
        currently ignored.
        """
        self.__krad_map = None
        self.__radk_map = None
        self.__rads = {}
        self.lock = threading.Lock()

    def _build_krad_map(self):
        """Parse KRADFILE and populate both lookup maps (idempotent).

        Lines starting with ``#`` are skipped. Every other line is split once
        on ``:``; the left part is the character, the right part is a
        whitespace-separated list of its components. Lines without ``:``
        are ignored.
        """
        with self.lock:
            # Another thread may have finished the build while we were
            # waiting for the lock; do not parse the file a second time.
            if self.__krad_map is not None and self.__radk_map is not None:
                return
            lines = chio.read_file(KRADFILE, mode='rt').splitlines()
            # Build into locals so that readers never see half-filled maps.
            krad_map = {}
            radk_map = dd(set)
            for line in lines:
                if line.startswith("#"):
                    continue
                parts = line.split(':', maxsplit=1)
                if len(parts) != 2:
                    continue
                char_literal = parts[0].strip()
                rads = parts[1].split()
                krad_map[char_literal] = rads
                for rad in rads:
                    radk_map[rad].add(char_literal)
            # Publish only after both maps are complete.
            self.__radk_map = radk_map
            self.__krad_map = krad_map

    @property
    def radk(self):
        """Mapping from a component to the set of characters containing it."""
        if self.__radk_map is None:
            self._build_krad_map()
        return self.__radk_map

    @property
    def krad(self):
        """Mapping from a character to the list of its components."""
        if self.__krad_map is None:
            self._build_krad_map()
        return self.__krad_map
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
lxml
chirptext >= 0.1a18
puchikarui
puchikarui >= 0.1a3
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def read(*filenames, **kwargs):
long_description=long_description,
long_description_content_type='text/markdown',
packages=['jamdict'],
package_data={'jamdict': ['data/*.sql', 'data/*.json']},
package_data={'jamdict': ['data/*.sql', 'data/*.json', 'data/*.gz']},
include_package_data=True,
platforms='any',
test_suite='test',
Expand Down
91 changes: 91 additions & 0 deletions test/test_krad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

'''
Script for testing KRad module library
Latest version can be found at https://github.com/neocl/jamdict
References:
Python documentation:
https://docs.python.org/
Python unittest
https://docs.python.org/3/library/unittest.html
--
argparse module:
https://docs.python.org/3/howto/argparse.html
PEP 257 - Python Docstring Conventions:
https://www.python.org/dev/peps/pep-0257/
@author: Le Tuan Anh <tuananh.ke@gmail.com>
'''

# Copyright (c) 2016, Le Tuan Anh <tuananh.ke@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

__author__ = "Le Tuan Anh <tuananh.ke@gmail.com>"
__copyright__ = "Copyright 2016, jamdict"
__license__ = "MIT"

########################################################################

import os
import logging
import unittest
from jamdict import config
from jamdict.jmdict import JMDictXMLParser
from jamdict.kanjidic2 import Kanjidic2XMLParser
from jamdict.krad import KRad

########################################################################

# Directory of this test module; fixtures live in its `data` subfolder.
MY_DIR = os.path.abspath(os.path.dirname(__file__))
TEST_DATA = os.path.join(MY_DIR, 'data')
MINI_JMD = os.path.join(MY_DIR, 'data', 'JMdict_mini.xml')
MINI_KD2 = os.path.join(MY_DIR, 'data', 'kanjidic2_mini.xml')
TEST_DB = os.path.join(MY_DIR, 'data', 'jamdict_test.db')


def getLogger():
    """Return the logger named after this test module."""
    name = __name__
    return logging.getLogger(name)


class TestConfig(unittest.TestCase):
    """Sanity checks for the jamdict configuration."""

    def test_config(self):
        """The config must define KD2_XML and resolve it to a truthy value."""
        settings = config.read_config()
        self.assertIn('KD2_XML', settings)
        kd2_file = config.get_file('KD2_XML')
        self.assertTrue(kd2_file)
        location = config._get_config_manager().locate_config()
        getLogger().info("jamdict log file location: {}".format(location))


class TestModels(unittest.TestCase):
    """Checks of the KRad lookup tables against known entries."""

    def test_read_krad(self):
        """Known characters map to their components and back."""
        krad = KRad()
        # character -> ordered component list
        expected_components = (
            ('㘅', ['亅', '二', '口', '彳', '金']),
            ('𪚲', ['乙', '勹', '月', '田', '亀']),
        )
        for literal, components in expected_components:
            self.assertEqual(krad.krad[literal], components)
        # component -> set of characters that contain it
        expected_chars = {'籥', '鸙', '龢', '龠', '龡', '籲', '瀹', '龥', '禴', '鑰', '爚', '龣'}
        self.assertEqual(krad.radk['龠'], expected_chars)


########################################################################

if __name__ == "__main__":
    # Enable debug output from the jamdict package while the tests run.
    jamdict_logger = logging.getLogger('jamdict')
    jamdict_logger.setLevel(logging.DEBUG)
    unittest.main()

0 comments on commit 6fa5464

Please sign in to comment.