Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Support for stateless environments added to SpellCheck #16

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 32 additions & 5 deletions parsivar/spell_checker.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,49 @@
import pickle
import math
import os
import math
from typing import Counter, Union, Optional

from .normalizer import Normalizer
from .tokenizer import Tokenizer
from .data_helper import DataHelper


class SpellCheck:
def __init__(self):
def __init__(self,
onegram_lm: Optional[Union[str, Counter]] = None,
bigram_lm: Optional[Union[str, Counter]] = None
):
"""SpellCheck init method

Args:
onegram_lm (Union[None, str, Counter]): The One-gram language model,
based on the type of this input, the SpellCheck instance can be
created using data file under __file__, or another user-provided
path, or the object of the language model.
bigram_lm (Union[None, str, Counter]): The Bi-gram language model,
based on the type of this input, the SpellCheck instance can be
created using data file under __file__, or another user-provided
path, or the object of the language model.
"""
self.normalizer = Normalizer()
self.tokenizer = Tokenizer()
self.data_helper = DataHelper()

self.dir_path = os.path.dirname(os.path.realpath(__file__)) + "/"

self.bigram_lm = self.data_helper.load_var(self.dir_path + "resource/spell/mybigram_lm.pckl")
self.onegram_lm = self.data_helper.load_var(self.dir_path + "resource/spell/onegram.pckl")
if bigram_lm is None or isinstance(bigram_lm, str):
bigram_file_path = bigram_lm if bigram_lm else self.dir_path + \
"resource/spell/mybigram_lm.pckl"
self.bigram_lm = self.data_helper.load_var(bigram_file_path)
else:
self.bigram_lm = bigram_lm

if onegram_lm is None or isinstance(onegram_lm, str):
onegram_file_path = onegram_lm if onegram_lm else self.dir_path + \
"resource/spell/onegram.pckl"
self.onegram_lm = self.data_helper.load_var(onegram_file_path)
else:
self.onegram_lm = onegram_lm

self.ingroup_chars = [{'ا', 'آ', 'ع'},
{'ت', 'ط'},
{'ث', 'س', 'ص'},
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(name='parsivar',
packages=['parsivar'],
version='0.2.3',
version='0.2.4',
description='Python library for preprocessing Persian text.',
author='ICT',
author_email='',
Expand Down