forked from roshan-research/hazm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
tests.py
74 lines (60 loc) · 1.73 KB
/
tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# coding: utf-8
from __future__ import unicode_literals
import sys, inspect, doctest, unittest
from hazm import *
# Maps each command-line target name to a class exported by hazm. The runner
# at the bottom of this file doctests the module that defines the class.
modules = {
    # *Reader classes
    'persica': PersicaReader,
    'hamshahri': HamshahriReader,
    'bijankhan': BijankhanReader,
    'peykare': PeykareReader,
    'dadegan': DadeganReader,
    'valency': VerbValencyReader,
    'treebank': TreebankReader,
    'sentipers': SentiPersReader,
    'degarbayan': DegarbayanReader,
    'tnews': TNewsReader,
    'quran': QuranCorpusReader,
    'miras_text': MirasTextReader,

    # tokenizers, normalizers and the remaining processing classes
    'sentence_tokenizer': SentenceTokenizer,
    'word_tokenizer': WordTokenizer,
    'splitter': TokenSplitter,
    'normalizer': Normalizer,
    'stemmer': Stemmer,
    'lemmatizer': Lemmatizer,
    'tagger': SequenceTagger,
    'postagger': POSTagger,
    'chunker': Chunker,
    'parser': DependencyParser,
    'informal_normalizer': InformalNormalizer,
}
class UnicodeOutputChecker(doctest.OutputChecker):
    """Doctest output checker that compares evaluated values, not raw text.

    Expected and actual outputs are evaluated as Python expressions when
    possible, so outputs that differ only in their textual representation
    (for example escaped versus literal non-ASCII characters) can still
    compare equal. Outputs that cannot be evaluated are compared as the
    original strings.
    """

    # Text type of the running interpreter: ``unicode`` on Python 2 and
    # ``str`` on Python 3. Avoids referencing the Python-2-only ``unicode``
    # name, which raises NameError on Python 3.
    _TEXT_TYPE = type(u'')

    def check_output(self, want, got, optionflags):
        """Return True when ``got`` matches ``want`` after normalisation.

        Args:
            want: expected output text taken from the doctest.
            got: output text produced by running the example.
            optionflags: doctest option flags; accepted for interface
                compatibility but not consulted.

        Returns:
            The result of ``want == got`` after the normalisation steps.
        """
        # NOTE(review): eval() executes arbitrary expressions. This is only
        # acceptable because the inputs are the project's own doctests;
        # never feed this checker untrusted text.
        # Both values are rebound together, so a failure on either side
        # leaves both as the original strings.
        try:
            want, got = eval(want), eval(got)
        except Exception:
            # Output is not a Python expression (or fails to evaluate):
            # fall back to comparing the raw strings.
            pass

        try:
            got = got.decode('unicode-escape')
            want = want.replace('آ', 'ا')  # decode issue
        except (AttributeError, TypeError, ValueError):
            # ``got`` has no decode() (non-bytes value), or it cannot be
            # decoded (UnicodeError is a ValueError): keep values unchanged.
            pass

        if isinstance(want, self._TEXT_TYPE):
            want = want.replace('٫', '.')  # eval issue

        return want == got
if __name__ == '__main__':
    # Command-line entry point. Each argument names a key of ``modules``;
    # with no arguments every registered module is tested.
    # argv[0] (the script path) is excluded so it can never be mistaken
    # for a module name.
    requested = sys.argv[1:]
    all_modules = not requested

    # The custom checker is only installed on Python 2; passing None lets
    # doctest use its default output checker.
    # NOTE(review): ``utils`` is presumably provided by ``from hazm import *``.
    checker = UnicodeOutputChecker() if utils.PY2 else None

    # One doctest suite per selected target, built from the module that
    # defines the registered class.
    suites = [
        doctest.DocTestSuite(inspect.getmodule(target), checker=checker)
        for name, target in modules.items()
        if all_modules or name in requested
    ]

    # The README examples are only checked on a full run outside Python 2.
    if not utils.PY2 and all_modules:
        suites.append(doctest.DocFileSuite('README.md'))

    runner = unittest.TextTestRunner(verbosity=2)

    # Run every suite even after a failure so all results are reported.
    failure = False
    for suite in suites:
        if not runner.run(suite).wasSuccessful():
            failure = True

    # Non-zero exit status signals failure to the caller. sys.exit is used
    # because the bare exit() helper is only installed by the site module.
    if failure:
        sys.exit(1)