From 0261c60f7a051cef20c0c940a6c65a6151c75127 Mon Sep 17 00:00:00 2001
From: Shachar Mirkin <shacharmirkin@gmail.com>
Date: Wed, 9 Dec 2020 23:09:36 +0100
Subject: [PATCH 1/4] Add multiple options

---
 src/xpinyin/__init__.py | 52 ++++++++++++++++++++++++++++------
 src/xpinyin/combs.py    | 63 +++++++++++++++++++++++++++++++++++++++++
 src/xpinyin/tests.py    |  9 ++++--
 3 files changed, 113 insertions(+), 11 deletions(-)
 create mode 100644 src/xpinyin/combs.py

diff --git a/src/xpinyin/__init__.py b/src/xpinyin/__init__.py
index 1f3bcbc..82db0b8 100644
--- a/src/xpinyin/__init__.py
+++ b/src/xpinyin/__init__.py
@@ -4,6 +4,9 @@
 
 import os.path
 import re
+from typing import List, Optional
+
+from xpinyin.combs import get_combs
 
 PinyinToneMark = {
     0: u"aoeiuv\u00fc",
@@ -15,7 +18,6 @@
 
 
 class Pinyin(object):
-
     """translate chinese hanzi to pinyin by python, inspired by flyerhzm’s
     `chinese\_pinyin`_ gem
 
@@ -60,7 +62,7 @@ def __init__(self, data_path=data_path):
         with open(data_path) as f:
             for line in f:
                 k, v = line.split('\t')
-                self.dict[k] = v
+                self.dict[k] = v.rstrip()
 
     @staticmethod
     def decode_pinyin(s):
@@ -107,27 +109,59 @@ def convert_pinyin(word, convert):
         if convert == 'upper':
             return word.upper()
 
-    def get_pinyin(self, chars=u'你好', splitter=u'-',
-                   tone_marks=None, convert='lower'):
+    def get_pinyins(self, chars: str, splitter: str = u'-',
+                    tone_marks: Optional[str] = None, convert: str = 'lower', comb: bool = True) -> List[str]:
+        all_pinyins = []  # a list of lists of pinyin options for each char
+        flag = 1  # in the list (probably not aChinese character)
+        for char in chars:
+            key = "%X" % ord(char)
+            if key not in self.dict:
+                if flag == 1:
+                    all_pinyins.append([char])  # add as is
+                else:
+                    all_pinyins[-1][-1] += char  # add to previous sequence of non Chinese chars
+                flag = 0
+            else:
+                flag = 1
+                char_py_versions = self.dict[key].split()
+                last = 1 if comb is False else len(char_py_versions)
+                if tone_marks == 'marks':
+                    char_options = [Pinyin.decode_pinyin(o) for o in char_py_versions[0:last]]
+                elif tone_marks == 'numbers':
+                    char_options = [o for o in char_py_versions[0:last]]
+                else:
+                    char_options = [o[:-1] for o in char_py_versions[0:last]]
+                all_pinyins.append([Pinyin.convert_pinyin(c, convert) for c in char_options])
+
+        return list(set(get_combs(all_pinyins, splitter)))  # note: ignoring order
+
+    def get_pinyin(self, chars: str, splitter: str = u'-',
+                   tone_marks=None, convert: str = 'lower') -> str:
+
+        return self.get_pinyins(chars, splitter=splitter, tone_marks=tone_marks, convert=convert, comb=False)[0]
+
+    def get_pinyin_old(self, chars=u'你好', splitter=u'-',
+                       tone_marks=None, convert='lower'):
         result = []
         flag = 1
+
         for char in chars:
             key = "%X" % ord(char)
             try:
                 if tone_marks == 'marks':
-                    word = self.decode_pinyin(self.dict[key].split()[0].strip())
+                    word = self.decode_pinyin(self.dict[key].split()[0])  # TODO comb
                 elif tone_marks == 'numbers':
-                    word = self.dict[key].split()[0].strip()
+                    word = self.dict[key].split()[0]  # TODO comb
                 else:
-                    word = self.dict[key].split()[0].strip()[:-1]
+                    word = self.dict[key].split()[0][:-1]  # TODO comb
                 word = self.convert_pinyin(word, convert)
                 result.append(word)
                 flag = 1
             except KeyError:
                 if flag:
-                    result.append(char)
+                    result.append(char)  # TODO this is adding the original
                 else:
-                    result[-1] += char
+                    result[-1] += char  # TODO replacing the last char with the original if already was in error state
                 flag = 0
         return splitter.join(result)
 
diff --git a/src/xpinyin/combs.py b/src/xpinyin/combs.py
new file mode 100644
index 0000000..4e63653
--- /dev/null
+++ b/src/xpinyin/combs.py
@@ -0,0 +1,63 @@
+from typing import List
+
+
+def _get_comb_indexes(lengths: List[int], n=None) -> List[List[int]]:
+    """
+    Given a list with the number of possible options per place, returns a list of numbers representing combinations.
+    The combinations are created via additions to a multi-radix number, from left to right
+    (i.e. from smaller to larger numbers).
+
+    @param n The maximal number of requested combinations.
+    """
+    # calculate the maximal number of possible combinations
+    n_max = 1
+    for j in lengths:
+        n_max *= j
+
+    n = min(n, n_max) if n is not None else n_max
+    if n == 0:
+        raise ValueError("Can't create combinations with 0-length lists")
+    n_items = len(lengths)
+
+    curr = [0] * n_items
+    combs = [list.copy(curr)]
+    i = n_items - 1
+    count = 1
+    while count < n:
+        curr[i] = (curr[i] + 1) % lengths[i]
+        if curr[i] != 0:
+            combs.append(list.copy(curr))
+            count += 1
+            i = n_items - 1  # reset to right-most digit
+        else:
+            i -= 1  # try previous (left) digit
+
+    return combs
+
+
+def get_combs(options: List[List[str]], splitter='', n=10) -> List[str]:
+    """
+    e.g.: [['a'], ['1' ,'2'], ['@']] -> [a1@, a2@]
+    Note: the order is not guaranteed
+    """
+    combs = []
+    comb_numbers = [len(o) for o in options]
+    combs_indexes = _get_comb_indexes(comb_numbers, n)
+
+    for c in combs_indexes:  # e.g. [0,2,1]
+        comb = []
+        for i in range(len(c)):
+            comb.append(options[i][c[i]])
+        combs.append(splitter.join(comb))
+
+    return combs
+
+
+def main():
+    lengths = [1, 2, 1]
+    print(_get_comb_indexes(lengths))
+    print(get_combs([['a', 'b'], ['1', '2'], ['@', '#']]))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/xpinyin/tests.py b/src/xpinyin/tests.py
index 3c795c9..b09d56a 100644
--- a/src/xpinyin/tests.py
+++ b/src/xpinyin/tests.py
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 import unittest
 
+
 class PinyinTests(unittest.TestCase):
     def Pinyin(self, *a, **kw):
         from xpinyin import Pinyin
@@ -23,8 +24,6 @@ def test_get_pinyin_mixed_words(self):
 
     def test_get_pinyin_with_tone_marks(self):
         self.assertEqual(self.p.get_pinyin(u'上海', tone_marks='marks'), u'sh\xe0ng-h\u01cei')
-
-    def test_get_pinyin_with_tone_marks(self):
         self.assertEqual(self.p.get_pinyin(u'秋', tone_marks='marks'), u'qiū')
 
     def test_get_initial(self):
@@ -37,5 +36,11 @@ def test_get_initials_with_splitter(self):
         self.assertEqual(self.p.get_initials(u'你好', u' '), u'N H')
         self.assertEqual(self.p.get_initials(u'你好', u''), u'NH')
 
+    # --- testing combinations ---
+
+    def test_get_pinyins_with_default_splitter(self):
+        self.assertEqual(self.p.get_pinyins(u'上海'), [u'shang-hai'])
+
+
 if __name__ == '__main__':
     unittest.main()

From 16763a0e453c8aa8654ac0af3f46592893b6e90e Mon Sep 17 00:00:00 2001
From: Shachar Mirkin <shacharmirkin@gmail.com>
Date: Thu, 10 Dec 2020 00:38:37 +0100
Subject: [PATCH 2/4] Restore original order of combinations

Restored the original order of combinations (which reflects frequency); added unit tests; fixed max number
---
 src/xpinyin/__init__.py | 48 ++++++++++++-----------------------------
 src/xpinyin/combs.py    |  4 ++--
 src/xpinyin/tests.py    | 17 +++++++++++++++
 3 files changed, 33 insertions(+), 36 deletions(-)

diff --git a/src/xpinyin/__init__.py b/src/xpinyin/__init__.py
index 82db0b8..7cae020 100644
--- a/src/xpinyin/__init__.py
+++ b/src/xpinyin/__init__.py
@@ -110,7 +110,7 @@ def convert_pinyin(word, convert):
             return word.upper()
 
     def get_pinyins(self, chars: str, splitter: str = u'-',
-                    tone_marks: Optional[str] = None, convert: str = 'lower', comb: bool = True) -> List[str]:
+                    tone_marks: Optional[str] = None, convert: str = 'lower', n=None) -> List[str]:
         all_pinyins = []  # a list of lists of pinyin options for each char
         flag = 1  # in the list (probably not aChinese character)
         for char in chars:
@@ -122,48 +122,28 @@ def get_pinyins(self, chars: str, splitter: str = u'-',
                     all_pinyins[-1][-1] += char  # add to previous sequence of non Chinese chars
                 flag = 0
             else:
-                flag = 1
-                char_py_versions = self.dict[key].split()
-                last = 1 if comb is False else len(char_py_versions)
+                if tone_marks is None:  # in this case we may have duplicates if the variations differ just by the tones
+                    char_py_versions = []
+                    for v in self.dict[key].split():
+                        if v[0:-1] not in char_py_versions:  # we remove the tone mark while we're at it
+                            char_py_versions.append(v[0:-1])
+                else:
+                    char_py_versions = self.dict[key].split()
+                last = 1 if n == 1 else len(char_py_versions)
                 if tone_marks == 'marks':
                     char_options = [Pinyin.decode_pinyin(o) for o in char_py_versions[0:last]]
-                elif tone_marks == 'numbers':
+                else:  # 'numbers' or None
                     char_options = [o for o in char_py_versions[0:last]]
-                else:
-                    char_options = [o[:-1] for o in char_py_versions[0:last]]
+
                 all_pinyins.append([Pinyin.convert_pinyin(c, convert) for c in char_options])
+                flag = 1
 
-        return list(set(get_combs(all_pinyins, splitter)))  # note: ignoring order
+        return get_combs(all_pinyins, splitter, n=n)
 
     def get_pinyin(self, chars: str, splitter: str = u'-',
                    tone_marks=None, convert: str = 'lower') -> str:
 
-        return self.get_pinyins(chars, splitter=splitter, tone_marks=tone_marks, convert=convert, comb=False)[0]
-
-    def get_pinyin_old(self, chars=u'你好', splitter=u'-',
-                       tone_marks=None, convert='lower'):
-        result = []
-        flag = 1
-
-        for char in chars:
-            key = "%X" % ord(char)
-            try:
-                if tone_marks == 'marks':
-                    word = self.decode_pinyin(self.dict[key].split()[0])  # TODO comb
-                elif tone_marks == 'numbers':
-                    word = self.dict[key].split()[0]  # TODO comb
-                else:
-                    word = self.dict[key].split()[0][:-1]  # TODO comb
-                word = self.convert_pinyin(word, convert)
-                result.append(word)
-                flag = 1
-            except KeyError:
-                if flag:
-                    result.append(char)  # TODO this is adding the original
-                else:
-                    result[-1] += char  # TODO replacing the last char with the original if already was in error state
-                flag = 0
-        return splitter.join(result)
+        return self.get_pinyins(chars, splitter=splitter, tone_marks=tone_marks, convert=convert, n=1)[0]
 
     def get_initial(self, char=u'你'):
         try:
diff --git a/src/xpinyin/combs.py b/src/xpinyin/combs.py
index 4e63653..406ecb7 100644
--- a/src/xpinyin/combs.py
+++ b/src/xpinyin/combs.py
@@ -35,10 +35,10 @@ def _get_comb_indexes(lengths: List[int], n=None) -> List[List[int]]:
     return combs
 
 
-def get_combs(options: List[List[str]], splitter='', n=10) -> List[str]:
+def get_combs(options: List[List[str]], splitter='', n=None) -> List[str]:
     """
+    Given a list of options, returns up to n combinations
     e.g.: [['a'], ['1' ,'2'], ['@']] -> [a1@, a2@]
-    Note: the order is not guaranteed
     """
     combs = []
     comb_numbers = [len(o) for o in options]
diff --git a/src/xpinyin/tests.py b/src/xpinyin/tests.py
index b09d56a..6b8cd2d 100644
--- a/src/xpinyin/tests.py
+++ b/src/xpinyin/tests.py
@@ -41,6 +41,23 @@ def test_get_initials_with_splitter(self):
     def test_get_pinyins_with_default_splitter(self):
         self.assertEqual(self.p.get_pinyins(u'上海'), [u'shang-hai'])
 
+    def test_get_pinyins_single_char(self):
+        self.assertEqual(self.p.get_pinyins(u'乐', splitter='', tone_marks='marks'),
+                         ['lè', 'yuè', 'yào', 'luò', 'liáo'])  # 4E50	LE4 YUE4 YAO4 LUO4 LIAO2
+
+    def test_get_pinyins_two_chars(self):
+        combs1 = self.p.get_pinyins(u'音', splitter='', tone_marks='marks')
+        combs2 = self.p.get_pinyins(u'乐', splitter='', tone_marks='marks')
+        combs12 = self.p.get_pinyins(u'音乐', splitter='', tone_marks='marks')
+        self.assertEqual(len(combs12), len(combs1) * len(combs2))
+        self.assertIn('yīnyuè', combs12)
+
+    def test_get_pinyins_no_tones_uniq(self):
+        self.assertEqual(['ma'], self.p.get_pinyins(u'吗', splitter='', tone_marks=None))
+
+    def test_get_pinyins_max_number(self):
+        self.assertEqual(5, len(self.p.get_pinyins(u'音乐', splitter='', n=5)))
+
 
 if __name__ == '__main__':
     unittest.main()

From f10a70091cb733267c90f3b7702fd5a4b99a09a1 Mon Sep 17 00:00:00 2001
From: Shachar Mirkin <shacharmirkin@gmail.com>
Date: Thu, 10 Dec 2020 22:55:08 +0100
Subject: [PATCH 3/4] Add tests, set combinations limit

Add documentation, tests; set limit to avoid exponential explosion
---
 src/xpinyin/__init__.py | 38 ++++++++++++++++--------------
 src/xpinyin/combs.py    | 34 +++++++++++++--------------
 src/xpinyin/tests.py    | 52 ++++++++++++++++++++++++++++++++++-------
 3 files changed, 81 insertions(+), 43 deletions(-)

diff --git a/src/xpinyin/__init__.py b/src/xpinyin/__init__.py
index 7cae020..961d3d6 100644
--- a/src/xpinyin/__init__.py
+++ b/src/xpinyin/__init__.py
@@ -19,7 +19,7 @@
 
 class Pinyin(object):
     """translate chinese hanzi to pinyin by python, inspired by flyerhzm’s
-    `chinese\_pinyin`_ gem
+    `chinese_pinyin`_ gem
 
     usage
     -----
@@ -51,7 +51,7 @@ class Pinyin(object):
         'S H'
 
     请输入utf8编码汉字
-    .. _chinese\_pinyin: https://github.com/flyerhzm/chinese_pinyin
+    .. _chinese_pinyin: https://github.com/flyerhzm/chinese_pinyin
     """
 
     data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
@@ -110,35 +110,39 @@ def convert_pinyin(word, convert):
             return word.upper()
 
     def get_pinyins(self, chars: str, splitter: str = u'-',
-                    tone_marks: Optional[str] = None, convert: str = 'lower', n=None) -> List[str]:
-        all_pinyins = []  # a list of lists of pinyin options for each char
-        flag = 1  # in the list (probably not aChinese character)
+                    tone_marks: Optional[str] = None, convert: str = 'lower', n: int = 10) -> List[str]:
+        """
+        Get All pinyin combinations given all possible readings of each character.
+        The number of combinations is limited by default to 10 to avoid exponential explosion on long texts.
+        """
+        all_pinyin_options = []  # a list of lists that we'll fill with all pinyin options for each character
+        flag = 1  # in the list (otherwise, probably not a Chinese character)
         for char in chars:
             key = "%X" % ord(char)
             if key not in self.dict:
                 if flag == 1:
-                    all_pinyins.append([char])  # add as is
+                    all_pinyin_options.append([char])  # add as is
                 else:
-                    all_pinyins[-1][-1] += char  # add to previous sequence of non Chinese chars
-                flag = 0
+                    all_pinyin_options[-1][-1] += char  # add to previous sequence of non Chinese chars
+                flag = 0  # within a sequence of non Chinese characters
             else:
                 if tone_marks is None:  # in this case we may have duplicates if the variations differ just by the tones
-                    char_py_versions = []
+                    char_py_options = []
                     for v in self.dict[key].split():
-                        if v[0:-1] not in char_py_versions:  # we remove the tone mark while we're at it
-                            char_py_versions.append(v[0:-1])
+                        if v[0:-1] not in char_py_options:  # we remove the tone mark while we're at it
+                            char_py_options.append(v[0:-1])
                 else:
-                    char_py_versions = self.dict[key].split()
-                last = 1 if n == 1 else len(char_py_versions)
+                    char_py_options = self.dict[key].split()
+                last = 1 if n == 1 else len(char_py_options)
                 if tone_marks == 'marks':
-                    char_options = [Pinyin.decode_pinyin(o) for o in char_py_versions[0:last]]
+                    char_options = [Pinyin.decode_pinyin(o) for o in char_py_options[0:last]]
                 else:  # 'numbers' or None
-                    char_options = [o for o in char_py_versions[0:last]]
+                    char_options = [o for o in char_py_options[0:last]]
 
-                all_pinyins.append([Pinyin.convert_pinyin(c, convert) for c in char_options])
+                all_pinyin_options.append([Pinyin.convert_pinyin(c, convert) for c in char_options])
                 flag = 1
 
-        return get_combs(all_pinyins, splitter, n=n)
+        return get_combs(options=all_pinyin_options, splitter=splitter, n=n)
 
     def get_pinyin(self, chars: str, splitter: str = u'-',
                    tone_marks=None, convert: str = 'lower') -> str:
diff --git a/src/xpinyin/combs.py b/src/xpinyin/combs.py
index 406ecb7..ab62f22 100644
--- a/src/xpinyin/combs.py
+++ b/src/xpinyin/combs.py
@@ -1,30 +1,33 @@
 from typing import List
 
 
-def _get_comb_indexes(lengths: List[int], n=None) -> List[List[int]]:
+def _get_comb_indexes(num_options_list: List[int], n=None) -> List[List[int]]:
     """
     Given a list with the number of possible options per place, returns a list of numbers representing combinations.
     The combinations are created via additions to a multi-radix number, from left to right
     (i.e. from smaller to larger numbers).
 
-    @param n The maximal number of requested combinations.
+    e.g. [2, 2, 1] -> [[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0]]
+    i.e. we have 2 options (0, 1) for the first and second places and one option (0) for the third.
+
+    :param num_options_list: a list with the number of options per place
+    :param n: The maximal number of requested combinations. If None, all possible combinations will be returned
     """
     # calculate the maximal number of possible combinations
     n_max = 1
-    for j in lengths:
+    for j in num_options_list:
         n_max *= j
-
     n = min(n, n_max) if n is not None else n_max
     if n == 0:
         raise ValueError("Can't create combinations with 0-length lists")
-    n_items = len(lengths)
 
+    n_items = len(num_options_list)
     curr = [0] * n_items
     combs = [list.copy(curr)]
     i = n_items - 1
     count = 1
     while count < n:
-        curr[i] = (curr[i] + 1) % lengths[i]
+        curr[i] = (curr[i] + 1) % num_options_list[i]
         if curr[i] != 0:
             combs.append(list.copy(curr))
             count += 1
@@ -35,10 +38,15 @@ def _get_comb_indexes(lengths: List[int], n=None) -> List[List[int]]:
     return combs
 
 
-def get_combs(options: List[List[str]], splitter='', n=None) -> List[str]:
+def get_combs(options: List[List[str]], splitter: str = '', n: int = None) -> List[str]:
     """
-    Given a list of options, returns up to n combinations
+    Given a list of options per place, returns up to n combinations
     e.g.: [['a'], ['1' ,'2'], ['@']] -> [a1@, a2@]
+    For instance, ['1' ,'2'] is the group defining the options for the second place
+
+    :param options: a list with a list of options for each group.
+    :param splitter: a string to separate the groups
+    :param n: The maximal number of requested combinations. If None, all possible combinations will be returned
     """
     combs = []
     comb_numbers = [len(o) for o in options]
@@ -51,13 +59,3 @@ def get_combs(options: List[List[str]], splitter='', n=None) -> List[str]:
         combs.append(splitter.join(comb))
 
     return combs
-
-
-def main():
-    lengths = [1, 2, 1]
-    print(_get_comb_indexes(lengths))
-    print(get_combs([['a', 'b'], ['1', '2'], ['@', '#']]))
-
-
-if __name__ == '__main__':
-    main()
diff --git a/src/xpinyin/tests.py b/src/xpinyin/tests.py
index 6b8cd2d..5c6f68f 100644
--- a/src/xpinyin/tests.py
+++ b/src/xpinyin/tests.py
@@ -2,9 +2,12 @@
 # -*- coding: utf-8 -*-
 import unittest
 
+from xpinyin.combs import _get_comb_indexes, get_combs
+
 
 class PinyinTests(unittest.TestCase):
-    def Pinyin(self, *a, **kw):
+    @staticmethod
+    def Pinyin(*a, **kw):
         from xpinyin import Pinyin
 
         return Pinyin(*a, **kw)
@@ -36,14 +39,34 @@ def test_get_initials_with_splitter(self):
         self.assertEqual(self.p.get_initials(u'你好', u' '), u'N H')
         self.assertEqual(self.p.get_initials(u'你好', u''), u'NH')
 
-    # --- testing combinations ---
+    # --- testing combinations auxiliary functions ---
+
+    def test_get_comb_indexes(self):
+        self.assertEqual([[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0]], _get_comb_indexes([2, 2, 1]))
+
+    def test_get_comb_indexes_max_num(self):
+        self.assertEqual([[0, 0, 0], [0, 1, 0], [1, 0, 0]], _get_comb_indexes([2, 2, 1], 3))
+
+    def test_get_combs(self):
+        self.assertEqual(['a1@', 'a1#', 'a2@', 'a2#', 'b1@', 'b1#', 'b2@', 'b2#'],
+                         get_combs([['a', 'b'], ['1', '2'], ['@', '#']]))
+
+    def test_get_combs_splitter_max_num(self):
+        self.assertEqual(['a 1 @', 'a 1 #', 'a 2 @', 'a 2 #', 'b 1 @'],
+                         get_combs([['a', 'b'], ['1', '2'], ['@', '#']], splitter=' ', n=5))
+
+    def test_get_combs_max_num_too_big(self):
+        self.assertEqual(['a||1||@', 'a||1||#', 'a||2||@', 'a||2||#', 'b||1||@', 'b||1||#', 'b||2||@', 'b||2||#'],
+                         get_combs([['a', 'b'], ['1', '2'], ['@', '#']], splitter='||', n=100))
+
+    # --- testing pinyin combinations ---
 
     def test_get_pinyins_with_default_splitter(self):
         self.assertEqual(self.p.get_pinyins(u'上海'), [u'shang-hai'])
 
     def test_get_pinyins_single_char(self):
-        self.assertEqual(self.p.get_pinyins(u'乐', splitter='', tone_marks='marks'),
-                         ['lè', 'yuè', 'yào', 'luò', 'liáo'])  # 4E50	LE4 YUE4 YAO4 LUO4 LIAO2
+        self.assertEqual(['lè', 'yuè', 'yào', 'luò', 'liáo'],  # 4E50	LE4 YUE4 YAO4 LUO4 LIAO2
+                         self.p.get_pinyins(u'乐', splitter='', tone_marks='marks'))
 
     def test_get_pinyins_two_chars(self):
         combs1 = self.p.get_pinyins(u'音', splitter='', tone_marks='marks')
@@ -55,9 +78,22 @@ def test_get_pinyins_two_chars(self):
     def test_get_pinyins_no_tones_uniq(self):
         self.assertEqual(['ma'], self.p.get_pinyins(u'吗', splitter='', tone_marks=None))
 
-    def test_get_pinyins_max_number(self):
+    def test_get_pinyins_max_num(self):
         self.assertEqual(5, len(self.p.get_pinyins(u'音乐', splitter='', n=5)))
 
-
-if __name__ == '__main__':
-    unittest.main()
+    def test_get_pinyins_mixed_words(self):
+        self.assertEqual(self.p.get_pinyins(u'ABC串123', splitter=u' ', tone_marks='marks'),
+                         ['ABC chuàn 123', 'ABC guàn 123'])
+
+    def test_get_pinyins_long_seq(self):
+        text = u"""汉语拼音（Hànyǔ Pīnyīn），
+            簡稱拼音，是一種以拉丁字母作普通话（現代標準漢語）標音的方案，為中文羅馬拼音的國際標準規範。
+            汉语拼音在中国大陆作为基础教育内容全面使用，是义务教育的重要内容。在海外，特别是常用現代標準漢語的地区如新加坡、
+            马来西亚、菲律宾和美国唐人街等，目前也在汉语教育中进行汉语拼音教学。臺灣自2008年開始，
+            中文譯音使用原則也採用漢語拼音[1]，但舊護照姓名和部分地名、道路名稱仍採用威妥瑪拼音、
+            郵政式拼音、國語羅馬字、國音二式抑或通用拼音[2]。"""
+        self.assertEqual(20, len(self.p.get_pinyins(text, n=20)))
+        self.assertEqual(10, len(self.p.get_pinyins(text)))  # limited to 10 by default
+
+        if __name__ == '__main__':
+            unittest.main()

From 4e00960cec93f218e9067368c98541385ecf5f27 Mon Sep 17 00:00:00 2001
From: Shachar Mirkin <shacharmirkin@gmail.com>
Date: Fri, 11 Dec 2020 13:15:22 +0100
Subject: [PATCH 4/4] Add multiple readings example to README

---
 README.rst | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/README.rst b/README.rst
index 5bc7dd5..c1a6ac0 100644
--- a/README.rst
+++ b/README.rst
@@ -50,13 +50,17 @@ Usage
     'SH'
     >>> p.get_initials(u"上海", u' ')
     'S H'
-    
+    >>> # get combinations of the multiple readings of the characters
+    >>> p.get_pinyins(u'好吗？', splitter=u'', tone_marks='marks')
+    ['hǎoma？', 'hǎomá？', 'hǎomǎ？', 'hàoma？', 'hàomá？', 'hàomǎ？']
+
+
     如果方法中传入变量，那么直接加前缀是不可以了。而是要将变量转为utf-8编码：
     >>> wordvalue = '中国'
     >>> wordvalue= unicode(wordvalue,'utf-8')
     >>> s = p.get_initials(wordvalue, u'').lower()
     'zg'
-    
+
 
 请输入utf8编码汉字