From 5e0d36c8a63b48bf09d1b85c4e2d93524f48de00 Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Sat, 20 Jul 2024 05:52:08 +0800 Subject: [PATCH] Attempt to Optimize Performance & Memory Usage --- src/ToJyutping/Jyutping.py | 14 +++++++------- src/ToJyutping/ToJyutping.py | 14 +++++--------- src/ToJyutping/Trie.py | 34 +++++++++++++++++++++++----------- 3 files changed, 35 insertions(+), 27 deletions(-) diff --git a/src/ToJyutping/Jyutping.py b/src/ToJyutping/Jyutping.py index 6ea669d..251b4e9 100644 --- a/src/ToJyutping/Jyutping.py +++ b/src/ToJyutping/Jyutping.py @@ -50,7 +50,7 @@ class Jyutping: rhyme: str tone_id: int tone: str - syllable: str + jyutping: str def __init__(self, x: Union[str, int]): if type(x) == int: @@ -61,9 +61,9 @@ def __init__(self, x: Union[str, int]): object.__setattr__(self, "rhyme", rhyme[self.rhyme_id - 54] if self.rhyme_id >= 54 else nucleus[self.rhyme_id // 9] + coda[self.rhyme_id % 9]) object.__setattr__(self, "tone_id", x % 6) object.__setattr__(self, "tone", str(self.tone_id + 1)) - object.__setattr__(self, "syllable", self.onset + self.rhyme + self.tone) + object.__setattr__(self, "jyutping", self.onset + self.rhyme + self.tone) else: - object.__setattr__(self, "syllable", x) + object.__setattr__(self, "jyutping", x) _onset, _rhyme, _nucleus, _coda, _tone = re.match(regex, x).groups() object.__setattr__(self, "onset", _onset) object.__setattr__(self, "onset_id", onset.index(_onset)) @@ -77,7 +77,7 @@ def __init__(self, x: Union[str, int]): object.__setattr__(self, "id", self.tone_id + self.rhyme_id * 6 + self.onset_id * 402) def __str__(self): - return self.syllable + return self.jyutping def __eq__(self, other): return isinstance(other, Jyutping) and self.id == other.id @@ -112,15 +112,15 @@ def g2p(self, offset=0, *, minimal=False) -> Union[Tuple[int, int, int], Tuple[i class JyutpingList(List[Jyutping]): @property - def syllables(self): - return ' '.join(map(attrgetter('syllable'), self)) + def jyutping(self): + return ' '.join(map(attrgetter('jyutping'), self)) @property def ipa(self): return '.'.join(map(attrgetter('ipa'), self)) def __str__(self): - return self.syllables + return self.jyutping def __hash__(self): return hash(tuple(self)) diff --git a/src/ToJyutping/ToJyutping.py b/src/ToJyutping/ToJyutping.py index ddf8e3d..63a9400 100644 --- a/src/ToJyutping/ToJyutping.py +++ b/src/ToJyutping/ToJyutping.py @@ -1,6 +1,5 @@ from os import path from typing import List, Literal, Optional, Tuple, Union, overload -from operator import attrgetter import re if __package__: from . import utils @@ -16,11 +15,8 @@ with open(path.join(here, 'trie.txt'), encoding='utf-8') as f: t = Trie.Trie(f.read()) -_get_syllables = attrgetter("syllables") -_get_ipa = attrgetter("ipa") - def get_jyutping_list(s: str) -> List[Tuple[str, Optional[str]]]: - return [(k, v and v.syllables) for k, v in t.get(s)] + return t.get(s, 'jyutping') def get_jyutping(s: str) -> str: l = '' @@ -32,10 +28,10 @@ def get_jyutping_text(s: str) -> str: return utils.format_romanization_text(s, get_jyutping_list) def get_jyutping_candidates(s: str) -> List[Tuple[str, List[str]]]: - return [(k, list(map(_get_syllables, v))) for k, v in t.get_all(s)] + return t.get_all(s, 'jyutping') def get_ipa_list(s: str) -> List[Tuple[str, Optional[str]]]: - return [(k, v and v.ipa) for k, v in t.get(s)] + return t.get(s, 'ipa') def get_ipa(s: str) -> str: l = '' @@ -47,7 +43,7 @@ def get_ipa_text(s: str) -> str: return utils.format_ipa_text(s, get_ipa_list) def get_ipa_candidates(s: str) -> List[Tuple[str, List[str]]]: - return [(k, list(map(_get_ipa, v))) for k, v in t.get_all(s)] + return t.get_all(s, 'ipa') @overload def g2p(s: str, offset: int = 0, *, minimal: Literal[False] = False) -> List[Tuple[int, int, int]]: ... @@ -56,7 +52,7 @@ def g2p(s: str, offset: int = 0, *, minimal: Literal[False] = False) -> List[Tup def g2p(s: str, offset: int = 0, *, minimal: Literal[True]) -> List[Tuple[int, int, int, int]]: ... def g2p(s: str, offset=0, minimal=False) -> Union[List[Tuple[int, int, int]], List[Tuple[int, int, int, int]]]: - return [p.g2p(offset=offset, minimal=minimal) for k, v in t.get(s) for p in v or ()] + return [p.g2p(offset=offset, minimal=minimal) for k, v in t.get(s) for p in (v if isinstance(v, list) else v and (v,) or ())] def jyutping2ipa(s: str) -> str: return '.'.join(Jyutping.Jyutping(t).ipa for t in re.split('\\W+', s.lower())) diff --git a/src/ToJyutping/Trie.py b/src/ToJyutping/Trie.py index 25a6e34..24459f1 100644 --- a/src/ToJyutping/Trie.py +++ b/src/ToJyutping/Trie.py @@ -1,4 +1,4 @@ -from typing import DefaultDict, Dict, List, Optional, Tuple +from typing import DefaultDict, Dict, List, Literal, Optional, Tuple, Union, overload from functools import reduce from collections import defaultdict from operator import itemgetter @@ -10,7 +10,7 @@ import Jyutping class Node(Dict[str, 'Node']): - v: Optional[List[Jyutping.JyutpingList]] = None + v: Optional[List[Union[Jyutping.Jyutping, Jyutping.JyutpingList]]] = None class Trie: def __init__(self, s: str): @@ -26,7 +26,7 @@ def __init__(self, s: str): while ord(s[j]) < 123 or s[j] == '|': j += 1 if i != j: - f.v = [Jyutping.JyutpingList([Jyutping.Jyutping(s) for s in Jyutping.to_id(x)]) for x in s[i:j].split('|')] + f.v = [Jyutping.Jyutping(next(Jyutping.to_id(x))) if len(x) == 2 else Jyutping.JyutpingList(Jyutping.Jyutping(s) for s in Jyutping.to_id(x)) for x in s[i:j].split('|')] i = j if s[i] == '{': i += 1 @@ -35,8 +35,14 @@ def __init__(self, s: str): i += 1 n.pop() - def get(self, s: str): - r: List[Tuple[str, Optional[Jyutping.JyutpingList]]] = [] + @overload + def get(self, s: str, attr: Literal['jyutping', 'ipa']) -> List[Tuple[str, Optional[str]]]: ... + + @overload + def get(self, s: str, attr: None = None) -> List[Tuple[str, Optional[Union[Jyutping.Jyutping, Jyutping.JyutpingList]]]]: ... + + def get(self, s: str, attr: Optional[Literal['jyutping', 'ipa']] = None): + r: List[Tuple[str, Optional[Union[str, Jyutping.Jyutping, Jyutping.JyutpingList]]]] = [] i = 0 while i < len(s): t = self.t @@ -51,24 +57,30 @@ def get(self, s: str): c = t.v[0] k = j if k == i: - r.append((s[i], c)) + r.append((s[i], getattr(c, attr, None) if attr else c)) i += 1 elif c: n = i while i <= k: - r.append((s[i], Jyutping.JyutpingList([c[i - n]]))) + r.append((s[i], getattr(c[i - n], attr, None) if attr else c[i - n])) i += 1 return r - def get_all(self, s: str) -> List[Tuple[str, List[Jyutping.JyutpingList]]]: + @overload + def get_all(self, s: str, attr: Literal['jyutping', 'ipa']) -> List[Tuple[str, List[str]]]: ... + + @overload + def get_all(self, s: str, attr: None = None) -> List[Tuple[str, List[Union[Jyutping.Jyutping, Jyutping.JyutpingList]]]]: ... + + def get_all(self, s: str, attr: Optional[Literal['jyutping', 'ipa']] = None) -> List[Tuple[str, List[Union[str, Jyutping.Jyutping, Jyutping.JyutpingList]]]]: t = self.t def initialize(c: str): d = defaultdict(list) u = t.get(c) if u is not None and u.v: - d[0] = u.v + d[0] = [getattr(p, attr, None) for p in u.v] if attr else u.v return d - r: List[Tuple[str, DefaultDict[int, List[Jyutping.JyutpingList]]]] = [(c, initialize(c)) for c in s] + r: List[Tuple[str, DefaultDict[int, List[Union[str, Jyutping.Jyutping, Jyutping.JyutpingList]]]]] = [(c, initialize(c)) for c in s] for i in range(len(r)): u = t.get(r[i][0]) if u is None: @@ -81,5 +93,5 @@ def initialize(c: str): l = j - i for p in u.v: for k in range(i, j + 1): - r[k][1][l].append(Jyutping.JyutpingList([p[k - i]])) + r[k][1][l].append(getattr(p[k - i], attr, None) if attr else p[k - i]) return [(c, utils.flat_dedupe(map(itemgetter(1), sorted(s.items(), key=itemgetter(0), reverse=True)))) for c, s in r]