Skip to content

Commit

Permalink
Merge pull request #250 from mh-northlander/fix/python-doc
Browse files (browse the repository at this point in the history)
Fix python doc
  • Loading branch information
mh-northlander authored Jun 3, 2024
2 parents 5a92ccd + 96ce49e commit 707e1bf
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 10 deletions.
2 changes: 1 addition & 1 deletion python/docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_static_path = []


# -- Extension configuration -------------------------------------------------
Expand Down
31 changes: 24 additions & 7 deletions python/py_src/sudachipy/sudachipy.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ PartialPOS = Union[
FieldSet = Optional[Set[Literal["surface", "pos", "normalized_form", "dictionary_form", "reading_form",
"word_structure", "split_a", "split_b", "synonym_group_id"]]]


class SplitMode:
"""
Unit to split text.
Expand All @@ -31,6 +32,7 @@ class SplitMode:
A: ClassVar[SplitMode] = ...
B: ClassVar[SplitMode] = ...
C: ClassVar[SplitMode] = ...

@classmethod
def __init__(cls, mode: str = "C") -> None:
"""
Expand Down Expand Up @@ -113,8 +115,8 @@ class Dictionary:
:param mode: Use this split mode (C by default)
:param fields: ask Sudachi to load only a subset of fields. See https://worksapplications.github.io/sudachi.rs/python/topics/subsetting.html
:param handler: custom callable to transform MorphemeList into list of tokens. See https://github.com/huggingface/tokenizers/blob/master/bindings/python/examples/custom_components.py
First two parameters are the index (int) and HuggingFace NormalizedString.
The handler must return a List[NormalizedString]. By default, just segment the tokens.
First two parameters are the index (int) and HuggingFace NormalizedString.
The handler must return a List[NormalizedString]. By default, just segment the tokens.
:param projection: Projection override for created Tokenizer. See Config.projection for values.
"""
...
Expand All @@ -128,12 +130,27 @@ class Dictionary:
"""
...

def lookup(self, query: str, out: Optional[MorphemeList] = None) -> MorphemeList: ...
def lookup(self, surface: str, out: Optional[MorphemeList] = None) -> MorphemeList:
"""
Look up morphemes in the binary dictionary without performing the analysis.
All morphemes from the dictionary with the given surface string are returned,
with the last user dictionary searched first and the system dictionary searched last.
Inside a dictionary, morphemes are outputted in-binary-dictionary order.
Morphemes which are not indexed are not returned.
:param surface: find all morphemes with the given surface
:param out: if passed, reuse the given morpheme list instead of creating a new one.
See https://worksapplications.github.io/sudachi.rs/python/topics/out_param.html for details.
"""
...


class Morpheme:
"""
A morpheme (basic semantic unit of language).
"""

def __init__(self) -> None: ...

def begin(self) -> int:
Expand Down Expand Up @@ -248,6 +265,7 @@ class MorphemeList:
An object can not be instantiated manually.
Use Tokenizer.tokenize("") to create an empty morpheme list.
"""

def __init__(self) -> None: ...

@classmethod
Expand All @@ -274,9 +292,6 @@ class MorphemeList:
def __len__(self) -> int: ...





class Tokenizer:
SplitMode: ClassVar[SplitMode] = ...
@classmethod
Expand Down Expand Up @@ -325,9 +340,11 @@ class WordInfo:
def __init__(self) -> None: ...
def length(self) -> int: ...


class PosMatcher:
def __iter__(self) -> Iterator[POS]: ...
def __len__(self) -> int: ...

def __call__(self, m: Morpheme) -> bool:
"""
Checks whether a morpheme has matching POS
Expand Down Expand Up @@ -362,4 +379,4 @@ class PosMatcher:
Returns a POS matcher which matches all POS tags except ones defined in the current POS matcher
:return: PosMatcher
"""
...
...
5 changes: 3 additions & 2 deletions python/src/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,16 +328,17 @@ impl PyDictionary {
}

/// Look up morphemes in the binary dictionary without performing the analysis.
///
/// All morphemes from the dictionary with the given surface string are returned,
/// with the last user dictionary searched first and the system dictionary searched last.
/// Inside a dictionary, morphemes are outputted in-binary-dictionary order.
/// Morphemes which are not indexed are not returned.
///
/// :param surface: find all morphemes with the given surface
/// :param out: if passed, reuse the given morpheme list instead of creating a new one.
/// See https://worksapplications.github.io/sudachi.rs/python/topics/out_param.html for details.
/// See https://worksapplications.github.io/sudachi.rs/python/topics/out_param.html for details.
/// :type surface: str
/// type: out: sudachipy.MorphemeList
/// :type out: sudachipy.MorphemeList
#[pyo3(text_signature = "($self, surface, out = None) -> sudachipy.MorphemeList")]
fn lookup<'p>(
&'p self,
Expand Down

0 comments on commit 707e1bf

Please sign in to comment.