Skip to content

Commit

Permalink
to and from bytes method for rule and MWE rule
Browse files Browse the repository at this point in the history
  • Loading branch information
apmoore1 committed Mar 21, 2022
1 parent 75e341d commit c71c3b5
Show file tree
Hide file tree
Showing 6 changed files with 186 additions and 2 deletions.
41 changes: 41 additions & 0 deletions docs/docs/api/taggers/rules/mwe.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,44 @@ the rule matches stated in the class docstring above.

- `List[List[RankingMetaData]]` <br/>

<a id="pymusas.taggers.rules.mwe.MWERule.to_bytes"></a>

### to\_bytes

```python
class MWERule(Rule):
| ...
| def to_bytes() -> bytes
```

Serialises the [`MWERule`](#mwerule) to a bytestring.

<h4 id="to_bytes.returns">Returns<a className="headerlink" href="#to_bytes.returns" title="Permanent link">&para;</a></h4>


- `bytes` <br/>

<a id="pymusas.taggers.rules.mwe.MWERule.from_bytes"></a>

### from\_bytes

```python
class MWERule(Rule):
| ...
| @staticmethod
| def from_bytes(bytes_data: bytes) -> "MWERule"
```

Loads [`MWERule`](#mwerule) from the given bytestring and returns it.

<h4 id="from_bytes.parameters">Parameters<a className="headerlink" href="#from_bytes.parameters" title="Permanent link">&para;</a></h4>


- __bytes\_data__ : `bytes` <br/>
The bytestring to load.

<h4 id="from_bytes.returns">Returns<a className="headerlink" href="#from_bytes.returns" title="Permanent link">&para;</a></h4>


- [`MWERule`](#mwerule) <br/>

43 changes: 43 additions & 0 deletions docs/docs/api/taggers/rules/rule.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,46 @@ equal length.

- `List[List[RankingMetaData]]` <br/>

<a id="pymusas.taggers.rules.rule.Rule.to_bytes"></a>

### to\_bytes

```python
class Rule(ABC):
| ...
| @abstractmethod
| def to_bytes() -> bytes
```

Serialises the [`Rule`](#rule) to a bytestring.

<h4 id="to_bytes.returns">Returns<a className="headerlink" href="#to_bytes.returns" title="Permanent link">&para;</a></h4>


- `bytes` <br/>

<a id="pymusas.taggers.rules.rule.Rule.from_bytes"></a>

### from\_bytes

```python
class Rule(ABC):
| ...
| @staticmethod
| @abstractmethod
| def from_bytes(bytes_data: bytes) -> "Rule"
```

Loads [`Rule`](#rule) from the given bytestring and returns it.

<h4 id="from_bytes.parameters">Parameters<a className="headerlink" href="#from_bytes.parameters" title="Permanent link">&para;</a></h4>


- __bytes\_data__ : `bytes` <br/>
The bytestring to load.

<h4 id="from_bytes.returns">Returns<a className="headerlink" href="#from_bytes.returns" title="Permanent link">&para;</a></h4>


- [`Rule`](#rule) <br/>

29 changes: 29 additions & 0 deletions pymusas/taggers/rules/mwe.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,3 +203,32 @@ def tag_n_gram_based_MWE(mwe_type: LexiconType,
token_ranking_meta_data)

return token_ranking_meta_data

def to_bytes(self) -> bytes:
    '''
    Serialises the :class:`MWERule` to a bytestring.

    The returned bytes are exactly what `self.mwe_lexicon_collection.to_bytes()`
    produces; nothing else from the rule is written.

    # Returns

    `bytes`
    '''
    serialised_collection = self.mwe_lexicon_collection.to_bytes()
    return serialised_collection

@staticmethod
def from_bytes(bytes_data: bytes) -> "MWERule":
    '''
    Loads :class:`MWERule` from the given bytestring and returns it.

    A rule is first created from an empty lexicon with no POS mapper, and
    its `mwe_lexicon_collection` is then replaced by the collection
    deserialised from `bytes_data`.

    # Parameters

    bytes_data : `bytes`
        The bytestring to load.

    # Returns

    :class:`MWERule`
    '''
    # Build the placeholder rule first, then swap in the restored collection.
    restored_rule = MWERule({}, None)
    restored_rule.mwe_lexicon_collection = \
        MWELexiconCollection.from_bytes(bytes_data)
    return restored_rule
28 changes: 28 additions & 0 deletions pymusas/taggers/rules/rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,31 @@ def __call__(self, tokens: List[str], lemmas: List[str],
`List[List[RankingMetaData]]`
'''
... # pragma: no cover

@abstractmethod
def to_bytes(self) -> bytes:
    '''
    Serialises the :class:`Rule` to a bytestring.

    This method is abstract and has no implementation here; concrete rules
    supply their own serialisation.

    # Returns

    `bytes`
    '''
    ... # pragma: no cover

@staticmethod
@abstractmethod
def from_bytes(bytes_data: bytes) -> "Rule":
    '''
    Loads :class:`Rule` from the given bytestring and returns it.

    This static method is abstract and has no implementation here; concrete
    rules supply their own deserialisation.

    # Parameters

    bytes_data : `bytes`
        The bytestring to load.

    # Returns

    :class:`Rule`
    '''
    ... # pragma: no cover
36 changes: 34 additions & 2 deletions tests/taggers/rules/test_mwe.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,12 +215,14 @@ def wildcard_data(request: SubRequest) -> Tuple[Tuple[List[str],
return (test_data, pos_mapped_lexicon, noun_mapper)


@pytest.mark.parametrize('from_bytes', [False, True])
def test_mwe_rule__NON_SPECIAL_CASES(non_special_data: Tuple[Tuple[List[str],
List[str],
List[str],
List[List[RankingMetaData]]],
Dict[str, List[str]],
Optional[Dict[str, List[str]]]]
Optional[Dict[str, List[str]]]],
from_bytes: bool
) -> None:
'''
This tests MWE Rule when using only NON SPECIAL CASES, which are direct
Expand All @@ -246,16 +248,21 @@ def test_mwe_rule__NON_SPECIAL_CASES(non_special_data: Tuple[Tuple[List[str],
# Test that it covers all of the non special syntax cases, e.g. all of the
# cases that do not contain a wildcard or curly braces.
mwe_rule = MWERule(lexicon, pos_mapper)
if from_bytes:
mwe_rule = MWERule.from_bytes(mwe_rule.to_bytes())

compare_token_ranking_meta_data(expected_ranking_meta_data,
mwe_rule(tokens, lemmas, pos_tags))


@pytest.mark.parametrize('from_bytes', [False, True])
def test_mwe_rules_WILDCARD_CASES(wildcard_data: Tuple[Tuple[List[str],
List[str],
List[str],
List[List[RankingMetaData]]],
Dict[str, List[str]],
Optional[Dict[str, List[str]]]]
Optional[Dict[str, List[str]]]],
from_bytes: bool
) -> None:
'''
This tests MWE Rule when using only WILDCARD cases, e.g. `ski_noun *_noun`
Expand All @@ -264,5 +271,30 @@ def test_mwe_rules_WILDCARD_CASES(wildcard_data: Tuple[Tuple[List[str],
tokens, lemmas, pos_tags, expected_ranking_meta_data = data

mwe_rule = MWERule(lexicon, pos_mapper)
if from_bytes:
mwe_rule = MWERule.from_bytes(mwe_rule.to_bytes())

compare_token_ranking_meta_data(expected_ranking_meta_data,
mwe_rule(tokens, lemmas, pos_tags))


def test_to_from_bytes() -> None:
    '''
    Round trips an :class:`MWERule` through `to_bytes` and `from_bytes` and
    checks that the restored rule's lexicon collection has the same length,
    the same POS mapper, and the same value for every key as the original.
    '''
    pos_mapper = {'NN': ['noun']}
    lexicon = {
        "North_noun East_noun London_*": ['Z1'],
        "North_* East**_noun London_noun": ['Z2'],
        "East_* London_noun": ['Z3'],
        "East_* London_*": ['Z4'],
        "*as*_noun London_*": ['Z5']
    }
    original_rule = MWERule(lexicon, pos_mapper)
    restored_rule = MWERule.from_bytes(original_rule.to_bytes())

    original_collection = original_rule.mwe_lexicon_collection
    restored_collection = restored_rule.mwe_lexicon_collection

    assert len(restored_collection) == len(original_collection)
    assert original_collection.pos_mapper == restored_collection.pos_mapper

    # Every entry of the original collection must survive the round trip.
    for key, value in original_collection.items():
        assert value == restored_collection[key]
11 changes: 11 additions & 0 deletions tests/taggers/rules/test_rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,17 @@ def __call__(self, tokens: List[str], lemmas: List[str],
pos_tags: List[str]) -> List[List[RankingMetaData]]:
ranking_meta_data: List[List[RankingMetaData]] = [[]]
return ranking_meta_data

def to_bytes(self) -> bytes:
    # Fixed payload; the test asserts on this exact value further down.
    return b'test'

@staticmethod
def from_bytes(bytes_data: bytes) -> 'TestRule':
    # `bytes_data` is not read; a new instance is returned regardless.
    return TestRule()

concrete_rule = TestRule()
assert [[]] == concrete_rule([], [], [])
assert isinstance(concrete_rule, Rule)

assert b'test' == concrete_rule.to_bytes()
assert isinstance(TestRule.from_bytes(b'test'), TestRule)

0 comments on commit c71c3b5

Please sign in to comment.