Skip to content

Commit

Permalink
Test #656, #624: special case rules for tokenizer with attributes.
Browse files Browse the repository at this point in the history
  • Loading branch information
honnibal committed Nov 25, 2016
1 parent 1e0f566 commit 6652f2a
Showing 1 changed file with 48 additions and 0 deletions.
48 changes: 48 additions & 0 deletions spacy/tests/unit/test_tokenizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from __future__ import unicode_literals
import pytest
import re

from ...vocab import Vocab
from ...tokenizer import Tokenizer


@pytest.fixture
def vocab():
    """Build a Vocab whose tag map sends the 'NN' tag to the NOUN pos."""
    tag_map = {'NN': {'pos': 'NOUN'}}
    return Vocab(tag_map=tag_map)

@pytest.fixture
def rules():
    """Tokenizer exception rules -- start with none so tests add their own."""
    no_rules = {}
    return no_rules

@pytest.fixture
def prefix_search():
    """No prefix splitting: the Tokenizer accepts None for this callback."""
    return None

@pytest.fixture
def suffix_search():
    """No suffix splitting: the Tokenizer accepts None for this callback."""
    return None

@pytest.fixture
def infix_finditer():
    """No infix splitting: the Tokenizer accepts None for this callback."""
    return None


@pytest.fixture
def tokenizer(vocab, rules, prefix_search, suffix_search, infix_finditer):
    """Assemble a bare Tokenizer from the minimal fixtures above."""
    return Tokenizer(
        vocab,
        rules,
        prefix_search,
        suffix_search,
        infix_finditer,
    )


def test_add_special_case(tokenizer):
    """A special-case rule should override tokenization: 'dog' -> 'd', 'og'."""
    subtokens = [{'orth': 'd'}, {'orth': 'og'}]
    tokenizer.add_special_case('dog', subtokens)
    doc = tokenizer('dog')
    assert [doc[0].text, doc[1].text] == ['d', 'og']


def test_special_case_tag(tokenizer):
    """Attributes on a special-case rule (here 'tag') should be applied to the
    resulting tokens, and the tag map should project 'NN' onto pos NOUN."""
    subtokens = [{'orth': 'd', 'tag': 'NN'}, {'orth': 'og'}]
    tokenizer.add_special_case('dog', subtokens)
    doc = tokenizer('dog')
    first, second = doc[0], doc[1]
    assert first.text == 'd'
    assert first.tag_ == 'NN'
    assert first.pos_ == 'NOUN'
    assert second.text == 'og'

0 comments on commit 6652f2a

Please sign in to comment.