Skip to content

Commit

Permalink
Add unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jrrobison1 committed Aug 16, 2024
1 parent fe1d16b commit f16ae86
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 6 deletions.
6 changes: 3 additions & 3 deletions pycpidr/idea_density_rater_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from pycpidr.utils.constants import *
from pycpidr.utils.word_search_utils import (
is_beginning_of_sentence,
beginning_of_sentence,
is_repetition,
search_backwards,
)
Expand Down Expand Up @@ -203,7 +203,7 @@ def adjust_word_order(word_list: List[WordListItem], i: int, speech_mode: bool)
# Note: In some cases this may move a word too far right,
# but the effect on proposition counting is benign.
if word.lowercase_token in AUXILIARY_VERBS:
sentence_start = is_beginning_of_sentence(word_list, i)
sentence_start = beginning_of_sentence(word_list, i)
if sentence_start == i or word_list[sentence_start].tag in INTERROGATIVES:
# find out where to move to
target_position = i + 1
Expand Down Expand Up @@ -627,7 +627,7 @@ def handle_fillers(word_list: List[WordListItem], i: int, speech_mode: bool) ->
# 610
# A sentence consisting entirely of probable filler words is propositionless
if speech_mode and word.tag == SENTENCE_END:
bos = is_beginning_of_sentence(word_list, i)
bos = beginning_of_sentence(word_list, i)
k = 0
for j in range(bos, i):
if word_list[j].tag != "UH" and word_list[j].lowercase_token not in FILLER:
Expand Down
9 changes: 6 additions & 3 deletions tests/test_idea_density_rater.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
from pycpidr.idea_density_rater import count_words_and_propositions, rate_text
from pycpidr.tagger import tag_text
from pycpidr.word_item import WordListItem, WordList
from pycpidr.utils.word_search_utils import beginning_of_sentence
from pycpidr.utils.constants import SENTENCE_END

try:
nlp = spacy.load("en_core_web_sm")
Expand Down Expand Up @@ -220,11 +222,12 @@ def test_turner_1987_passage_2():

word_count, proposition_count, idea_density, word_list = rate_text(text, nlp)

assert word_count == 363
# Note: The original CPIDR 3.2 finds 366 words
assert word_count == 362

# Note: The original CPIDR 3.2 finds 191 propositions
assert proposition_count == 189
assert idea_density == pytest.approx(0.520, abs=1e-3)
assert proposition_count == 188
assert idea_density == pytest.approx(0.519, abs=1e-3)


def test_turner_1987_passage_3():
Expand Down
62 changes: 62 additions & 0 deletions tests/utils/test_word_search_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import pytest
from pycpidr.utils.word_search_utils import beginning_of_sentence
from pycpidr.word_item import WordListItem
from pycpidr.utils.constants import SENTENCE_END


def create_word_list(tokens, tags):
    """Pair each token with its tag and wrap every pair in a WordListItem.

    Args:
        tokens: Iterable of token strings.
        tags: Iterable of tag values, positionally aligned with ``tokens``.

    Returns:
        A list with one ``WordListItem`` per (token, tag) pair, in order.
        Pairing uses ``zip``, so it stops at the shorter of the two inputs.
    """
    items = []
    for token, tag in zip(tokens, tags):
        items.append(WordListItem(token=token, tag=tag))
    return items


def test_beginning_of_sentence_middle():
    """Index 6 lies in the second sentence, whose first word is at index 5."""
    tokens = ["This", "is", "a", "sentence", ".", "Another", "one", "."]
    # Four plain words, a sentence end, two plain words, a sentence end.
    tags = ["PLC_TAG"] * 4 + [SENTENCE_END] + ["PLC_TAG"] * 2 + [SENTENCE_END]
    word_items = create_word_list(tokens, tags)

    sentence_start = beginning_of_sentence(word_items, 6)

    assert sentence_start == 5


def test_beginning_of_sentence_start():
    """With no earlier sentence end, a mid-sentence index maps back to 0."""
    tokens = ["This", "is", "a", "sentence", "."]
    tags = ["PLC_TAG"] * 4 + [SENTENCE_END]
    word_items = create_word_list(tokens, tags)

    sentence_start = beginning_of_sentence(word_items, 2)

    assert sentence_start == 0


def test_beginning_of_sentence_end():
    """Querying from the sentence-end token itself (index 4) still yields 0."""
    tokens = ["This", "is", "a", "sentence", "."]
    tags = ["PLC_TAG"] * 4 + [SENTENCE_END]
    word_items = create_word_list(tokens, tags)

    sentence_start = beginning_of_sentence(word_items, 4)

    assert sentence_start == 0


def test_beginning_of_sentence_single_word():
    """A one-item list with an empty tag reports index 0 as the start."""
    only_word = create_word_list(["Word"], [""])

    sentence_start = beginning_of_sentence(only_word, 0)

    assert sentence_start == 0


def test_beginning_of_sentence_multiple_sentences():
    """Index 5 is in the third sentence, which begins at index 4."""
    # Each entry is (token, tag); two sentence ends precede the third sentence.
    tagged_tokens = [
        ("First", "PLC_TAG"),
        (".", SENTENCE_END),
        ("Second", "PLC_TAG"),
        (".", SENTENCE_END),
        ("Third", "PLC_TAG"),
        ("sentence", "PLC_TAG"),
        (".", SENTENCE_END),
    ]
    tokens = [token for token, _ in tagged_tokens]
    tags = [tag for _, tag in tagged_tokens]
    word_items = create_word_list(tokens, tags)

    sentence_start = beginning_of_sentence(word_items, 5)

    assert sentence_start == 4

0 comments on commit f16ae86

Please sign in to comment.