Skip to content

Commit

Permalink
hunspell: disallow hidden title-case entries from compound middle/end (
Browse files Browse the repository at this point in the history
…#12220)

If we only have the custom-case uART and the all-caps UART, we shouldn't accept StandUart as a compound (although we keep hidden title-case "Uart" dictionary entries for internal purposes).
  • Loading branch information
donnerpeter authored Apr 3, 2023
1 parent 56e6591 commit 56aef72
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ && checkCompoundRules(wordChars, 0, length, new ArrayList<>())) {
Root<CharsRef> findStem(
char[] wordChars, int offset, int length, WordCase originalCase, WordContext context) {
checkCanceled.run();
boolean checkCase = context != COMPOUND_MIDDLE && context != COMPOUND_END;
WordCase toCheck = context != COMPOUND_MIDDLE && context != COMPOUND_END ? originalCase : null;
@SuppressWarnings({"rawtypes", "unchecked"})
Root<CharsRef>[] result = new Root[1];
stemmer.doStem(
Expand All @@ -173,7 +173,7 @@ Root<CharsRef> findStem(
length,
context,
(stem, formID, morphDataId, outerPrefix, innerPrefix, outerSuffix, innerSuffix) -> {
if (checkCase && !acceptCase(originalCase, formID, stem)) {
if (!acceptCase(toCheck, formID, stem)) {
return dictionary.hasFlag(formID, Dictionary.HIDDEN_FLAG);
}
if (acceptsStem(formID)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,10 @@ public void testGermanCompounding() throws Exception {
doTest("germancompounding");
}

/**
 * Runs the shared {@code doTest} helper against the fixture named "germanManualCase".
 *
 * <p>NOTE(review): presumably {@code doTest} loads the matching germanManualCase dictionary and
 * word-list resources (affix/dictionary/good/wrong files) — confirm against the helper.
 */
public void testGermanManualCase() throws Exception {
doTest("germanManualCase");
}

/**
 * Runs the shared {@code doTest} helper against the fixture named "oconv".
 *
 * <p>NOTE(review): judging by the method name, the fixture checks that output conversion (OCONV)
 * is applied to suggestions — confirm against the "oconv" resource files.
 */
public void testApplyOconvToSuggestions() throws Exception {
doTest("oconv");
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# no CHECKCOMPOUNDCASE

# compound flags

COMPOUNDBEGIN U
COMPOUNDMIDDLE V
COMPOUNDEND W

ONLYINCOMPOUND X
COMPOUNDPERMITFLAG P

COMPOUNDMIN 1
WORDCHARS -

# dash prefix for compounds with dash (Arbeits-Computer)

PFX - Y 1
PFX - 0 -/P .

# decapitalizing prefix

PFX D Y 29
PFX D A a/PX A
PFX D Ä ä/PX Ä
PFX D B b/PX B
PFX D C c/PX C
PFX D D d/PX D
PFX D E e/PX E
PFX D F f/PX F
PFX D G g/PX G
PFX D H h/PX H
PFX D I i/PX I
PFX D J j/PX J
PFX D K k/PX K
PFX D L l/PX L
PFX D M m/PX M
PFX D N n/PX N
PFX D O o/PX O
PFX D Ö ö/PX Ö
PFX D P p/PX P
PFX D Q q/PX Q
PFX D R r/PX R
PFX D S s/PX S
PFX D T t/PX T
PFX D U u/PX U
PFX D Ü ü/PX Ü
PFX D V v/PX V
PFX D W w/PX W
PFX D X x/PX X
PFX D Y y/PX Y
PFX D Z z/PX Z
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
4
uART/XW-
bein/XW-
Stand/UX
UART/-
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
UART
Standbein
Stand-uART
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
StandUart
uART
Uart

0 comments on commit 56aef72

Please sign in to comment.