From d6d9243b4aac605f9aee58fade354382e774a6bf Mon Sep 17 00:00:00 2001 From: Olly Betts Date: Fri, 3 Sep 2021 15:06:40 +1200 Subject: [PATCH] swedish: Remove -et or -en when stem ends in et Removing -et and -en in general is problematic, as many words end in -et or -en where this isn't a suffix, but very few end in -etet or -eten where the last two letters aren't a suffix (and those that do don't seem to suffer if we make the stem not have the -et). Fixes #47 --- algorithms/swedish.sbl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/algorithms/swedish.sbl b/algorithms/swedish.sbl index 2cbb8859..9225ba82 100644 --- a/algorithms/swedish.sbl +++ b/algorithms/swedish.sbl @@ -1,5 +1,6 @@ routines ( mark_regions + R1 main_suffix consonant_pair other_suffix @@ -33,6 +34,8 @@ define mark_regions as ( backwardmode ( + define R1 as $p1 <= cursor + define main_suffix as ( setlimit tomark p1 for ([substring]) among( @@ -66,6 +69,7 @@ define stem as ( do mark_regions backwards ( do main_suffix + do ( ['et' or 'en' R1 ] 'et' delete ) do consonant_pair do other_suffix )