Skip to content

Commit

Permalink
Add Turkish normalizer definition and tests
Browse files: browse the repository at this point in the history
  • Loading branch information
tkhshtsh0917 committed Aug 18, 2024
1 parent 6debc92 commit a8014ed
Show file tree
Hide file tree
Showing 2 changed files with 427 additions and 0 deletions.
6 changes: 6 additions & 0 deletions charabia/src/normalizer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ use self::nonspacing_mark::NonspacingMarkNormalizer;
use self::quote::QuoteNormalizer;
#[cfg(feature = "swedish-recomposition")]
use self::swedish_recomposition::SwedishRecompositionNormalizer;
#[cfg(feature = "turkish")]
pub use self::turkish::TurkishNormalizer;
#[cfg(feature = "vietnamese")]
pub use self::vietnamese::VietnameseNormalizer;
use crate::segmenter::SegmentedTokenIter;
Expand All @@ -39,6 +41,8 @@ mod nonspacing_mark;
mod quote;
#[cfg(feature = "swedish-recomposition")]
mod swedish_recomposition;
#[cfg(feature = "turkish")]
mod turkish;
#[cfg(feature = "vietnamese")]
mod vietnamese;

Expand Down Expand Up @@ -71,6 +75,8 @@ pub static LOSSY_NORMALIZERS: Lazy<Vec<Box<dyn Normalizer>>> = Lazy::new(|| {
Box::new(NonspacingMarkNormalizer),
#[cfg(feature = "vietnamese")]
Box::new(VietnameseNormalizer),
#[cfg(feature = "turkish")]
Box::new(TurkishNormalizer),
]
});

Expand Down
Loading

0 comments on commit a8014ed

Please sign in to comment.