From f65d78befe9d08775ee4cc7ebb9cc3f3955f9c9b Mon Sep 17 00:00:00 2001
From: Andrea Fontana
Date: Wed, 23 Mar 2016 16:28:56 +0100
Subject: [PATCH] First commit

---
 dub.sdl          |  6 ++++
 source/app.d     | 27 ++++++++++++++++
 source/stemmer.d | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 127 insertions(+)
 create mode 100644 dub.sdl
 create mode 100644 source/app.d
 create mode 100644 source/stemmer.d

diff --git a/dub.sdl b/dub.sdl
new file mode 100644
index 0000000..6cca020
--- /dev/null
+++ b/dub.sdl
@@ -0,0 +1,6 @@
+name "libstemmerd"
+description "libstemmer binding for the D programming language"
+copyright "Copyright © 2016, 2night.it"
+authors "Andrea Fontana"
+libs "stemmer"
+license "MIT License"
diff --git a/source/app.d b/source/app.d
new file mode 100644
index 0000000..2955cb2
--- /dev/null
+++ b/source/app.d
@@ -0,0 +1,27 @@
+import std.stdio;
+import stemmer;
+
+/// Small demo: list the available algorithms and check a few English stems.
+void main()
+{
+    // Print a list of available languages
+    writeln("Available languages: ");
+    foreach (language; Stemmer.list)
+        writeln(" - ", language);
+
+    // Create an english stemmer
+    Stemmer s = Stemmer("english");
+
+    // Try it! (asserts are removed in -release builds)
+    assert(s.stem("testing") == "test");
+    assert(s.stem("test") == "test");
+    assert(s.stem("tested") == "test");
+
+    assert(s.stem("consist") == "consist");
+    assert(s.stem("consisted") == "consist");
+    assert(s.stem("consistency") == "consist");
+    assert(s.stem("consistent") == "consist");
+    assert(s.stem("consistently") == "consist");
+    assert(s.stem("consisting") == "consist");
+    assert(s.stem("consists") == "consist");
+}
diff --git a/source/stemmer.d b/source/stemmer.d
new file mode 100644
index 0000000..398df66
--- /dev/null
+++ b/source/stemmer.d
@@ -0,0 +1,94 @@
+private import std.string : toStringz;
+private import std.conv : to;
+
+/**
+ * Wrapper around a libstemmer stemmer handle.
+ *
+ * The constructor obtains the handle with `sb_stemmer_new` and the destructor
+ * releases it with `sb_stemmer_delete`.
+ */
+struct Stemmer
+{
+    /// Return a list of valid algorithms you can use.
+    static string[] list()
+    {
+        string[] results;
+
+        // Walk the array returned by libstemmer until its null terminator entry.
+        for (auto entry = sb_stemmer_list(); *entry !is null; ++entry)
+            results ~= (*entry).to!string;
+
+        return results;
+    }
+
+    /**
+     * Create a stemmer for the given algorithm.
+     *
+     * Params:
+     *   algorithm = algorithm name, e.g. one of the values returned by `list`
+     *
+     * Throws: Exception if libstemmer returns no stemmer for `algorithm`.
+     */
+    this(in string algorithm)
+    {
+        import std.exception : enforce;
+
+        // A null character encoding makes libstemmer assume UTF-8.
+        _stemmer = sb_stemmer_new(algorithm.toStringz, null);
+        enforce(_stemmer !is null, "Unable to create a stemmer for algorithm '" ~ algorithm ~ "'");
+    }
+
+    /// Release the libstemmer handle, if one is held.
+    ~this()
+    {
+        if (_stemmer !is null)
+        {
+            sb_stemmer_delete(_stemmer);
+            _stemmer = null;
+        }
+    }
+
+    // The handle is owned by exactly one Stemmer: a copy would share the pointer
+    // and both destructors would delete it, so copying is disabled.
+    @disable this(this);
+
+    /**
+     * Stem a word using selected algorithm.
+     *
+     * Params:
+     *   word = word to stem
+     *
+     * Returns: a copy of the stem, or null if libstemmer returns no result.
+     *
+     * Throws: Exception if this Stemmer holds no handle (e.g. `Stemmer.init`).
+     */
+    string stem(in string word)
+    {
+        import std.exception : enforce;
+
+        enforce(_stemmer !is null, "Stemmer is not initialized");
+
+        // The length is passed explicitly, so no NUL-terminated copy of the word is needed.
+        auto stemmed = sb_stemmer_stem(_stemmer, cast(const(sb_symbol)*) word.ptr, to!int(word.length));
+        if (stemmed is null)
+            return null;
+
+        // The result buffer belongs to the stemmer; copy sb_stemmer_length() bytes out of it.
+        return (cast(const(char)*) stemmed)[0 .. sb_stemmer_length(_stemmer)].idup;
+    }
+
+    private sb_stemmer* _stemmer = null;
+}
+
+// Declarations of the libstemmer C functions used above.
+private
+{
+    extern (C):
+    alias sb_symbol = ubyte;
+    struct sb_stemmer;
+    const(char*)* sb_stemmer_list();
+    sb_stemmer* sb_stemmer_new(const(char)* algorithm, const(char)* charenc);
+    void sb_stemmer_delete(sb_stemmer* stemmer);
+    const(sb_symbol)* sb_stemmer_stem(sb_stemmer* stemmer, const(sb_symbol)* word, int size);
+    int sb_stemmer_length(sb_stemmer* stemmer);
+}