Skip to content

Commit

Permalink
First commit
Browse files Browse the repository at this point in the history
  • Loading branch information
trikko committed Mar 23, 2016
1 parent a2bea0b commit f65d78b
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 0 deletions.
6 changes: 6 additions & 0 deletions dub.sdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name "libstemmerd"
description "libstemmer binding for the D programming language"
copyright "Copyright © 2016, 2night.it"
authors "Andrea Fontana"
libs "stemmer"
license "MIT License"
27 changes: 27 additions & 0 deletions source/app.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import std.stdio;
import stemmer;

/// Demo program: prints the algorithm names reported by `Stemmer.list`, then
/// checks a handful of English words against their expected stems.
void main()
{
    // Show every algorithm name the binding reports.
    writeln("Available languages: ");
    foreach (language; Stemmer.list)
        writeln(" - ", language);

    // Build a stemmer for English.
    auto english = Stemmer("english");

    // Each entry is [input word, expected stem]; checked in listed order.
    static immutable string[2][] expectations = [
        ["testing",      "test"],
        ["test",         "test"],
        ["tested",       "test"],

        ["consist",      "consist"],
        ["consisted",    "consist"],
        ["consistency",  "consist"],
        ["consistent",   "consist"],
        ["consistently", "consist"],
        ["consisting",   "consist"],
        ["consists",     "consist"],
    ];

    // Try it!
    foreach (pair; expectations)
        assert(english.stem(pair[0]) == pair[1]);
}
45 changes: 45 additions & 0 deletions source/stemmer.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
private import std.string : toStringz;
private import std.conv : to;

/**
 * Thin wrapper around a libstemmer handle (`sb_stemmer*`).
 *
 * The struct owns the handle created in its constructor and releases it in
 * its destructor. Copying is disabled: a copy would share the same raw
 * pointer and both destructors would call `sb_stemmer_delete` on it.
 */
struct Stemmer
{
    /// Return a list of valid algorithms you can use.
    static string[] list()
    {
        string[] results;
        auto stemmers = sb_stemmer_list();
        if (stemmers is null)
            return results;

        // The array is walked until its first null entry.
        for (size_t idx = 0; stemmers[idx] != null; ++idx)
            results ~= stemmers[idx].to!string;

        return results;
    }

    /**
     * Create a stemmer for the given algorithm name.
     *
     * Params:
     *   algorithm = one of the names returned by `list`.
     *
     * The character encoding argument is passed as `null`. If the library
     * does not return a handle, construction still succeeds and the failure
     * is reported by the first call to `stem`.
     */
    this(in string algorithm)
    {
        _stemmer = sb_stemmer_new(algorithm.toStringz, null);
    }

    // Owning a raw C handle: forbid copies so it is never freed twice.
    @disable this(this);

    /// Release the underlying handle, if any.
    ~this()
    {
        if (_stemmer)
        {
            sb_stemmer_delete(_stemmer);
            _stemmer = null;
        }
    }

    /**
     * Stem a word using selected algorithm.
     *
     * Params:
     *   word = the word to stem.
     *
     * Returns: a freshly allocated string holding the stem, or `null` when
     *          the library returns a null result.
     *
     * Throws: `Exception` if this stemmer holds no handle (default-initialised
     *         instance, or `sb_stemmer_new` returned null in the constructor);
     *         `ConvOverflowException` if `word.length` does not fit in an `int`.
     */
    string stem(in string word)
    {
        if (_stemmer is null)
            throw new Exception("Stemmer is not initialised with a valid algorithm");

        auto stemmed = sb_stemmer_stem(
            _stemmer,
            cast(const(sb_symbol)*) word.toStringz,
            to!int(word.length));

        if (stemmed is null)
            return null;

        // Take the length from the library rather than scanning for a NUL,
        // and copy: the buffer belongs to the C side, not to the GC.
        immutable len = sb_stemmer_length(_stemmer);
        return (cast(const(char)*) stemmed)[0 .. len].idup;
    }

    private sb_stemmer* _stemmer = null;
}

// Only the handful of libstemmer C declarations this module needs.
private extern (C)
{
    /// Symbol unit used for the words passed to and returned by the stemmer.
    alias sb_symbol = ubyte;

    /// Opaque handle type; only ever referenced through a pointer.
    struct sb_stemmer;

    /// Array of algorithm names; callers walk it until the first null entry.
    const(char*)* sb_stemmer_list();

    /// Create a stemmer for `algorithm` using character encoding `charenc`.
    sb_stemmer* sb_stemmer_new(const(char)* algorithm, const(char)* charenc);

    /// Destroy a stemmer previously obtained from `sb_stemmer_new`.
    void sb_stemmer_delete(sb_stemmer* stemmer);

    /// Stem `word`, which is `size` symbols long.
    const(sb_symbol)* sb_stemmer_stem(sb_stemmer* stemmer, const(sb_symbol)* word, int size);

    /// Presumably the length of the most recent stem result (confirm against libstemmer.h).
    int sb_stemmer_length(sb_stemmer* stemmer);
}

0 comments on commit f65d78b

Please sign in to comment.