-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathnode-crf.js
66 lines (45 loc) · 1.51 KB
/
node-crf.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
var _nodecrf = require('./build/Release/nodecrf');
//Constructor
//Wraps the native CRF object exposed by the nodecrf addon.
//  model    - path to the trained model (required)
//  nbest    - value appended to the "-n" option in init(); defaults to 2,
//             a value <= 0 makes init() omit the option
//  deepinfo - when truthy init() appends "-v 2"; defaults to true
//All the properties besides the model are currently useless (the advanced statistics are not retrieved)
//Throws an Error when no model is given.
var CRF = function(model, nbest, deepinfo) {
  if (model === undefined || model === null) {
    throw new Error('A path to the model must be specified');
  }
  //Handle to the native object, created by init()
  this._crf = null;
  this.model = model;
  //Only fall back to the defaults when the argument is really missing:
  //"nbest || 2" turned an explicit 0 into 2 and "deepinfo || true" was always true
  this.nbest = (nbest === undefined || nbest === null) ? 2 : nbest;
  this.deepinfo = (deepinfo === undefined || deepinfo === null) ? true : deepinfo;
  this.isInitialized = false;
  //Separator used to separate the actual word from the POS tag
  this.separator = '_';
};

//Initialize the classifier
//It builds the option string from model/nbest/deepinfo and creates the actual C++ object
//Changes to model, nbest or deepinfo performed after this call have no effect
//on the already created native object
CRF.prototype.init = function() {
  var command = '-m ' + this.model;
  if (this.nbest > 0) {
    command += ' -n' + this.nbest;
  }
  if (this.deepinfo) {
    command += ' -v 2';
  }
  this._crf = new _nodecrf.CRF(command);
  this.isInitialized = true;
};

//It performs the classification of the text
//The text must be previously POS tagged and in the form "WordSEPARATORTag WordSEPARATORTag"
//Every token is rewritten as "Word Tag" and the resulting array is handed to the native classify()
//It returns whatever the native classifier returns for the trained model
//Throws an Error when init() has not been called, a TypeError when text is not a string
CRF.prototype.classify = function(text) {
  if (!this.isInitialized) {
    throw new Error('Call the init() methods before classfying');
  }
  if (typeof text !== 'string') {
    throw new TypeError('The text to classify must be a string');
  }
  //TODO: regexp matching to verify the format of the text?
  var separator = this.separator;
  var pieces = text.split(' ');
  var tokens = [];
  for (var i = 0; i < pieces.length; i++) {
    var piece = pieces[i];
    //Consecutive, leading or trailing spaces produce empty pieces: skip them
    if (piece === '') {
      continue;
    }
    if (typeof separator !== 'string' || separator === '') {
      //Unusual separator (e.g. a RegExp): keep the plain replace() behaviour
      tokens.push(piece.replace(separator, ' '));
      continue;
    }
    //The tag follows the LAST separator, so a word that itself contains
    //the separator (e.g. "snake_case_NN") stays in one piece
    var cut = piece.lastIndexOf(separator);
    if (cut === -1) {
      tokens.push(piece);
    } else {
      tokens.push(piece.slice(0, cut) + ' ' + piece.slice(cut + separator.length));
    }
  }
  return this._crf.classify(tokens);
};
//Public API of the module: expose the CRF constructor
module.exports.CRF = CRF;