-
Notifications
You must be signed in to change notification settings - Fork 5
/
pronouncing.js
114 lines (103 loc) · 2.69 KB
/
pronouncing.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
'use strict';
var _ = require('underscore');
var fs = require('fs');
/**
 * Parse the text of a CMU Pronouncing Dictionary file into a list of
 * [word, phones] pairs.
 *
 * Each entry line holds a word, a run of whitespace, and then the phones
 * separated by single spaces. Blank lines and lines starting with ";" are
 * skipped. A trailing "(N)" variant marker is removed from the word, and the
 * word is lower-cased. Both "\n" and "\r\n" line endings are accepted.
 *
 * @param {string} str - full text of the dictionary file
 * @returns {Array} array of two-element arrays: [word, phones], where phones
 *   is the complete space-separated phone string ("" if the line had none)
 */
function parseCMU(str) {
  var pronunciations = [];
  str.split(/\r?\n/).forEach(function(rawLine) {
    var line = rawLine.trim();
    // Nothing to record for blank lines or ";" comment lines.
    if (line.length === 0 || line.charAt(0) === ';') { return; }
    // Split at the FIRST whitespace run only: the word is on the left and
    // everything on the right is the phone list. Splitting on every space
    // would keep just one token of the phones.
    var sep = line.search(/\s/);
    var word = sep === -1 ? line : line.slice(0, sep);
    var phones = sep === -1 ? '' : line.slice(sep).trim().split(/\s+/).join(' ');
    // "WORD(1)" marks an alternate pronunciation of "WORD".
    word = word.replace(/\(\d+\)$/, '').toLowerCase();
    pronunciations.push([word, phones]);
  });
  return pronunciations;
}
// Every dictionary entry as a [word, phones] pair. The dictionary file sits
// next to this module and is read synchronously, once, when the module loads.
var pronunciations = parseCMU(
  fs.readFileSync(__dirname + "/cmudict-0.7b", 'utf8')
);
/**
 * Count the syllables of a pronunciation by counting its stress digits
 * (0, 1 or 2), one per vowel phone.
 *
 * @param {string|string[]} phones - a phone string such as "HH AH0 L OW1",
 *   or an array of phone strings
 * @returns {number} number of stress digits found; 0 for empty or missing
 *   input
 */
function syllableCount(phones) {
  if (phones == null) { return 0; }
  // Arrays are joined so one scan covers every element.
  var text = Array.isArray(phones) ? phones.join(' ') : String(phones);
  return (text.match(/[012]/g) || []).length;
}
/**
 * Collect every pronunciation recorded for a word.
 *
 * @param {string} find - the word to look up, compared against each entry's
 *   word with loose equality
 * @returns {string[]} phone strings of all matching entries, in dictionary
 *   order; empty when the word is not present
 */
function phonesForWord(find) {
  var found = [];
  for (var i = 0; i < pronunciations.length; i++) {
    var entry = pronunciations[i];
    if (entry[0] == find) {
      found.push(entry[1]);
    }
  }
  return found;
}
/**
 * Extract the part of a pronunciation that determines its rhyme: everything
 * from the last phone whose stress digit is 1 or 2 through the end.
 *
 * @param {string} phones - space-separated phone string
 * @returns {string} the trailing phones starting at the last phone ending in
 *   "1" or "2"; the whole input when no phone carries such a digit
 */
function rhymingPart(phones) {
  var tokens = phones.split(" ");
  var start = tokens.length - 1;
  // Walk backwards until a stressed phone is found or the front is reached.
  while (start > 0 && !/[12]$/.test(tokens[start])) {
    start--;
  }
  return tokens.slice(start).join(' ');
}
/**
 * Find the words whose pronunciation matches a pattern.
 *
 * A string pattern is compiled into a RegExp wrapped in word-boundary
 * anchors (\b) at both ends. A RegExp object is used exactly as given, so
 * the caller has to include any word-boundary anchors it wants.
 *
 * @param {string|RegExp} pattern - pattern matched against each phone string
 * @returns {string[]} the word of every entry whose phones match, in
 *   dictionary order
 */
function search(pattern) {
  var re = (pattern instanceof RegExp)
    ? pattern
    : new RegExp("\\b" + pattern + "\\b");
  var hits = [];
  for (var i = 0; i < pronunciations.length; i++) {
    var entry = pronunciations[i];
    if (entry[1].match(re)) {
      hits.push(entry[0]);
    }
  }
  return hits;
}
/**
 * Find the words whose stress pattern matches a regular-expression source.
 *
 * The pattern is wrapped in word-boundary anchors (\b) and tested against
 * the digits-only stress string that stresses() produces for each entry.
 *
 * @param {string} pattern - regular-expression source, e.g. "01"
 * @returns {string[]} the word of every entry whose stress string matches,
 *   in dictionary order
 */
function searchStresses(pattern) {
  var re = new RegExp("\\b" + pattern + "\\b");
  var hits = [];
  for (var i = 0; i < pronunciations.length; i++) {
    var entry = pronunciations[i];
    if (stresses(entry[1]).match(re)) {
      hits.push(entry[0]);
    }
  }
  return hits;
}
/**
 * List the words that rhyme with a word.
 *
 * For every pronunciation of the word, the rhyming part is taken and all
 * entries whose phones end with that part are collected. The word itself is
 * left out, and each rhyme is reported once even if it matches through
 * several pronunciations.
 *
 * @param {string} word - the word to find rhymes for
 * @returns {string[]} distinct rhyming words in first-found order; empty
 *   when the word is unknown or nothing rhymes with it
 */
function rhymes(word) {
  // Prototype-less object used as a set, so dictionary words such as
  // "constructor" cannot collide with inherited properties.
  var seen = Object.create(null);
  var allRhymes = [];
  phonesForWord(word).forEach(function(phonesStr) {
    // "$" pins the rhyming part to the end of the candidate's phones.
    var candidates = search(rhymingPart(phonesStr) + "$");
    candidates.forEach(function(candidate) {
      // Loose comparison mirrors the loose lookup done by phonesForWord.
      if (candidate == word || seen[candidate]) { return; }
      seen[candidate] = true;
      allRhymes.push(candidate);
    });
  });
  return allRhymes;
}
/**
 * Reduce a pronunciation to its stress pattern by keeping only the
 * characters 0, 1 and 2, in order.
 *
 * @param {string} s - phone string such as "HH AH0 L OW1"
 * @returns {string} the stress digits concatenated, e.g. "01"; "" when the
 *   input contains none
 */
function stresses(s) {
  var digits = s.match(/[012]/g) || [];
  return digits.join("");
}
// Public API of this module: every function defined above is exported under
// its own name.
module.exports = {
parseCMU: parseCMU,
syllableCount: syllableCount,
phonesForWord: phonesForWord,
rhymingPart: rhymingPart,
search: search,
rhymes: rhymes,
stresses: stresses,
searchStresses: searchStresses
};