-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgen-pedref.js
44 lines (42 loc) · 1.05 KB
/
gen-pedref.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
const fs=require("fs")
// Tally the references cited in the dictionary entries of ./pts.json
// (from suttacentral/sc-data/dictionaries).
//
// Output, written to ped-refs.txt:
//   1. one line per book prefix: "<book>\t<count>\t<cumulative share>",
//      ordered by descending count;
//   2. followed by every extracted reference, sorted, one per line.
//
// Original author's notes on the data, kept as written:
//   available symbols: $ ^
//   māsati māsana māsin duplicate with | , and jimha , only 4 |
// (A similar scan over <dfn>…</dfn>, logging the match and entry.word, was
// previously used here for debugging.)

// Matches one reference span; capture group 1 is the reference text.
const REF_PATTERN = /<span class='ref'>(.+?)<\/span>/g;

/**
 * Collect the text of every <span class='ref'>…</span> found in the entries.
 * @param {Array<{text: string}>} entries - parsed dictionary entries.
 * @returns {string[]} reference strings in encounter order.
 */
function extractRefs(entries) {
  const found = [];
  for (const entry of entries) {
    const text = entry.text;
    // Skip entries without a string body instead of crashing the whole run.
    if (typeof text !== "string") continue;
    // The pattern is global and therefore stateful: rewind before each entry.
    REF_PATTERN.lastIndex = 0;
    let match;
    while ((match = REF_PATTERN.exec(text)) !== null) {
      found.push(match[1]);
    }
  }
  return found;
}

/**
 * Count references per book, where the book is the text before the first ".".
 * References without a "." are printed to stdout and left out of the counts.
 * @param {string[]} refList - reference strings.
 * @returns {Map<string, number>} book prefix -> number of references.
 */
function countByBook(refList) {
  // A Map (not a plain object) so prefixes such as "constructor" or
  // "toString" cannot collide with inherited Object.prototype members.
  const counts = new Map();
  for (const ref of refList) {
    const dot = ref.indexOf(".");
    if (dot === -1) {
      console.log(ref);
      continue;
    }
    const book = ref.slice(0, dot);
    counts.set(book, (counts.get(book) || 0) + 1);
  }
  return counts;
}

/**
 * Render the per-book table, most-cited book first.
 * Books with equal counts keep the order in which they were first seen.
 * @param {Map<string, number>} counts - book prefix -> number of references.
 * @returns {string[]} tab-separated lines "<book>\t<count>\t<cumulative share>",
 *   the share being the running total divided by the grand total, 2 decimals.
 */
function formatBookStats(counts) {
  const rows = Array.from(counts).sort((a, b) => b[1] - a[1]);
  let total = 0;
  for (const row of rows) {
    total += row[1];
  }
  const lines = [];
  let running = 0;
  for (const [book, count] of rows) {
    running += count;
    lines.push([book, count, (running / total).toFixed(2)].join("\t"));
  }
  return lines;
}

const ptsjson = fs.readFileSync("./pts.json", "utf8"); // from suttacentral/sc-data/dictionaries
console.log('parsing json');
const ped = JSON.parse(ptsjson);
const refs = extractRefs(ped);
console.log('stat');
// Default (UTF-16 code unit) ordering, so references of one book sit together.
refs.sort();
const statLines = formatBookStats(countByBook(refs));
fs.writeFileSync("ped-refs.txt", statLines.join("\n") + "\n" + refs.join("\n"), "utf8");