// gita-text-automation.js
// Forked from suresh-srinivas/everyday-gita-text-automation.
// Base address of the verse pages; a `<chapter>-<verse>` suffix is appended per request.
const baseUrl = 'https://shlokam.org/bhagavad-gita/';

// One record per chapter: chapter number, chapter name and number of verses.
// Kept as compact [chapter, name, verses] rows and expanded into objects below.
const chapterData = [
  [1, 'Arjuna Vishada Yoga', 47],
  [2, 'Sankhya Yoga', 72],
  [3, 'Karma Yoga', 43],
  [4, 'Gyana-Karma-Sanyasa Yoga', 42],
  [5, 'Karma-Sanyasa Yoga', 29],
  [6, 'Atma-Samyama Yoga', 47],
  [7, 'gnana-Vignana Yoga', 30],
  [8, 'Aksara-ParaBrahma Yoga', 28],
  [9, 'Raja-Vidya-Raja-Guhya Yoga', 34],
  [10, 'Vibhuti Yoga', 42],
  [11, 'Vishwarupa-Darsana Yoga', 55],
  [12, 'Bhakti Yoga', 20],
  [13, 'Ksetra-Ksetrajna-Vibhaga Yoga', 34],
  [14, 'Gunatraya-Vibhaga Yoga', 27],
  [15, 'Purushottama Yoga', 20],
  [16, 'Daivasura-Sampad-Vibhaga Yoga', 24],
  [17, 'Shraddhatraya-Vibhaga Yoga', 28],
  [18, 'Moksha-Sanyasa Yoga', 78],
].map(([chapter, name, verses]) => ({ chapter, name, verses }));

// Position in chapterData of the chapter being processed; the run begins at Chapter 12.
let currentChapterIndex = chapterData.findIndex((entry) => entry.chapter === 12);

// Verse texts gathered so far for the chapter being processed.
let chapterVerses = [];

// Verse number the script requests first.
let verseIndex = 1;

// How many verses are requested together in one batch.
const batchSize = 10;
/**
 * Collects the text of the leading paragraphs of a verse page.
 *
 * Reads `innerText` from at most the first four elements returned by
 * `document.querySelectorAll('p')`, follows each one with a blank line,
 * and trims the combined text.
 *
 * @param {Document} document - Page to read paragraphs from. The parameter
 *   shadows the global `document` inside this function.
 * @returns {string|null} The trimmed text, or null when it is empty.
 */
function extractVerseContent(document) {
  const MAX_PARAGRAPHS = 4;
  const leading = Array.from(document.querySelectorAll('p')).slice(0, MAX_PARAGRAPHS);
  const combined = leading.map((node) => `${node.innerText}\n\n`).join('');
  const trimmed = combined.trim();
  return trimmed === '' ? null : trimmed;
}
/**
 * Saves `content` as a file by clicking a temporary download link.
 *
 * Wraps the content in a Blob, points an `<a download>` element at an object
 * URL for that Blob, and triggers a click on it.
 *
 * @param {string} content - Data to write into the file.
 * @param {string} filename - Name suggested for the downloaded file.
 * @param {string} contentType - MIME type given to the Blob (e.g. 'text/plain').
 * @returns {void}
 */
function downloadToFile(content, filename, contentType) {
  const REVOKE_DELAY_MS = 1000;

  const blob = new Blob([content], { type: contentType });
  const objectUrl = URL.createObjectURL(blob);

  const link = document.createElement('a');
  link.href = objectUrl;
  link.download = filename;
  link.click();

  // Release the object URL a little later instead of in the same tick as
  // click(): revoking it immediately can invalidate the URL before the
  // browser has begun the download.
  setTimeout(() => URL.revokeObjectURL(objectUrl), REVOKE_DELAY_MS);
}
/**
 * Downloads one verse page and extracts its text.
 *
 * Requests `${baseUrl}${chapter}-${verseNum}`, parses the response body as
 * HTML and passes the resulting document to `extractVerseContent`.
 *
 * @param {number} chapter - Chapter number used in the page URL.
 * @param {number} verseNum - Verse number used in the page URL.
 * @returns {Promise<string|null>} Resolves to the extracted text, or to null
 *   when the server answers with a non-OK status or no text is extracted.
 *   Rejects when the request itself fails (e.g. a network error).
 */
function fetchSingleVerse(chapter, verseNum) {
  const verseUrl = `${baseUrl}${chapter}-${verseNum}`;

  return fetch(verseUrl).then((response) => {
    // fetch() resolves even for 4xx/5xx answers, so the status has to be
    // checked here; otherwise an error page would be stored as verse text.
    if (!response.ok) {
      console.warn(
        `Chapter ${chapter}, Verse ${verseNum}: ${verseUrl} returned HTTP ${response.status}; no content extracted.`
      );
      return null;
    }

    return response.text().then((html) => {
      const doc = new DOMParser().parseFromString(html, 'text/html');
      return extractVerseContent(doc);
    });
  });
}
/**
 * Fetches the current chapter in batches, starting at `verseNum`.
 *
 * Requests up to `batchSize` verses in parallel via `fetchSingleVerse`,
 * appends every non-empty result to `chapterVerses`, and schedules the next
 * batch after a 2-second pause. Once `verseNum` is past the chapter's last
 * verse, the collected text is handed to `downloadToFile`, `chapterVerses`
 * is reset, `currentChapterIndex` is advanced, and the next chapter (if any)
 * is started after a 3-second pause.
 *
 * A verse whose request fails, or that yields no content, is logged and left
 * out of the output; it does not stop the rest of the run.
 *
 * @param {number} verseNum - First verse number of the batch to fetch.
 * @returns {void}
 */
function fetchVerse(verseNum) {
  const currentChapter = chapterData[currentChapterIndex];

  // Guard against an index outside chapterData (e.g. findIndex returned -1).
  if (!currentChapter) {
    console.warn(`No chapter found at index ${currentChapterIndex}; nothing to fetch.`);
    return;
  }

  if (verseNum > currentChapter.verses) {
    console.log(`Extraction complete for Chapter ${currentChapter.chapter}.`);
    downloadToFile(chapterVerses.join('\n\n'), `Chapter_${currentChapter.chapter}_Verses.txt`, 'text/plain');

    // Reset chapterVerses and move to the next chapter.
    chapterVerses = [];
    currentChapterIndex++;
    if (currentChapterIndex < chapterData.length) {
      setTimeout(() => fetchVerse(1), 3000); // Wait 3 seconds before starting the next chapter.
    }
    return;
  }

  // Build one batch of requests without going past the chapter's last verse.
  const fetchPromises = [];
  for (let offset = 0; offset < batchSize && verseNum + offset <= currentChapter.verses; offset++) {
    const targetVerse = verseNum + offset;
    fetchPromises.push(
      // Turn a failed request into a null result so that one bad verse does
      // not reject the whole batch and discard what was already collected.
      fetchSingleVerse(currentChapter.chapter, targetVerse).catch((error) => {
        console.error(`Failed to fetch Chapter ${currentChapter.chapter}, Verse ${targetVerse}.`, error);
        return null;
      })
    );
  }

  Promise.all(fetchPromises)
    .then((verseContents) => {
      verseContents.forEach((content, index) => {
        if (content) {
          chapterVerses.push(content);
          console.log(`Chapter ${currentChapter.chapter}, Verse ${verseNum + index} extracted.`);
        } else {
          console.warn(`Chapter ${currentChapter.chapter}, Verse ${verseNum + index} has no content; omitted from the output.`);
        }
      });

      // Continue with the next batch after a short pause.
      setTimeout(() => {
        fetchVerse(verseNum + batchSize);
      }, 2000); // 2-second delay. Adjust as needed.
    })
    .catch((error) => {
      console.error(`Failed to fetch verses for Chapter ${currentChapter.chapter} starting from ${verseNum}.`, error);
    });
}
// Start the run: fetch from `verseIndex` in the chapter selected by `currentChapterIndex`.
fetchVerse(verseIndex);