-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmainScraper.js
103 lines (99 loc) · 2.76 KB
/
mainScraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import got from "got";
import { load } from "cheerio";
import { writeFile } from "fs";
import extractMcq from "./subj.js";
/**
 * Download a page and collect its MCQ section listings and "1000 ..." links.
 *
 * The page is fetched with `got` and parsed with cheerio's `load`. Two passes
 * are made over elements under `.grid-33`:
 *
 * 1. Every `ul` becomes a candidate section. Its heading is the text of the
 *    element immediately preceding the list; its entries are the lines of the
 *    list's text that contain "MCQs", each paired (in order) with the next
 *    accepted href found two levels below the list. Sections with fewer than
 *    two entries are dropped. Kept sections get sequential ids starting at 1.
 * 2. Every anchor whose text contains "1000" and whose href does not contain
 *    "problems" is recorded, tagged with the heading of the first section
 *    that lists the same (whitespace-trimmed) title, if any.
 *
 * @param {string} url - Address of the page to scrape.
 * @returns {Promise<[Array<{Category: (string|undefined), Title: string, Link: string}>,
 *   Array<{id: number, Title: string, content: Array<{title: string, link: (string|undefined)}>}>]|undefined>}
 *   `[links, contents]` on success. On any failure the error is logged and
 *   the promise resolves to `undefined`, so callers must check the result.
 */
const extractLinks = async (url) => {
  // An href is kept for a section only if it contains one of these substrings.
  const acceptedHrefParts = [
    "questions-answers",
    "https://www.sanfoundry.com/technical-interview-questions/",
    "https://www.sanfoundry.com/san-storage-mcqs-freshers-experienced/",
  ];

  try {
    const response = await got(url);
    const $ = load(response.body);
    const links = [];
    const contents = [];
    let nextId = 1;

    $(".grid-33 ul").each(function () {
      const heading = $(this).prev().text();
      const lines = $(this).text().trim().split("\n");

      const hrefs = [];
      $(this)
        .children()
        .children()
        .each(function () {
          const href = $(this).attr("href");
          if (href && acceptedHrefParts.some((part) => href.includes(part))) {
            hrefs.push(href);
          }
        });

      // Titles and hrefs are matched purely by position; `link` is undefined
      // once the collected hrefs run out.
      const entries = [];
      for (const line of lines) {
        if (line.includes("MCQs")) {
          entries.push({ title: line, link: hrefs.shift() });
        }
      }

      if (entries.length > 1) {
        contents.push({
          id: nextId++,
          Title: heading,
          content: entries,
        });
      }
    });

    // Map each listed title to the heading of the first section containing it.
    // Entries are objects, so the lookup has to go through their `title`
    // field; comparing the entry itself against a string never matches.
    const headingByTitle = new Map();
    for (const section of contents) {
      for (const entry of section.content) {
        const key = entry.title.trim();
        if (!headingByTitle.has(key)) {
          headingByTitle.set(key, section.Title);
        }
      }
    }

    $(".grid-33 a").each(function () {
      const link = $(this).attr("href");
      const title = $(this).text();
      // Anchors without an href are skipped rather than aborting the scrape.
      if (!title || !link) {
        return;
      }
      if (title.includes("1000") && !link.includes("problems")) {
        links.push({
          Category: headingByTitle.get(title.trim()),
          Title: title,
          Link: link,
        });
      }
    });

    return [links, contents];
  } catch (error) {
    // Failures are reported but not rethrown; the caller receives undefined.
    console.log(error);
  }
};
/**
 * Serialize `data` as 4-space-indented JSON and write it to `path`.
 *
 * Wraps the callback form of `writeFile` so that a write error rejects the
 * returned promise instead of being thrown from inside the callback.
 *
 * @param {string} path - Destination file.
 * @param {*} data - Value passed to `JSON.stringify`.
 * @returns {Promise<void>} Resolves after the file is written and
 *   "Saved JSON!" has been logged.
 */
const saveJson = (path, data) =>
  new Promise((resolve, reject) => {
    writeFile(path, JSON.stringify(data, null, 4), (err) => {
      if (err) {
        reject(err);
        return;
      }
      console.log("Saved JSON!");
      resolve();
    });
  });

// Script entry point: scrape the index page, run extractMcq for every listed
// entry one after another, then save both result sets under ./saved.
extractLinks("https://www.sanfoundry.com/")
  .then(async (result) => {
    // extractLinks resolves to undefined when the scrape fails; destructuring
    // that directly would raise an unrelated TypeError.
    if (!result) {
      throw new Error("extractLinks returned no data; nothing to save");
    }
    const [links, contents] = result;

    for (const section of contents) {
      for (const entry of section.content) {
        await extractMcq(entry.link, entry.title);
      }
    }

    await Promise.all([
      saveJson("./saved/links.json", links),
      saveJson("./saved/contents.json", contents),
    ]);
  })
  .catch((error) => {
    // Single failure path for scrape, per-page extraction and file writes.
    console.error(error);
    process.exitCode = 1;
  });
// let links = extractLinks("https://www.sanfoundry.com/");
// console.log(extractLinks("https://www.sanfoundry.com/"));