-
Notifications
You must be signed in to change notification settings - Fork 0
/
books.js
60 lines (56 loc) · 1.71 KB
/
books.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
const fs = require('fs');
const path = require('path');
const puppeteer = require('puppeteer');
const url = 'http://books.toscrape.com/';
const file = path.join(__dirname, 'data/books.json');

/**
 * Script entry point: walks the paginated catalogue starting at `url`, opens
 * every book linked from each listing page, collects the record produced by
 * `getData` for it, and finally writes all records as JSON to `file`.
 *
 * The browser is always closed, even when a step fails, and any failure is
 * reported on stderr with a non-zero exit code instead of being left as an
 * unhandled promise rejection.
 */
(async () => {
  const browser = await puppeteer.launch({headless: false});
  try {
    const page = await browser.newPage();
    await page.goto(url);
    const json = [];

    // Scrape the current listing first and only then look for a "next" link,
    // so a catalogue that fits on a single page is still scraped.
    let hasNextPage = true;
    while (hasNextPage) {
      let articles = await page.$$('.product_pod h3 a');
      for (let index = 0; index < articles.length; index++) {
        await Promise.all([
          page.waitForNavigation(),
          articles[index].click(),
        ]);
        json.push(await page.evaluate(getData));
        await page.goBack();
        // Element handles from before the navigation are stale; query again.
        articles = await page.$$('.product_pod h3 a');
      }

      hasNextPage = (await page.$('.pager .next a')) !== null;
      if (hasNextPage) {
        await Promise.all([
          page.waitForNavigation(),
          page.click('.pager .next a'),
        ]);
      }
    }

    // The output directory may not exist on a fresh checkout.
    fs.mkdirSync(path.dirname(file), {recursive: true});
    fs.writeFileSync(file, JSON.stringify(json), 'utf8');
  } finally {
    await browser.close();
  }
})().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
/**
 * Extracts the details of one book from the product page that is currently
 * loaded, reading the global `document` and `location`.
 *
 * This function is handed to `page.evaluate`, which runs it inside the
 * browser page rather than in Node, so it must stay self-contained: it may
 * not reference helpers or variables declared elsewhere in this file.
 *
 * `description`, `category` and `cover` fall back to an empty string when the
 * corresponding element is missing. The `.product_page` container, its `h1`
 * and its `.price_color` element are required; a page without them throws.
 *
 * @return {{title: string, price: string, description: string,
 *     category: string, cover: string}} The scraped book record.
 */
function getData() {
  // Text of the first element matching `selector`, or '' when none matches.
  const textOf = (selector) => {
    const node = document.querySelector(selector);
    return node ? node.textContent : '';
  };

  const product = document.querySelector('.product_page');
  const image = document.querySelector('#product_gallery img');
  const src = image ? image.getAttribute('src') : null;

  return {
    title: product.querySelector('h1').textContent,
    price: product.querySelector('.price_color').textContent,
    description: textOf('#product_description ~ p'),
    category: textOf('.breadcrumb li:nth-child(3) a'),
    // Resolve against the page URL instead of chopping a fixed "../.." prefix,
    // so relative, root-relative and absolute `src` values all work.
    cover: src ? new URL(src, location.href).href : '',
  };
}