-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraper.js
58 lines (48 loc) · 2.15 KB
/
scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import puppeteer from "puppeteer";
import fs from "fs";
import { match } from "assert";
// Address of the page that main() opens with Puppeteer and scrapes.
const url = "https://cinematek.be/programma/kalender";
/**
 * Opens `url` in a Puppeteer-controlled browser, builds one record per
 * `.py-1` element found on the page, logs the records and writes them as
 * JSON to `cine_data.json` in the current working directory.
 *
 * @returns {Promise<void>} Resolves once the file has been written.
 * @throws Rejects if navigation, in-page extraction or the file write fails.
 *   The browser is closed in every case.
 */
const main = async () => {
  const browser = await puppeteer.launch();
  let CineData;
  try {
    const page = await browser.newPage();
    await page.goto(url);
    console.log("launch");
    // The callback below is executed inside the page, not in Node, so every
    // helper and constant it needs has to be declared within it.
    CineData = await page.evaluate(() => {
      // The details text is split by U+2044 (fraction slash) separators;
      // these patterns pick out the 1st, 2nd and 3rd separated segments.
      const colorPattern = /⁄([^⁄]+)⁄/;
      const runtimePattern = /⁄([^⁄]+)⁄([^⁄]+)⁄/;
      const subtitlesPattern = /⁄(?:[^⁄]+⁄){2}([^⁄]+)/;
      // Shared ancestor selector of the time and location elements.
      const screeningColumn =
        "div.col-12.col-md-2.d-flex.justify-content-between.flex-md-column.flex-row.align-self-center";

      // Returns the trimmed capture group, or undefined when the text is
      // missing, the pattern does not match, or the capture is empty.
      const capture = (text, pattern, group) => text?.match(pattern)?.[group].trim() || undefined;

      return Array.from(document.querySelectorAll(".py-1"), (film) => {
        // Read each optional text once instead of re-querying per field.
        const details = film.querySelector(".small.film__details")?.innerText;
        const directors = film.querySelector(".film__directors")?.innerText;

        return {
          title: film.querySelector(".film__title.film__titles").textContent.split("\u2044")[0],
          time: film.querySelector(`${screeningColumn} h4.screening__time`).innerText,
          date: film.getAttribute("data-date"),
          piano: film.getAttribute("data-piano"),
          location: film.querySelector(`${screeningColumn} p span.badge.icon.screening__location`).innerText,
          details_color: capture(details, colorPattern, 1),
          details_timeMovie: capture(details, runtimePattern, 2),
          details_subtitles: capture(details, subtitlesPattern, 1),
          director: directors?.split(",")[0] || undefined,
          // Takes the last four characters of the directors text as the year.
          year: directors?.slice(-4) || undefined,
          cast: film.querySelector(".film__cast")?.innerText.replace(/^, /, "") || undefined,
        };
      });
    });
    console.log(CineData);
  } finally {
    // Always release the browser, even when navigation or extraction throws.
    await browser.close();
  }
  // Awaited so that a write failure rejects main() instead of being thrown
  // from a detached callback after main() has already resolved.
  await fs.promises.writeFile("cine_data.json", JSON.stringify(CineData));
  console.log("Success boys & girls");
};
// Run the scraper. main() returns a promise, so failures are reported here
// instead of being left as an unhandled rejection.
main().catch((err) => {
  console.error(err);
  // Exit explicitly with a failure status; anything still open after a failed
  // run (e.g. a browser that was never closed) must not keep the process alive.
  process.exit(1);
});