-
Notifications
You must be signed in to change notification settings - Fork 1
/
server.js
181 lines (163 loc) · 5.42 KB
/
server.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
const request = require('request');
const cheerio = require('cheerio');
const express = require('express');
const app = express();
const parser = require('body-parser');
// listen on the port given by the environment, or 3000 when none is set
const port = process.env.PORT || 3000;

// serve everything under ./public as static assets
const staticFiles = express.static('public');
app.use(staticFiles);

// decode URL-encoded form bodies so handlers can read req.body
const formParser = parser.urlencoded({ extended: true });
app.use(formParser);

// render views with EJS templates
app.set('view engine', 'ejs');

// Wikipedia address that redirects to a randomly chosen article
const randomURL = 'https://en.wikipedia.org/wiki/Special:Random#/random';

// module-level state used by renderChain and the /chain routes:
//   chain          - articles visited so far
//   maxChainLength - number of articles after which a chain is complete
//   articleLinks   - candidate links taken from the most recently loaded article
let chain = [];
const maxChainLength = 6;
let articleLinks = [];
/**
 * Shuffle an array in place using the Fisher-Yates algorithm.
 *
 * @param {Array} items - array to reorder; it is mutated
 * @returns {Array} the same array instance
 */
function shuffleInPlace(items) {
  for (let i = items.length - 1; i > 0; i--) {
    // j is drawn from 0..i inclusive. Allowing j === i lets an element stay
    // where it is, which is required for every ordering to be reachable.
    const j = Math.floor(Math.random() * (i + 1));
    const temp = items[i];
    items[i] = items[j];
    items[j] = temp;
  }
  return items;
}

/**
 * Collect the links to other Wikipedia articles found directly inside the
 * paragraphs of a parsed article page.
 *
 * Each matched anchor is rewritten in the parsed document (absolute href,
 * classes replaced by "keyword"). Only the first link for a given `title`
 * attribute is kept.
 *
 * @param {Function} $ - cheerio instance returned by cheerio.load()
 * @returns {Array<{title: string, url: string, url_stub: string, paragraph: string}>}
 *   one entry per distinct link title; `paragraph` is the HTML of the
 *   containing paragraph with every other anchor unwrapped
 */
function collectArticleLinks($) {
  // anchors that are direct children of article paragraphs and point at
  // /wiki/ pages, excluding image links, non-article namespaces and
  // anchors carrying the "internal" class
  const selector =
    '.mw-parser-output > p > a' +
    '[href^="/wiki/"]' +
    ':not' +
    '(:has(>img),' +
    '[href^="/wiki/Category:"],' +
    '[href^="/wiki/Portal:"],' +
    '[href^="/wiki/Special:"],' +
    '[href^="/wiki/Wikipedia:"],' +
    '[href^="/wiki/Template:"],' +
    '[href^="/wiki/Template_talk:"],' +
    '[href^="/wiki/Talk:"],' +
    '[href^="/wiki/Help:"],' +
    '.internal)';

  const links = [];
  const seenTitles = new Set();

  $(selector).each(function() {
    const anchor = $(this);
    const title = anchor.attr('title');
    // skip links whose title has already been collected
    if (seenTitles.has(title)) {
      return;
    }
    seenTitles.add(title);

    const href = anchor.attr("href");
    const url = "https://en.wikipedia.org" + href;
    anchor.attr("href", url); // make href full url
    anchor.removeClass(); // remove all classes
    anchor.addClass("keyword");

    // work on a copy of the containing paragraph so the page itself keeps
    // its other anchors for later iterations
    const para = anchor.closest("p").clone();
    para.find(".reference").remove(); // remove references
    para.find(".noprint").remove(); // remove other marks
    para.find(".Inline-Template").remove();
    para.find(".Template-Fact").remove();

    // unwrap all other anchors; the href is compared as a value rather than
    // spliced into a selector string, so quotes in the URL cannot break it
    para.find("a").each(function() {
      const other = $(this);
      if (other.attr("href") !== url) {
        other.replaceWith(other.html());
      }
    });

    links.push({
      title: title,
      url: url,
      url_stub: href.substring(6), // the part after /wiki/
      paragraph: para.html()
    });
  });

  return links;
}

/**
 * Fetch a Wikipedia article, append it to the module-level `chain`, and then
 * either render the 'chain' view or follow a randomly chosen link.
 *
 * Reads and writes the module-level `chain` and `articleLinks`, which are
 * shared by every request handled by this process.
 *
 * Outcomes:
 *  - request error or non-200 response: renders 'chain' with an error message
 *  - chain has reached `maxChainLength`: renders the chain without links
 *  - manual mode: renders the chain with the shuffled links of this article
 *  - automatic mode: recurses into a random link; if the article offers no
 *    eligible link, renders the chain collected so far without links
 *
 * @param {string} url - address of the article to load
 * @param {boolean} isManual - true when the user picks the next link
 * @param {object} res - Express response used to render the result
 */
function renderChain(url, isManual, res) {
  // get contents of webpage
  request(url, function(error, response, body) {
    if (error || !response || response.statusCode !== 200) {
      res.render('chain', {
        articleTitle: null,
        articleLinks: null,
        error: "Error, please try again"
      });
      return;
    }

    // parse page
    const $ = cheerio.load(body);

    // get article heading, as markup and as plain text
    const heading = $("#firstHeading");
    const title = heading.html();
    const link_title = heading.text();

    // paragraph in which the link to this article appeared on the previous
    // page; stays empty when the article was not reached through a link
    let para = "";
    for (const link of articleLinks) {
      if (link.url === url) {
        para = link.paragraph;
      }
    }

    // get url after redirect
    // NOTE(review): this reads an undocumented internal (`_httpMessage`);
    // presumably the request library's `response.request.uri.href` is the
    // supported way to obtain the final URL - confirm before switching.
    let articleUrl = url;
    if (url === randomURL) {
      articleUrl = "https://en.wikipedia.org" + response.socket._httpMessage.path;
    }

    chain.push({
      title: title,
      link_title: link_title,
      paragraph: para,
      url: articleUrl
    });

    if (chain.length >= maxChainLength) {
      // chain complete: render without links
      res.render('chain', {
        chain: chain,
        articleLinks: null,
        error: null
      });
      return;
    }

    // get links on page (only those going to other wikipedia articles)
    articleLinks = shuffleInPlace(collectArticleLinks($));

    if (isManual) {
      // render page and let the user choose
      res.render('chain', {
        chain: chain,
        articleLinks: articleLinks,
        error: null
      });
      return;
    }

    if (articleLinks.length === 0) {
      // dead end: nothing to follow, so show what has been collected instead
      // of dereferencing an undefined link
      res.render('chain', {
        chain: chain,
        articleLinks: null,
        error: null
      });
      return;
    }

    // automatically choose link
    const chosenLink = articleLinks[Math.floor(Math.random() * articleLinks.length)];
    renderChain(chosenLink.url, false, res);
  });
}
// In production, send every request that did not arrive over HTTPS to the
// same host and path on https://. The original protocol is read from the
// x-forwarded-proto request header.
// https://jaketrent.com/post/https-redirect-node-heroku/
if (process.env.NODE_ENV === "production") {
  app.use(function forceHttps(req, res, next) {
    const arrivedOverHttps = req.header("x-forwarded-proto") === "https";
    if (arrivedOverHttps) {
      next();
      return;
    }
    res.redirect(`https://${req.header('host')}${req.url}`);
  });
}
// start page: render the 'index' view
app.get('/', (req, res) => {
  res.render('index');
});
// start a fresh chain from a random article; the pathType query parameter
// selects manual mode, anything else runs the chain automatically
app.get('/chain', (req, res) => {
  chain = [];
  // loose comparison kept on purpose so coercible values behave as before
  const isManual = req.query.pathType == 'manual';
  renderChain(randomURL, isManual, res);
});
// respond to user action: continue the current chain, in manual mode, with
// the article named by the posted `article_link` form field
app.post('/chain', function(req, res) {
  const articleLink = req.body ? req.body.article_link : undefined;

  // The urlencoded parser runs with extended: true, so a field may arrive as
  // an array or object, or be missing altogether. Only a non-empty string can
  // be appended to the article URL.
  if (typeof articleLink !== 'string' || articleLink === '') {
    res.status(400).render('chain', {
      articleTitle: null,
      articleLinks: null,
      error: "Error, please try again"
    });
    return;
  }

  // The value is appended as-is (not re-encoded): renderChain matches this
  // URL against the URLs of the links it collected from the previous page.
  const url = 'https://en.wikipedia.org/wiki/' + articleLink;
  renderChain(url, true, res);
});
// start accepting connections and log the port once the server is up
app.listen(port, () => {
  console.log(`App listening on port ${port}!`);
});