-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathindex.js
85 lines (73 loc) · 2 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
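/**
 * Entry point: starts indexing MRAN.
 *
 * Downloads the list of packages, crawls the metadata of each one and saves
 * the collected name/imports records into `cran_files.json`.
 */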
var Crawler = require('crawler');
var fs = require('fs');

// Number of index entries handed to the crawler per batch.
var chunkSize = 3000;
// Number of crawler callbacks seen so far (successful or not); drives batching.
var total = 0;

var getAllPackages = require('./lib/getAllPackages');

// File the collected records are written to once the crawler drains.
var outFile = 'cran_files.json';
// One { name, imports } record per successfully parsed package.
var results = [];

console.log('Downloading packages list...');

// The second `then` acts as a rejection handler (written as `.then(null, fn)`
// so it works with any Promises/A+ implementation). It reports both a failed
// package-list download and a synchronous throw from `crawlDependencies`,
// instead of leaving the rejection unhandled.
getAllPackages().then(crawlDependencies).then(null, function (err) {
  console.log('ERROR: Failed to index packages: ' + err);
  process.exit(1);
});
/**
 * Crawls every entry of the package index and collects one record per
 * package into the module-level `results` array. When the crawler has no
 * work left, the records are written to `outFile` and the process exits.
 *
 * Entries are fed to the crawler in batches of `chunkSize`: the first batch
 * is queued immediately and a new one is queued each time another
 * `chunkSize` responses have come back.
 *
 * @param {Array} index - Crawl targets in the form accepted by the crawler's
 *   `queue` method (presumably package URLs produced by `getAllPackages`;
 *   confirm against that module). The array is consumed in place: batches
 *   are spliced off the front until it is empty.
 */
function crawlDependencies(index) {
  console.log('Loaded ' + index.length + ' packages to scan');

  var c = new Crawler({
    maxConnections: 10,
    // This will be called for each crawled page
    callback: indexPackage,
    onDrain: saveAndExit
  });

  queueChunk();

  /**
   * Crawler callback: records one package, or logs why it was skipped.
   *
   * @param {*} err - Error reported by the crawler, if any.
   * @param {Object} res - Crawler response; `res.uri` and `res.body` are read.
   */
  function indexPackage(err, res) {
    total += 1;

    // Refill the queue before any early return. `total` counts failed
    // requests too, so if this check sat behind the error guard, a failure
    // landing exactly on a chunk boundary would skip the refill, the crawler
    // would drain, and the remaining packages would never be crawled.
    if (total % chunkSize === 0) {
      queueChunk();
    }

    if (err) {
      console.log('ERROR: ' + err);
      return;
    }

    console.log('Downloading ' + res.uri);

    // Unknown packages are answered with this literal body rather than JSON.
    if (res.body === 'Not Found') {
      console.log('Not found: ' + res.uri);
      return;
    }

    try {
      var body = JSON.parse(res.body);
      results.push(toObject(body.data));
    } catch (e) {
      // Best effort: a malformed response must not abort the whole crawl.
      console.log('IGNORING: Failed to parse response body: ' + res.body);
      console.log(e);
    }
  }

  /**
   * Moves the next batch (at most `chunkSize` entries) from `index` into the
   * crawler queue. Does nothing once `index` is empty.
   */
  function queueChunk() {
    if (!index.length) return;

    if (index.length < chunkSize) {
      console.log('Queueing last ' + index.length + ' packages');
      c.queue(index.splice(0, index.length));
    } else {
      console.log('Queueing next ' + chunkSize + ' packages.');
      c.queue(index.splice(0, chunkSize));
      console.log('Remaining: ' + index.length);
    }
  }

  /**
   * Drain handler: writes all collected records to `outFile` as JSON and
   * terminates the process.
   */
  function saveAndExit() {
    fs.writeFileSync(outFile, JSON.stringify(results), 'utf8');
    console.log('Done');
    console.log('Indexed ' + results.length + ' packages. Saved into ' + outFile);
    console.log('Run: node layout.js');
    process.exit(0);
  }
}
/**
 * Reduces a raw package description to the two fields the indexer keeps.
 *
 * @param {Object} x - Package description whose fields are arrays with the
 *   value in the first slot. `x.Package[0]` is read as the package name and
 *   `x.Imports[0]`, when present, as a comma-separated dependency list.
 * @returns {{name: string, imports: string[]}} The package name and its
 *   imports. `imports` is an empty array when the package declares none.
 */
function toObject(x) {
  // A package without dependencies may omit `Imports` altogether; treat that
  // the same as an empty list instead of throwing (the caller would otherwise
  // report it as a parse failure and drop the package).
  var rawImports = (x.Imports && x.Imports[0]) || '';

  var imports = rawImports
    // Strip every kind of whitespace, since the list may be wrapped over
    // several lines, not just separated by plain spaces.
    .replace(/\s/g, '')
    .split(',')
    // Drop empty entries left by an empty list or a trailing/double comma.
    .filter(Boolean);

  return {
    name: x.Package[0],
    imports: imports
  };
}