forked from zotero/translators
-
Notifications
You must be signed in to change notification settings - Fork 1
/
BusinessWeek.js
119 lines (116 loc) · 3.85 KB
/
BusinessWeek.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
{
"translatorID": "fb342bae-7727-483b-a871-c64c663c2fae",
"label": "BusinessWeek",
"creator": "Michael Berkowitz",
"target": "^https?://(www\\.|search\\.)?businessweek\\.com",
"minVersion": "3.0",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsv",
"lastUpdated": "2012-10-21 22:38:34"
}
/**
 * Classifies the current page for Zotero.
 *
 * @param {Document} doc - DOM of the page; queried through doc.evaluate().
 * @param {string} url - Address of the page (not consulted here).
 * @returns {string|undefined} "multiple" when a search-results marker is
 *     found, "magazineArticle" when an h1 with id "article_headline" is
 *     found, and undefined when neither is present.
 */
function detectWeb(doc, url) {
	// True when the XPath expression selects at least one node in doc.
	var hasMatch = function (expression) {
		var nodes = doc.evaluate(expression, doc, null, XPathResult.ANY_TYPE, null);
		return Boolean(nodes.iterateNext());
	};

	// The search-results check comes first, so it wins if both markers exist.
	if (hasMatch('//body[contains(@class, "searchResults")]|//div[contains(@class, "search_result")]')) {
		return "multiple";
	}
	if (hasMatch('//h1[@id="article_headline"]')) {
		return "magazineArticle";
	}
}
/**
 * Zotero entry point: saves the article on the page, or lets the user pick
 * articles from a search-results page.
 *
 * On a page that detectWeb() reports as "multiple", the result links are
 * collected into a url -> title map and handed to Zotero.selectItems(); the
 * URLs the user keeps are passed to Zotero.Utilities.processDocuments() with
 * scrape() as the per-document handler. On any other page, scrape() is
 * called directly on the current document.
 *
 * @param {Document} doc - DOM of the current page.
 * @param {string} url - Address of the current page.
 */
function doWeb(doc, url) {
	if (detectWeb(doc, url) !== "multiple") {
		scrape(doc, url);
		return;
	}

	// Two search-result layouts are handled; the first one that yields any
	// link text is used.
	var linkXPaths = [
		'//h3[@class="story"]/a',
		'//div[@class="search_result"]/a[./h4]'
	];
	var results = null;
	for (var n = 0; n < linkXPaths.length; n++) {
		if (ZU.xpathText(doc, linkXPaths[n])) {
			results = doc.evaluate(linkXPaths[n], doc, null, XPathResult.ANY_TYPE, null);
			break;
		}
	}
	// Neither layout matched (e.g. a search with no hits). Without this guard
	// the loop below would call iterateNext() on undefined and throw.
	if (!results) {
		return;
	}

	var items = {};
	var result;
	while ((result = results.iterateNext())) {
		items[result.href] = Zotero.Utilities.trimInternal(result.textContent);
	}

	Zotero.selectItems(items, function (selected) {
		// A falsy value means nothing was chosen.
		if (!selected) {
			return true;
		}
		var articles = Object.keys(selected);
		Zotero.Utilities.processDocuments(articles, scrape, function () {
			Zotero.done();
		});
	});
}
/**
 * Builds one "magazineArticle" item from an article page and saves it with
 * item.complete().
 *
 * Field sources:
 *   title            - text of the h1 with id "article_headline"
 *   abstractNote     - meta "description" (when present)
 *   tags             - meta "keywords", split on commas (when present)
 *   creators         - meta "author", split into individual names (when present)
 *   publicationTitle - "BusinessWeek: " + meta "channel", or plain
 *                      "BusinessWeek" when the page has no channel meta
 *   date             - meta "pub_date" (when present), see below
 *
 * @param {Document} doc - DOM of the article page.
 * @param {string} url - Address of the article; stored as item.url.
 */
function scrape(doc, url) {
	// Index every <meta> element by its name attribute.
	var metaTags = {};
	var metas = doc.evaluate('//meta', doc, null, XPathResult.ANY_TYPE, null);
	var meta;
	while ((meta = metas.iterateNext())) {
		metaTags[meta.name] = meta.content;
	}
	Zotero.debug(metaTags);

	var item = new Zotero.Item("magazineArticle");
	item.title = ZU.xpathText(doc, '//h1[@id="article_headline"]');
	if (metaTags['description']) {
		item.abstractNote = metaTags['description'];
	}
	if (metaTags['keywords']) {
		item.tags = metaTags['keywords'].split(/\s*,\s*/);
	}

	// Some articles don't have author tags - prevent this from failing.
	if (metaTags['author']) {
		// Separators are a comma (optionally followed by "and ") or a
		// free-standing " and ". Whitespace is required around "and" so that
		// names such as "Brandon" or "Amanda" are not cut in the middle.
		var authors = metaTags['author'].split(/\s*,\s*(?:and\s+)?|\s+and\s+/);
		for (var i = 0; i < authors.length; i++) {
			// Skip empty pieces left by leading/trailing separators.
			if (authors[i]) {
				item.creators.push(Zotero.Utilities.cleanAuthor(authors[i], "author"));
			}
		}
	}

	// Avoid producing "BusinessWeek: undefined" when there is no channel meta.
	item.publicationTitle = metaTags['channel']
		? "BusinessWeek: " + metaTags['channel']
		: "BusinessWeek";
	item.url = url;

	// Rewrites the first YYYYMMDD digit run as MM/DD/YYYY and drops everything
	// from the first "T" that has text after it. Skipped when the page has no
	// pub_date meta, which previously threw.
	if (metaTags['pub_date']) {
		item.date = metaTags['pub_date']
			.replace(/(\d{4})(\d{2})(\d{2})/, "$2/$3/$1")
			.replace(/T.+/, "");
	}
	item.complete();
}

/** BEGIN TEST CASES **/
var testCases = [
{
"type": "web",
"url": "http://www.businessweek.com/management/ten-things-only-bad-managers-say-09232011.html?campaign_id=rss_topStories",
"items": [
{
"itemType": "magazineArticle",
"creators": [
{
"firstName": "Liz",
"lastName": "Ryan",
"creatorType": "author"
}
],
"notes": [],
"tags": [
"leadership",
"Management",
"bad bosses",
"leaders",
"Liz Ryan",
"bad managers",
"lousy managers"
],
"seeAlso": [],
"attachments": [],
"title": "Ten Things Only Bad Managers Say",
"abstractNote": "We know the kinds of things good managers say: They say “Attaboy” or “Attagirl,” “Let me know if you run into any roadblocks, and I’ll try to get rid of them for you,” and “You’ve been killing yourself—why don’t you take off at noon on Friday?”",
"publicationTitle": "BusinessWeek: management",
"url": "http://www.businessweek.com/management/ten-things-only-bad-managers-say-09232011.html?campaign_id=rss_topStories",
"date": "2011-09-23",
"libraryCatalog": "BusinessWeek",
"accessDate": "CURRENT_TIMESTAMP"
}
]
},
{
"type": "web",
"url": "http://www.businessweek.com/search?q=linux&resultsPerPage=20&sort=date",
"items": "multiple"
}
]
/** END TEST CASES **/