forked from ContentMine/journal-scrapers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
science_direct.json
44 lines (44 loc) · 1.13 KB
/
science_direct.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
{
  "url": "sciencedirect.com",
  "elements": {
    "fulltext_pdf": {
      "selector": "//a[@id='pdfLink']",
      "attribute": "href",
      "download": true
    },
    "fulltext_html": {
      "selector": "//meta[@name='citation_fulltext_html_url']",
      "attribute": "content",
      "download": true
    },
    "title": {
      "selector": "//h1[@class='svTitle']"
    },
    "author": {
      "selector": "//a[@class='authorName']"
    },
    "date": {
      "selector": "//p[@class='volIssue']",
      "patternProperties": "[A-Za-z0-9 ]+, [A-Za-z0-9 ]+, ([A-Za-z0-9 ]+),.*"
    },
    "doi": {
      "selector": "//dd[@class='doiLink']/dd[@class='doi']/span",
      "patternProperties": "DOI: ([A-Za-z0-9./]+)"
    },
    "volume": {
      "selector": "//p[@class='volIssue']/a",
      "patternProperties": "Volume ([0-9]+)"
    },
    "issue": {
      "selector": "//p[@class='volIssue']/a",
      "patternProperties": "Issue ([0-9]+)"
    },
    "firstpage": {
      "selector": "//p[@class='volIssue']",
      "patternProperties": "Pages ([0-9]+)[^0-9]"
    },
    "abstract": {
      "selector": "//div[@class='abstract svAbstract ']/p"
    }
  }
}