-
Notifications
You must be signed in to change notification settings - Fork 0
/
paperDownload(web of sci).py
68 lines (54 loc) · 2.4 KB
/
paperDownload(web of sci).py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# Excel read/write modules
import xlrd,xlwt
from xlutils.copy import copy
# Browser automation modules
import time
import selenium
from selenium import webdriver
from selenium.webdriver.support.ui import Select
# WARNING: WebDriver setup is environment-specific; adjust this line for the
# browser/driver installed on your machine.
browser = webdriver.Chrome()
# URL of the search-results page to scrape. The value below is a Web of
# Science (apps.webofknowledge.com) summary page.
# NOTE(review): the original comment claimed "now only support springer",
# which does not match this URL -- confirm which site is actually supported.
# NOTE(review): the SID query parameter looks like a session id and
# presumably expires -- replace it with a fresh one before running.
targetWeb = "http://apps.webofknowledge.com/summary.do?locale=zh_CN&errorKey=&viewType=summary&product=WOS&search_mode=GeneralSearch&qid=1&SID=6EMbPCCNbKabQj64xiQ"
# Create the output workbook and the single sheet that receives the results.
paperList = xlwt.Workbook(encoding = 'utf-8')
paperInfo = paperList.add_sheet("paperList")
# Disabled: implicit wait for the browser to load the page.
#browser.implicitly_wait(25)
# Navigate to the search-results page.
browser.get(targetWeb)
# Helper functions wrapping common browser operations
def selectXpathAndClick(Xpath):
    """Find the first element matching an XPath expression and click it.

    Args:
        Xpath: XPath expression evaluated by the module-level ``browser``.

    Returns:
        The element that was clicked.

    Raises:
        selenium.common.exceptions.NoSuchElementException: propagated from
            Selenium when no element matches.
    """
    # Use the generic find_element(by, value) form: it exists in both
    # Selenium 3 and 4, whereas find_element_by_xpath was removed in 4.3.
    element = browser.find_element("xpath", Xpath)
    element.click()
    return element
def selectCsspathAndCLick(Csspath):
    """Find the first element matching a CSS selector and click it.

    Args:
        Csspath: CSS selector evaluated by the module-level ``browser``.

    Returns:
        The element that was clicked.

    Raises:
        selenium.common.exceptions.NoSuchElementException: propagated from
            Selenium when no element matches.
    """
    # Use the generic find_element(by, value) form: it exists in both
    # Selenium 3 and 4, whereas find_element_by_css_selector was removed
    # in 4.3.
    element = browser.find_element("css selector", Csspath)
    element.click()
    return element
def selectNameAndCLick(Name):
    """Find the first element whose ``title`` attribute equals *Name* and click it.

    The original implementation passed "title" as the locator strategy, which
    is not a strategy Selenium recognises, so the lookup could never succeed.
    The element is now located with a CSS attribute selector on ``title``.

    NOTE(review): the function name suggests the HTML ``name`` attribute, but
    the original locator string was "title"; this keeps the "title" reading.
    Confirm against the callers' intent.

    Args:
        Name: exact value of the ``title`` attribute to look for.

    Returns:
        The element that was clicked.

    Raises:
        selenium.common.exceptions.NoSuchElementException: propagated from
            Selenium when no element matches.
    """
    # Escape backslashes and double quotes so that Name is always parsed as
    # a single CSS string literal.
    escaped = Name.replace("\\", "\\\\").replace('"', '\\"')
    element = browser.find_element("css selector", '[title="' + escaped + '"]')
    element.click()
    return element
def selectXpathAndReturnElementName(Xpath):
    """Return the text of the first element matching an XPath expression.

    Args:
        Xpath: XPath expression evaluated by the module-level ``browser``.

    Returns:
        The ``text`` property of the matched element.

    Raises:
        selenium.common.exceptions.NoSuchElementException: propagated from
            Selenium when no element matches.
    """
    # Generic find_element(by, value) works on Selenium 3 and 4;
    # find_element_by_xpath was removed in Selenium 4.3.
    return browser.find_element("xpath", Xpath).text
def selectCsspathAndReturnElementName(Csspath):
    """Return the text of the first element matching a CSS selector.

    Args:
        Csspath: CSS selector evaluated by the module-level ``browser``.

    Returns:
        The ``text`` property of the matched element.

    Raises:
        selenium.common.exceptions.NoSuchElementException: propagated from
            Selenium when no element matches.
    """
    # Generic find_element(by, value) works on Selenium 3 and 4;
    # find_element_by_css_selector was removed in Selenium 4.3.
    return browser.find_element("css selector", Csspath).text
def selectCsspathAndReturnIncludeLink(Csspath):
    """Return the ``href`` of the first element matching a CSS selector.

    Args:
        Csspath: CSS selector evaluated by the module-level ``browser``.

    Returns:
        Whatever ``get_attribute("href")`` reports for the matched element.

    Raises:
        selenium.common.exceptions.NoSuchElementException: propagated from
            Selenium when no element matches.
    """
    # Generic find_element(by, value) works on Selenium 3 and 4;
    # find_element_by_css_selector was removed in Selenium 4.3.
    element = browser.find_element("css selector", Csspath)
    return element.get_attribute("href")
# Program entry: walk the search-result pages and write one spreadsheet row
# per listed paper (column 0 and column 2), then shut the browser down.

# Number of result pages to visit.
PAGE_COUNT = 10
# Absolute XPath up to (and including the opening bracket of) the index of a
# single entry in the result list; the 1-based entry number and "]" are
# appended per entry.
ENTRY_XPATH_PREFIX = "/html/body/div[1]/div[26]/div[2]/div/div/div/div[2]/div[3]/div[5]/form[2]/div/div[1]/div/span/div[2]/div["
# Absolute XPath of the "next page" link in the pager.
NEXT_PAGE_XPATH = "/html/body/div[1]/div[26]/div[2]/div/div/div/div[2]/div[4]/div/div/div[3]/div/form/nav/table/tbody/tr/td[3]/a"

row = 0
try:
    for page in range(PAGE_COUNT):
        # NOTE(review): range(1, 10) visits entries 1-9 only. If a result
        # page lists 10 records, the last one is skipped -- confirm whether
        # range(1, 11) was intended.
        for num in range(1, 10):
            entryXpath = ENTRY_XPATH_PREFIX + str(num) + "]"
            # Column 0: the paper title.
            paperInfo.write(row, 0, selectXpathAndReturnElementName(entryXpath + "/div[3]/div/div[1]/div/a/value"))
            # Column 2: text of a linked span in the entry's third detail
            # row -- presumably the source/journal name; TODO confirm.
            # Column 1 is left empty, as in the original script.
            paperInfo.write(row, 2, selectXpathAndReturnElementName(entryXpath + "/div[3]/div/div[3]/span[2]/a/span/value"))
            # Save after every row so already-scraped rows survive a
            # failure later in the run.
            paperList.save('paperList.xls')
            row = row + 1
        # Advance to the next page, except after the last page we need:
        # nothing is read from the page that would follow it.
        if page != PAGE_COUNT - 1:
            selectXpathAndClick(NEXT_PAGE_XPATH)
            # Fixed pause to give the next result page time to load.
            time.sleep(3)
finally:
    # Always release the browser, even when a lookup fails mid-run.
    # quit() closes every window and ends the driver session, so the
    # separate close() call is not needed.
    browser.quit()