-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPyScraper.py
161 lines (133 loc) · 5.93 KB
/
PyScraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
from contextlib import closing
from selenium.webdriver import Chrome
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys
import time
# Load the ticker symbols to scrape: one symbol per line, normalised
# to upper case with surrounding whitespace removed.
with open("symbols.txt") as symbol_file:
    symbols = [line.strip().upper() for line in symbol_file]
class FutureClass:
    """Plain record holding everything scraped for one CME futures symbol.

    Every field starts as None and is filled in by the scraper; errorFlag
    stays None on success, 1 when the symbol is not found, 2 when the
    product-spec page did not load.
    """

    # Field names in the exact order the original assignments ran.
    # The order matters: downstream code derives DataFrame columns from
    # the insertion order of each instance's __dict__.
    _FIELDS = (
        "url", "productName", "clearing", "globex", "floor", "clearPort",
        "exchange", "productGroup", "clearedAs", "volume", "openInterest",
        "contractUnit", "priceQuotation", "tradingHours",
        "tradingHoursGlobex", "tradingHoursClearPort",
        "minimumPriceFluctuation", "productCode", "listedContracts",
        "settlementMethod", "floatingPrice", "terminationOfTrading",
        "settlementProcedures", "positionLimits", "exchangeRulebook",
        "blockMinimum", "priceLimitOrCircut",  # (sic) kept for compatibility
        "vendorCodes", "errorFlag",
    )

    def __init__(self, symbol):
        self.symbol = symbol
        for field in self._FIELDS:
            setattr(self, field, None)
# Spec-page row label -> FutureClass attribute that stores its value.
# 'Trading Hours' is handled separately below because its row layout varies.
SPEC_FIELDS = {
    'Contract Unit': 'contractUnit',
    'Price Quotation': 'priceQuotation',
    'Minimum Price Fluctuation': 'minimumPriceFluctuation',
    'Product Code': 'productCode',
    'Listed Contracts': 'listedContracts',
    'Settlement Method': 'settlementMethod',
    'Floating Price': 'floatingPrice',
    'Termination Of Trading': 'terminationOfTrading',
    'Settlement Procedures': 'settlementProcedures',
    'Position Limits': 'positionLimits',
    'Exchange Rulebook': 'exchangeRulebook',
    'Block Minimum': 'blockMinimum',
    'Price Limit Or Circuit': 'priceLimitOrCircut',
    'Vendor Codes': 'vendorCodes',
}

# Attributes filled from the 10 search-result columns, in column order.
SUMMARY_ATTRS = ('productName', 'clearing', 'globex', 'floor', 'clearPort',
                 'exchange', 'productGroup', 'clearedAs', 'volume',
                 'openInterest')


def _spec_cells(browser, label):
    """Return all <td> cells of the spec-table row whose first cell text
    equals *label*; empty list when no such row exists on the page."""
    return browser.find_elements_by_xpath(
        "//tbody/tr/td[text()='" + label + "']/../td")


ls = []
with closing(Chrome()) as browser:
    for symbol in symbols:
        tmp = FutureClass(symbol)
        browser.get('http://www.cmegroup.com/search/?q=' + symbol)
        # Search-results row for this exact symbol, restricted to futures.
        # Fix: original first XPath had "'and" with no space before 'and',
        # inconsistent with the link XPath below.
        elements = browser.find_elements_by_xpath(
            "//tbody//tr[td/text()='" + symbol +
            "' and td/text()='Futures']//td")
        if not elements:
            print(tmp.symbol + ": No such symbol!")
            tmp.errorFlag = 1
            ls.append(tmp)
            continue
        # Indexing (not zip) preserves the original fail-loud IndexError
        # if the results table ever has fewer than 10 columns.
        for idx, attr in enumerate(SUMMARY_ATTRS):
            setattr(tmp, attr, elements[idx].text)
        # Follow the product link of the first qualified row.
        link = browser.find_elements_by_xpath(
            "//tbody//tr[td/text()='" + symbol +
            "' and td/text()='Futures']//td/a")
        link[0].click()
        tmp.url = browser.current_url
        # Sanity check that we actually landed on a product-spec page.
        if not browser.find_elements_by_xpath(
                "//tbody/tr/td[@class='prodSpecAtribute']/../..//tr//td[2]"):
            print(tmp.symbol + ": Page not jump!")
            tmp.errorFlag = 2
            ls.append(tmp)
            continue
        # Generic one-value rows: second cell of each labelled row.
        for label, attr in SPEC_FIELDS.items():
            cells = _spec_cells(browser, label)
            if cells:
                setattr(tmp, attr, cells[1].text)
        # 'Trading Hours': two cells means a single combined value;
        # otherwise the page lists Globex and ClearPort hours separately.
        cells = _spec_cells(browser, 'Trading Hours')
        if cells:
            if len(cells) == 2:
                tmp.tradingHours = cells[1].text
            else:
                tmp.tradingHoursGlobex = cells[2].text
                tmp.tradingHoursClearPort = _spec_cells(
                    browser, 'CME ClearPort:')[1].text
        ls.append(tmp)
import pickle

# Persist the scraped records, then immediately read them back as a
# round-trip sanity check. NOTE: the file holds binary pickle data
# despite its .txt extension.
with open("results.txt", "wb") as out_file:
    pickle.dump(ls, out_file)
with open("results.txt", "rb") as in_file:
    ls2 = pickle.load(in_file)
import pandas as pd

# Build the result table in one shot from the instances' attribute dicts.
# Fix: the original appended one single-row frame per object via
# DataFrame.append, which is quadratic and was removed in pandas 2.0.
# (Side effect of the fix: the CSV index is now a clean 0..n-1 range
# instead of every row being labelled 0.)
columns = list(ls2[0].__dict__.keys())
df = pd.DataFrame([list(obj.__dict__.values()) for obj in ls2],
                  columns=columns)
print(len(df), "rows written")  # progress summary replaces per-row prints
df.to_csv("results.csv")  # fix: output filename was misspelled "resuls.csv"