-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathexportCloseData_Py2.py
86 lines (77 loc) · 3.22 KB
/
exportCloseData_Py2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# -*- coding: UTF-8 -*-
# pylint: disable=I0011,C0111, C0103,C0326,C0301, C0304, W0401,W0614
from cassandra.cluster import Cluster
from cassandra.util import Date
import time
import datetime
import math
import numpy as np
#####################################################################################
## Generate 'close' training file within required periods in TXT separated by '\t' ##
#####################################################################################
def exportClose(fileName, startTime, endTime=None, table="factors_day", TYPE='D'):
    """Export the 'close' factor of every stock to a tab-separated text file.

    The stock list is read from ``stock_info`` and the time axis from
    ``transaction_time`` (rows whose ``type`` equals ``TYPE`` and whose ``time``
    lies in ``[startTime, endTime]``). For each stock the 'close' values in the
    same range are read from ``table``.

    Output layout:
        * header line: ``close`` followed by the stock codes in sorted order;
        * one line per time entry: the date as ``YYYYMMDD`` followed by one
          value per stock, in the same order as the header.
    NaN and missing (``None``) values are written as ``0``.

    Args:
        fileName: Path of the output file; it is opened in ``"w"`` mode.
        startTime: Inclusive lower bound; must support ``strftime`` and
            comparison with ``endTime`` (e.g. ``datetime.date``).
        endTime: Inclusive upper bound. ``None`` (the default) means today's
            date, resolved on every call.
        table: Name of the table holding the factor values. It is concatenated
            into the query text, so it must come from a trusted source.
        TYPE: Value matched against the ``type`` column of ``transaction_time``.

    Returns:
        None. Nothing is queried or written when ``startTime > endTime``.
    """
    # Resolve "today" per call: a default computed in the signature would be
    # frozen at the moment the module was imported.
    if endTime is None:
        endTime = datetime.datetime.today().date()
    if startTime > endTime:
        return

    cluster = Cluster(['202.120.40.111'])
    try:
        session = cluster.connect('factors')  # connect to the keyspace 'factors'

        # Get valid stocks in A share.
        stocks = [row[0] for row in session.execute('''select stock from stock_info''')]
        print (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), "Total A stocks number: ", len(stocks))

        # Time list, rendered as YYYYMMDD strings for the first output column.
        rows = session.execute('''
        select * from transaction_time
        where type= %s and time >= %s and time <= %s ALLOW FILTERING;''', [TYPE, startTime, endTime])
        dateList = [
            datetime.datetime.strptime(str(row.time), "%Y-%m-%d").strftime('%Y%m%d')
            for row in rows
        ]

        # Pull the data one stock at a time through a single prepared statement.
        SQL = ("SELECT value FROM " + table + " WHERE stock = ? AND factor = 'close' and time >= '"
               + startTime.strftime("%Y-%m-%d") + "' and time <= '"
               + endTime.strftime("%Y-%m-%d") + "'")
        preparedStmt = session.prepare(SQL)
        dataList = [
            [row[0] for row in session.execute(preparedStmt, (stock,))]
            for stock in stocks
        ]
    finally:
        # Release the connection even when a query fails.
        cluster.shutdown()

    # Order the columns by stock code, keeping each series paired with its stock.
    stock_order = np.argsort(np.array(stocks))
    stocks = [stocks[j] for j in stock_order]
    dataList = [dataList[k] for k in stock_order]

    # NOTE(review): values are matched to dates purely by position, which
    # presumably assumes every stock has exactly one row per listed date --
    # confirm. As before, output stops at the first series that is too short,
    # leaving the current line unfinished.
    truncated = False
    # Write the data to the file; `with` closes it even if a write fails.
    with open(fileName, "w") as f:
        f.write('close')
        for stock in stocks:
            f.write('\t' + stock)
        f.write('\n')
        for i, date in enumerate(dateList):
            f.write(str(date))
            for series in dataList:
                if i >= len(series):
                    truncated = True
                    break
                data = series[i]
                # None would make math.isnan raise TypeError, so test it first.
                if data is None or math.isnan(data):
                    data = 0  # default value
                f.write('\t' + str(data))
            if truncated:
                break
            f.write('\n')

    if truncated:
        print ("End of reading and writing daily close data...")
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), 'Writing to ',fileName,' complete!')
##############################################
################# EXAMPLE USAGE ##############
if __name__ == "__main__":
    # Run the sample export only when this file is executed as a script, so that
    # importing the module does not open a Cassandra connection or write a file.
    exportClose("E:\\close-5-31.txt",datetime.date(2017,5,1),datetime.date(2017,5,31))