-
Notifications
You must be signed in to change notification settings - Fork 0
/
analyzeItem.py
140 lines (111 loc) · 4.4 KB
/
analyzeItem.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
from bs4 import BeautifulSoup
import requests
# Region names (cities and counties) used as the default key set of the
# `Locations` counter and matched as substrings against business addresses.
locations = [
    "基隆市",
    "臺北市",
    "新北市",
    "桃園市",
    "新竹市",
    "新竹縣",
    "苗栗縣",
    "臺中市",
    "彰化縣",
    "雲林縣",
    "南投縣",
    "嘉義縣",
    "嘉義市",
    "臺南市",
    "高雄市",
    "屏東縣",
    "宜蘭縣",
    "花蓮縣",
    "臺東縣",
    "澎湖縣",
    "金門縣",
    "連江縣",
]

# Module-level list, created empty; nothing else in this file writes to it.
item_list = []
class Locations:
    """Counter that tracks how many times each region name has been hit.

    Every region passed to the constructor starts at zero; `addCount`
    increments exactly one region at a time.
    """

    def __init__(self, location_list=locations):
        """Create a counter with one zeroed entry per region.

        Args:
            location_list: Iterable of region-name strings. Defaults to the
                module-level `locations` list. Duplicates collapse into a
                single entry; first-seen order is kept.

        Raises:
            AssertionError: If `location_list` is None or empty, or if any
                entry is not a string.
        """
        # AssertionError stays the exception type so existing callers keep
        # working, but it is raised explicitly: `assert` statements are
        # removed under `python -O`, which would silently skip validation.
        if location_list is None:
            raise AssertionError("地區不能是空的")
        names = list(location_list)
        # An empty list used to fall through to `location_list[0]` and fail
        # with an unrelated IndexError.
        if not names:
            raise AssertionError("地區不能是空的")
        # Validate every entry, not only the first one.
        if not all(isinstance(name, str) for name in names):
            raise AssertionError("地區清單需要為字串")
        self.__count = dict.fromkeys(names, 0)

    def count(self):
        """Return the internal `{region: count}` dict (the live object, not a copy)."""
        return self.__count

    def addCount(self, location):
        """Increment the counter of `location` by one.

        Raises:
            AssertionError: If `location` is not one of the known regions.
        """
        # Direct dict membership instead of building a key list per call.
        if location not in self.__count:
            raise AssertionError("沒有此地區")
        self.__count[location] += 1

    def getCount(self, location):
        """Return the current count of `location`.

        Raises:
            KeyError: If `location` is not one of the known regions.
        """
        return self.__count[location]

    def getPlaceList(self):
        """Return a new list of the known region names, in insertion order."""
        return list(self.__count)
class AnalyzeItem:
    """Collect and tally winning-receipt records for one prize tier.

    `update` downloads one period's winners table and accumulates the issuing
    companies, their addresses, a per-item frequency count and the raw row
    texts. `cal_locate_count` then attributes the collected addresses to
    regions.
    """

    # Supported prize tiers (in units of 萬, i.e. 10,000) mapped to the id of
    # the HTML table that lists the winners of that tier.
    _TABLE_IDS = {1000: "tenMillionsTable", 200: "twoMillionsTable"}

    # County names from before the administrative reorganisation, mapped to
    # the region name they are counted under. Checked in this order.
    _RENAMED = (
        ("桃園縣", "桃園市"),
        ("臺北縣", "新北市"),
        ("臺中縣", "臺中市"),
        ("臺南縣", "臺南市"),
        ("高雄縣", "高雄市"),
    )

    # Seconds to wait for the remote server before giving up; without a
    # timeout `requests.get` can block indefinitely.
    _REQUEST_TIMEOUT = 30

    def __init__(self, prize=1000):
        """Create an empty analyzer.

        Args:
            prize: Prize amount in units of 萬 (e.g. 1000萬 => prize=1000).
                Only 1000 and 200 are supported by `update`.
        """
        self.__prize = prize
        self.__locate = Locations()  # win count per region (a Locations counter)
        self.__alldata = []          # text of every data row fetched so far
        self.__company = []          # issuing business names
        self.__address = []          # business addresses
        self.__item = {}             # item name -> number of occurrences

    # ---- read accessors ----
    def prize(self):
        """Return the prize tier this analyzer was created with."""
        return self.__prize

    def alldata(self):
        """Return the list of raw row texts collected by `update`."""
        return self.__alldata

    def company(self):
        """Return the list of issuing business names collected by `update`."""
        return self.__company

    def address(self):
        """Return the list of business addresses collected by `update`."""
        return self.__address

    def item(self):
        """Return the `{item name: occurrences}` dict built by `update`."""
        return self.__item

    def locate(self):
        """Return the `Locations` counter filled by `cal_locate_count`."""
        return self.__locate

    @staticmethod
    def _clean_item_name(text):
        """Reduce a raw item description to the bare item name.

        Everything from the first comma on (the price part) is dropped, then
        everything from the first ASCII digit on (quantity and unit),
        e.g. "菸品2包,計250元" -> "菸品".
        """
        # partition() leaves the text intact when there is no comma, whereas
        # slicing with find()'s -1 result chopped off the last character.
        name = text.partition(",")[0]
        for pos, ch in enumerate(name):
            if '0' <= ch <= '9':
                return name[:pos]
        return name

    # ---- data collection ----
    def update(self, time):
        """Fetch the winners list of one period and merge it into this analyzer.

        Results are appended to what earlier calls already collected.

        Args:
            time: Period identifier inserted into the URL, e.g. "10705"
                (presumably ROC year + month; confirm against the site).
                Non-string values are converted with `str()`.

        Raises:
            AssertionError: If the configured prize is neither 200 nor 1000.
                Raised before any network access.
        """
        # The former `assert True, ...` could never fail, so an unsupported
        # prize crashed later with an unbound `table` after a wasted request.
        table_id = self._TABLE_IDS.get(int(self.__prize))
        if table_id is None:
            raise AssertionError("目前只提供200萬、1000萬的獎項分析功能")

        url = 'https://www.etax.nat.gov.tw/etw-main/ETW183W3_' + str(time) + '/'
        response = requests.get(url, timeout=self._REQUEST_TIMEOUT)
        html = response.content.decode('utf-8')
        sp = BeautifulSoup(html, 'html.parser')
        # NOTE: find() yields None when the page has no such table, which
        # surfaces as an AttributeError on the next statement.
        table = sp.find('table', {'id': table_id})

        self.__company.extend(
            cell.get_text()
            for cell in table.find_all('td', {'data-th': "開立發票營業人"})
        )
        self.__address.extend(
            cell.get_text()
            for cell in table.find_all('td', {'data-th': "營業地址"})
        )
        for cell in table.find_all('td', {'data-th': "交易項目"}):
            name = self._clean_item_name(cell.get_text())
            self.__item[name] = self.__item.get(name, 0) + 1
        # The first <tr> is skipped (header row, presumably).
        self.__alldata.extend(row.get_text() for row in table.find_all('tr')[1:])

    def cal_locate_count(self):
        """Attribute every collected address to a region and count it.

        An address containing a pre-reorganisation county name is counted
        once under the corresponding current name. Otherwise every known
        region name found in the address is counted.
        """
        locate = self.__locate
        places = locate.getPlaceList()
        for addr in self.__address:
            # Old county names take precedence; the first match wins.
            renamed = next(
                (new for old, new in self._RENAMED if old in addr), None
            )
            if renamed is not None:
                locate.addCount(renamed)
                continue
            for place in places:
                if place in addr:
                    locate.addCount(place)