-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathperceptron.py
87 lines (76 loc) · 2.39 KB
/
perceptron.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import app.parser.getData as importArticles
import app.parser.articleRetrieval.getArticles as getContent
import app.parser.sentences as sent
import app.parser.getChunks as gc
import app.analytics.tag as tag
import app.parser.articleRetrieval.wikipediaParse as wp
import app.analytics.features as fe
from sklearn import tree, feature_extraction, svm, linear_model
from sklearn.feature_extraction.text import CountVectorizer
from multiprocessing import Pool
import numpy as np
import datetime
import app.analytics.filterSentences as fl
import networkx as nx
import matplotlib.pyplot as plt
# ---------------------------------------------------------------------------
# Module-level state shared by train(), test() and the script section below.
# ---------------------------------------------------------------------------

# Directed graph handle; not referenced anywhere else in the visible code.
G = nx.DiGraph()

# Silence NumPy floating-point warnings for division by zero and invalid
# operations instead of printing them.
np.seterr(divide='ignore', invalid='ignore')

listOfYears = []

# Classifier fitted by train() and queried by test().
# NOTE(review): later scikit-learn releases renamed ``n_iter`` to
# ``max_iter`` -- confirm against the installed version before upgrading.
# An earlier alternative was svm.SVC(probability=True).
clf = linear_model.Perceptron(n_iter=90)

probs = []
titles = []

# SECURITY: eval() executes whatever expression the first line of the file
# contains. Only run this against data files you produced yourself; if the
# payload is made of plain literals, ast.literal_eval is the safer choice.
with open('trainDoubleSet', 'r') as train_file:
    trainData = eval(train_file.readlines()[0])

# The test set is kept as raw lines here; its first line is evaluated later,
# just before the test records are filtered.
with open('testDoubleSet', 'r') as test_file:
    testData = test_file.readlines()
#
#C
def train(features):
    """Fit the module-level classifier ``clf`` on labelled feature records.

    Records are read positionally: ``record[0]`` is the feature vector and
    ``record[2]`` is the label. Records whose feature vector is empty are
    dropped. Every remaining vector is truncated to the length of the
    shortest one so that all rows of the training matrix have equal width.

    Args:
        features: sequence of records laid out as described above.

    Returns:
        The number of leading feature values kept per sample, i.e. the
        width of the matrix passed to ``clf.fit``.

    Raises:
        ValueError: if no record with a non-empty feature vector remains.
    """
    usable = [record for record in features if len(record[0]) != 0]
    if not usable:
        # min() over an empty sequence would raise an opaque ValueError;
        # fail with a message that names the actual problem instead.
        raise ValueError(
            "train() needs at least one record with a non-empty feature vector"
        )

    width = min(len(record[0]) for record in usable)
    X = np.ones((len(usable), width))
    Y = np.ones(len(usable))
    for row, record in enumerate(usable):
        label = record[2]
        # Progress output: one "(row index, label)" tuple per sample. The
        # doubled parentheses print a tuple under both Python 2 and 3.
        print((row, label))
        Y[row] = label
        # Keep only the first `width` values so every row has equal length.
        X[row, :] = record[0][:width]

    clf.fit(X, Y)
    return width
def test(features, B):
    """Score the module-level classifier ``clf`` and print its accuracy.

    Records are read positionally: ``record[0]`` is the feature vector and
    ``record[2]`` is the expected label. Records whose feature vector is
    empty are skipped and do not count towards the total.

    Args:
        features: sequence of records laid out as described above.
        B: number of leading feature values to feed to the classifier for
            each sample (the value returned by ``train``).

    Returns:
        None. The result is printed as ``Accuracy = <correct>/<total>``.
    """
    usable = [record for record in features if len(record[0]) != 0]
    correct = 0
    for record in usable:
        # scikit-learn estimators take a 2-D (n_samples, n_features) array.
        # The sample is therefore passed as a single-row matrix; handing
        # over the 1-D row itself is rejected by newer scikit-learn releases.
        sample = np.array(record[0][0:B]).reshape((1, -1))
        predicted = clf.predict(sample)
        if record[2] == predicted[0]:
            correct += 1
    print("Accuracy = " + str(correct) + '/' + str(len(usable)))
def _drop_missing(records):
    """Return the entries of *records* that are not None, in index order.

    Entries are fetched by position (``records[0]`` .. ``records[len-1]``).
    A position that cannot be read is skipped and a blank line is printed,
    so one bad entry does not abort the whole run.
    """
    kept = []
    for index in range(len(records)):
        try:
            entry = records[index]
        except (IndexError, KeyError):
            # Best-effort skip; only lookup failures are swallowed so that
            # KeyboardInterrupt and genuine bugs still surface.
            print("")
            continue
        if entry is not None:
            kept.append(entry)
    return kept


# Timestamp before training starts.
print(datetime.datetime.now())

# NOTE(review): this worker pool is never used in the visible code; it only
# spawns 20 idle processes. Kept so the module-level name `p` still exists --
# confirm nothing depends on it, then remove.
p = Pool(20)

trainFeatures = _drop_missing(trainData)
B = train(trainFeatures)

# Timestamp after training finishes.
print(datetime.datetime.now())
print("Training Complere. Now For Testing")

# SECURITY: eval() executes whatever expression the first line of the test
# file contains. Only run this against data files you produced yourself.
testData = eval(testData[0])
testFeatures = _drop_missing(testData)
test(testFeatures, B)