-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathkmm.py
133 lines (103 loc) · 3.55 KB
/
kmm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import math, numpy, sklearn.metrics.pairwise as sk, sys
from sklearn import linear_model
from cvxopt import matrix, solvers
#DENSITY ESTIMATION
#KMM solving the quadratic programming problem to get betas (weights) for each training instance
def kmm(Xtrain, Xtest, sigma, B=1000):
    """Estimate per-instance training weights (betas) by Kernel Mean Matching.

    Builds and solves, with cvxopt, the quadratic program

        minimise    0.5 * beta' K beta + kappa' beta
        subject to  n_tr * (1 - eps) <= sum(beta) <= n_tr * (1 + eps)
                    0 <= beta_i <= B

    where K is the RBF kernel on the training data, kappa is
    -(n_tr / n_te) times the row sums of the train/test RBF kernel, and
    eps = (sqrt(n_tr) - 1) / sqrt(n_tr).

    Args:
        Xtrain: training samples, one row per instance.
        Xtest: test samples, one row per instance.
        sigma: kernel parameter. Despite its name it is handed to
            sklearn's rbf_kernel as ``gamma``.
        B: upper bound on each individual weight (defaults to the
            previously hard-coded value of 1000).

    Returns:
        A list with one weight per training instance.

    Raises:
        ValueError: if either data set is empty.
    """
    n_tr = len(Xtrain)
    n_te = len(Xtest)
    if n_tr == 0 or n_te == 0:
        # Both counts are used as divisors below; fail with a clear message
        # instead of an opaque ZeroDivisionError.
        raise ValueError('kmm requires non-empty training and test data')

    print('Computing kernel for training data ...')
    K_ns = sk.rbf_kernel(Xtrain, Xtrain, gamma=sigma)
    # Average with the transpose to remove round-off asymmetry.
    K = 0.5 * (K_ns + K_ns.transpose())

    print('Computing kernel for kappa ...')
    kappa_r = sk.rbf_kernel(Xtrain, Xtest, gamma=sigma)
    # Row sums of the cross kernel, kept as an (n_tr, 1) column for cvxopt.
    kappa = numpy.dot(kappa_r, numpy.ones(shape=(n_te, 1)))
    kappa = -(float(n_tr) / float(n_te)) * kappa

    eps = (math.sqrt(n_tr) - 1) / math.sqrt(n_tr)

    # Inequalities in the form G * beta <= h:
    #   row 0:      sum(beta) <= n_tr * (1 + eps)
    #   row 1:     -sum(beta) <= n_tr * (eps - 1)
    #   next n_tr: -beta_i    <= 0
    #   last n_tr:  beta_i    <= B
    A = numpy.vstack([
        numpy.ones(shape=(1, n_tr)),
        -numpy.ones(shape=(1, n_tr)),
        -numpy.eye(n_tr),
        numpy.eye(n_tr),
    ])
    b = numpy.vstack([
        numpy.array([[n_tr * (eps + 1)], [n_tr * (eps - 1)]]),
        numpy.zeros(shape=(n_tr, 1)),
        numpy.ones(shape=(n_tr, 1)) * B,
    ])

    print('Solving quadratic program for beta ...')
    P = matrix(K, tc='d')
    q = matrix(kappa, tc='d')
    G = matrix(A, tc='d')
    h = matrix(b, tc='d')
    solution = solvers.qp(P, q, G, h)
    return list(solution['x'])
#KMM PARAMETER TUNING
#Train a linear regression model with Lasso (L1 regularization).
#Model parameter selection via cross validation
#Predict the target (Beta) for a given test dataset
def regression(XTrain, betaTrain, XTest):
    """Fit a cross-validated Lasso on the training weights and predict test weights.

    A LassoCV model (10-fold cross validation over a fixed grid of alphas)
    is fitted with XTrain as features and betaTrain as targets, then used
    to predict a value for every row of XTest.

    Args:
        XTrain: training samples, one row per instance.
        betaTrain: target value for each training instance.
        XTest: samples to predict for.

    Returns:
        A list holding the prediction for each row of XTest.
    """
    candidate_alphas = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10]
    lasso = linear_model.LassoCV(cv=10, alphas=candidate_alphas)
    predicted = lasso.fit(XTrain, betaTrain).predict(XTest)
    return list(predicted)
#KMM PARAMETER TUNING
#Compute J score for parameter tuning of KMM
def computeJ(betaTrain, betaTest):
    """Return the J score used to compare KMM parameter settings.

    J = (1 / len(betaTrain)) * sum(b ** 2 for b in betaTrain)
        - (2 / len(betaTest)) * sum(betaTest)

    Args:
        betaTrain: weights of the training instances.
        betaTest: weights predicted for the test instances.

    Returns:
        The J score as a float.
    """
    squared_train_total = sum(weight ** 2 for weight in betaTrain)
    test_total = sum(betaTest)
    train_term = (1 / float(len(betaTrain))) * squared_train_total
    test_term = (2 / float(len(betaTest))) * test_total
    return train_term - test_term
#I/O OPERATIONS
#Read input csv file
def getData(filename):
    """Read a comma-separated file of numbers into a list of rows.

    Each non-blank line becomes one row: the line is stripped, split on
    commas, and every field is converted with float(). Blank lines are
    skipped.

    Args:
        filename: path of the file to read.

    Returns:
        A list of rows, each row being a list of floats.

    Raises:
        ValueError: if a field cannot be converted to float.
    """
    data = []
    with open(filename) as f:
        # Iterate the file directly rather than loading every line up front.
        for line in f:
            line = line.strip()
            if not line:
                continue
            # A list comprehension (not map) so rows are real lists on
            # Python 3 as well as Python 2.
            data.append([float(field) for field in line.split(",")])
    return data
#I/O OPERATIONS
#Write Output to file
def writeFile(filename, data):
    """Write every item of data to filename, one str(item) per line.

    Nothing is written, and the file is not created or truncated, when
    data is empty.

    Args:
        filename: path of the file to (over)write.
        data: sized iterable of values to write.
    """
    if len(data) < 1:
        return
    with open(filename, 'w') as out:
        out.writelines(str(item) + '\n' for item in data)
#MAIN ALGORITHM
#compute beta
def getBeta(traindata, testdata, gammab):
    """Pick the KMM training weights whose kernel parameter gives the lowest J.

    For every candidate in gammab the training weights are computed with
    kmm(), test weights are predicted with regression(), and the pair is
    scored with computeJ(). The training weights of the lowest-scoring
    candidate are returned; on ties the earliest candidate wins.

    Args:
        traindata: training samples, one row per instance.
        testdata: test samples, one row per instance.
        gammab: iterable of candidate kernel parameters.

    Returns:
        The selected training weights as a list, or an empty list when
        gammab has no candidates.
    """
    best_score = 0
    best_weights = []
    for candidate in gammab:
        train_weights = kmm(traindata, testdata, candidate)
        test_weights = regression(traindata, train_weights, testdata)
        score = computeJ(train_weights, test_weights)
        # An empty best_weights means nothing has been accepted yet, so the
        # first candidate is always taken regardless of its score.
        if not best_weights or score < best_score:
            best_score = score
            best_weights = list(train_weights)
    return best_weights
#MAIN METHOD
def main():
    """Command-line entry point.

    Usage: <script> training_file test_file output_file

    Reads the training and test CSV files, selects the KMM training
    weights over a fixed grid of kernel parameters (plus 1 / n_train),
    and writes one weight per line to the output file. Prints a message
    and returns without doing any work when the argument count is wrong
    or either input file has no rows.
    """
    if len(sys.argv) != 4:
        print('Incorrect number of arguments.')
        print('Arg: training_file, test_file, output_file.')
        return

    train_path, test_path, output_path = sys.argv[1:]
    traindata = getData(train_path)
    testdata = getData(test_path)
    if not traindata or not testdata:
        # len(traindata) is used as a divisor for the first grid entry below.
        print('Training and test files must each contain at least one row.')
        return

    gammab = [1 / float(len(traindata)),
              0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10]
    print('Got training and test data.')
    beta = getBeta(traindata, testdata, gammab)
    writeFile(output_path, beta)


if __name__ == '__main__':
    main()