-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathutils.go
152 lines (142 loc) · 3.54 KB
/
utils.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
package tsne4go
// Utility functions for tSNE
import (
"math"
"math/rand"
)
// return 0 mean unit standard deviation random number
func gaussRandom() float64 {
u := 2*rand.Float64() - 1
v := 2*rand.Float64() - 1
r := u*u + v*v
for r == 0 || r > 1 {
u = 2*rand.Float64() - 1
v = 2*rand.Float64() - 1
r = u*u + v*v
}
c := math.Sqrt(-2 * math.Log(r) / r)
return u * c
}
// return random normal number
func randn(mu, std float64) float64 {
return mu + gaussRandom()*std
}
// returns 2d array filled with random numbers
func randn2d(n int) []Point {
res := make([]Point, n)
for i := range res {
for j := range res[i] {
res[i][j] = randn(0.0, 1e-4)
}
}
return res
}
// returns 2d array filled with 'val'
func fill2d(n int, val float64) []Point {
res := make([]Point, n)
for i := range res {
for j := range res[i] {
res[i][j] = val
}
}
return res
}
// compute pairwise distance in all vectors in X
func xtod(x Distancer) []float64 {
length := x.Len()
dists := make([]float64, length*length) // allocate contiguous array
for i := 0; i < length-1; i++ {
for j := i + 1; j < length; j++ {
d := x.Distance(i, j)
dists[i*length+j] = d
dists[j*length+i] = d
}
}
return dists
}
// "constants" for positive and negative infinity
var (
inf = math.Inf(1)
negInf = math.Inf(-1)
hTarget = math.Log(perplexity) // target entropy of distribution
)
// compute (p_{i|j} + p_{j|i})/(2n)
func d2p(D []float64, perplexity, tol float64) []float64 {
length := int(math.Sqrt(float64(len(D))))
pTemp := make([]float64, length*length) // temporary probability matrix
prow := make([]float64, length) // a temporary storage compartment
for i := 0; i < length; i++ {
betamin := negInf
betamax := inf
beta := 1.0 // initial value of precision
const maxtries = 500
// perform binary search to find a suitable precision beta
// so that the entropy of the distribution is appropriate
for num := 0; num < maxtries; num++ {
// compute entropy and kernel row with beta precision
psum := 0.0
for j := 0; j < length; j++ {
if i != j { // we dont care about diagonals
pj := math.Exp(-D[i*length+j] * beta)
prow[j] = pj
psum += pj
} else {
prow[j] = 0.0
}
}
// normalize p and compute entropy
hHere := 0.0
for j := 0; j < length; j++ {
pj := prow[j] / psum
prow[j] = pj
if pj > 1e-7 {
hHere -= pj * math.Log(pj)
}
}
// adjust beta based on result
if hHere > hTarget {
// entropy was too high (distribution too diffuse)
// so we need to increase the precision for more peaky distribution
betamin = beta // move up the bounds
if betamax == inf {
beta = beta * 2
} else {
beta = (beta + betamax) / 2
}
} else {
// converse case. make distrubtion less peaky
betamax = beta
if betamin == negInf {
beta = beta / 2
} else {
beta = (beta + betamin) / 2
}
}
// stopping conditions: too many tries or got a good precision
if math.Abs(hHere-hTarget) < tol {
break
}
}
// copy over the final prow to P at row i
for j := 0; j < length; j++ {
pTemp[i*length+j] = prow[j]
}
} // end loop over examples i
// symmetrize P and normalize it to sum to 1 over all ij
probas := make([]float64, length*length)
length2 := float64(length * 2)
for i := 0; i < length; i++ {
for j := 0; j < length; j++ {
probas[i*length+j] = math.Max((pTemp[i*length+j]+pTemp[j*length+i])/length2, 1e-100)
}
}
return probas
}
func sign(x float64) int {
if x > 0 {
return 1
} else if x < 0 {
return -1
}
return 0
}