#!/usr/bin/env python3
#
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import math
import sys

import numpy as np
import torch
from scipy.optimize import linprog
from scipy.special import softmax
from tqdm import tqdm

sys.path.append("private_prediction/")
from util import binary_search

sys.path.append("fastwht/python/")
from hadamard import *
class FWHTRandomProjector:
    '''
    Fast random projector using the Fast Walsh-Hadamard Transform (FWHT).

    Arguments:
        num_parameters: number of input data dimensions.
        expanded_dim: dimension of the expanded space in which the Hadamard
            transform is applied.

    Note: the random sign flips are drawn from NumPy's global random state;
    there is no per-instance seed.
    '''

    def __init__(self, num_parameters, expanded_dim):
        self.num_parameters = num_parameters
        self.expanded_dim = expanded_dim
        # Random sign flips form the diagonal matrix D of the projection.
        self.D = np.sign(np.random.normal(0, 1, self.expanded_dim))

    def project(self, x):
        # Zero-pad x to the expanded dimension, apply sign flips, then the
        # scaled Hadamard transform.
        x_expanded = np.zeros(self.expanded_dim)
        x_expanded[:len(x)] = x
        z = fastwht(x_expanded * self.D, order='hadamard') * math.sqrt(float(x_expanded.shape[0]))
        return z

    def inverse(self, x):
        # Apply the transform again, undo the sign flips, and truncate back
        # to the original dimension.
        out_expanded = fastwht(x, order='hadamard') * self.D * math.sqrt(float(x.shape[0]))
        return out_expanded[:self.num_parameters]
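# Illustrative usage sketch (not part of the original file; names and values are
# hypothetical): project a d-dimensional vector, assuming expanded_dim is padded
# to the next power of two as the Hadamard transform requires.
#
#   d = 1000
#   expanded_dim = 1 << (d - 1).bit_length()   # next power of two >= d
#   projector = FWHTRandomProjector(d, expanded_dim)
#   x = np.random.randn(d)
#   z = projector.project(x)                   # length expanded_dim
#   x_rec = projector.inverse(z)               # length d; should be close to x,
#                                              # assuming fastwht's normalization
#                                              # makes the scaled transform orthonormal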
def _log_add(logx, logy):
    """Element-wise, numerically stable computation of log(exp(logx) + exp(logy))."""
    a, b = np.minimum(logx, logy), np.maximum(logx, logy)
    eq_zero = (a == -np.inf)
    # Use exp(a) + exp(b) = (exp(a - b) + 1) * exp(b), with a <= b so that
    # exp(a - b) cannot overflow.
    result = np.log(np.exp(a - b) + 1) + b
    result[eq_zero] = b[eq_zero]
    return result
def log_sum(log_zs):
    """Computes log(sum(exp(log_zs))) over the last axis in a numerically stable way."""
    log_zs = log_zs.reshape(-1, log_zs.shape[-1])
    result = log_zs[:, 0]
    for i in range(1, log_zs.shape[-1]):
        result = _log_add(result, log_zs[:, i])
    return result
def renyi_div_stable(P, order):
    """
    Computes the pairwise Renyi divergence between the rows of P using a stable
    log-space computation.
    """
    assert order > 1, "Renyi divergence order must be > 1"
    assert np.all(P >= 0)
    log_P = np.log(P)
    # Avoid -inf from zero entries in the second (Q) term of the exponent.
    log_Q = np.copy(log_P)
    log_Q[log_Q == -np.inf] = 0
    log_zs = order * log_P[:, None, :] - (order - 1) * log_Q[None, :, :]
    return np.reshape(log_sum(log_zs) / (order - 1), (P.shape[0], P.shape[0]))
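# For reference, the quantity computed above is the order-alpha Renyi divergence
# between rows i and j of P,
#   D_alpha(P_i || P_j) = 1/(alpha - 1) * log( sum_k P_i[k]^alpha * P_j[k]^(1 - alpha) ),
# evaluated entirely in log space via log_sum and _log_add.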
def renyi_div_bound_lp(alphas, d, P, Delta, greedy=False, verbose=False):
    """
    Computes an LP relaxation upper bound for the Renyi divergence of the MVU mechanism.
    If greedy=True, always compute the greedy solution.
    """
    B = P.shape[0]
    bounds = np.zeros(alphas.shape)
    for i in tqdm(range(alphas.shape[0])):
        alpha = alphas[i]
        # flatten so that D aligns with the flattened cost matrix below
        D = renyi_div_stable(P, alpha).flatten()
        D[D < 0] = 0
        irange = np.arange(0, B) / (B - 1)
        # add identity to cost to avoid division by zero
        cost = (np.power(irange[:, None] - irange[None, :], 2) + np.eye(B)).flatten()
        opt = (D / cost).argmax()
        max_div, cost_per_dim = D[opt], cost[opt]
        if d * cost_per_dim < Delta**2 and (not greedy):
            # solve the LP relaxation
            c = -np.tile(D, d)
            A = np.tile(np.power(irange[:, None] - irange[None, :], 2).flatten(), d)[None, :]
            A = np.concatenate([A, np.kron(np.eye(d), np.ones((1, B*B)))], 0)
            b = np.array([0.25] + [1] * d)
            res = linprog(c, A_ub=A, b_ub=b, options={"disp": verbose})
            bounds[i] = -res.fun
        else:
            # compute bound using optimal greedy solution
            num_items = Delta**2 / cost_per_dim
            bounds[i] = max_div * num_items
    return bounds
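# Illustrative usage sketch (hypothetical values, not part of the original file):
# bound the Renyi divergence of a B x B sampling matrix whose rows are probability
# vectors, for several orders alpha. Delta is the per-vector sensitivity.
#
#   alphas = np.array([2.0, 4.0, 8.0])
#   P = softmax(np.random.randn(16, 16), axis=1)   # placeholder sampling matrix
#   bounds = renyi_div_bound_lp(alphas, d=128, P=P, Delta=0.5)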
def optimal_scaling_mvu(samples, mechanism, conf, p=2):
    """
    Computes optimal scaling factor for MVU mechanism to ensure that vector norm after
    dithering does not increase.
    """
    B = 2 ** mechanism.input_bits

    def constraint(t):
        x = t * samples
        x = np.clip((x + 1) / 2, 0, 1)
        z = mechanism.dither(x, mechanism.input_bits) / (B - 1)
        z = 2 * z - 1
        norms = np.linalg.norm(z, p, 1)
        return (norms > 1).astype(float).mean() < conf

    return binary_search(lambda t: t, constraint, 0, 1, tol=1e-3)
def post_rounding_l2_norm_bound(d, l2_norm_bound, beta):
    """
    Function for computing vector norm bound after quantizing to the integer grid.
    Adapted from https://github.com/google-research/federated/blob/master/distributed_dp/compression_utils.py
    """
    bound1 = l2_norm_bound + math.sqrt(d)
    squared_bound2 = l2_norm_bound**2 + 0.25 * d
    squared_bound2 += (math.sqrt(2.0 * math.log(1.0 / beta)) * (l2_norm_bound + 0.5 * math.sqrt(d)))
    bound2 = math.sqrt(squared_bound2)
    # bound2 is inf if beta = 0, in which case we fall back to bound1.
    return min(bound1, bound2)
def optimal_scaling_integer(d, l2_norm_bound, beta, tol=1e-3):
    """
    Computes optimal scaling factor for DDG/Skellam mechanism to ensure that vector norm after
    dithering does not increase.
    """
    def constraint(t):
        if t == 0:
            return True
        quantized_norm = post_rounding_l2_norm_bound(d, t, beta)
        return quantized_norm <= l2_norm_bound + 1e-6

    opt_norm = binary_search(lambda t: t, constraint, 0, l2_norm_bound, tol=tol)
    return opt_norm / l2_norm_bound
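# Illustrative usage sketch (hypothetical values, not part of the original file):
# compute the factor by which to shrink vectors before integer rounding so that the
# post-rounding norm bound (with failure probability beta) does not exceed the
# original clipping norm.
#
#   gamma = optimal_scaling_integer(d=10000, l2_norm_bound=1.0, beta=1e-6)
#   # vectors of norm <= 1.0 would then be multiplied by gamma prior to rounding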
def max_divergence_bound(log_P, precision=1e-4):
    """
    Computes L1 metric DP additive factor for the interpolated MVU mechanism.
    """
    B = log_P.shape[0]
    epsilon_bound = np.zeros(B - 1)
    for i in range(log_P.shape[0] - 1):
        theta = log_P[i+1] - log_P[i]
        xs = np.arange(0, 1 + precision/2, precision)
        etas = xs[:, None] * log_P[None, i+1] + (1 - xs)[:, None] * log_P[None, i]
        func = abs(softmax(etas, axis=1) @ theta) * (B - 1)
        epsilon_bound[i] = func.max()
    return epsilon_bound.max()
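# For reference, the value returned above is (approximated on a grid of spacing
# `precision`)
#   max_i max_{x in [0, 1]} | softmax(x * log_P[i+1] + (1 - x) * log_P[i]) @ (log_P[i+1] - log_P[i]) | * (B - 1),
# i.e., up to the (B - 1) scaling, the largest magnitude of the derivative of
# logsumexp along the interpolation path between adjacent rows of log_P.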
def fisher_information(bs, eta1, eta2):
    """
    Computes Fisher information of the interpolated MVU mechanism for a range of coefficients.
    """
    etas = (1 - bs)[:, None] * eta1[None, :] + bs[:, None] * eta2[None, :]
    P = softmax(etas, axis=1)
    fi = P @ np.power(eta2 - eta1, 2) - np.power(P @ (eta2 - eta1), 2)
    return fi
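# For reference: with P_b = softmax((1 - b) * eta1 + b * eta2), the value computed
# above is the variance of (eta2 - eta1) under P_b,
#   I(b) = E_{P_b}[(eta2 - eta1)^2] - (E_{P_b}[eta2 - eta1])^2,
# which matches the Fisher information of the one-parameter exponential family
# indexed by the interpolation coefficient b.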
def fisher_information_bound(p, precision=1e-4):
    """
    Computes the maximum Fisher information of the interpolated MVU mechanism.
    The maximum over coefficients in [0, 1] is computed first and used to derive
    an extended range [a_min, a_max]; the maximum over that range is returned.
    """
    # sampling probability vectors must be symmetric about 0.5(!)
    eta1 = np.log(p)
    eta2 = eta1[::-1]
    # compute maximum FI within range [0, 1]
    bs = np.arange(0, 1 + precision/2, precision)
    fi_max = fisher_information(bs, eta1, eta2).max()
    # compute range [a_min, a_max]
    j_max = np.argmax(eta2 - eta1)
    eta_sq_max = np.power(eta2 - eta1, 2)[j_max]
    c = fi_max / (4 * eta_sq_max)
    p_threshold = (1 + math.sqrt(1 - 4*c)) / 2
    assert p_threshold >= 0.5
    a_max = 1
    while True:
        eta = (1 - a_max) * eta1 + a_max * eta2
        p_eta = softmax(eta)
        if p_eta[j_max] >= p_threshold:
            break
        else:
            a_max += 1
    a_min = 1 - a_max
    # compute maximum FI within range [a_min, a_max]
    bs = np.arange(a_min, a_max + precision/2, precision)
    fi = fisher_information(bs, eta1, eta2)
    return fi.max()
def consolidate(mechanism, tol=1e-8):
    """
    Consolidates the sampling probability matrix P to remove redundant columns.
    Must be called prior to computing the Fisher information to avoid infinite loop!
    """
    digits = int(-math.log10(tol))
    uniques = np.unique(mechanism.alpha.round(digits))
    P_new = []
    alpha_new = []
    for i in range(len(uniques)):
        indices = np.arange(0, len(mechanism.alpha))[np.isclose(mechanism.alpha, uniques[i])]
        p = mechanism.P[:, indices].sum(1)
        P_new.append(p[:, None])
        alpha_new.append(mechanism.alpha[indices[0]])
    P_new = np.concatenate(P_new, 1)
    alpha_new = np.hstack(alpha_new)
    return P_new, alpha_new
def params_to_vec(model, return_type="param"):
    '''
    Helper function that concatenates model parameters or gradients into a single vector.
    '''
    vec = []
    for param in model.parameters():
        if return_type == "param":
            vec.append(param.data.view(1, -1))
        elif return_type == "grad":
            vec.append(param.grad.view(1, -1))
        elif return_type == "grad_sample":
            if hasattr(param, "grad_sample"):
                vec.append(param.grad_sample.view(param.grad_sample.size(0), -1))
            else:
                print("Error: Per-sample gradient not found")
                sys.exit(1)
    return torch.cat(vec, dim=1).squeeze()
def set_grad_to_vec(model, vec):
    '''
    Helper function that sets the model's gradient to a given vector.
    '''
    model.zero_grad()
    for param in model.parameters():
        size = param.data.view(1, -1).size(1)
        param.grad = vec[:size].view_as(param.data).clone()
        vec = vec[size:]
    return
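# Illustrative usage sketch (hypothetical model and noise, not part of the original
# file): flatten the gradient, perturb it externally, and write it back before an
# optimizer step.
#
#   model = torch.nn.Linear(10, 1)
#   loss = model(torch.randn(4, 10)).sum()
#   loss.backward()
#   g = params_to_vec(model, return_type="grad")   # flat gradient vector
#   g_priv = g + 0.1 * torch.randn_like(g)         # stand-in for a DP mechanism
#   set_grad_to_vec(model, g_priv)
#   torch.optim.SGD(model.parameters(), lr=0.1).step()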