Skip to content

Commit

Permalink
Continuous Extensions
Browse files Browse the repository at this point in the history
  • Loading branch information
mitchspa authored Jun 9, 2024
1 parent 47d30ff commit b27fd77
Show file tree
Hide file tree
Showing 9 changed files with 1,650 additions and 0 deletions.
37 changes: 37 additions & 0 deletions NonStatContinuousPricing/GPUCBAgent2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
import scipy
from RBFGaussianProcess import RBFGaussianProcess

class GPUCBAgent:
    """GP-UCB agent choosing a price in the continuous interval [0, 1].

    Each round the agent maximises ``mu(x) + beta(t) * sigma(x)`` over
    ``x`` in [0, 1], where ``mu`` and ``sigma`` come from ``self.gp.predict``.
    The observed reward is then fed back through :meth:`update`.
    """

    def __init__(self, T):
        """Create an agent for a horizon of ``T`` rounds."""
        self.T = T
        self.gp = RBFGaussianProcess(scale=2).fit()
        self.a_t = None  # last optimiser solution (array of length 1)
        self.action_hist = np.array([])
        self.reward_hist = np.array([])
        self.mu_t = 0
        self.sigma_t = 0
        # gamma(t) = log(t + 1)^2; presumably a proxy for the information
        # gain term of the GP-UCB bound -- confirm against the derivation.
        self.gamma = lambda t: np.log(t+1)**2
        self.beta = lambda t: 1 + 0.5*np.sqrt(2 * (self.gamma(t) + 1 + np.log(T)))
        self.t = 0

    def pull_arm(self):
        """Return the price in [0, 1] that maximises the current UCB.

        The optimiser is started at 0.5 and is a local method, so the result
        is a local maximiser of the UCB.
        """
        # Imported here because a bare ``import scipy`` does not guarantee that
        # the ``scipy.optimize`` submodule has been loaded.
        from scipy.optimize import minimize

        self.a_t = minimize(self._neg_ucb, x0=0.5, bounds=[(0, 1)]).x
        # Record the posterior at the price that was actually chosen rather
        # than at the optimiser's last probe point.
        self.mu_t, self.sigma_t = self.gp.predict(self.a_t)
        return self.a_t[0]

    def _neg_ucb(self, price):
        """Return ``-UCB(price)`` as a float, without touching agent state.

        Minimising the negated UCB is used instead of the reciprocal because
        ``1/UCB`` has a pole at zero and ranks every negative UCB below every
        positive one, which steers the optimiser to the wrong region whenever
        the UCB changes sign on [0, 1].
        """
        mu, sigma = self.gp.predict(price)
        ucb = mu + self.beta(self.t) * sigma
        return -float(np.ravel(ucb)[0])

    def ucbs_func(self, price):
        """Return ``1 / UCB(price)`` and cache ``mu_t`` / ``sigma_t``.

        Kept with its original behaviour for existing callers; ``pull_arm``
        no longer optimises this reciprocal form.
        """
        self.mu_t, self.sigma_t = self.gp.predict(price)
        return 1/(self.mu_t + self.beta(self.t) * self.sigma_t)

    def update(self, r_t):
        """Record reward ``r_t`` for the last pulled arm and refit the GP."""
        self.action_hist = np.append(self.action_hist, self.a_t)
        self.reward_hist = np.append(self.reward_hist, r_t)
        self.gp = self.gp.fit(self.a_t, r_t)
        self.t += 1

1,240 changes: 1,240 additions & 0 deletions NonStatContinuousPricing/Main_NonStatContinuousPricing.ipynb

Large diffs are not rendered by default.

59 changes: 59 additions & 0 deletions NonStatContinuousPricing/RBFGaussianProcess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import numpy as np

class RBFGaussianProcess:
    """Gaussian-process regressor with an RBF kernel and online updates.

    The inverse of the regularised kernel matrix is stored in ``k_xx_inv``
    and extended with the block-matrix inversion formula whenever
    :meth:`fit` receives new observations, so no full re-inversion is needed.
    """

    def __init__(self, scale=1, reg=1e-2):
        self.scale = scale  # multiplies the squared distance inside exp(-...)
        self.reg = reg  # regularisation added to the diagonal of the kernel matrix
        self.k_xx_inv = None  # inverse of the regularised kernel matrix; None until first fit

    def rbf_kernel_incr_inv(self, B, C, D):
        """Extend the stored inverse to cover newly added points.

        Args:
            B: kernel values between existing training points (rows) and the
                new points (columns).
            C: ``B`` transposed.
            D: regularised kernel matrix of the new points among themselves
                (anything broadcastable to ``(m, m)`` is accepted).

        Returns:
            Inverse of the block matrix ``[[K, B], [C, D]]`` where ``K`` is
            the matrix whose inverse is currently stored in ``k_xx_inv``.
        """
        k_inv_b = self.k_xx_inv @ B
        c_k_inv = C @ self.k_xx_inv
        # Inverse of the Schur complement of K in the enlarged matrix.
        schur_inv = np.linalg.inv(D - C @ k_inv_b)
        top = np.concatenate(
            (self.k_xx_inv + k_inv_b @ schur_inv @ c_k_inv, -k_inv_b @ schur_inv),
            axis=1,
        )
        bottom = np.concatenate((-schur_inv @ c_k_inv, schur_inv), axis=1)
        return np.concatenate((top, bottom), axis=0)

    def rbf_kernel(self, a, b):
        """Return the matrix ``exp(-scale * (a_i - b_j)**2)``.

        ``a`` and ``b`` may be scalars, sequences or arrays; both are
        flattened, giving a result of shape ``(a.size, b.size)``.
        """
        a_col = np.asarray(a, dtype=float).reshape(-1, 1)
        b_row = np.asarray(b, dtype=float).reshape(1, -1)
        return np.exp(-self.scale * (a_col - b_row) ** 2)

    def fit(self, x=None, y=None):
        """Add observations ``(x, y)`` and update the stored inverse.

        Args:
            x: new input points (scalar or array-like); omitted means none.
            y: targets matching ``x`` one-to-one.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: if ``x`` and ``y`` hold a different number of values.
        """
        x = np.empty(0) if x is None else np.asarray(x, dtype=float)
        y = np.empty(0) if y is None else np.asarray(y, dtype=float)
        x = x.reshape(-1, 1)
        y = y.reshape(-1, 1)
        if x.shape[0] != y.shape[0]:
            raise ValueError("x and y must contain the same number of values")

        if self.k_xx_inv is None:
            self.x = x
            self.y = y
            k_xx = self.rbf_kernel(x, x) + self.reg * np.eye(x.shape[0])
            self.k_xx_inv = np.linalg.inv(k_xx)
        elif x.shape[0] > 0:
            B = self.rbf_kernel(self.x, x)
            # Kernel among the new points plus regularisation; reduces to
            # 1 + reg for a single point, and stays correct for a batch.
            D = self.rbf_kernel(x, x) + self.reg * np.eye(x.shape[0])
            self.k_xx_inv = self.rbf_kernel_incr_inv(B, B.T, D)
            self.x = np.vstack((self.x, x))
            self.y = np.vstack((self.y, y))

        return self

    def predict(self, x_predict):
        """Return ``(mu_hat, sigma_hat)`` at ``x_predict`` as 1-D arrays.

        ``sigma_hat`` is ``1 - k K^{-1} k^T`` evaluated per query point (the
        kernel equals 1 at zero distance), i.e. a variance-like quantity and
        not its square root. It is clipped at zero because round-off can make
        it slightly negative.
        """
        k = self.rbf_kernel(x_predict, self.x)
        k_w = k @ self.k_xx_inv

        mu_hat = k_w @ self.y
        # Row-wise sum gives the diagonal of k @ K^{-1} @ k.T without forming
        # the full (m, m) product.
        sigma_hat = 1 - np.sum(k_w * k, axis=1)

        return mu_hat.ravel(), np.maximum(sigma_hat, 0.0).ravel()
20 changes: 20 additions & 0 deletions NonStatContinuousPricing/StochasticPricingEnvironment2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np

class StochasticPricingEnvironment:
    """Pricing environment whose conversion probability depends on the round.

    ``conversion_probability`` is called as ``conversion_probability(p_t, t)``
    with ``t`` the number of rounds already played on this instance.
    """

    def __init__(self, conversion_probability, cost):
        self.t = 0  # rounds played so far
        self.cost = cost
        self.conversion_probability = conversion_probability

    def round(self, p_t, n_t):
        """Play one round at price ``p_t`` with ``n_t`` customers.

        Returns:
            ``(d_t, r_t)``: the binomially drawn number of sales and the
            profit ``(p_t - cost) * d_t``.
        """
        buy_prob = self.conversion_probability(p_t, self.t)
        sold = np.random.binomial(n_t, buy_prob)
        profit = (p_t - self.cost) * sold
        self.t = self.t + 1
        return sold, profit

31 changes: 31 additions & 0 deletions StatContinuousPricing/GPThompson_Continuous.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import numpy as np
from RBFGaussianProcess import RBFGaussianProcess
import scipy

class GPThompson:
    """Thompson-sampling style agent choosing an arm in [0, 1] with a GP.

    Each round one standard-normal draw ``z`` is taken and the agent
    maximises ``mu(x) + sigma(x) * z`` over ``x`` in [0, 1], where ``mu`` and
    ``sigma`` come from ``self.gp.predict``.
    """

    def __init__(self, T):
        """Create an agent for a horizon of ``T`` rounds."""
        self.T = T
        self.gp = RBFGaussianProcess(scale=2).fit()
        self.a_t = None  # last optimiser solution (array of length 1)
        self.action_hist = np.array([])
        self.reward_hist = np.array([])
        self.mu_t = 0
        self.sigma_t = 0
        # gamma / beta are not used by this agent's arm selection; they are
        # kept so the attribute set matches the original class.
        self.gamma = lambda t: np.log(t+1)**2
        self.beta = lambda t: 1 + 0.5*np.sqrt(2 * (self.gamma(t) + 1 + np.log(T)))
        self.t = 0

    def pull_arm(self):
        """Return the arm (array of length 1) maximising this round's sample.

        The randomness is drawn once per call so the optimiser sees a
        deterministic objective; re-sampling on every evaluation would turn
        its finite-difference gradients into noise.
        """
        # Imported here because a bare ``import scipy`` does not guarantee that
        # the ``scipy.optimize`` submodule has been loaded.
        from scipy.optimize import minimize

        z = np.random.standard_normal()

        def neg_sample(arm):
            mu, sigma = self.gp.predict(arm)
            # mu + sigma * z has the same per-point distribution as
            # np.random.normal(mu, sigma) and does not raise if round-off
            # makes sigma slightly negative. The negated sample is minimised
            # rather than its reciprocal, which is invalid for non-positive
            # samples.
            return -float(np.ravel(mu + sigma * z)[0])

        self.a_t = minimize(neg_sample, x0=0.5, bounds=[(0, 1)]).x
        # Record the posterior at the arm that was actually chosen.
        self.mu_t, self.sigma_t = self.gp.predict(self.a_t)
        return self.a_t

    def obj(self, arm):
        """Return ``1 / sample`` for a fresh posterior draw at ``arm``.

        Kept with its original behaviour for existing callers; ``pull_arm``
        no longer optimises this function.
        """
        self.mu_t, self.sigma_t = self.gp.predict(arm)
        sample = np.random.normal(self.mu_t, self.sigma_t)
        return 1/sample

    def update(self, r_t):
        """Record reward ``r_t`` for the last pulled arm and refit the GP."""
        self.action_hist = np.append(self.action_hist, self.a_t)
        self.reward_hist = np.append(self.reward_hist, r_t)
        self.gp = self.gp.fit(self.a_t, r_t)
        self.t += 1
37 changes: 37 additions & 0 deletions StatContinuousPricing/GPUCBAgent_Continuous.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env python
# coding: utf-8

# In[ ]:


import numpy as np
from RBFGaussianProcess import RBFGaussianProcess
import scipy

class GPUCBAgent:
    """GP-UCB agent choosing a price in the continuous interval [0, 1].

    Each round the agent maximises ``mu(x) + beta(t) * sigma(x)`` over
    ``x`` in [0, 1], where ``mu`` and ``sigma`` come from ``self.gp.predict``.
    The observed reward is then fed back through :meth:`update`.
    """

    def __init__(self, T):
        """Create an agent for a horizon of ``T`` rounds."""
        self.T = T
        self.gp = RBFGaussianProcess(scale=2).fit()
        self.a_t = None  # last optimiser solution (array of length 1)
        self.action_hist = np.array([])
        self.reward_hist = np.array([])
        self.mu_t = 0
        self.sigma_t = 0
        # gamma(t) = log(t + 1)^2; presumably a proxy for the information
        # gain term of the GP-UCB bound -- confirm against the derivation.
        self.gamma = lambda t: np.log(t+1)**2
        self.beta = lambda t: 1 + 0.5*np.sqrt(2 * (self.gamma(t) + 1 + np.log(T)))
        self.t = 0

    def pull_arm(self):
        """Return the price in [0, 1] that maximises the current UCB.

        The optimiser is started at 0.5 and is a local method, so the result
        is a local maximiser of the UCB.
        """
        # Imported here because a bare ``import scipy`` does not guarantee that
        # the ``scipy.optimize`` submodule has been loaded.
        from scipy.optimize import minimize

        self.a_t = minimize(self._neg_ucb, x0=0.5, bounds=[(0, 1)]).x
        # Record the posterior at the price that was actually chosen rather
        # than at the optimiser's last probe point.
        self.mu_t, self.sigma_t = self.gp.predict(self.a_t)
        return self.a_t[0]

    def _neg_ucb(self, price):
        """Return ``-UCB(price)`` as a float, without touching agent state.

        Minimising the negated UCB is used instead of the reciprocal because
        ``1/UCB`` has a pole at zero and ranks every negative UCB below every
        positive one, which steers the optimiser to the wrong region whenever
        the UCB changes sign on [0, 1].
        """
        mu, sigma = self.gp.predict(price)
        ucb = mu + self.beta(self.t) * sigma
        return -float(np.ravel(ucb)[0])

    def ucbs_func(self, price):
        """Return ``1 / UCB(price)`` and cache ``mu_t`` / ``sigma_t``.

        Kept with its original behaviour for existing callers; ``pull_arm``
        no longer optimises this reciprocal form.
        """
        self.mu_t, self.sigma_t = self.gp.predict(price)
        return 1/(self.mu_t + self.beta(self.t) * self.sigma_t)

    def update(self, r_t):
        """Record reward ``r_t`` for the last pulled arm and refit the GP."""
        self.action_hist = np.append(self.action_hist, self.a_t)
        self.reward_hist = np.append(self.reward_hist, r_t)
        self.gp = self.gp.fit(self.a_t, r_t)
        self.t += 1

156 changes: 156 additions & 0 deletions StatContinuousPricing/Main_StatContinuousPricing.ipynb

Large diffs are not rendered by default.

59 changes: 59 additions & 0 deletions StatContinuousPricing/RBFGaussianProcess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import numpy as np

class RBFGaussianProcess:
    """Gaussian-process regressor with an RBF kernel and online updates.

    The inverse of the regularised kernel matrix is stored in ``k_xx_inv``
    and extended with the block-matrix inversion formula whenever
    :meth:`fit` receives new observations, so no full re-inversion is needed.
    """

    def __init__(self, scale=1, reg=1e-2):
        self.scale = scale  # multiplies the squared distance inside exp(-...)
        self.reg = reg  # regularisation added to the diagonal of the kernel matrix
        self.k_xx_inv = None  # inverse of the regularised kernel matrix; None until first fit

    def rbf_kernel_incr_inv(self, B, C, D):
        """Extend the stored inverse to cover newly added points.

        Args:
            B: kernel values between existing training points (rows) and the
                new points (columns).
            C: ``B`` transposed.
            D: regularised kernel matrix of the new points among themselves
                (anything broadcastable to ``(m, m)`` is accepted).

        Returns:
            Inverse of the block matrix ``[[K, B], [C, D]]`` where ``K`` is
            the matrix whose inverse is currently stored in ``k_xx_inv``.
        """
        k_inv_b = self.k_xx_inv @ B
        c_k_inv = C @ self.k_xx_inv
        # Inverse of the Schur complement of K in the enlarged matrix.
        schur_inv = np.linalg.inv(D - C @ k_inv_b)
        top = np.concatenate(
            (self.k_xx_inv + k_inv_b @ schur_inv @ c_k_inv, -k_inv_b @ schur_inv),
            axis=1,
        )
        bottom = np.concatenate((-schur_inv @ c_k_inv, schur_inv), axis=1)
        return np.concatenate((top, bottom), axis=0)

    def rbf_kernel(self, a, b):
        """Return the matrix ``exp(-scale * (a_i - b_j)**2)``.

        ``a`` and ``b`` may be scalars, sequences or arrays; both are
        flattened, giving a result of shape ``(a.size, b.size)``.
        """
        a_col = np.asarray(a, dtype=float).reshape(-1, 1)
        b_row = np.asarray(b, dtype=float).reshape(1, -1)
        return np.exp(-self.scale * (a_col - b_row) ** 2)

    def fit(self, x=None, y=None):
        """Add observations ``(x, y)`` and update the stored inverse.

        Args:
            x: new input points (scalar or array-like); omitted means none.
            y: targets matching ``x`` one-to-one.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: if ``x`` and ``y`` hold a different number of values.
        """
        x = np.empty(0) if x is None else np.asarray(x, dtype=float)
        y = np.empty(0) if y is None else np.asarray(y, dtype=float)
        x = x.reshape(-1, 1)
        y = y.reshape(-1, 1)
        if x.shape[0] != y.shape[0]:
            raise ValueError("x and y must contain the same number of values")

        if self.k_xx_inv is None:
            self.x = x
            self.y = y
            k_xx = self.rbf_kernel(x, x) + self.reg * np.eye(x.shape[0])
            self.k_xx_inv = np.linalg.inv(k_xx)
        elif x.shape[0] > 0:
            B = self.rbf_kernel(self.x, x)
            # Kernel among the new points plus regularisation; reduces to
            # 1 + reg for a single point, and stays correct for a batch.
            D = self.rbf_kernel(x, x) + self.reg * np.eye(x.shape[0])
            self.k_xx_inv = self.rbf_kernel_incr_inv(B, B.T, D)
            self.x = np.vstack((self.x, x))
            self.y = np.vstack((self.y, y))

        return self

    def predict(self, x_predict):
        """Return ``(mu_hat, sigma_hat)`` at ``x_predict`` as 1-D arrays.

        ``sigma_hat`` is ``1 - k K^{-1} k^T`` evaluated per query point (the
        kernel equals 1 at zero distance), i.e. a variance-like quantity and
        not its square root. It is clipped at zero because round-off can make
        it slightly negative.
        """
        k = self.rbf_kernel(x_predict, self.x)
        k_w = k @ self.k_xx_inv

        mu_hat = k_w @ self.y
        # Row-wise sum gives the diagonal of k @ K^{-1} @ k.T without forming
        # the full (m, m) product.
        sigma_hat = 1 - np.sum(k_w * k, axis=1)

        return mu_hat.ravel(), np.maximum(sigma_hat, 0.0).ravel()
11 changes: 11 additions & 0 deletions StatContinuousPricing/StochasticPricingEnvironment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import numpy as np

class StochasticPricingEnvironment:
    """Pricing environment with a fixed price-to-conversion-probability map.

    ``conversion_probability`` is called as ``conversion_probability(p_t)``.
    """

    def __init__(self, conversion_probability, cost):
        self.cost = cost
        self.conversion_probability = conversion_probability

    def round(self, p_t, n_t):
        """Play one round at price ``p_t`` with ``n_t`` customers.

        Returns:
            ``(d_t, r_t)``: the binomially drawn number of sales and the
            profit ``(p_t - cost) * d_t``.
        """
        buy_prob = self.conversion_probability(p_t)
        sold = np.random.binomial(n_t, buy_prob)
        profit = (p_t - self.cost) * sold
        return sold, profit

0 comments on commit b27fd77

Please sign in to comment.