Continuous Extensions

PaoloBiolghini · Jun 9, 2024 · b27fd77 · b27fd77
1 parent 47d30ff
commit b27fd77
Show file tree

Hide file tree

Showing 9 changed files with 1,650 additions and 0 deletions.
diff --git a/NonStatContinuousPricing/GPUCBAgent2.py b/NonStatContinuousPricing/GPUCBAgent2.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# In[1]:
+
+
+import numpy as np
+import scipy
+from RBFGaussianProcess import RBFGaussianProcess
+
+class GPUCBAgent:
+    def __init__(self, T):
+        self.T = T
+        self.gp = RBFGaussianProcess(scale=2).fit()
+        self.a_t = None
+        self.action_hist = np.array([])
+        self.reward_hist = np.array([])
+        self.mu_t = 0
+        self.sigma_t = 0
+        self.gamma = lambda t: np.log(t+1)**2 
+        self.beta = lambda t: 1 + 0.5*np.sqrt(2 * (self.gamma(t) + 1 + np.log(T)))
+        self.t = 0
+
+    def pull_arm(self):
+        self.a_t = scipy.optimize.minimize(self.ucbs_func, x0 = 0.5, bounds = [(0,1)]).x
+        return self.a_t[0]
+
+    def ucbs_func(self, price):
+        self.mu_t, self.sigma_t = self.gp.predict(price) 
+        return 1/(self.mu_t + self.beta(self.t) * self.sigma_t)
+
+    def update(self, r_t):
+        self.action_hist = np.append(self.action_hist, self.a_t)
+        self.reward_hist = np.append(self.reward_hist, r_t)
+        self.gp = self.gp.fit(self.a_t, r_t)
+        self.t += 1
+
diff --git a/NonStatContinuousPricing/Main_NonStatContinuousPricing.ipynb b/NonStatContinuousPricing/Main_NonStatContinuousPricing.ipynb
diff --git a/NonStatContinuousPricing/RBFGaussianProcess.py b/NonStatContinuousPricing/RBFGaussianProcess.py
@@ -0,0 +1,59 @@
+import numpy as np
+
+class RBFGaussianProcess:
+    def __init__(self, scale=1, reg=1e-2):
+        self.scale = scale # controls width of the gaussian
+        self.reg = reg #regularization parameter to be added to the diagonal of the kernel matrix
+        self.k_xx_inv = None # stores inverse of kernel function
+
+    def rbf_kernel_incr_inv(self, B, C, D):
+        """_summary_
+
+        Args:
+            B : The kernel values between existing training data and new data points
+            C : B transposed
+            D : Regularization term
+
+        Returns:
+            inverse of the kernel matrix online
+        """
+        temp = np.linalg.inv(D - C @ self.k_xx_inv @ B)
+        block1 = self.k_xx_inv + self.k_xx_inv @ B @ temp @ C @ self.k_xx_inv
+        block2 = - self.k_xx_inv @ B @ temp
+        block3 = - temp @ C @ self.k_xx_inv
+        block4 = temp
+        res1 = np.concatenate((block1, block2), axis=1)
+        res2 = np.concatenate((block3, block4), axis=1)
+        res = np.concatenate((res1, res2), axis=0)
+        return res
+
+    def rbf_kernel(self, a, b):
+        a_ = a.reshape(-1, 1)
+        b_ = b.reshape(-1, 1)
+        output = -1 * np.ones((a_.shape[0], b_.shape[0]))
+        for i in range(a_.shape[0]):
+            output[i, :] = np.power(a_[i] - b_, 2).ravel()
+        return np.exp(-self.scale * output)
+
+    def fit(self, x=np.array([]), y=np.array([])):
+        x,y = np.array(x),np.array(y)
+        if self.k_xx_inv is None:
+            self.y = y.reshape(-1,1)
+            self.x = x.reshape(-1,1)
+            k_xx = self.rbf_kernel(self.x, self.x) + self.reg * np.eye(self.x.shape[0])
+            self.k_xx_inv = np.linalg.inv(k_xx)
+        else:
+            B = self.rbf_kernel(self.x, x)
+            self.x = np.vstack((self.x, x))
+            self.y = np.vstack((self.y, y))
+            self.k_xx_inv = self.rbf_kernel_incr_inv(B, B.T, np.array([1 + self.reg]))
+
+        return self
+
+    def predict(self, x_predict):
+        k = self.rbf_kernel(x_predict, self.x)
+
+        mu_hat = k @ self.k_xx_inv @ self.y
+        sigma_hat = 1 - np.diag(k @ self.k_xx_inv @ k.T)
+
+        return mu_hat.ravel(), sigma_hat.ravel()
diff --git a/NonStatContinuousPricing/StochasticPricingEnvironment2.py b/NonStatContinuousPricing/StochasticPricingEnvironment2.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# In[1]:
+
+
+import numpy as np
+
+class StochasticPricingEnvironment:
+    def __init__(self, conversion_probability, cost):
+        self.conversion_probability = conversion_probability
+        self.cost = cost
+        self.t = 0
+
+    def round(self, p_t, n_t):
+        d_t = np.random.binomial(n_t, self.conversion_probability(p_t, self.t))
+        r_t = (p_t - self.cost)*d_t
+        self.t += 1
+        return d_t, r_t
+
diff --git a/StatContinuousPricing/GPThompson_Continuous.py b/StatContinuousPricing/GPThompson_Continuous.py
@@ -0,0 +1,31 @@
+import numpy as np
+from RBFGaussianProcess import RBFGaussianProcess
+import scipy
+
+class GPThompson:
+    def __init__(self, T):
+        self.T = T
+        self.gp = RBFGaussianProcess(scale=2).fit()
+        self.a_t = None
+        self.action_hist = np.array([])
+        self.reward_hist = np.array([])
+        self.mu_t = 0
+        self.sigma_t = 0
+        self.gamma = lambda t: np.log(t+1)**2 
+        self.beta = lambda t: 1 + 0.5*np.sqrt(2 * (self.gamma(t) + 1 + np.log(T)))
+        self.t = 0
+
+    def pull_arm(self):
+        self.a_t = scipy.optimize.minimize(self.obj, x0 = 0.5, bounds = [(0,1)]).x
+        return self.a_t
+
+    def obj(self, arm):
+        self.mu_t, self.sigma_t = self.gp.predict(arm) 
+        sample = np.random.normal(self.mu_t, self.sigma_t)
+        return 1/sample
+
+    def update(self, r_t):
+        self.action_hist = np.append(self.action_hist, self.a_t)
+        self.reward_hist = np.append(self.reward_hist, r_t)
+        self.gp = self.gp.fit(self.a_t, r_t)
+        self.t += 1
diff --git a/StatContinuousPricing/GPUCBAgent_Continuous.py b/StatContinuousPricing/GPUCBAgent_Continuous.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# In[ ]:
+
+
+import numpy as np
+from RBFGaussianProcess import RBFGaussianProcess
+import scipy
+
+class GPUCBAgent:
+    def __init__(self, T):
+        self.T = T
+        self.gp = RBFGaussianProcess(scale=2).fit()
+        self.a_t = None
+        self.action_hist = np.array([])
+        self.reward_hist = np.array([])
+        self.mu_t = 0
+        self.sigma_t = 0
+        self.gamma = lambda t: np.log(t+1)**2 
+        self.beta = lambda t: 1 + 0.5*np.sqrt(2 * (self.gamma(t) + 1 + np.log(T)))
+        self.t = 0
+
+    def pull_arm(self):
+        self.a_t = scipy.optimize.minimize(self.ucbs_func, x0 = 0.5, bounds = [(0,1)]).x
+        return self.a_t[0]
+
+    def ucbs_func(self, price):
+        self.mu_t, self.sigma_t = self.gp.predict(price) 
+        return 1/(self.mu_t + self.beta(self.t) * self.sigma_t)
+
+    def update(self, r_t):
+        self.action_hist = np.append(self.action_hist, self.a_t)
+        self.reward_hist = np.append(self.reward_hist, r_t)
+        self.gp = self.gp.fit(self.a_t, r_t)
+        self.t += 1
+
diff --git a/StatContinuousPricing/Main_StatContinuousPricing.ipynb b/StatContinuousPricing/Main_StatContinuousPricing.ipynb
diff --git a/StatContinuousPricing/RBFGaussianProcess.py b/StatContinuousPricing/RBFGaussianProcess.py
@@ -0,0 +1,59 @@
+import numpy as np
+
+class RBFGaussianProcess:
+    def __init__(self, scale=1, reg=1e-2):
+        self.scale = scale # controls width of the gaussian
+        self.reg = reg #regularization parameter to be added to the diagonal of the kernel matrix
+        self.k_xx_inv = None # stores inverse of kernel function
+
+    def rbf_kernel_incr_inv(self, B, C, D):
+        """_summary_
+
+        Args:
+            B : The kernel values between existing training data and new data points
+            C : B transposed
+            D : Regularization term
+
+        Returns:
+            inverse of the kernel matrix online
+        """
+        temp = np.linalg.inv(D - C @ self.k_xx_inv @ B)
+        block1 = self.k_xx_inv + self.k_xx_inv @ B @ temp @ C @ self.k_xx_inv
+        block2 = - self.k_xx_inv @ B @ temp
+        block3 = - temp @ C @ self.k_xx_inv
+        block4 = temp
+        res1 = np.concatenate((block1, block2), axis=1)
+        res2 = np.concatenate((block3, block4), axis=1)
+        res = np.concatenate((res1, res2), axis=0)
+        return res
+
+    def rbf_kernel(self, a, b):
+        a_ = a.reshape(-1, 1)
+        b_ = b.reshape(-1, 1)
+        output = -1 * np.ones((a_.shape[0], b_.shape[0]))
+        for i in range(a_.shape[0]):
+            output[i, :] = np.power(a_[i] - b_, 2).ravel()
+        return np.exp(-self.scale * output)
+
+    def fit(self, x=np.array([]), y=np.array([])):
+        x,y = np.array(x),np.array(y)
+        if self.k_xx_inv is None:
+            self.y = y.reshape(-1,1)
+            self.x = x.reshape(-1,1)
+            k_xx = self.rbf_kernel(self.x, self.x) + self.reg * np.eye(self.x.shape[0])
+            self.k_xx_inv = np.linalg.inv(k_xx)
+        else:
+            B = self.rbf_kernel(self.x, x)
+            self.x = np.vstack((self.x, x))
+            self.y = np.vstack((self.y, y))
+            self.k_xx_inv = self.rbf_kernel_incr_inv(B, B.T, np.array([1 + self.reg]))
+
+        return self
+
+    def predict(self, x_predict):
+        k = self.rbf_kernel(x_predict, self.x)
+
+        mu_hat = k @ self.k_xx_inv @ self.y
+        sigma_hat = 1 - np.diag(k @ self.k_xx_inv @ k.T)
+
+        return mu_hat.ravel(), sigma_hat.ravel()
diff --git a/StatContinuousPricing/StochasticPricingEnvironment.py b/StatContinuousPricing/StochasticPricingEnvironment.py
@@ -0,0 +1,11 @@
+import numpy as np
+
+class StochasticPricingEnvironment:
+    def __init__(self, conversion_probability, cost):
+        self.conversion_probability = conversion_probability
+        self.cost = cost
+
+    def round(self, p_t, n_t):
+        d_t = np.random.binomial(n_t, self.conversion_probability(p_t))
+        r_t = (p_t - self.cost)*d_t
+        return d_t, r_t