Implement and test the ExpectationMaximizer; stunningly, it produces the exact same RAEs as the LikelihoodMaximizer

mirkobunse committed Sep 20, 2024
1 parent 04889cc commit a3741d0
Showing 2 changed files with 18 additions and 8 deletions.
17 changes: 12 additions & 5 deletions qunfold/methods/likelihood.py
@@ -1,5 +1,5 @@
 import jax.numpy as jnp
-from . import AbstractMethod, check_y, class_prevalences, minimize
+from . import AbstractMethod, check_y, class_prevalences, minimize, Result

 class LikelihoodMaximizer(AbstractMethod):
   """The maximum likelihood method, as studied by Alexandari et al. (2020).
@@ -41,7 +41,7 @@ def fit(self, X, y, n_classes=None):
     self.classifier.fit(X, y)
     return self
   def predict(self, X):
-    pXY = self.classifier.predict_proba(X) / self.p_trn # proportional to P(X|Y)
+    pXY = jnp.array(self.classifier.predict_proba(X) / self.p_trn) # proportional to P(X|Y)
     pXY = pXY / pXY.sum(axis=1, keepdims=True) # normalize to P(X|Y)

     # TODO 1) filter out all rows from pXY that contain zeros or ones, or values close to zero or one up to some self.epsilon. Goal: to reduce thrown errors / warnings and to replace the corresponding estimates with proper ones.
@@ -95,6 +95,13 @@ def fit(self, X, y, n_classes=None):
     self.classifier.fit(X, y)
     return self
   def predict(self, X):
-    pXY = classifier.predict_proba(X) / self.p_trn # proportional to P(X|Y)
-    pXY = pXY / pXY.sum(axis=1, keepdims=True) # normalize to P(X|Y)
-    raise NotImplementedError("TODO")
+    pYX_pY = jnp.array(self.classifier.predict_proba(X) / self.p_trn) # P(Y|X) / P_trn(Y)
+    p_prev = jnp.array(self.p_trn) # the current estimate
+    for n_iter in range(self.max_iter):
+      pYX = pYX_pY * p_prev
+      pYX = pYX / pYX.sum(axis=1, keepdims=True)
+      p_next = pYX.mean(axis=0)
+      if jnp.linalg.norm(p_next - p_prev) < self.tol:
+        return Result(p_next, n_iter+1, "Optimization terminated successfully.")
+      p_prev = p_next
+    return Result(p_prev, self.max_iter, "Maximum number of iterations reached.")
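
For context, a minimal standalone sketch of the expectation-maximization (SLD) update that the new predict method iterates. The function name em_prevalences, the toy posteriors, and the tolerance values below are made-up for illustration and are not part of this commit; the actual method obtains the posteriors from self.classifier.

  # EM (SLD) prevalence update, assuming row-wise posteriors pYX_trn obtained
  # from a classifier that was trained under the prevalences p_trn
  import numpy as np

  def em_prevalences(pYX_trn, p_trn, max_iter=100, tol=1e-8):
    pYX_pY = pYX_trn / p_trn # P(Y|X) divided by the training prevalences
    p_prev = np.copy(p_trn) # start the iteration from the training prevalences
    for n_iter in range(max_iter):
      pYX = pYX_pY * p_prev # re-weight the posteriors with the current estimate
      pYX = pYX / pYX.sum(axis=1, keepdims=True) # re-normalize each row
      p_next = pYX.mean(axis=0) # average the adjusted posteriors
      if np.linalg.norm(p_next - p_prev) < tol:
        return p_next, n_iter + 1 # converged
      p_prev = p_next
    return p_prev, max_iter # not converged within max_iter

  # toy example with 4 items and 2 classes; the numbers are arbitrary
  pYX_trn = np.array([[.9, .1], [.8, .2], [.7, .3], [.4, .6]])
  p_hat, n_iter = em_prevalences(pYX_trn, np.array([.5, .5]))
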
9 changes: 6 additions & 3 deletions qunfold/tests/__init__.py
@@ -42,7 +42,7 @@ def generate_data(M, p, n_samples=1000):
 class TestMethods(TestCase):
   def test_methods(self):
     start = time.time()
-    for _ in range(10):
+    for _ in range(5):
       q, M, p_trn = make_problem()
       n_classes = len(p_trn)
       X_trn, y_trn = generate_data(M, p_trn)
@@ -63,6 +63,7 @@ def test_methods(self):
       p_kmme = qunfold.KMM('energy').fit(X_trn, y_trn).predict(X_tst)
       p_rff = qunfold.KMM('rff').fit(X_trn, y_trn).predict(X_tst)
       p_maxl = qunfold.LikelihoodMaximizer(rf).fit(X_trn, y_trn).predict(X_tst)
+      p_sld = qunfold.ExpectationMaximizer(rf).fit(X_trn, y_trn).predict(X_tst)
       qp.environ["SAMPLE_SIZE"] = len(X_tst) # needed to compute the RAE
       print(
         f" p_pacc = {p_pacc} (RAE {qp.error.rae(p_pacc, p_tst):.4f})",
@@ -81,6 +82,8 @@ def test_methods(self):
         f" {p_rff.nit} it.; {p_rff.message}",
         f" p_maxl = {p_maxl} (RAE {qp.error.rae(p_maxl, p_tst):.4f})",
         f" {p_maxl.nit} it.; {p_maxl.message}",
+        f" p_sld = {p_sld} (RAE {qp.error.rae(p_sld, p_tst):.4f})",
+        f" {p_sld.nit} it.; {p_sld.message}",
         f" p_tst = {p_tst}",
         sep = "\n",
         end = "\n"*2
@@ -91,7 +94,7 @@ def test_methods(self):
 class TestCVClassifier(TestCase):
   def test_methods(self):
     start = time.time()
-    for _ in range(10):
+    for _ in range(5):
       q, M, p_trn = make_problem()
       n_classes = len(p_trn)
       X_trn, y_trn = generate_data(M, p_trn)
@@ -128,7 +131,7 @@ def __call__(self):

 class TestQuaPyWrapper(TestCase):
   def test_methods(self):
-    for _ in range(10):
+    for _ in range(5):
       q, M, p_trn = make_problem()
       X_trn, y_trn = generate_data(M, p_trn)
       p_tst = RNG.permutation(p_trn)
