08_BernTwoPyMC.py

"""
Inferring two binomial proportions using PyMC.
"""
from __future__ import division
import matplotlib.pyplot as plt
import numpy as np
import pymc3 as pm
from plot_post import plot_post

# Generate the data
y1 = np.array([1, 1, 1, 1, 1, 0, 0])  # 5 heads and 2 tails
y2 = np.array([1, 1, 0, 0, 0, 0, 0])  # 2 heads and 5 tails


with pm.Model() as model:
    # define the prior
    theta1 = pm.Beta('theta1', 3, 3)  # prior
    theta2 = pm.Beta('theta2', 3, 3)  # prior
    # define the likelihood
    y1 = pm.Bernoulli('y1', p=theta1, observed=y1)
    y2 = pm.Bernoulli('y2', p=theta2, observed=y2)

    # Generate a MCMC chain
    start = pm.find_MAP()  # Find starting value by optimization
    trace = pm.sample(10000, pm.Metropolis(),
                      progressbar=False)  # Use Metropolis sampling
#    start = pm.find_MAP()  # Find starting value by optimization
#    step = pm.NUTS()  # Instantiate NUTS sampler
#    trace = pm.sample(10000, step, start=start, progressbar=False)

# create an array with the posterior sample
theta1_sample = trace['theta1']
theta2_sample = trace['theta2']

# Plot the trajectory of the last 500 sampled values.
plt.plot(theta1_sample[:-500], theta2_sample[:-500], marker='o')
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.xlabel(r'$\theta1$')
plt.ylabel(r'$\theta2$')

# Display means in plot.
plt.plot(0, label='M = %.3f, %.3f' % (np.mean(theta1_sample), np.mean(theta2_sample)), alpha=0.0)

plt.legend(loc='upper left')
plt.savefig('Figure_8.6.png')

# Plot a histogram of the posterior differences of theta values.
plt.figure()
theta_diff = theta1_sample - theta2_sample
plot_post(theta_diff, xlab=r'\theta1-theta2', comp_val=0.0, show_mode=False, bins=30)
plt.savefig('Figure_8.8.png')


# For Exercise 8.5:
# Posterior prediction. For each step in the chain, use the posterior thetas 
# to flip the coins.
chain_len = len(theta1_sample)
# Create matrix to hold results of simulated flips:
y_pred = np.zeros((2, chain_len))
for step_idx in range(chain_len):  # step through the chain
    # flip the first coin:
    p_head1 = theta1_sample[step_idx]
    y_pred[0, step_idx] = np.random.choice([0,1], p=[1-p_head1, p_head1])
    # flip the second coin:
    p_head2 = theta2_sample[step_idx]
    y_pred[1, step_idx] = np.random.choice([0,1], p=[1-p_head2, p_head2])


# Now determine the proportion of times that y1==1 and y2==0
pY1eq1andY2eq0 = sum((y_pred[0] ==1) & (y_pred[1] == 0)) / chain_len

print pY1eq1andY2eq0
plt.show()