@straypacket
Last active August 29, 2015
Using SciPy, NumPy and Matplotlib's pyplot to draw error-bar graphs. Notes on a comparison of frequentism vs. Bayesianism.
import numpy as np
from scipy import stats
np.random.seed(1)
F_true = 1000 # true value of the quantity being measured
N = 50 # number of measurements
F = stats.poisson(F_true).rvs(N)
e = np.sqrt(F) # errors on Poisson counts estimated via square root
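# Sanity check (a quick aside): for a Poisson process the standard deviation
# equals the square root of the rate, so each estimated error sqrt(F_i)
# should hover around sqrt(F_true) ~= 31.6.
print("mean estimated error: {0:.1f} (expected ~{1:.1f})".format(
    e.mean(), np.sqrt(F_true)))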
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.errorbar(F, np.arange(N), xerr=e, fmt='ok', ecolor='gray', alpha=0.5)
ax.vlines([F_true], 0, N, linewidth=5, alpha=0.2) # mark the true value
ax.set_xlabel("Range")
ax.set_ylabel("Measurement number")
plt.show()
# Frequentist approach
# Based on the likelihood function L(D|F_true):
# L(D|F_true) = prod_{i=1..N} P(D_i|F_true)
w = 1. / e ** 2 # inverse-variance weights: the ML estimate is the weighted mean
print("""
F_true = {0}
F_est = {1:.0f} +/- {2:.0f} (based on {3} measurements)
""".format(F_true, (w * F).sum() / w.sum(), w.sum() ** -0.5, N))
# With these 50 samples the relative error is about 0.4%:
#
# F_true = 1000
# F_est = 998 +/- 4 (based on 50 measurements)
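# Cross-check (a sketch, using scipy.optimize rather than the closed form):
# numerically maximizing the Gaussian-approximated log-likelihood should
# reproduce the weighted-mean estimate above.
from scipy import optimize

def neg_log_likelihood(F_est):
    # Gaussian approximation to the Poisson likelihood of the N counts
    return 0.5 * np.sum(np.log(2 * np.pi * e ** 2) + (F - F_est) ** 2 / e ** 2)

opt = optimize.minimize_scalar(neg_log_likelihood)
print("F_est via optimizer: {0:.0f}".format(opt.x))  # ~998, matching the closed form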
# Bayesian approach
# Bayes' law:
# P(F_true|D) = P(D|F_true) P(F_true) / P(D)
#
# P(F_true|D) => posterior
# P(D|F_true) => likelihood (the same quantity the frequentist maximizes)
# P(F_true)   => prior (what we knew before seeing the data)
# P(D)        => evidence (a normalization term)
#
# With a flat prior, P(F_true) = const, the posterior is proportional
# to the likelihood:
# P(F_true|D) ~ L(D|F_true)
#
# Demonstration using MCMC
import emcee

def log_prior(theta):
    # flat prior: any constant works, since only log-posterior differences matter
    return 1

def log_likelihood(theta, F, e):
    return -0.5 * np.sum(np.log(2 * np.pi * e ** 2)
                         + (F - theta[0]) ** 2 / e ** 2)

def log_posterior(theta, F, e):
    return log_prior(theta) + log_likelihood(theta, F, e)
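# Quick sanity check on the model (900 is just an arbitrary off-peak value):
# the log-posterior should score the true value higher than a distant one.
assert log_posterior(np.array([F_true]), F, e) > log_posterior(np.array([900.0]), F, e)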
# Drawing samples from posterior
ndim = 1 # number of parameters in the model
nwalkers = 50 # number of MCMC walkers
nburn = 1000 # "burn-in" period to let chains stabilize
nsteps = 2000 # number of MCMC steps to take
# we'll start at random locations between 0 and 2000
starting_guesses = 2000 * np.random.rand(nwalkers, ndim)
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_posterior, args=[F, e])
sampler.run_mcmc(starting_guesses, nsteps)
# sampler.chain has shape (nwalkers, nsteps, ndim)
sample = sampler.chain[:, nburn:, :].ravel() # discard burn-in points
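# Rough health check on the sampler: for emcee, a mean acceptance fraction
# in the 0.2-0.5 range is the usual rule of thumb.
print("mean acceptance fraction: {0:.2f}".format(
    np.mean(sampler.acceptance_fraction)))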
# Check result
plt.hist(sample, bins=50, histtype="stepfilled", alpha=0.3, density=True) # normalized histogram
# plot a best-fit Gaussian
F_fit = np.linspace(975, 1025)
pdf = stats.norm(np.mean(sample), np.std(sample)).pdf(F_fit)
plt.plot(F_fit, pdf, '-k')
plt.xlabel("F"); plt.ylabel("P(F)")
plt.show()
print("""
F_true = {0}
F_est = {1:.0f} +/- {2:.0f} (based on {3} measurements)
""".format(F_true, np.mean(sample), np.std(sample), N))