Skip to content

Instantly share code, notes, and snippets.

@jfirebaugh
Created April 3, 2011 20:25
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save jfirebaugh/900762 to your computer and use it in GitHub Desktop.

A kernel density estimate provides a means of estimating and visualizing the probability distribution function of a random variable based on a random sample. In contrast to a histogram, a kernel density estimate provides a smooth estimate, via the effect of a smoothing parameter called the bandwidth, here denoted by h. With the correct choice of bandwidth, important features of the distribution can be seen; an incorrect choice will result in undersmoothing or oversmoothing and obscure those features.

Here we see a histogram and three kernel density estimates for a sample of waiting times in minutes between eruptions of Old Faithful Geyser in Yellowstone National Park, taken from R's faithful dataset. The data follow a bimodal distribution; short eruptions are followed by a wait time averaging about 55 minutes, and long eruptions by a wait time averaging about 80 minutes. In recent years, wait times have been increasing, possibly due to the effects of earthquakes on the geyser's geohydrology.

var faithful = [
79, 54, 74, 62, 85, 55, 88, 85, 51, 85, 54, 84, 78, 47, 83, 52, 62, 84, 52, 79, 51, 47, 78, 69, 74, 83, 55, 76, 78, 79, 73, 77, 66, 80, 74, 52, 48, 80, 59, 90, 80, 58, 84, 58, 73, 83, 64, 53,
82, 59, 75, 90, 54, 80, 54, 83, 71, 64, 77, 81, 59, 84, 48, 82, 60, 92, 78, 78, 65, 73, 82, 56, 79, 71, 62, 76, 60, 78, 76, 83, 75, 82, 70, 65, 73, 88, 76, 80, 48, 86, 60, 90, 50, 78, 63, 72,
84, 75, 51, 82, 62, 88, 49, 83, 81, 47, 84, 52, 86, 81, 75, 59, 89, 79, 59, 81, 50, 85, 59, 87, 53, 69, 77, 56, 88, 81, 45, 82, 55, 90, 45, 83, 56, 89, 46, 82, 51, 86, 53, 79, 81, 60, 82, 77,
76, 59, 80, 49, 96, 53, 77, 77, 65, 81, 71, 70, 81, 93, 53, 89, 45, 86, 58, 78, 66, 76, 63, 88, 52, 93, 49, 57, 77, 68, 81, 81, 73, 50, 85, 74, 55, 77, 83, 83, 51, 78, 84, 46, 83, 55, 81, 57,
76, 84, 77, 81, 87, 77, 51, 78, 60, 82, 91, 53, 78, 46, 77, 84, 49, 83, 71, 80, 49, 75, 64, 76, 53, 94, 55, 76, 50, 82, 54, 75, 78, 79, 78, 78, 70, 79, 70, 54, 86, 50, 90, 54, 54, 77, 79, 64,
75, 47, 86, 63, 85, 82, 57, 82, 67, 74, 54, 83, 73, 73, 88, 80, 71, 83, 56, 79, 78, 84, 58, 83, 43, 60, 75, 81, 46, 90, 46, 74
];
<html>
<head>
<title>Kernel Density Estimation</title>
<script type="text/javascript" src="http://vis.stanford.edu/protovis/protovis-r3.2.js"></script>
<script type="text/javascript" src="kde.js"></script>
<script type="text/javascript" src="faithful.js"></script>
<style type="text/css">
body {
margin: 0;
}
</style>
</head>
<body>
<script type="text/javascript+protovis">
/* Sizing and scales. */
var w = document.body.clientWidth - 40,
h = document.body.clientHeight - 25,
x = pv.Scale.linear(30, 110).range(0, w),
y = pv.Scale.linear(0, 0.1).range(0, h),
bandwidths = [1, 7, 20];
/* The root panel. */
var vis = new pv.Panel()
.width(w)
.height(h)
.bottom(20)
.left(30)
.right(10)
.top(5);
/* X-axis ticks. */
vis.add(pv.Rule)
.data(x.ticks())
.left(x)
.strokeStyle("#eee")
.add(pv.Rule)
.bottom(-5)
.height(5)
.strokeStyle("#000")
.anchor("bottom").add(pv.Label)
.text(x.tickFormat);
/* Y-axis ticks. */
vis.add(pv.Rule)
.data(y.ticks(5))
.bottom(y)
.strokeStyle("#eee")
.anchor("left").add(pv.Label)
.text(y.tickFormat);
/* Histogram */
vis.add(pv.Bar)
.data(pv.histogram(faithful).frequency(false).bins(x.ticks(60)))
.bottom(0)
.left(function(d) x(d.x))
.width(function(d) x(d.dx + 30))
.height(function(d) y(d.y))
.fillStyle("#aaa")
.strokeStyle("rgba(255, 255, 255, .2)")
.lineWidth(1);
/* Kernel density estimates */
var panel = vis.add(pv.Panel)
.data(bandwidths);
var line = panel.add(pv.Line)
.data(function(h) kde(faithful).scale(h).points(pv.range(30, 110, 0.1)))
.left(function(d) x(d.x))
.bottom(function(d) y(d.y));
panel.add(pv.Dot)
.shape("square")
.lineWidth(0)
.fillStyle(function() line.strokeStyle())
.top(function () this.parent.index * 12 + 10)
.left(10)
.anchor("right").add(pv.Label)
.text(function (h) "h = " + h);
vis.render();
</script>
</body>
</html>
function kde(sample) {
/* Epanechnikov kernel */
function epanechnikov(u) {
return Math.abs(u) <= 1 ? 0.75 * (1 - u*u) : 0;
};
var kernel = epanechnikov;
return {
scale: function(h) {
kernel = function (u) { return epanechnikov(u / h) / h; };
return this;
},
points: function(points) {
return points.map(function(x) {
var y = pv.sum(sample.map(function (v) {
return kernel(x - v);
})) / sample.length;
return {x: x, y: y};
});
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment