Skip to content

Instantly share code, notes, and snippets.

@cdmuhlb
Last active October 19, 2015 00:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cdmuhlb/7e965dd93580faac3c55 to your computer and use it in GitHub Desktop.
Save cdmuhlb/7e965dd93580faac3c55 to your computer and use it in GitHub Desktop.
Bayesian Blocks Density Estimation

Kernel density estimation with per-sample bandwidths determined by the widths of Bayesian Blocks.

As the measurements in this example are quantized, the block likelihood needs to account for the bin width. However, the current implementation assumes continuous, unique event data. Therefore, noise has been added to the dataset to approximate continuous measurements.

This example is based on a constant-bandwidth version by Mike Bostock.

<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
<meta charset="utf-8" />
<title>Bayesian Blocks Density Estimation</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.6/d3.min.js" charset="utf-8"></script>
<style>
body {
font: 10px sans-serif;
}
.bar {
fill: #bbb;
shape-rendering: crispEdges;
}
.line {
fill: none;
stroke: #000;
stroke-width: 1.5px;
}
.axis path,
.axis line {
fill: none;
stroke: #000;
shape-rendering: crispEdges;
}
.y.axis path {
display: none;
}
</style>
</head>
<body>
<svg id="bbkdesvg" version="1.1" xmlns="http://www.w3.org/2000/svg" width="960" height="500" viewBox="0 0 960 500">
</svg>
<script src="/cdmuhlb/raw/fbc2b1ac41d6a6a7d695/interactive-bayesian-blocks.js"></script>
<script>
"use strict";
var svgElem = document.getElementById("bbkdesvg");
var gfxWidth = svgElem.viewBox.baseVal.width;
var gfxHeight = svgElem.viewBox.baseVal.height;
var margin = {top: 20, right: 30, bottom: 30, left: 40},
width = gfxWidth - margin.left - margin.right,
height = gfxHeight - margin.top - margin.bottom;
var x = d3.scale.linear()
.domain([30, 110])
.range([0, width]);
var y = d3.scale.linear()
.domain([0, 13])
.range([height, 0]);
var xAxis = d3.svg.axis()
.scale(x)
.orient("bottom");
var yAxis = d3.svg.axis()
.scale(y)
.orient("left")
var line = d3.svg.line()
.x(function(d) { return x(d[0]); })
.y(function(d) { return y(d[1]); })
.interpolate("cardinal");
var svg = d3.select(svgElem)
.append("g")
.attr("transform", "translate(" + margin.left + "," + margin.top + ")");
svg.append("g")
.attr("class", "x axis")
.attr("transform", "translate(0," + height + ")")
.call(xAxis)
.append("text")
.attr("class", "label")
.attr("x", width)
.attr("y", -6)
.style("text-anchor", "end")
.text("Time between Eruptions (min.)");
svg.append("g")
.attr("class", "y axis")
.call(yAxis);
d3.json("/mbostock/raw/4341954/faithful.json", function(error, faithful) {
if (error) throw error;
// Add some noise (my current implementation doesn't handle duplicate
// data robustly and is inappropriate for quantized measurements)
// TODO: Given the quantization of the measurements, this should really
// use Bayesian Blocks in "mode 2", not "mode 1"
for (var i = 0; i < faithful.length; ++i) {
faithful[i] += Math.random() - 0.5;
}
// Compute Bayesian Blocks
var ncp_prior = 3;
var alg = new BayesianBlocksAlgorithm(faithful, ncp_prior);
while (!alg.isFinished()) {
alg.step();
}
var thresholds = [alg.bins[0].start];
for (var bin of alg.bins) {
thresholds.push(bin.end);
}
var histogram = d3.layout.histogram()
.bins(thresholds);
var data = histogram(faithful),
kde = kernelDensityEstimator(epanechnikovKernel, data);
svg.selectAll(".bar")
.data(data)
.enter().insert("rect", ".axis")
.attr("class", "bar")
.attr("x", function(d) { return x(d.x) + 1; })
.attr("y", function(d) { return y(d.y/d.dx); })
.attr("width", function(d) { return x(d.dx + d.x) - x(d.x) - 1; })
.attr("height", function(d) { return height - y(d.y/d.dx); });
svg.append("path")
.datum(kde(x.ticks(100)))
.attr("class", "line")
.attr("d", line);
});
function binarySearch(values, x, start, end) {
var lo = (typeof start !== 'undefined') ? (start|0) : 0;
var hi = (typeof end !== 'undefined') ? (end|0) : values.length;
if ((lo < 0) || (hi > values.length) || (lo > hi)) {
throw new Error("Invalid search parameters: " + lo + ", " + hi);
}
while (lo < hi) {
var i = (hi + lo) >>> 1
var xi = values[i]
if (x < xi) hi = i;
else if (x > xi) lo = i + 1;
else return i;
}
return lo;
}
function kernelDensityEstimator(kernel, bins) {
return function(xs) {
return xs.map(function(x) {
var y = d3.sum(bins, function(b) {
return d3.sum(b, function(v) { return kernel(v - x, b.dx); });
});
return [x, y];
});
}
}
function epanechnikovKernel(x, scale) {
var u = x/scale;
return (Math.abs(u) <= 1) ? (0.75*(1 - u*u)/scale) : 0;
}
</script>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment