Skip to content

Instantly share code, notes, and snippets.

@armollica
Last active May 21, 2016 10:53
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save armollica/0f28622e38434b04907d768df6cec684 to your computer and use it in GitHub Desktop.
Save armollica/0f28622e38434b04907d768df6cec684 to your computer and use it in GitHub Desktop.
Visualizing Distributions

A few ways to visualize 1D distributions.

Swarm and pile plots show every individual data point, so they become impractical when there are many points. Histograms and boxplots work for any number of data points because they visualize summary statistics rather than the raw points. Histograms provide more information than boxplots, while boxplots fit in small spaces, which makes them well suited to comparing many distributions side by side.

Other chart types for 1D distributions: violin plot, kernel density, empirical CDF.

The d3.forceChart() plugin is used for the swarm and pile plots; d3.layout.histogram() for the histogram; d3.scale.quantile() for the quartile summary statistics used in the boxplot.

/**
 * d3.forceChart - a reusable chart component built on d3.layout.force()
 * (d3 v3 API).
 *
 * Each node is pulled toward a target position and radius (x, y, r) while a
 * collision pass pushes overlapping nodes apart.
 *
 * Usage: `selection.call(chart, nodes)`. One `<g class="node">` is appended
 * per node; drawing the actual shape inside that group is left to the caller.
 * The node objects passed in are mutated: x, y, r (current state) and
 * x0, y0, r0 (targets) are written onto them.
 *
 * Configuration (each is a getter when called with no argument, otherwise a
 * setter that returns the chart for chaining):
 *   size([w, h]), padding(px), draggable(bool), shape("circle" | "square"),
 *   tickUpdate(fn)            - called with the node selection on every tick
 *   x, y, r                   - target position / radius
 *   xStart, yStart, rStart    - initial position / radius
 *   xGravity, yGravity, rGravity - per-node multiplier on the pull to target
 * The x/y/r-style accessors accept a number (constant) or a function(d);
 * values of any other type are ignored.
 *
 * force() returns the underlying d3 force layout.
 */
d3.forceChart = function() {
  var width = 400,
      height = 300,
      padding = 3,
      x = function(d) { return d[0]; },
      y = function(d) { return d[1]; },
      r = function(d) { return d[2]; },
      // By default nodes start within +/-25 of their target so they are not
      // all stacked on the same spot when several share a target.
      xStart = function(d) { return x(d) + 50*Math.random() - 25; },
      yStart = function(d) { return y(d) + 50*Math.random() - 25; },
      rStart = function(d) { return r(d); },
      draggable = true,
      xGravity = function(d) { return 1; },
      yGravity = function(d) { return 1; },
      rGravity = function(d) { return 1; },
      shape = "circle",
      tickUpdate = function() {};

  // The layout's built-in charge and gravity are disabled; the pull toward
  // the targets and the collision handling are applied in tick() instead.
  var force = d3.layout.force()
      .charge(0)
      .gravity(0);

  /**
   * Renders the chart.
   * @param selection d3 selection that receives the node groups.
   * @param nodes     array of data objects (mutated, see above).
   */
  function chart(selection, nodes) {
    // Must be local: the resolvers close over this call's `nodes`, so sharing
    // one (e.g. through an undeclared global) would make a chart collide
    // against another chart's nodes. An undeclared assignment would also
    // throw in strict mode.
    var collide;
    if (shape === "circle") { collide = collideCircle; }
    else if (shape === "square") { collide = collideSquare; }
    else {
      console.error("forceChart.shape must be 'circle' or 'square'");
      // Without a collision resolver every tick would fail, so draw nothing.
      return;
    }

    // Seed the current state and record the targets on each node.
    nodes = nodes
      .map(function(d) {
        d.x = xStart(d);
        d.y = yStart(d);
        d.r = rStart(d);
        d.x0 = x(d);
        d.y0 = y(d);
        d.r0 = r(d);
        return d;
      });

    var gNodes = selection.selectAll(".node").data(nodes)
      .enter().append("g")
        .attr("class", "node")
        .call(draggable ? force.drag : function() {});

    force
        .size([width, height])
        .nodes(nodes)
        .on("tick", tick)
        .start();

    // One simulation step: pull toward targets, resolve overlaps, reposition.
    function tick(e) {
      gNodes
          .each(gravity(e.alpha * .1))
          .each(collide(.5))
          .attr("transform", function(d) {
            return "translate(" + d.x + "," + d.y + ")";
          })
          .call(tickUpdate);
    }

    // Moves x, y and r a fraction k (scaled per node by the *Gravity
    // accessors) of the way toward x0, y0 and r0.
    function gravity(k) {
      return function(d) {
        var dx = d.x0 - d.x,
            dy = d.y0 - d.y,
            dr = d.r0 - d.r;

        d.x += dx * k * xGravity(d);
        d.y += dy * k * yGravity(d);
        d.r += dr * k * rGravity(d);
      };
    }

    // Circle collision: when two centres are closer than the sum of their
    // radii plus padding, both are moved along the line joining them.
    function collideCircle(k) {
      var q = d3.geom.quadtree(nodes);
      return function(node) {
        var nr = node.r + padding,
            nx1 = node.x - nr,
            nx2 = node.x + nr,
            ny1 = node.y - nr,
            ny2 = node.y + nr;
        q.visit(function(quad, x1, y1, x2, y2) {
          if (quad.point && (quad.point !== node)) {
            var x = node.x - quad.point.x,
                y = node.y - quad.point.y,
                l = x * x + y * y,
                r = nr + quad.point.r;
            // l > 0: exactly coincident centres have no separation direction
            // and would divide by zero below, turning the coordinates into
            // NaN; such a pair is left untouched.
            if (l > 0 && l < r * r) {
              l = ((l = Math.sqrt(l)) - r) / l * k;
              node.x -= x *= l;
              node.y -= y *= l;
              quad.point.x += x;
              quad.point.y += y;
            }
          }
          // Returning true skips quads that cannot overlap this node's box.
          return x1 > nx2 || x2 < nx1 || y1 > ny2 || y2 < ny1;
        });
      };
    }

    // Square collision: overlapping axis-aligned boxes are separated along
    // whichever axis currently has the larger centre-to-centre distance.
    function collideSquare(k) {
      var q = d3.geom.quadtree(nodes);
      return function(node) {
        var nr = node.r + padding,
            nx1 = node.x - nr,
            nx2 = node.x + nr,
            ny1 = node.y - nr,
            ny2 = node.y + nr;
        q.visit(function(quad, x1, y1, x2, y2) {
          if (quad.point && (quad.point !== node)) {
            var x = node.x - quad.point.x,
                y = node.y - quad.point.y,
                lx = Math.abs(x),
                ly = Math.abs(y),
                r = nr + quad.point.r;
            if (lx < r && ly < r) {
              if (lx > ly) {
                lx = (lx - r) * (x < 0 ? -k : k);
                node.x -= lx;
                quad.point.x += lx;
              } else {
                ly = (ly - r) * (y < 0 ? -k : k);
                node.y -= ly;
                quad.point.y += ly;
              }
            }
          }
          return x1 > nx2 || x2 < nx1 || y1 > ny2 || y2 < ny1;
        });
      };
    }
  }

  // Builds the getter/setter shared by every per-node accessor: no argument
  // returns the current accessor, a number becomes a constant accessor, a
  // function is used as-is, and any other type leaves the accessor unchanged.
  function functorAccessor(get, set) {
    return function(_) {
      if (!arguments.length) return get();
      if (typeof _ === "number") {
        set(function() { return _; });
      }
      else if (typeof _ === "function") {
        set(_);
      }
      return chart;
    };
  }

  chart.size = function(_) {
    if (!arguments.length) return [width, height];
    width = _[0];
    height = _[1];
    return chart;
  };

  chart.x = functorAccessor(
    function() { return x; },
    function(f) { x = f; }
  );

  chart.y = functorAccessor(
    function() { return y; },
    function(f) { y = f; }
  );

  chart.r = functorAccessor(
    function() { return r; },
    function(f) { r = f; }
  );

  chart.draggable = function(_) {
    if (!arguments.length) return draggable;
    draggable = _;
    return chart;
  };

  chart.padding = function(_) {
    if (!arguments.length) return padding;
    padding = _;
    return chart;
  };

  chart.xGravity = functorAccessor(
    function() { return xGravity; },
    function(f) { xGravity = f; }
  );

  chart.yGravity = functorAccessor(
    function() { return yGravity; },
    function(f) { yGravity = f; }
  );

  chart.rGravity = functorAccessor(
    function() { return rGravity; },
    function(f) { rGravity = f; }
  );

  chart.xStart = functorAccessor(
    function() { return xStart; },
    function(f) { xStart = f; }
  );

  chart.yStart = functorAccessor(
    function() { return yStart; },
    function(f) { yStart = f; }
  );

  chart.rStart = functorAccessor(
    function() { return rStart; },
    function(f) { rStart = f; }
  );

  chart.shape = function(_) {
    if (!arguments.length) return shape;
    shape = _;
    return chart;
  };

  chart.tickUpdate = function(_) {
    if (!arguments.length) return tickUpdate;
    tickUpdate = _;
    return chart;
  };

  chart.force = function() {
    return force;
  };

  return chart;
};
<html>
<head>
<style>
/* Monospace font for the whole page */
body { font-family: monospace; }
/* Axis lines and paths are never filled */
.axis line,
.axis path { fill: none; }
/* Only the y axis gives its line elements a stroke */
.y.axis line { stroke: black; }
/* Larger, bold text for the x axis */
.x.axis {
font-size: 16px;
font-weight: bold;
}
</style>
</head>
<body>
<!-- d3 v3: the inline script below uses its d3.scale, d3.svg, d3.layout and d3.random namespaces -->
<script src="https://d3js.org/d3.v3.min.js" charset="utf-8"></script>
<!-- lodash 4.11.1: the inline script below calls _.cloneDeep -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/lodash.js/4.11.1/lodash.min.js"></script>
<!-- Local file; presumably the one that defines d3.forceChart(), which the inline script calls (confirm) -->
<script src="force-chart.js"></script>
<script>
// Drawing area: a 960x500 SVG minus the margins
var margin = { top: 30, left: 50, bottom: 30, right: 10 },
    width = 960 - margin.left - margin.right,
    height = 500 - margin.top - margin.bottom;

// One horizontal band per chart, in display order
var chartTypes = ["Swarm", "Pile", "Histogram", "Boxplot"];

var chartScale = d3.scale.ordinal()
      .domain(chartTypes)
      .rangeRoundBands([0, width], .3),
    // Length of the histogram bars; histogram() sets its domain
    barScale = d3.scale.linear()
      .range([0, chartScale.rangeBand()]),
    // Value scale shared by all four charts; its domain is set once the data
    // exists (see below)
    yScale = d3.scale.linear().range([height, 0]);

var xAxis = d3.svg.axis().scale(chartScale).orient("top"),
    yAxis = d3.svg.axis().scale(yScale).orient("left");

// Swarm: weak pull toward the band's centre line (x = 0), strong pull toward
// each point's y value
var swarmChart = d3.forceChart()
    .padding(1)
    .x(0)
    .y(function(d) { return yScale(d.yVal); })
    .r(1.5)
    .xGravity(1/5)
    .yGravity(100)
    .draggable(false);

// Pile: same as the swarm, except that a node at x <= 0 is pulled toward
// x = 0 far more strongly (400) than a node at x > 0 (1/2)
var pileChart = d3.forceChart()
    .padding(1)
    .x(0)
    .y(function(d) { return yScale(d.yVal); })
    .r(1.5)
    .xGravity(function(d) { return d.x <= 0 ? 400 : 1/2; })
    .yGravity(100)
    .draggable(false);

var svg = d3.select("body").append("svg")
    .attr("width", width + margin.left + margin.right)
    .attr("height", height + margin.top + margin.bottom)
  .append("g")
    .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

// 700 log-normally distributed sample values. The generator is built once
// and reused instead of being re-created for every data point.
var randomValue = d3.random.logNormal(1, 0.5);
var data = d3.range(700)
  .map(function() { return { yVal: randomValue() }; });

// nice() only rounds the scale's *current* domain, so it has to run after the
// domain has been set from the data.
yScale.domain(d3.extent(data, function(d) { return d.yVal; })).nice();

// Draw axes
svg.append("g").call(xAxis)
  .attr("class", "x axis");

svg.append("g").call(yAxis)
  .attr("class", "y axis");

// Draw swarm chart (centred in its band). Each force chart gets its own deep
// copy of the data because the chart writes layout fields onto the nodes.
svg.append("g").call(swarmChart, _.cloneDeep(data))
    .attr("class", "swarm")
    .attr("transform", "translate(" + (chartScale("Swarm") + chartScale.rangeBand()/2) + ",0)")
  .selectAll(".node").append("circle")
    .attr("r", function(d) { return d.r; });

settleLayout(swarmChart, 200);

// Draw pile chart (starting at the left edge of its band)
svg.append("g").call(pileChart, _.cloneDeep(data))
    .attr("class", "pile")
    .attr("transform", "translate(" + chartScale("Pile") + ",0)")
  .selectAll(".node").append("circle")
    .attr("r", function(d) { return d.r; });

settleLayout(pileChart, 500);

// Draw histogram
svg.append("g").call(histogram, data)
  .attr("class", "histogram")
  .attr("transform", "translate(" + chartScale("Histogram") + ",0)");

// Draw boxplot (centred in its band)
var boxWidth = 20;
svg.append("g").call(boxplot, data, boxWidth)
  .attr("class", "boxplot")
  .attr("transform", "translate(" + (chartScale("Boxplot") + chartScale.rangeBand()/2 - boxWidth/2) + ",0)");

// Restarts the chart's force layout, advances it synchronously by
// `tickCount` ticks and then stops it, so the chart is shown already settled.
function settleLayout(forceChart, tickCount) {
  var force = forceChart.force();
  force.stop().start();
  for (var i = 0; i < tickCount; i++) { force.tick(); }
  force.stop();
}
/**
 * Draws a histogram of the `yVal` values in `data` as horizontal bars.
 *
 * Bin thresholds are the ticks of the shared `yScale` (25 requested), so the
 * bars line up with the value axis. As a side effect the shared `barScale`
 * domain is set to [0, largest bin.y].
 *
 * @param selection d3 selection that receives one rect.bar per bin.
 * @param data      array of objects with a numeric `yVal`.
 */
function histogram(selection, data) {
  var BAR_GAP = 2;

  var thresholds = yScale.ticks(25);

  var layout = d3.layout.histogram()
      .bins(thresholds)
      .value(function(datum) { return datum.yVal; });
  var buckets = layout(data);

  var tallest = d3.max(buckets, function(bucket) { return bucket.y; });
  barScale.domain([0, tallest]);

  // Pixel height of one bin, measured between the first two thresholds
  var bandHeight = yScale(thresholds[0]) - yScale(thresholds[1]);

  // Each bar is anchored at the pixel position of its bin's lower threshold
  function anchorAtLowerEdge(bucket) {
    return "translate(0," + yScale(bucket.x) + ")";
  }

  function barLength(bucket) {
    return barScale(bucket.y);
  }

  var bars = selection.selectAll(".bar").data(buckets)
    .enter().append("rect");

  bars
      .attr("class", "bar")
      .attr("transform", anchorAtLowerEdge)
      // Negative y moves the rect up from its anchor so it covers the bin
      .attr("y", BAR_GAP - bandHeight)
      .attr("height", bandHeight - BAR_GAP)
      .attr("width", barLength);
}
/**
 * Draws a vertical box plot of the `yVal` values in `data`, positioned with
 * the shared `yScale`.
 *
 * Elements are appended in this order: a dashed line from the maximum to the
 * minimum, a white box between the first and third quantile thresholds, and
 * solid horizontal lines at the maximum, the middle threshold and the minimum.
 *
 * @param selection d3 selection that receives the elements.
 * @param data      array of objects with a numeric `yVal`.
 * @param boxWidth  width of the box and of the horizontal lines, in pixels.
 */
function boxplot(selection, data, boxWidth) {
  function valueOf(datum) { return datum.yVal; }
  function toPixel(value) { return yScale(value); }

  // A quantile scale with a 4-value range exposes the three thresholds that
  // split the data into four groups.
  var cutPoints = d3.scale.quantile()
      .domain(data.map(function(datum) { return valueOf(datum); }))
      .range(d3.range(4))
      .quantiles();
  var lowerCut = cutPoints[0],
      middleCut = cutPoints[1],
      upperCut = cutPoints[2];

  var bounds = d3.extent(data, function(datum) { return valueOf(datum); });
  var smallest = bounds[0],
      largest = bounds[1];

  // Dashed centre line spanning the full data range
  var centreX = boxWidth/2;
  var dashes = selection.append("path").datum([
    [centreX, yScale(largest)],
    [centreX, yScale(smallest)]
  ]);
  dashes
      .attr("d", d3.svg.line())
      .style("stroke", "black")
      .style("stroke-dasharray", "4, 10");

  // Box between the lower and upper thresholds; appended after the dashed
  // line and filled white
  var boxTop = yScale(upperCut);
  var box = selection.append("rect");
  box
      .attr("y", boxTop)
      .attr("height", yScale(lowerCut) - yScale(upperCut))
      .attr("width", boxWidth)
      .attr("fill", "white")
      .attr("stroke", "black");

  // Horizontal lines at the maximum, the middle threshold and the minimum
  var marks = [largest, middleCut, smallest];
  selection.selectAll("line").data(marks)
    .enter().append("line")
      .attr("x2", boxWidth)
      .attr("y1", function(value) { return toPixel(value); })
      .attr("y2", function(value) { return toPixel(value); })
      .attr("stroke", "black");
}
</script>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment