Skip to content

Instantly share code, notes, and snippets.

@vsapsai
Last active October 23, 2017 02:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vsapsai/1e5f60e33a8b7ae49c8a10451f84f0e4 to your computer and use it in GitHub Desktop.
Save vsapsai/1e5f60e33a8b7ae49c8a10451f84f0e4 to your computer and use it in GitHub Desktop.
Experimenting with Edward Tufte-style boxplots for requests' latencies and volume
license: mit

Experimenting with Edward Tufte-style boxplots for requests' latencies and volume. The main goal is to gain some insight into the latency distribution for different operations (inspired by the talk "Achieving Rapid Response Times in Large Online Services" by Jeff Dean). Request volume was added because I had no other ideas for the cross value, and it can help to decide which latencies should be improved first. The data used is made up and not based on any real service (though it would be interesting to look at real data).

Known problems with current implementation:

  • percentile values are noisy;
  • the meaning of each number is not clear;
  • it is not clear where 0 is on the y-axis.

This representation isn't suitable for observing continuous data changes over time. But I don't think that's a deficiency: I deliberately took the time dimension out so that latencies can be compared at different percentiles and for different operations. Boxplots should still be applicable for comparison over distinct time periods, though, like weeks or months.

Mike Bostock's implementation of Box Plots may also be relevant.

<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Boxplot for requests latencies and volume</title>
<style>
/* Default typography for every SVG <text> element (the numeric labels). */
text {
font-size: 12px;
font-family: sans-serif;
}
/* Operation-name labels (class "label") are set larger than the numeric labels. */
text.label {
font-size: 15px;
}
/* The horizontal volume line ("cross-value") and the vertical percentile
   lines share one thin black stroke; crispEdges disables anti-aliasing. */
line.cross-value, line.percentile-value {
stroke: black;
stroke-width: 1px;
shape-rendering: crispEdges;
}
</style>
</head>
<body>
<svg width="960" height="500"></svg>
<script src="https://d3js.org/d3.v4.min.js"></script>
<script>
// Chart geometry. The drawable area is the <svg> element's size minus the
// margins; all marks are appended to `g`, which is shifted by the top/left
// margin.
var svg = d3.select("svg"),
    margin = {top: 20, right: 10, bottom: 40, left: 10},
    // Read the size from the element's width/height attributes so that the
    // chart dimensions are declared in one place (the markup). Fall back to
    // the original 960x500 if an attribute is missing or not a plain number.
    width = (+svg.attr("width") || 960) - margin.left - margin.right,
    height = (+svg.attr("height") || 500) - margin.top - margin.bottom,
    g = svg.append("g")
        .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

// Band scale across the full width with 10% inner padding between bands.
// Its domain is not set here.
var firstLevelXScale = d3.scaleBand()
    .rangeRound([0, width])
    .paddingInner(0.1);

// Linear vertical scale. The range is inverted ([height, 0]) because SVG's
// y coordinate grows downward. Its domain is not set here.
var yScale = d3.scaleLinear()
    .rangeRound([height, 0]);
// Load the per-operation metrics and draw one Tufte-style boxplot per CSV row.
// The code relies on this column layout: the first column is Name, the second
// is Volume, and every column after that is a latency percentile
// (Percentile0, Percentile50, Percentile90, Percentile99, Percentile100).
d3.csv("request_metrics.csv", function(error, metricsData) {
  if (error) {
    throw error;
  }

  // Every column after the first (Name) is numeric.
  var numericColumns = metricsData.columns.slice(1);
  // Every column after Volume is a latency value; computed once, not per row.
  var latencyColumns = metricsData.columns.slice(2);

  // Convert strings to numbers.
  metricsData.forEach(function(item) {
    numericColumns.forEach(function(columnName) {
      item[columnName] = +item[columnName];
    });
  });

  // Calculate scales.
  firstLevelXScale.domain(metricsData.map(function(item) { return item.Name; }));

  // Fall back to 1 when there is no positive volume at all. A [-0, 0] domain
  // is degenerate and would map every value, including the center line at 0,
  // to the left edge of the band instead of its middle.
  var maxVolume = d3.max(metricsData, function(item) { return item.Volume; }) || 1;
  var secondLevelXScale = d3.scaleLinear()
      .rangeRound([0, firstLevelXScale.bandwidth()])
      // Use negative domain to achieve horizontal centering.
      .domain([-maxVolume, maxVolume]);

  // Largest latency over all rows and all latency columns.
  var maxLatency = d3.max(metricsData, function(item) {
    return d3.max(latencyColumns, function(columnName) {
      return item[columnName];
    });
  });
  yScale.domain([0, maxLatency]);

  g.selectAll(".box")
    .data(metricsData)
    .enter().append("g")
      .attr("class", "box")
      .call(singleBox);

  // Draws the lines and labels of one boxplot into each group of the
  // selection `g`; every group is bound to one CSV row.
  function singleBox(g) {
    // Both positions are the same for every row, so compute them once.
    var centerX = secondLevelXScale(0);
    var baselineY = yScale(0);
    var format = d3.format(",.6");

    // Accessor factory: vertical position of the given latency column.
    function yAt(columnName) {
      return function(d) { return yScale(d[columnName]); };
    }

    // Accessor factory: formatted text of the given column.
    function formatted(columnName) {
      return function(d) { return format(d[columnName]); };
    }

    // Vertical line on the center axis between two percentile columns.
    function appendWhisker(fromColumn, toColumn) {
      g.append("line")
        .attr("class", "percentile-value")
        .attr("x1", centerX)
        .attr("y1", yAt(fromColumn))
        .attr("x2", centerX)
        .attr("y2", yAt(toColumn));
    }

    // Centered text below the baseline; `dy` selects the text row.
    function appendFooter(className, dy, text) {
      g.append("text")
        .attr("class", className)
        .attr("x", centerX)
        .attr("y", baselineY)
        .attr("dy", dy)
        .attr("text-anchor", "middle")
        .text(text);
    }

    g.attr("transform", function(d) {
      return "translate(" + firstLevelXScale(d.Name) + ",0)";
    });

    // Lines.
    // Horizontal line at the 90th percentile; its length encodes the volume.
    g.append("line")
      .attr("class", "cross-value")
      .attr("x1", function(d) { return secondLevelXScale(-d.Volume); })
      .attr("y1", yAt("Percentile90"))
      .attr("x2", function(d) { return secondLevelXScale(d.Volume); })
      .attr("y2", yAt("Percentile90"));
    // No line is drawn between the 50th and 99th percentiles.
    appendWhisker("Percentile0", "Percentile50");
    appendWhisker("Percentile99", "Percentile100");

    // Labels.
    // ...percentile values, to the right of the center axis.
    ["Percentile0", "Percentile50", "Percentile99", "Percentile100"]
      .forEach(function(percentileColumn) {
        g.append("text")
          .attr("class", "percentile-text")
          .attr("x", centerX)
          .attr("y", yAt(percentileColumn))
          .attr("dx", "5px")
          .attr("dy", ".3em")
          .text(formatted(percentileColumn));
      });
    // The 90th percentile label is end-anchored and sits just above the
    // volume line, so it ends where the other labels begin.
    g.append("text")
      .attr("class", "percentile-text")
      .attr("x", centerX)
      .attr("y", yAt("Percentile90"))
      .attr("dx", "5px")
      .attr("dy", "-2px")
      .attr("text-anchor", "end")
      .text(formatted("Percentile90"));

    // ...operation name.
    appendFooter("label", "1em", function(d) { return d.Name; });
    // ...volume value.
    appendFooter("cross-text", "2.5em", formatted("Volume"));
  }
});
</script>
</body>
</html>
Name Volume Percentile0 Percentile50 Percentile90 Percentile99 Percentile100
listFoo 45 3.8 6.3 8.9 14.5 27.6
getFoo 156 0.5 1.7 4.6 9.1 10.0
createFoo 23 4.2 6.3 11.7 13.2 35.4
deleteFoo 17 2.7 4.4 8.1 9.7 13.4
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment