Skip to content

Instantly share code, notes, and snippets.

@osroca
Last active September 22, 2016 09:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save osroca/4fb0f64b71e838d70d31 to your computer and use it in GitHub Desktop.
Save osroca/4fb0f64b71e838d70d31 to your computer and use it in GitHub Desktop.
Insulin histogram

A visualization of blood insulin levels from the UCI diabetes dataset. The distribution is stored as a streaming histogram. Brush to zoom. Click to zoom out.

{
"anomaly_seed": "2c249dda00fbf54ab4cdd850532a584f286af5b6",
"category": 0,
"code": 200,
"columns": 1,
"constraints": false,
"created": "2014-10-28T20:13:11.111000",
"credits": 0.69940185546875,
"credits_per_prediction": 0.0,
"dataset": "dataset/5427f656ec65d11671000004",
"dataset_field_types": {
"categorical": 1,
"datetime": 0,
"numeric": 8,
"preferred": 9,
"text": 0,
"total": 9
},
"dataset_status": true,
"dataset_type": 0,
"description": "",
"excluded_fields": [],
"fields_meta": {
"count": 1,
"limit": 1000,
"offset": 0,
"query_total": 1,
"total": 9
},
"forest_size": 128,
"id_fields": [],
"input_fields": [
"000000",
"000001",
"000002",
"000003",
"000004",
"000005",
"000006",
"000007",
"000008"
],
"locale": "en-US",
"max_columns": 9,
"max_rows": 768,
"model": {
"fields": {
"000004": {
"column_number": 4,
"datatype": "int16",
"name": "insulin",
"optype": "numeric",
"order": 0,
"preferred": true,
"summary": {
"bins": [
[
0,
374
],
[
19.33333,
9
],
[
43.4375,
32
],
[
59.894739999999999,
38
],
[
74.384619999999998,
26
],
[
91.217389999999995,
46
],
[
112.23256000000001,
43
],
[
134.34884,
43
],
[
160.17646999999999,
34
],
[
184.35293999999999,
34
],
[
208.57894999999999,
19
],
[
232,
11
],
[
251.40000000000001,
5
],
[
272.69999999999999,
10
],
[
288.5,
6
],
[
304.66667000000001,
3
],
[
324.75,
8
],
[
338.5,
2
],
[
368.33332999999999,
3
],
[
393.66667000000001,
3
],
[
415,
1
],
[
440,
1
],
[
465,
1
],
[
479.39999999999998,
5
],
[
495,
2
],
[
510,
1
],
[
542.66666999999995,
3
],
[
579,
1
],
[
600,
1
],
[
680,
1
],
[
744,
1
],
[
846,
1
]
],
"maximum": 846,
"mean": 79.799480000000003,
"median": 30.5,
"minimum": 0,
"missing_count": 0,
"population": 768,
"splits": [
0.18842999999999999,
30.5,
127.5
],
"standard_deviation": 115.244,
"sum": 61286,
"sum_squares": 15077256,
"variance": 13281.18008
}
}
},
"kind": "iforest",
"mean_depth": 14.774407996894411
},
"name": "diabetes' dataset anomaly detector",
"number_of_anomalyscores": 0,
"number_of_batchanomalyscores": 0,
"number_of_public_anomalyscores": 0,
"out_of_bag": false,
"price": 0.0,
"private": true,
"project": "project/542537ecec65d1fc17000279",
"range": [
1,
768
],
"replacement": false,
"resource": "anomaly/544ff8d7ec65d102f200067d",
"rows": 768,
"sample_rate": 1.0,
"sample_size": 483,
"shared": false,
"size": 26192,
"source": "source/5427f64dec65d11671000000",
"source_status": true,
"status": {
"code": 5,
"elapsed": 2514,
"message": "The anomaly detector has been created",
"progress": 1.0
},
"subscription": true,
"tags": [],
"top_n": 10,
"updated": "2014-10-28T20:13:23.050000",
"white_box": false
}
<!DOCTYPE html>
<meta charset="utf-8">
<style>
.bar {
fill: #5AC;
opacity: 0.6;
}
.axis {
font: 12px sans-serif;
}
.axis path,
.axis line {
fill: none;
stroke: #000;
shape-rendering: crispEdges;
}
.brush .extent {
stroke: #fff;
fill-opacity: .125;
shape-rendering: crispEdges;
}
.fname {
font: 12px sans-serif;
font-weight: bold;
}
</style>
<body>
<script src="https://d3js.org/d3.v3.min.js"></script>
<!--script src="http://localhost:1025/static/js/d3.v3.min.js"></script-->
<script>
/* Estimate the cumulative population of the distribution up to `value`.
 *
 * rawBins: array of bins shaped like the output of loadRawBins
 *          ({mean, pop, exact} plus prev/next on non-exact bins).
 * value:   the point up to which the population is accumulated.
 *
 * Returns the (possibly fractional) population estimate.
 */
function sum(rawBins, value) {
  return rawBins.reduce(function (acc, bin) {
    // Whole bin counted: an exact bin at or below the point, or a bin
    // whose right-hand neighbour has already been reached.
    var wholeBin = (bin.exact && bin.mean <= value) || value >= bin.next;
    if (wholeBin) {
      return acc + bin.pop;
    }
    // Point lies between the previous mean and this bin's mean: take a
    // linear share of the bin's lower half.
    if (bin.prev < value && bin.mean >= value) {
      var missingLeft = (bin.mean - value) / (bin.mean - bin.prev);
      // Optionally, square the fraction for trapezoidal interpolation
      return acc + (1 - missingLeft) * bin.pop / 2;
    }
    // Point lies between this bin's mean and the next mean: the full lower
    // half plus a linear share of the upper half.
    if (bin.mean < value && bin.next >= value) {
      var coveredRight = (value - bin.mean) / (bin.next - bin.mean);
      // Optionally, square the fraction for trapezoidal interpolation
      return acc + (bin.pop / 2 + coveredRight * bin.pop / 2);
    }
    // Bin lies entirely above the point: contributes nothing.
    return acc;
  }, 0);
}
/* Loads the distribution into a convenient format.
 *
 * data: a field description whose `summary` holds either
 *       - `bins`: [mean, population] pairs plus `minimum` / `maximum`, or
 *       - `counts`: [value, population] pairs (used when `bins` is absent).
 *
 * Returns an array of {mean, pop, exact} objects. A bin is marked exact
 * when its population is 1 or its mean equals the summary minimum or
 * maximum. Non-exact bins also carry `prev` and `next`: the means of the
 * neighbouring bins, falling back to the minimum / maximum at the ends.
 * Every `counts` entry is exact.
 */
function loadRawBins(data) {
  var summary = data.summary;
  var bins = [];
  var sbins;
  var i;
  if ("bins" in summary) {
    sbins = summary.bins;
    // Keep these local: assigning them without a declaration would create
    // global variables (and throws a ReferenceError in strict/module code).
    var minimum = summary.minimum;
    var maximum = summary.maximum;
    for (i = 0; i < sbins.length; i++) {
      var bin = {mean: sbins[i][0], pop: sbins[i][1]};
      if (bin.pop === 1 || bin.mean === minimum || bin.mean === maximum) {
        bin.exact = true;
      } else {
        bin.exact = false;
        // Neighbouring means bound the interval this bin is spread over.
        bin.prev = i > 0 ? sbins[i - 1][0] : minimum;
        bin.next = i < sbins.length - 1 ? sbins[i + 1][0] : maximum;
      }
      bins[i] = bin;
    }
  } else {
    sbins = summary.counts;
    for (i = 0; i < sbins.length; i++) {
      bins[i] = {mean: sbins[i][0], pop: sbins[i][1], exact: true};
    }
  }
  return bins;
}
/* Build equidistant bins for visualization given the raw bins and a range.
 *
 * rawBins:     bins as produced by loadRawBins.
 * range:       [low, high] interval to cover.
 * vizBinCount: number of equal-width bins to produce.
 *
 * Returns {bins, maxPop, range}, where each entry of `bins` is
 * {range: [low, high], pop} with `pop` rounded to an integer and `maxPop`
 * the largest such `pop` (never below 0).
 */
function buildViz(rawBins, range, vizBinCount) {
  // Using a slight shift to act like a closed interval
  var edgeShift = 0.0000001;
  var step = (range[1] - range[0]) / vizBinCount;
  var result = {bins: [], maxPop: 0, range: range};

  // Cumulative population at the lower edge of the bin being built.
  var lower = range[0] - edgeShift;
  var lowerTotal = sum(rawBins, lower);

  for (var slot = 0; slot < vizBinCount; slot += 1) {
    var upper = lower + step;
    var upperTotal = sum(rawBins, upper);
    // A bin's population is the difference of the two cumulative totals.
    var population = Math.round(upperTotal - lowerTotal);

    result.maxPop = Math.max(result.maxPop, population);
    result.bins.push({range: [lower + edgeShift, upper], pop: population});

    // The upper edge becomes the lower edge of the following bin.
    lower = upper;
    lowerTotal = upperTotal;
  }
  return result;
}
/* Return `term` with its first character upper-cased; the rest of the
 * string is left untouched.
 */
function capitalize(term) {
  var head = term.substring(0, 1);
  var tail = term.substring(1);
  return head.toUpperCase() + tail;
}
/* Narrow `fullRange` so that sparsely populated tails are cut off.
 *
 * fullRange: [low, high] interval to start from.
 * rawBins:   bins as produced by loadRawBins.
 * buffer:    fraction of the total population allowed in each tail.
 * segments:  number of equal steps `fullRange` is divided into while
 *            scanning inwards from either end.
 *
 * Returns [low, high]: each end is the first scanned position where the
 * cumulative population crosses its threshold, moved one step back
 * outwards.
 */
function trimRange(fullRange, rawBins, buffer, segments) {
  var population = rawBins.reduce(function (acc, bin) {
    return acc + bin.pop;
  }, 0);
  var interval = (fullRange[1] - fullRange[0]) / segments;

  // Walk up from the low end until more than `buffer` of the population
  // lies below the cut.
  var lowThreshold = population * buffer;
  var lowCut = fullRange[0];
  var lowSteps = 0;
  while (lowSteps < segments && !(sum(rawBins, lowCut) > lowThreshold)) {
    lowCut += interval;
    lowSteps += 1;
  }

  // Walk down from the high end until less than (1 - buffer) of the
  // population lies below the cut.
  var highThreshold = population - (population * buffer);
  var highCut = fullRange[1];
  var highSteps = 0;
  while (highSteps < segments && !(sum(rawBins, highCut) < highThreshold)) {
    highCut -= interval;
    highSteps += 1;
  }

  return [lowCut - interval, highCut + interval];
}
// Chart geometry: a 960x500 outer box with margins reserved for the axes.
var margin = {top: 30, right: 40, bottom: 30, left: 40};
var width = 960 - margin.left - margin.right;
var height = 500 - margin.top - margin.bottom;

// `svg` is the inner <g>, shifted by the left/top margins, so everything
// drawn into it uses coordinates in [0, width] x [0, height].
var svg = d3.select("body")
  .append("svg")
    .attr("width", margin.left + width + margin.right)
    .attr("height", margin.top + height + margin.bottom)
  .append("g")
    .attr("transform", ["translate(", margin.left, ",", margin.top, ")"].join(""));
/* Load the model JSON and draw an interactive histogram for field
 * "000004". Brushing a span zooms the x domain to that span; an empty
 * selection (a plain click) restores the initial range.
 */
d3.json('diabetes-000004.json'/*"plasma.json"*/, function(error, data) {
  // Report the real load/parse failure instead of a TypeError on
  // `data.model` when the request did not succeed.
  if (error) throw error;

  data = data.model.fields['000004'];
  var rawBins = loadRawBins(data);
  var x, y;
  var xAxis, yAxis;
  var xAxisG, yAxisG;
  var vizBinCount = 32;
  // Pad the data extent by 1% of its span on each side.
  var buffer = (data.summary.maximum - data.summary.minimum) * 0.01;
  var initRange = [data.summary.minimum - buffer, data.summary.maximum + buffer];
  // Remove to default to the entire range rather than trimming 0.75% from the edges
  initRange = trimRange(initRange, rawBins, 0.0075, 128);

  /* First render: create the scales, the bars, both axes and their labels. */
  function init() {
    var viz = buildViz(rawBins, initRange, vizBinCount);
    x = d3.scale.linear()
      .domain(viz.range)
      .range([0, width]);
    y = d3.scale.linear()
      .domain([0, viz.maxPop])
      .range([height, 0]);
    svg.selectAll(".bar")
      .data(viz.bins)
      .enter().append("rect")
      .attr("class", "bar")
      // 1px inset on each side leaves a 2px gap between neighbouring bars.
      .attr("x", function(d) { return x(d.range[0]) + 1; })
      .attr("width", function(d) { return x(d.range[1]) - x(d.range[0]) - 2; })
      .attr("y", function(d) { return y(d.pop); })
      .attr("height", function(d) { return height - y(d.pop); });
    xAxis = d3.svg.axis()
      .ticks(Math.round(width / 60))
      .scale(x)
      .orient("bottom");
    yAxis = d3.svg.axis()
      .ticks(Math.round(height / 30))
      .scale(y)
      .orient("left");
    xAxisG = svg.append("g")
      .attr("class", "x axis")
      .attr("transform", "translate(0," + height + ")")
      .call(xAxis);
    xAxisG.append("text")
      .attr("class", "fname")
      .attr("y", -16)
      .attr("x", width)
      .attr("dy", ".71em")
      .style("text-anchor", "end")
      .text(capitalize(data.name));
    yAxisG = svg.append("g")
      .attr("class", "y axis")
      .call(yAxis);
    yAxisG.append("text")
      .attr("class", "fname")
      .attr("transform", "rotate(-90)")
      .attr("y", 6)
      .attr("dy", ".71em")
      .style("text-anchor", "end")
      .text("Population");
  }
  init();

  /* Re-bin the distribution for `range` and animate bars and axes to it.
   * Only bar heights change: there are always `vizBinCount` equal-width
   * bins spanning the whole x domain, so their pixel x/width stay put.
   */
  function update(rawBins, range, vizBinCount) {
    var animationDuration = 700;
    var viz = buildViz(rawBins, range, vizBinCount);
    x = d3.scale.linear()
      .domain(viz.range)
      .range([0, width]);
    y = d3.scale.linear()
      .domain([0, viz.maxPop])
      .range([height, 0]);
    svg.selectAll(".bar")
      .data(viz.bins)
      .transition().duration(animationDuration)
      .attr("y", function(d) { return y(d.pop); })
      .attr("height", function(d) { return height - y(d.pop); });
    xAxis = d3.svg.axis()
      .ticks(Math.round(width / 60))
      .scale(x)
      .orient("bottom");
    yAxis = d3.svg.axis()
      .ticks(Math.round(height / 30))
      .scale(y)
      .orient("left");
    xAxisG.transition().duration(animationDuration).call(xAxis);
    yAxisG.transition().duration(animationDuration).call(yAxis);
  }

  // The brush works in pixel space (identity scale); selections are mapped
  // to data values through the current `x` scale when the gesture ends.
  var brushX = d3.scale.identity().domain([0, width]);
  var brushExtent;
  var brush = d3.svg.brush()
    .x(brushX)
    .on("brush", brushed)
    .on("brushend", brushended);
  var gBrush = svg.append("g")
    .attr("class", "brush")
    .call(brush)
    .call(brush.event);
  gBrush.selectAll("rect")
    .attr("height", height);

  /* Remember the latest pixel extent while the brush is being moved. */
  function brushed() {
    brushExtent = brush.extent();
  }

  /* On release, zoom to the brushed span or reset on an empty selection. */
  function brushended() {
    if (!d3.event.sourceEvent) return; // only transition after input
    var start = x.invert(brushExtent[0]);
    var end = x.invert(brushExtent[1]);
    // Clear the selection rectangle; a one-dimensional brush takes [x0, x1].
    d3.select(this).call(brush.extent([0, 0]));
    if (start === end) {
      update(rawBins, initRange, vizBinCount);
    } else {
      update(rawBins, [start, end], vizBinCount);
    }
  }
});
</script>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment