Skip to content

Instantly share code, notes, and snippets.

@ashenfad
Last active August 29, 2015 13:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ashenfad/10781486 to your computer and use it in GitHub Desktop.
Save ashenfad/10781486 to your computer and use it in GitHub Desktop.
BigML Clusters - Pima Diabetes

A visualization of five clusters discovered on four fields from the Pima Indian Diabetes dataset.

Each cluster is represented by a ball. Each ball's radius is proportional to the cube root of its cluster's population, so the ball's volume is proportional to the population.

The y-axis shows the distance of each cluster to the current point (selected by the sliders). The lower a cluster's position, the nearer it is to the current point. The order of the clusters on the x-axis is from nearest to furthest.

The initial point is the median for each field. Selecting a cluster will set the current point equal to the cluster's centroid.

Finally, the colors on each slider represent the closest cluster to the current point for that range.

<!DOCTYPE html>
<meta charset="utf-8">
<style>
/* Axis text: small sans-serif labels that cannot be text-selected. */
.axis {
font: 10px sans-serif;
-webkit-user-select: none;
-moz-user-select: none;
user-select: none;
}
/* Default look of an axis domain path: a thick, rounded, translucent stroke.
   The slider axes built in make_slider override stroke and fill inline. */
.axis .domain {
fill: none;
stroke: #000;
stroke-opacity: .3;
stroke-width: 10px;
stroke-linecap: round;
}
/* Slider handle: white disc with a thin translucent outline.
   pointer-events is disabled on the handle itself. */
.slider .handle {
fill: #fff;
stroke: #000;
stroke-opacity: .5;
stroke-width: 1.25px;
pointer-events: none;
}
</style>
<body>
<script src="http://d3js.org/d3.v3.min.js"></script>
<script>
// Layout: a 960x500 outer canvas; `width` and `height` describe the inner
// drawing area that remains once the margins are subtracted.
var margin = {top: 40, right: 20, bottom: 50, left: 20};
var width = 960 - margin.left - margin.right;
var height = 500 - margin.bottom - margin.top;

// `svg` refers to the <g> nested inside the root <svg>; it is shifted by the
// left/top margins so everything appended to it uses inner-area coordinates.
var svg = d3.select("body")
  .append("svg")
  .attr("width", width + margin.left + margin.right)
  .attr("height", height + margin.top + margin.bottom)
  .append("g")
  .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

// <defs> is appended to the first <svg> in the document; the slider
// gradients are registered under it.
var defs = d3.select("svg").append("defs");
d3.json("pima.json", function(error, root) {
var model = root.model;
var fields = model.fields;
var max_y = height / 2;
var buffer = width / (model.clusters.length + 1);
var slider_size = width / 3;
/**
 * Draws one thin, light-grey horizontal guide line across the cluster plot.
 * The line starts 20px left of the first ball slot and ends 20px right of
 * the last one.
 *
 * @param {number} y_loc - Vertical position of the line within `svg`.
 */
function make_hline(y_loc) {
  var left_edge = buffer - 20;
  var span = width - 2 * buffer + 40;

  var guide = svg.append("rect");
  guide
    .attr("x", left_edge)
    .attr("y", y_loc)
    .attr("width", span)
    .attr("height", 0.5);
  guide
    .style("fill", "none")
    .style("stroke", "#ccc");
}
// Guide lines at 0%, 25%, 50%, 75% and 100% of the cluster plot's height.
make_hline(0);
make_hline(max_y / 4);
make_hline(max_y / 2);
make_hline(3 * max_y / 4);
make_hline(max_y);

// Per-field state. Every array is indexed by the field's position in `keys`.
var handle_scales = [];  // value -> pixel scale per slider (filled by make_slider)
var mins = [];           // field minimum
var maxs = [];           // field maximum
var keys = [];           // field ids, in iteration order
var point = [];          // the currently selected point; starts at the medians
var fnames = [];         // display names

// `id` is declared explicitly: the bare `for (id in ...)` form would create
// an implicit global.
var i = 0;
var id;
for (id in fields) {
  if (!fields.hasOwnProperty(id)) { continue; }
  keys[i] = id;
  point[i] = fields[id].summary.median;
  mins[i] = fields[id].summary.minimum;
  maxs[i] = fields[id].summary.maximum;
  fnames[i] = fields[id].name;
  // make_slider reads point[i] and writes handle_scales[i], so it has to run
  // after the assignments above.
  make_slider(fields[id], i);
  i = i + 1;
}

// Per-field scaling factors applied before distances are measured.
var scales = keys.map(function (key) {
  return model.final_field_scales[key];
});
// Cluster tables, indexed by the cluster's position in model.clusters:
//   centroids[c]        - centre in raw field units, ordered like `keys`
//   scaled_centroids[c] - the same centre multiplied by the field scales
//   cnames[c]           - display name
var centroids = [];
var scaled_centroids = [];
var cnames = [];
// Largest cluster population; make_balls sizes every ball relative to it.
var max_cluster_count = 0;

// model.clusters is an array, so iterate it by index rather than with
// for...in (which also leaked the loop variable as an implicit global).
model.clusters.forEach(function (cluster, c) {
  cnames[c] = cluster.name;
  max_cluster_count = Math.max(max_cluster_count, cluster.count);
  centroids[c] = keys.map(function (key) {
    return cluster.center[key];
  });
  scaled_centroids[c] = keys.map(function (key, k) {
    return scales[k] * cluster.center[key];
  });
});
// Paint the slider tracks for the initial point.
update_ranges();

// Largest distance between any two centroids; ball heights are expressed as
// a fraction of this value (and capped at it).
var max_dist = 0;
for (var i = 0; i < centroids.length - 1; i++) {
  for (var j = i + 1; j < centroids.length; j++) {
    max_dist = Math.max(max_dist, dist(centroids[i], centroids[j]));
  }
}

// Label above the plot; update_dists fills in its text and colour.
svg.append("text")
  .attr("class", "nearest")
  .attr("x", buffer - 20)
  .attr("y", -20);

var dists = [];       // dists[c]: distance from the current point to cluster c
var dist_order = [];  // dist_order[c]: rank of cluster c, 0 = nearest
update_dists();

// One ball per cluster. model.clusters is an array, so iterate it by index
// rather than with for...in (which also leaked an implicit global).
model.clusters.forEach(function (cluster, index) {
  make_balls(cluster, index);
});
/**
 * Appends the circle ("ball") for one cluster and wires up its interactions.
 *
 * The radius is 30px times the cube root of the cluster's share of the
 * largest cluster's population. Clicking the ball moves the current point to
 * the cluster's centroid and refreshes every dependent view; hovering
 * enlarges and brightens it.
 *
 * @param {Object} cluster - Cluster entry from the model; only `count` is read.
 * @param {number} index - Position of the cluster in model.clusters.
 */
function make_balls(cluster, index) {
  var largest_radius = 30;
  var base_color = d3.scale.category10().range()[index];
  var radius = largest_radius * Math.pow(cluster.count / max_cluster_count, 1 / 3);

  // Slot on the x-axis follows the distance ranking; height follows the
  // distance itself, capped at max_dist.
  var center_x = (dist_order[index] + 1) * buffer;
  var center_y = (1 - Math.min(dists[index] / max_dist, 1)) * max_y;

  function select_cluster() {
    // Copy the centroid so later slider moves do not modify it.
    point = centroids[index].slice(0);
    update_dists();
    update_ranges();
    update_balls(300);
    update_handles(300);
    update_field_text();
  }

  function highlight() {
    d3.select(this)
      .attr("r", 1.1 * radius)
      .style("stroke", base_color)
      .style("fill", d3.rgb(base_color).brighter());
  }

  function restore() {
    d3.select(this)
      .attr("r", radius)
      .style("stroke", d3.rgb(base_color).darker())
      .style("fill", base_color);
  }

  var ball = svg.append("circle")
    .attr("class", "cluster")
    .attr("r", radius)
    .attr("cx", center_x)
    .attr("cy", center_y)
    .style("stroke", d3.rgb(base_color).darker())
    .style("fill", base_color);

  ball.on("click", select_cluster)
    .on("mouseover", highlight)
    .on("mouseout", restore);
}
/**
 * Recomputes, for the current point, the distance to every centroid
 * (`dists`), each cluster's rank by distance (`dist_order`, 0 = nearest),
 * and the "Nearest to ..." label text and colour.
 */
function update_dists() {
  var ranking = [];
  var nearest = -1;
  var nearest_dist;

  for (var c = 0; c < centroids.length; c++) {
    var d = dist(point, centroids[c]);
    dists[c] = d;
    ranking.push({d: d, i: c});
    // Strict comparison: on ties the earliest cluster stays the nearest.
    if (nearest === -1 || d < nearest_dist) {
      nearest = c;
      nearest_dist = d;
    }
  }

  ranking.sort(function (a, b) {
    return a.d < b.d ? -1 : (a.d > b.d ? 1 : 0);
  });
  ranking.forEach(function (entry, rank) {
    dist_order[entry.i] = rank;
  });

  var label = "Nearest to " + cnames[nearest]
    + " - Distance: " + dists[nearest].toFixed(4);
  d3.select(".nearest")
    .text(label)
    .style("fill", d3.scale.category10().range()[nearest]);
}
/**
 * Rebuilds the colour gradient of every slider track.
 *
 * For field i, all other coordinates of the current point are held fixed
 * while field i sweeps from its minimum to its maximum. The track is split
 * into segments, each coloured by the cluster nearest to the point within
 * that segment. Segment boundaries are where the sweep line crosses the
 * plane equidistant from the current cluster and another cluster.
 *
 * Each segment is written as two stops of the same colour (start and end),
 * so neighbouring segments meet at a hard edge instead of blending.
 */
function update_ranges() {
  var palette = d3.scale.category10().range();

  for (var i = 0; i < scales.length; i++) {
    var grad = d3.select("#g" + i);
    grad.selectAll("stop").remove();

    // Find the cluster nearest to the point with field i at its minimum.
    var min_pt = point.slice(0);
    min_pt[i] = mins[i];
    var min_index = -1;
    var min_dist;
    for (var j = 0; j < centroids.length; j++) {
      var d = dist(min_pt, centroids[j]);
      if (min_index === -1 || min_dist > d) {
        min_index = j;
        min_dist = d;
      }
    }

    var p = min_pt[i];   // start of the current segment, in field units
    var c = min_index;   // cluster owning the current segment
    var range = maxs[i] - mins[i];

    // The sweep line depends only on the point and the field, so build it
    // once per field instead of once per candidate cluster.
    var sweep = line(i);

    // Clusters whose segment has already been emitted. A nearest-centroid
    // region is convex, so the sweep line cannot re-enter one it has left;
    // skipping them also guards against round-off producing a spurious
    // crossing just past p. The set is kept across iterations of the
    // while-loop and indexed by the cluster index itself (indexing by the
    // `toString` function object would never match anything).
    var used = {};
    used[c] = true;

    while (p < maxs[i]) {
      grad.append("svg:stop")
        .attr("offset", (100 * (p - mins[i]) / range).toFixed(2) + "%")
        .attr("stop-color", palette[c])
        .attr("stop-opacity", 1);

      // The segment ends at the nearest crossing beyond p, or at the field
      // maximum when there is none.
      var nextp = maxs[i];
      var nextc = c;
      for (var k = 0; k < centroids.length; k++) {
        if (used[k] === true) { continue; }
        var candidate = vdist(sweep, plane(k, c));
        if (candidate < nextp && candidate > p) {
          nextp = candidate;
          nextc = k;
        }
      }

      // Closing stop uses the current colour so the edge stays sharp.
      grad.append("svg:stop")
        .attr("offset", (100 * (nextp - mins[i]) / range).toFixed(2) + "%")
        .attr("stop-color", palette[c])
        .attr("stop-opacity", 1);

      p = nextp;
      c = nextc;
      used[c] = true;
    }
  }
}
/**
 * Moves every cluster ball to the slot and height that match the current
 * `dist_order` and `dists`.
 *
 * @param {number} duration - Base transition length in ms; 50ms is added.
 */
function update_balls(duration) {
  var spacing = width / (model.clusters.length + 1);

  // Bind each ball to its cluster index: [0, 1, ..., centroids.length - 1].
  var cluster_ids = [];
  for (var c = 0; c < centroids.length; c++) {
    cluster_ids.push(c);
  }

  var slot_x = function (d) {
    return (dist_order[d] + 1) * spacing;
  };
  var height_y = function (d) {
    return (1 - Math.min(dists[d] / max_dist, 1)) * max_y;
  };

  d3.selectAll(".cluster")
    .data(cluster_ids)
    .transition()
    .duration(duration + 50)
    .attr("cx", slot_x)
    .attr("cy", height_y);
}
/**
 * Builds the slider for one field: a labelled axis, a gradient-filled track,
 * a text label showing the current value, a circular handle, and the brush
 * that turns mouse input into a new value for point[index].
 *
 * @param {Object} field - Field entry from the model; reads `name` and
 *     `summary.minimum` / `summary.maximum`.
 * @param {number} index - Position of the field in `point`; also selects the
 *     slider's grid cell and gradient id.
 */
function make_slider(field, index) {
var summary = field.summary;
// Sliders sit in a two-column grid: index % 2 picks the column and
// Math.floor(index / 2) the row (rows are 60px apart, starting 60px below
// the cluster plot). This local `buffer` is the horizontal gap around the
// columns.
var buffer = (width - (2 * slider_size)) / 3;
var x_loc = ((index % 2) + 1) * buffer + (index % 2) * slider_size;
var y_loc = max_y + 60 + Math.floor(index / 2) * 60;
// Maps a field value to a pixel offset along the track; clamped so values
// outside [minimum, maximum] land on the track's ends.
var x = d3.scale.linear()
.domain([summary.minimum, summary.maximum])
.range([0, slider_size])
.clamp(true);
handle_scales[index] = x;
var brush = d3.svg.brush().x(x);
// On every brush event: take the new value, move the handle, store the value
// in `point`, then refresh distances, track colours, balls and labels.
// `handle` is assigned further down in this function (var hoisting), which
// happens before this handler is ever invoked.
brush.on("brush", function() {
var value = brush.extent()[0];
if (d3.event.sourceEvent) { // not a programmatic event
// The mouse position is relative to the slider group, so subtract the
// slider's x offset before inverting; the extent is collapsed to that
// single value.
value = x.invert(d3.mouse(this)[0] - x_loc);
brush.extent([value, value]);
}
handle.attr("cx", x(value));
point[index] = value;
update_dists();
update_ranges();
update_balls(0);
update_field_text();
});
// Axis with tick labels only: tick marks have size 0 and the domain path is
// hidden, since the gradient rect below serves as the visible track.
var axis = svg.append("g")
.attr("class", "x axis")
.attr("transform", "translate(" + x_loc + "," + y_loc + ")");
axis.call(d3.svg.axis()
.scale(x)
.orient("bottom")
.tickFormat(function(d) { return d; })
.tickSize(0)
.tickPadding(10))
.select(".domain")
.style("stroke", "none")
.style("fill", "none");
// Horizontal (left-to-right) gradient with id "g<index>". It is created
// without stops; update_ranges selects it by this id and fills them in.
// The `gradient` variable is not read again in this function.
var gradient_name = "g" + index;
var gradient = defs.append("linearGradient")
.attr("id", gradient_name)
.attr("field_id", index)
.attr("x1", "0%")
.attr("y1", "0%")
.attr("x2", "100%")
.attr("y2", "0%");
// The visible track: an 8px-tall rounded bar filled with the gradient.
axis.append("rect")
.attr("class", "range")
.attr("y", -4)
.attr("height", 8)
.attr("width", slider_size)
.attr("field_index", index)
.attr("rx", 4)
.attr("ry", 4)
.style("stroke", "#333")
.style("fill", "url(#" + gradient_name + ")");
// Group that carries the brush. It is not translated itself, so each child
// below is positioned explicitly with x_loc / y_loc.
var slider = svg.append("g")
.attr("class", "slider")
.call(brush);
// Label above the track: "<field name>: <current value>".
slider.append("text")
.attr("class", "field_text")
.attr("transform", "translate(" + x_loc + "," + (y_loc - 13) + ")")
.text(field.name + ": " + point[index].toFixed(2));
// Drop the brush's extent and resize elements, then turn its background into
// a 40px-tall strip positioned over the track.
slider.selectAll(".extent,.resize").remove();
slider.select(".background").attr("height", 40)
.attr("x", x_loc)
.attr("y", y_loc - 20);
// The draggable-looking knob, placed at the current value of the field.
var handle = slider.append("circle")
.attr("class", "handle")
.attr("transform", "translate(" + x_loc + "," + (y_loc - 1) + ")")
.attr("r", 8)
.attr("cx", handle_scales[index](point[index]));
// Start the brush collapsed at the current value.
brush.extent([point[index], point[index]]);
}
/**
 * Animates every slider handle to the position matching the current point.
 *
 * @param {number} duration - Transition length in ms.
 */
function update_handles(duration) {
  // Bind each handle to its field index: [0, 1, ..., point.length - 1].
  var field_ids = [];
  for (var f = 0; f < point.length; f++) {
    field_ids.push(f);
  }

  var handle_x = function (d) {
    return handle_scales[d](point[d]);
  };

  d3.selectAll(".handle")
    .data(field_ids)
    .transition()
    .duration(duration)
    .attr("cx", handle_x);
}
/**
 * Rewrites every slider label as "<field name>: <value>", with the value
 * taken from the current point and shown to two decimals.
 */
function update_field_text() {
  // Bind each label to its field index: [0, 1, ..., point.length - 1].
  var field_ids = [];
  for (var f = 0; f < point.length; f++) {
    field_ids.push(f);
  }

  var label_for = function (d) {
    return fnames[d] + ": " + point[d].toFixed(2);
  };

  d3.selectAll(".field_text")
    .data(field_ids)
    .text(label_for);
}
/**
 * Euclidean distance between two points after multiplying each coordinate
 * difference by that field's entry in `scales`.
 *
 * @param {number[]} p1 - First point, in raw field units.
 * @param {number[]} p2 - Second point, in raw field units.
 * @returns {number} The scaled distance.
 */
function dist(p1, p2) {
  var sum_of_squares = p1.reduce(function (acc, coord, k) {
    var delta = (coord - p2[k]) * scales[k];
    return acc + delta * delta;
  }, 0);
  return Math.sqrt(sum_of_squares);
}
/**
 * Returns a copy of a point with every coordinate multiplied by that
 * field's entry in `scales`. The input array is left untouched.
 *
 * @param {number[]} point - Point in raw field units.
 * @returns {number[]} The point in scaled units.
 */
function scale_point(point) {
  return point.map(function (coord, k) {
    return scales[k] * coord;
  });
}
/**
 * Dot product of two vectors; iterates over the length of `v1`.
 *
 * @param {number[]} v1 - First vector.
 * @param {number[]} v2 - Second vector.
 * @returns {number} Sum of the element-wise products.
 */
function dotp(v1, v2) {
  return v1.reduce(function (acc, component, k) {
    return acc + component * v2[k];
  }, 0);
}
/**
 * Describes the plane lying halfway between two scaled centroids.
 *
 * @param {number} cent_id1 - Index of the first centroid.
 * @param {number} cent_id2 - Index of the second centroid.
 * @returns {{n: number[], o: number[]}} `o` is the midpoint of the two
 *     centroids; `n` is the vector from the first centroid to the second.
 */
function plane(cent_id1, cent_id2) {
  var from = scaled_centroids[cent_id1];
  var to = scaled_centroids[cent_id2];

  var midpoint = from.map(function (_, k) {
    return (to[k] + from[k]) / 2;
  });
  var normal = from.map(function (_, k) {
    return to[k] - from[k];
  });

  return {n: normal, o: midpoint};
}
/**
 * Describes the line traced by the current point when one field varies and
 * all other fields keep their current values, in scaled units.
 *
 * @param {number} field_index - Index of the field that varies.
 * @returns {{i: number, o: number[], v: number[]}} `i` is the field index,
 *     `o` is the scaled current point with coordinate `i` set to 0, and `v`
 *     is a vector that is 1 at `i` and 0 elsewhere.
 */
function line(field_index) {
  var origin = scale_point(point);
  origin[field_index] = 0;

  var direction = [];
  for (var k = 0; k < point.length; k++) {
    direction.push(0);
  }
  direction[field_index] = 1;

  return {i: field_index, o: origin, v: direction};
}
/**
 * Finds where a line (as built by `line`) crosses a plane (as built by
 * `plane`) and returns the crossing as a value of the line's field in raw
 * (unscaled) units.
 *
 * @param {{i: number, o: number[], v: number[]}} line - The line.
 * @param {{n: number[], o: number[]}} plane - The plane.
 * @returns {number} Field value at the crossing; the result is not finite
 *     when dotp(line.v, plane.n) is 0.
 */
function vdist(line, plane) {
  // Vector from the line's origin to the plane's origin.
  var offset = plane.o.map(function (coord, k) {
    return coord - line.o[k];
  });
  var along_line = dotp(plane.n, offset) / dotp(line.v, plane.n);
  // Convert from scaled units back to the field's own units.
  return along_line / scales[line.i];
}
});
</script>
{
"model" : {
"kind" : "kmeans",
"dataset_id" : "1397472773842",
"missing_tokens" : [ "", "N/A", "n/a", "NULL", "null", "-", "#DIV/0", "#REF!", "#NAME?", "NIL", "nil", "NA", "na", "#VALUE!", "#NULL!", "NaN", "#N/A", "#NUM!", "?" ],
"branching-factor" : 4,
"locale" : "en_US",
"k" : 5,
"type" : "unsupervised",
"clusters" : [ {
"name" : "Cluster 1",
"id" : "000000",
"center" : {
"000000" : 3.55949,
"000001" : 0.66835,
"000002" : 25.77949,
"000003" : 30.33924
},
"distance" : {
"standard_deviation" : 2.04073,
"mean" : 3.53979,
"median" : 3.04558,
"minimum" : 0.56653,
"sum_squares" : 596.84575,
"bins" : [ [ 0.56653, 1 ], [ 0.98173, 1 ], [ 1.07152, 1 ], [ 1.12304, 1 ], [ 1.2643, 1 ], [ 1.35707, 1 ], [ 1.42209, 1 ], [ 1.59626, 2 ], [ 1.97406, 1 ], [ 2.08139, 2 ], [ 2.27839, 1 ], [ 2.31018, 1 ], [ 2.39575, 1 ], [ 2.5942, 1 ], [ 2.79284, 1 ], [ 2.94688, 1 ], [ 3.14428, 1 ], [ 3.33957, 1 ], [ 3.4052, 1 ], [ 4.04064, 1 ], [ 4.21368, 1 ], [ 4.42075, 1 ], [ 4.49325, 1 ], [ 4.57777, 1 ], [ 5.97903, 1 ], [ 6.02109, 2 ], [ 6.06221, 1 ], [ 6.18773, 2 ], [ 6.28665, 1 ], [ 6.52812, 1 ], [ 6.7601, 1 ], [ 7.3298, 1 ] ],
"sum" : 127.43258,
"population" : 36,
"maximum" : 7.3298,
"variance" : 4.16459
},
"count" : 36
}, {
"name" : "Cluster 2",
"id" : "000001",
"center" : {
"000000" : 8.33671,
"000001" : 77.39797,
"000002" : 33.6574,
"000003" : 42.89001
},
"distance" : {
"standard_deviation" : 0.8334,
"mean" : 2.14904,
"median" : 2.04279,
"minimum" : 0.46738,
"sum_squares" : 960.94719,
"bins" : [ [ 0.46738, 1 ], [ 0.56376, 3 ], [ 0.66223, 1 ], [ 0.81007, 4 ], [ 0.90801, 4 ], [ 1.02702, 2 ], [ 1.17505, 8 ], [ 1.28424, 6 ], [ 1.44456, 14 ], [ 1.58378, 11 ], [ 1.715, 6 ], [ 1.82648, 15 ], [ 1.92974, 13 ], [ 2.04279, 5 ], [ 2.21871, 14 ], [ 2.33211, 9 ], [ 2.46184, 15 ], [ 2.57877, 4 ], [ 2.67725, 2 ], [ 2.7746, 3 ], [ 2.91793, 8 ], [ 3.08154, 12 ], [ 3.25344, 9 ], [ 3.38216, 2 ], [ 3.48244, 3 ], [ 3.67815, 1 ], [ 3.84757, 1 ], [ 4.05894, 1 ], [ 4.1944, 1 ], [ 4.3668, 1 ], [ 4.62029, 1 ], [ 4.93763, 1 ] ],
"sum" : 388.97658,
"population" : 181,
"maximum" : 4.93763,
"variance" : 0.69456
},
"count" : 181
}, {
"name" : "Cluster 3",
"id" : "000002",
"center" : {
"000000" : 2.22434,
"000001" : 67.17303,
"000002" : 29.15806,
"000003" : 26.05825
},
"distance" : {
"standard_deviation" : 0.71185,
"mean" : 1.73217,
"median" : 1.6663,
"minimum" : 0.23765,
"sum_squares" : 1188.40937,
"bins" : [ [ 0.23765, 1 ], [ 0.54048, 2 ], [ 0.61407, 2 ], [ 0.74658, 6 ], [ 0.81783, 5 ], [ 0.92198, 17 ], [ 1.01846, 14 ], [ 1.09842, 13 ], [ 1.19938, 24 ], [ 1.3098, 17 ], [ 1.4161, 23 ], [ 1.51966, 23 ], [ 1.61602, 21 ], [ 1.7028, 20 ], [ 1.8065, 23 ], [ 1.90999, 19 ], [ 1.98364, 23 ], [ 2.10338, 15 ], [ 2.18532, 11 ], [ 2.26529, 4 ], [ 2.34244, 14 ], [ 2.44257, 7 ], [ 2.52123, 8 ], [ 2.62252, 7 ], [ 2.7248, 7 ], [ 2.82211, 5 ], [ 3.11514, 2 ], [ 3.36365, 1 ], [ 3.43069, 1 ], [ 3.70389, 1 ], [ 5.32341, 1 ], [ 6.77105, 2 ] ],
"sum" : 587.20473,
"population" : 339,
"maximum" : 6.78712,
"variance" : 0.50672
},
"count" : 339
}, {
"name" : "Cluster 4",
"id" : "000003",
"center" : {
"000000" : 1.61226,
"000001" : 78.52337,
"000002" : 41.85732,
"000003" : 28.72184
},
"distance" : {
"standard_deviation" : 0.90982,
"mean" : 1.91878,
"median" : 1.76477,
"minimum" : 0.48564,
"sum_squares" : 644.02747,
"bins" : [ [ 0.50296, 2 ], [ 0.59756, 2 ], [ 0.74193, 2 ], [ 0.92294, 3 ], [ 1.04193, 5 ], [ 1.13583, 6 ], [ 1.21511, 5 ], [ 1.29393, 10 ], [ 1.38344, 10 ], [ 1.4937, 11 ], [ 1.60846, 5 ], [ 1.70004, 10 ], [ 1.80705, 7 ], [ 1.9439, 16 ], [ 2.0235, 10 ], [ 2.13631, 5 ], [ 2.20307, 2 ], [ 2.27238, 4 ], [ 2.37803, 4 ], [ 2.53424, 3 ], [ 2.73912, 2 ], [ 2.84819, 5 ], [ 2.95334, 3 ], [ 3.56033, 2 ], [ 3.6994, 2 ], [ 3.82821, 1 ], [ 4.10129, 1 ], [ 4.20985, 1 ], [ 4.54442, 1 ], [ 4.64529, 1 ], [ 5.39865, 1 ], [ 6.47886, 1 ] ],
"sum" : 274.38519,
"population" : 143,
"maximum" : 6.47886,
"variance" : 0.82777
},
"count" : 143
}, {
"name" : "Cluster 5",
"id" : "000004",
"center" : {
"000000" : 4.24025,
"000001" : 77.44774,
"000002" : 28.1922,
"000003" : 58.22621
},
"distance" : {
"standard_deviation" : 1.05874,
"mean" : 2.39041,
"median" : 2.21211,
"minimum" : 0.9236,
"sum_squares" : 470.49286,
"bins" : [ [ 0.9236, 1 ], [ 1.0598, 2 ], [ 1.14461, 2 ], [ 1.39544, 3 ], [ 1.57696, 4 ], [ 1.65804, 3 ], [ 1.70767, 3 ], [ 1.81703, 6 ], [ 1.92659, 2 ], [ 1.98454, 2 ], [ 2.09413, 5 ], [ 2.17588, 1 ], [ 2.22536, 2 ], [ 2.29422, 3 ], [ 2.33745, 1 ], [ 2.39747, 3 ], [ 2.45461, 4 ], [ 2.59571, 2 ], [ 2.67824, 3 ], [ 2.82167, 3 ], [ 2.93646, 1 ], [ 3.02555, 1 ], [ 3.09157, 1 ], [ 3.15086, 1 ], [ 3.22363, 1 ], [ 3.40927, 2 ], [ 3.63147, 1 ], [ 3.80396, 2 ], [ 3.91972, 1 ], [ 4.32101, 1 ], [ 6.61361, 1 ], [ 6.93049, 1 ] ],
"sum" : 164.93831,
"population" : 69,
"maximum" : 6.93049,
"variance" : 1.12092
},
"count" : 69
} ],
"fields" : {
"000000" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 3.36958,
"counts" : [ [ 0, 111 ], [ 1, 135 ], [ 2, 103 ], [ 3, 75 ], [ 4, 68 ], [ 5, 57 ], [ 6, 50 ], [ 7, 45 ], [ 8, 38 ], [ 9, 28 ], [ 10, 24 ], [ 11, 11 ], [ 12, 9 ], [ 13, 10 ], [ 14, 2 ], [ 15, 1 ], [ 17, 1 ] ],
"mean" : 3.84505,
"median" : 2.96687,
"minimum" : 0,
"sum_squares" : 20063,
"missing_count" : 0,
"sum" : 2953,
"population" : 768,
"maximum" : 17,
"variance" : 11.35406
},
"datatype" : "int8",
"order" : 0,
"optype" : "numeric",
"name" : "Pregnancies",
"column_number" : 0
},
"000001" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 19.35581,
"mean" : 69.10547,
"median" : 71.43285,
"minimum" : 0,
"sum_squares" : 3954989,
"missing_count" : 0,
"bins" : [ [ 0, 35 ], [ 24, 1 ], [ 30, 2 ], [ 39, 2 ], [ 44.66667, 6 ], [ 49.44444, 18 ], [ 52, 11 ], [ 55.04, 25 ], [ 58, 21 ], [ 60.95833, 72 ], [ 64.8375, 80 ], [ 68, 45 ], [ 70, 57 ], [ 72, 44 ], [ 74.86869, 99 ], [ 78, 45 ], [ 80, 40 ], [ 82, 30 ], [ 84.96, 50 ], [ 88, 25 ], [ 90, 22 ], [ 92, 8 ], [ 94.81818, 11 ], [ 98, 3 ], [ 100, 3 ], [ 102, 1 ], [ 104, 2 ], [ 106, 3 ], [ 108, 2 ], [ 110, 3 ], [ 114, 1 ], [ 122, 1 ] ],
"sum" : 53073,
"population" : 768,
"maximum" : 122,
"variance" : 374.64727,
"splits" : [ 4.95379, 47.79361, 52.45455, 56.31476, 58.78684, 60.12982, 61.72909, 62.88889, 64.03541, 65.06458, 66.38178, 67.65685, 68.70061, 69.62414, 70.48722, 71.43285, 72.48913, 73.49, 74.23488, 75.36578, 76.27906, 77.43376, 78.51858, 79.65375, 80.9007, 82.4098, 84.16186, 85.97598, 88.04005, 90.09226, 94.7101 ]
},
"datatype" : "int8",
"order" : 1,
"optype" : "numeric",
"name" : "Blood Pressure",
"column_number" : 1
},
"000002" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 7.88416,
"mean" : 31.99258,
"median" : 32.04325,
"minimum" : 0,
"sum_squares" : 833743.95,
"missing_count" : 0,
"bins" : [ [ 0, 11 ], [ 18.25, 4 ], [ 19.72308, 13 ], [ 21.02222, 9 ], [ 22.15909, 22 ], [ 23.46333, 30 ], [ 24.88947, 57 ], [ 26.27568, 37 ], [ 27.50732, 41 ], [ 28.56341, 41 ], [ 29.93333, 63 ], [ 31.00645, 31 ], [ 32.07755, 49 ], [ 33.02683, 41 ], [ 34.02778, 54 ], [ 35.33226, 62 ], [ 36.73939, 33 ], [ 38.03721, 43 ], [ 39.46061, 33 ], [ 40.95263, 19 ], [ 42.49444, 18 ], [ 43.36923, 13 ], [ 44.2625, 8 ], [ 45.42727, 11 ], [ 46.41111, 9 ], [ 48.225, 4 ], [ 49.65, 4 ], [ 52.675, 4 ], [ 55, 1 ], [ 57.3, 1 ], [ 59.4, 1 ], [ 67.1, 1 ] ],
"sum" : 24570.3,
"population" : 768,
"maximum" : 67.1,
"variance" : 62.15998,
"splits" : [ 19.95, 22.15, 23.35858, 24.25505, 25, 25.63542, 26.38229, 27.3, 27.76103, 28.45858, 29.06972, 29.76202, 30.175, 30.82974, 31.43175, 32.04325, 32.6366, 33.13956, 33.6263, 34.14142, 34.57, 35.2, 35.7899, 36.56904, 37.41716, 38.17417, 39.1, 40.05858, 42.15858, 43.45, 46.1 ]
},
"datatype" : "double",
"order" : 2,
"optype" : "numeric",
"name" : "BMI",
"column_number" : 2
},
"000003" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 11.76023,
"mean" : 33.24089,
"median" : 29.08726,
"minimum" : 21,
"sum_squares" : 954685,
"missing_count" : 0,
"bins" : [ [ 21.53333, 135 ], [ 23.54762, 84 ], [ 25.40741, 81 ], [ 27.52239, 67 ], [ 29.42, 50 ], [ 31.4, 40 ], [ 33.45161, 31 ], [ 35.61538, 26 ], [ 37.45714, 35 ], [ 39.52, 25 ], [ 41.45, 40 ], [ 43.38095, 21 ], [ 45.46429, 28 ], [ 47.45455, 11 ], [ 49.61538, 13 ], [ 51.5, 16 ], [ 53.54545, 11 ], [ 55.42857, 7 ], [ 57.58333, 12 ], [ 59.625, 8 ], [ 61, 2 ], [ 62, 4 ], [ 63, 4 ], [ 64, 1 ], [ 65, 3 ], [ 66, 4 ], [ 67, 3 ], [ 68, 1 ], [ 69, 2 ], [ 70, 1 ], [ 72, 1 ], [ 81, 1 ] ],
"sum" : 25529,
"population" : 768,
"maximum" : 81,
"variance" : 138.30305,
"splits" : [ 21.00793, 21.49815, 21.95822, 22.45075, 23.15535, 23.91238, 24.60085, 25.28338, 26.13665, 27.12428, 28.07187, 29.08726, 30.43864, 31.93845, 33.92911, 36.36267, 38.19211, 40.54937, 42.17071, 44.82629, 48.1, 52.72302, 59.43649 ]
},
"datatype" : "int8",
"order" : 3,
"optype" : "numeric",
"name" : "Age",
"column_number" : 3
}
},
"max-workers" : 1,
"final_field_scales" : {
"000000" : 0.5296622546318589,
"000001" : 0.09220690531485994,
"000002" : 0.22637025884335418,
"000003" : 0.15176058120992694
},
"excluded_input_fields" : [ "Diabetes" ],
"input_fields" : [ "000000", "000001", "000002", "000003" ],
"seed" : "kmeans-seed"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment