Skip to content

Instantly share code, notes, and snippets.

@ashenfad
Last active August 29, 2015 13:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ashenfad/10783998 to your computer and use it in GitHub Desktop.
Save ashenfad/10783998 to your computer and use it in GitHub Desktop.
BigML Clusters - Red Wine

A visualization of seven clusters discovered on the wine quality dataset.

Each cluster is represented by a ball. The radius of each ball is proportional to the cube root of the cluster's population, so the most populous cluster gets the largest ball.

The y-axis shows the distance of each cluster to the current point (selected by the sliders). The lower a cluster's position, the nearer it is to the current point. The order of the clusters on the x-axis is from nearest to furthest.

The initial point is the median for each field. Selecting a cluster will set the current point equal to the cluster's centroid.

Finally, the colors on each slider represent the closest cluster to the current point for that range.

<!DOCTYPE html>
<meta charset="utf-8">
<style>
/* Text styling for the slider axes; text selection is disabled on them. */
.axis {
font: 10px sans-serif;
-webkit-user-select: none;
-moz-user-select: none;
user-select: none;
}
/* Axis baseline drawn as a wide, rounded, translucent stroke. Note that the
   script sets stroke and fill to "none" inline on each axis it creates. */
.axis .domain {
fill: none;
stroke: #000;
stroke-opacity: .3;
stroke-width: 10px;
stroke-linecap: round;
}
/* The round slider knob. Pointer events are disabled on it, so mouse input
   passes through (presumably to the brush background beneath it; confirm). */
.slider .handle {
fill: #fff;
stroke: #000;
stroke-opacity: .5;
stroke-width: 1.25px;
pointer-events: none;
}
</style>
<body>
<script src="http://d3js.org/d3.v3.min.js"></script>
<script>
// Drawing area: a 960x500 canvas with the margins below trimmed off.
var margin = {top: 40, right: 20, bottom: 50, left: 20};
var width = 960 - margin.left - margin.right;
var height = 500 - margin.bottom - margin.top;

// The root <svg> spans the full canvas. `svg` itself is the inner <g>, shifted
// by the left/top margins, so everything appended to it lands inside them.
var svg = d3.select("body")
    .append("svg")
    .attr("width", margin.left + width + margin.right)
    .attr("height", margin.top + height + margin.bottom)
    .append("g")
    .attr("transform", "translate(" + [margin.left, margin.top].join(",") + ")");

// Holder for the per-slider gradient definitions.
var defs = d3.select("svg").append("defs");
d3.json("red-wine.json", function(error, root) {
// Stop right away when the JSON request failed: `root` is unusable in that
// case, and rethrowing reports the real cause rather than a TypeError raised
// by the `root.model` access below.
if (error) { throw error; }
// The clustering model and its field descriptions, as loaded from the JSON.
var model = root.model;
var fields = model.fields;
// Height of the cluster plot: the top half of the drawing area.
var max_y = height / 2;
// Horizontal distance between neighbouring cluster slots.
var buffer = width / (model.clusters.length + 1);
// Length of each slider track.
var slider_size = width / 3;
// Draws one faint horizontal guide across the cluster plot.
//   y_loc - vertical position of the guide inside the plot group.
function make_hline(y_loc) {
    // The guide sticks out this far beyond the first and last cluster slots.
    var overhang = 20;
    var guide = svg.append("rect");
    guide.attr("x", buffer - overhang);
    guide.attr("y", y_loc);
    guide.attr("width", width - 2 * buffer + 2 * overhang);
    guide.attr("height", 0.5);
    guide.style("fill", "none");
    guide.style("stroke", "#ccc");
}
// Horizontal guides at 0, 1/4, 1/2, 3/4 and the full height of the plot.
make_hline(0);
make_hline(max_y / 4);
make_hline(max_y / 2);
make_hline(3 * max_y / 4);
make_hline(max_y);

// Per-field state. Every array is indexed by the field's position in `keys`.
var handle_scales = [];  // slider scales, filled in by make_slider
var mins = [];           // summary minimum of each field
var maxs = [];           // summary maximum of each field
var keys = [];           // field ids, in iteration order
var point = [];          // the current point; starts at each field's median
var fnames = [];         // display names
var i = 0;
// `var id` keeps the loop variable local; the bare `id` used before leaked an
// implicit global.
for (var id in fields) {
    if (!fields.hasOwnProperty(id)) { continue; }
    keys[i] = id;
    point[i] = fields[id].summary.median;
    mins[i] = fields[id].summary.minimum;
    maxs[i] = fields[id].summary.maximum;
    fnames[i] = fields[id].name;
    // make_slider reads point[i], so it must run after the assignments above.
    make_slider(fields[id], i);
    i = i + 1;
}

// Per-field scale factors applied to coordinate differences in dist().
var scales = [];
for (i = 0; i < keys.length; i++) {
    scales[i] = model.final_field_scales[keys[i]];
}

// Per-cluster state, indexed by the cluster's position in model.clusters.
var centroids = [];          // raw centers, coordinates ordered like `keys`
var scaled_centroids = [];   // the same centers multiplied by `scales`
var cnames = [];
var max_cluster_count = 0;
var cluster;
var j;
// model.clusters is an array, so walk it by index rather than with for...in
// (which also leaked an implicit global loop variable).
for (i = 0; i < model.clusters.length; i++) {
    cluster = model.clusters[i];
    var centroid = [];
    var scaled_centroid = [];
    cnames[i] = cluster.name;
    max_cluster_count = Math.max(max_cluster_count, cluster.count);
    for (j = 0; j < keys.length; j++) {
        var val = cluster.center[keys[j]];
        centroid[j] = val;
        scaled_centroid[j] = scales[j] * val;
    }
    centroids[i] = centroid;
    scaled_centroids[i] = scaled_centroid;
}

// Colour the slider tracks for the initial point.
update_ranges();

// Largest distance between any two centroids; used to normalise the vertical
// position of the balls.
var max_dist = 0;
for (i = 0; i < centroids.length - 1; i++) {
    for (j = i + 1; j < centroids.length; j++) {
        max_dist = Math.max(max_dist, dist(centroids[i], centroids[j]));
    }
}

// Label above the plot naming the nearest cluster; update_dists fills it in.
svg.append("text")
    .attr("class", "nearest")
    .attr("x", buffer - 20)
    .attr("y", -20);

// dists[k] is the distance from the current point to cluster k;
// dist_order[k] is that cluster's rank, with 0 the nearest.
var dists = [];
var dist_order = [];
update_dists();

// One ball per cluster, created in index order.
for (i = 0; i < model.clusters.length; i++) {
    make_balls(model.clusters[i], i);
}
// Creates the ball for one cluster and wires up its mouse behaviour.
//   cluster - cluster record; only its `count` is read here.
//   index   - position of the cluster, used for its colour, rank and distance.
// The radius is proportional to the cube root of the cluster's count relative
// to the largest cluster. The horizontal slot comes from the cluster's rank in
// dist_order; the vertical position comes from its distance capped at max_dist.
function make_balls(cluster, index) {
    var max_rad = 30;
    var color = d3.scale.category10().range()[index];
    var r = max_rad * Math.pow(cluster.count / max_cluster_count, 1/3);

    // Clicking moves the current point onto a copy of this cluster's centroid
    // and refreshes every dependent view.
    function on_click() {
        point = centroids[index].slice(0);
        update_dists();
        update_ranges();
        update_balls(300);
        update_handles(300);
        update_field_text();
    }

    // Hovering enlarges and brightens the ball.
    function on_enter() {
        var ball = d3.select(this);
        ball.attr("r", 1.1 * r);
        ball.style("stroke", color);
        ball.style("fill", d3.rgb(color).brighter());
    }

    // Leaving restores the resting look.
    function on_leave() {
        var ball = d3.select(this);
        ball.attr("r", r);
        ball.style("stroke", d3.rgb(color).darker());
        ball.style("fill", color);
    }

    var circle = svg.append("circle");
    circle.attr("class", "cluster");
    circle.attr("r", r);
    circle.attr("cx", (dist_order[index] + 1) * buffer);
    circle.attr("cy", (1 - Math.min(dists[index] / max_dist, 1)) * max_y);
    circle.style("stroke", d3.rgb(color).darker());
    circle.style("fill", color);
    circle.on("click", on_click);
    circle.on("mouseover", on_enter);
    circle.on("mouseout", on_leave);
}
// Recomputes the distance from the current point to every centroid.
// Side effects: fills dists[k] with the distance to cluster k, fills
// dist_order[k] with that cluster's rank (0 = nearest), and rewrites the
// ".nearest" label with the closest cluster's name, distance and colour.
function update_dists() {
    var ranked = [];
    var nearest = -1;
    var nearest_dist;
    for (var k = 0; k < centroids.length; k++) {
        var d = dist(point, centroids[k]);
        dists[k] = d;
        ranked.push({d: d, i: k});
        // Strict comparison: on a tie the lowest index stays the nearest.
        if (nearest === -1 || d < nearest_dist) {
            nearest = k;
            nearest_dist = d;
        }
    }
    ranked.sort(function(a, b) {
        return a.d < b.d ? -1 : (a.d > b.d ? 1 : 0);
    });
    ranked.forEach(function(entry, rank) {
        dist_order[entry.i] = rank;
    });
    var label = d3.select(".nearest");
    label.text("Nearest to " + cnames[nearest]
        + " - Distance: " + dists[nearest].toFixed(4));
    label.style("fill", d3.scale.category10().range()[nearest]);
}
// Rebuilds the colour gradient behind every slider.
// For field i the gradient "#g<i>" shows which cluster is nearest as that
// field sweeps from its minimum to its maximum while every other coordinate
// stays at the current point. Each segment gets two stops of the same colour,
// so the track shows solid bands with hard edges.
function update_ranges() {
    var palette = d3.scale.category10().range();
    for (var i = 0; i < scales.length; i++) {
        var grad = d3.select("#g" + i);
        grad.selectAll("stop").remove();

        // Start at the low end of the slider and find the nearest cluster.
        var min_pt = point.slice(0);
        min_pt[i] = mins[i];
        var min_index = -1;
        var min_dist;
        var j;
        for (j = 0; j < centroids.length; j++) {
            var d = dist(min_pt, centroids[j]);
            if (min_index === -1 || min_dist > d) {
                min_index = j;
                min_dist = d;
            }
        }

        var p = min_pt[i];   // current position along the slider
        var c = min_index;   // cluster that is nearest at p
        var range = maxs[i] - mins[i];
        // The sweep line depends only on the field and the current point, so
        // build it once per slider.
        var sweep = line(i);
        // Clusters whose band has already been emitted for this slider. The
        // set lives for the whole sweep: a cluster that has been left is not
        // considered again, and the current cluster is never tested against
        // itself.
        var used = {};
        used[c] = true;

        while (p < maxs[i]) {
            grad.append("svg:stop")
                .attr("offset", (100 * (p - mins[i]) / range).toFixed(2).toString() + "%")
                .attr("stop-color", palette[c])
                .attr("stop-opacity", 1);

            // The band ends at the closest boundary beyond p between the
            // current cluster and any cluster not visited yet, or at the end
            // of the slider if there is no such boundary.
            var nextp = maxs[i];
            var nextc = c;
            for (j = 0; j < centroids.length; j++) {
                if (used[j] === true) { continue; }
                var candidate = vdist(sweep, plane(j, c));
                if (candidate < nextp && candidate > p) {
                    nextp = candidate;
                    nextc = j;
                }
            }

            grad.append("svg:stop")
                .attr("offset", (100 * (nextp - mins[i]) / range).toFixed(2).toString() + "%")
                .attr("stop-color", palette[c])
                .attr("stop-opacity", 1);

            p = nextp;
            c = nextc;
            used[c] = true;
        }
    }
}
// Moves every cluster ball to the slot and height given by the current
// dist_order / dists values.
//   duration - transition length in ms; 50 ms is added to it.
// Balls are matched to clusters by position: the n-th ".cluster" element is
// bound to index n.
function update_balls(duration) {
    var slot_width = width / (model.clusters.length + 1);
    var ids = [];
    for (var k = 0; k < centroids.length; k++) {
        ids.push(k);
    }
    function slot_x(d) {
        return (dist_order[d] + 1) * slot_width;
    }
    function height_y(d) {
        return (1 - Math.min(dists[d] / max_dist, 1)) * max_y;
    }
    var moving = d3.selectAll(".cluster")
        .data(ids)
        .transition()
        .duration(duration + 50);
    moving.attr("cx", slot_x).attr("cy", height_y);
}
// Builds the slider for one field: axis, gradient-filled track, label, brush
// and knob.
//   field - field record; its `summary` (minimum/maximum) and `name` are read.
//   index - position of the field; selects the slot in point / handle_scales
//           and the id of the gradient ("g" + index).
// Sliders are laid out in two columns, 60px per row, below the cluster plot.
function make_slider(field, index) {
    var summary = field.summary;
    var column = index % 2;
    var row = Math.floor(index / 2);
    var gap = (width - (2 * slider_size)) / 3;
    var x_loc = (column + 1) * gap + column * slider_size;
    var y_loc = max_y + 60 + row * 60;

    // Field value <-> pixel offset along the track, clamped to the track.
    var x = d3.scale.linear()
        .domain([summary.minimum, summary.maximum])
        .range([0, slider_size])
        .clamp(true);
    handle_scales[index] = x;

    var knob;  // the handle circle; assigned further down

    // Brush handler: moves the knob, stores the new coordinate in the current
    // point and refreshes everything that depends on it.
    function on_brush() {
        var value = brush.extent()[0];
        if (d3.event.sourceEvent) { // not a programmatic event
            // The slider group is not translated, so subtract the track's
            // left edge before inverting the scale.
            value = x.invert(d3.mouse(this)[0] - x_loc);
            brush.extent([value, value]);
        }
        knob.attr("cx", x(value));
        point[index] = value;
        update_dists();
        update_ranges();
        update_balls(0);
        update_field_text();
    }
    var brush = d3.svg.brush().x(x);
    brush.on("brush", on_brush);

    // Tick labels under the track; the axis baseline itself is hidden.
    var axis = svg.append("g")
        .attr("class", "x axis")
        .attr("transform", "translate(" + x_loc + "," + y_loc + ")");
    var ticks = d3.svg.axis()
        .scale(x)
        .orient("bottom")
        .tickFormat(function(d) { return d; })
        .tickSize(0)
        .tickPadding(10);
    axis.call(ticks);
    axis.select(".domain")
        .style("stroke", "none")
        .style("fill", "none");

    // Horizontal gradient that update_ranges fills with colour stops.
    var gradient_name = "g" + index;
    defs.append("linearGradient")
        .attr("id", gradient_name)
        .attr("field_id", index)
        .attr("x1", "0%")
        .attr("y1", "0%")
        .attr("x2", "100%")
        .attr("y2", "0%");

    // The visible track, painted with that gradient.
    axis.append("rect")
        .attr("class", "range")
        .attr("y", -4)
        .attr("height", 8)
        .attr("width", slider_size)
        .attr("field_index", index)
        .attr("rx", 4)
        .attr("ry", 4)
        .style("stroke", "#333")
        .style("fill", "url(#" + gradient_name + ")");

    // Brush group. Its extent and resize parts are removed, and its background
    // is resized to a 40px-high strip over the track.
    var slider = svg.append("g")
        .attr("class", "slider")
        .call(brush);
    slider.append("text")
        .attr("class", "field_text")
        .attr("transform", "translate(" + x_loc + "," + (y_loc - 13) + ")")
        .text(field.name + ": " + point[index].toFixed(2));
    slider.selectAll(".extent,.resize").remove();
    slider.select(".background")
        .attr("height", 40)
        .attr("x", x_loc)
        .attr("y", y_loc - 20);

    knob = slider.append("circle")
        .attr("class", "handle")
        .attr("transform", "translate(" + x_loc + "," + (y_loc - 1) + ")")
        .attr("r", 8)
        .attr("cx", handle_scales[index](point[index]));

    // Start the brush at the field's current value.
    brush.extent([point[index], point[index]]);
}
// Animates every slider knob to the position of the current point.
//   duration - transition length in ms.
// Knobs are matched to fields by position: the n-th ".handle" element is
// bound to index n.
function update_handles(duration) {
    var ids = [];
    for (var k = 0; k < point.length; k++) {
        ids.push(k);
    }
    function knob_x(d) {
        return handle_scales[d](point[d]);
    }
    var knobs = d3.selectAll(".handle").data(ids);
    knobs.transition().duration(duration).attr("cx", knob_x);
}
// Rewrites every slider label as "<field name>: <value>", with the value taken
// from the current point and shown to two decimals.
// Labels are matched to fields by position: the n-th ".field_text" element is
// bound to index n.
function update_field_text() {
    var ids = [];
    for (var k = 0; k < point.length; k++) {
        ids.push(k);
    }
    var labels = d3.selectAll(".field_text").data(ids);
    labels.text(function(d) {
        return fnames[d] + ": " + point[d].toFixed(2);
    });
}
// Scaled Euclidean distance between two points given in raw field units.
// Each coordinate difference is multiplied by the matching entry of `scales`
// before it is squared. Only the first p1.length coordinates are used.
function dist(p1, p2) {
    var sum_sq = 0;
    var k = 0;
    while (k < p1.length) {
        var delta = scales[k] * (p1[k] - p2[k]);
        sum_sq += delta * delta;
        k += 1;
    }
    return Math.sqrt(sum_sq);
}
// Returns a new array holding each coordinate of `point` multiplied by the
// matching entry of `scales`. The input array is left untouched.
function scale_point(point) {
    var result = [];
    var k = 0;
    while (k < point.length) {
        result.push(scales[k] * point[k]);
        k += 1;
    }
    return result;
}
// Dot product of two vectors, taken over the first v1.length components.
function dotp(v1, v2) {
    var acc = 0;
    var k = 0;
    while (k < v1.length) {
        acc = acc + v1[k] * v2[k];
        k += 1;
    }
    return acc;
}
// Describes the plane lying halfway between two scaled centroids.
//   cent_id1, cent_id2 - indices into scaled_centroids.
// Returns {n, o}: `o` is the midpoint of the two centroids and `n` is the
// vector from the first centroid to the second, used as the plane's normal.
function plane(cent_id1, cent_id2) {
    var first = scaled_centroids[cent_id1];
    var second = scaled_centroids[cent_id2];
    var midpoint = [];
    var normal = [];
    for (var k = 0; k < first.length; k++) {
        midpoint.push((second[k] + first[k]) / 2);
        normal.push(second[k] - first[k]);
    }
    return {n: normal, o: midpoint};
}
// Describes the line traced through scaled space when one field is varied
// and all others stay at the current point.
//   field_index - index of the field being varied.
// Returns {i, o, v}: `i` is the field index, `o` is the scaled current point
// with that field's coordinate set to 0, and `v` is the unit vector along
// that field's axis.
function line(field_index) {
    var origin = scale_point(point);
    origin[field_index] = 0;
    var direction = [];
    for (var k = 0; k < point.length; k++) {
        direction.push(0);
    }
    direction[field_index] = 1;
    return {i: field_index, o: origin, v: direction};
}
// Finds where a line crosses a plane, expressed in raw units of the line's
// field.
//   line  - {i, o, v} as produced by line().
//   plane - {n, o} as produced by plane().
// The crossing parameter is found in scaled space and then divided by the
// field's scale. When the line runs parallel to the plane the division by
// zero yields Infinity or NaN, which is returned as is.
function vdist(line, plane) {
    var plane_origin = plane.o;
    var line_origin = line.o;
    var offset = [];
    for (var k = 0; k < plane_origin.length; k++) {
        offset.push(plane_origin[k] - line_origin[k]);
    }
    var along = dotp(plane.n, offset);
    var per_step = dotp(line.v, plane.n);
    var t = along / per_step;
    return t / scales[line.i];
}
});
</script>
{
"model" : {
"kind" : "kmeans",
"dataset_id" : "1397595862046",
"missing_tokens" : [ "", "N/A", "n/a", "NULL", "null", "-", "#DIV/0", "#REF!", "#NAME?", "NIL", "nil", "NA", "na", "#VALUE!", "#NULL!", "NaN", "#N/A", "#NUM!", "?" ],
"branching-factor" : 4,
"locale" : "en_US",
"k" : 7,
"type" : "unsupervised",
"clusters" : [ {
"name" : "Cluster 1",
"id" : "000000",
"center" : {
"000000" : 0.37684,
"000001" : 6.008,
"000002" : 0.99837,
"000003" : 0.67107,
"000004" : 11.00109,
"000005" : 5.83583
},
"distance" : {
"standard_deviation" : 0.00127,
"mean" : 0.0044,
"median" : 0.0043,
"minimum" : 0.00142,
"sum_squares" : 0.00187,
"bins" : [ [ 0.00142, 1 ], [ 0.00251, 4 ], [ 0.0026, 3 ], [ 0.00276, 4 ], [ 0.00317, 2 ], [ 0.00329, 5 ], [ 0.00342, 3 ], [ 0.00359, 2 ], [ 0.00368, 1 ], [ 0.00378, 4 ], [ 0.00389, 4 ], [ 0.00402, 7 ], [ 0.00413, 1 ], [ 0.00428, 5 ], [ 0.00437, 3 ], [ 0.00445, 3 ], [ 0.00454, 3 ], [ 0.00463, 3 ], [ 0.00489, 2 ], [ 0.00499, 3 ], [ 0.00509, 4 ], [ 0.0053, 4 ], [ 0.0054, 1 ], [ 0.00563, 3 ], [ 0.00571, 4 ], [ 0.00579, 1 ], [ 0.00597, 2 ], [ 0.00649, 2 ], [ 0.00676, 1 ], [ 0.00711, 1 ], [ 0.00762, 2 ], [ 0.00796, 1 ] ],
"sum" : 0.39177,
"population" : 89,
"maximum" : 0.00796,
"variance" : 0
},
"count" : 89
}, {
"name" : "Cluster 2",
"id" : "000001",
"center" : {
"000000" : 0.36364,
"000001" : 13.34545,
"000002" : 1.00062,
"000003" : 0.65273,
"000004" : 9.53636,
"000005" : 5.54545
},
"distance" : {
"standard_deviation" : 0.00121,
"counts" : [ [ 0.00306, 2 ], [ 0.00369, 1 ], [ 0.00375, 2 ], [ 0.00406, 1 ], [ 0.00444, 2 ], [ 0.0047, 1 ], [ 0.00612, 1 ], [ 0.00695, 1 ] ],
"mean" : 0.00437,
"median" : 0.00406,
"minimum" : 0.00306,
"sum_squares" : 2.2E-4,
"sum" : 0.04803,
"population" : 11,
"maximum" : 0.00695,
"variance" : 0
},
"count" : 11
}, {
"name" : "Cluster 3",
"id" : "000002",
"center" : {
"000000" : 0.3975,
"000001" : 2.1,
"000002" : 0.99724,
"000003" : 1.7825,
"000004" : 9.575,
"000005" : 5.25
},
"distance" : {
"standard_deviation" : 0.00168,
"counts" : [ [ 0.0022, 1 ], [ 0.00248, 1 ], [ 0.00278, 1 ], [ 0.0028, 2 ], [ 0.00284, 1 ], [ 0.00289, 1 ], [ 0.00738, 1 ] ],
"mean" : 0.00327,
"median" : 0.0028,
"minimum" : 0.0022,
"sum_squares" : 1.1E-4,
"sum" : 0.02617,
"population" : 8,
"maximum" : 0.00738,
"variance" : 0
},
"count" : 8
}, {
"name" : "Cluster 4",
"id" : "000003",
"center" : {
"000000" : 0.17459,
"000001" : 2.24507,
"000002" : 0.99686,
"000003" : 0.57589,
"000004" : 9.78054,
"000005" : 5.16951
},
"distance" : {
"standard_deviation" : 9.4E-4,
"mean" : 0.00245,
"median" : 0.00238,
"minimum" : 7.5E-4,
"sum_squares" : 0.005,
"bins" : [ [ 7.7E-4, 8 ], [ 0.00102, 12 ], [ 0.00117, 28 ], [ 0.00134, 36 ], [ 0.00151, 35 ], [ 0.0017, 74 ], [ 0.00191, 37 ], [ 0.00205, 42 ], [ 0.00219, 54 ], [ 0.00234, 50 ], [ 0.0025, 57 ], [ 0.00263, 44 ], [ 0.00275, 50 ], [ 0.00288, 37 ], [ 0.00301, 25 ], [ 0.00314, 24 ], [ 0.00328, 21 ], [ 0.00348, 28 ], [ 0.0037, 12 ], [ 0.00388, 6 ], [ 0.004, 4 ], [ 0.00426, 10 ], [ 0.00448, 9 ], [ 0.00468, 7 ], [ 0.00505, 4 ], [ 0.0053, 1 ], [ 0.00578, 3 ], [ 0.00604, 2 ], [ 0.00621, 2 ], [ 0.00675, 1 ], [ 0.00697, 1 ], [ 0.00788, 1 ] ],
"sum" : 1.77709,
"population" : 725,
"maximum" : 0.00788,
"variance" : 0
},
"count" : 725
}, {
"name" : "Cluster 5",
"id" : "000004",
"center" : {
"000000" : 0.39567,
"000001" : 2.30105,
"000002" : 0.99763,
"000003" : 1.05558,
"000004" : 9.80762,
"000005" : 5.46505
},
"distance" : {
"standard_deviation" : 0.00111,
"mean" : 0.00334,
"median" : 0.00329,
"minimum" : 0.00138,
"sum_squares" : 0.00105,
"bins" : [ [ 0.00138, 1 ], [ 0.00158, 1 ], [ 0.00164, 2 ], [ 0.00175, 2 ], [ 0.00209, 2 ], [ 0.00219, 5 ], [ 0.00227, 5 ], [ 0.00237, 4 ], [ 0.00246, 3 ], [ 0.00261, 2 ], [ 0.00272, 5 ], [ 0.0028, 2 ], [ 0.00297, 5 ], [ 0.00319, 2 ], [ 0.00329, 3 ], [ 0.00345, 1 ], [ 0.00352, 7 ], [ 0.00362, 1 ], [ 0.00374, 1 ], [ 0.00381, 4 ], [ 0.00396, 1 ], [ 0.00404, 4 ], [ 0.00414, 5 ], [ 0.0042, 6 ], [ 0.00442, 1 ], [ 0.00457, 1 ], [ 0.00473, 2 ], [ 0.00528, 1 ], [ 0.00539, 1 ], [ 0.00555, 2 ], [ 0.00612, 2 ], [ 0.00624, 1 ] ],
"sum" : 0.28404,
"population" : 85,
"maximum" : 0.00624,
"variance" : 0
},
"count" : 85
}, {
"name" : "Cluster 6",
"id" : "000005",
"center" : {
"000000" : 0.47963,
"000001" : 2.38731,
"000002" : 0.99805,
"000003" : 0.71241,
"000004" : 10.52227,
"000005" : 6.01511
},
"distance" : {
"standard_deviation" : 8.9E-4,
"mean" : 0.00295,
"median" : 0.00295,
"minimum" : 8.5E-4,
"sum_squares" : 0.00316,
"bins" : [ [ 8.9E-4, 2 ], [ 0.0011, 2 ], [ 0.00125, 4 ], [ 0.00143, 5 ], [ 0.00154, 6 ], [ 0.00171, 18 ], [ 0.00184, 11 ], [ 0.00196, 6 ], [ 0.00214, 16 ], [ 0.00234, 29 ], [ 0.0025, 12 ], [ 0.00261, 10 ], [ 0.00276, 26 ], [ 0.00292, 26 ], [ 0.00303, 20 ], [ 0.00319, 32 ], [ 0.00337, 19 ], [ 0.00349, 23 ], [ 0.00365, 16 ], [ 0.00382, 9 ], [ 0.00401, 11 ], [ 0.00416, 6 ], [ 0.00437, 3 ], [ 0.00448, 2 ], [ 0.00462, 9 ], [ 0.00477, 4 ], [ 0.00497, 2 ], [ 0.00519, 1 ], [ 0.00529, 1 ], [ 0.00539, 1 ], [ 0.00576, 1 ], [ 0.00646, 1 ] ],
"sum" : 0.98398,
"population" : 334,
"maximum" : 0.00646,
"variance" : 0
},
"count" : 334
}, {
"name" : "Cluster 7",
"id" : "000006",
"center" : {
"000000" : 0.21192,
"000001" : 2.13836,
"000002" : 0.99446,
"000003" : 0.66196,
"000004" : 11.7465,
"000005" : 6.26691
},
"distance" : {
"standard_deviation" : 9.9E-4,
"mean" : 0.00315,
"median" : 0.003,
"minimum" : 0.00114,
"sum_squares" : 0.00377,
"bins" : [ [ 0.0012, 5 ], [ 0.00142, 1 ], [ 0.00154, 10 ], [ 0.00175, 7 ], [ 0.00193, 2 ], [ 0.00203, 17 ], [ 0.00229, 36 ], [ 0.00251, 24 ], [ 0.00265, 23 ], [ 0.0028, 22 ], [ 0.00294, 30 ], [ 0.00308, 19 ], [ 0.00323, 16 ], [ 0.00335, 27 ], [ 0.0035, 9 ], [ 0.00369, 25 ], [ 0.00386, 20 ], [ 0.00401, 6 ], [ 0.00414, 1 ], [ 0.00435, 9 ], [ 0.00451, 11 ], [ 0.00466, 2 ], [ 0.00481, 2 ], [ 0.00508, 8 ], [ 0.00525, 3 ], [ 0.00547, 4 ], [ 0.00569, 3 ], [ 0.0059, 1 ], [ 0.00623, 1 ], [ 0.00635, 1 ], [ 0.00658, 1 ], [ 0.00704, 1 ] ],
"sum" : 1.09177,
"population" : 347,
"maximum" : 0.00704,
"variance" : 0
},
"count" : 347
} ],
"fields" : {
"000000" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 0.1948,
"mean" : 0.27098,
"median" : 0.25624,
"minimum" : 0,
"sum_squares" : 178.0511,
"missing_count" : 0,
"bins" : [ [ 0.002, 165 ], [ 0.02375, 80 ], [ 0.04932, 73 ], [ 0.076, 55 ], [ 0.09812, 80 ], [ 0.124, 45 ], [ 0.14755, 49 ], [ 0.18085, 59 ], [ 0.21024, 85 ], [ 0.23671, 76 ], [ 0.25918, 85 ], [ 0.28525, 40 ], [ 0.305, 60 ], [ 0.33191, 94 ], [ 0.36487, 39 ], [ 0.3954, 87 ], [ 0.42341, 44 ], [ 0.44489, 45 ], [ 0.46486, 37 ], [ 0.49185, 124 ], [ 0.52452, 31 ], [ 0.5448, 25 ], [ 0.56529, 17 ], [ 0.59143, 28 ], [ 0.63815, 27 ], [ 0.66125, 16 ], [ 0.68267, 15 ], [ 0.70333, 3 ], [ 0.73556, 9 ], [ 0.76, 3 ], [ 0.785, 2 ], [ 1, 1 ] ],
"sum" : 433.29,
"population" : 1599,
"maximum" : 1,
"variance" : 0.03795,
"splits" : [ 5.0E-5, 0.00686, 0.02206, 0.04251, 0.07179, 0.0938, 0.11995, 0.15254, 0.19193, 0.21601, 0.23823, 0.25624, 0.2824, 0.30858, 0.33115, 0.36506, 0.39692, 0.425, 0.45761, 0.48622, 0.50006, 0.5449, 0.63343 ]
},
"datatype" : "double",
"order" : 0,
"optype" : "numeric",
"name" : "citric acid",
"column_number" : 0
},
"000001" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 1.40993,
"mean" : 2.53881,
"median" : 2.19259,
"minimum" : 0.9,
"sum_squares" : 13483.0675,
"missing_count" : 0,
"bins" : [ [ 0.9, 2 ], [ 1.2, 8 ], [ 1.51231, 130 ], [ 1.7628, 207 ], [ 2.05205, 537 ], [ 2.34413, 196 ], [ 2.57794, 204 ], [ 2.89065, 107 ], [ 3.3125, 44 ], [ 3.74286, 28 ], [ 4.03529, 17 ], [ 4.26071, 14 ], [ 4.54118, 17 ], [ 4.8, 3 ], [ 5.125, 10 ], [ 5.53333, 15 ], [ 5.88333, 12 ], [ 6.17778, 9 ], [ 6.54444, 9 ], [ 7, 1 ], [ 7.25, 2 ], [ 7.5, 1 ], [ 7.86, 5 ], [ 8.22, 5 ], [ 8.6, 1 ], [ 8.875, 4 ], [ 10.7, 1 ], [ 11, 2 ], [ 12.9, 1 ], [ 13.4, 1 ], [ 13.83333, 3 ], [ 15.43333, 3 ] ],
"sum" : 4059.55,
"population" : 1599,
"maximum" : 15.5,
"variance" : 1.9879,
"splits" : [ 1.45183, 1.58374, 1.67583, 1.71731, 1.78518, 1.81351, 1.85349, 1.8951, 1.93567, 1.97189, 2.00252, 2.0243, 2.0829, 2.10561, 2.14312, 2.19259, 2.2142, 2.2767, 2.30528, 2.36801, 2.41438, 2.47308, 2.52032, 2.59656, 2.67364, 2.78061, 2.90705, 3.20419, 3.76465, 4.47656, 6.00078 ]
},
"datatype" : "double",
"order" : 1,
"optype" : "numeric",
"name" : "residual sugar",
"column_number" : 1
},
"000002" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 0.00189,
"mean" : 0.99675,
"median" : 0.99675,
"minimum" : 0.99007,
"sum_squares" : 1588.6185,
"missing_count" : 0,
"bins" : [ [ 0.99011, 3 ], [ 0.99073, 4 ], [ 0.9912, 1 ], [ 0.99167, 10 ], [ 0.99217, 3 ], [ 0.99249, 14 ], [ 0.99288, 7 ], [ 0.99332, 25 ], [ 0.99376, 52 ], [ 0.9943, 40 ], [ 0.99484, 104 ], [ 0.99539, 148 ], [ 0.99584, 128 ], [ 0.99626, 159 ], [ 0.99672, 196 ], [ 0.99711, 115 ], [ 0.99747, 159 ], [ 0.9979, 94 ], [ 0.99826, 86 ], [ 0.99868, 63 ], [ 0.99907, 39 ], [ 0.99938, 33 ], [ 0.9997, 34 ], [ 1.00017, 43 ], [ 1.00067, 9 ], [ 1.001, 6 ], [ 1.00142, 8 ], [ 1.0018, 1 ], [ 1.00215, 4 ], [ 1.00251, 4 ], [ 1.00311, 5 ], [ 1.00369, 2 ] ],
"sum" : 1593.79794,
"population" : 1599,
"maximum" : 1.00369,
"variance" : 0,
"splits" : [ 0.99326, 0.99384, 0.99442, 0.99479, 0.99502, 0.99525, 0.99546, 0.9956, 0.99577, 0.99591, 0.99611, 0.99621, 0.99633, 0.99646, 0.9966, 0.99675, 0.99684, 0.99699, 0.99711, 0.99721, 0.99736, 0.99747, 0.99763, 0.99784, 0.99801, 0.99821, 0.99849, 0.9988, 0.99922, 0.99969, 1.00027 ]
},
"datatype" : "double",
"order" : 2,
"optype" : "numeric",
"name" : "density",
"column_number" : 2
},
"000003" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 0.16951,
"mean" : 0.65815,
"median" : 0.62016,
"minimum" : 0.33,
"sum_squares" : 738.5374,
"missing_count" : 0,
"bins" : [ [ 0.33, 1 ], [ 0.39, 12 ], [ 0.43854, 41 ], [ 0.46514, 37 ], [ 0.49442, 113 ], [ 0.5356, 216 ], [ 0.5747, 234 ], [ 0.60395, 114 ], [ 0.63342, 196 ], [ 0.67287, 136 ], [ 0.70441, 59 ], [ 0.7338, 100 ], [ 0.7782, 111 ], [ 0.824, 65 ], [ 0.86277, 47 ], [ 0.90393, 28 ], [ 0.93286, 14 ], [ 0.96615, 13 ], [ 0.9925, 4 ], [ 1.02625, 8 ], [ 1.06417, 12 ], [ 1.1, 4 ], [ 1.135, 6 ], [ 1.17222, 9 ], [ 1.21, 2 ], [ 1.27333, 3 ], [ 1.32667, 3 ], [ 1.36, 3 ], [ 1.56, 1 ], [ 1.60667, 3 ], [ 1.95, 2 ], [ 1.99, 2 ] ],
"sum" : 1052.38,
"population" : 1599,
"maximum" : 2,
"variance" : 0.02873,
"splits" : [ 0.45158, 0.47801, 0.4948, 0.51304, 0.52466, 0.53427, 0.54208, 0.55094, 0.55995, 0.56859, 0.57707, 0.58479, 0.59387, 0.60165, 0.61088, 0.62016, 0.62927, 0.63988, 0.65144, 0.66351, 0.67711, 0.6943, 0.71126, 0.72971, 0.75011, 0.77085, 0.79393, 0.82145, 0.85516, 0.90758, 1.04462 ]
},
"datatype" : "double",
"order" : 3,
"optype" : "numeric",
"name" : "sulphates",
"column_number" : 3
},
"000004" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 1.06568,
"mean" : 10.42298,
"median" : 10.1578,
"minimum" : 8.4,
"sum_squares" : 175527.8819,
"missing_count" : 0,
"bins" : [ [ 8.43333, 3 ], [ 8.75, 4 ], [ 9.04352, 54 ], [ 9.24496, 133 ], [ 9.4, 103 ], [ 9.5302, 201 ], [ 9.75909, 132 ], [ 9.901, 50 ], [ 10.04103, 116 ], [ 10.2, 46 ], [ 10.35541, 74 ], [ 10.5299, 97 ], [ 10.76071, 70 ], [ 10.9, 49 ], [ 11.03184, 87 ], [ 11.2, 36 ], [ 11.35, 64 ], [ 11.53333, 45 ], [ 11.7, 23 ], [ 11.8, 29 ], [ 11.95119, 42 ], [ 12.148, 25 ], [ 12.352, 25 ], [ 12.52222, 27 ], [ 12.7, 9 ], [ 12.83462, 26 ], [ 13.025, 8 ], [ 13.2, 1 ], [ 13.35, 6 ], [ 13.57833, 6 ], [ 14, 7 ], [ 14.9, 1 ] ],
"sum" : 16666.35,
"population" : 1599,
"maximum" : 14.9,
"variance" : 1.13567,
"splits" : [ 9.102, 9.20125, 9.28559, 9.35046, 9.40419, 9.4486, 9.48778, 9.51385, 9.58815, 9.6541, 9.74178, 9.81129, 9.89027, 9.98824, 10.04541, 10.1578, 10.27397, 10.40933, 10.49986, 10.60134, 10.78164, 10.88309, 10.97891, 11.09204, 11.23461, 11.38809, 11.55932, 11.79174, 12.0077, 12.38111, 12.77851 ]
},
"datatype" : "double",
"order" : 4,
"optype" : "numeric",
"name" : "alcohol",
"column_number" : 4
},
"000005" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 0.80757,
"counts" : [ [ 3, 10 ], [ 4, 53 ], [ 5, 681 ], [ 6, 638 ], [ 7, 199 ], [ 8, 18 ] ],
"mean" : 5.63602,
"median" : 5.59258,
"minimum" : 3,
"sum_squares" : 51834,
"missing_count" : 0,
"sum" : 9012,
"population" : 1599,
"maximum" : 8,
"variance" : 0.65217
},
"datatype" : "int8",
"order" : 5,
"optype" : "numeric",
"name" : "quality",
"column_number" : 5
}
},
"max-workers" : 1,
"final_field_scales" : {
"000000" : 0.009453569300591064,
"000001" : 0.0013061324319328898,
"000002" : 0.9743678834683276,
"000003" : 0.01086399209341714,
"000004" : 0.0017280565458253315,
"000005" : 0.002280366159905815
},
"input_fields" : [ "000000", "000001", "000002", "000003", "000004", "000005" ],
"seed" : "kmeans-seed"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment