Skip to content

Instantly share code, notes, and snippets.

@ashenfad
Last active August 29, 2015 13:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ashenfad/10785035 to your computer and use it in GitHub Desktop.
Save ashenfad/10785035 to your computer and use it in GitHub Desktop.
BigML Clusters - Concrete Strength

A visualization of seven clusters discovered on the concrete compression strength dataset.

Each cluster is represented by a ball. The cluster radii scale with the cube root of each cluster's population, so a ball's volume is proportional to the population.

The y-axis shows the distance of each cluster to the current point (selected by the sliders). The lower a cluster's position, the nearer it is to the current point. The clusters are ordered along the x-axis from nearest to furthest.

The initial point is the median for each field. Selecting a cluster will set the current point equal to the cluster's centroid.

Finally, the colors on each slider represent the closest cluster to the current point for that range.

{
"model" : {
"kind" : "kmeans",
"dataset_id" : "1397592326981",
"missing_tokens" : [ "", "N/A", "n/a", "NULL", "null", "-", "#DIV/0", "#REF!", "#NAME?", "NIL", "nil", "NA", "na", "#VALUE!", "#NULL!", "NaN", "#N/A", "#NUM!", "?" ],
"branching-factor" : 4,
"locale" : "en_US",
"k" : 7,
"type" : "unsupervised",
"clusters" : [ {
"name" : "Cluster 1",
"id" : "000000",
"center" : {
"000008" : 23.31656,
"000001" : 4.11623,
"000000" : 292.5708,
"000003" : 190.13419,
"000002" : 22.39377,
"000006" : 809.81286,
"000007" : 33.34759,
"000004" : 1.4708,
"000005" : 1012.75415
},
"distance" : {
"standard_deviation" : 1.24254,
"mean" : 4.35952,
"median" : 4.21578,
"minimum" : 1.71485,
"sum_squares" : 3553.48643,
"bins" : [ [ 1.71485, 1 ], [ 2.18295, 1 ], [ 2.27773, 3 ], [ 2.59005, 3 ], [ 2.70438, 4 ], [ 2.82718, 7 ], [ 2.94509, 3 ], [ 3.08999, 11 ], [ 3.20683, 1 ], [ 3.3184, 4 ], [ 3.4358, 3 ], [ 3.58497, 12 ], [ 3.76283, 13 ], [ 3.91706, 8 ], [ 4.14234, 14 ], [ 4.31813, 10 ], [ 4.43479, 3 ], [ 4.54374, 7 ], [ 4.68781, 6 ], [ 4.92151, 16 ], [ 5.08824, 5 ], [ 5.24259, 5 ], [ 5.37527, 5 ], [ 5.51191, 5 ], [ 5.64628, 4 ], [ 5.8762, 4 ], [ 6.04695, 2 ], [ 6.81025, 1 ], [ 7.09849, 1 ], [ 7.21965, 5 ], [ 7.3941, 4 ], [ 7.59463, 2 ] ],
"sum" : 754.19694,
"population" : 173,
"maximum" : 7.62075,
"variance" : 1.54389
},
"count" : 173
}, {
"name" : "Cluster 2",
"id" : "000001",
"center" : {
"000008" : 56.3331,
"000001" : 119.662,
"000000" : 387.80561,
"000003" : 160.56631,
"000002" : 18.55684,
"000006" : 795.1988,
"000007" : 35.46902,
"000004" : 13.12845,
"000005" : 919.72914
},
"distance" : {
"standard_deviation" : 2.02489,
"mean" : 6.41683,
"median" : 5.84764,
"minimum" : 3.40165,
"sum_squares" : 8145.55452,
"bins" : [ [ 3.47013, 8 ], [ 3.77628, 4 ], [ 4.04632, 9 ], [ 4.31642, 3 ], [ 4.46503, 3 ], [ 4.81216, 7 ], [ 5.00262, 13 ], [ 5.18089, 6 ], [ 5.3498, 12 ], [ 5.57098, 13 ], [ 5.75671, 15 ], [ 6.02167, 4 ], [ 6.28388, 13 ], [ 6.48238, 6 ], [ 6.67142, 6 ], [ 6.8546, 5 ], [ 6.99005, 6 ], [ 7.22226, 2 ], [ 7.43573, 6 ], [ 7.6955, 4 ], [ 7.93191, 5 ], [ 8.14934, 2 ], [ 8.80195, 1 ], [ 9.21575, 1 ], [ 9.39983, 1 ], [ 9.79359, 1 ], [ 10.0708, 5 ], [ 10.38128, 7 ], [ 10.56845, 4 ], [ 10.76709, 6 ], [ 11.16013, 1 ], [ 11.64311, 1 ] ],
"sum" : 1155.02906,
"population" : 180,
"maximum" : 11.64311,
"variance" : 4.10018
},
"count" : 180
}, {
"name" : "Cluster 3",
"id" : "000002",
"center" : {
"000008" : 45.23201,
"000001" : 17.93759,
"000000" : 449.95789,
"000003" : 197.63194,
"000002" : 11.4871,
"000006" : 667.68498,
"000007" : 42.60015,
"000004" : 0.99173,
"000005" : 1007.18968
},
"distance" : {
"standard_deviation" : 1.62109,
"mean" : 6.20998,
"median" : 6.01311,
"minimum" : 3.42816,
"sum_squares" : 3086.7555,
"bins" : [ [ 3.42816, 1 ], [ 3.60385, 2 ], [ 3.79984, 2 ], [ 4.15859, 4 ], [ 4.24056, 2 ], [ 4.41598, 2 ], [ 4.56825, 3 ], [ 4.89042, 1 ], [ 5.02442, 2 ], [ 5.12708, 3 ], [ 5.31873, 3 ], [ 5.48849, 2 ], [ 5.63521, 5 ], [ 5.83922, 4 ], [ 5.97276, 2 ], [ 6.16758, 6 ], [ 6.41654, 1 ], [ 6.62569, 4 ], [ 6.81104, 1 ], [ 6.92561, 4 ], [ 7.37298, 1 ], [ 7.48948, 1 ], [ 7.68788, 3 ], [ 7.89136, 2 ], [ 7.99908, 4 ], [ 8.17623, 3 ], [ 8.63963, 2 ], [ 8.7582, 1 ], [ 8.94531, 1 ], [ 9.12635, 1 ], [ 9.90681, 1 ], [ 10.44065, 1 ] ],
"sum" : 465.74854,
"population" : 75,
"maximum" : 10.44065,
"variance" : 2.62792
},
"count" : 75
}, {
"name" : "Cluster 4",
"id" : "000003",
"center" : {
"000008" : 26.54208,
"000001" : 196.04036,
"000000" : 194.93798,
"000003" : 192.6243,
"000002" : 0.03983,
"000006" : 758.15832,
"000007" : 29.7552,
"000004" : 2.04425,
"000005" : 970.11154
},
"distance" : {
"standard_deviation" : 1.7094,
"mean" : 5.33921,
"median" : 5.05025,
"minimum" : 2.26464,
"sum_squares" : 5214.33231,
"bins" : [ [ 2.26464, 1 ], [ 2.46259, 4 ], [ 2.76598, 2 ], [ 2.94124, 1 ], [ 3.12432, 6 ], [ 3.38592, 5 ], [ 3.52411, 6 ], [ 3.78564, 11 ], [ 4.03571, 11 ], [ 4.294, 10 ], [ 4.46914, 7 ], [ 4.62793, 10 ], [ 4.85521, 6 ], [ 5.01222, 4 ], [ 5.18888, 10 ], [ 5.34672, 7 ], [ 5.6432, 3 ], [ 5.90549, 11 ], [ 6.27163, 11 ], [ 6.50642, 3 ], [ 6.79873, 3 ], [ 7.03604, 4 ], [ 7.20119, 1 ], [ 7.5374, 5 ], [ 7.69593, 2 ], [ 7.93255, 6 ], [ 8.15002, 3 ], [ 8.29152, 3 ], [ 8.44739, 3 ], [ 8.59697, 2 ], [ 8.83927, 4 ], [ 9.83608, 1 ] ],
"sum" : 886.30917,
"population" : 166,
"maximum" : 9.83608,
"variance" : 2.92206
},
"count" : 166
}, {
"name" : "Cluster 5",
"id" : "000004",
"center" : {
"000008" : 45.34751,
"000001" : 70.26451,
"000000" : 332.96817,
"000003" : 216.24967,
"000002" : 0,
"000006" : 681.97098,
"000007" : 287.55556,
"000004" : 0,
"000005" : 958.72261
},
"distance" : {
"standard_deviation" : 2.00621,
"mean" : 6.51312,
"median" : 6.51242,
"minimum" : 2.68589,
"sum_squares" : 2086.02648,
"bins" : [ [ 2.68589, 1 ], [ 2.84879, 1 ], [ 3.28087, 1 ], [ 3.60748, 1 ], [ 4.06706, 1 ], [ 4.38188, 1 ], [ 4.49424, 2 ], [ 4.5887, 1 ], [ 4.76914, 1 ], [ 4.91708, 1 ], [ 5.29825, 2 ], [ 5.42987, 1 ], [ 5.60617, 1 ], [ 5.70797, 2 ], [ 5.85573, 1 ], [ 5.97285, 2 ], [ 6.13958, 1 ], [ 6.27768, 1 ], [ 6.59834, 2 ], [ 6.76999, 1 ], [ 6.93572, 2 ], [ 7.07405, 1 ], [ 7.1938, 2 ], [ 7.28261, 1 ], [ 7.55481, 3 ], [ 8.00595, 3 ], [ 8.46258, 1 ], [ 8.73868, 3 ], [ 9.88572, 1 ], [ 9.98165, 1 ], [ 10.14529, 1 ], [ 11.73183, 1 ] ],
"sum" : 293.09034,
"population" : 45,
"maximum" : 11.73183,
"variance" : 4.02487
},
"count" : 45
}, {
"name" : "Cluster 6",
"id" : "000005",
"center" : {
"000008" : 34.76686,
"000001" : 19.27014,
"000000" : 221.47118,
"000003" : 164.82746,
"000002" : 127.08462,
"000006" : 805.85769,
"000007" : 40.1295,
"000004" : 9.1383,
"000005" : 1021.92048
},
"distance" : {
"standard_deviation" : 1.18151,
"mean" : 4.86749,
"median" : 4.81688,
"minimum" : 2.3698,
"sum_squares" : 6546.66991,
"bins" : [ [ 2.4642, 4 ], [ 2.73421, 4 ], [ 2.92132, 9 ], [ 3.11581, 4 ], [ 3.24659, 8 ], [ 3.4412, 5 ], [ 3.58068, 6 ], [ 3.72373, 4 ], [ 3.85697, 8 ], [ 4.04129, 19 ], [ 4.19484, 9 ], [ 4.33024, 21 ], [ 4.52311, 10 ], [ 4.71652, 19 ], [ 4.90177, 17 ], [ 5.09408, 17 ], [ 5.30337, 15 ], [ 5.47591, 9 ], [ 5.60913, 15 ], [ 5.7274, 11 ], [ 5.91344, 5 ], [ 6.08525, 8 ], [ 6.25712, 2 ], [ 6.39555, 7 ], [ 6.66378, 6 ], [ 6.84887, 1 ], [ 6.97671, 7 ], [ 7.16512, 3 ], [ 7.3143, 3 ], [ 7.64492, 2 ], [ 8.04958, 2 ], [ 8.2035, 1 ] ],
"sum" : 1270.41359,
"population" : 261,
"maximum" : 8.2035,
"variance" : 1.39596
},
"count" : 261
}, {
"name" : "Cluster 7",
"id" : "000006",
"center" : {
"000008" : 31.50347,
"000001" : 95.04009,
"000000" : 226.28646,
"000003" : 193.01896,
"000002" : 124.25224,
"000006" : 745.56076,
"000007" : 26.40994,
"000004" : 8.85685,
"000005" : 882.41513
},
"distance" : {
"standard_deviation" : 1.43624,
"mean" : 5.49245,
"median" : 5.38031,
"minimum" : 2.624,
"sum_squares" : 4187.80474,
"bins" : [ [ 2.639, 2 ], [ 2.84559, 2 ], [ 2.97582, 2 ], [ 3.39142, 3 ], [ 3.49389, 4 ], [ 3.67301, 3 ], [ 3.86258, 2 ], [ 3.96865, 4 ], [ 4.15228, 2 ], [ 4.31667, 4 ], [ 4.46133, 6 ], [ 4.73477, 16 ], [ 4.98608, 3 ], [ 5.10467, 8 ], [ 5.36798, 7 ], [ 5.53704, 3 ], [ 5.68922, 2 ], [ 5.81973, 7 ], [ 5.93288, 8 ], [ 6.04826, 3 ], [ 6.17654, 3 ], [ 6.34907, 3 ], [ 6.50053, 4 ], [ 6.5948, 1 ], [ 6.82048, 4 ], [ 7.20546, 6 ], [ 7.33126, 3 ], [ 7.58794, 6 ], [ 7.97058, 2 ], [ 8.12222, 3 ], [ 8.41736, 1 ], [ 8.53031, 3 ] ],
"sum" : 714.01788,
"population" : 130,
"maximum" : 8.55396,
"variance" : 2.0628
},
"count" : 130
} ],
"fields" : {
"000000" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 104.50636,
"mean" : 281.16786,
"median" : 272.9101,
"minimum" : 102,
"sum_squares" : 9.266533487E7,
"missing_count" : 0,
"bins" : [ [ 105.15, 8 ], [ 119.3, 8 ], [ 134.1625, 16 ], [ 144.34082, 49 ], [ 155.52131, 61 ], [ 168.47206, 68 ], [ 181.78333, 24 ], [ 191.90278, 36 ], [ 200.89565, 23 ], [ 214.93333, 75 ], [ 234.78036, 56 ], [ 251.33611, 72 ], [ 263.82667, 15 ], [ 276.41538, 39 ], [ 287.56364, 33 ], [ 297.43514, 37 ], [ 309.98596, 57 ], [ 322.07857, 28 ], [ 335.22245, 49 ], [ 349.4, 20 ], [ 360.96857, 35 ], [ 378.10698, 43 ], [ 387.375, 28 ], [ 397.5, 20 ], [ 425.20588, 34 ], [ 444.36818, 22 ], [ 475.2, 25 ], [ 490.33333, 9 ], [ 500.425, 12 ], [ 516, 2 ], [ 526.08824, 17 ], [ 540, 9 ] ],
"sum" : 289602.9,
"population" : 1030,
"maximum" : 540,
"variance" : 10921.58022,
"splits" : [ 137.4936, 145.9554, 152.96334, 158.87265, 166.11759, 172.23879, 182.49616, 192.62829, 203.42118, 213.29812, 218.55875, 233.3, 238.12349, 250.54841, 252.48241, 272.9101, 280.46825, 290.35147, 298.1683, 309.98251, 316.06535, 330.79167, 338.68129, 351.80754, 364.28077, 379.94207, 387.78827, 415.99219, 431.90377, 474.09237, 500.03304 ]
},
"datatype" : "double",
"order" : 0,
"optype" : "numeric",
"name" : "Cement",
"column_number" : 0
},
"000001" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 86.27934,
"mean" : 73.89583,
"median" : 22.23927,
"minimum" : 0,
"sum_squares" : 1.328441521E7,
"missing_count" : 0,
"bins" : [ [ 0, 471 ], [ 13.35714, 14 ], [ 21.86, 55 ], [ 40.96429, 14 ], [ 47.73529, 17 ], [ 54.2, 10 ], [ 75.7, 10 ], [ 86, 1 ], [ 96.74, 55 ], [ 106.13478, 23 ], [ 117.22553, 47 ], [ 130.83125, 32 ], [ 137.56364, 11 ], [ 143.65667, 30 ], [ 150.68125, 16 ], [ 160.29091, 22 ], [ 168.25, 16 ], [ 175.83158, 19 ], [ 183.34167, 12 ], [ 189.43509, 57 ], [ 201.59375, 16 ], [ 210.64, 25 ], [ 230.25, 2 ], [ 237.77, 20 ], [ 250.13333, 6 ], [ 261.52857, 7 ], [ 272.8, 2 ], [ 286.36, 10 ], [ 305.3, 4 ], [ 316.1, 2 ], [ 342.1, 2 ], [ 359.4, 2 ] ],
"sum" : 76112.7,
"population" : 1030,
"maximum" : 359.4,
"variance" : 7444.12481,
"splits" : [ 0.52628, 22.23927, 142.95 ]
},
"datatype" : "double",
"order" : 1,
"optype" : "numeric",
"name" : "Blast Furnace Slag",
"column_number" : 1
},
"000002" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 63.997,
"mean" : 54.18835,
"median" : 13.85756,
"minimum" : 0,
"sum_squares" : 7238857.96,
"missing_count" : 0,
"bins" : [ [ 0, 566 ], [ 24.5, 15 ], [ 59.5, 2 ], [ 71.25, 2 ], [ 78.37727, 22 ], [ 81.9, 2 ], [ 86.51667, 6 ], [ 90.39, 10 ], [ 95.39483, 58 ], [ 99.91026, 39 ], [ 103.15, 2 ], [ 107.13571, 14 ], [ 112.5125, 16 ], [ 118.13721, 43 ], [ 121.71154, 26 ], [ 124.57308, 52 ], [ 128.525, 12 ], [ 132.33529, 17 ], [ 137.6, 7 ], [ 141.97097, 31 ], [ 147.125, 4 ], [ 150.8, 3 ], [ 159.84, 10 ], [ 163.54167, 12 ], [ 166.80833, 12 ], [ 173.81786, 28 ], [ 178.95, 2 ], [ 182.05, 2 ], [ 185.03333, 6 ], [ 190, 1 ], [ 194.48333, 6 ], [ 200.05, 2 ] ],
"sum" : 55814,
"population" : 1030,
"maximum" : 200.1,
"variance" : 4095.61654,
"splits" : [ 2.76331, 98.68603 ]
},
"datatype" : "double",
"order" : 2,
"optype" : "numeric",
"name" : "Fly Ash",
"column_number" : 2
},
"000003" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 21.35422,
"mean" : 181.56728,
"median" : 185.10147,
"minimum" : 121.8,
"sum_squares" : 3.442490479E7,
"missing_count" : 0,
"bins" : [ [ 121.8, 5 ], [ 126.75714, 7 ], [ 137.8, 5 ], [ 141.85294, 17 ], [ 145.96774, 31 ], [ 151.33333, 6 ], [ 154.32292, 48 ], [ 158.28676, 68 ], [ 161.62195, 41 ], [ 164.65, 44 ], [ 167.55667, 30 ], [ 170.04167, 24 ], [ 172.13333, 24 ], [ 175.12353, 51 ], [ 178.32286, 35 ], [ 181.00784, 51 ], [ 183.56667, 18 ], [ 185.86058, 104 ], [ 188.81852, 27 ], [ 192.1046, 174 ], [ 195.41923, 26 ], [ 197.5, 17 ], [ 200.5, 36 ], [ 203.364, 50 ], [ 206, 4 ], [ 210.21667, 12 ], [ 214.38889, 9 ], [ 218, 1 ], [ 220.45714, 7 ], [ 228, 54 ], [ 236.85, 2 ], [ 246.95, 2 ] ],
"sum" : 187014.3,
"population" : 1030,
"maximum" : 247,
"variance" : 456.00265,
"splits" : [ 143.4925, 149.05395, 154.12183, 156.97849, 158.7329, 160.6525, 162.09644, 164.90779, 167.99058, 170.28269, 173.93702, 175.5133, 178.07555, 180.29358, 181.98981, 185.10147, 185.70955, 185.98805, 188.28747, 190.83435, 191.90879, 191.98253, 192.14784, 192.50125, 193.90917, 196.58818, 200.10369, 203.12387, 204.56557, 219.73541, 227.85326 ]
},
"datatype" : "double",
"order" : 3,
"optype" : "numeric",
"name" : "Water",
"column_number" : 3
},
"000004" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 5.97384,
"mean" : 6.20466,
"median" : 6.4,
"minimum" : 0,
"sum_squares" : 76374.44,
"missing_count" : 0,
"bins" : [ [ 0, 379 ], [ 1.78333, 6 ], [ 2.4, 3 ], [ 3.01429, 7 ], [ 3.525, 8 ], [ 3.97857, 14 ], [ 4.55238, 21 ], [ 5.33636, 22 ], [ 5.892, 50 ], [ 6.54615, 26 ], [ 6.98889, 27 ], [ 7.86709, 79 ], [ 8.8322, 59 ], [ 9.53793, 29 ], [ 10.11905, 63 ], [ 11.08475, 59 ], [ 11.74, 75 ], [ 12.38333, 6 ], [ 12.84286, 14 ], [ 14.18333, 18 ], [ 15, 4 ], [ 15.45, 2 ], [ 15.95, 8 ], [ 16.5, 15 ], [ 17.95, 4 ], [ 18.6375, 8 ], [ 20, 1 ], [ 20.8, 1 ], [ 22.01429, 7 ], [ 23.4, 5 ], [ 28.2, 5 ], [ 32.2, 5 ] ],
"sum" : 6390.8,
"population" : 1030,
"maximum" : 32.2,
"variance" : 35.68678,
"splits" : [ 0.07568, 4.0309, 8.00568, 11.08048 ]
},
"datatype" : "double",
"order" : 4,
"optype" : "numeric",
"name" : "Superplasticizer",
"column_number" : 4
},
"000005" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 77.75395,
"mean" : 972.91893,
"median" : 967.84658,
"minimum" : 801,
"sum_squares" : 9.8118938777E8,
"missing_count" : 0,
"bins" : [ [ 801.08333, 6 ], [ 812.525, 4 ], [ 821.9, 25 ], [ 831.08, 10 ], [ 840.04444, 9 ], [ 851.63393, 56 ], [ 860.86667, 9 ], [ 868.71765, 17 ], [ 880.72069, 29 ], [ 891.73913, 23 ], [ 907.24615, 13 ], [ 915.50667, 15 ], [ 925.18333, 18 ], [ 933.53137, 102 ], [ 943.97792, 77 ], [ 952.65312, 32 ], [ 965.52418, 91 ], [ 974.825, 64 ], [ 988.37, 20 ], [ 1004.26224, 98 ], [ 1012.95625, 16 ], [ 1026.71556, 45 ], [ 1045.01455, 55 ], [ 1056.91935, 62 ], [ 1067.31667, 18 ], [ 1077.65357, 28 ], [ 1087.4871, 31 ], [ 1102.4, 8 ], [ 1111.57778, 9 ], [ 1123.70909, 33 ], [ 1133.58333, 6 ], [ 1145, 1 ] ],
"sum" : 1002106.5,
"population" : 1030,
"maximum" : 1145,
"variance" : 6045.67736,
"splits" : [ 824.23044, 850.41321, 852.39598, 869.37144, 884.20438, 906.175, 925.64772, 931.76146, 933.10428, 937.28715, 942.06367, 944.89887, 949.16708, 959.11893, 966.4062, 967.84658, 972.0375, 977.05625, 988.33201, 1001.951, 1006.02159, 1007.36619, 1022.83494, 1029.4, 1045.44426, 1052.39048, 1057.495, 1065.91, 1078.93625, 1089.48342, 1123.9875 ]
},
"datatype" : "double",
"order" : 5,
"optype" : "numeric",
"name" : "Coarse Aggregate",
"column_number" : 5
},
"000006" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 80.17598,
"mean" : 773.58049,
"median" : 779.57639,
"minimum" : 594,
"sum_squares" : 6.2299417571E8,
"missing_count" : 0,
"bins" : [ [ 594, 30 ], [ 605, 5 ], [ 612.81935, 31 ], [ 623, 2 ], [ 630.90909, 11 ], [ 640.78571, 7 ], [ 656.8, 17 ], [ 669.25714, 28 ], [ 676.91818, 11 ], [ 688.21176, 17 ], [ 696.33226, 31 ], [ 710.19512, 41 ], [ 719.98571, 21 ], [ 728.98333, 6 ], [ 736.33043, 23 ], [ 747.46154, 52 ], [ 757.92679, 112 ], [ 772.62826, 46 ], [ 781.4068, 103 ], [ 792.90769, 39 ], [ 803.01386, 101 ], [ 814.53, 20 ], [ 825.45714, 49 ], [ 843.75556, 36 ], [ 854.32553, 47 ], [ 866.53103, 29 ], [ 878.25, 12 ], [ 889.3098, 51 ], [ 902.63333, 30 ], [ 925.7, 5 ], [ 943.36667, 12 ], [ 992.6, 5 ] ],
"sum" : 796787.9,
"population" : 1030,
"maximum" : 992.6,
"variance" : 6428.18779,
"splits" : [ 604.39554, 613.73224, 658.41997, 672.28243, 692.56078, 704.3252, 712.7746, 730.4, 745.30648, 749.5921, 755.14272, 757.01838, 759.81145, 769.04886, 775.43217, 779.57639, 781.0999, 785.13885, 793.03, 799.75045, 801.59831, 805.11063, 812.48236, 824.33333, 838.70578, 847.15114, 856.3653, 867.7375, 885.2625, 892.7375, 903.66421 ]
},
"datatype" : "double",
"order" : 6,
"optype" : "numeric",
"name" : "Fine Aggregate",
"column_number" : 6
},
"000007" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 63.16991,
"counts" : [ [ 1, 2 ], [ 3, 134 ], [ 7, 126 ], [ 14, 62 ], [ 28, 425 ], [ 56, 91 ], [ 90, 54 ], [ 91, 22 ], [ 100, 52 ], [ 120, 3 ], [ 180, 26 ], [ 270, 13 ], [ 360, 6 ], [ 365, 14 ] ],
"mean" : 45.66214,
"median" : 27.27576,
"minimum" : 1,
"sum_squares" : 6253742,
"missing_count" : 0,
"sum" : 47032,
"population" : 1030,
"maximum" : 365,
"variance" : 3990.43773
},
"datatype" : "int16",
"order" : 7,
"optype" : "numeric",
"name" : "Age (days)",
"column_number" : 7
},
"000008" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 16.70574,
"mean" : 35.81796,
"median" : 34.44333,
"minimum" : 2.33,
"sum_squares" : 1608589.3194,
"missing_count" : 0,
"bins" : [ [ 2.825, 2 ], [ 4.77, 4 ], [ 7.52238, 21 ], [ 9.994, 25 ], [ 12.64224, 49 ], [ 15.23044, 45 ], [ 17.69182, 33 ], [ 19.48158, 19 ], [ 21.7337, 46 ], [ 24.74656, 90 ], [ 27.44581, 31 ], [ 29.56689, 45 ], [ 32.6202, 101 ], [ 35.11273, 33 ], [ 37.26509, 57 ], [ 39.181, 50 ], [ 41.43333, 69 ], [ 44.63576, 59 ], [ 47.98514, 35 ], [ 50.59885, 26 ], [ 52.96385, 39 ], [ 56.03909, 44 ], [ 59.55263, 19 ], [ 61.61923, 13 ], [ 64.30214, 14 ], [ 66.515, 14 ], [ 68.49, 11 ], [ 71.561, 10 ], [ 74.1, 9 ], [ 76.82, 7 ], [ 79.44875, 8 ], [ 82.175, 2 ] ],
"sum" : 36892.5,
"population" : 1030,
"maximum" : 82.6,
"variance" : 279.08181,
"splits" : [ 9.59039, 12.04164, 13.59665, 15.46648, 17.60534, 19.80523, 21.9104, 23.67664, 24.71001, 25.95571, 27.73288, 29.57112, 31.23271, 32.45469, 33.35841, 34.44333, 36.30491, 37.47926, 38.72024, 39.62638, 41.01641, 42.31341, 44.17343, 46.07604, 48.75467, 51.20054, 53.61328, 55.97245, 59.71209, 65.0125, 71.37727 ]
},
"datatype" : "double",
"order" : 8,
"optype" : "numeric",
"name" : "Concrete compressive strength",
"column_number" : 8
}
},
"max-workers" : 1,
"final_field_scales" : {
"000008" : 0.1700393995910662,
"000001" : 0.03292368716919321,
"000000" : 0.027181446175375913,
"000003" : 0.1330244794389333,
"000002" : 0.04438698687945464,
"000006" : 0.0354299878757261,
"000007" : 0.044968150173468,
"000004" : 0.47551223322426756,
"000005" : 0.036533629472515004
},
"input_fields" : [ "000000", "000001", "000002", "000003", "000004", "000005", "000006", "000007", "000008" ],
"seed" : "kmeans-seed"
}
}
<!DOCTYPE html>
<meta charset="utf-8">
<style>
/* Axis labels: small sans-serif text that cannot be selected while dragging. */
.axis {
font: 10px sans-serif;
-webkit-user-select: none;
-moz-user-select: none;
user-select: none;
}
/* Default look of an axis domain path: a thick, rounded, translucent bar.
   make_slider overrides stroke and fill inline with "none" on the axes it builds. */
.axis .domain {
fill: none;
stroke: #000;
stroke-opacity: .3;
stroke-width: 10px;
stroke-linecap: round;
}
/* Slider knob: white disc with a thin translucent outline. It ignores pointer
   events itself (presumably so drags reach the brush background underneath). */
.slider .handle {
fill: #fff;
stroke: #000;
stroke-opacity: .5;
stroke-width: 1.25px;
pointer-events: none;
}
</style>
<body>
<script src="http://d3js.org/d3.v3.min.js"></script>
<script>
// Page geometry: a 960x500 outer SVG; `width` and `height` are the inner
// drawing area that remains once the margins are subtracted.
var margin = {top: 40, right: 20, bottom: 50, left: 20};
var width = 960 - margin.left - margin.right;
var height = 500 - margin.bottom - margin.top;

// `svg` is the inner <g>, shifted by the left/top margins; everything is drawn into it.
var svg = d3.select("body")
  .append("svg")
  .attr("width", margin.left + width + margin.right)
  .attr("height", margin.top + height + margin.bottom)
  .append("g")
  .attr("transform", "translate(" + [margin.left, margin.top].join(",") + ")");

// Shared <defs> element; the slider gradients are appended here.
var defs = d3.select("svg").append("defs");
d3.json("concrete.json", function(error, root) {
var model = root.model;
var fields = model.fields;
var max_y = height / 3;
var buffer = width / (model.clusters.length + 1);
var slider_size = width / 3;
// Draws one faint horizontal guide line across the cluster plot.
//   y_loc - vertical position of the line inside the drawing area
function make_hline(y_loc) {
  var left_edge = buffer - 20;
  var span = width - 2 * buffer + 40;
  var guide = svg.append("rect");
  guide
    .attr("x", left_edge)
    .attr("y", y_loc)
    .attr("width", span)
    .attr("height", 0.5);
  guide
    .style("fill", "none")
    .style("stroke", "#ccc");
}
// Five guide lines: top, quarter, half, three-quarter and bottom of the cluster plot.
[0, max_y / 4, max_y / 2, 3 * max_y / 4, max_y].forEach(function(y_loc) {
  make_hline(y_loc);
});
// Per-field state; every array is indexed by the field's position.
var handle_scales = []; // value -> pixel scale of each slider
var mins = [];          // field minimum
var maxs = [];          // field maximum
var keys = [];          // field id (key into model.fields)
var point = [];         // the current point, one value per field
var fnames = [];        // field display name
var i = 0;
// Register every field and build its slider. The current point starts at the
// field medians. `id` is declared here so it no longer leaks as an implicit global.
for (var id in fields) {
  if (!fields.hasOwnProperty(id)) { continue; }
  keys[i] = id;
  point[i] = fields[id].summary.median;
  mins[i] = fields[id].summary.minimum;
  maxs[i] = fields[id].summary.maximum;
  fnames[i] = fields[id].name;
  // make_slider reads point[i], so it has to run after the assignments above.
  make_slider(fields[id], i);
  i = i + 1;
}
// Per-field scaling factors from the model, in the same order as `keys`.
var scales = keys.map(function(key) {
  return model.final_field_scales[key];
});
// Cluster state, indexed by the cluster's position in model.clusters.
var centroids = [];        // raw centre coordinates, ordered like `keys`
var scaled_centroids = []; // the same coordinates multiplied by the field scales
var cnames = [];           // cluster display names
var max_cluster_count = 0; // largest cluster population (used to size the balls)
// model.clusters is an array, so walk it by index instead of `for...in`
// (which also leaked the undeclared loop variable `cid` as a global).
for (var ci = 0; ci < model.clusters.length; ci++) {
  var cluster = model.clusters[ci];
  var centroid = [];
  var scaled_centroid = [];
  cnames[ci] = cluster.name;
  max_cluster_count = Math.max(max_cluster_count, cluster.count);
  for (var j = 0; j < keys.length; j++) {
    var val = cluster.center[keys[j]];
    centroid[j] = val;
    scaled_centroid[j] = scales[j] * val;
  }
  centroids[ci] = centroid;
  scaled_centroids[ci] = scaled_centroid;
}
// Colour the slider tracks for the initial point.
update_ranges();
// Largest distance between any two centroids; used to normalise ball heights.
var max_dist = 0;
centroids.forEach(function(from, a) {
  centroids.slice(a + 1).forEach(function(to) {
    max_dist = Math.max(max_dist, dist(from, to));
  });
});
// Status line above the plot; update_dists fills in its text and colour.
var nearest_label = svg.append("text");
nearest_label.attr("class", "nearest");
nearest_label.attr("x", buffer - 20);
nearest_label.attr("y", -20);
// dists[k]: distance from the current point to cluster k.
// dist_order[k]: rank of cluster k when sorted by that distance (0 = nearest).
var dists = [];
var dist_order = [];
update_dists();
// One ball per cluster, appended in cluster order. update_balls binds data by
// position, so this order has to match the indices used in dists/dist_order.
// Indexed loop: model.clusters is an array, and the old `for (cid in ...)`
// leaked `cid` as an implicit global.
for (var b = 0; b < model.clusters.length; b++) {
  make_balls(model.clusters[b], b);
}
// Draws the ball for one cluster and wires up its click and hover behaviour.
//   cluster - cluster record from the model; only `count` is read here
//   index   - position of the cluster in model.clusters (also picks its colour)
function make_balls(cluster, index) {
  var biggest_radius = 30;
  var base_color = d3.scale.category10().range()[index];
  // Cube root of the relative population, so the largest cluster gets the full radius.
  var radius = biggest_radius * Math.pow(cluster.count / max_cluster_count, 1/3);

  // Applies a radius / outline / fill combination to a circle node.
  function paint(node, rad, outline, fill) {
    d3.select(node)
      .attr("r", rad)
      .style("stroke", outline)
      .style("fill", fill);
  }

  var ball = svg.append("circle")
    .attr("class", "cluster")
    .attr("r", radius)
    .attr("cx", (dist_order[index] + 1) * buffer)
    .attr("cy", (1 - Math.min(dists[index] / max_dist, 1)) * max_y)
    .style("stroke", d3.rgb(base_color).darker())
    .style("fill", base_color);

  // Clicking a ball moves the current point onto that cluster's centroid.
  ball.on("click", function() {
    point = centroids[index].slice(0);
    update_dists();
    update_ranges();
    update_balls(300);
    update_handles(300);
    update_field_text();
  });
  // Hover: slightly larger and brighter; restored on mouseout.
  ball.on("mouseover", function() {
    paint(this, 1.1 * radius, base_color, d3.rgb(base_color).brighter());
  });
  ball.on("mouseout", function() {
    paint(this, radius, d3.rgb(base_color).darker(), base_color);
  });
}
// Recomputes the distance from the current point to every centroid, ranks the
// clusters by that distance, and refreshes the "Nearest to ..." status line.
// Writes into the shared `dists` and `dist_order` arrays.
function update_dists() {
  var ranked = [];
  var nearest = -1;
  var nearest_dist;
  centroids.forEach(function(centroid, idx) {
    var d = dist(point, centroid);
    dists[idx] = d;
    ranked.push({d: d, i: idx});
    // Strict comparison keeps the first cluster when distances tie.
    if (nearest === -1 || d < nearest_dist) {
      nearest = idx;
      nearest_dist = d;
    }
  });
  ranked.sort(function(a, b) {
    return a.d < b.d ? -1 : (a.d > b.d ? 1 : 0);
  });
  // dist_order maps cluster index -> rank (0 = nearest).
  ranked.forEach(function(entry, rank) {
    dist_order[entry.i] = rank;
  });
  var label = d3.select(".nearest");
  label.text("Nearest to " + cnames[nearest]
    + " - Distance: " + dists[nearest].toFixed(4));
  label.style("fill", d3.scale.category10().range()[nearest]);
}
// Repaints every slider track so that each stretch of the track shows the colour
// of the cluster that would be nearest if that field alone were moved there
// (all other fields held at the current point).
//
// For field i the walk starts at the field minimum, finds the nearest cluster,
// then repeatedly jumps to the closest point further along the axis where the
// bisecting plane between the current cluster and another one is crossed. Each
// stretch is written as two gradient stops of the same colour, which yields
// hard-edged bands.
function update_ranges() {
  var palette = d3.scale.category10().range();

  // Appends one fully opaque stop to `grad` at field value `value`.
  function add_stop(grad, value, lo, range, cluster_index) {
    grad.append("svg:stop")
      .attr("offset", (100 * (value - lo) / range).toFixed(2) + "%")
      .attr("stop-color", palette[cluster_index])
      .attr("stop-opacity", 1);
  }

  for (var i = 0; i < scales.length; i++) {
    var grad = d3.select("#g" + i);
    grad.selectAll("stop").remove();

    // Nearest cluster at the low end of the slider.
    var min_pt = point.slice(0);
    min_pt[i] = mins[i];
    var min_index = -1;
    var min_dist;
    for (var j = 0; j < centroids.length; j++) {
      var d = dist(min_pt, centroids[j]);
      if (min_index == -1 || min_dist > d) {
        min_index = j;
        min_dist = d;
      }
    }

    var p = min_pt[i];
    var c = min_index;
    var range = maxs[i] - mins[i];
    // The axis line only depends on the current point, so build it once per field.
    var axis_line = line(i);
    // Clusters already given a band on this slider. A straight line enters each
    // nearest-cluster region at most once, so a visited cluster can only come
    // back as a rounding artefact. (The original tested `used[j.toString]`
    // without calling it, so the check never matched.)
    var used = {};
    used[c] = true;

    while (p < maxs[i]) {
      add_stop(grad, p, mins[i], range, c);
      var nextp = maxs[i];
      var nextc = c;
      for (j = 0; j < centroids.length; j++) {
        if (used[j] === true) { continue; }
        var candidate = vdist(axis_line, plane(j, c));
        // Parallel planes give Infinity/NaN, which fail both comparisons.
        if (candidate < nextp && candidate > p) {
          nextp = candidate;
          nextc = j;
        }
      }
      // Close the band with the same colour; the next band starts at this offset.
      add_stop(grad, nextp, mins[i], range, c);
      p = nextp;
      c = nextc;
      used[c] = true;
    }
  }
}
// Moves every cluster ball to the column and height that match the current
// distances. The transition lasts `duration` + 50 milliseconds.
function update_balls(duration) {
  var spacing = width / (model.clusters.length + 1);
  // Bind each ball to its cluster index (balls are in cluster order in the DOM).
  var ids = [];
  for (var k = 0; k < centroids.length; k++) {
    ids.push(k);
  }
  var balls = d3.selectAll(".cluster").data(ids);
  balls.transition().duration(duration + 50)
    .attr("cx", function(d) {
      return (dist_order[d] + 1) * spacing;
    })
    .attr("cy", function(d) {
      var closeness = 1 - Math.min(dists[d] / max_dist, 1);
      return closeness * max_y;
    });
}
// Builds the slider for one field: a labelled axis, a gradient-filled track,
// a brush that captures mouse input, a text label and a circular handle.
//   field - field record from the model (reads `summary.minimum`,
//           `summary.maximum` and `name`)
//   index - position of the field; selects the slot in `point` and
//           `handle_scales` and names the gradient ("g" + index)
// Elements are appended in field order; update_handles and update_field_text
// bind data to ".handle" / ".field_text" by position, so that order matters.
function make_slider(field, index) {
var summary = field.summary;
// Sliders are laid out in two columns, 60px per row, starting 60px below the
// cluster plot. This local `buffer` is the gap around the columns and shadows
// the outer cluster-spacing `buffer`.
var buffer = (width - (2 * slider_size)) / 3;
var x_loc = ((index % 2) + 1) * buffer + (index % 2) * slider_size;
var y_loc = max_y + 60 + Math.floor(index / 2) * 60;
// Maps field values onto track pixels; clamped so values outside the field's
// range map to the ends of the track.
var x = d3.scale.linear()
.domain([summary.minimum, summary.maximum])
.range([0, slider_size])
.clamp(true);
handle_scales[index] = x;
var brush = d3.svg.brush().x(x);
// On every brush event: move the handle, store the new value in the current
// point and refresh distances, track colours, ball positions and labels.
// `handle` is assigned further down (var hoisting); the handler only runs on
// later events, after that assignment.
brush.on("brush", function() {
var value = brush.extent()[0];
if (d3.event.sourceEvent) { // not a programmatic event
// Convert the mouse x (minus the slider's x offset) back into a field value
// and collapse the brush extent to that single value.
value = x.invert(d3.mouse(this)[0] - x_loc);
brush.extent([value, value]);
}
handle.attr("cx", x(value));
point[index] = value;
update_dists();
update_ranges();
update_balls(0);
update_field_text();
});
// Tick labels below the track; the axis's own domain path is hidden because
// the gradient rect below serves as the visible track.
var axis = svg.append("g")
.attr("class", "x axis")
.attr("transform", "translate(" + x_loc + "," + y_loc + ")");
axis.call(d3.svg.axis()
.scale(x)
.orient("bottom")
.tickFormat(function(d) { return d; })
.tickSize(0)
.tickPadding(10))
.select(".domain")
.style("stroke", "none")
.style("fill", "none");
// Horizontal gradient with id "g<index>"; update_ranges looks it up by that id
// and rewrites its stops.
var gradient_name = "g" + index;
var gradient = defs.append("linearGradient")
.attr("id", gradient_name)
.attr("field_id", index)
.attr("x1", "0%")
.attr("y1", "0%")
.attr("x2", "100%")
.attr("y2", "0%");
// The visible track: a rounded 8px-high bar filled with the gradient.
axis.append("rect")
.attr("class", "range")
.attr("y", -4)
.attr("height", 8)
.attr("width", slider_size)
.attr("field_index", index)
.attr("rx", 4)
.attr("ry", 4)
.style("stroke", "#333")
.style("fill", "url(#" + gradient_name + ")");
// Group that hosts the brush, the text label and the handle.
var slider = svg.append("g")
.attr("class", "slider")
.call(brush);
// Label above the track: "<field name>: <current value>".
slider.append("text")
.attr("class", "field_text")
.attr("transform", "translate(" + x_loc + "," + (y_loc - 13) + ")")
.text(field.name + ": " + point[index].toFixed(2));
// Drop the brush's extent/resize elements and place its background rect over
// the track (40px high, centred on y_loc).
slider.selectAll(".extent,.resize").remove();
slider.select(".background").attr("height", 40)
.attr("x", x_loc)
.attr("y", y_loc - 20);
// The knob, positioned at the field's current value.
var handle = slider.append("circle")
.attr("class", "handle")
.attr("transform", "translate(" + x_loc + "," + (y_loc - 1) + ")")
.attr("r", 8)
.attr("cx", handle_scales[index](point[index]));
// Start the brush collapsed on the current value.
brush.extent([point[index], point[index]]);
}
// Slides every handle to the position of its field's current value.
//   duration - transition length in milliseconds
function update_handles(duration) {
  // Bind each handle to its field index (handles are in field order in the DOM).
  var ids = [];
  for (var k = 0; k < point.length; k++) {
    ids.push(k);
  }
  var handles = d3.selectAll(".handle").data(ids);
  handles.transition().duration(duration)
    .attr("cx", function(d) {
      var to_pixels = handle_scales[d];
      return to_pixels(point[d]);
    });
}
// Rewrites every slider label as "<field name>: <current value>", with the
// value shown to two decimals.
function update_field_text() {
  // Bind each label to its field index (labels are in field order in the DOM).
  var ids = [];
  for (var k = 0; k < point.length; k++) {
    ids.push(k);
  }
  var labels = d3.selectAll(".field_text").data(ids);
  labels.text(function(d) {
    var shown = point[d].toFixed(2);
    return fnames[d] + ": " + shown;
  });
}
// Scaled Euclidean distance between two points given in raw field units:
// each coordinate difference is multiplied by that field's scale first.
function dist(p1, p2) {
  var sum_sq = p1.reduce(function(acc, coord, k) {
    var delta = (coord - p2[k]) * scales[k];
    return acc + delta * delta;
  }, 0);
  return Math.sqrt(sum_sq);
}
// Returns a new array holding each coordinate of `point` multiplied by the
// scale of the field at the same position. The input is left untouched.
function scale_point(point) {
  return point.map(function(coord, k) {
    return scales[k] * coord;
  });
}
// Dot product of two vectors; iterates over the length of v1.
function dotp(v1, v2) {
  return v1.reduce(function(acc, coord, k) {
    return acc + coord * v2[k];
  }, 0);
}
// Describes the plane halfway between two scaled centroids.
// Returns { n, o }: `o` is the midpoint of the two centroids and `n` is the
// vector from the first centroid to the second.
function plane(cent_id1, cent_id2) {
  var first = scaled_centroids[cent_id1];
  var second = scaled_centroids[cent_id2];
  return {
    n: first.map(function(coord, k) { return second[k] - coord; }),
    o: first.map(function(coord, k) { return (second[k] + coord) / 2; })
  };
}
// Describes the axis-parallel line through the current point along one field.
// Returns { i, o, v }: `i` is the field index, `o` is the scaled current point
// with that field's coordinate set to 0, and `v` is the unit vector along it.
function line(field_index) {
  var origin = scale_point(point);
  origin[field_index] = 0;
  var direction = [];
  for (var k = 0; k < point.length; k++) {
    direction.push(0);
  }
  direction[field_index] = 1;
  return { i: field_index, o: origin, v: direction };
}
// Finds where `line` crosses `plane` and returns that crossing as a value of
// the line's field in raw (unscaled) units. When the line is parallel to the
// plane the division yields Infinity or NaN.
function vdist(line, plane) {
  // Vector from the line's origin to the plane's origin.
  var offset = plane.o.map(function(coord, k) {
    return coord - line.o[k];
  });
  var along = dotp(plane.n, offset) / dotp(line.v, plane.n);
  return along / scales[line.i];
}
});
</script>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment