Skip to content

Instantly share code, notes, and snippets.

@ashenfad
Last active December 15, 2015 08:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ashenfad/5235191 to your computer and use it in GitHub Desktop.
Save ashenfad/5235191 to your computer and use it in GitHub Desktop.
BigML Tree - Iris Data

A sunburst visualization of a BigML decision tree built on the iris dataset.

The initial center circle represents the root of the tree. Each outer circle contains the children of the inner circle's nodes. The number of training instances captured by a node determines its arc length (i.e. its size in radians).

Clicking on a node will zoom in to the subtree. After zooming in, selecting the new center point will zoom out one level.

<!DOCTYPE html>
<meta charset="utf-8">
<style>
/* Page frame: fixed-width, centred, and the positioning context for the overlays below. */
body {
  font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
  margin: auto;
  position: relative;
  width: 960px;
  background: #fff;
}

/* Colour-mode radio buttons, pinned to the top-right corner. */
#color-controls {
  font: 14px sans-serif;
  position: absolute;
  right: 10px;
  top: 10px;
  padding: 3px;
}

#color-controls div {
  padding: 4px;
}

/* Details of the hovered node, pinned to the top-left corner. */
#hover-info {
  font: 14px sans-serif;
  position: absolute;
  left: 10px;
  top: 10px;
}

/* Accumulated path constraints for the hovered node, pinned to the bottom-left corner.
   The rule used to declare `font` twice (14px, then 12px); only the later 12px
   declaration ever took effect, so the dead one has been dropped. */
#summary-info {
  position: absolute;
  left: 10px;
  bottom: 10px;
  font: 12px sans-serif;
}

#summary-info div {
  padding: 2px;
}

/* Heading line of the hover panel (the split predicate text). */
.split-predicate {
  font-weight: bold;
  border-bottom: 1px solid #DFDFDF;
  padding: 7px;
}

/* Table of prediction / confidence / count shown below the predicate. */
.node-info {
  margin-top: 10px;
}

.node-info td {
  padding: 2px 7px 2px;
}
</style>
<body>
  <!-- Colour-mode selector; the script listens for "change" on these inputs and
       uses each input's value to pick the fill function. -->
  <div id="color-controls">
    <form>
      <div>
        <input type="radio" name="mode" id="mode-prediction" value="prediction" checked>
        <label for="mode-prediction">Prediction</label>
      </div>
      <div>
        <input type="radio" name="mode" id="mode-confidence" value="confidence">
        <!-- The script rewrites this label's text for regression models. -->
        <label id="cnf" for="mode-confidence">Confidence</label>
      </div>
      <div>
        <input type="radio" name="mode" id="mode-split" value="split">
        <label for="mode-split">Split Field</label>
      </div>
    </form>
  </div>
  <!-- Filled on mouseover with the hovered node's predicate and statistics. -->
  <div id="hover-info"></div>
  <!-- Filled on mouseover with the constraints accumulated along the path to the node. -->
  <div id="summary-info"></div>
</body>
<!-- Loaded over https so the script is not blocked as mixed content on secure pages. -->
<script src="https://d3js.org/d3.v3.min.js"></script>
<script>
// Size of the SVG canvas, in pixels.
var width = 960;
var height = 600;
// Outer radius of the sunburst: half of the smaller canvas dimension.
var radius = Math.min(width, height) / 2;
/**
 * Picks the fill colour for a node, brightening it while the node is hovered.
 *
 * @param {Object} d - Node datum; its `hover` flag decides whether to brighten.
 * @param {*} color - Base colour for the node.
 * @returns {*} `color` unchanged when `d.hover` is falsy, otherwise the result
 *   of `d3.rgb(color).brighter(0.66)`.
 */
function hover_adjust(d, color) {
  if (!d.hover) {
    return color;
  }
  return d3.rgb(color).brighter(0.66);
}
// Drawing surface: an <svg> of the configured size holding one <g> whose origin
// is moved to the horizontal centre and 10px below the vertical centre.
var svg = d3.select("body")
  .append("svg")
    .attr("width", width)
    .attr("height", height)
  .append("g")
    .attr("transform", "translate(" + (width / 2) + "," + (height / 2 + 10) + ")");

// Partition layout sized by each node's `count` property.
var partition = d3.layout.partition()
  .value(function(node) { return node.count; });

// x maps onto angles in [0, 2*PI]; y maps onto radii in [0, radius] via a sqrt scale.
var x = d3.scale.linear().range([0, 2 * Math.PI]);
var y = d3.scale.sqrt().range([0, radius]);

// Arc generator. Angles are clamped to [0, 2*PI] and radii to >= 0.
var arc = d3.svg.arc()
  .startAngle(function(node) {
    var angle = x(node.x);
    return Math.max(0, Math.min(2 * Math.PI, angle));
  })
  .endAngle(function(node) {
    var angle = x(node.x + node.dx);
    return Math.max(0, Math.min(2 * Math.PI, angle));
  })
  .innerRadius(function(node) {
    var inner = y(node.y);
    return Math.max(0, inner);
  })
  .outerRadius(function(node) {
    var outer = y(node.y + node.dy);
    return Math.max(0, outer);
  });
// Interpolate the scales!
/**
 * Builds an attrTween factory that zooms the chart onto node `d`.
 *
 * Interpolators are created from the current x domain, y domain and y range
 * towards the extent of `d`. The y range starts at 20 when `d.y` is truthy and
 * at 0 otherwise.
 *
 * @param {Object} d - Target node; its `x`, `dx` and `y` properties are read.
 * @returns {Function} A function `(node, index)` returning a tween `t -> path`.
 *   Only the tween for index 0 updates the shared scales; every other tween
 *   just re-renders its node with `arc`.
 */
function arcTween(d) {
  var targetRange = [d.y ? 20 : 0, radius];
  var domainX = d3.interpolate(x.domain(), [d.x, d.x + d.dx]);
  var domainY = d3.interpolate(y.domain(), [d.y, 1]);
  var rangeY = d3.interpolate(y.range(), targetRange);

  return function(node, index) {
    if (index) {
      return function(t) {
        return arc(node);
      };
    }
    return function(t) {
      x.domain(domainX(t));
      y.domain(domainY(t)).range(rangeY(t));
      return arc(node);
    };
  };
}
/**
 * Recursively finds the smallest and largest value of `attr` over a node and
 * all of its descendants.
 *
 * @param {Object} node - Tree node; may carry a `children` array of nodes.
 * @param {string} attr - Name of the numeric property to inspect on each node.
 * @returns {{min: number, max: number}} Extremes of `attr` across the subtree.
 */
function find_minmax(node, attr) {
  if (!node.children) {
    return {"min": node[attr], "max": node[attr]};
  }
  // These were previously assigned without `var`, which leaked them as globals
  // and throws a ReferenceError in strict mode / ES modules.
  var child_ranges = node.children.map(function (child) {
    return find_minmax(child, attr);
  });
  var min = Math.min.apply(null, child_ranges.map(function (r) { return r.min; }));
  var max = Math.max.apply(null, child_ranges.map(function (r) { return r.max; }));
  return {"min": Math.min(min, node[attr]), "max": Math.max(max, node[attr])};
}
// Load the model description, draw one arc per tree node and wire up the
// click (zoom), colour-mode and hover interactions.
d3.json("iris-model.json", function(error, root) {
  if (error || !root) {
    // Previously a failed load only surfaced as a TypeError on `root.model`.
    console.error("Unable to load iris-model.json", error);
    return;
  }

  var model = root.model;
  // A categorical objective field means classification; anything else is
  // treated as regression.
  var model_type = model.model_fields[root.objective_field].optype === "categorical" ?
    "classification" : "regression";

  // Value ranges over the whole tree, used as colour-scale domains.
  var minmaxs = {};
  if (model_type === "classification") {
    minmaxs.confidence = find_minmax(model.root, "confidence");
  } else {
    // Hacky label switch for regression trees
    document.getElementById("cnf").innerHTML = "Expected Error";
    // NOTE(review): the expected-error range is read from each node's
    // `confidence` property - presumably that is where regression models
    // store it; confirm against the model format.
    minmaxs.expected_error = find_minmax(model.root, "confidence");
    minmaxs.output = find_minmax(model.root, "output");
  }

  // Prediction colours: categorical palette for classes, linear ramp for numbers.
  var scale_pred = model_type === "classification" ?
    d3.scale.category10() :
    d3.scale.linear().domain([minmaxs.output.min,
                              minmaxs.output.max])
      .range(["#222", "#2ee"]);
  // Confidence colours. For regression the domain is reversed so the largest
  // value maps to "#d33" and the smallest to "#3d3".
  var scale_conf = model_type === "classification" ?
    d3.scale.linear().domain([minmaxs.confidence.min,
                              minmaxs.confidence.max])
      .range(["#d33", "#3d3"]) :
    d3.scale.linear().domain([minmaxs.expected_error.max,
                              minmaxs.expected_error.min])
      .range(["#d33", "#3d3"]);
  var scale_split = d3.scale.category20b();

  // One fill function per colour mode; keys match the radio inputs' values.
  var color_lookup = {
    "prediction": function(d) { return hover_adjust(d, scale_pred(d.output)); },
    "confidence": function(d) { return hover_adjust(d, scale_conf(d.confidence)); },
    "split": function(d) { return hover_adjust(d, scale_split(d.predicate.field)); }
  };
  var color_fn = color_lookup["prediction"];

  var path = svg.selectAll("path")
    .data(partition.nodes(model.root))
    .enter().append("path")
    .attr("d", arc)
    .style("fill", color_fn)
    .style("stroke", "#fff")
    .on("click", click)
    .on("mouseover", mouseover)
    .on("mouseout", mouseout);

  // True while a zoom transition runs, so hover handlers do not set fills
  // directly in the middle of the transition.
  var click_in_progress = false;

  // Zooms onto the clicked node over 750ms.
  function click(d) {
    mark_hover(d, false);
    click_in_progress = true;
    path.transition().duration(750).style("fill", color_fn).attrTween("d", arcTween(d));
    setTimeout(function() { click_in_progress = false; }, 750);
  }

  d3.selectAll("input").on("change", change);

  // Switches the colour mode to the value of the changed input.
  function change() {
    color_fn = color_lookup[this.value];
    path.transition().duration(250).style("fill", color_fn);
  }

  // Shows the hovered node's predicate, statistics and path constraints, and
  // highlights the node together with its ancestors.
  function mouseover(d) {
    var split = d.predicate;
    // A predicate without a `field` (the root's) gets a fixed label.
    var split_msg = split.field ?
      model.model_fields[split.field].name + " " + split.operator + " " + split.value :
      "Tree Root";
    var conf_msg = {"classification": "Confidence", "regression": "Expected Error"};

    var hover = d3.select("#hover-info");
    hover.append("div").attr("class", "split-predicate").text(split_msg);
    var tbody = hover.append("table").attr("class", "node-info").append("tbody");
    var output = model_type === "classification" ? d.output : parseFloat(d.output.toFixed(3));
    table_add(tbody, "Prediction", output);
    table_add(tbody, conf_msg[model_type], parseFloat(d.confidence.toFixed(3)));
    table_add(tbody, "Count", d.count);

    mark_hover(d, true);
    if (!click_in_progress) {
      path.style("fill", color_fn);
    }

    // One line per field constrained on the way from the root to this node.
    var summ_doc = d3.select("#summary-info");
    var summaries = summarize(d);
    Object.keys(summaries).forEach(function (id) {
      var fs = summaries[id];
      var msg = model.model_fields[id].name;
      if (isNum(fs.min)) {
        msg = parseFloat(fs.min.toFixed(3)) + " < " + msg;
      }
      if (isNum(fs.max)) {
        msg += " <= " + parseFloat(fs.max.toFixed(3));
      }
      if (fs.eq) {
        msg += " = " + fs.eq;
      } else if (fs.not_eq) {
        // Only own keys are listed, joined by "|"; the old loop emitted the
        // separator before checking whether the key was an own property.
        msg += " !=" + Object.keys(fs.not_eq).map(function (category) {
          return " " + category;
        }).join("|");
      }
      summ_doc.append("div").text(msg);
    });
  }

  // Clears both info panels and removes the hover highlight.
  function mouseout(d) {
    d3.select("#hover-info").html("");
    d3.select("#summary-info").html("");
    mark_hover(d, false);
    if (!click_in_progress) {
      path.style("fill", color_fn);
    }
  }

  // Sets the `hover` flag on a node and on every ancestor up to the root.
  function mark_hover(d, val) {
    if (d.parent) {
      mark_hover(d.parent, val);
    }
    d.hover = val;
  }
});
// Set the height style of the frame element hosting this page to the chart
// height - presumably so an embedding iframe shows the whole chart.
d3.select(self.frameElement).style("height", height + "px");
/**
 * Tells whether a value is a finite number or a string that reads as one.
 *
 * @param {*} n - Value to test.
 * @returns {boolean} True when `parseFloat(n)` is not NaN and `isFinite(n)` holds.
 */
function isNum(n) {
  if (isNaN(parseFloat(n))) {
    return false;
  }
  return isFinite(n);
}
/**
 * Collects, per field, the constraints imposed by the predicates on the path
 * from the root down to `node`.
 *
 * A node without a parent contributes nothing. Each other node's predicate
 * updates the entry for its field:
 *   "<=" keeps the smallest upper bound seen in `max`,
 *   ">"  keeps the largest lower bound seen in `min`,
 *   "="  replaces the entry with `{eq: value}`,
 *   "!=" adds the value as a key of the entry's `not_eq` object.
 *
 * @param {Object} node - Tree node with `predicate` and, except for the root,
 *   `parent`.
 * @returns {Object} Map from field id to `{min?, max?, eq?, not_eq?}`.
 */
function summarize (node) {
  if (!node.parent) {
    return {};
  }
  var pred = node.predicate;
  var summary = summarize(node.parent);
  var entry = summary[pred.field];
  switch (pred.operator) {
  case "<=":
    if (entry) {
      // Assigned directly: the old code went through an undeclared `max`,
      // leaking a global and failing under strict mode.
      entry.max = isNum(entry.max) ? Math.min(pred.value, entry.max) : pred.value;
    } else {
      summary[pred.field] = {"max": pred.value};
    }
    break;
  case ">":
    if (entry) {
      // Same fix as above for the undeclared `min`.
      entry.min = isNum(entry.min) ? Math.max(pred.value, entry.min) : pred.value;
    } else {
      summary[pred.field] = {"min": pred.value};
    }
    break;
  case "=":
    summary[pred.field] = {"eq": pred.value};
    break;
  case "!=":
    if (!entry) {
      entry = summary[pred.field] = {};
    }
    if (!entry.not_eq) {
      entry.not_eq = {};
    }
    entry.not_eq[pred.value] = true;
    break;
  }
  return summary;
}
/**
 * Appends a two-cell row to a table selection.
 *
 * @param {Object} table - Selection to append the "tr" to.
 * @param {*} field - Text of the first cell.
 * @param {*} val - Text of the second cell.
 * @returns {Object} The selection of the newly appended row.
 */
function table_add (table, field, val) {
  var tr = table.append("tr");
  [field, val].forEach(function (content) {
    tr.append("td").text(content);
  });
  return tr;
}
</script>
{"public_dataset": false, "code": 200, "locale": "en-US", "short_url": "", "image": "gallery/model.png", "job_type": 400, "private": true, "dataset": "dataset/514bd86f035d07049200003f", "fields_meta": {"count": 5, "total": 5, "limit": 1000, "offset": 0}, "white_box": false, "dataset_id": "514bd86f035d07049200003f", "randomize": false, "id": "514fb3d00c0b5e04c3000093", "number_of_predictions": 0, "category": 0, "modified_fields": {}, "rows": 150, "out_of_bag": false, "ordering": 0, "fields_to_show": [{"optype": "numeric", "is_objective": false, "name": "sepal length", "datatype_name": "Numeric", "tooltip": "Numeric", "stats": [["Minimum", "4.30"], ["Mean", "5.84"], ["Median", "5.78"], ["Maximum", "7.90"], ["Std dev", "0.83"]], "id": "000000", "css": "numeric type_label datatype-tooltip"}, {"optype": "numeric", "is_objective": false, "name": "sepal width", "datatype_name": "Numeric", "tooltip": "Numeric", "stats": [["Minimum", "2.00"], ["Mean", "3.06"], ["Median", "3.02"], ["Maximum", "4.40"], ["Std dev", "0.44"]], "id": "000001", "css": "numeric type_label datatype-tooltip"}, {"optype": "numeric", "is_objective": false, "name": "petal length", "datatype_name": "Numeric", "tooltip": "Numeric", "stats": [["Minimum", "1.00"], ["Mean", "3.76"], ["Median", "4.34"], ["Maximum", "6.90"], ["Std dev", "1.77"]], "id": "000002", "css": "numeric type_label datatype-tooltip"}, {"optype": "numeric", "is_objective": false, "name": "petal width", "datatype_name": "Numeric", "tooltip": "Numeric", "stats": [["Minimum", "0.10"], ["Mean", "1.20"], ["Median", "1.33"], ["Maximum", "2.50"], ["Std dev", "0.76"]], "id": "000003", "css": "numeric type_label datatype-tooltip"}, {"optype": "categorical", "is_objective": true, "name": "species", "datatype_name": "Categorical", "tooltip": "Categorical", "id": "000004", "css": "categorical type_label datatype-tooltip"}], "views": 0, "size": 4608, "range": [1, 150], "credits_per_prediction": 0.0, "source": "source/514bcf9d0c0b5e3fd100026b", 
"number_of_public_predictions": 0, "sample_rate": 1.0, "objective_fields": ["000004"], "user_name": "ashenfad", "columns": 5, "selective_pruning": false, "status": {"progress": 1.0, "message": "The model has been created", "code": 5, "elapsed": 89}, "updated": "2013-03-25T02:17:52.927000", "description": "", "tags": [], "price": 0.0, "excluded_fields": [], "cloned": false, "credits": 0.017578125, "stat_pruning": true, "objective_field": "000004", "clones": 0, "resource": "model/514fb3d00c0b5e04c3000093", "name": "Iris Model", "created": "Mon, 25 Mar 2013 02:17:52 +0000", "url": "", "dataset_status": true, "source_status": true, "number_of_evaluations": 0, "max_columns": 5, "max_rows": 150, "input_fields": ["000000", "000001", "000002", "000003"], "published": null, "source_id": "514bcf9d0c0b5e3fd100026b", "model": {"kind": "stree", "missing_strategy": "Last prediction", "importance": [["000002", 0.53556], ["000003", 0.46444], ["000000", 0], ["000001", 0]], "fields": {"000004": {"optype": "categorical", "name": "species", "datatype": "string", "preferred": true, "summary": {"missing_count": 0, "categories": [["Iris-versicolor", 50], ["Iris-setosa", 50], ["Iris-virginica", 50]]}, "column_number": 4, "order": 4}, "000002": {"optype": "numeric", "name": "petal length", "datatype": "double", "preferred": true, "summary": {"sum_squares": 2582.71, "splits": [1.25138, 1.32426, 1.37171, 1.40962, 1.44567, 1.48173, 1.51859, 1.56301, 1.6255, 1.74645, 3.23033, 3.675, 3.94203, 4.0469, 4.18243, 4.34142, 4.45309, 4.51823, 4.61771, 4.72566, 4.83445, 4.93363, 5.03807, 5.1064, 5.20938, 5.43979, 5.5744, 5.6646, 5.81496, 6.02913, 6.38125], "missing_count": 0, "sum": 563.7, "median": 4.34142, "maximum": 6.9, "minimum": 1, "standard_deviation": 1.7653, "variance": 3.11628, "population": 150, "bins": [[1, 1], [1.1, 1], [1.2, 2], [1.3, 7], [1.4, 13], [1.5, 13], [1.63636, 11], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], 
[4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.46, 5], [5.6, 6], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "mean": 3.758}, "column_number": 2, "order": 2}, "000003": {"optype": "numeric", "name": "petal width", "datatype": "double", "preferred": true, "summary": {"sum_squares": 302.33, "missing_count": 0, "sum": 179.9, "median": 1.32848, "maximum": 2.5, "minimum": 0.1, "standard_deviation": 0.76224, "variance": 0.58101, "counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "population": 150, "mean": 1.19933}, "column_number": 3, "order": 3}, "000000": {"optype": "numeric", "name": "sepal length", "datatype": "double", "preferred": true, "summary": {"sum_squares": 5223.85, "splits": [4.51526, 4.67252, 4.81113, 4.89582, 4.96139, 5.01131, 5.05992, 5.11148, 5.18177, 5.35681, 5.44129, 5.5108, 5.58255, 5.65532, 5.71658, 5.77889, 5.85381, 5.97078, 6.05104, 6.13074, 6.23023, 6.29578, 6.35078, 6.41459, 6.49383, 6.63013, 6.70719, 6.79218, 6.92597, 7.20423, 7.64746], "missing_count": 0, "sum": 876.5, "median": 5.77889, "maximum": 7.9, "minimum": 4.3, "standard_deviation": 0.82807, "variance": 0.68569, "population": 150, "bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.7, 2], [4.8, 5], [4.9, 6], [5, 10], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.44167, 12], [6.6, 2], [6.7, 8], [6.8, 3], [6.92, 5], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "mean": 5.84333}, "column_number": 0, "order": 0}, "000001": {"optype": "numeric", "name": "sepal width", "datatype": "double", "preferred": true, "summary": {"sum_squares": 1430.4, "missing_count": 0, "sum": 458.6, "median": 3.02044, "maximum": 4.4, "minimum": 2, "standard_deviation": 0.43587, 
"variance": 0.18998, "counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "population": 150, "mean": 3.05733}, "column_number": 1, "order": 1}}, "model_fields": {"000004": {"optype": "categorical", "name": "species", "datatype": "string", "preferred": true, "column_number": 4, "order": 4}, "000002": {"optype": "numeric", "name": "petal length", "datatype": "double", "preferred": true, "column_number": 2, "order": 2}, "000003": {"optype": "numeric", "name": "petal width", "datatype": "double", "preferred": true, "column_number": 3, "order": 3}}, "support_threshold": 0, "split_criterion": "Information gain mix", "root": {"count": 150, "confidence": 0.26289, "predicate": true, "objective_summary": {"categories": [["Iris-versicolor", 50], ["Iris-setosa", 50], ["Iris-virginica", 50]]}, "output": "Iris-virginica", "children": [{"count": 50, "output": "Iris-setosa", "confidence": 0.92865, "predicate": {"operator": "<=", "field": "000002", "value": 2.45}, "objective_summary": {"categories": [["Iris-setosa", 50]]}}, {"count": 100, "confidence": 0.40383, "predicate": {"operator": ">", "field": "000002", "value": 2.45}, "objective_summary": {"categories": [["Iris-versicolor", 50], ["Iris-virginica", 50]]}, "output": "Iris-virginica", "children": [{"count": 48, "output": "Iris-virginica", "confidence": 0.86024, "predicate": {"operator": ">", "field": "000003", "value": 1.65}, "objective_summary": {"categories": [["Iris-versicolor", 2], ["Iris-virginica", 46]]}}, {"count": 52, "confidence": 0.81826, "predicate": {"operator": "<=", "field": "000003", "value": 1.65}, "objective_summary": {"categories": [["Iris-virginica", 4], ["Iris-versicolor", 48]]}, "output": "Iris-versicolor", "children": [{"count": 47, "output": "Iris-versicolor", "confidence": 0.92444, "predicate": {"operator": "<=", 
"field": "000002", "value": 4.95}, "objective_summary": {"categories": [["Iris-versicolor", 47]]}}, {"count": 5, "output": "Iris-virginica", "confidence": 0.37553, "predicate": {"operator": ">", "field": "000002", "value": 4.95}, "objective_summary": {"categories": [["Iris-versicolor", 1], ["Iris-virginica", 4]]}}]}]}]}, "depth_threshold": 20}, "replacement": false}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment