Skip to content

Instantly share code, notes, and snippets.

@armollica
Last active October 1, 2018 01:12
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save armollica/82c136720d31633bc79f1ea121a422e2 to your computer and use it in GitHub Desktop.
Save armollica/82c136720d31633bc79f1ea121a422e2 to your computer and use it in GitHub Desktop.
K-Nearest Neighbors

K-nearest neighbors search of a 2D set of points. Move the slider or scroll to change k.

The algorithm was adapted from this block

<html>
<head>
<style>
body {
font-family: monospace;
}
.k {
position: absolute;
left: 10px;
top: 10px;
}
.k span {
position: relative;
bottom: 7px;
padding: 0 10px;
}
.hull {
fill: tomato;
fill-opacity: 0.25;
stroke: tomato;
pointer-events: none;
}
</style>
</head>
<body>
<div class="k">
<span>K = 25</span><input type="range" name="k" value="25" min="1">
</div>
<script src="https://d3js.org/d3.v3.min.js" charset="utf-8"></script>
<script src="k-nearest-neighbors.js"></script>
<script>
var width = 960,
height = 500,
k = 25;
var kDiv = d3.select(".k")
.on("change", function() {
k = +kDiv.select("input").node().value;
d3.select(this).select("span").text("K = " + k);
findNearest.k(k);
});
var svg = d3.select("body").append("svg")
.attr("width", width)
.attr("height", height);
// add invisible rectangle that will pick up mouse movement
svg.append("rect")
.attr("width", width)
.attr("height", height)
.style("fill-opacity", 0)
.on("mousemove", mousemove)
.on("wheel", function() {
var wheelUp = event.deltaY < 0;
wheelUp ? k++ : k--;
k = clamp(k, 1, 100); // limit k to be between 1 and 100
kDiv.select("input").node().value = k;
kDiv.select("span").text("K = " + k);
findNearest.k(k);
mousemove.call(this);
});
var data = d3.range(500)
.map(function(d) {
return {
x: d3.random.normal(width/2, width/8)(),
y: Math.random() * height
};
});
var findNearest = d3.kNearestNeighbors()
.extent([[-1, -1], [width + 1, height + 1]])
.x(function(d) { return d.x; })
.y(function(d) { return d.y; })
.k(k)
.data(data);
var hull = svg.append("path").attr("class", "hull");
var circles = svg.append("g").attr("class", "circles")
.selectAll("circle").data(data)
.enter().append("circle")
.attr("cx", function(d) { return d.x; })
.attr("cy", function(d) { return d.y; })
.attr("r", 2)
.style("opacity", 0.5);
function mousemove() {
preventDefault(event);
var nearest = findNearest(d3.mouse(this));
// Draw convex hull around k-nearest points (if k > 1)
hull.datum(d3.geom.hull(nearest))
.attr("d", function(d) {
return d.length > 1 ? "M" + d.join("L") + "Z" : null;
});
// Get corresponding "nearest" data points from original data array
nearest = nearest
.map(function(d) { return data[d.i]; });
// Color nearest points red
circles
.style("fill", function(d) {
return nearest.indexOf(d) !== -1 ? "red" : null;
});
}
function clamp(d, min, max) {
return d < min ? min : d > max ? max : d;
}
function preventDefault(e) {
e = e || window.event;
if (e.preventDefault) e.preventDefault();
e.returnValue = false;
}
</script>
</body>
</html>
// algorithm source: http://bl.ocks.org/llb4ll/8709363
d3.kNearestNeighbors = function() {
var points = [],
nodes,
data,
extent = null,
k = 1,
quadtree = d3.geom.quadtree(),
x = function(d) { return d[0]; },
y = function(d) { return d[1]; };
function findNearest(point) {
// TODO: make this more efficient by not recalculating quadtree at
// each call of findNearest()
// Extract points from the data array
points = data.map(function(d) { return [x(d), y(d)]; });
// Add quadtree info to the points
nodes = quadtreeify(points);
// Flag k-nearest points by adding `selected` property set to `true`
kNearest(new Array(nodes), [], point);
// Return nearest points along with indices from origianl `data` array
return points
.map(function(d, i) {
var datum = [d[0], d[1]];
datum.i = i;
return d.selected ? datum : null;
})
.filter(function(d) { return d !== null; });
}
findNearest.extent = function(_) {
if (!arguments.length) return extent;
extent = _;
quadtree.extent(extent);
return findNearest;
};
findNearest.data = function(_) {
if (!arguments.length) return data;
data = _;
return findNearest;
};
findNearest.k = function(_) {
if (!arguments.length) return k;
k = _;
return findNearest;
};
findNearest.x = function(_) {
if (!arguments.length) return x;
x = _;
return findNearest;
};
findNearest.y = function(_) {
if (!arguments.length) return y;
y = _;
return findNearest;
};
return findNearest;
// Add quadtree information to each point (i.e., rectangles, depth, ...)
function quadtreeify(points) {
var nodes = quadtree(points);
nodes.depth = 0;
nodes.visit(function(node, x1, y1, x2, y2) {
node.x1 = x1;
node.y1 = y1;
node.x2 = x2;
node.y2 = y2;
for (var i = 0; i < 4; i++) {
if (node.nodes[i]) node.nodes[i].depth = node.depth + 1;
}
});
return nodes;
}
// calculate the euclidean distance of two points with coordinates a(ax, ay) and b(bx, by)
function euclideanDistance(ax, ay, bx, by) {
return Math.sqrt(Math.pow(ax - bx, 2) + Math.pow(ay - by, 2));
}
// calculate minimum distance between search point rectangles
function minDistance(x, y, x1, y1, x2, y2) {
var dx1 = x - x1,
dx2 = x - x2,
dy1 = y - y1,
dy2 = y - y2;
// x is between x1 and x2
if (dx1 * dx2 < 0) {
// (x, y) is inside the rectangle
if (dy1 * dy2 < 0) {
return 0; // return 0 as a point in the rectangle
}
return Math.min(Math.abs(dy1), Math.abs(dy2));
}
// y is between y1 and y2 (and not inside rectangle)
if (dy1 * dy2 < 0) {
return Math.min(Math.abs(dx1), Math.abs(dx2));
}
return Math.min(
Math.min(euclideanDistance(x,y,x1,y1), euclideanDistance(x,y,x2,y2)),
Math.min(euclideanDistance(x,y,x1,y2), euclideanDistance(x,y,x2,y1))
);
}
// Find the nodes within the specified rectangle (used recursively)
function kNearest(bestQueue, resultQueue, point) {
var x = point[0],
y = point[1];
// sort children according to their minDistance/euclideanDistance to search point
bestQueue.sort(function(a, b) {
// add minDistance to nodes if not there already
[a, b].forEach(function(d) {
if (d.minDistance === undefined) {
d.scanned = true;
if (d.leaf) {
d.point.scanned = true;
d.minDistance = euclideanDistance(x, y, d.x, d.y);
}
else {
d.minDistance = minDistance(x, y, d.x1, d.y1, d.x2, d.y2);
}
}
});
return b.minDistance - a.minDistance;
});
// add nearest leafs (if any)
for (var i = bestQueue.length - 1; i >= 0; i--) {
var elem = bestQueue[i];
if (elem.leaf) {
elem.point.selected = true;
bestQueue.pop();
resultQueue.push(elem);
} else { break; }
if (resultQueue.length >= k) break;
}
// check if enough points found
if (resultQueue.length >= k || bestQueue.length == 0) { return; }
else {
// ...otherwise add child nodes to bestQueue and recurse
var visitedNode = bestQueue.pop();
visitedNode.visited = true;
visitedNode.nodes.forEach(function(d) {
bestQueue.push(d);
});
kNearest(bestQueue, resultQueue, point);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment