Fil · December 26, 2016 19:35
diff --git a/.block b/.block
 license: mit
diff --git a/README.md b/README.md
diff --git a/cities.csv b/cities.csv
diff --git a/index.html b/index.html
 <!DOCTYPE html>

 <head>
    <meta charset="utf-8">
    <script src="https://d3js.org/d3.v4.min.js"></script>

    <style>
        body {
            margin: 0;
            position: fixed;
            top: 0;
            right: 0;
            bottom: 0;
            left: 0;
          font-family: monospace;
        }
    </style>
 </head>

 <body>
 <script>
    const width = 960,
        height = 500,
        margin = { top: 40, bottom: 40, left: 140, right: 140 },
        scalepop = d3.scaleSqrt().domain([0, 100000]).range([0.2, 28]),
        scalecountry = d3.scaleOrdinal(
          d3.merge([d3.schemeCategory20b, d3.schemeCategory20c])),
        centerx = d3.scaleLinear()
        .range([margin.left, width - margin.right]),
        centery = d3.scaleLinear()
        .range([margin.top, height - margin.bottom]);

    d3.csv('cities.csv', function (cities) {

        const data = cities
            .sort((a, b) => d3.descending(+a[2015], +b[2015]))
            .map((d, i) => {
              return {
                lon: +d.Longitude,
                lat: +d.Latitude,
                name: d['Urban Agglomeration'],
                r: scalepop(+d[2015]),
                //color: scalecountry(+d['Country Code'])
              };
            })
            .slice(0, 800);

      /*
      const canvas = d3.select("body").append("canvas")
            .attr("width", width)
            .attr("height", height);
       */
        const svg = d3.select("body").append("svg")
            .attr("width", width)
            .attr("height", height);

      
        // pos is the array of positions that will be updated by the tsne worker
        // start with the geographic coordinates as is (plate-carrée)
        // random or [0,0] is fine too
        let pos = data.map(d => [d.lon, -d.lat]); 
      
      const forcetsne = d3.forceSimulation(
          data.map(d => (d.x = width / 2, d.y = height / 2, d))
        )
            .alphaDecay(0.005) 
            .alpha(0.1)
            .force('tsne', function (alpha) {

                centerx.domain(d3.extent(pos.map(d => d[0]))); 
                centery.domain(d3.extent(pos.map(d => d[1])));

                data.forEach((d, i) => {
                    d.vx += alpha * (centerx(pos[i][0]) - d.x);
                    d.vy += alpha * (centery(pos[i][1]) - d.y);
                    d.color = scalecountry(pos[i][2]);
                });
            })
            .force('collide', d3.forceCollide().radius(d => 1.5 + d.r))
            .on('tick', function () {

              // drawcanvas(canvas, data);
              drawsvg(svg, data);

            });

        function drawcanvas(canvas, nodes) {
            let context = canvas.node().getContext("2d");
            context.clearRect(0, 0, width, width);

            for (var i = 0, n = nodes.length; i < n; ++i) {
                var node = nodes[i];
                context.beginPath();
                context.moveTo(node.x, node.y);
                context.arc(node.x, node.y, node.r, 0, 2 * Math.PI);
                context.lineWidth = 0.5;
                context.fillStyle = node.color;
                context.fill();
            }
        }

        function drawsvg(svg, nodes) {
          const g = svg.selectAll('g.city')
          .data(nodes);
          
          var enter = g.enter().append('g').classed('city', true);

          enter.append('circle')
          .attr('r', d => d.r)
          //.attr('fill', d => d.color)
          .append('title')
          .text(d => d.name); 
          
          enter
          .filter(d => d.r > 6)
          .append('text')
          .attr('fill', 'white')
          .style('font-size', d => d.r > 9 ? '12px' : '9px')
          .attr('text-anchor', 'middle')
          .attr('dominant-baseline', 'middle')
          .attr('pointer-events', 'none')
          .text(d => d.name.substring(0,2));
          
          g
          .attr('fill', d => d.color)
          .attr('transform', d => `translate(${d.x},${d.y})`);

        }

      
 d3.queue()
    .defer(d3.text, 'tsne.js')
    .defer(d3.text, 'worker.js')
    .awaitAll(function (err, scripts) {

    const worker = new Worker(
      window.URL.createObjectURL(
        new Blob(scripts, {
          type: "text/javascript"
        })
      )
    );
  
  worker.postMessage({
      maxIter: 10,
      dim: 2,
      perplexity: 30.0,
      data: data
  });

  worker.onmessage = function (e) {
    if (e.data.log) console.log.apply(this, e.data.log);
    if (e.data.pos) pos = e.data.pos;

    if (e.data.done && e.data.done < 10000  && e.data.cost > 1e-2) {
      worker.postMessage({
            maxIter: e.data.done + 10,
      });
    }
  };
 });
      
    });
 </script>
 </body>
diff --git a/thumbnail.png b/thumbnail.png
diff --git a/tsne.js b/tsne.js
 // create main global object
 var tsnejs = tsnejs || { REVISION: 'ALPHA' };

 (function(global) {
  "use strict";

  // utility function
  var assert = function(condition, message) {
    if (!condition) { throw message || "Assertion failed"; }
  }

  // syntax sugar
  var getopt = function(opt, field, defaultval) {
    if(opt.hasOwnProperty(field)) {
      return opt[field];
    } else {
      return defaultval;
    }
  }

  // return 0 mean unit standard deviation random number
  var return_v = false;
  var v_val = 0.0;
  var gaussRandom = function() {
    if(return_v) { 
      return_v = false;
      return v_val; 
    }
    var u = 2*Math.random()-1;
    var v = 2*Math.random()-1;
    var r = u*u + v*v;
    if(r == 0 || r > 1) return gaussRandom();
    var c = Math.sqrt(-2*Math.log(r)/r);
    v_val = v*c; // cache this for next function call for efficiency
    return_v = true;
    return u*c;
  }

  // return random normal number
  var randn = function(mu, std){ return mu+gaussRandom()*std; }

  // utilitity that creates contiguous vector of zeros of size n
  var zeros = function(n) {
    if(typeof(n)==='undefined' || isNaN(n)) { return []; }
    if(typeof ArrayBuffer === 'undefined') {
      // lacking browser support
      var arr = new Array(n);
      for(var i=0;i<n;i++) { arr[i]= 0; }
      return arr;
    } else {
      return new Float64Array(n); // typed arrays are faster
    }
  }

  // utility that returns 2d array filled with random numbers
  // or with value s, if provided
  var randn2d = function(n,d,s) {
    var uses = typeof s !== 'undefined';
    var x = [];
    for(var i=0;i<n;i++) {
      var xhere = [];
      for(var j=0;j<d;j++) { 
        if(uses) {
          xhere.push(s); 
        } else {
          xhere.push(randn(0.0, 1e-4)); 
        }
      }
      x.push(xhere);
    }
    return x;
  }

  // compute L2 distance between two vectors
  var L2 = function(x1, x2) {
    var D = x1.length;
    var d = 0;
    for(var i=0;i<D;i++) { 
      var x1i = x1[i];
      var x2i = x2[i];
      d += (x1i-x2i)*(x1i-x2i);
    }
    return d;
  }

  // compute pairwise distance in all vectors in X
  var xtod = function(X) {
    var N = X.length;
    var dist = zeros(N * N); // allocate contiguous array
    for(var i=0;i<N;i++) {
      for(var j=i+1;j<N;j++) {
        var d = L2(X[i], X[j]);
        dist[i*N+j] = d;
        dist[j*N+i] = d;
      }
    }
    return dist;
  }

  // compute (p_{i|j} + p_{j|i})/(2n)
  var d2p = function(D, perplexity, tol) {
    var Nf = Math.sqrt(D.length); // this better be an integer
    var N = Math.floor(Nf);
    assert(N === Nf, "D should have square number of elements.");
    var Htarget = Math.log(perplexity); // target entropy of distribution
    var P = zeros(N * N); // temporary probability matrix

    var prow = zeros(N); // a temporary storage compartment
    for(var i=0;i<N;i++) {
      var betamin = -Infinity;
      var betamax = Infinity;
      var beta = 1; // initial value of precision
      var done = false;
      var maxtries = 50;

      // perform binary search to find a suitable precision beta
      // so that the entropy of the distribution is appropriate
      var num = 0;
      while(!done) {
        //debugger;

        // compute entropy and kernel row with beta precision
        var psum = 0.0;
        for(var j=0;j<N;j++) {
          var pj = Math.exp(- D[i*N+j] * beta);
          if(i===j) { pj = 0; } // we dont care about diagonals
          prow[j] = pj;
          psum += pj;
        }
        // normalize p and compute entropy
        var Hhere = 0.0;
        for(var j=0;j<N;j++) {
          if(psum == 0) {
             var pj = 0;
          } else {
             var pj = prow[j] / psum;
          }
          prow[j] = pj;
          if(pj > 1e-7) Hhere -= pj * Math.log(pj);
        }

        // adjust beta based on result
        if(Hhere > Htarget) {
          // entropy was too high (distribution too diffuse)
          // so we need to increase the precision for more peaky distribution
          betamin = beta; // move up the bounds
          if(betamax === Infinity) { beta = beta * 2; }
          else { beta = (beta + betamax) / 2; }

        } else {
          // converse case. make distrubtion less peaky
          betamax = beta;
          if(betamin === -Infinity) { beta = beta / 2; }
          else { beta = (beta + betamin) / 2; }
        }

        // stopping conditions: too many tries or got a good precision
        num++;
        if(Math.abs(Hhere - Htarget) < tol) { done = true; }
        if(num >= maxtries) { done = true; }
      }

      // console.log('data point ' + i + ' gets precision ' + beta + ' after ' + num + ' binary search steps.');
      // copy over the final prow to P at row i
      for(var j=0;j<N;j++) { P[i*N+j] = prow[j]; }

    } // end loop over examples i

    // symmetrize P and normalize it to sum to 1 over all ij
    var Pout = zeros(N * N);
    var N2 = N*2;
    for(var i=0;i<N;i++) {
      for(var j=0;j<N;j++) {
        Pout[i*N+j] = Math.max((P[i*N+j] + P[j*N+i])/N2, 1e-100);
      }
    }

    return Pout;
  }

  // helper function
  function sign(x) { return x > 0 ? 1 : x < 0 ? -1 : 0; }

  var tSNE = function(opt) {
    var opt = opt || {};
    this.perplexity = getopt(opt, "perplexity", 30); // effective number of nearest neighbors
    this.dim = getopt(opt, "dim", 2); // by default 2-D tSNE
    this.epsilon = getopt(opt, "epsilon", 10); // learning rate

    this.iter = 0;
  }

  tSNE.prototype = {

    // this function takes a set of high-dimensional points
    // and creates matrix P from them using gaussian kernel
    initDataRaw: function(X) {
      var N = X.length;
      var D = X[0].length;
      assert(N > 0, " X is empty? You must have some data!");
      assert(D > 0, " X[0] is empty? Where is the data?");
      var dists = xtod(X); // convert X to distances using gaussian kernel
      this.P = d2p(dists, this.perplexity, 1e-4); // attach to object
      this.N = N; // back up the size of the dataset
      this.initSolution(); // refresh this
    },

    // this function takes a given distance matrix and creates
    // matrix P from them.
    // D is assumed to be provided as a list of lists, and should be symmetric
    initDataDist: function(D) {
      var N = D.length;
      assert(N > 0, " X is empty? You must have some data!");
      // convert D to a (fast) typed array version
      var dists = zeros(N * N); // allocate contiguous array
      for(var i=0;i<N;i++) {
        for(var j=i+1;j<N;j++) {
          var d = D[i][j];
          dists[i*N+j] = d;
          dists[j*N+i] = d;
        }
      }
      this.P = d2p(dists, this.perplexity, 1e-4);
      this.N = N;
      this.initSolution(); // refresh this
    },

    // (re)initializes the solution to random
    initSolution: function() {
      // generate random solution to t-SNE
      this.Y = randn2d(this.N, this.dim); // the solution
      this.gains = randn2d(this.N, this.dim, 1.0); // step gains to accelerate progress in unchanging directions
      this.ystep = randn2d(this.N, this.dim, 0.0); // momentum accumulator
      this.iter = 0;
    },

    // return pointer to current solution
    getSolution: function() {
      return this.Y;
    },

    // perform a single step of optimization to improve the embedding
    step: function() {
      this.iter += 1;
      var N = this.N;

      var cg = this.costGrad(this.Y); // evaluate gradient
      var cost = cg.cost;
      var grad = cg.grad;

      // perform gradient step
      var ymean = zeros(this.dim);
      for(var i=0;i<N;i++) {
        for(var d=0;d<this.dim;d++) {
          var gid = grad[i][d];
          var sid = this.ystep[i][d];
          var gainid = this.gains[i][d];

          // compute gain update
          var newgain = sign(gid) === sign(sid) ? gainid * 0.8 : gainid + 0.2;
          if(newgain < 0.01) newgain = 0.01; // clamp
          this.gains[i][d] = newgain; // store for next turn

          // compute momentum step direction
          var momval = this.iter < 250 ? 0.5 : 0.8;
          var newsid = momval * sid - this.epsilon * newgain * grad[i][d];
          this.ystep[i][d] = newsid; // remember the step we took

          // step!
          this.Y[i][d] += newsid; 

          ymean[d] += this.Y[i][d]; // accumulate mean so that we can center later
        }
      }

      // reproject Y to be zero mean
      for(var i=0;i<N;i++) {
        for(var d=0;d<this.dim;d++) {
          this.Y[i][d] -= ymean[d]/N;
        }
      }

      //if(this.iter%100===0) console.log('iter ' + this.iter + ', cost: ' + cost);
      return cost; // return current cost
    },

    // for debugging: gradient check
    debugGrad: function() {
      var N = this.N;

      var cg = this.costGrad(this.Y); // evaluate gradient
      var cost = cg.cost;
      var grad = cg.grad;

      var e = 1e-5;
      for(var i=0;i<N;i++) {
        for(var d=0;d<this.dim;d++) {
          var yold = this.Y[i][d];

          this.Y[i][d] = yold + e;
          var cg0 = this.costGrad(this.Y);

          this.Y[i][d] = yold - e;
          var cg1 = this.costGrad(this.Y);
          
          var analytic = grad[i][d];
          var numerical = (cg0.cost - cg1.cost) / ( 2 * e );
          console.log(i + ',' + d + ': gradcheck analytic: ' + analytic + ' vs. numerical: ' + numerical);

          this.Y[i][d] = yold;
        }
      }
    },

    // return cost and gradient, given an arrangement
    costGrad: function(Y) {
      var N = this.N;
      var dim = this.dim; // dim of output space
      var P = this.P;

      var pmul = this.iter < 100 ? 4 : 1; // trick that helps with local optima

      // compute current Q distribution, unnormalized first
      var Qu = zeros(N * N);
      var qsum = 0.0;
      for(var i=0;i<N;i++) {
        for(var j=i+1;j<N;j++) {
          var dsum = 0.0;
          for(var d=0;d<dim;d++) {
            var dhere = Y[i][d] - Y[j][d];
            dsum += dhere * dhere;
          }
          var qu = 1.0 / (1.0 + dsum); // Student t-distribution
          Qu[i*N+j] = qu;
          Qu[j*N+i] = qu;
          qsum += 2 * qu;
        }
      }
      // normalize Q distribution to sum to 1
      var NN = N*N;
      var Q = zeros(NN);
      for(var q=0;q<NN;q++) { Q[q] = Math.max(Qu[q] / qsum, 1e-100); }

      var cost = 0.0;
      var grad = [];
      for(var i=0;i<N;i++) {
        var gsum = new Array(dim); // init grad for point i
        for(var d=0;d<dim;d++) { gsum[d] = 0.0; }
        for(var j=0;j<N;j++) {
          cost += - P[i*N+j] * Math.log(Q[i*N+j]); // accumulate cost (the non-constant portion at least...)
          var premult = 4 * (pmul * P[i*N+j] - Q[i*N+j]) * Qu[i*N+j];
          for(var d=0;d<dim;d++) {
            gsum[d] += premult * (Y[i][d] - Y[j][d]);
          }
        }
        grad.push(gsum);
      }

      return {cost: cost, grad: grad};
    }
  }

  global.tSNE = tSNE; // export tSNE class
 })(tsnejs);


 // export the library to window, or to module in nodejs
 (function(lib) {
  "use strict";
  if (typeof module === "undefined" || typeof module.exports === "undefined") {
    if (typeof window == "object")
        window.tsnejs = lib; // in ordinary browser attach library to window
  } else {
    module.exports = lib; // in nodejs
  }
 })(tsnejs);
diff --git a/worker.js b/worker.js
 importScripts('https://d3js.org/d3.v4.min.js');
 // in worker.onmessage add : `if (e.data.log) console.log.apply(this, e.data.log);`
 console.log = function() {
  self.postMessage({ log: [...arguments] });
 }

  function levenshtein(a, b) {
  var t = [], u, i, j, m = a.length, n = b.length;
  if (!m) { return n; }
  if (!n) { return m; }
  for (j = 0; j <= n; j++) { t[j] = j; }
  for (i = 1; i <= m; i++) {
    for (u = [i], j = 1; j <= n; j++) {
      u[j] = a[i - 1] === b[j - 1] ? t[j - 1] : Math.min(t[j - 1], t[j], u[j - 1]) + 1;
    } t = u;
  } return u[n];
 }

 let iterations = 0,
    model;
 let K=18,
      centers = d3.range(K).map(i => [ 0, 0 ]);

 self.onmessage = function(e) {
  const msg = e.data,
        data = msg.data,
        maxIter = msg.maxIter || 500,
        perplexity = msg.perplexity || 30;

  let dists = msg.dist;
  if (data && !dists){
    let now = performance.now();
    dists = data.map(
      a => data.map(
        b => d3.range(4).map(n => levenshtein(a.name.substring(0,n).toLowerCase(), b.name.substring(0,n).toLowerCase()) * Math.pow(1/2, n))
                      .reduce((a,b)=>a+b, 0)
      )
    );
    console.log('computed', data.length * data.length,'distances in', Math.round(performance.now()-now),'ms');
  }
  if (dists) {
      model = new tsnejs.tSNE({
          dim: msg.dim,
          perplexity: perplexity,
      });
      model.initDataDist(dists);
  }

  let startpos = model.getSolution().map(d=>d.slice()),
      pos;

  
 
  
  while (iterations++ < maxIter) {
       // every time you call this, solution gets better
       model.step();
    
    pos = model.getSolution();

    // each K-center moves to the barycenter of its group
    var m = pos[Math.floor(Math.random() * pos.length)];

    centers.forEach((c,i) => {
      let p = pos.filter(d => d[2] == i);
      if (p.length < 10) c = m;
      else {
            c[0] = d3.mean(p.map(d => d[0])); 
            c[1] = d3.mean(p.map(d => d[1])); 
      }
          });
    // group each point according to its closest K-center
   pos.forEach(d => { 
       d[2] = d3.scan(centers.map(c => {
               var dx = d[0] - c[0],
                   dy = d[1] - c[1];
               return  (dx*dx + dy*dy);
             }));
    });
   
    
    
       // Y is an array of 2-D points that you can plot
       self.postMessage({
         iterations: iterations - 1,
         pos: pos,
         //log: [ 'step', iterations-1 ]
       });

   }

  let cost = startpos
       .map((d,i) => sqdist(d, pos[i]))
       .reduce ((a,b) => a + b, 0) / pos.length / Math.max(...pos.map(d => Math.abs(d[0]) + Math.abs(d[1])));
  
   self.postMessage({
     done: iterations - 1,
     cost: cost,
     log: [ 'done', iterations - 1 ]
   });
 };

 function sqdist (a,b) {
  let d = [a[0] - b[0], a[1] - b[1]].map(Math.abs);
  return Math.max(d[0], d[1]); 
 }
	<!DOCTYPE html>

	<head>
	<meta charset="utf-8">
	<script src="https://d3js.org/d3.v4.min.js"></script>

	<style>
	body {
	margin: 0;
	position: fixed;
	top: 0;
	right: 0;
	bottom: 0;
	left: 0;
	font-family: monospace;
	}
	</style>
	</head>

	<body>
	<script>
	const width = 960,
	height = 500,
	margin = { top: 40, bottom: 40, left: 140, right: 140 },
	scalepop = d3.scaleSqrt().domain([0, 100000]).range([0.2, 28]),
	scalecountry = d3.scaleOrdinal(
	d3.merge([d3.schemeCategory20b, d3.schemeCategory20c])),
	centerx = d3.scaleLinear()
	.range([margin.left, width - margin.right]),
	centery = d3.scaleLinear()
	.range([margin.top, height - margin.bottom]);

	d3.csv('cities.csv', function (cities) {

	const data = cities
	.sort((a, b) => d3.descending(+a[2015], +b[2015]))
	.map((d, i) => {
	return {
	lon: +d.Longitude,
	lat: +d.Latitude,
	name: d['Urban Agglomeration'],
	r: scalepop(+d[2015]),
	//color: scalecountry(+d['Country Code'])
	};
	})
	.slice(0, 800);

	/*
	const canvas = d3.select("body").append("canvas")
	.attr("width", width)
	.attr("height", height);
	*/
	const svg = d3.select("body").append("svg")
	.attr("width", width)
	.attr("height", height);


	// pos is the array of positions that will be updated by the tsne worker
	// start with the geographic coordinates as is (plate-carrée)
	// random or [0,0] is fine too
	let pos = data.map(d => [d.lon, -d.lat]);

	const forcetsne = d3.forceSimulation(
	data.map(d => (d.x = width / 2, d.y = height / 2, d))
	)
	.alphaDecay(0.005)
	.alpha(0.1)
	.force('tsne', function (alpha) {

	centerx.domain(d3.extent(pos.map(d => d[0])));
	centery.domain(d3.extent(pos.map(d => d[1])));

	data.forEach((d, i) => {
	d.vx += alpha * (centerx(pos[i][0]) - d.x);
	d.vy += alpha * (centery(pos[i][1]) - d.y);
	d.color = scalecountry(pos[i][2]);
	});
	})
	.force('collide', d3.forceCollide().radius(d => 1.5 + d.r))
	.on('tick', function () {

	// drawcanvas(canvas, data);
	drawsvg(svg, data);

	});

	function drawcanvas(canvas, nodes) {
	let context = canvas.node().getContext("2d");
	context.clearRect(0, 0, width, width);

	for (var i = 0, n = nodes.length; i < n; ++i) {
	var node = nodes[i];
	context.beginPath();
	context.moveTo(node.x, node.y);
	context.arc(node.x, node.y, node.r, 0, 2 * Math.PI);
	context.lineWidth = 0.5;
	context.fillStyle = node.color;
	context.fill();
	}
	}

	function drawsvg(svg, nodes) {
	const g = svg.selectAll('g.city')
	.data(nodes);

	var enter = g.enter().append('g').classed('city', true);

	enter.append('circle')
	.attr('r', d => d.r)
	//.attr('fill', d => d.color)
	.append('title')
	.text(d => d.name);

	enter
	.filter(d => d.r > 6)
	.append('text')
	.attr('fill', 'white')
	.style('font-size', d => d.r > 9 ? '12px' : '9px')
	.attr('text-anchor', 'middle')
	.attr('dominant-baseline', 'middle')
	.attr('pointer-events', 'none')
	.text(d => d.name.substring(0,2));

	g
	.attr('fill', d => d.color)
	.attr('transform', d => `translate(${d.x},${d.y})`);

	}


	d3.queue()
	.defer(d3.text, 'tsne.js')
	.defer(d3.text, 'worker.js')
	.awaitAll(function (err, scripts) {

	const worker = new Worker(
	window.URL.createObjectURL(
	new Blob(scripts, {
	type: "text/javascript"
	})
	)
	);

	worker.postMessage({
	maxIter: 10,
	dim: 2,
	perplexity: 30.0,
	data: data
	});

	worker.onmessage = function (e) {
	if (e.data.log) console.log.apply(this, e.data.log);
	if (e.data.pos) pos = e.data.pos;

	if (e.data.done && e.data.done < 10000 && e.data.cost > 1e-2) {
	worker.postMessage({
	maxIter: e.data.done + 10,
	});
	}
	};
	});

	});
	</script>
	</body>
	// create main global object
	var tsnejs = tsnejs \|\| { REVISION: 'ALPHA' };

	(function(global) {
	"use strict";

	// utility function
	var assert = function(condition, message) {
	if (!condition) { throw message \|\| "Assertion failed"; }
	}

	// syntax sugar
	var getopt = function(opt, field, defaultval) {
	if(opt.hasOwnProperty(field)) {
	return opt[field];
	} else {
	return defaultval;
	}
	}

	// return 0 mean unit standard deviation random number
	var return_v = false;
	var v_val = 0.0;
	var gaussRandom = function() {
	if(return_v) {
	return_v = false;
	return v_val;
	}
	var u = 2*Math.random()-1;
	var v = 2*Math.random()-1;
	var r = uu + vv;
	if(r == 0 \|\| r > 1) return gaussRandom();
	var c = Math.sqrt(-2*Math.log(r)/r);
	v_val = v*c; // cache this for next function call for efficiency
	return_v = true;
	return u*c;
	}

	// return random normal number
	var randn = function(mu, std){ return mu+gaussRandom()*std; }

	// utilitity that creates contiguous vector of zeros of size n
	var zeros = function(n) {
	if(typeof(n)==='undefined' \|\| isNaN(n)) { return []; }
	if(typeof ArrayBuffer === 'undefined') {
	// lacking browser support
	var arr = new Array(n);
	for(var i=0;i<n;i++) { arr[i]= 0; }
	return arr;
	} else {
	return new Float64Array(n); // typed arrays are faster
	}
	}

	// utility that returns 2d array filled with random numbers
	// or with value s, if provided
	var randn2d = function(n,d,s) {
	var uses = typeof s !== 'undefined';
	var x = [];
	for(var i=0;i<n;i++) {
	var xhere = [];
	for(var j=0;j<d;j++) {
	if(uses) {
	xhere.push(s);
	} else {
	xhere.push(randn(0.0, 1e-4));
	}
	}
	x.push(xhere);
	}
	return x;
	}

	// compute L2 distance between two vectors
	var L2 = function(x1, x2) {
	var D = x1.length;
	var d = 0;
	for(var i=0;i<D;i++) {
	var x1i = x1[i];
	var x2i = x2[i];
	d += (x1i-x2i)*(x1i-x2i);
	}
	return d;
	}

	// compute pairwise distance in all vectors in X
	var xtod = function(X) {
	var N = X.length;
	var dist = zeros(N * N); // allocate contiguous array
	for(var i=0;i<N;i++) {
	for(var j=i+1;j<N;j++) {
	var d = L2(X[i], X[j]);
	dist[i*N+j] = d;
	dist[j*N+i] = d;
	}
	}
	return dist;
	}

	// compute (p_{i\|j} + p_{j\|i})/(2n)
	var d2p = function(D, perplexity, tol) {
	var Nf = Math.sqrt(D.length); // this better be an integer
	var N = Math.floor(Nf);
	assert(N === Nf, "D should have square number of elements.");
	var Htarget = Math.log(perplexity); // target entropy of distribution
	var P = zeros(N * N); // temporary probability matrix

	var prow = zeros(N); // a temporary storage compartment
	for(var i=0;i<N;i++) {
	var betamin = -Infinity;
	var betamax = Infinity;
	var beta = 1; // initial value of precision
	var done = false;
	var maxtries = 50;

	// perform binary search to find a suitable precision beta
	// so that the entropy of the distribution is appropriate
	var num = 0;
	while(!done) {
	//debugger;

	// compute entropy and kernel row with beta precision
	var psum = 0.0;
	for(var j=0;j<N;j++) {
	var pj = Math.exp(- D[iN+j] beta);
	if(i===j) { pj = 0; } // we dont care about diagonals
	prow[j] = pj;
	psum += pj;
	}
	// normalize p and compute entropy
	var Hhere = 0.0;
	for(var j=0;j<N;j++) {
	if(psum == 0) {
	var pj = 0;
	} else {
	var pj = prow[j] / psum;
	}
	prow[j] = pj;
	if(pj > 1e-7) Hhere -= pj * Math.log(pj);
	}

	// adjust beta based on result
	if(Hhere > Htarget) {
	// entropy was too high (distribution too diffuse)
	// so we need to increase the precision for more peaky distribution
	betamin = beta; // move up the bounds
	if(betamax === Infinity) { beta = beta * 2; }
	else { beta = (beta + betamax) / 2; }

	} else {
	// converse case. make distrubtion less peaky
	betamax = beta;
	if(betamin === -Infinity) { beta = beta / 2; }
	else { beta = (beta + betamin) / 2; }
	}

	// stopping conditions: too many tries or got a good precision
	num++;
	if(Math.abs(Hhere - Htarget) < tol) { done = true; }
	if(num >= maxtries) { done = true; }
	}

	// console.log('data point ' + i + ' gets precision ' + beta + ' after ' + num + ' binary search steps.');
	// copy over the final prow to P at row i
	for(var j=0;j<N;j++) { P[i*N+j] = prow[j]; }

	} // end loop over examples i

	// symmetrize P and normalize it to sum to 1 over all ij
	var Pout = zeros(N * N);
	var N2 = N*2;
	for(var i=0;i<N;i++) {
	for(var j=0;j<N;j++) {
	Pout[iN+j] = Math.max((P[iN+j] + P[j*N+i])/N2, 1e-100);
	}
	}

	return Pout;
	}

	// helper function
	function sign(x) { return x > 0 ? 1 : x < 0 ? -1 : 0; }

	var tSNE = function(opt) {
	var opt = opt \|\| {};
	this.perplexity = getopt(opt, "perplexity", 30); // effective number of nearest neighbors
	this.dim = getopt(opt, "dim", 2); // by default 2-D tSNE
	this.epsilon = getopt(opt, "epsilon", 10); // learning rate

	this.iter = 0;
	}

	tSNE.prototype = {

	// this function takes a set of high-dimensional points
	// and creates matrix P from them using gaussian kernel
	initDataRaw: function(X) {
	var N = X.length;
	var D = X[0].length;
	assert(N > 0, " X is empty? You must have some data!");
	assert(D > 0, " X[0] is empty? Where is the data?");
	var dists = xtod(X); // convert X to distances using gaussian kernel
	this.P = d2p(dists, this.perplexity, 1e-4); // attach to object
	this.N = N; // back up the size of the dataset
	this.initSolution(); // refresh this
	},

	// this function takes a given distance matrix and creates
	// matrix P from them.
	// D is assumed to be provided as a list of lists, and should be symmetric
	initDataDist: function(D) {
	var N = D.length;
	assert(N > 0, " X is empty? You must have some data!");
	// convert D to a (fast) typed array version
	var dists = zeros(N * N); // allocate contiguous array
	for(var i=0;i<N;i++) {
	for(var j=i+1;j<N;j++) {
	var d = D[i][j];
	dists[i*N+j] = d;
	dists[j*N+i] = d;
	}
	}
	this.P = d2p(dists, this.perplexity, 1e-4);
	this.N = N;
	this.initSolution(); // refresh this
	},

	// (re)initializes the solution to random
	initSolution: function() {
	// generate random solution to t-SNE
	this.Y = randn2d(this.N, this.dim); // the solution
	this.gains = randn2d(this.N, this.dim, 1.0); // step gains to accelerate progress in unchanging directions
	this.ystep = randn2d(this.N, this.dim, 0.0); // momentum accumulator
	this.iter = 0;
	},

	// return pointer to current solution
	getSolution: function() {
	return this.Y;
	},

	// perform a single step of optimization to improve the embedding
	step: function() {
	this.iter += 1;
	var N = this.N;

	var cg = this.costGrad(this.Y); // evaluate gradient
	var cost = cg.cost;
	var grad = cg.grad;

	// perform gradient step
	var ymean = zeros(this.dim);
	for(var i=0;i<N;i++) {
	for(var d=0;d<this.dim;d++) {
	var gid = grad[i][d];
	var sid = this.ystep[i][d];
	var gainid = this.gains[i][d];

	// compute gain update
	var newgain = sign(gid) === sign(sid) ? gainid * 0.8 : gainid + 0.2;
	if(newgain < 0.01) newgain = 0.01; // clamp
	this.gains[i][d] = newgain; // store for next turn

	// compute momentum step direction
	var momval = this.iter < 250 ? 0.5 : 0.8;
	var newsid = momval * sid - this.epsilon * newgain * grad[i][d];
	this.ystep[i][d] = newsid; // remember the step we took

	// step!
	this.Y[i][d] += newsid;

	ymean[d] += this.Y[i][d]; // accumulate mean so that we can center later
	}
	}

	// reproject Y to be zero mean
	for(var i=0;i<N;i++) {
	for(var d=0;d<this.dim;d++) {
	this.Y[i][d] -= ymean[d]/N;
	}
	}

	//if(this.iter%100===0) console.log('iter ' + this.iter + ', cost: ' + cost);
	return cost; // return current cost
	},

	// for debugging: gradient check
	debugGrad: function() {
	var N = this.N;

	var cg = this.costGrad(this.Y); // evaluate gradient
	var cost = cg.cost;
	var grad = cg.grad;

	var e = 1e-5;
	for(var i=0;i<N;i++) {
	for(var d=0;d<this.dim;d++) {
	var yold = this.Y[i][d];

	this.Y[i][d] = yold + e;
	var cg0 = this.costGrad(this.Y);

	this.Y[i][d] = yold - e;
	var cg1 = this.costGrad(this.Y);

	var analytic = grad[i][d];
	var numerical = (cg0.cost - cg1.cost) / ( 2 * e );
	console.log(i + ',' + d + ': gradcheck analytic: ' + analytic + ' vs. numerical: ' + numerical);

	this.Y[i][d] = yold;
	}
	}
	},

	// return cost and gradient, given an arrangement
	costGrad: function(Y) {
	var N = this.N;
	var dim = this.dim; // dim of output space
	var P = this.P;

	var pmul = this.iter < 100 ? 4 : 1; // trick that helps with local optima

	// compute current Q distribution, unnormalized first
	var Qu = zeros(N * N);
	var qsum = 0.0;
	for(var i=0;i<N;i++) {
	for(var j=i+1;j<N;j++) {
	var dsum = 0.0;
	for(var d=0;d<dim;d++) {
	var dhere = Y[i][d] - Y[j][d];
	dsum += dhere * dhere;
	}
	var qu = 1.0 / (1.0 + dsum); // Student t-distribution
	Qu[i*N+j] = qu;
	Qu[j*N+i] = qu;
	qsum += 2 * qu;
	}
	}
	// normalize Q distribution to sum to 1
	var NN = N*N;
	var Q = zeros(NN);
	for(var q=0;q<NN;q++) { Q[q] = Math.max(Qu[q] / qsum, 1e-100); }

	var cost = 0.0;
	var grad = [];
	for(var i=0;i<N;i++) {
	var gsum = new Array(dim); // init grad for point i
	for(var d=0;d<dim;d++) { gsum[d] = 0.0; }
	for(var j=0;j<N;j++) {
	cost += - P[iN+j] Math.log(Q[i*N+j]); // accumulate cost (the non-constant portion at least...)
	var premult = 4 * (pmul * P[iN+j] - Q[iN+j]) * Qu[i*N+j];
	for(var d=0;d<dim;d++) {
	gsum[d] += premult * (Y[i][d] - Y[j][d]);
	}
	}
	grad.push(gsum);
	}

	return {cost: cost, grad: grad};
	}
	}

	global.tSNE = tSNE; // export tSNE class
	})(tsnejs);


	// export the library to window, or to module in nodejs
	(function(lib) {
	"use strict";
	if (typeof module === "undefined" \|\| typeof module.exports === "undefined") {
	if (typeof window == "object")
	window.tsnejs = lib; // in ordinary browser attach library to window
	} else {
	module.exports = lib; // in nodejs
	}
	})(tsnejs);
	importScripts('https://d3js.org/d3.v4.min.js');
	// in worker.onmessage add : `if (e.data.log) console.log.apply(this, e.data.log);`
	console.log = function() {
	self.postMessage({ log: [...arguments] });
	}

	function levenshtein(a, b) {
	var t = [], u, i, j, m = a.length, n = b.length;
	if (!m) { return n; }
	if (!n) { return m; }
	for (j = 0; j <= n; j++) { t[j] = j; }
	for (i = 1; i <= m; i++) {
	for (u = [i], j = 1; j <= n; j++) {
	u[j] = a[i - 1] === b[j - 1] ? t[j - 1] : Math.min(t[j - 1], t[j], u[j - 1]) + 1;
	} t = u;
	} return u[n];
	}

	let iterations = 0,
	model;
	let K=18,
	centers = d3.range(K).map(i => [ 0, 0 ]);

	self.onmessage = function(e) {
	const msg = e.data,
	data = msg.data,
	maxIter = msg.maxIter \|\| 500,
	perplexity = msg.perplexity \|\| 30;

	let dists = msg.dist;
	if (data && !dists){
	let now = performance.now();
	dists = data.map(
	a => data.map(
	b => d3.range(4).map(n => levenshtein(a.name.substring(0,n).toLowerCase(), b.name.substring(0,n).toLowerCase()) * Math.pow(1/2, n))
	.reduce((a,b)=>a+b, 0)
	)
	);
	console.log('computed', data.length * data.length,'distances in', Math.round(performance.now()-now),'ms');
	}
	if (dists) {
	model = new tsnejs.tSNE({
	dim: msg.dim,
	perplexity: perplexity,
	});
	model.initDataDist(dists);
	}

	let startpos = model.getSolution().map(d=>d.slice()),
	pos;




	while (iterations++ < maxIter) {
	// every time you call this, solution gets better
	model.step();

	pos = model.getSolution();

	// each K-center moves to the barycenter of its group
	var m = pos[Math.floor(Math.random() * pos.length)];

	centers.forEach((c,i) => {
	let p = pos.filter(d => d[2] == i);
	if (p.length < 10) c = m;
	else {
	c[0] = d3.mean(p.map(d => d[0]));
	c[1] = d3.mean(p.map(d => d[1]));
	}
	});
	// group each point according to its closest K-center
	pos.forEach(d => {
	d[2] = d3.scan(centers.map(c => {
	var dx = d[0] - c[0],
	dy = d[1] - c[1];
	return (dxdx + dydy);
	}));
	});



	// Y is an array of 2-D points that you can plot
	self.postMessage({
	iterations: iterations - 1,
	pos: pos,
	//log: [ 'step', iterations-1 ]
	});

	}

	let cost = startpos
	.map((d,i) => sqdist(d, pos[i]))
	.reduce ((a,b) => a + b, 0) / pos.length / Math.max(...pos.map(d => Math.abs(d[0]) + Math.abs(d[1])));

	self.postMessage({
	done: iterations - 1,
	cost: cost,
	log: [ 'done', iterations - 1 ]
	});
	};

	function sqdist (a,b) {
	let d = [a[0] - b[0], a[1] - b[1]].map(Math.abs);
	return Math.max(d[0], d[1]);
	}