Skip to content

Instantly share code, notes, and snippets.

@Fil
Last active February 28, 2017 21:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Fil/ef49ae2aebcded9b41f414da3cff5c90 to your computer and use it in GitHub Desktop.
Save Fil/ef49ae2aebcded9b41f414da3cff5c90 to your computer and use it in GitHub Desktop.
Les couleurs du Décodex
license: mit
border: no
scrolling: no
height: 960

Une carte du décodex du Monde.

[Mise à jour: le serveur du Monde n'autorise plus l'accès de sites tiers au fichier de données https://www.lemonde.fr/webservice/decodex/updates, ce qui casse cette visualisation. Ne sachant s'il est autorisé de recopier le fichier ici, je m'abstiens.]

Le Monde vient de publier son “Décodex: nos conseils pour vérifier les informations qui circulent en ligne”.

Il s'agit d'une liste de sites, commentés et notés selon un schéma de couleurs.

  • vert: “fiable”
  • bleu: “satirique ou parodique”
  • orange: “régulièrement imprécis”
  • rouge: “diffuse régulièrement de fausses informations”
  • gris: “pas une source à proprement parler”

Nous les regroupons ici en fonction de certains mots-clés contenus dans les descriptifs, avec l'algorithme t-SNE.

<!DOCTYPE html>
<head>
<meta charset="utf-8">
<script src="https://d3js.org/d3.v4.min.js"></script>
<style>
body {
margin: 0;
position: fixed;
top: 0;
right: 0;
bottom: 0;
left: 0;
font-family: monospace;
}
#tip {
position: absolute;
top: -100;
left: 0;
z-index: 2;
background: white;
padding: 2px;
pointer-events: none;
max-width: 27em;
}
#legende {
position: absolute;
top: 0;
left: 0;
z-index: 1;
background: rgba(255,255,255,0.2);
padding: 2px;
pointer-events: none;
max-width: 27em;
}
</style>
</head>
<body>
<script>
const width = 820,
height = 820,
margin = { top: 60, bottom: 60, left: 60, right: 60 },
scalecountry = d3.scaleLinear().range(['#777', '#60c7ff', '#fe4823', 'orange','#00e00b',]).domain([0,1,2,3,4]),
centerx = d3.scaleLinear()
.range([0, width]),
centery = d3.scaleLinear()
.range([0, height]);
const svg = d3.select("body").append("svg")
.attr("width", width + margin.left + margin.right)
.attr("height", height + margin.top + margin.bottom)
.append('g')
.attr('transform', `translate(${margin.left}, ${margin.top})`);
const tip = d3.select('body').append('div').attr('id', 'tip')
d3.json('https://www.lemonde.fr/webservice/decodex/updates', function (json) {
tags = [/\bblog/, /bpas une source/, /\b(complot|conspi)/, /\bdroite/, /\b(gauche)/, /\b(extrême[- ]droite|national[ei]|patri|antisé|migran|raciste|homophob|islam|ivg)/, /\bfran[çc][ea]/, /\baméri/, /\b(faux|fausse|erroné|fictif|mauvais|trompeu|condamn)/, /\b(satirique|parodi|blague|caricatur)/, /b(opinion|militant)/, /\brégional/, /\b(journal\b|presse|quotidien|magazine|hebdo)/, /\bspécialis/, /\b(peu fiable|sensation|racole|vérifi)/ ];
const data = d3.values(json.sites)
.map((d, i) => {
return {
lon: Math.random(),
lat: Math.random(),
name: d[2],
link: 'http://www.lemonde.fr/verification/source/' + d[3] + '/',
r: 12.5,
score: +d[0],
desc: d[1],
tags: tags.map(tag => !!d[1].toLowerCase().match(tag)),
color: scalecountry(+d[0])
};
});
// pos is the array of positions that will be updated by the tsne worker
// start with the geographic coordinates as is (plate-carrée)
// random or [0,0] is fine too
let pos = data.map(d => [d.lon, -d.lat]);
const forcetsne = d3.forceSimulation(
data.map(d => (d.x = width / 2, d.y = height / 2, d))
)
.alphaDecay(0.005)
.alpha(0.2)
.force('tsne', function (alpha) {
centerx.domain(d3.extent(pos.map(d => d[0])));
centery.domain(d3.extent(pos.map(d => d[1])));
data.forEach((d, i) => {
d.x += alpha * (centerx(pos[i][0]) - d.x);
d.y += alpha * (centery(pos[i][1]) - d.y);
});
})
.force('collide', d3.forceCollide().radius(d => 1 + d.r))
.on('tick', function () {
// drawcanvas(canvas, data);
drawsvg(svg, data);
});
function drawsvg(svg, nodes) {
const g = svg.selectAll('g.city')
.data(nodes);
var enter = g.enter().append('g').classed('city', true);
enter.append('circle')
.attr('r', d => d.r)
.attr('fill', d => d.color)
.on('click', d => {
window.open(d.link,'targetWindow');
})
.on('mouseover', function(d) {
tip
.style('left', Math.min(width - 200, d.x - 10)+'px')
.style('top', Math.min(height-100, d.y + margin.top + 10)+'px')
.html(d.name + '<br><small><em>Commentaire des Décodeurs:</em> ' + d.desc + '</small>')
});
enter
.filter(d => d.r > 7)
.append('text')
.attr('fill', 'white')
.style('font-size', '10px')
.attr('text-anchor', 'middle')
.attr('dominant-baseline', 'middle')
.attr('pointer-events', 'none')
.text(d => d.name.replace(/^(Le |La |L')/, '').substring(0,4));
g.attr('transform', d => `translate(${d.x},${d.y})`);
}
d3.queue()
.defer(d3.text, 'tsne.js')
.defer(d3.text, 'https://unpkg.com/d3-geo')
.defer(d3.text, 'worker.js')
.awaitAll(function (err, scripts) {
const worker = new Worker(
window.URL.createObjectURL(
new Blob(scripts, {
type: "text/javascript"
})
)
);
worker.postMessage({
maxIter: 10,
dim: 2,
perplexity: 30.0,
data: data
});
worker.onmessage = function (e) {
if (e.data.log) console.log.apply(this, e.data.log);
if (e.data.pos) pos = e.data.pos;
if (e.data.done && e.data.done < 10000 && e.data.cost > 1e-2) {
worker.postMessage({
maxIter: e.data.done + 10,
});
}
};
});
});
</script>
</body>
// create main global object
var tsnejs = tsnejs || { REVISION: 'ALPHA' };
(function(global) {
"use strict";
// utility function
var assert = function(condition, message) {
if (!condition) { throw message || "Assertion failed"; }
}
// syntax sugar
var getopt = function(opt, field, defaultval) {
if(opt.hasOwnProperty(field)) {
return opt[field];
} else {
return defaultval;
}
}
// return 0 mean unit standard deviation random number
var return_v = false;
var v_val = 0.0;
var gaussRandom = function() {
if(return_v) {
return_v = false;
return v_val;
}
var u = 2*Math.random()-1;
var v = 2*Math.random()-1;
var r = u*u + v*v;
if(r == 0 || r > 1) return gaussRandom();
var c = Math.sqrt(-2*Math.log(r)/r);
v_val = v*c; // cache this for next function call for efficiency
return_v = true;
return u*c;
}
// return random normal number
var randn = function(mu, std){ return mu+gaussRandom()*std; }
// utilitity that creates contiguous vector of zeros of size n
var zeros = function(n) {
if(typeof(n)==='undefined' || isNaN(n)) { return []; }
if(typeof ArrayBuffer === 'undefined') {
// lacking browser support
var arr = new Array(n);
for(var i=0;i<n;i++) { arr[i]= 0; }
return arr;
} else {
return new Float64Array(n); // typed arrays are faster
}
}
// utility that returns 2d array filled with random numbers
// or with value s, if provided
var randn2d = function(n,d,s) {
var uses = typeof s !== 'undefined';
var x = [];
for(var i=0;i<n;i++) {
var xhere = [];
for(var j=0;j<d;j++) {
if(uses) {
xhere.push(s);
} else {
xhere.push(randn(0.0, 1e-4));
}
}
x.push(xhere);
}
return x;
}
// compute L2 distance between two vectors
var L2 = function(x1, x2) {
var D = x1.length;
var d = 0;
for(var i=0;i<D;i++) {
var x1i = x1[i];
var x2i = x2[i];
d += (x1i-x2i)*(x1i-x2i);
}
return d;
}
// compute pairwise distance in all vectors in X
var xtod = function(X) {
var N = X.length;
var dist = zeros(N * N); // allocate contiguous array
for(var i=0;i<N;i++) {
for(var j=i+1;j<N;j++) {
var d = L2(X[i], X[j]);
dist[i*N+j] = d;
dist[j*N+i] = d;
}
}
return dist;
}
// compute (p_{i|j} + p_{j|i})/(2n)
var d2p = function(D, perplexity, tol) {
var Nf = Math.sqrt(D.length); // this better be an integer
var N = Math.floor(Nf);
assert(N === Nf, "D should have square number of elements.");
var Htarget = Math.log(perplexity); // target entropy of distribution
var P = zeros(N * N); // temporary probability matrix
var prow = zeros(N); // a temporary storage compartment
for(var i=0;i<N;i++) {
var betamin = -Infinity;
var betamax = Infinity;
var beta = 1; // initial value of precision
var done = false;
var maxtries = 50;
// perform binary search to find a suitable precision beta
// so that the entropy of the distribution is appropriate
var num = 0;
while(!done) {
//debugger;
// compute entropy and kernel row with beta precision
var psum = 0.0;
for(var j=0;j<N;j++) {
var pj = Math.exp(- D[i*N+j] * beta);
if(i===j) { pj = 0; } // we dont care about diagonals
prow[j] = pj;
psum += pj;
}
// normalize p and compute entropy
var Hhere = 0.0;
for(var j=0;j<N;j++) {
if(psum == 0) {
var pj = 0;
} else {
var pj = prow[j] / psum;
}
prow[j] = pj;
if(pj > 1e-7) Hhere -= pj * Math.log(pj);
}
// adjust beta based on result
if(Hhere > Htarget) {
// entropy was too high (distribution too diffuse)
// so we need to increase the precision for more peaky distribution
betamin = beta; // move up the bounds
if(betamax === Infinity) { beta = beta * 2; }
else { beta = (beta + betamax) / 2; }
} else {
// converse case. make distrubtion less peaky
betamax = beta;
if(betamin === -Infinity) { beta = beta / 2; }
else { beta = (beta + betamin) / 2; }
}
// stopping conditions: too many tries or got a good precision
num++;
if(Math.abs(Hhere - Htarget) < tol) { done = true; }
if(num >= maxtries) { done = true; }
}
// console.log('data point ' + i + ' gets precision ' + beta + ' after ' + num + ' binary search steps.');
// copy over the final prow to P at row i
for(var j=0;j<N;j++) { P[i*N+j] = prow[j]; }
} // end loop over examples i
// symmetrize P and normalize it to sum to 1 over all ij
var Pout = zeros(N * N);
var N2 = N*2;
for(var i=0;i<N;i++) {
for(var j=0;j<N;j++) {
Pout[i*N+j] = Math.max((P[i*N+j] + P[j*N+i])/N2, 1e-100);
}
}
return Pout;
}
// helper function
function sign(x) { return x > 0 ? 1 : x < 0 ? -1 : 0; }
var tSNE = function(opt) {
var opt = opt || {};
this.perplexity = getopt(opt, "perplexity", 30); // effective number of nearest neighbors
this.dim = getopt(opt, "dim", 2); // by default 2-D tSNE
this.epsilon = getopt(opt, "epsilon", 10); // learning rate
this.iter = 0;
}
tSNE.prototype = {
// this function takes a set of high-dimensional points
// and creates matrix P from them using gaussian kernel
initDataRaw: function(X) {
var N = X.length;
var D = X[0].length;
assert(N > 0, " X is empty? You must have some data!");
assert(D > 0, " X[0] is empty? Where is the data?");
var dists = xtod(X); // convert X to distances using gaussian kernel
this.P = d2p(dists, this.perplexity, 1e-4); // attach to object
this.N = N; // back up the size of the dataset
this.initSolution(); // refresh this
},
// this function takes a given distance matrix and creates
// matrix P from them.
// D is assumed to be provided as a list of lists, and should be symmetric
initDataDist: function(D) {
var N = D.length;
assert(N > 0, " X is empty? You must have some data!");
// convert D to a (fast) typed array version
var dists = zeros(N * N); // allocate contiguous array
for(var i=0;i<N;i++) {
for(var j=i+1;j<N;j++) {
var d = D[i][j];
dists[i*N+j] = d;
dists[j*N+i] = d;
}
}
this.P = d2p(dists, this.perplexity, 1e-4);
this.N = N;
this.initSolution(); // refresh this
},
// (re)initializes the solution to random
initSolution: function() {
// generate random solution to t-SNE
this.Y = randn2d(this.N, this.dim); // the solution
this.gains = randn2d(this.N, this.dim, 1.0); // step gains to accelerate progress in unchanging directions
this.ystep = randn2d(this.N, this.dim, 0.0); // momentum accumulator
this.iter = 0;
},
// return pointer to current solution
getSolution: function() {
return this.Y;
},
// perform a single step of optimization to improve the embedding
step: function() {
this.iter += 1;
var N = this.N;
var cg = this.costGrad(this.Y); // evaluate gradient
var cost = cg.cost;
var grad = cg.grad;
// perform gradient step
var ymean = zeros(this.dim);
for(var i=0;i<N;i++) {
for(var d=0;d<this.dim;d++) {
var gid = grad[i][d];
var sid = this.ystep[i][d];
var gainid = this.gains[i][d];
// compute gain update
var newgain = sign(gid) === sign(sid) ? gainid * 0.8 : gainid + 0.2;
if(newgain < 0.01) newgain = 0.01; // clamp
this.gains[i][d] = newgain; // store for next turn
// compute momentum step direction
var momval = this.iter < 250 ? 0.5 : 0.8;
var newsid = momval * sid - this.epsilon * newgain * grad[i][d];
this.ystep[i][d] = newsid; // remember the step we took
// step!
this.Y[i][d] += newsid;
ymean[d] += this.Y[i][d]; // accumulate mean so that we can center later
}
}
// reproject Y to be zero mean
for(var i=0;i<N;i++) {
for(var d=0;d<this.dim;d++) {
this.Y[i][d] -= ymean[d]/N;
}
}
//if(this.iter%100===0) console.log('iter ' + this.iter + ', cost: ' + cost);
return cost; // return current cost
},
// for debugging: gradient check
debugGrad: function() {
var N = this.N;
var cg = this.costGrad(this.Y); // evaluate gradient
var cost = cg.cost;
var grad = cg.grad;
var e = 1e-5;
for(var i=0;i<N;i++) {
for(var d=0;d<this.dim;d++) {
var yold = this.Y[i][d];
this.Y[i][d] = yold + e;
var cg0 = this.costGrad(this.Y);
this.Y[i][d] = yold - e;
var cg1 = this.costGrad(this.Y);
var analytic = grad[i][d];
var numerical = (cg0.cost - cg1.cost) / ( 2 * e );
console.log(i + ',' + d + ': gradcheck analytic: ' + analytic + ' vs. numerical: ' + numerical);
this.Y[i][d] = yold;
}
}
},
// return cost and gradient, given an arrangement
costGrad: function(Y) {
var N = this.N;
var dim = this.dim; // dim of output space
var P = this.P;
var pmul = this.iter < 100 ? 4 : 1; // trick that helps with local optima
// compute current Q distribution, unnormalized first
var Qu = zeros(N * N);
var qsum = 0.0;
for(var i=0;i<N;i++) {
for(var j=i+1;j<N;j++) {
var dsum = 0.0;
for(var d=0;d<dim;d++) {
var dhere = Y[i][d] - Y[j][d];
dsum += dhere * dhere;
}
var qu = 1.0 / (1.0 + dsum); // Student t-distribution
Qu[i*N+j] = qu;
Qu[j*N+i] = qu;
qsum += 2 * qu;
}
}
// normalize Q distribution to sum to 1
var NN = N*N;
var Q = zeros(NN);
for(var q=0;q<NN;q++) { Q[q] = Math.max(Qu[q] / qsum, 1e-100); }
var cost = 0.0;
var grad = [];
for(var i=0;i<N;i++) {
var gsum = new Array(dim); // init grad for point i
for(var d=0;d<dim;d++) { gsum[d] = 0.0; }
for(var j=0;j<N;j++) {
cost += - P[i*N+j] * Math.log(Q[i*N+j]); // accumulate cost (the non-constant portion at least...)
var premult = 4 * (pmul * P[i*N+j] - Q[i*N+j]) * Qu[i*N+j];
for(var d=0;d<dim;d++) {
gsum[d] += premult * (Y[i][d] - Y[j][d]);
}
}
grad.push(gsum);
}
return {cost: cost, grad: grad};
}
}
global.tSNE = tSNE; // export tSNE class
})(tsnejs);
// export the library to window, or to module in nodejs
(function(lib) {
"use strict";
if (typeof module === "undefined" || typeof module.exports === "undefined") {
if (typeof window == "object")
window.tsnejs = lib; // in ordinary browser attach library to window
} else {
module.exports = lib; // in nodejs
}
})(tsnejs);
// in worker.onmessage add : `if (e.data.log) console.log.apply(this, e.data.log);`
console.log = function() {
self.postMessage({ log: [...arguments] });
}
function distance(a, b) {
return a
.map((_,i) => b[i] != a[i])
.reduce((c,d) => c+d, 0);
}
let iterations = 0,
model;
self.onmessage = function(e) {
const msg = e.data,
data = msg.data,
maxIter = msg.maxIter || 500,
perplexity = msg.perplexity || 30;
let dists = msg.dist;
if (data && !dists){
let now = performance.now();
dists = data.map(
a => data.map(
b => distance(a.tags, b.tags) + Math.abs(a.score - b.score)/10
)
);
console.log('computed', data.length * data.length,'distances in', Math.round(performance.now()-now),'ms');
}
if (dists) {
model = new tsnejs.tSNE({
dim: msg.dim,
perplexity: perplexity,
});
model.initDataDist(dists);
}
let startpos = model.getSolution().map(d=>d.slice()),
pos;
while (iterations++ < maxIter) {
// every time you call this, solution gets better
model.step();
pos = model.getSolution();
// Y is an array of 2-D points that you can plot
self.postMessage({
iterations: iterations - 1,
pos: pos,
//log: [ 'step', iterations-1 ]
});
}
let cost = startpos
.map((d,i) => sqdist(d, pos[i]))
.reduce ((a,b) => a + b, 0) / pos.length / Math.max(...pos.map(d => Math.abs(d[0]) + Math.abs(d[1])));
self.postMessage({
done: iterations - 1,
cost: cost,
log: [ 'done', iterations - 1, cost ]
});
};
function sqdist (a,b) {
let d = [a[0] - b[0], a[1] - b[1]].map(Math.abs);
return Math.max(d[0], d[1]);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment