Skip to content

Instantly share code, notes, and snippets.

@helenedraux
Last active December 6, 2016 17:27
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save helenedraux/075971f9d2ef7999364ddd84daacf83f to your computer and use it in GitHub Desktop.
Save helenedraux/075971f9d2ef7999364ddd84daacf83f to your computer and use it in GitHub Desktop.
Word cloud cycling through a list.

Word cloud cycling through a list.

  • Each entry in the list is a long text. To build a clean word cloud of the most frequent meaningful words, the stop words are first removed with a JavaScript function by GeekLad.

  • Animation adapted from jwhitfieldseed

(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g=(g.d3||(g.d3 = {}));g=(g.layout||(g.layout = {}));g.cloud = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){
// Word cloud layout by Jason Davies, https://www.jasondavies.com/wordcloud/
// Algorithm due to Jonathan Feinberg, http://static.mrfeinberg.com/bv_ch03.pdf
var dispatch = require("d3-dispatch").dispatch;
// Degrees-to-radians factor; rotation angles (given in degrees) are multiplied
// by it before being passed to Math.sin/Math.cos and context.rotate.
var cloudRadians = Math.PI / 180,
// Width of the sprite canvas in 32-pixel units (64); the canvas is sized to
// (cw << 5) = 2048 pixels, divided by the pixel ratio.
cw = 1 << 11 >> 5,
// Height of the sprite canvas in pixels (2048), divided by the pixel ratio.
ch = 1 << 11;
// Factory for a word-cloud layout. Returns a `cloud` object exposing chainable
// getter/setter accessors plus start()/stop(). Placement results are reported
// through the "word" and "end" events, which callers subscribe to via cloud.on().
module.exports = function() {
// Current configuration. Every entry can be replaced through the accessor of
// the same name defined further down.
var size = [256, 256],
text = cloudText,
font = cloudFont,
fontSize = cloudFontSize,
fontStyle = cloudFontNormal,
fontWeight = cloudFontNormal,
rotate = cloudRotate,
padding = cloudPadding,
spiral = archimedeanSpiral,
words = [],
timeInterval = Infinity,
event = dispatch("word", "end"),
timer = null,
random = Math.random,
cloud = {},
canvas = cloudCanvas;
// Getter/setter for the canvas factory; a non-function value is wrapped by functor().
cloud.canvas = function(_) {
return arguments.length ? (canvas = functor(_), cloud) : canvas;
};
// Starts the layout: evaluates the accessors for every word, then places the
// words one by one (largest size first) from a repeating timer. Returns `cloud`.
cloud.start = function() {
var contextAndRatio = getContext(canvas()),
// Occupancy bitmap of the layout area: one array element per 32 horizontal
// pixels, size[1] rows.
board = zeroArray((size[0] >> 5) * size[1]),
bounds = null,
n = words.length,
i = -1,
tags = [],
// NOTE: the word objects passed to cloud.words() are mutated in place.
data = words.map(function(d, i) {
d.text = text.call(this, d, i);
d.font = font.call(this, d, i);
d.style = fontStyle.call(this, d, i);
d.weight = fontWeight.call(this, d, i);
d.rotate = rotate.call(this, d, i);
// ~~ truncates the font size to an integer.
d.size = ~~fontSize.call(this, d, i);
d.padding = padding.call(this, d, i);
return d;
}).sort(function(a, b) { return b.size - a.size; });
// Restart cleanly if a previous run is still in progress.
if (timer) clearInterval(timer);
timer = setInterval(step, 0);
// Run the first batch synchronously instead of waiting for the timer.
step();
return cloud;
// Places as many words as fit into one time slice of `timeInterval` ms.
function step() {
var start = Date.now();
// `timer` is part of the condition so that cloud.stop() ends the loop.
while (Date.now() - start < timeInterval && ++i < n && timer) {
var d = data[i];
// Random starting position inside the central half of the layout area.
d.x = (size[0] * (random() + .5)) >> 1;
d.y = (size[1] * (random() + .5)) >> 1;
cloudSprite(contextAndRatio, d, data, i);
if (d.hasText && place(board, d, bounds)) {
tags.push(d);
event.word(d);
if (bounds) cloudBounds(bounds, d);
else bounds = [{x: d.x + d.x0, y: d.y + d.y0}, {x: d.x + d.x1, y: d.y + d.y1}];
// Temporary hack
// (shifts the reported position so that it is relative to the centre of
// the layout area rather than to its top-left corner)
d.x -= size[0] >> 1;
d.y -= size[1] >> 1;
}
}
// All words processed: stop the timer and report the placed words and their bounds.
if (i >= n) {
cloud.stop();
event.end(tags, bounds);
}
}
}
// Cancels a running layout, if any. Returns `cloud`.
cloud.stop = function() {
if (timer) {
clearInterval(timer);
timer = null;
}
return cloud;
};
// Prepares the sprite canvas and returns its 2D context together with the
// pixel ratio of the backing store.
function getContext(canvas) {
canvas.width = canvas.height = 1;
// The ratio is derived from how much pixel data getImageData returns for a
// 1x1 canvas (data.length >> 2 is the number of 4-byte pixels).
var ratio = Math.sqrt(canvas.getContext("2d").getImageData(0, 0, 1, 1).data.length >> 2);
canvas.width = (cw << 5) / ratio;
canvas.height = ch / ratio;
var context = canvas.getContext("2d");
context.fillStyle = context.strokeStyle = "red";
context.textAlign = "center";
return {context: context, ratio: ratio};
}
// Searches along the configured spiral, starting from tag.x/tag.y, for a
// position where the tag's sprite can be written to the board. On success the
// sprite is merged into `board`, tag.sprite is deleted and true is returned;
// otherwise false.
function place(board, tag, bounds) {
// NOTE(review): `perimeter` is never read in this function.
var perimeter = [{x: 0, y: 0}, {x: size[0], y: size[1]}],
startX = tag.x,
startY = tag.y,
// Diagonal of the layout area; the search is abandoned once both offsets reach it.
maxDelta = Math.sqrt(size[0] * size[0] + size[1] * size[1]),
s = spiral(size),
// Random direction in which the spiral parameter t advances.
dt = random() < .5 ? 1 : -1,
t = -dt,
dxdy,
dx,
dy;
while (dxdy = s(t += dt)) {
dx = ~~dxdy[0];
dy = ~~dxdy[1];
if (Math.min(Math.abs(dx), Math.abs(dy)) >= maxDelta) break;
tag.x = startX + dx;
tag.y = startY + dy;
// Skip candidate positions whose bounding box leaves the layout area.
if (tag.x + tag.x0 < 0 || tag.y + tag.y0 < 0 ||
tag.x + tag.x1 > size[0] || tag.y + tag.y1 > size[1]) continue;
// TODO only check for collisions within current bounds.
// With no bounds yet (first placed word) both checks are bypassed.
if (!bounds || !cloudCollide(tag, board, size[0])) {
if (!bounds || collideRects(tag, bounds)) {
// Merge the sprite into the board using the same word/shift arithmetic
// as cloudCollide.
// NOTE(review): sx is masked with 0x7f rather than 0x1f — confirm intended.
var sprite = tag.sprite,
w = tag.width >> 5,
sw = size[0] >> 5,
lx = tag.x - (w << 4),
sx = lx & 0x7f,
msx = 32 - sx,
h = tag.y1 - tag.y0,
x = (tag.y + tag.y0) * sw + (lx >> 5),
last;
for (var j = 0; j < h; j++) {
last = 0;
for (var i = 0; i <= w; i++) {
board[x + i] |= (last << msx) | (i < w ? (last = sprite[j * w + i]) >>> sx : 0);
}
x += sw;
}
delete tag.sprite;
return true;
}
}
}
return false;
}
// The accessors below all follow the same pattern: called with an argument
// they store it and return `cloud` for chaining; called without arguments
// they return the current value.
// Time budget in ms per step; null/undefined means Infinity.
cloud.timeInterval = function(_) {
return arguments.length ? (timeInterval = _ == null ? Infinity : _, cloud) : timeInterval;
};
cloud.words = function(_) {
return arguments.length ? (words = _, cloud) : words;
};
// Layout size as [width, height]; both entries are coerced to numbers.
cloud.size = function(_) {
return arguments.length ? (size = [+_[0], +_[1]], cloud) : size;
};
cloud.font = function(_) {
return arguments.length ? (font = functor(_), cloud) : font;
};
cloud.fontStyle = function(_) {
return arguments.length ? (fontStyle = functor(_), cloud) : fontStyle;
};
cloud.fontWeight = function(_) {
return arguments.length ? (fontWeight = functor(_), cloud) : fontWeight;
};
cloud.rotate = function(_) {
return arguments.length ? (rotate = functor(_), cloud) : rotate;
};
cloud.text = function(_) {
return arguments.length ? (text = functor(_), cloud) : text;
};
// Accepts a key of `spirals` ("archimedean", "rectangular") or a custom spiral factory.
cloud.spiral = function(_) {
return arguments.length ? (spiral = spirals[_] || _, cloud) : spiral;
};
cloud.fontSize = function(_) {
return arguments.length ? (fontSize = functor(_), cloud) : fontSize;
};
cloud.padding = function(_) {
return arguments.length ? (padding = functor(_), cloud) : padding;
};
// Random number source used for start positions and spiral direction.
cloud.random = function(_) {
return arguments.length ? (random = _, cloud) : random;
};
// Forwards to the dispatcher's on(); returns `cloud` when the dispatcher
// returns itself, otherwise the dispatcher's return value.
cloud.on = function() {
var value = event.on.apply(event, arguments);
return value === event ? cloud : value;
};
return cloud;
};
// Default text accessor: reads the word's own `text` property.
function cloudText(d) {
  var label = d.text;
  return label;
}
// Default font accessor: every word is drawn in the generic serif family.
function cloudFont() {
  var family = "serif";
  return family;
}
// Default accessor shared by fontStyle and fontWeight: always "normal".
function cloudFontNormal() {
  var keyword = "normal";
  return keyword;
}
// Default font-size accessor: the square root of the word's `value` property.
function cloudFontSize(d) {
  var weight = d.value;
  return Math.sqrt(weight);
}
// Default rotation accessor: picks one of six angles, in degrees, from -90 to
// 60 in steps of 30.
function cloudRotate() {
  var bucket = ~~(Math.random() * 6);
  return (bucket - 3) * 30;
}
// Default padding accessor: a constant padding of 1 for every word.
function cloudPadding() {
  var padding = 1;
  return padding;
}
// Fetches a monochrome sprite bitmap for the specified text.
// Load in batches for speed.
//
// contextAndRatio: {context, ratio} as returned by getContext().
// d:               the word that needs a sprite; nothing happens if it already has one.
// data:            the full, sorted word array.
// di:              index of `d` in `data`; drawing starts there and continues
//                  with the following words until the canvas is full.
//
// For every word drawn this sets width, height, xoff, yoff, x0, y0, x1, y1 and
// hasText, and finally `sprite`: an array holding 32 pixels per element.
function cloudSprite(contextAndRatio, d, data, di) {
if (d.sprite) return;
var c = contextAndRatio.context,
ratio = contextAndRatio.ratio;
c.clearRect(0, 0, (cw << 5) / ratio, ch / ratio);
// x/y: where the next word is drawn on the canvas; maxh: tallest word in the
// current row of words.
var x = 0,
y = 0,
maxh = 0,
n = data.length;
--di;
// Pass 1: draw as many words as fit, left to right in rows.
while (++di < n) {
d = data[di];
c.save();
c.font = d.style + " " + d.weight + " " + ~~((d.size + 1) / ratio) + "px " + d.font;
// An extra "m" is appended when measuring, which makes the box wider than the text itself.
var w = c.measureText(d.text + "m").width * ratio,
h = d.size << 1;
if (d.rotate) {
// Axis-aligned bounding box of the rotated w x h rectangle.
var sr = Math.sin(d.rotate * cloudRadians),
cr = Math.cos(d.rotate * cloudRadians),
wcr = w * cr,
wsr = w * sr,
hcr = h * cr,
hsr = h * sr;
// The width is rounded up to a multiple of 32 pixels.
w = (Math.max(Math.abs(wcr + hsr), Math.abs(wcr - hsr)) + 0x1f) >> 5 << 5;
h = ~~Math.max(Math.abs(wsr + hcr), Math.abs(wsr - hcr));
} else {
w = (w + 0x1f) >> 5 << 5;
}
if (h > maxh) maxh = h;
// Row full: continue on the next row.
if (x + w >= (cw << 5)) {
x = 0;
y += maxh;
maxh = 0;
}
// Canvas full: the remaining words are left for a later call.
if (y + h >= ch) break;
c.translate((x + (w >> 1)) / ratio, (y + (h >> 1)) / ratio);
if (d.rotate) c.rotate(d.rotate * cloudRadians);
c.fillText(d.text, 0, 0);
// Padding is realised by stroking the text with a line width of twice the padding.
if (d.padding) c.lineWidth = 2 * d.padding, c.strokeText(d.text, 0, 0);
c.restore();
d.width = w;
d.height = h;
d.xoff = x;
d.yoff = y;
// Bounding box relative to the word's centre.
d.x1 = w >> 1;
d.y1 = h >> 1;
d.x0 = -d.x1;
d.y0 = -d.y1;
d.hasText = true;
x += w;
}
// Pass 2: read the canvas back and pack each drawn word into a bitmap.
// NOTE(review): this loop runs down to index 0, so it also revisits words with
// hasText set by earlier calls — confirm that is intended.
var pixels = c.getImageData(0, 0, (cw << 5) / ratio, ch / ratio).data,
sprite = [];
while (--di >= 0) {
d = data[di];
if (!d.hasText) continue;
var w = d.width,
w32 = w >> 5,
h = d.y1 - d.y0;
// Zero the buffer
for (var i = 0; i < h * w32; i++) sprite[i] = 0;
x = d.xoff;
if (x == null) return;
y = d.yoff;
// `seen` accumulates across rows, so only rows before the first painted
// pixel are treated as empty.
var seen = 0,
seenRow = -1;
for (var j = 0; j < h; j++) {
for (var i = 0; i < w; i++) {
// `<< 2` addresses the first of the 4 bytes of each pixel; a non-zero value
// sets the corresponding bit, most significant bit first.
var k = w32 * j + (i >> 5),
m = pixels[((y + j) * (cw << 5) + (x + i)) << 2] ? 1 << (31 - (i % 32)) : 0;
sprite[k] |= m;
seen |= m;
}
if (seen) seenRow = j;
else {
// Nothing painted so far: drop this row from the top of the box and redo
// index j against the next canvas row.
d.y0++;
h--;
j--;
y++;
}
}
d.y1 = d.y0 + seenRow;
d.sprite = sprite.slice(0, (d.y1 - d.y0) * w32);
}
}
// Use mask-based collision detection.
// Returns true as soon as any bit of the tag's sprite, shifted to the tag's
// current position, overlaps a bit already set in `board`; false otherwise.
// `sw` is the layout width in pixels.
function cloudCollide(tag, board, sw) {
  var boardStride = sw >> 5;
  var bits = tag.sprite;
  var spriteWords = tag.width >> 5;
  var left = tag.x - (spriteWords << 4);
  var shiftRight = left & 0x7f;
  var shiftLeft = 32 - shiftRight;
  var rows = tag.y1 - tag.y0;
  var rowStart = (tag.y + tag.y0) * boardStride + (left >> 5);

  for (var row = 0; row < rows; row++) {
    var carry = 0;
    for (var col = 0; col <= spriteWords; col++) {
      // Bits pushed out of the previous sprite word spill into this board word.
      var mask = carry << shiftLeft;
      if (col < spriteWords) {
        carry = bits[row * spriteWords + col];
        mask |= carry >>> shiftRight;
      }
      if (mask & board[rowStart + col]) {
        return true;
      }
    }
    rowStart += boardStride;
  }
  return false;
}
// Grows the bounding rectangle `bounds` ([topLeft, bottomRight]) in place so
// that it also encloses the box of word `d`.
function cloudBounds(bounds, d) {
  var topLeft = bounds[0];
  var bottomRight = bounds[1];
  var left = d.x + d.x0;
  var top = d.y + d.y0;
  var right = d.x + d.x1;
  var bottom = d.y + d.y1;

  if (left < topLeft.x) {
    topLeft.x = left;
  }
  if (top < topLeft.y) {
    topLeft.y = top;
  }
  if (right > bottomRight.x) {
    bottomRight.x = right;
  }
  if (bottom > bottomRight.y) {
    bottomRight.y = bottom;
  }
}
// Reports whether the box of word `a` strictly overlaps rectangle `b`
// ([topLeft, bottomRight]); boxes that merely touch do not overlap.
function collideRects(a, b) {
  var topLeft = b[0];
  var bottomRight = b[1];
  if (!(a.x + a.x1 > topLeft.x)) {
    return false;
  }
  if (!(a.x + a.x0 < bottomRight.x)) {
    return false;
  }
  if (!(a.y + a.y1 > topLeft.y)) {
    return false;
  }
  return a.y + a.y0 < bottomRight.y;
}
// Builds an Archimedean spiral for a layout of the given [width, height].
// The returned function maps the step counter t to an [x, y] offset; the x
// component is stretched by the width/height aspect ratio.
function archimedeanSpiral(size) {
  var aspect = size[0] / size[1];
  return function(t) {
    var angle = t * .1;
    var offsetX = aspect * angle * Math.cos(angle);
    var offsetY = angle * Math.sin(angle);
    return [offsetX, offsetY];
  };
}
// Builds a rectangular spiral for a layout of the given [width, height].
// The returned function is stateful: each call moves the current point one
// step (4 vertically, 4 * width / height horizontally) in the direction
// derived from t, and returns the new [x, y] offset.
function rectangularSpiral(size) {
  var stepY = 4;
  var stepX = stepY * size[0] / size[1];
  var posX = 0;
  var posY = 0;
  return function(t) {
    var sign = t < 0 ? -1 : 1;
    // See triangular numbers: T_n = n * (n + 1) / 2.
    var side = (Math.sqrt(1 + 4 * sign * t) - sign) & 3;
    if (side === 0) {
      posX += stepX;
    } else if (side === 1) {
      posY += stepY;
    } else if (side === 2) {
      posX -= stepX;
    } else {
      posY -= stepY;
    }
    return [posX, posY];
  };
}
// TODO reuse arrays?
// Returns a new array whose entries at indices 0 .. n-1 are all 0.
function zeroArray(n) {
  var zeros = [];
  for (var index = 0; index < n; index++) {
    zeros[index] = 0;
  }
  return zeros;
}
// Default canvas factory: creates a detached <canvas> element in the current document.
function cloudCanvas() {
  var element = document.createElement("canvas");
  return element;
}
// Turns a value into an accessor: functions are returned unchanged, any other
// value is wrapped in a function that always returns it.
function functor(d) {
  if (typeof d === "function") {
    return d;
  }
  return function() {
    return d;
  };
}
// Built-in spiral factories, looked up by name in cloud.spiral(); a value that
// is not a key here is used as the spiral factory itself.
var spirals = {
archimedean: archimedeanSpiral,
rectangular: rectangularSpiral
};
},{"d3-dispatch":2}],2:[function(require,module,exports){
(function (global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) :
typeof define === 'function' && define.amd ? define(['exports'], factory) :
factory((global.dispatch = {}));
}(this, function (exports) { 'use strict';
// Event dispatcher for a fixed list of event types.
//
// `types` is an array-like of type names. For each type the instance gets a
// method of that name which invokes every callback registered for the type,
// forwarding `this` and the arguments, and returns the dispatcher.
// Throws an Error for an empty type name or one that clashes with an existing
// property of the instance (duplicates, "on").
//
// on(typename)            returns the callback registered under "type.name", if any.
// on(typename, callback)  registers `callback` under "type.name", replacing a
//                         previous one; a null callback just removes it.
// on(".name", null)       removes the callbacks called `name` from every type.
// on() throws an Error when the type part is not one of `types`.
function Dispatch(types) {
  var i = -1,
      n = types.length,
      callbacksByType = {},
      callbackByName = {},
      type,
      that = this;

  that.on = function(type, callback) {
    type = parseType(type);

    // Return the current callback, if any.
    if (arguments.length < 2) {
      return (callback = callbackByName[type.name]) && callback.value;
    }

    // If a type was specified…
    if (type.type) {
      // Remove the current callback, if any, using copy-on-remove.
      unregister(type.type, type.name);

      // Add the new callback, if any.
      if (callback) {
        callback = {value: callback};
        callbackByName[type.name] = callback;
        callbacksByType[type.type].push(callback);
      }
    }

    // Otherwise, if a null callback was specified, remove all callbacks with the given name.
    else if (callback == null) {
      for (var otherType in callbacksByType) {
        // type.name is ".name" here, so this is the full "type.name" key.
        unregister(otherType, otherType + type.name);
      }
    }

    return that;
  };

  while (++i < n) {
    type = types[i] + "";
    if (!type || (type in that)) throw new Error("illegal or duplicate type: " + type);
    callbacksByType[type] = [];
    that[type] = applier(type);
  }

  // Removes the callback stored under `fullName` from both lookup tables.
  // The entry must be deleted from callbackByName under the same key it was
  // stored with; otherwise a stale entry survives and a later registration of
  // that name corrupts the callback list. The list is replaced by a copy
  // rather than spliced so that a dispatch in progress keeps iterating its
  // own array; nulling `value` makes that dispatch skip the removed callback.
  function unregister(typeName, fullName) {
    var entry = callbackByName[fullName];
    if (!entry) return;
    var callbacks = callbacksByType[typeName],
        index = callbacks.indexOf(entry);
    entry.value = null;
    if (index >= 0) {
      callbacksByType[typeName] = callbacks.slice(0, index).concat(callbacks.slice(index + 1));
    }
    delete callbackByName[fullName];
  }

  // Splits "type.name" into its type part and the full key. Without a dot the
  // key becomes "type."; a leading dot yields an empty type.
  function parseType(type) {
    var i = (type += "").indexOf("."), name = type;
    if (i >= 0) type = type.slice(0, i); else name += ".";
    if (type && !callbacksByType.hasOwnProperty(type)) throw new Error("unknown type: " + type);
    return {type: type, name: name};
  }

  // Creates the dispatch method for one event type.
  function applier(type) {
    return function() {
      var callbacks = callbacksByType[type], // Defensive reference; copy-on-remove.
          callback,
          callbackValue,
          i = -1,
          n = callbacks.length;

      while (++i < n) {
        if (callbackValue = (callback = callbacks[i]).value) {
          callbackValue.apply(this, arguments);
        }
      }

      return that;
    };
  }
}
// Convenience factory: dispatch("a", "b") builds a Dispatch for the given
// type names without requiring `new`.
function dispatch() {
  var typeNames = arguments;
  return new Dispatch(typeNames);
}
dispatch.prototype = Dispatch.prototype; // allow instanceof
exports.dispatch = dispatch;
}));
},{}]},{},[1])(1)
});
<!DOCTYPE html>
<meta charset="utf-8">
<script src="http://d3js.org/d3.v3.min.js"></script>
<script src="https://gist.githubusercontent.com/helenedraux/075971f9d2ef7999364ddd84daacf83f/raw/bed58df422a8da5e8608a9bdebf3e7bf2e214451/d3.layout.cloud.js"></script>
<script src="https://gist.githubusercontent.com/helenedraux/075971f9d2ef7999364ddd84daacf83f/raw/bed58df422a8da5e8608a9bdebf3e7bf2e214451/removeStopWords.js"></script>
<script type="text/javascript" src="http://cdnjs.cloudflare.com/ajax/libs/underscore.js/1.4.4/underscore-min.js"></script>
<body>
<div id='title'>Title:</div>
<script>
//Simple animated example of d3-cloud - https://github.com/jasondavies/d3-cloud
//Based on https://github.com/jasondavies/d3-cloud/blob/master/examples/simple.html
//& http://bl.ocks.org/jwhitfieldseed/9697914 for the animation
//Remove Stopwords by GeekLad http://geeklad.com
//List of projects. The first word must be the name of the project.
var projects = [
"Moves platform Schultz + Grassov is a landscape architect company based primarily in Copenhagen, but with international projects. The first project I was involved in with them was in DTU (Copenhagen, DK), then in WUSTL (Saint Louis, US). I built a platform to monitor volunteers’ movements for a short period of time. The platform collects two types of data: the tracks, places and activities from Moves, but also data related to the volunteers’ demographics or commuting habits. Moves The Moves app is a smartphone application recording the location of the phone at regular intervals. It creates two types of geographic data: a track, which is the route followed by users, and a list of places, where the users has stayed during a significant amount of time. It also classify movements in terms of transport: walking, cycling, running and transport (=unclassified). Positioning The position is derived from the location through a GPS signal, 3G, or wi-fi. Its accuracy and precision depends on the phones, and no settings can modify the intervals at which the position is taken. From experience, iPhones record the location more frequently than Android phones. The only way to improve the quality of the records is to have as many sensors as possible on. GPS GPS signal is the most accurate outdoor, but requires to receive the signal from at least 3 satellites, and the precision of the position will depend on the number of satellites as well as the absence of cover (e.g. tall buildings, and cloud or forest covers can divert a GPS signal). This means that for a Moves app user with GPS location enabled, the quality of the location of the phone will depend on the surroundings. Typically, we’ve experienced that the users’ tracks can ‘cross’ buildings – therefore the quality of the tracks is stronger in open areas, but the bigger picture is considered as reliable for urban environments. 
Network 3G and wi-fi signals can also help locating users; especially indoors where the GPS cannot reach the phones. Therefore keeping wi-fi and/or 3G in urban areas can significantly improve the location accuracy. Communicating with Moves The Moves API is a service from Moves to connect users with 3rd party applications, giving access to tracks, places, and activity types to 3rd parties approved by the user. These 3rd parties are called ‘Connected apps’, and Mobility Pal is one of them, albeit not a public app, and therefore is not in the available from the official connected apps in Moves. Strength of Moves The Moves app is unique for multiple reasons: It does not calculate the tracks and places in the phone, but sends the positions recorded during the day to the Moves servers at periodic intervals (when opening the application, or about once a day when the phone has a data connection – wi-fi or 3G). This means that battery usage is much lower than most applications offering this type of services. Moves creates two types of geographic datasets: tracks and places. It ‘guesses’ the type of transport based on the speed. It also links the places to foursquare places (if existing) It has an API, which means that for a programmer, it is possible to access tracks and places with the users’ permission.",
"CWASU In the Cities Institute I also helped on numerous cartographic projects, notably taking the lead on the spatial analysis for one project working with the CWASU and Solace Women’s Aid. Some of the data analysis work I’ve done for the Child and Women Abuse Studies Unit (CWASU) last year has been published in a report: Finding the Costs of Freedom – How women and children rebuild their lives after domestic violence. Mapping social and relational networks The CWASU had interviewed 107 women and children in the UK who had been or were victims of domestic violence. One of their research interest was to identify the relevance of social and relational network to rebuilding their lives. They interviewed four times over three years, the first wave have 100 women and the last 65. Most of the loss was due to change of home and contact details. During the interviews organised through the charity Solace women’s aid, they asked the women to quantify (family members and friends), qualify (disruptive or supportive) and locate (postcode) their relatives. The relations also were quantified in terms of contact frequency, ways of contact, and length of contact. Although they performed this exercise four times, we mapped only the first (95 maps) and the last (60 maps) waves. The data I had access to a list of relations, with a lot of information describing the friends and relatives, and their interactions (or lack of). The friends and relatives: where they lived, indicating if the person was a friend or family member, the type of relation (disruptive or supportive) The interaction: the frequency of contact the type of contact (face-to-face or not) – where the contact happened (when it did; some meetings happened at their own home, at the other person’s home, or in another neutral location). These maps challenged me in many ways. I had not been present during the research design, the data collection, or the data entry; so the data had to be cleaned before being able to analyse it. 
A few women had been very secretive about the location of their social network, and so it was hard to map it. Actually, I was more surprised how well they knew their relative’s postcodes! I tried my best to pin down some locations that were very vague, at best it was the name of a city or neighbourhood, at worst it was just ‘a cafe around that neighbourhood’. The further away the person lived, the less the precise location really mattered for the analysis. In a very few cases the postcodes were wrong, but that did not happen so often. The maps I used ArcGIS to make the 95 + 60 maps, making one map per person. On the map I tried to show the extent of their social network as well as their interactions with each members of the network. Each person they knew was given two attributes: friends/family (shape) and disruptive/supportive (color) The relationship was described in terms of frequency (thickness) and mode of contact (color and type). meeting places, if they happened in neither home’s people without precise locations were mapped a buffer All this was mapped onto one page each, but for some networks I had to use multiple frames to visualise their very close relationship and others living much further away. Although the maps were not essential to the analysis, they were really appreciated by the women who got to see them three years later. It helped them coming to term with the journey they had gone through so far. Visualisation of two women's social network Visualisation of two women’s social network The typology Every woman’s story was different and unique in its own way; some had completely cut ties with their families because of the situation, some had no friends, or all their friends very far away. Some kept close relationship with people they qualified as disruptive. Some had only new friends, or old friends only they never saw. I designed a typology based on the extent of this network (see table below), and compared the changes three years later. 
Typology Description Micro local More than 50% family and friends live within 2 km of the woman Local More than 50% family and friends live within 5km of the woman Across UK More than 50% family and friends live in micro-local, local and other parts of the UK Non-local More than 50% family and friends live outside of local and micro-local but within UK International More than 50% family and friends live outside the UK Scattered None of the above At first we were very surprised to find the ‘scattered’ category, but there were 7 women that fit in that category. However the category completely disappeared in the last wave, while local categories increased and global categories decreased. The women had in a way started to ‘sort out’ their lives, and invested more time to closer relationship. What I learned It was the first time I worked with such a large dataset, and with so sensitive data. Working in GIS, with real data, can be very intrusive; I understand that some women were very secretive about their or other people’s locations. Working with such large files required to automate as much as possible, jumping from QGIS to ArcGIS to use different tools. I learned a lot about templates! Working on this project was an eye-opening insight into domestic violence and the burden that some women go through even once they have escaped abusive partners/relatives. I did not have access to a lot of data, only the description of their relationship, which I had to categorise and ‘fit in boxes’. Sometimes it was difficult to quantify so much real lives. The very little I had access to, however, showed there was no simple answer ‘just get out of the situation’, and that everyone around the victim/perpetrator was affected in different ways.",
"Parkinson'sUK My main contribution to the project consisted in building the consolidated database (during the project, I referred to it as 'the world', and it did become MY world for a good week..) that I eventually renamed 'the network'. It contains everyone who had ever appeared in the datasets that the organisation had. The quality of the data was not very consistent, which made it difficult to put together. It was also only meant as an exploration, to understand how people approach the organisation, and how long they stayed with them.. so not something to build on. From this network, Francesco was able to produce some statistic of presence between datasets. He then tried to find common behaviour between certain users of the services, which was quite successful. I had already dealt a fair bit with databases: designed my own databases, especially to store survey results, I had made many mistakes in doing so (like storing checkboxes results as a list in a field.. which is a pain when analysing), and even had stored geographic data (points as latitude and longitude, but also lines, using the geocoding algorithm from Google). However, I had never dealt with so many datasets, in such an inconsistent state. Neither had I ever tried to consolidate database. It was therefore a huge step for me to do it with python, not just once but 5 times, since most of the datasets actually listed individuals many times. Actually one of the databases was not supposed to have duplicated individuals in it, but I discovered when making the network that 10% of it was duplicated. makingthenetwork-page001I used the email and phone to match individuals. This required cleaning many phone fields (it is incredible what people write in a 'phone' entry) and email fields (same here, and it was more difficult to clean in an automated way). I tried using people's name, but this created some duplicates, even including names that I didn't think were common. 
In the end, we produced an interesting picture of what services are used by different users. This is quite helpful to understand how people use different services, how efficient are these services at pulling users from one to the other."
]
// First, strip basic punctuation from every project text and remove its stop
// words; cleanwords[k] is the cleaned text of projects[k].
var cleanwords = [];
var j = 0;
while (j < projects.length) {
  cleanwords.push(removeStopWords(projects[j].replace(/[!\.,:;\?\']/g, '')));
  j += 1;
}
// Then, select the most frequent words of one project.
//
// i: index into `cleanwords`.
// Returns an array of {text, freq, title} objects sorted by descending
// frequency (ties keep first-occurrence order). `title` is the first word of
// the cleaned text, i.e. the project name.
// NOTE(review): as before, the single most frequent word is skipped and the
// next 19 are returned (slice(1, 20)), although the description above the
// function spoke of "the 20 most frequent words" — confirm which is intended.
function getwords(i) {
  // Runs of spaces would otherwise produce empty "words".
  var list = cleanwords[i].split(' ').filter(function(word) {
    return word !== '';
  });
  // Prototype-less map, so that words such as "constructor" or "toString"
  // are counted like any other word.
  var entryByWord = Object.create(null);
  var frequency_list = [];

  list.forEach(function(word) {
    var entry = entryByWord[word];
    if (!entry) {
      entry = entryByWord[word] = {
        text : word,
        freq : 0,
        title: list[0]
      };
      frequency_list.push(entry);
    }
    entry.freq += 1;
  });

  frequency_list.sort(function(a, b) { return b.freq - a.freq; });
  return frequency_list.slice(1, 20);
}
// Redraw the cloud with the words of project `i` (default 0), then schedule
// the following project 5 seconds later, wrapping around at the end of the list.
// vis: object returned by makewordcloud(), i.e. anything with an update(words) method.
function newproject(vis, i) {
  i = i || 0;
  var current = i % cleanwords.length;
  vis.update(getwords(current));
  // Advance by exactly one project per tick. Incrementing both here and in
  // the getwords() argument would skip every other project.
  setTimeout(function() { newproject(vis, (current + 1) % cleanwords.length); }, 5000);
}
//Create the word cloud visualisation inside <body> (makewordcloud is declared below and hoisted).
var wordcloud = makewordcloud('body');
//Start cycling through the projects
newproject(wordcloud);
// Creates an animated word cloud inside the element matched by `selector`.
// Returns an object with a single method, update(frequency_list), which lays
// out and draws a new set of words. Each word is an object with `text`, `freq`
// and `title` properties, as produced by getwords().
function makewordcloud(selector) {
  var w = 500;
  var h = 500;
  var fill = d3.scale.category20();

  // Create word cloud svg. The layout reports word positions relative to the
  // centre of its area, so the group is moved to the centre of the svg.
  var svg = d3.select(selector).append("svg")
      .attr("width", w)
      .attr("height", h)
    .append("g")
      .attr("transform", "translate(" + [w / 2, h / 2] + ")");

  // Draw the word cloud. Called by the layout's "end" event with the words it
  // managed to place.
  function draw(words) {
    // Join the placed words to the text elements, keyed by the word itself so
    // that a word present in two consecutive clouds moves instead of being
    // recreated.
    var cloud = svg.selectAll("g text")
        .data(words, function(d) { return d.text; });

    // Enter and style each word
    cloud.enter()
      .append("text")
        .style("font-family", "Impact")
        .style("fill", function(d, i) { return fill(i); })
        .attr("text-anchor", "middle")
        .attr('font-size', 1)
        .text(function(d) { return d.text; });

    // Transitions between each drawing. d.size is the font size the layout
    // used when reserving space for the word; drawing with that same value
    // keeps the text inside its collision box.
    cloud.transition()
        .duration(600)
        .style("font-size", function(d) { return d.size + "px"; })
        .attr("transform", function(d) {
          return "translate(" + [d.x, d.y] + ")rotate(" + d.rotate + ")";
        })
        .style("fill-opacity", 1);

    // Exit the words by slowly reducing
    cloud.exit()
      .transition()
        .duration(200)
        .style('fill-opacity', 1e-6)
        .attr('font-size', 1)
        .remove();
  }

  // Update the words to be shown
  return {
    update: function(frequency_list) {
      // Create a scale based on the frequency words appear (variable in the data)
      var sizeScale = d3.scale.linear()
          .domain([0, d3.max(frequency_list, function(d) { return d.freq; })])
          .range([10, 95]);

      //Update the title of the project. The title is plain text, so it is
      //set as text rather than parsed as HTML; an empty list leaves it as is.
      if (frequency_list.length) {
        document.getElementById('title').textContent = frequency_list[0].title;
      }

      d3.layout.cloud().size([w, h])
          .words(frequency_list)
          .padding(5)
          .rotate(function() { return ~~(Math.random() * 2) * 90; })
          .font("Impact")
          .fontSize(function(d) { return sizeScale(d.freq); })
          .on("end", draw)
          .start();
    }
  };
}//makewordcloud
</script>
/*
* String method to remove stop words
* Written by GeekLad http://geeklad.com
* Stop words obtained from http://www.lextek.com/manuals/onix/stopwords1.html
* Usage: string_variable.removeStopWords();
* Output: The original String with stop words removed
*/
function removeStopWords(stringtoclean) {
var x;
var y;
var word;
var stop_word;
var regex_str;
var regex;
var cleansed_string = stringtoclean;
//var cleansed_string = this.valueOf();
var stop_words = new Array(
'a','about','above','across','after','again','against','all','almost','alone','along','already','also','although','always','among','an','and','another','any','anybody','anyone','anything','anywhere','are','area','areas','around','as','ask','asked','asking','asks','at','away','b','back','backed','backing','backs','be','became','because','become','becomes','been','before','began','behind','being','beings','best','better','between','big','both','but','by','c','came','can','cannot','case','cases','certain','certainly','clear','clearly','come','could','d','did','differ','different','differently','do','does','done','down','down','downed','downing','downs','during','e','each','early','either','end','ended','ending','ends','enough','even','evenly','ever','every','everybody','everyone','everything','everywhere','f','face','faces','fact','facts','far','felt','few','find','finds','first','for','four','from','full','fully','further','furthered','furthering','furthers','g','gave','general','generally','get','gets','give','given','gives','go','going','good','goods','got','great','greater','greatest','group','grouped','grouping','groups','h','had','has','have','having','he','her','here','herself','high','high','high','higher','highest','him','himself','his','how','however','i','if','important','in','interest','interested','interesting','interests','into','is','it','its','itself','j','just','k','keep','keeps','kind','knew','know','known','knows','l','large','largely','last','later','latest','least','less','let','lets','like','likely','long','longer','longest','m','made','make','making','man','many','may','me','member','members','men','might','more','most','mostly','mr','mrs','much','must','my','myself','n','necessary','need','needed','needing','needs','never','new','new','newer','newest','next','no','nobody','non','noone','not','nothing','now','nowhere','number','numbers','o','of','off','often','old','older','oldest','on','once','one','only','open','opened','opening','opens','or','
order','ordered','ordering','orders','other','others','our','out','over','p','part','parted','parting','parts','per','perhaps','place','places','point','pointed','pointing','points','possible','present','presented','presenting','presents','problem','problems','put','puts','q','quite','r','rather','really','right','right','room','rooms','s','said','same','saw','say','says','second','seconds','see','seem','seemed','seeming','seems','sees','several','shall','she','should','show','showed','showing','shows','side','sides','since','small','smaller','smallest','so','some','somebody','someone','something','somewhere','state','states','still','still','such','sure','t','take','taken','than','that','the','their','them','then','there','therefore','these','they','thing','things','think','thinks','this','those','though','thought','thoughts','three','through','thus','to','today','together','too','took','toward','turn','turned','turning','turns','two','u','under','until','up','upon','us','use','used','uses','v','very','w','want','wanted','wanting','wants','was','way','ways','we','well','wells','went','were','what','when','where','whether','which','while','who','whole','whose','why','will','with','within','without','work','worked','working','works','would','x','y','year','years','yet','you','young','younger','youngest','your','yours','z'
)
// Split out all the individual words in the phrase
var words = cleansed_string.match(/[^\s]+|\s+[^\s+]$/g)
// Review all the words
for(x=0; x < words.length; x++) {
// For each word, check all the stop words
for(y=0; y < stop_words.length; y++) {
// Get the current word
word = words[x].replace(/\s+|[^a-z]+/ig, ""); // Trim the word and remove non-alpha
// Get the stop word
stop_word = stop_words[y];
// If the word matches the stop word, remove it from the keywords
if(word.toLowerCase() == stop_word) {
// Build the regex
regex_str = "^\\s*"+stop_word+"\\s*$"; // Only word
regex_str += "|^\\s*"+stop_word+"\\s+"; // First word
regex_str += "|\\s+"+stop_word+"\\s*$"; // Last word
regex_str += "|\\s+"+stop_word+"\\s+"; // Word somewhere in the middle
regex = new RegExp(regex_str, "ig");
// Remove the word from the keywords
cleansed_string = cleansed_string.replace(regex, " ");
}
}
}
return cleansed_string.replace(/^\s+|\s+$/g, "");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment