Skip to content

Instantly share code, notes, and snippets.

@ppham27
Last active September 14, 2021 16:27
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ppham27/12f7444e598011d3182b4fa2ed953eea to your computer and use it in GitHub Desktop.
Save ppham27/12f7444e598011d3182b4fa2ed953eea to your computer and use it in GitHub Desktop.
Suffix Array Construction

This visualization shows how a suffix array can be constructed in O(N log(N) log(N)) time. Everytime, we sort using the first 2^i characters, starting at i = 0. More concretely, assume that first 2^i characters have been sorted. Then, remove the first 2^i characters. The remaining string is a suffix, whose first 2^i characters are already sorted. So, we can sort the first 2^(i+1) characters by comparing a pair or ranks, which is just a pair of integers. In this way, we avoid the O(N) cost of doing a comparison. Since we need to sort log(N) times, the total running time is O(N log(N) log(N)).

The comparison sort is an implementation of quicksort, where partitions are selected with the median-of-three rule.

<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<link rel="stylesheet" type="text/css" href="style.css">
<script src="https://d3js.org/d3.v3.min.js" charset="utf-8"></script>
</head>
<body>
<div id="visualization">
</div>
<script src="visualization.js"></script>
</body>
</html>
body {
font-family: HelveticaNeue, Helvetica;
}
#visualization {
position: absolute;
width: 960px;
height: 500px;
outline: 1px solid #ccc;
}
.suffix {
position: absolute;
outline: 1px solid #ccc;
}
.char {
text-anchor: middle;
font-family: HelveticaNeue, Helvetica;
font-size: 20px;
}
#controls {
width: 300px;
}
#controls fieldset {
border: none;
margin: 5px;
padding: 0px;
height: 70px;
}
#controls input {
width: 200px;
}
#controls #error-message {
font-size: 13px;
color: red;
}
#controls #sorter {
margin: 5px;
}
#controls button {
margin-right: 5px;
}
var S = 'ATGATGTAAGATGTTACATGAAAAC';
var stringSizeLimit = 25;
var charSize = 20;
var jobQueue = new Array();
var worker;
var suffixArray = new Array();
for (var i = 0; i < S.length; ++i) suffixArray.push(i);
var canvas = d3.select('#visualization');
var suffixArrayDiv = new SuffixArrayDiv(canvas, S);
var suffixSorter = new SuffixSorter(S);
var controls = canvas
.append('div')
.attr('id', 'controls');
var stringChanger = controls.append('fieldset');
stringChanger.append('label')
.text('String: ')
.attr('for', 'string-input');
stringChanger.append('input')
.attr('id', 'string-input')
.attr('value', S);
stringChanger.append('br')
stringChanger.append('button')
.attr('type', 'button')
.text('Change String')
.on('click', function() {
var newS = document.getElementById('string-input').value;
if (newS.length < 1) {
stringChanger.select('#error-message')
.text('String must not be empty.');
} else if (newS.length > stringSizeLimit) {
stringChanger.select('#error-message')
.text('String must be at most ' + stringSizeLimit + ' characters.');
} else {
stringChanger.select('#error-message').text('');
S = newS;
suffixArray = new Array();
for (var i = 0; i < S.length; ++i) suffixArray.push(i);
suffixArrayDiv.destroy();
suffixArrayDiv = new SuffixArrayDiv(canvas, S);
suffixSorter.reset(S);
setCharactersSorted(0);
}
});
stringChanger.append('br')
stringChanger.append('span')
.attr('id', 'error-message');
var sorter = controls.append('div').attr('id', 'sorter');
sorter.append('button')
.attr('type', 'button')
.text('Sort')
.on('click', function() {
quicksort(suffixArray, 0, S.length, suffixSorter.compare.bind(suffixSorter),
function(i, j) {
submitJob(suffixArrayDiv.swapColumns, suffixArrayDiv, [i, j]);
});
suffixSorter.updateRank(suffixArray);
var previousCharactersSorted = suffixSorter.charactersSorted;
var charactersSorted = suffixSorter.incrementCharactersSorted();
for (var i = previousCharactersSorted; i < charactersSorted; ++i) {
submitJob(suffixArrayDiv.colorRow, suffixArrayDiv, [i, '#e41a1c']);
}
setCharactersSorted(charactersSorted);
});
sorter.append('span').text('Characters sorted: ');
sorter.append('span').attr('id', 'characters-sorted').text(0);
function SuffixSorter(S) {
this.rank = new Array();
this.charactersSorted = 0;
this.S = S;
for (var i = 0; i < S.length; ++i) this.rank.push(0);
this.compare = function(a, b) {
if (this.charactersSorted === 0) return this.S[a] < this.S[b];
if (this.rank[a] !== this.rank[b]) return this.rank[a] < this.rank[b];
if (a + this.charactersSorted === this.S.length) return true;
if (b + this.charactersSorted === this.S.length) return false;
return this.rank[a + this.charactersSorted] < this.rank[b + this.charactersSorted];
}
this.updateRank = function(arr) {
var newRank = new Array(arr.length);
var currentRank = 0;
newRank[arr[0]] = currentRank;
for (var i = 1; i < arr.length; ++i) {
if (this.compare(arr[i-1], arr[i])) ++currentRank;
newRank[arr[i]] = currentRank;
}
this.rank = newRank;
}
this.incrementCharactersSorted = function() {
if (this.charactersSorted === 0) {
this.charactersSorted = 1;
} else {
this.charactersSorted *= 2;
}
this.charactersSorted = Math.min(this.charactersSorted, S.length);
return this.charactersSorted;
}
this.reset = function(S) {
this.rank = new Array(S.length);
for (var i = 0; i < S.length; ++i) this.rank.push(0);
this.charactersSorted = 0;
this.S = S;
}
}
function setCharactersSorted(charactersSorted) {
sorter.select('#characters-sorted')
.transition().duration(1000).tween('text', function() {
var interpolater = d3.interpolateRound(parseInt(this.textContent), charactersSorted);
return function(t) {
this.textContent = interpolater(t);
}
});
}
function submitJob(f, thisArg, args) {
jobQueue.push([f, thisArg, args]);
startWorker();
}
function startWorker() {
if (worker === undefined) {
worker = setInterval(function() {
if (jobQueue.length) {
var job = jobQueue.shift();
job[0].apply(job[1], job[2]);
} else {
clearInterval(worker);
worker = undefined;
}
}, 250);
}
}
function swap(arr, i, j) {
var tmp = arr[i]; arr[i] = arr[j]; arr[j] = tmp;
}
function quicksort(arr, i, j, comparator, swapTrigger) {
if (j - i <= 1) return;
// median of 3 method to select pivot
var mid = Math.floor((i + j)/2);
if (comparator(arr[i], arr[mid]) && comparator(arr[mid], arr[j - 1])) {
swap(arr, mid, j - 1);
if (swapTrigger) swapTrigger(mid, j - 1);
} else if (comparator(arr[mid], arr[i]) && comparator(arr[i], arr[j - 1])) {
swap(arr, i, j - 1);
if (swapTrigger) swapTrigger(i, j - 1);
}
var pivot = j - 1;
var cursor = i;
for (var k = i; k < j - 1; ++k) {
if (comparator(arr[k], arr[pivot])) {
if (cursor != k) {
swap(arr, k, cursor);
if (swapTrigger) swapTrigger(k, cursor);
}
++cursor;
}
}
if (pivot != cursor) {
swap(arr, pivot, cursor);
if (swapTrigger) swapTrigger(pivot, cursor);
}
quicksort(arr, i, cursor, comparator, swapTrigger);
quicksort(arr, cursor + 1, j, comparator, swapTrigger);
}
function SuffixArrayDiv(parent, S) {
this.parent = parent;
this.suffixArrayDiv = parent.insert('div', ':first-child')
.attr('class', 'suffix-array-div')
.style('position', 'absolute')
.style('left', 2*(960 - S.length*charSize)/3 + 'px');
this.suffixDivs = new Array(); var suffixDivs = this.suffixDivs;
this.suffixArrayDiv.selectAll('.suffix')
.data(S).enter()
.append('div')
.attr('class', 'suffix')
.style('left', function(d, i) { return charSize*i + 'px'; })
.style('opacity', 0)
.each(function(d, i) {
suffixDivs.push(d3.select(this));
var suffixSvg = d3.select(this).append('svg')
.attr('width', charSize)
.attr('height', S.length*charSize);
suffixSvg.selectAll('.char')
.data(S.substring(i))
.enter()
.append('text')
.attr('class', function(d, i) {
return 'char ' + 'row' + i;
})
.attr('x', charSize/2)
.attr('y', function(dd, j) { return (j+1)*charSize; })
.text(function(d) { return d; });
})
.transition().duration(1000)
.style('opacity', 1);
this.colorRow = function(i, color) {
var chars = this.suffixArrayDiv.selectAll('.row' + i);
chars.transition().duration(1000).attr('fill', color);
}
this.swapColumns = function(i, j) {
suffixDivs[i]
.transition()
.styleTween('left', function(d, idx, initial) {
var end = j*charSize + 'px';
return d3.interpolateString(initial, end);
});
suffixDivs[j].transition()
.styleTween('left', function(d, idx, initial) {
var end = i*charSize + 'px';
return d3.interpolateString(initial, end);
});
var tmp = suffixDivs[i];
suffixDivs[i] = suffixDivs[j];
suffixDivs[j] = tmp;
}
this.destroy = function() {
this.suffixArrayDiv.transition().duration(1000).style('opacity', 0).remove();
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment