Skip to content

Instantly share code, notes, and snippets.

@madelfio
Last active December 20, 2015 07:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save madelfio/6091794 to your computer and use it in GitHub Desktop.
Save madelfio/6091794 to your computer and use it in GitHub Desktop.
Schema Extraction

Output of the schema extraction process.

"use strict";
// Class assigned to each row of the source table, listed in row order.
var classes = [
  'title',
  'non-relational',
  'non-relational',
  'header',
  'group-header',
  'data',
  'data',
  'data',
  'aggregate',
  'group-header',
  'data',
  'data',
  'data',
  'aggregate',
  'blank',
  'non-relational'
];

// Row classes that extract() animates into the schema table, in this order.
var targets = [
  'title',
  'header',
  'data',
  'group-header',
  'aggregate'
];

// Wrapper <div> appended to <body>; create_copy() adds the row copies to it.
var container = d3.select('body').append('div');
/**
 * Builds a fully transparent, absolutely positioned duplicate of one row.
 *
 * Meant to be passed to a d3 selection's `each`, so `this` is the row being
 * copied. The duplicate is placed in its own single-row table inside
 * `container`, at the position jQuery reports for the original row.
 *
 * @param {string} tr - Datum bound to the row; despite the name this is the
 *     class name given to the copy (followed by the extra class `copy`).
 * @param {number} i - Index of the row (unused).
 */
function create_copy(tr, i) {
  var source = d3.select(this);
  var origin = $(this).position();

  var copy = container.append('table').append('tr');
  copy.attr('class', tr + ' copy');
  copy.style('position', 'absolute');
  copy.style('left', origin.left + 'px');
  copy.style('top', origin.top + 'px');
  // Start invisible; extract() fades the copy in later.
  copy.style('opacity', 0);
  copy.html(source.html());
}
// Bind one entry of `classes` to each row of the source table (matched by
// position) and create a transparent copy of every row.
d3.selectAll('.orig tr')
.data(classes)
.each(create_copy);
/**
 * Animates the row copies of one target class into the schema table, then
 * continues with the next entry of `targets`.
 *
 * Each matching copy fades in (staggered by 200 ms per row after a 1 s
 * delay), slides to the position of the matching `#schema` sub-table, and is
 * re-parented into that sub-table when its slide ends. The next target class
 * is started exactly once, after the last row of this class has finished.
 *
 * @param {number} tgt_num - Index into `targets` of the class to process.
 *     An index with no entry in `targets` is ignored.
 */
function extract(tgt_num) {
  var t = targets[tgt_num];
  // Nothing to do for an index outside `targets`.
  if (t === undefined) {return;}

  var tgt = d3.select('#schema .' + t).node();
  var rows = container.selectAll('tr.' + t);

  // The 'end' event fires once per row, so count the rows up front and only
  // advance to the next class when the last one has finished.
  var remaining = 0;
  rows.each(function() {remaining += 1;});

  // Starts the following target class, if there is one.
  function next() {
    if (tgt_num + 1 < targets.length) {extract(tgt_num + 1);}
  }

  // With no destination table or no rows, no 'end' event would ever fire,
  // so move on immediately instead of stalling the chain.
  if (!tgt || remaining === 0) {
    next();
    return;
  }

  var pos = $(tgt).position();

  rows.transition()
    .delay(function(d, i) {return 1000 + i * 200;})
    .style('opacity', 1)
    .transition()
    // Stack the rows 20px apart while they travel to the destination.
    .style('top', function(d, i) {return (pos.top + i * 20) + 'px';})
    .style('left', function() {return pos.left + 'px';})
    .each('end', function() {
      // Drop the temporary single-row table and move the row itself into
      // the destination table, letting normal table layout take over.
      d3.select(this.parentNode).remove();
      tgt.appendChild(this);
      d3.select(this).style('position', 'inherit');

      remaining -= 1;
      if (remaining === 0) {next();}
    });
}
// Kick off the animation with the first entry of `targets`.
extract(0);
<!doctype html>
<meta charset="utf-8">
<!-- Demo page: shows a source table next to an "Extracted Schema" table;
     extract.js animates the source rows into the schema table. -->
<title>Schema Extraction from Web Tables</title>
<style>
  /* Base table layout shared by every table on the page. */
  table {
    border-collapse: collapse;
    width: inherit;
    margin: 5px 10px;
  }

  /* The source table is rendered as a raised card. */
  table.orig {
    display: inline-block;
    border-radius: 2px;
    box-shadow: 0 0 10px rgba(100, 100, 100, 0.8);
    background-color: #f6f6f6;
  }

  /* Cell borders: all cells of the source table, but only the direct cells
     of the schema table (not the cells of its nested tables). */
  .orig th,
  .orig td,
  .tgt > tbody > tr > th,
  .tgt > tbody > tr > td {
    border: 1px solid #ccc;
  }

  th,
  td {
    padding: 1px 5px;
  }

  .orig th,
  .tgt th {
    background-color: #c33;
    color: white;
  }

  /* Per-class row styling. */
  .title {
    font-weight: bold;
    color: darkblue;
  }

  .non-relational {
    color: #888;
    font-size: 0.9em;
  }

  .aggregate {
    font-style: italic;
  }

  .group-header td {
    background-color: #ddd;
    border-bottom: 2px solid #ccc;
    font-weight: bold;
    font-style: italic;
  }

  /* Cell alignment helpers: middle and right. */
  .m {
    text-align: center;
  }

  .r {
    text-align: right;
  }
</style>
<body>
<!-- Outer layout table: source table on the left, extracted schema on the right. -->
<table>
<tr><th>Source Table</th><th>Extracted Schema</th></tr>
<tr><td>
<!-- Source table. Row order must stay in step with the `classes` array in
     extract.js, which is bound to these rows by position. -->
<table class="example orig">
<tr class="title"><td colspan="3">Patent Applications by Residents</td></tr>
<tr class="non-relational"><td colspan="3">Data Source: worldbank.org</td></tr>
<tr class="non-relational"><td colspan="3">(showing top countries in each continent)</td></tr>
<tr class="header"><th>Country</th><th>Residents</th><th>Applications</th></tr>
<tr class="group-header"><td>North America</td><td></td><td></td></tr>
<tr class="data"><td class="m">United States</td><td class="r">307,007,000</td><td class="r">224,912</td></tr>
<tr class="data"><td class="m">Canada</td><td class="r">33,739,900</td><td class="r">5,067</td></tr>
<tr class="data"><td class="m">Mexico</td><td class="r">112,033,369</td><td class="r">822</td></tr>
<tr class="aggregate"><td></td><td>N.A. Total</td><td class="r">230,801</td></tr>
<tr class="group-header"><td>Asia</td><td></td><td></td></tr>
<tr class="data"><td class="m">Japan</td><td class="r">127,557,958</td><td class="r">295,315</td></tr>
<tr class="data"><td class="m">China</td><td class="r">1,331,380,000</td><td class="r">229,096</td></tr>
<tr class="data"><td class="m">South Korea</td><td class="r">48,747,000</td><td class="r">127,316</td></tr>
<tr class="aggregate"><td></td><td>Asia Total</td><td class="r">651,727</td></tr>
<tr class="blank"><td>&nbsp;</td><td></td><td></td></tr>
<tr class="non-relational"><td colspan="2">Note: data from 2009</td><td></td></tr>
</table>
</td><td style="vertical-align:top;">
<!-- Schema table. Each nested table starts empty; extract.js looks it up by
     its class name under #schema and appends the matching rows to it. -->
<table id="schema" class="tgt" style="display:inline-block;">
<tr><td>Title:</td><td><table class="title"></table></td></tr>
<tr><td>Columns:</td><td><table class="header"></table></td></tr>
<tr><td>Data:</td><td><table class="data"></table></td></tr>
<tr><td>Groups:</td><td><table class="group-header"></table></td></tr>
<tr><td>Totals/Subtotals:</td><td><table class="aggregate"></table></td></tr>
</table>
</td></tr></table>
</body>
<!-- Libraries are loaded over HTTPS so the browser does not block them as
     mixed content when this page itself is served over HTTPS.
     extract.js uses both jQuery and d3, so it must come last. -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.10.2/jquery.min.js"></script>
<script src="https://d3js.org/d3.v3.min.js"></script>
<script src="extract.js"></script>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment