Skip to content

Instantly share code, notes, and snippets.

@jyucsiro
Created October 18, 2017 01:35
Show Gist options
  • Save jyucsiro/9175d3327ffb7735cd9a5de33faea101 to your computer and use it in GitHub Desktop.
Save jyucsiro/9175d3327ffb7735cd9a5de33faea101 to your computer and use it in GitHub Desktop.
AusOpenData Survey - Variation by format and byte-size of datasets
source format bytes
www-data-vic-gov-au tabular 414437056
www-data-vic-gov-au web document 1341
www-data-vic-gov-au API or web service 1027913
www-data-vic-gov-au other 38351851626
www-data-vic-gov-au compressed 389682211924
www-data-vic-gov-au structured data 1819829
www-data-vic-gov-au gis 2065390
www-data-vic-gov-au media 47465625158
data-gov-au web document 38919432
data-gov-au document 181181510723
data-gov-au API or web service 884689871
data-gov-au compressed 676417633793
data-gov-au gis 58186816409
data-gov-au tabular 22676310722
data-gov-au media 4499980256
data-gov-au structured data 7146689137
data-gov-au other 4304717708
data-gov-au database 111906007
data-gov-au code 477444
nsw-seed web document 9748093
nsw-seed structured data 1808521
nsw-seed document 33834564
nsw-seed gis 422388766
nsw-seed API or web service 11084
nsw-seed database 50453183
nsw-seed tabular 12477952
nsw-oeh compressed 30760651664
nsw-oeh document 5452499128
nsw-oeh gis 931618107
nsw-oeh web document 7435104
nsw-oeh media 25160043
nsw-oeh other 7933
nsw-oeh tabular 15498943
nsw-oeh API or web service 335273
data-qld-gov-au tabular 2609864816
data-qld-gov-au gis 83729326
data-qld-gov-au document 35768937
data-qld-gov-au database 557731
data-qld-gov-au structured data 380870052
data-qld-gov-au other 2205191
data-qld-gov-au web document 87586
data-qld-gov-au compressed 24042904
data-qld-gov-au media 1977
data-nsw-gov-au document 153263706
data-nsw-gov-au tabular 559824731
data-nsw-gov-au web document 1948718
data-nsw-gov-au other 7786716
data-nsw-gov-au gis 37429788
data-nsw-gov-au structured data 467964
data-nsw-gov-au API or web service 46691
data-nsw-gov-au compressed 49498710
data-nsw-gov-au code 0
data-sa-gov-au API or web service 42804
data-sa-gov-au gis 7386946536
data-sa-gov-au tabular 844263120
data-sa-gov-au web document 3014449
data-sa-gov-au document 188295380
data-sa-gov-au structured data 45067434
data-sa-gov-au other 208154918
data-sa-gov-au media 485492502
data-sa-gov-au compressed 1109372517
data-sa-gov-au code 206092
data-vic-gov-au tabular 414922944
data-vic-gov-au web document 1341
data-vic-gov-au API or web service 1027786
data-vic-gov-au other 38351851626
data-vic-gov-au compressed 389682211905
data-vic-gov-au structured data 1819935
data-vic-gov-au gis 2065390
data-vic-gov-au media 47465625158
data-wa-gov-au compressed 1526532305
data-wa-gov-au media 7829908
data-wa-gov-au web document 23004184
data-wa-gov-au document 73752538
data-wa-gov-au other 174175729
data-wa-gov-au gis 33740237
data-wa-gov-au tabular 123522897
data-wa-gov-au API or web service 2806
data-wa-gov-au structured data 16546835
ozflux web document 8453
ozflux structured data 5404685759
nci structured data 223292571586203
nci media 3722060947
nci other 1487663451545
nci document 4319625
nci code 153091
nci tabular 1706613439
nci database 1845321
nci gis 5048222
nci compressed 199872972633
aodn compressed 19083502139
aodn structured data 31601436156090
tpac structured data 13938651469677
tds-mel-csiro-au structured data 29481176582265
tds-mel-csiro-au other 41559436459
tds-mel-csiro-au web document 9921
csiro-dap other 715674349053648
csiro-dap media 8770813345531
csiro-dap structured data 12615134150391
csiro-dap tabular 62104688809
csiro-dap database 3724836003
csiro-dap gis 67384899496
csiro-dap document 7026165695
csiro-dap compressed 2044755226357
csiro-dap code 70773649
csiro-dap web document 134653859
<!DOCTYPE html>
<html>
<head>
<!-- The charset declaration belongs inside <head>, before any other content. -->
<meta charset="utf-8">
<style>
/* Heatmap cells: rects created with class "hour bordered". */
rect.bordered {
stroke: #E6E6E6;
stroke-width:2px;
}
/* Default look of axis labels and legend captions (class "mono"). */
text.mono {
font-size: 9pt;
font-family: Consolas, courier;
fill: #aaa;
}
/* Emphasised row labels (class added by the script to selected rows). */
text.axis-workweek {
fill: #000;
}
/* Emphasised column labels (class added by the script to selected columns). */
text.axis-worktime {
fill: #000;
}
</style>
<!-- Loaded over HTTPS so the script is not blocked as mixed content when the page itself is served over HTTPS. -->
<script src="https://d3js.org/d3.v3.js"></script>
</head>
<body>
<div id="chart"></div>
<script type="text/javascript">
// Chart geometry, palette and axis definitions.
// Everything is declared in ONE `var` statement: the original omitted the
// commas after `source_list`, so `formats`, `formats_labels` and `datasets`
// were created as implicit globals (a ReferenceError in strict/module code).
var margin = { top: 50, right: 0, bottom: 100, left: 300 },
    width = 960 - margin.left - margin.right,
    height = 630 - margin.top - margin.bottom,
    // Side length of one heatmap cell, in pixels.
    gridSize = Math.floor(width / 24),
    legendElementWidth = gridSize * 2,
    buckets = 9,
    colors = ["#ffffd9","#edf8b1","#c7e9b4","#7fcdbb","#41b6c4","#1d91c0","#225ea8","#253494","#081d58"], // alternatively colorbrewer.YlGnBu[9]
    // Row order of the heatmap (one row per data source).
    source_list = ["aodn", "csiro-dap", "data-gov-au", "data-nsw-gov-au", "data-qld-gov-au", "data-sa-gov-au", "data-wa-gov-au", "nci", "nsw-oeh",
        "nsw-seed", "ozflux", "tds-mel-csiro-au", "tpac", "www-data-vic-gov-au"],
    // Column order of the heatmap (one column per format category).
    formats = ["API or web service", "code", "compressed", "database", "document", "gis", "media",
        "other", "structured data", "tabular", "web document"],
    // Short column captions; must stay index-aligned with `formats`.
    formats_labels = ["API", "code", "z", "db", "doc", "gis", "media",
        "otr", "str", "csv", "web"],
    datasets = ["data_all_bytes.tsv", "data2.tsv"];

// Lookup tables: source name -> row index, format name -> column index.
var source_idx = {};
source_list.forEach(function (name, row) {
    source_idx[name] = row;
});

var format_idx = {};
formats.forEach(function (name, column) {
    format_idx[name] = column;
});
// Root drawing group: an <svg> sized to the full outer box, containing a <g>
// shifted by the left/top margins so cell coordinates can start at (0, 0).
var svg = d3.select("#chart")
    .append("svg")
    .attr("height", height + margin.top + margin.bottom)
    .attr("width", width + margin.left + margin.right)
    .append("g")
    .attr("transform", "translate(" + [margin.left, margin.top].join(",") + ")");

// Class list for a row label; rows 0..4 additionally get "axis-workweek".
// NOTE(review): the index range looks inherited from a weekday/hour heatmap —
// confirm that highlighting the first five sources is intended.
function sourceLabelClass(name, row) {
    if (row >= 0 && row <= 4) {
        return "dayLabel mono axis axis-workweek";
    }
    return "dayLabel mono axis";
}

// Class list for a column label; columns 7..16 additionally get "axis-worktime".
// NOTE(review): same remark as above — confirm the 7..16 range is intended.
function formatLabelClass(caption, column) {
    if (column >= 7 && column <= 16) {
        return "timeLabel mono axis axis-worktime";
    }
    return "timeLabel mono axis";
}

// Row labels: one right-aligned text per source, to the left of the grid.
var dayLabels = svg.selectAll(".dayLabel")
    .data(source_list)
    .enter()
    .append("text");
dayLabels
    .attr("class", sourceLabelClass)
    .attr("transform", "translate(-6," + gridSize / 1.5 + ")")
    .style("text-anchor", "end")
    .attr("y", function (name, row) { return row * gridSize; })
    .attr("x", 0)
    .text(function (name) { return name; });

// Column labels: one centred text per format caption, above the grid.
var timeLabels = svg.selectAll(".timeLabel")
    .data(formats_labels)
    .enter()
    .append("text");
timeLabels
    .attr("class", formatLabelClass)
    .attr("transform", "translate(" + gridSize / 2 + ", -6)")
    .style("text-anchor", "middle")
    .attr("y", 0)
    .attr("x", function (caption, column) { return column * gridSize; })
    .text(function (caption) { return caption; });
/**
 * Load a TSV file with `source`, `format` and `bytes` columns and draw (or
 * update) the heatmap cells and the colour legend inside `svg`.
 *
 * Each row becomes one rounded rect placed at column `format_idx[format]`,
 * row `source_idx[source]`, and filled according to its byte count.
 *
 * @param {string} tsvFile - URL of the TSV file to load.
 */
var heatmapChart = function(tsvFile) {
    d3.tsv(tsvFile,
        function(d) {
            return {
                source: d.source,
                format: d.format,
                // d3.tsv yields strings; convert once so the scale and the
                // tooltip work with a real number.
                bytes: +d.bytes
            };
        },
        function(error, data) {
            // On a failed load `data` is undefined; report and stop instead of
            // crashing further down.
            if (error) {
                console.error("Could not load " + tsvFile, error);
                return;
            }

            // Keep only rows that map onto the grid. A source or format that
            // is not listed in source_list / formats has no index, which
            // would otherwise produce NaN coordinates and misplaced cells.
            var hasOwn = Object.prototype.hasOwnProperty;
            var rows = data.filter(function(d) {
                return hasOwn.call(source_idx, d.source) &&
                    hasOwn.call(format_idx, d.format) &&
                    !isNaN(d.bytes);
            });

            // Sample values fed to the quantile scale:
            // 0, 1 MB, 100 MB, 5 GB, 10 GB, 100 GB, 10 TB.
            // The quantile scale derives its own cut points from these
            // samples (one fewer than the number of colours); the legend
            // below shows the resulting boundaries.
            var domain = [0, 1000000, 100000000, 5000000000, 10000000000, 100000000000, 10000000000000];
            var colorScale = d3.scale.quantile()
                .domain(domain)
                .range(colors);

            // One cell per (format, source) pair; the key keeps cells stable
            // when a different dataset is loaded.
            var cards = svg.selectAll(".hour")
                .data(rows, function(d) {
                    return format_idx[d.format] + ':' + source_idx[d.source];
                });

            var newCards = cards.enter().append("rect")
                .attr("x", function(d) { return format_idx[d.format] * gridSize; })
                .attr("y", function(d) { return source_idx[d.source] * gridSize; })
                .attr("rx", 4)
                .attr("ry", 4)
                .attr("class", "hour bordered")
                .attr("width", gridSize)
                .attr("height", gridSize)
                .style("fill", colors[0]);

            // The tooltip element must be created on the ENTERING cells;
            // appending it to the update selection before enter() left
            // first-render cells without a <title>.
            newCards.append("title");

            // After enter().append() the update selection also contains the
            // new cells, so the following applies to every cell.
            cards.transition().duration(1000)
                .style("fill", function(d) { return colorScale(d.bytes); });
            cards.select("title").text(function(d) { return d.bytes; });
            cards.exit().remove();

            // Legend: one swatch + caption per colour bucket, keyed by the
            // bucket's lower bound.
            var legend = svg.selectAll(".legend")
                .data([0].concat(colorScale.quantiles()), function(d) { return d; });

            // Build swatch and caption only for entering groups so that
            // re-rendering does not stack duplicate rects/texts in groups
            // that already exist.
            var newLegend = legend.enter().append("g")
                .attr("class", "legend");
            newLegend.append("rect")
                .attr("x", function(d, i) { return legendElementWidth * i; })
                .attr("y", height)
                .attr("width", legendElementWidth)
                .attr("height", gridSize / 2)
                .style("fill", function(d, i) { return colors[i]; });
            newLegend.append("text")
                .attr("class", "mono")
                .text(function(d) { return "≥ " + Math.round(d); })
                .attr("x", function(d, i) { return legendElementWidth * i; })
                .attr("y", height + gridSize);
            legend.exit().remove();
        });
};
// Draw the first dataset as soon as the script runs.
heatmapChart(datasets[0]);

// One button per dataset file; clicking a button redraws the heatmap from it.
// NOTE(review): this page contains no element with id "dataset-picker", so
// the selection below is empty and no buttons are created — confirm whether
// the picker is meant to be shown.
var datasetpicker = d3.select("#dataset-picker")
    .selectAll(".dataset-button")
    .data(datasets);

datasetpicker.enter()
    .append("input")
    .attr("type", "button")
    .attr("class", "dataset-button")
    .attr("value", function (file) { return "Dataset " + file; })
    .on("click", function (file) {
        heatmapChart(file);
    });
</script>
</body>
</html>

Copyright (c) 2016, Tom May

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment