Skip to content

Instantly share code, notes, and snippets.

@eesur
Last active January 26, 2018 10:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eesur/753e793903dad7aaa4c3 to your computer and use it in GitHub Desktop.
Save eesur/753e793903dad7aaa4c3 to your computer and use it in GitHub Desktop.
Sparse and missing data

Sparse and missing data

Sparse data can be really tricky to represent when dealing with real-time data, especially as you don't know in advance whether it will be sparse, dense, or even both within one representation.

These examples show different views of the same data: a chart of readings per minute, where the dataset has no entry for the minutes without a reading (the value for those minutes would be zero).

This example borrows from the super article "Padding for zero values", but instead of using `ticks()` it uses the difference between the start and end times to create the array of minutes to map to.

The charts are rendered using the awesome [D3 Financial Components (D3FC)](https://github.com/ScottLogic/d3-financial-components).

/**
 * Adds `fc.series.table` to the d3fc namespace: a small component that, when
 * called on a selection, appends an HTML table built from each element's
 * bound data.
 */
(function(d3, fc) {
    'use strict';

    fc.series.table = function() {
        // Property names read from every datum; also used as the header labels.
        var columns;

        // Turns one datum into a list of { column, value } cell descriptors,
        // one per configured column and in the configured order.
        function cellsFor(row) {
            return columns.map(function(key) {
                return {
                    column: key,
                    value: row[key]
                };
            });
        }

        /**
         * Renders one table per element of `selection`.
         * Every call appends a fresh <table>; nothing already in the
         * container is updated or removed.
         */
        var tabulate = function(selection) {
            selection.each(function(data) {
                var table = d3.select(this)
                    .append('table')
                    .attr('class', 'pure-table');
                var thead = table.append('thead');
                var tbody = table.append('tbody');

                // header: a single row with one <th> per column name
                thead.append('tr')
                    .selectAll('th')
                    .data(columns)
                    .enter()
                    .append('th')
                    .text(function(label) {
                        return label;
                    });

                // body: one <tr> per datum ...
                var rows = tbody.selectAll('tr')
                    .data(data)
                    .enter()
                    .append('tr');

                // ... holding one <td> per configured column
                rows.selectAll('td')
                    .data(cellsFor)
                    .enter()
                    .append('td')
                    .text(function(cell) {
                        return cell.value;
                    });
            });
        };

        /**
         * Getter/setter for the column list.
         * Called without arguments it returns the current columns; called
         * with a value it stores it and returns `this` so calls can be chained.
         */
        tabulate.columns = function(value) {
            if (arguments.length === 0) {
                return columns;
            }
            columns = value;
            return this;
        };

        return tabulate;
    };
}(d3, fc));
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<title>Monitoring Components | FC testing</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<script src="//cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min.js"></script>
<!-- https://github.com/ScottLogic/d3-financial-components -->
<script src="http://dev.eesur.com/d3fc/Layout.js"></script>
<script src="http://dev.eesur.com/d3fc/d3-financial-components.min.js"></script>
<link rel="stylesheet" href="http://yui.yahooapis.com/pure/0.6.0/pure-min.css">
<!-- <link rel="stylesheet" href="http://yui.yahooapis.com/pure/0.6.0/grids-responsive-min.css"> -->
<link href="http://dev.eesur.com/d3fc/d3-financial-components.min.css" rel="stylesheet"/>
<script type="text/javascript" src="//cdnjs.cloudflare.com/ajax/libs/moment.js/2.10.3/moment.min.js"></script>
<script type="text/javascript" src="//cdnjs.cloudflare.com/ajax/libs/lodash.js/3.9.0/lodash.min.js"></script>
<style type="text/css">
body{
padding: 20px;
}
section {
margin-bottom: 20px;
}
p {
font-size: 11px;
letter-spacing: 1px;
font-family: sans-serif;
line-height: 150%;
}
.gridline {
opacity: 0.4;
}
</style>
</head>
<body>
<section id="chart-one">
<p>normal chart, that doesn't change the data; suggesting connections/values where there shouldn't be any ↓</p>
</section>
<section id="chart-two">
<p>function that inserts missing zero values; better, but the drama is misleading ↓</p>
</section>
<section id="chart-three">
<p>Using NaN for values, and line.defined; as the values are often not next to each other, we don't get to make a path ↓</p>
</section>
<section id="chart-four">
<p>Using points to view the data ↓</p>
</section>
<section id="table">
<p>Table to see the actual data ↓</p>
</section>
<script type="text/javascript">
// Chart three: pads the sparse series with a NaN reading for every minute that
// has no entry, then draws it as a line.
// NOTE(review): the page text says the gaps rely on the line series' `defined`
// handling of NaN — confirm against the d3fc version in use.
(function valuesAdded() {
    // moment pattern that matches the date strings in sparse_test.json
    var DATE_PATTERN = 'YYYY-MM-DD HH:mm:ss',
        parseDate = d3.time.format('%Y-%m-%d %H:%M:%S').parse;

    // JSON callback
    d3.json("sparse_test.json", function(error, json) {
        if (error) return console.warn(error);
        // start_date / end_date are used as the range because the first and
        // last readings are not necessarily at the edges of the range
        var newData = padMissingMinutes(json.data, json.start_date, json.end_date);
        dataFormat(newData);
        renderChart(newData);
    });

    /**
     * Returns one row per minute from startRef to endRef (inclusive).
     * Minutes that have a reading reuse the original row object; minutes
     * without one get a `{ date, total: NaN }` filler.
     *
     * @param {Array} rows - readings as `{ date: 'YYYY-MM-DD HH:mm:ss', total }`
     * @param {string} startRef - first minute of the range
     * @param {string} endRef - last minute of the range
     * @returns {Array} `rows` itself when it already has at least one row per
     *   minute of the range, otherwise a new padded array
     */
    function padMissingMinutes(rows, startRef, endRef) {
        var start = moment(startRef, DATE_PATTERN),
            end = moment(endRef, DATE_PATTERN),
            // whole minutes between the two references; measured directly so
            // ranges of 24 hours or more do not wrap around
            totalMinutes = end.diff(start, 'minutes'),
            padded = [],
            stamp,
            i;

        // a full series has totalMinutes + 1 rows; nothing to pad in that case
        if (rows.length > totalMinutes) {
            return rows;
        }

        for (i = 0; i <= totalMinutes; i++) {
            stamp = start.clone().add(i, 'minutes').format(DATE_PATTERN);
            padded.push(_.find(rows, { date: stamp }) || { date: stamp, total: NaN });
        }
        return padded;
    }

    // Clean/format data: parses date strings to Date objects and coerces
    // totals to numbers, in place
    function dataFormat(data) {
        data.forEach(function(d) {
            d.date = parseDate(d.date);
            d.total = +d.total;
        });
    }

    // Draws gridlines plus a line series into a new svg inside #chart-three
    function renderChart(data) {
        var chart = fc.charts.linearTimeSeries()
            .xDomain(d3.extent(data, function(d) { return d.date; }))
            .xNice()
            .xTicks(d3.time.minutes, 10)
            .yDomain(fc.utilities.extent(data, 'total'))
            .yNice()
            .yTicks(5);

        // Create the gridlines
        var gridlines = fc.scale.gridlines();

        // Create the line series
        var line = fc.series.line()
            .yValue(function(d) { return d.total; });

        var multi = fc.series.multi()
            .series([gridlines, line]);
        chart.plotArea(multi);

        d3.select('#chart-three')
            .append('svg')
            .style({
                height: '300px',
                width: '900px'
            })
            .datum(data)
            .call(chart);
    }
}());
/* ______ ______ ______ ______ ______ ______ ______ ______
|______|______|______|______|______|______|______|______|
*/
// Chart two: pads the sparse series with a zero reading for every minute that
// has no entry, then draws it as a line.
(function valuesAdded() {
    // moment pattern that matches the date strings in sparse_test.json
    var DATE_PATTERN = 'YYYY-MM-DD HH:mm:ss',
        parseDate = d3.time.format('%Y-%m-%d %H:%M:%S').parse;

    // JSON callback
    d3.json("sparse_test.json", function(error, json) {
        if (error) return console.warn(error);
        // start_date / end_date are used as the range because the first and
        // last readings are not necessarily at the edges of the range
        var newData = padMissingMinutes(json.data, json.start_date, json.end_date);
        dataFormat(newData);
        renderChart(newData);
    });

    /**
     * Returns one row per minute from startRef to endRef (inclusive).
     * Minutes that have a reading reuse the original row object; minutes
     * without one get a `{ date, total: 0 }` filler.
     *
     * @param {Array} rows - readings as `{ date: 'YYYY-MM-DD HH:mm:ss', total }`
     * @param {string} startRef - first minute of the range
     * @param {string} endRef - last minute of the range
     * @returns {Array} `rows` itself when it already has at least one row per
     *   minute of the range, otherwise a new padded array
     */
    function padMissingMinutes(rows, startRef, endRef) {
        var start = moment(startRef, DATE_PATTERN),
            end = moment(endRef, DATE_PATTERN),
            // whole minutes between the two references; measured directly so
            // ranges of 24 hours or more do not wrap around
            totalMinutes = end.diff(start, 'minutes'),
            padded = [],
            stamp,
            i;

        // a full series has totalMinutes + 1 rows; nothing to pad in that case
        if (rows.length > totalMinutes) {
            return rows;
        }

        for (i = 0; i <= totalMinutes; i++) {
            stamp = start.clone().add(i, 'minutes').format(DATE_PATTERN);
            padded.push(_.find(rows, { date: stamp }) || { date: stamp, total: 0 });
        }
        return padded;
    }

    // Clean/format data: parses date strings to Date objects and coerces
    // totals to numbers, in place
    function dataFormat(data) {
        data.forEach(function(d) {
            d.date = parseDate(d.date);
            d.total = +d.total;
        });
    }

    // Draws gridlines plus a line series into a new svg inside #chart-two
    function renderChart(data) {
        var chart = fc.charts.linearTimeSeries()
            .xDomain(d3.extent(data, function(d) { return d.date; }))
            .xNice()
            .xTicks(d3.time.minutes, 10)
            .yDomain(fc.utilities.extent(data, 'total'))
            .yNice()
            .yTicks(5);

        // Create the gridlines
        var gridlines = fc.scale.gridlines();

        // Create the line series
        var line = fc.series.line()
            .yValue(function(d) { return d.total; });

        var multi = fc.series.multi()
            .series([gridlines, line]);
        chart.plotArea(multi);

        d3.select('#chart-two')
            .append('svg')
            .style({
                height: '300px',
                width: '900px'
            })
            .datum(data)
            .call(chart);
    }
}());
/* ______ ______ ______ ______ ______ ______ ______ ______
|______|______|______|______|______|______|______|______|
*/
// Chart one: draws the readings exactly as they arrive, without padding the
// minutes that have no entry
(function missingZeroValues() {
    var parseDate = d3.time.format('%Y-%m-%d %H:%M:%S').parse;
    var formatDate = d3.time.format("%H:%M:%S"); // not used by this chart

    // Load the readings, normalise them in place, then draw
    d3.json("sparse_test.json", function(error, json) {
        if (error) {
            return console.warn(error);
        }
        var readings = json.data;
        dataFormat(readings);
        renderChart(readings);
    });

    // Converts each row's date string to a Date and its total to a number
    function dataFormat(data) {
        var row;
        for (var i = 0; i < data.length; i++) {
            row = data[i];
            row.date = parseDate(row.date);
            row.total = +row.total;
        }
    }

    // Builds the time-series chart and attaches it to a new svg in #chart-one
    function renderChart(data) {
        var dateOf = function(d) { return d.date; };
        var totalOf = function(d) { return d.total; };

        var chart = fc.charts.linearTimeSeries()
            .xDomain(d3.extent(data, dateOf))
            .xNice()
            .xTicks(d3.time.minutes, 10)
            .yDomain(fc.utilities.extent(data, 'total'))
            .yNice()
            .yTicks(5);

        // plot area: gridlines underneath, line series on top
        var gridlines = fc.scale.gridlines();
        var line = fc.series.line().yValue(totalOf);
        chart.plotArea(fc.series.multi().series([gridlines, line]));

        var svg = d3.select('#chart-one')
            .append('svg')
            .style({
                height: '300px',
                width: '900px'
            });

        svg.datum(data).call(chart);
    }
}());
/* ______ ______ ______ ______ ______ ______ ______ ______
|______|______|______|______|______|______|______|______|
*/
// Chart four: the same unpadded readings, drawn as individual points
(function missingZeroValues() {
    var parseDate = d3.time.format('%Y-%m-%d %H:%M:%S').parse;
    var formatDate = d3.time.format("%H:%M:%S"); // not used by this chart

    // Fetch the readings; on success clean them in place and render
    d3.json("sparse_test.json", function(error, json) {
        if (error) {
            return console.warn(error);
        }
        var samples = json.data;
        dataFormat(samples);
        renderChart(samples);
    });

    // Parses every date string into a Date and coerces every total to a number
    function dataFormat(data) {
        data.forEach(function(sample) {
            sample.total = +sample.total;
            sample.date = parseDate(sample.date);
        });
    }

    // Configures the chart and draws it into a new svg inside #chart-four
    function renderChart(data) {
        var xRange = d3.extent(data, function(sample) { return sample.date; });

        var chart = fc.charts.linearTimeSeries()
            .xDomain(xRange)
            .xNice()
            .xTicks(d3.time.minutes, 10)
            .yDomain(fc.utilities.extent(data, 'total'))
            .yNice()
            .yTicks(5);

        // Create the gridlines
        var gridlines = fc.scale.gridlines();

        // Create the point series
        var points = fc.series.point()
            .yValue(function(sample) { return sample.total; });

        // Stack gridlines and points in one plot area
        var layers = fc.series.multi()
            .series([gridlines, points]);
        chart.plotArea(layers);

        var container = d3.select('#chart-four');
        container.append('svg')
            .style({
                height: '300px',
                width: '900px'
            })
            .datum(data)
            .call(chart);
    }
}());
</script>
<!-- ______ ______ ______ ______ ______ ______ ______ ______
|______|______|______|______|______|______|______|______|
-->
<script src="fc_table.js"></script> <!-- custom component so can view table -->
<script>
// view data in table for reference: lists the raw readings so the charts can
// be checked against the actual values
(function(d3, fc) {
    'use strict';

    // variables
    var parseDate = d3.time.format('%Y-%m-%d %H:%M:%S').parse,
        formatDate = d3.time.format("%H:%M:%S"),
        // built once instead of once per row
        formatAvg = d3.format(".4f");

    /**
     * Clean/format data in place for display:
     * - date becomes an "HH:MM:SS" string
     * - total becomes a number
     * - avg, when a row carries one, is fixed to four decimals
     */
    function dataFormat(data) {
        data.forEach(function(d) {
            d.date = formatDate(parseDate(d.date));
            d.total = +d.total; // ensure numeric value
            // rows without an avg are left alone rather than being given a
            // formatted non-number
            if (d.avg != null) {
                d.avg = formatAvg(d.avg);
            }
        });
    }

    d3.json("sparse_test.json", function(error, json) {
        if (error) return console.warn(error);
        dataFormat(json.data);
        renderChart(json.data);
    });

    // Renders the rows into #table using the fc.series.table component
    function renderChart(data) {
        var chart = d3.select('#table');

        // Create the table
        var table = fc.series.table()
            .columns(['date', 'total']); // or use d3.keys

        chart
            .datum(data)
            .call(table);
    }
})(d3, fc);
</script>
</body>
</html>
{
"data":[
{
"total":"76",
"date":"2015-05-15 09:09:00"
},
{
"date":"2015-05-15 09:23:00",
"total":"1"
},
{
"total":"72",
"date":"2015-05-15 09:27:00"
},
{
"date":"2015-05-15 09:44:00",
"total":"35"
},
{
"date":"2015-05-15 10:10:00",
"total":"88"
},
{
"total":"1",
"date":"2015-05-15 10:26:00"
},
{
"total":"22",
"date":"2015-05-15 10:59:00"
},
{
"date":"2015-05-15 11:11:00",
"total":"69"
},
{
"total":"2",
"date":"2015-05-15 11:12:00"
},
{
"date":"2015-05-15 11:19:00",
"total":"1"
},
{
"total":"1",
"date":"2015-05-15 11:22:00"
}
],
"end_date":"2015-05-15 11:33:00",
"start_date":"2015-05-15 09:00:00"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment