Skip to content

Instantly share code, notes, and snippets.

@phil-pedruco
Last active December 25, 2015 04:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save phil-pedruco/6917114 to your computer and use it in GitHub Desktop.
Save phil-pedruco/6917114 to your computer and use it in GitHub Desktop.
Histogram with Chauvenet's criterion for outliers

Trimming outliers using Chauvenet's criterion

This is an example of trimming outliers from a dataset so that it can be plotted as a histogram. The synthetic dataset in random.csv has had some obvious outliers added to it, which makes it difficult to plot using d3's native histogram layout. Chauvenet's criterion is used to trim the outliers, allowing a 'better' representation of the data. This gist/bl.ock was created in response to this Stack Overflow question.

Implementing Chauvenet's criterion required the variance function from Jason Davies' science.js library, and the histogram is based on Mike Bostock's example here.

<!DOCTYPE html>
<html>
<meta charset="utf-8">
<head>
<script src="http://d3js.org/d3.v3.min.js"></script>
<style type="text/css">
/* Small sans-serif type for every label on the page. */
body {
font: 10px sans-serif;
}
/* Histogram bars: each bin is a <rect> inside a <g class="bar">. */
.bar rect {
fill: steelblue;
shape-rendering: crispEdges;
}
/* Count label drawn inside each bar; white so it reads against the bar fill. */
.bar text {
fill: #fff;
}
/* Axis domain path and tick lines: thin black strokes with no fill. */
.axis path, .axis line {
fill: none;
stroke: #000;
shape-rendering: crispEdges;
}
</style>
</head>
<body>
<div style="width: 500px; height; 500px; position: absolute; top: 150px; background-color: red"></div>
<script type="text/javascript">
// Load the sample, trim its outliers and draw a histogram of what remains.
d3.csv("random.csv", function(error, values) {
  // On a failed request `values` is undefined, so stop here instead of
  // throwing further down.
  if (error) {
    console.error("Could not load random.csv:", error);
    return;
  }

  // Coerce the "x" column of every row to a number.
  var dataArray = values.map(function(d) { return +d.x; });

  // trim outliers
  var trimmed = chauvenet(dataArray);
  if (trimmed.length === 0) {
    // Nothing to bin; the scales below would receive an undefined domain.
    console.error("No values left to plot after trimming outliers");
    return;
  }

  var margin = {top: 10, right: 30, bottom: 30, left: 30},
      width = 960 - margin.left - margin.right,
      height = 500 - margin.top - margin.bottom;

  // Formatter for the per-bar count labels (whole numbers, comma-grouped).
  // This was previously referenced without ever being defined.
  var formatCount = d3.format(",.0f");

  // Smallest retained value; it is the start of the x domain, so x(xMin) is 0.
  var xMin = d3.min(trimmed);

  var x = d3.scale.linear()
      .domain(d3.extent(trimmed))
      .range([0, width]);

  // Generate a histogram using twenty uniformly-spaced bins.
  var data = d3.layout.histogram()
      .bins(x.ticks(20))(trimmed);

  var y = d3.scale.linear()
      .domain([0, d3.max(data, function(d) { return d.y; })])
      .range([height, 0]);

  var xAxis = d3.svg.axis()
      .scale(x)
      .orient("bottom");

  var svg = d3.select("body").append("svg")
      .attr("width", width + margin.left + margin.right)
      .attr("height", height + margin.top + margin.bottom)
    .append("g")
      .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

  // One <g class="bar"> per bin, positioned at the bin's left edge and top.
  var bar = svg.selectAll(".bar")
      .data(data)
    .enter().append("g")
      .attr("class", "bar")
      .attr("transform", function(d) { return "translate(" + x(d.x) + "," + y(d.y) + ")"; });

  // The domain does not start at zero, so a bin's pixel width is
  // x(xMin + dx) rather than x(dx); 1px is removed to leave a gap between bars.
  bar.append("rect")
      .attr("x", 1)
      .attr("width", function(d) { return x(d.dx + xMin) - 1; })
      .attr("height", function(d) { return height - y(d.y); });

  // Count label, centred horizontally within the bar.
  bar.append("text")
      .attr("dy", ".75em")
      .attr("y", 6)
      .attr("x", x(data[0].dx + xMin) / 2)
      .attr("text-anchor", "middle")
      .text(function(d) { return formatCount(d.y); });

  svg.append("g")
      .attr("class", "x axis")
      .attr("transform", "translate(0," + height + ")")
      .call(xAxis);
});
// Borrowed from Jason Davies science library https://github.com/jasondavies/science.js/blob/master/science.v1.js
/**
 * Sample variance of an array of numbers, using the n - 1 denominator.
 *
 * Declared as a function rather than assigned to an undeclared identifier,
 * so it no longer relies on implicit-global creation (which throws in strict
 * mode) and is hoisted like the other helper in this script.
 *
 * @param {number[]} x - Values to measure.
 * @returns {number} NaN for an empty array, 0 for a single value, otherwise
 *   the sum of squared deviations from the mean divided by (n - 1).
 */
function variance(x) {
  var n = x.length;
  if (n < 1) return NaN;
  if (n === 1) return 0;

  var mean = d3.mean(x);
  var sumOfSquares = 0;
  for (var i = 0; i < n; i++) {
    var deviation = x[i] - mean;
    sumOfSquares += deviation * deviation;
  }
  return sumOfSquares / (n - 1);
}
//A test for outliers http://en.wikipedia.org/wiki/Chauvenet%27s_criterion
/**
 * Drop values that lie too many standard deviations from the mean.
 *
 * A value is kept when |value - mean| / stdv is strictly less than dMax.
 * NOTE(review): the cut-off is a fixed number of standard deviations; it is
 * not derived from the sample size, so this is a simplified form of the
 * criterion described at the link above.
 *
 * @param {number[]} x - Values to filter; the array is not modified.
 * @param {number} [dMax=3] - Largest allowed deviation, in standard deviations.
 * @returns {number[]} New array with the retained values in original order.
 */
function chauvenet(x, dMax) {
  if (dMax === undefined) dMax = 3;

  var mean = d3.mean(x);
  var stdv = Math.sqrt(variance(x));

  // With zero spread (one value, or all values equal) the ratio below is
  // 0 / 0 = NaN, which would reject everything; nothing is an outlier here.
  if (stdv === 0) return x.slice();

  return x.filter(function(value) {
    return Math.abs(value - mean) / stdv < dMax;
  });
}
</script>
</body>
</html>
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
x
55.22872918
62.06107141
218.6159306
76.89994449
39.95259581
89.31852497
-152.9785345
-147.5462671
-21.26323462
-113.988547
-69.5292325
-115.7355137
37.46194887
-4.946364616
-18.42710516
98.31264034
8.068124365
-17.7064524
139.354596
20.48874698
63.75173991
-161.4241795
-230.1659372
-33.61646468
104.0555933
132.4407476
32.5776413
8.068003068
-72.1261667
11.2730802
82.39491174
-39.36963208
-144.9623079
-79.23078364
24.5250923
195.3338628
-168.1877359
154.7959738
-79.90806137
27.58321379
93.68146269
24.38658202
69.28333238
-26.77766239
-74.48712265
-104.9791523
36.96020962
-77.1930597
145.7244251
39.54712441
67.73255982
122.4446154
40.81623789
43.50804405
-124.8733906
105.3355072
170.4151717
18.96529934
-15.83163945
-94.13013883
-3.1516028
36.08094696
83.80607436
-49.51484559
88.32547696
26.99854418
-156.0262096
-96.98783521
-69.39407071
2.181610867
-59.09284516
-153.1328983
-61.1508252
-58.54565437
98.82336418
-117.4728405
-25.55930209
-72.00813941
-73.7176415
-15.97741156
-52.87610079
-77.65282511
219.7749068
-17.32552671
36.49630396
21.5072537
-207.3424867
115.2968561
-13.13446327
1.748867485
46.63748808
97.40518116
-3.488397861
-75.00501461
25.01962969
0.343681241
-224.8610764
122.2985042
21.71040491
112.842732
-46.45400983
189.1360086
-43.00686781
26.47075045
59.89195222
-73.12895649
-107.677944
47.13572749
56.4535208
39.65313367
-40.60292566
200.5748
-42.7495574
107.7945339
-135.7506298
94.0525289
-61.70583626
27.90884168
47.73111222
149.0422974
54.8327051
-61.33630463
-40.99768138
-154.6877357
1.413935263
-82.90848548
-148.5193494
-44.92279366
-78.70068778
-72.08453959
18.2043037
-46.82605513
-110.178964
-269.4026736
-103.5381962
-3.995232091
-86.55616738
-32.76292046
34.78084709
-14.50500124
39.91400299
-136.45914
-145.7340094
177.3778588
32.9203227
97.70593981
-48.18598244
-51.81611902
133.7484401
41.00497889
-49.07914815
88.91551969
-35.17774652
-12.98655606
131.2901322
118.790702
-50.5104658
20.65295118
226.1984258
135.4574763
165.4814329
-80.50619013
137.1260116
3.271760094
-86.56108059
86.97335739
-75.02534229
-22.36716325
91.98555434
-91.4884707
-25.82775667
181.9618829
-12.45335024
-50.8671866
21.19048381
9.93789596
-13.93910307
108.5141765
-65.2447688
3.109983838
-58.01961147
26.21225397
-58.61738312
-13.04722013
149.0513179
-64.16414777
163.6095523
186.9428722
0.787637333
30.84471857
25.7150887
-32.06584354
195.8639082
150.5394813
-10.25744268
-20.67219864
-161.036197
156.364561
-29.3993792
199.2641218
273.1930168
-21.82254044
122.4170871
-33.02340576
16.05299692
-21.27855928
-100.5107592
87.77326929
96.33272036
-36.1183841
19.29162889
-53.91244638
-80.25748309
-15.22186462
95.5736127
46.01910187
76.84094178
100.6108313
81.56343086
56.28782955
140.1294514
58.56755554
-28.53605687
-200.2635311
-120.2831472
5.513754173
-25.5497934
-70.83609671
5.694859332
-103.9389193
-55.45692041
-61.90248492
-132.7388927
20.02089319
171.53692
-245.290166
116.7907795
-119.3910258
98.15436212
22.46579952
83.76477547
0.954980834
-3.981713853
32.32166183
-67.86820722
73.94523737
-26.9791269
-51.19829499
-58.21223743
234.1618412
34.25938626
-27.13047784
80.98888139
1.306266305
65.68539182
-42.91335019
-136.3500856
-80.46994684
-43.20112251
-108.8349947
50.52497103
-57.95989086
-46.52057436
-367.3707606
-198.3254423
-90.25019249
-9.627265228
-14.89106449
52.97373479
-18.06922004
20.01788035
-2.251403616
-51.95948759
5.273602906
82.32546663
22.4690537
-248.6118452
37.58504676
-41.07422425
46.47742795
76.08125329
-10.89278666
-38.30922117
-6.051665063
-77.60064548
25.2329304
-53.05168945
-35.33861191
17.46446313
64.50661988
16.56699614
-17.269669
-23.60628662
11.81175902
54.18692239
-220.0286596
-23.75907688
137.6904342
123.2443749
94.17066331
-17.0734232
0.538928408
77.00816628
-43.2388352
59.973704
-7.015110671
-224.3542456
-32.57172531
155.2406355
-260.6176376
1.019221935
-34.50881628
55.73423028
31.38350448
-61.24462016
-82.99060808
30.66490482
52.09519162
75.14731409
25.90662928
-10.9283466
-153.4712378
9.471456317
98.16639212
-318.8548699
78.52246441
-32.46893468
-20.12512696
-72.37067698
-29.37927609
10.33795332
50.41079026
-28.61011302
12.80511011
238.7708935
207.7219013
38.14260105
-76.6526391
175.0824336
-94.83750979
-102.2641101
160.7730997
-49.35455123
-12.46539068
197.8007301
223.3475782
115.1469257
102.1426214
9.978198077
-153.8401068
113.3312325
-72.98368526
-82.19847158
-62.84077803
-14.25575217
-25.69068297
226.0312002
8.487165584
189.8593805
28.97408822
-3.68214999
159.2039756
-11.52460959
-28.8938405
50.40152071
74.71144011
-109.7858235
3.472299712
-229.5804621
128.1486195
-158.4268611
-41.62803576
-111.6967882
-52.6426125
-52.64230721
-111.4909292
145.8657088
44.05289405
58.00411995
109.0768999
1.308240338
26.35171758
70.74475139
31.78995469
-68.99269907
56.06507047
223.2178096
92.239296
57.38396095
11.51743587
-141.2167515
-24.25987663
-2.72847525
32.8934318
131.8416872
158.0267646
-92.22343035
-110.8226465
202.4400413
-0.422966098
31.45333048
-66.2961268
15.66788351
40.58513248
93.71781742
-94.92254564
-64.23970771
15.87132169
-21.16741302
-46.80659802
74.28630537
-227.7625541
-83.63763583
90.89025103
-189.4880811
-100.6948646
-45.10036222
-44.0729326
187.6133137
52.35278485
-78.47925037
12.06260298
-131.881929
83.64051111
59.83987633
-117.8688427
91.81917678
326.4019328
64.50811347
20.86262886
-75.25405658
-128.1368445
-92.59558734
25.96313958
-18.34030676
-95.54112872
-44.45108961
-5.568880911
79.3510768
6.94720677
-34.59917991
-135.4730549
-27.86771167
9.666268575
38.06769182
-46.03562238
-99.20381864
34.57641195
6.568487936
228.9209535
56.81633973
100.865149
-39.53457888
-13.26479106
5.277665972
193.5443385
-62.33129273
200.0914156
157.7230113
119.4112108
61.93716034
-125.3465501
37.04001244
-156.6003205
-165.7543452
-114.6307435
-114.7464445
184.64894
-100.0047783
72.57002792
44.79667604
8.074708276
106.0026381
28.10459128
111.7037675
-32.17925074
-165.3702087
-80.50308027
92.8519696
-80.93203085
103.8301273
-33.97832075
73.84835446
-239.1664633
-71.19377884
51.84940724
217.8016521
-73.28134644
-201.7512755
202.8299048
98.13473315
-224.695976
-41.26063827
-41.04865438
-128.3195428
-81.42403657
-119.6983616
-185.349762
152.4879471
-86.41801722
-113.9770965
126.0127671
159.1127396
107.4773143
-78.14195354
114.9139844
-57.53988901
69.8302279
-29.57761285
180.3310922
235.0657008
-188.2285732
-159.9285569
165.048651
55.34368803
-42.11667604
112.6968526
145.6370943
-84.47825007
-63.8111511
33.04741077
-33.87053867
-3.715560058
-26.50835757
50.91032328
-146.8491656
-31.79356366
-50.14682472
72.8127187
20.99793256
-162.7418384
-145.1649612
-54.95319988
106.7282712
75.06653372
-24.87272606
56.59167035
79.69259555
14.40159887
43.63580524
174.9520435
-86.85111986
-135.3896895
89.64149593
-8.556358265
-93.85902233
-88.6129329
-17.38493831
217.0109237
57.25359364
24.39892061
-2.243016334
24.78684453
-1.886456375
-209.3725854
-85.4501052
-70.84143004
28.31037443
-91.78270198
-52.62254116
45.09117066
-38.40109147
9.040163706
-119.3503039
-52.9769456
-61.82213397
-36.09615746
71.96811849
24.49139883
3.861949822
-32.09698361
-60.88904215
-76.36483114
-105.7603654
30.84218776
-5.576315271
-116.3155628
169.6701658
122.9366247
79.11108103
26.33072072
270.182294
113.6325073
73.33789314
-11.59394392
-115.5825028
46.88874685
15.82401112
51.35406803
-83.51164823
106.4805333
32.16043639
148.8469326
-5.064818542
-198.4710042
77.13311858
-105.3125329
-29.7397614
116.1582428
-236.0415417
96.40226404
11.84276387
40.56203063
93.509375
-121.8202245
-0.596617024
135.4091944
-128.6727838
137.8080732
99.5965063
0.793508054
43.8345484
0.785312591
-20.35658191
43.1752149
12.51781859
47.59465493
87.7776365
126.9820712
-31.91121552
-51.24454714
-38.19148204
-172.5616577
-41.47260696
-2.504750458
-174.2986517
-78.74433899
6.277436073
-19.1221576
-7.620341541
-114.551178
-96.52343138
-68.0127877
-12.60007733
21.71376111
-95.61160998
36.44323886
45.643751
5.795539322
24.35571368
-81.42858995
-101.2333623
-63.8540799
197.2511144
2.813255427
4.558519452
-253.6524773
-22.91911336
18.53970324
36.6138368
78.36018671
5.100441208
135.0589977
8.361105284
28.86799035
-6.642248156
-24.35365855
-76.31304084
37.2779792
21.31324014
-139.906941
15.60093864
46.17469978
81.73971414
-8.41013129
149.4759105
-141.6874776
15.59003943
15.43108126
-149.4209135
-10.80319391
-51.18421728
10.71132134
29.01696997
-112.0366515
138.9185041
63.27330403
-130.0150507
18.64009298
-67.94141034
-108.1930322
-67.05306614
-152.3337012
-19.23499474
2.956310507
-150.8354081
-122.9408564
9.382015847
5.000506707
-137.2346675
-107.2673454
-63.53812646
-176.4207141
47.54247226
162.3609959
-39.21721215
-20.87897697
-67.30060552
43.7301666
-16.01302628
-61.94836575
71.76628436
32.75455587
-4.796003976
-58.97063407
-11.42496424
60.54849074
93.1568442
33.93852706
-95.25421678
89.32467756
-114.7106705
0.900481135
-269.2518354
-58.88093298
-141.5521133
11.67575985
39.99531907
8.148295106
-63.64862302
206.5079971
5.934915609
-43.3240742
19.56650342
153.4377968
29.4572422
95.9916921
-134.0617621
80.09785595
137.2838116
199.4508078
-56.85991387
118.4729343
4.441493964
-110.1680514
-197.8595805
65.28163394
125.1436258
224.9011003
35.56087314
-73.3967614
-70.00141347
-27.0724567
-42.80204467
-185.7342348
-108.2156471
211.9611621
-169.1082601
14.88467342
-84.81183293
-100.4611491
192.2079678
142.046228
77.42416983
-15.72458432
-51.8066751
29.2715255
52.57079342
151.5763483
-118.5893843
-30.5923252
-115.0506139
-79.6425379
-90.44141801
97.75611822
-32.88011968
-62.70909543
-95.39760799
216.8213965
82.4788366
-109.925017
-65.16232939
-14.80607848
-120.1789306
133.4181638
12.26146302
-6.465502805
-197.8848904
-153.9689725
-151.9024915
-11.33412174
-109.0022912
4.449058183
24.53611605
92.51711159
-11.06340757
-138.3970922
-149.9003872
-102.1616877
-112.624306
50.37979885
21.03001346
-19.04130949
40.57930517
97.71522633
-42.24655837
-174.3817587
-23.88282371
202.7361745
136.4767484
82.58075806
34.12087554
-143.8613037
54.81126434
-9.156897835
10.32413569
-8.773572819
102.0601728
-119.936344
38.53962012
2.987410637
-229.5759264
13.37644245
3.722380813
-73.69526977
88.00598039
57.22746163
-3.121206957
1.378785979
8.024108454
57.53568316
-1.374908756
-71.23196993
-36.77794731
110.3515961
47.21158969
116.8209072
-20.64179458
-111.5131625
25.07635565
88.46600156
-190.4376348
-194.0782797
21.22183716
56.41365124
-86.36902015
152.6766126
27.39637103
223.9021338
114.8508937
254.0943389
-80.60430166
3.355064636
-256.8237899
-35.02287501
-83.84535331
-63.45107497
-330.606526
-22.26017422
14.18406916
-32.00804353
-268.77295
-0.117843986
49.18872908
42.69987804
95.23387587
33.83285743
-6.562548777
25.34916583
-101.4366051
-143.3215398
-91.03926598
-87.87167529
-72.85219832
-19.94269839
105.2454062
-142.5486746
-1.35226106
-45.63569193
221.6381169
-89.75253485
-63.99164138
-126.0053155
-105.3004154
60.12819683
161.8996367
98.03146572
207.0776292
88.07724722
-37.73109808
83.09315198
-182.7121581
69.95721191
-229.7696753
-3.351035699
11.97238121
22.29198482
-4.633156336
97.34375403
28.55767231
34.55720216
-111.4466037
26.62831657
-172.9415873
20.80038365
-51.09269639
-9.938322467
-128.472707
83.33234304
16.33172823
-32.21417759
-256.7317498
61.21725501
20.42245328
11.50235128
85.0329371
48.1380095
140.0339334
109.2616659
-96.21825006
-5.291989988
8.053102431
42.99418529
-14.83502332
16.64530161
-25.84917643
56.09179199
42.08503736
-69.71897401
118.1835376
51.22189864
176.0447157
-11.0736391
70.41035736
-99.06733642
-182.6466419
172.7181873
146.7999111
-170.35166
-201.2233948
89.99916655
-6.163018201
-12.06010445
56.36827796
-98.52621834
102.716456
-84.30356398
-6.214207976
38.06998122
41.37852544
-36.98298186
127.2424046
56.58420106
168.7055596
25.62650676
-120.3157219
105.2439122
52.90719458
-85.44451851
82.4393308
68.60925946
119.2824959
-95.43410241
-26.28377841
-14.40495038
-57.82208856
148.53657
62.6458672
-90.7676807
-39.89264667
144.7426966
36.34162891
6.732798052
53.22746988
117.408627
-188.8416385
-34.20204818
3.499896695
8.780872041
-90.31943463
59.4275078
263.5482078
71.12645229
42.7992436
-151.4698735
-14.072094
-41.27096324
-112.2954473
118.827939
87.72805585
24.77930143
14.6863519
24.48832211
-7.137374986
-108.2712973
205.1723112
50.65794772
10.05490567
118.3076784
15.67794566
-5.171719632
-12.6402952
156.7811498
-189.9647156
-190.2880472
-16.11202666
76.37366326
131.282801
-57.46294025
10000
-20000
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment