Skip to content

Commit 70b3a5e

Browse files
authored
Issue-13: "p-values for T-test are not accurate enough" (#19)
* enumerable: Replace stdev, variance and mean methods. This change removes the dependency we had on the descriptive_statistics gem. * ruby-statistics: Remove gem dependency 🎉 This change removes the descriptive_statistics gem, which contains a different stdev/variance implementation that was causing wrong p_value calculations in the implemented statistical tests. * specs: Compare p_values without worries! :shipit: * version: 2.0.1
1 parent 33d75d8 commit 70b3a5e

File tree

7 files changed

+49
-16
lines changed

7 files changed

+49
-16
lines changed

lib/enumerable.rb

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# TODO: Avoid monkey-patching.
2+
# Descriptive-statistics helpers mixed into every Enumerable.
#
# Element counts are taken with Enumerable#count instead of #length:
# #length is not part of the Enumerable contract (Range and Enumerator,
# for instance, do not define it), so relying on it made these helpers
# raise NoMethodError on anything that is not array-like.
module Enumerable
  # Arithmetic mean of the elements.
  #
  # An empty collection is not handled: reduce(:+) yields nil there and the
  # division raises NoMethodError.
  #
  # @return [Float] the sum of the elements divided by how many there are
  def mean
    reduce(:+) / count.to_f
  end

  # *Sample* variance of the elements (denominator is n - 1).
  #
  # For a single numeric element this evaluates 0.0 / 0.0 and returns NaN.
  #
  # @return [Float] sum of squared deviations from the mean over (n - 1)
  def variance
    # Named `average` so the local does not shadow the #mean method.
    average = mean
    squared_deviations = reduce(0) { |memo, value| memo + ((value - average)**2) }

    squared_deviations / (count - 1).to_f
  end

  # *Sample* standard deviation: the square root of #variance.
  #
  # @return [Float]
  def standard_deviation
    Math.sqrt(variance)
  end
end

lib/statistics.rb

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
require 'descriptive_statistics'
2-
1+
require File.dirname(__FILE__) + '/enumerable'
32
require File.dirname(__FILE__) + '/math'
43
Dir[ File.dirname(__FILE__) + '/statistics/**/*.rb'].each {|file| require file }
54

lib/statistics/version.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
module Statistics
2-
VERSION = "2.0.0"
2+
VERSION = "2.0.1"
33
end

ruby-statistics.gemspec

-1
Original file line numberDiff line numberDiff line change
@@ -32,5 +32,4 @@ Gem::Specification.new do |spec|
3232
spec.add_development_dependency "rspec", '>= 3.6.0'
3333
spec.add_development_dependency "grb", '0.4.1'
3434
spec.add_development_dependency 'byebug', '9.1.0'
35-
spec.add_dependency 'descriptive_statistics', '2.5.1'
3635
end

spec/statistics/enumerable_spec.rb

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
require 'spec_helper'
2+
3+
# Specs for the mean / variance / standard_deviation helpers that
# lib/enumerable.rb adds to Enumerable. Expected values are compared exactly;
# matcher arguments are parenthesised so negative literals do not trigger
# Ruby's "ambiguous first argument" warning.
describe Enumerable do
  describe '#mean' do
    it 'calculates the mean of a specific collection' do
      expect((1..5).to_a.mean).to eq(3.0)
      expect((1..10).to_a.mean).to eq(5.5)
      expect((-10..-5).to_a.mean).to eq(-7.5)
    end
  end

  describe '#variance' do
    it 'calculates the *sample* variance of a specific collection' do
      expect((1..5).to_a.variance).to eq(2.5)
      expect((1..10).to_a.variance).to eq(9.166666666666666)
      expect((-10..-5).to_a.variance).to eq(3.5)
    end
  end

  describe '#standard_deviation' do
    it 'calculates the *sample* standard deviation of a specific collection' do
      expect((1..5).to_a.standard_deviation).to eq(1.5811388300841898)
      expect((1..10).to_a.standard_deviation).to eq(3.0276503540974917)
      expect((-10..-5).to_a.standard_deviation).to eq(1.8708286933869707)
    end
  end
end

spec/statistics/statistical_test/t_test_spec.rb

+3-12
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,7 @@
8585

8686
result = described_class.paired_test(alpha, :one_tail, before, after)
8787

88-
# https://github.com/estebanz01/ruby-statistics/issues/13
89-
# In the example, the p-value for one-side is 0.0006 but we have 0.0004
90-
# Apparently, it's how ruby handles and calculates the float points
91-
# expect(result[:p_value]).to eq 0.0006
88+
expect(result[:p_value].round(4)).to eq 0.0006
9289
expect(result[:null]).to be false
9390
expect(result[:alternative]).to be true
9491
end
@@ -100,10 +97,7 @@
10097

10198
result = described_class.paired_test(alpha, :two_tail, before, after)
10299

103-
# https://github.com/estebanz01/ruby-statistics/issues/13
104-
# In the example, the p-value for one-side is 0.0012 but we have 0.0008
105-
# Apparently, it's how ruby handles and calculates the float points
106-
# expect(result[:p_value].round(4)).to eq 0.0012
100+
expect(result[:p_value].round(4)).to eq 0.0012
107101
expect(result[:null]).to be false
108102
expect(result[:alternative]).to be true
109103
end
@@ -123,10 +117,7 @@
123117

124118
result = described_class.paired_test(alpha, :two_tail, five_mts, ten_mts)
125119

126-
# https://github.com/estebanz01/ruby-statistics/issues/13
127-
# In the example, the p-value for one-side is 0.0026 but we have 0.0024
128-
# Apparently, it's how ruby handles and calculates the float points
129-
# expect(result[:p_value].round(3)).to eq 0.026
120+
expect(result[:p_value].round(3)).to eq 0.026
130121
expect(result[:null]).to be false
131122
expect(result[:alternative]).to be true
132123
end

spec/statistics/statistical_test/wilcoxon_rank_sum_test_spec.rb

+2
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
expect(result[:z].round(3)).to eq -0.186
7171
expect(result[:null]).to be true
7272
expect(result[:alternative]).to be false
73+
expect(result[:p_value]).to eq 0.8525013990549617
7374
end
7475

7576
it 'performs a wilcoxon rank sum/Mann-Whitney U test following example THREE' do
@@ -82,6 +83,7 @@
8283
expect(result[:z].round(3)).to eq -2.988
8384
expect(result[:null]).to be false
8485
expect(result[:alternative]).to be true
86+
expect(result[:p_value]).to eq 0.002808806689028387
8587
end
8688
end
8789
end

0 commit comments

Comments
 (0)