Skip to content

Commit 120729d

Browse files
authored
Minor code beautifications in statistics.py (gh-124866)
1 parent 04bfea2 commit 120729d

File tree

1 file changed

+95
-85
lines changed

1 file changed

+95
-85
lines changed

Lib/statistics.py

+95-85
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,7 @@ def count_positive(iterable):
248248
found_zero = True
249249
else:
250250
raise StatisticsError('No negative inputs allowed', x)
251+
251252
total = fsum(map(log, count_positive(data)))
252253

253254
if not n:
@@ -710,6 +711,7 @@ def correlation(x, y, /, *, method='linear'):
710711
start = (n - 1) / -2 # Center rankings around zero
711712
x = _rank(x, start=start)
712713
y = _rank(y, start=start)
714+
713715
else:
714716
xbar = fsum(x) / n
715717
ybar = fsum(y) / n
@@ -1213,91 +1215,6 @@ def quantiles(data, *, n=4, method='exclusive'):
12131215

12141216
## Normal Distribution #####################################################
12151217

1216-
def _normal_dist_inv_cdf(p, mu, sigma):
1217-
# There is no closed-form solution to the inverse CDF for the normal
1218-
# distribution, so we use a rational approximation instead:
1219-
# Wichura, M.J. (1988). "Algorithm AS241: The Percentage Points of the
1220-
# Normal Distribution". Applied Statistics. Blackwell Publishing. 37
1221-
# (3): 477–484. doi:10.2307/2347330. JSTOR 2347330.
1222-
q = p - 0.5
1223-
1224-
if fabs(q) <= 0.425:
1225-
r = 0.180625 - q * q
1226-
# Hash sum: 55.88319_28806_14901_4439
1227-
num = (((((((2.50908_09287_30122_6727e+3 * r +
1228-
3.34305_75583_58812_8105e+4) * r +
1229-
6.72657_70927_00870_0853e+4) * r +
1230-
4.59219_53931_54987_1457e+4) * r +
1231-
1.37316_93765_50946_1125e+4) * r +
1232-
1.97159_09503_06551_4427e+3) * r +
1233-
1.33141_66789_17843_7745e+2) * r +
1234-
3.38713_28727_96366_6080e+0) * q
1235-
den = (((((((5.22649_52788_52854_5610e+3 * r +
1236-
2.87290_85735_72194_2674e+4) * r +
1237-
3.93078_95800_09271_0610e+4) * r +
1238-
2.12137_94301_58659_5867e+4) * r +
1239-
5.39419_60214_24751_1077e+3) * r +
1240-
6.87187_00749_20579_0830e+2) * r +
1241-
4.23133_30701_60091_1252e+1) * r +
1242-
1.0)
1243-
x = num / den
1244-
return mu + (x * sigma)
1245-
1246-
r = p if q <= 0.0 else 1.0 - p
1247-
r = sqrt(-log(r))
1248-
if r <= 5.0:
1249-
r = r - 1.6
1250-
# Hash sum: 49.33206_50330_16102_89036
1251-
num = (((((((7.74545_01427_83414_07640e-4 * r +
1252-
2.27238_44989_26918_45833e-2) * r +
1253-
2.41780_72517_74506_11770e-1) * r +
1254-
1.27045_82524_52368_38258e+0) * r +
1255-
3.64784_83247_63204_60504e+0) * r +
1256-
5.76949_72214_60691_40550e+0) * r +
1257-
4.63033_78461_56545_29590e+0) * r +
1258-
1.42343_71107_49683_57734e+0)
1259-
den = (((((((1.05075_00716_44416_84324e-9 * r +
1260-
5.47593_80849_95344_94600e-4) * r +
1261-
1.51986_66563_61645_71966e-2) * r +
1262-
1.48103_97642_74800_74590e-1) * r +
1263-
6.89767_33498_51000_04550e-1) * r +
1264-
1.67638_48301_83803_84940e+0) * r +
1265-
2.05319_16266_37758_82187e+0) * r +
1266-
1.0)
1267-
else:
1268-
r = r - 5.0
1269-
# Hash sum: 47.52583_31754_92896_71629
1270-
num = (((((((2.01033_43992_92288_13265e-7 * r +
1271-
2.71155_55687_43487_57815e-5) * r +
1272-
1.24266_09473_88078_43860e-3) * r +
1273-
2.65321_89526_57612_30930e-2) * r +
1274-
2.96560_57182_85048_91230e-1) * r +
1275-
1.78482_65399_17291_33580e+0) * r +
1276-
5.46378_49111_64114_36990e+0) * r +
1277-
6.65790_46435_01103_77720e+0)
1278-
den = (((((((2.04426_31033_89939_78564e-15 * r +
1279-
1.42151_17583_16445_88870e-7) * r +
1280-
1.84631_83175_10054_68180e-5) * r +
1281-
7.86869_13114_56132_59100e-4) * r +
1282-
1.48753_61290_85061_48525e-2) * r +
1283-
1.36929_88092_27358_05310e-1) * r +
1284-
5.99832_20655_58879_37690e-1) * r +
1285-
1.0)
1286-
1287-
x = num / den
1288-
if q < 0.0:
1289-
x = -x
1290-
1291-
return mu + (x * sigma)
1292-
1293-
1294-
# If available, use C implementation
1295-
try:
1296-
from _statistics import _normal_dist_inv_cdf
1297-
except ImportError:
1298-
pass
1299-
1300-
13011218
class NormalDist:
13021219
"Normal distribution of a random variable"
13031220
# https://en.wikipedia.org/wiki/Normal_distribution
@@ -1561,11 +1478,13 @@ def _sum(data):
15611478
types_add = types.add
15621479
partials = {}
15631480
partials_get = partials.get
1481+
15641482
for typ, values in groupby(data, type):
15651483
types_add(typ)
15661484
for n, d in map(_exact_ratio, values):
15671485
count += 1
15681486
partials[d] = partials_get(d, 0) + n
1487+
15691488
if None in partials:
15701489
# The sum will be a NAN or INF. We can ignore all the finite
15711490
# partials, and just look at this special one.
@@ -1574,6 +1493,7 @@ def _sum(data):
15741493
else:
15751494
# Sum all the partial sums using builtin sum.
15761495
total = sum(Fraction(n, d) for d, n in partials.items())
1496+
15771497
T = reduce(_coerce, types, int) # or raise TypeError
15781498
return (T, total, count)
15791499

@@ -1596,6 +1516,7 @@ def _ss(data, c=None):
15961516
types_add = types.add
15971517
sx_partials = defaultdict(int)
15981518
sxx_partials = defaultdict(int)
1519+
15991520
for typ, values in groupby(data, type):
16001521
types_add(typ)
16011522
for n, d in map(_exact_ratio, values):
@@ -1605,11 +1526,13 @@ def _ss(data, c=None):
16051526

16061527
if not count:
16071528
ssd = c = Fraction(0)
1529+
16081530
elif None in sx_partials:
16091531
# The sum will be a NAN or INF. We can ignore all the finite
16101532
# partials, and just look at this special one.
16111533
ssd = c = sx_partials[None]
16121534
assert not _isfinite(ssd)
1535+
16131536
else:
16141537
sx = sum(Fraction(n, d) for d, n in sx_partials.items())
16151538
sxx = sum(Fraction(n, d*d) for d, n in sxx_partials.items())
@@ -1693,8 +1616,10 @@ def _convert(value, T):
16931616
# This covers the cases where T is Fraction, or where value is
16941617
# a NAN or INF (Decimal or float).
16951618
return value
1619+
16961620
if issubclass(T, int) and value.denominator != 1:
16971621
T = float
1622+
16981623
try:
16991624
# FIXME: what do we do if this overflows?
17001625
return T(value)
@@ -1857,3 +1782,88 @@ def _sqrtprod(x: float, y: float) -> float:
18571782
# https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0
18581783
d = sumprod((x, h), (y, -h))
18591784
return h + d / (2.0 * h)
1785+
1786+
1787+
def _normal_dist_inv_cdf(p, mu, sigma):
1788+
# There is no closed-form solution to the inverse CDF for the normal
1789+
# distribution, so we use a rational approximation instead:
1790+
# Wichura, M.J. (1988). "Algorithm AS241: The Percentage Points of the
1791+
# Normal Distribution". Applied Statistics. Blackwell Publishing. 37
1792+
# (3): 477–484. doi:10.2307/2347330. JSTOR 2347330.
1793+
q = p - 0.5
1794+
1795+
if fabs(q) <= 0.425:
1796+
r = 0.180625 - q * q
1797+
# Hash sum: 55.88319_28806_14901_4439
1798+
num = (((((((2.50908_09287_30122_6727e+3 * r +
1799+
3.34305_75583_58812_8105e+4) * r +
1800+
6.72657_70927_00870_0853e+4) * r +
1801+
4.59219_53931_54987_1457e+4) * r +
1802+
1.37316_93765_50946_1125e+4) * r +
1803+
1.97159_09503_06551_4427e+3) * r +
1804+
1.33141_66789_17843_7745e+2) * r +
1805+
3.38713_28727_96366_6080e+0) * q
1806+
den = (((((((5.22649_52788_52854_5610e+3 * r +
1807+
2.87290_85735_72194_2674e+4) * r +
1808+
3.93078_95800_09271_0610e+4) * r +
1809+
2.12137_94301_58659_5867e+4) * r +
1810+
5.39419_60214_24751_1077e+3) * r +
1811+
6.87187_00749_20579_0830e+2) * r +
1812+
4.23133_30701_60091_1252e+1) * r +
1813+
1.0)
1814+
x = num / den
1815+
return mu + (x * sigma)
1816+
1817+
r = p if q <= 0.0 else 1.0 - p
1818+
r = sqrt(-log(r))
1819+
if r <= 5.0:
1820+
r = r - 1.6
1821+
# Hash sum: 49.33206_50330_16102_89036
1822+
num = (((((((7.74545_01427_83414_07640e-4 * r +
1823+
2.27238_44989_26918_45833e-2) * r +
1824+
2.41780_72517_74506_11770e-1) * r +
1825+
1.27045_82524_52368_38258e+0) * r +
1826+
3.64784_83247_63204_60504e+0) * r +
1827+
5.76949_72214_60691_40550e+0) * r +
1828+
4.63033_78461_56545_29590e+0) * r +
1829+
1.42343_71107_49683_57734e+0)
1830+
den = (((((((1.05075_00716_44416_84324e-9 * r +
1831+
5.47593_80849_95344_94600e-4) * r +
1832+
1.51986_66563_61645_71966e-2) * r +
1833+
1.48103_97642_74800_74590e-1) * r +
1834+
6.89767_33498_51000_04550e-1) * r +
1835+
1.67638_48301_83803_84940e+0) * r +
1836+
2.05319_16266_37758_82187e+0) * r +
1837+
1.0)
1838+
else:
1839+
r = r - 5.0
1840+
# Hash sum: 47.52583_31754_92896_71629
1841+
num = (((((((2.01033_43992_92288_13265e-7 * r +
1842+
2.71155_55687_43487_57815e-5) * r +
1843+
1.24266_09473_88078_43860e-3) * r +
1844+
2.65321_89526_57612_30930e-2) * r +
1845+
2.96560_57182_85048_91230e-1) * r +
1846+
1.78482_65399_17291_33580e+0) * r +
1847+
5.46378_49111_64114_36990e+0) * r +
1848+
6.65790_46435_01103_77720e+0)
1849+
den = (((((((2.04426_31033_89939_78564e-15 * r +
1850+
1.42151_17583_16445_88870e-7) * r +
1851+
1.84631_83175_10054_68180e-5) * r +
1852+
7.86869_13114_56132_59100e-4) * r +
1853+
1.48753_61290_85061_48525e-2) * r +
1854+
1.36929_88092_27358_05310e-1) * r +
1855+
5.99832_20655_58879_37690e-1) * r +
1856+
1.0)
1857+
1858+
x = num / den
1859+
if q < 0.0:
1860+
x = -x
1861+
1862+
return mu + (x * sigma)
1863+
1864+
1865+
# If available, use C implementation
1866+
try:
1867+
from _statistics import _normal_dist_inv_cdf
1868+
except ImportError:
1869+
pass

0 commit comments

Comments
 (0)