Edit File by line
/home/barbar84/www/wp-conte.../plugins/sujqvwi/ShExBy/shex_roo.../proc/self/root/lib64/python3....
File: statistics.py
"""
Basic statistics module.

This module provides functions for calculating statistics of data, including
averages, variance, and standard deviation.

Calculating averages
--------------------

==================  ==================================================
Function            Description
==================  ==================================================
mean                Arithmetic mean (average) of data.
fmean               Fast, floating point arithmetic mean.
geometric_mean      Geometric mean of data.
harmonic_mean       Harmonic mean of data.
median              Median (middle value) of data.
median_low          Low median of data.
median_high         High median of data.
median_grouped      Median, or 50th percentile, of grouped data.
mode                Mode (most common value) of data.
multimode           List of modes (most common values of data).
quantiles           Divide data into intervals with equal probability.
==================  ==================================================

Calculate the arithmetic mean ("the average") of data:

>>> mean([-1.0, 2.5, 3.25, 5.75])
2.625


Calculate the standard median of discrete data:

>>> median([2, 3, 4, 5])
3.5


Calculate the median, or 50th percentile, of data grouped into class intervals
centred on the data values provided. E.g. if your data points are rounded to
the nearest whole number:

>>> median_grouped([2, 2, 3, 3, 3, 4])  #doctest: +ELLIPSIS
2.8333333333...

This should be interpreted in this way: you have two data points in the class
interval 1.5-2.5, three data points in the class interval 2.5-3.5, and one in
the class interval 3.5-4.5. The median of these data points is 2.8333...


Calculating variability or spread
---------------------------------

==================  =============================================
Function            Description
==================  =============================================
pvariance           Population variance of data.
variance            Sample variance of data.
pstdev              Population standard deviation of data.
stdev               Sample standard deviation of data.
==================  =============================================

Calculate the standard deviation of sample data:

>>> stdev([2.5, 3.25, 5.5, 11.25, 11.75])  #doctest: +ELLIPSIS
4.38961843444...

If you have previously calculated the mean, you can pass it as the optional
second argument to the four "spread" functions to avoid recalculating it:

>>> data = [1, 2, 2, 4, 4, 4, 5, 6]
>>> mu = mean(data)
>>> pvariance(data, mu)
2.5


Exceptions
----------

A single exception is defined: StatisticsError is a subclass of ValueError.

"""
[80] Fix | Delete
[81] Fix | Delete
# Names exported by ``from statistics import *``.
__all__ = [
    'NormalDist',
    'StatisticsError',
    'fmean',
    'geometric_mean',
    'harmonic_mean',
    'mean',
    'median',
    'median_grouped',
    'median_high',
    'median_low',
    'mode',
    'multimode',
    'pstdev',
    'pvariance',
    'quantiles',
    'stdev',
    'variance',
]
[100] Fix | Delete
[101] Fix | Delete
import math
[102] Fix | Delete
import numbers
[103] Fix | Delete
import random
[104] Fix | Delete
[105] Fix | Delete
from fractions import Fraction
[106] Fix | Delete
from decimal import Decimal
[107] Fix | Delete
from itertools import groupby
[108] Fix | Delete
from bisect import bisect_left, bisect_right
[109] Fix | Delete
from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum
[110] Fix | Delete
from operator import itemgetter
[111] Fix | Delete
from collections import Counter
[112] Fix | Delete
[113] Fix | Delete
# === Exceptions ===
[114] Fix | Delete
[115] Fix | Delete
class StatisticsError(ValueError):
    """Error raised by this module's functions for invalid data.

    In this file it is raised for empty datasets and for negative values
    where those are not supported.  It subclasses ``ValueError``.
    """
[117] Fix | Delete
[118] Fix | Delete
[119] Fix | Delete
# === Private utilities ===
[120] Fix | Delete
[121] Fix | Delete
def _sum(data, start=0):
    """_sum(data [, start]) -> (type, sum, count)

    Return a high-precision sum of the given numeric data as a fraction,
    together with the type to be converted to and the count of items.

    If optional argument ``start`` is given, it is added to the total.
    If ``data`` is empty, ``start`` (defaulting to 0) is returned.


    Examples
    --------

    >>> _sum([3, 2.25, 4.5, -0.5, 1.0], 0.75)
    (<class 'float'>, Fraction(11, 1), 5)

    Some sources of round-off error will be avoided:

    # Built-in sum returns zero.
    >>> _sum([1e50, 1, -1e50] * 1000)
    (<class 'float'>, Fraction(1000, 1), 3000)

    Fractions and Decimals are also supported:

    >>> from fractions import Fraction as F
    >>> _sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)])
    (<class 'fractions.Fraction'>, Fraction(63, 20), 4)

    >>> from decimal import Decimal as D
    >>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")]
    >>> _sum(data)
    (<class 'decimal.Decimal'>, Fraction(6963, 10000), 4)

    Mixed types are currently treated as an error, except that int is
    allowed.
    """
    count = 0
    n, d = _exact_ratio(start)
    # Map each denominator to the running sum of the numerators seen with it,
    # so that values are accumulated as exact integers.
    partials = {d: n}
    partials_get = partials.get
    T = _coerce(int, type(start))
    # Consecutive items of the same type are coerced once per run, not per item.
    for typ, values in groupby(data, type):
        T = _coerce(T, typ)  # or raise TypeError
        for n, d in map(_exact_ratio, values):
            count += 1
            partials[d] = partials_get(d, 0) + n
    if None in partials:
        # The sum will be a NAN or INF. We can ignore all the finite
        # partials, and just look at this special one.
        total = partials[None]
        assert not _isfinite(total)
    else:
        # Sum all the partial sums using builtin sum.
        # FIXME is this faster if we sum them in order of the denominator?
        total = sum(Fraction(n, d) for d, n in sorted(partials.items()))
    return (T, total, count)
[177] Fix | Delete
[178] Fix | Delete
[179] Fix | Delete
def _isfinite(x):
    """Return True if x is finite (neither an infinity nor a NAN).

    Objects providing an ``is_finite()`` method are asked directly;
    anything else is passed to ``math.isfinite``.
    """
    try:
        return x.is_finite()  # Likely a Decimal.
    except AttributeError:
        return math.isfinite(x)  # Coerces to float first.
[184] Fix | Delete
[185] Fix | Delete
[186] Fix | Delete
def _coerce(T, S):
    """Coerce types T and S to a common type, or raise TypeError.

    Coercion rules are currently an implementation detail. See the CoerceTest
    test class in test_statistics for details.
    """
    # See http://bugs.python.org/issue24068.
    assert T is not bool, "initial type T is bool"
    # If the types are the same, no need to coerce anything. Put this
    # first, so that the usual case (no coercion needed) happens as soon
    # as possible.
    if T is S:
        return T
    # Mixed int & other coerce to the other type.
    if S is int or S is bool:
        return T
    if T is int:
        return S
    # If one is a (strict) subclass of the other, coerce to the subclass.
    if issubclass(S, T):
        return S
    if issubclass(T, S):
        return T
    # Ints coerce to the other type.
    if issubclass(T, int):
        return S
    if issubclass(S, int):
        return T
    # Mixed fraction & float coerces to float (or float subclass).
    if issubclass(T, Fraction) and issubclass(S, float):
        return S
    if issubclass(T, float) and issubclass(S, Fraction):
        return T
    # Any other combination is disallowed.
    msg = "don't know how to coerce %s and %s"
    raise TypeError(msg % (T.__name__, S.__name__))
[215] Fix | Delete
[216] Fix | Delete
[217] Fix | Delete
def _exact_ratio(x):
    """Return Real number x to exact (numerator, denominator) pair.

    >>> _exact_ratio(0.25)
    (1, 4)

    x is expected to be an int, Fraction, Decimal or float.

    If the conversion raises OverflowError or ValueError (float NAN or
    INF), the pair ``(x, None)`` is returned instead.  Objects offering
    neither ``numerator``/``denominator`` nor ``as_integer_ratio()``
    raise TypeError.
    """
    try:
        # Optimise the common case of floats. We expect that the most often
        # used numeric type will be builtin floats, so try to make this as
        # fast as possible.
        if type(x) is float or type(x) is Decimal:
            return x.as_integer_ratio()
        try:
            # x may be an int, Fraction, or Integral ABC.
            return (x.numerator, x.denominator)
        except AttributeError:
            try:
                # x may be a float or Decimal subclass.
                return x.as_integer_ratio()
            except AttributeError:
                # Just give up?
                pass
    except (OverflowError, ValueError):
        # float NAN or INF.
        assert not _isfinite(x)
        return (x, None)
    msg = "can't convert type '{}' to numerator/denominator"
    raise TypeError(msg.format(type(x).__name__))
[247] Fix | Delete
[248] Fix | Delete
[249] Fix | Delete
def _convert(value, T):
    """Convert value to given numeric type T.

    ``value`` is returned unchanged when it is already exactly of type T.
    When T is an int subclass and ``value`` is not a whole number, the
    result is a float instead.
    """
    if type(value) is T:
        # This covers the cases where T is Fraction, or where value is
        # a NAN or INF (Decimal or float).
        return value
    if issubclass(T, int) and value.denominator != 1:
        T = float
    try:
        # FIXME: what do we do if this overflows?
        return T(value)
    except TypeError:
        if issubclass(T, Decimal):
            # T could not be built from value directly, so divide the
            # numerator by the denominator within T instead.
            return T(value.numerator)/T(value.denominator)
        else:
            raise
[265] Fix | Delete
[266] Fix | Delete
[267] Fix | Delete
def _find_lteq(a, x):
    """Locate the leftmost value exactly equal to x.

    ``a`` is searched with ``bisect_left``, so it is expected to be sorted.
    Returns the index of the match; raises ValueError if x is not found.
    """
    i = bisect_left(a, x)
    if i != len(a) and a[i] == x:
        return i
    raise ValueError
[273] Fix | Delete
[274] Fix | Delete
[275] Fix | Delete
def _find_rteq(a, l, x):
    """Locate the rightmost value exactly equal to x.

    ``a`` is searched with ``bisect_right`` starting at index ``l``, so it
    is expected to be sorted.  Returns the index of the match; raises
    ValueError if the element before the insertion point is not x.
    """
    i = bisect_right(a, x, lo=l)
    if i != (len(a)+1) and a[i-1] == x:
        return i-1
    raise ValueError
[281] Fix | Delete
[282] Fix | Delete
[283] Fix | Delete
def _fail_neg(values, errmsg='negative value'):
    """Iterate over values, failing if any are less than zero.

    This is a generator: each value is checked only when it is consumed,
    and StatisticsError(errmsg) is raised at the first negative one.
    """
    for x in values:
        if x < 0:
            raise StatisticsError(errmsg)
        yield x
[289] Fix | Delete
[290] Fix | Delete
[291] Fix | Delete
# === Measures of central tendency (averages) ===
[292] Fix | Delete
[293] Fix | Delete
def mean(data):
    """Return the sample arithmetic mean of data.

    >>> mean([1, 2, 3, 4, 4])
    2.8

    >>> from fractions import Fraction as F
    >>> mean([F(3, 7), F(1, 21), F(5, 3), F(1, 3)])
    Fraction(13, 21)

    >>> from decimal import Decimal as D
    >>> mean([D("0.5"), D("0.75"), D("0.625"), D("0.375")])
    Decimal('0.5625')

    If ``data`` is empty, StatisticsError will be raised.
    """
    # An iterator has no len(); materialise it so it can be counted.
    if iter(data) is data:
        data = list(data)
    n = len(data)
    if n < 1:
        raise StatisticsError('mean requires at least one data point')
    T, total, count = _sum(data)
    assert count == n
    return _convert(total/n, T)
[317] Fix | Delete
[318] Fix | Delete
[319] Fix | Delete
def fmean(data):
    """Convert data to floats and compute the arithmetic mean.

    This runs faster than the mean() function and it always returns a float.
    If the input dataset is empty, it raises a StatisticsError.

    >>> fmean([3.5, 4.0, 5.25])
    4.25
    """
    try:
        n = len(data)
    except TypeError:
        # Handle iterators that do not define __len__().
        n = 0

        def count(iterable):
            # Pass items through to fsum() while recording in the enclosing
            # ``n`` how many have been seen so far.
            nonlocal n
            for n, x in enumerate(iterable, start=1):
                yield x

        total = fsum(count(data))
    else:
        total = fsum(data)
    try:
        return total / n
    except ZeroDivisionError:
        raise StatisticsError('fmean requires at least one data point') from None
[344] Fix | Delete
[345] Fix | Delete
[346] Fix | Delete
def geometric_mean(data):
    """Convert data to floats and compute the geometric mean.

    Raises a StatisticsError if the input dataset is empty,
    if it contains a zero, or if it contains a negative value.

    No special efforts are made to achieve exact results.
    (However, this may change in the future.)

    >>> round(geometric_mean([54, 24, 36]), 9)
    36.0
    """
    try:
        # exp of the mean of the logs equals the geometric mean.
        return exp(fmean(map(log, data)))
    except ValueError:
        raise StatisticsError('geometric mean requires a non-empty dataset '
                              ' containing positive numbers') from None
[363] Fix | Delete
[364] Fix | Delete
[365] Fix | Delete
def harmonic_mean(data):
    """Return the harmonic mean of data.

    The harmonic mean, sometimes called the subcontrary mean, is the
    reciprocal of the arithmetic mean of the reciprocals of the data,
    and is often appropriate when averaging quantities which are rates
    or ratios, for example speeds. Example:

    Suppose an investor purchases an equal value of shares in each of
    three companies, with P/E (price/earning) ratios of 2.5, 3 and 10.
    What is the average P/E ratio for the investor's portfolio?

    >>> harmonic_mean([2.5, 3, 10])  # For an equal investment portfolio.
    3.6

    Using the arithmetic mean would give an average of about 5.167, which
    is too high.

    If ``data`` is empty, or any element is less than zero,
    ``harmonic_mean`` will raise ``StatisticsError``.  With two or more
    data points, a zero among them makes the result 0.
    """
    # For a justification for using harmonic mean for P/E ratios, see
    # http://fixthepitch.pellucid.com/comps-analysis-the-missing-harmony-of-summary-statistics/
    # http://papers.ssrn.com/sol3/papers.cfm?abstract_id=2621087
    if iter(data) is data:
        data = list(data)
    errmsg = 'harmonic mean does not support negative values'
    n = len(data)
    if n < 1:
        raise StatisticsError('harmonic_mean requires at least one data point')
    elif n == 1:
        # A single value is its own harmonic mean; only validate it.
        x = data[0]
        if isinstance(x, (numbers.Real, Decimal)):
            if x < 0:
                raise StatisticsError(errmsg)
            return x
        else:
            raise TypeError('unsupported type')
    try:
        T, total, count = _sum(1/x for x in _fail_neg(data, errmsg))
    except ZeroDivisionError:
        # Taking the reciprocal of a zero data point failed.
        return 0
    assert count == n
    return _convert(n/total, T)
[409] Fix | Delete
[410] Fix | Delete
[411] Fix | Delete
# FIXME: investigate ways to calculate medians without sorting? Quickselect?
[412] Fix | Delete
def median(data):
    """Return the median (middle value) of numeric data.

    When the number of data points is odd, return the middle data point.
    When the number of data points is even, the median is interpolated by
    taking the average of the two middle values:

    >>> median([1, 3, 5])
    3
    >>> median([1, 3, 5, 7])
    4.0

    If ``data`` is empty, StatisticsError is raised.
    """
    data = sorted(data)
    n = len(data)
    if n == 0:
        raise StatisticsError("no median for empty data")
    if n%2 == 1:
        return data[n//2]
    else:
        i = n//2
        return (data[i - 1] + data[i])/2
[434] Fix | Delete
[435] Fix | Delete
[436] Fix | Delete
def median_low(data):
    """Return the low median of numeric data.

    When the number of data points is odd, the middle value is returned.
    When it is even, the smaller of the two middle values is returned.

    >>> median_low([1, 3, 5])
    3
    >>> median_low([1, 3, 5, 7])
    3

    If ``data`` is empty, StatisticsError is raised.
    """
    data = sorted(data)
    n = len(data)
    if n == 0:
        raise StatisticsError("no median for empty data")
    if n%2 == 1:
        return data[n//2]
    else:
        return data[n//2 - 1]
[456] Fix | Delete
[457] Fix | Delete
[458] Fix | Delete
def median_high(data):
    """Return the high median of data.

    When the number of data points is odd, the middle value is returned.
    When it is even, the larger of the two middle values is returned.

    >>> median_high([1, 3, 5])
    3
    >>> median_high([1, 3, 5, 7])
    5

    If ``data`` is empty, StatisticsError is raised.
    """
    data = sorted(data)
    n = len(data)
    if n == 0:
        raise StatisticsError("no median for empty data")
    # For even n this index is the upper of the two middle positions.
    return data[n//2]
[475] Fix | Delete
[476] Fix | Delete
[477] Fix | Delete
def median_grouped(data, interval=1):
[478] Fix | Delete
"""Return the 50th percentile (median) of grouped continuous data.
[479] Fix | Delete
[480] Fix | Delete
>>> median_grouped([1, 2, 2, 3, 4, 4, 4, 4, 4, 5])
[481] Fix | Delete
3.7
[482] Fix | Delete
>>> median_grouped([52, 52, 53, 54])
[483] Fix | Delete
52.5
[484] Fix | Delete
[485] Fix | Delete
This calculates the median as the 50th percentile, and should be
[486] Fix | Delete
used when your data is continuous and grouped. In the above example,
[487] Fix | Delete
the values 1, 2, 3, etc. actually represent the midpoint of classes
[488] Fix | Delete
0.5-1.5, 1.5-2.5, 2.5-3.5, etc. The middle value falls somewhere in
[489] Fix | Delete
class 3.5-4.5, and interpolation is used to estimate it.
[490] Fix | Delete
[491] Fix | Delete
Optional argument ``interval`` represents the class interval, and
[492] Fix | Delete
defaults to 1. Changing the class interval naturally will change the
[493] Fix | Delete
interpolated 50th percentile value:
[494] Fix | Delete
[495] Fix | Delete
>>> median_grouped([1, 3, 3, 5, 7], interval=1)
[496] Fix | Delete
3.25
[497] Fix | Delete
>>> median_grouped([1, 3, 3, 5, 7], interval=2)
[498] Fix | Delete
3.5
[499] Fix | Delete
It is recommended that you Edit text format, this type of Fix handles quite a lot in one request
Function