Edit File by line
/home/barbar84/public_h.../wp-conte.../plugins/sujqvwi/ShExBy/shex_roo.../lib64/python3....
File: statistics.py
"""
[0] Fix | Delete
Basic statistics module.
[1] Fix | Delete
[2] Fix | Delete
This module provides functions for calculating statistics of data, including
[3] Fix | Delete
averages, variance, and standard deviation.
[4] Fix | Delete
[5] Fix | Delete
Calculating averages
[6] Fix | Delete
--------------------
[7] Fix | Delete
[8] Fix | Delete
================== =============================================
[9] Fix | Delete
Function Description
[10] Fix | Delete
================== =============================================
[11] Fix | Delete
mean Arithmetic mean (average) of data.
[12] Fix | Delete
harmonic_mean Harmonic mean of data.
[13] Fix | Delete
median Median (middle value) of data.
[14] Fix | Delete
median_low Low median of data.
[15] Fix | Delete
median_high High median of data.
[16] Fix | Delete
median_grouped Median, or 50th percentile, of grouped data.
[17] Fix | Delete
mode Mode (most common value) of data.
[18] Fix | Delete
================== =============================================
[19] Fix | Delete
[20] Fix | Delete
Calculate the arithmetic mean ("the average") of data:
[21] Fix | Delete
[22] Fix | Delete
>>> mean([-1.0, 2.5, 3.25, 5.75])
[23] Fix | Delete
2.625
[24] Fix | Delete
[25] Fix | Delete
[26] Fix | Delete
Calculate the standard median of discrete data:
[27] Fix | Delete
[28] Fix | Delete
>>> median([2, 3, 4, 5])
[29] Fix | Delete
3.5
[30] Fix | Delete
[31] Fix | Delete
[32] Fix | Delete
Calculate the median, or 50th percentile, of data grouped into class intervals
[33] Fix | Delete
centred on the data values provided. E.g. if your data points are rounded to
[34] Fix | Delete
the nearest whole number:
[35] Fix | Delete
[36] Fix | Delete
>>> median_grouped([2, 2, 3, 3, 3, 4]) #doctest: +ELLIPSIS
[37] Fix | Delete
2.8333333333...
[38] Fix | Delete
[39] Fix | Delete
This should be interpreted in this way: you have two data points in the class
[40] Fix | Delete
interval 1.5-2.5, three data points in the class interval 2.5-3.5, and one in
[41] Fix | Delete
the class interval 3.5-4.5. The median of these data points is 2.8333...
[42] Fix | Delete
[43] Fix | Delete
[44] Fix | Delete
Calculating variability or spread
[45] Fix | Delete
---------------------------------
[46] Fix | Delete
[47] Fix | Delete
================== =============================================
[48] Fix | Delete
Function Description
[49] Fix | Delete
================== =============================================
[50] Fix | Delete
pvariance Population variance of data.
[51] Fix | Delete
variance Sample variance of data.
[52] Fix | Delete
pstdev Population standard deviation of data.
[53] Fix | Delete
stdev Sample standard deviation of data.
[54] Fix | Delete
================== =============================================
[55] Fix | Delete
[56] Fix | Delete
Calculate the standard deviation of sample data:
[57] Fix | Delete
[58] Fix | Delete
>>> stdev([2.5, 3.25, 5.5, 11.25, 11.75]) #doctest: +ELLIPSIS
[59] Fix | Delete
4.38961843444...
[60] Fix | Delete
[61] Fix | Delete
If you have previously calculated the mean, you can pass it as the optional
[62] Fix | Delete
second argument to the four "spread" functions to avoid recalculating it:
[63] Fix | Delete
[64] Fix | Delete
>>> data = [1, 2, 2, 4, 4, 4, 5, 6]
[65] Fix | Delete
>>> mu = mean(data)
[66] Fix | Delete
>>> pvariance(data, mu)
[67] Fix | Delete
2.5
[68] Fix | Delete
[69] Fix | Delete
[70] Fix | Delete
Exceptions
[71] Fix | Delete
----------
[72] Fix | Delete
[73] Fix | Delete
A single exception is defined: StatisticsError is a subclass of ValueError.
[74] Fix | Delete
[75] Fix | Delete
"""
[76] Fix | Delete
[77] Fix | Delete
__all__ = [ 'StatisticsError',
[78] Fix | Delete
'pstdev', 'pvariance', 'stdev', 'variance',
[79] Fix | Delete
'median', 'median_low', 'median_high', 'median_grouped',
[80] Fix | Delete
'mean', 'mode', 'harmonic_mean',
[81] Fix | Delete
]
[82] Fix | Delete
[83] Fix | Delete
import collections
[84] Fix | Delete
import decimal
[85] Fix | Delete
import math
[86] Fix | Delete
import numbers
[87] Fix | Delete
[88] Fix | Delete
from fractions import Fraction
[89] Fix | Delete
from decimal import Decimal
[90] Fix | Delete
from itertools import groupby, chain
[91] Fix | Delete
from bisect import bisect_left, bisect_right
[92] Fix | Delete
[93] Fix | Delete
[94] Fix | Delete
[95] Fix | Delete
# === Exceptions ===
[96] Fix | Delete
[97] Fix | Delete
class StatisticsError(ValueError):
    """Error raised by this module's functions; a subclass of ValueError."""
[99] Fix | Delete
[100] Fix | Delete
[101] Fix | Delete
# === Private utilities ===
[102] Fix | Delete
[103] Fix | Delete
def _sum(data, start=0):
    """_sum(data [, start]) -> (type, sum, count)

    Add up ``data`` exactly and return a three-tuple: the type the result
    should eventually be converted to, the total (a Fraction when every
    value is finite), and the number of items consumed from ``data``.

    ``start`` (default 0) is included in the total but not in the count.
    With empty ``data`` the total is therefore just ``start``.

    Examples
    --------

    >>> _sum([3, 2.25, 4.5, -0.5, 1.0], 0.75)
    (<class 'float'>, Fraction(11, 1), 5)

    Some sources of round-off error will be avoided:

    # Built-in sum returns zero.
    >>> _sum([1e50, 1, -1e50] * 1000)
    (<class 'float'>, Fraction(1000, 1), 3000)

    Fractions and Decimals are also supported:

    >>> from fractions import Fraction as F
    >>> _sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)])
    (<class 'fractions.Fraction'>, Fraction(63, 20), 4)

    >>> from decimal import Decimal as D
    >>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")]
    >>> _sum(data)
    (<class 'decimal.Decimal'>, Fraction(6963, 10000), 4)

    Mixed types are currently treated as an error, except that int is
    allowed.
    """
    numer, denom = _exact_ratio(start)
    # Running numerator totals, keyed by denominator.  Non-finite values
    # come back from _exact_ratio as (x, None), so they pile up under None.
    by_denominator = {denom: numer}
    lookup = by_denominator.get
    result_type = _coerce(int, type(start))
    seen = 0
    # Consecutive items of the same type are coerced once per run.
    for kind, run in groupby(data, type):
        result_type = _coerce(result_type, kind)  # or raise TypeError
        for numer, denom in map(_exact_ratio, run):
            seen += 1
            by_denominator[denom] = lookup(denom, 0) + numer
    if None not in by_denominator:
        # Every partial is finite: combine them with the builtin sum.
        # FIXME is this faster if we sum them in order of the denominator?
        total = sum(
            Fraction(numer, denom)
            for denom, numer in sorted(by_denominator.items())
        )
        return (result_type, total, seen)
    # The sum will be a NAN or INF, so the finite partials are irrelevant
    # and only the special entry is reported.
    special = by_denominator[None]
    assert not _isfinite(special)
    return (result_type, special, seen)
[159] Fix | Delete
[160] Fix | Delete
[161] Fix | Delete
def _isfinite(x):
[162] Fix | Delete
try:
[163] Fix | Delete
return x.is_finite() # Likely a Decimal.
[164] Fix | Delete
except AttributeError:
[165] Fix | Delete
return math.isfinite(x) # Coerces to float first.
[166] Fix | Delete
[167] Fix | Delete
[168] Fix | Delete
def _coerce(T, S):
[169] Fix | Delete
"""Coerce types T and S to a common type, or raise TypeError.
[170] Fix | Delete
[171] Fix | Delete
Coercion rules are currently an implementation detail. See the CoerceTest
[172] Fix | Delete
test class in test_statistics for details.
[173] Fix | Delete
"""
[174] Fix | Delete
# See http://bugs.python.org/issue24068.
[175] Fix | Delete
assert T is not bool, "initial type T is bool"
[176] Fix | Delete
# If the types are the same, no need to coerce anything. Put this
[177] Fix | Delete
# first, so that the usual case (no coercion needed) happens as soon
[178] Fix | Delete
# as possible.
[179] Fix | Delete
if T is S: return T
[180] Fix | Delete
# Mixed int & other coerce to the other type.
[181] Fix | Delete
if S is int or S is bool: return T
[182] Fix | Delete
if T is int: return S
[183] Fix | Delete
# If one is a (strict) subclass of the other, coerce to the subclass.
[184] Fix | Delete
if issubclass(S, T): return S
[185] Fix | Delete
if issubclass(T, S): return T
[186] Fix | Delete
# Ints coerce to the other type.
[187] Fix | Delete
if issubclass(T, int): return S
[188] Fix | Delete
if issubclass(S, int): return T
[189] Fix | Delete
# Mixed fraction & float coerces to float (or float subclass).
[190] Fix | Delete
if issubclass(T, Fraction) and issubclass(S, float):
[191] Fix | Delete
return S
[192] Fix | Delete
if issubclass(T, float) and issubclass(S, Fraction):
[193] Fix | Delete
return T
[194] Fix | Delete
# Any other combination is disallowed.
[195] Fix | Delete
msg = "don't know how to coerce %s and %s"
[196] Fix | Delete
raise TypeError(msg % (T.__name__, S.__name__))
[197] Fix | Delete
[198] Fix | Delete
[199] Fix | Delete
def _exact_ratio(x):
[200] Fix | Delete
"""Return Real number x to exact (numerator, denominator) pair.
[201] Fix | Delete
[202] Fix | Delete
>>> _exact_ratio(0.25)
[203] Fix | Delete
(1, 4)
[204] Fix | Delete
[205] Fix | Delete
x is expected to be an int, Fraction, Decimal or float.
[206] Fix | Delete
"""
[207] Fix | Delete
try:
[208] Fix | Delete
# Optimise the common case of floats. We expect that the most often
[209] Fix | Delete
# used numeric type will be builtin floats, so try to make this as
[210] Fix | Delete
# fast as possible.
[211] Fix | Delete
if type(x) is float or type(x) is Decimal:
[212] Fix | Delete
return x.as_integer_ratio()
[213] Fix | Delete
try:
[214] Fix | Delete
# x may be an int, Fraction, or Integral ABC.
[215] Fix | Delete
return (x.numerator, x.denominator)
[216] Fix | Delete
except AttributeError:
[217] Fix | Delete
try:
[218] Fix | Delete
# x may be a float or Decimal subclass.
[219] Fix | Delete
return x.as_integer_ratio()
[220] Fix | Delete
except AttributeError:
[221] Fix | Delete
# Just give up?
[222] Fix | Delete
pass
[223] Fix | Delete
except (OverflowError, ValueError):
[224] Fix | Delete
# float NAN or INF.
[225] Fix | Delete
assert not _isfinite(x)
[226] Fix | Delete
return (x, None)
[227] Fix | Delete
msg = "can't convert type '{}' to numerator/denominator"
[228] Fix | Delete
raise TypeError(msg.format(type(x).__name__))
[229] Fix | Delete
[230] Fix | Delete
[231] Fix | Delete
def _convert(value, T):
[232] Fix | Delete
"""Convert value to given numeric type T."""
[233] Fix | Delete
if type(value) is T:
[234] Fix | Delete
# This covers the cases where T is Fraction, or where value is
[235] Fix | Delete
# a NAN or INF (Decimal or float).
[236] Fix | Delete
return value
[237] Fix | Delete
if issubclass(T, int) and value.denominator != 1:
[238] Fix | Delete
T = float
[239] Fix | Delete
try:
[240] Fix | Delete
# FIXME: what do we do if this overflows?
[241] Fix | Delete
return T(value)
[242] Fix | Delete
except TypeError:
[243] Fix | Delete
if issubclass(T, Decimal):
[244] Fix | Delete
return T(value.numerator)/T(value.denominator)
[245] Fix | Delete
else:
[246] Fix | Delete
raise
[247] Fix | Delete
[248] Fix | Delete
[249] Fix | Delete
def _counts(data):
[250] Fix | Delete
# Generate a table of sorted (value, frequency) pairs.
[251] Fix | Delete
table = collections.Counter(iter(data)).most_common()
[252] Fix | Delete
if not table:
[253] Fix | Delete
return table
[254] Fix | Delete
# Extract the values with the highest frequency.
[255] Fix | Delete
maxfreq = table[0][1]
[256] Fix | Delete
for i in range(1, len(table)):
[257] Fix | Delete
if table[i][1] != maxfreq:
[258] Fix | Delete
table = table[:i]
[259] Fix | Delete
break
[260] Fix | Delete
return table
[261] Fix | Delete
[262] Fix | Delete
[263] Fix | Delete
def _find_lteq(a, x):
[264] Fix | Delete
'Locate the leftmost value exactly equal to x'
[265] Fix | Delete
i = bisect_left(a, x)
[266] Fix | Delete
if i != len(a) and a[i] == x:
[267] Fix | Delete
return i
[268] Fix | Delete
raise ValueError
[269] Fix | Delete
[270] Fix | Delete
[271] Fix | Delete
def _find_rteq(a, l, x):
[272] Fix | Delete
'Locate the rightmost value exactly equal to x'
[273] Fix | Delete
i = bisect_right(a, x, lo=l)
[274] Fix | Delete
if i != (len(a)+1) and a[i-1] == x:
[275] Fix | Delete
return i-1
[276] Fix | Delete
raise ValueError
[277] Fix | Delete
[278] Fix | Delete
[279] Fix | Delete
def _fail_neg(values, errmsg='negative value'):
[280] Fix | Delete
"""Iterate over values, failing if any are less than zero."""
[281] Fix | Delete
for x in values:
[282] Fix | Delete
if x < 0:
[283] Fix | Delete
raise StatisticsError(errmsg)
[284] Fix | Delete
yield x
[285] Fix | Delete
[286] Fix | Delete
[287] Fix | Delete
# === Measures of central tendency (averages) ===
[288] Fix | Delete
[289] Fix | Delete
def mean(data):
    """Return the sample arithmetic mean of data.

    >>> mean([1, 2, 3, 4, 4])
    2.8

    >>> from fractions import Fraction as F
    >>> mean([F(3, 7), F(1, 21), F(5, 3), F(1, 3)])
    Fraction(13, 21)

    >>> from decimal import Decimal as D
    >>> mean([D("0.5"), D("0.75"), D("0.625"), D("0.375")])
    Decimal('0.5625')

    ``data`` may be a sequence or an iterator.  If it is empty,
    StatisticsError will be raised.
    """
    # An iterator has no len(), so materialise it into a list first.
    if iter(data) is data:
        data = list(data)
    size = len(data)
    if size < 1:
        raise StatisticsError('mean requires at least one data point')
    result_type, total, count = _sum(data)
    assert count == size
    return _convert(total/size, result_type)
[313] Fix | Delete
[314] Fix | Delete
[315] Fix | Delete
def harmonic_mean(data):
    """Return the harmonic mean of data.

    The harmonic mean (also known as the subcontrary mean) is the
    reciprocal of the arithmetic mean of the reciprocals of the data.
    It is often the appropriate average for rates or ratios, such as
    speeds. Example:

    Suppose an investor purchases an equal value of shares in each of
    three companies, with P/E (price/earning) ratios of 2.5, 3 and 10.
    What is the average P/E ratio for the investor's portfolio?

    >>> harmonic_mean([2.5, 3, 10])  # For an equal investment portfolio.
    3.6

    Using the arithmetic mean would give an average of about 5.167, which
    is too high.

    If ``data`` is empty, or any element is less than zero,
    ``harmonic_mean`` will raise ``StatisticsError``.  When taking a
    reciprocal raises ZeroDivisionError, the result is 0.
    """
    # For a justification for using harmonic mean for P/E ratios, see
    # http://fixthepitch.pellucid.com/comps-analysis-the-missing-harmony-of-summary-statistics/
    # http://papers.ssrn.com/sol3/papers.cfm?abstract_id=2621087
    if iter(data) is data:
        # An iterator has no len(), so materialise it into a list first.
        data = list(data)
    size = len(data)
    if size < 1:
        raise StatisticsError('harmonic_mean requires at least one data point')
    negative_msg = 'harmonic mean does not support negative values'
    if size == 1:
        # A lone data point is returned as-is once it has been validated.
        only = data[0]
        if not isinstance(only, (numbers.Real, Decimal)):
            raise TypeError('unsupported type')
        if only < 0:
            raise StatisticsError(negative_msg)
        return only
    try:
        reciprocals = (1/value for value in _fail_neg(data, negative_msg))
        result_type, total, count = _sum(reciprocals)
    except ZeroDivisionError:
        return 0
    assert count == size
    return _convert(size/total, result_type)
[359] Fix | Delete
[360] Fix | Delete
[361] Fix | Delete
# FIXME: investigate ways to calculate medians without sorting? Quickselect?
[362] Fix | Delete
def median(data):
    """Return the median (middle value) of numeric data.

    With an odd number of data points the middle one is returned.  With
    an even number, the result is the average of the two middle values:

    >>> median([1, 3, 5])
    3
    >>> median([1, 3, 5, 7])
    4.0

    StatisticsError is raised if ``data`` is empty.
    """
    ordered = sorted(data)
    size = len(ordered)
    if size == 0:
        raise StatisticsError("no median for empty data")
    half, is_odd = divmod(size, 2)
    if is_odd:
        return ordered[half]
    return (ordered[half - 1] + ordered[half])/2
[384] Fix | Delete
[385] Fix | Delete
[386] Fix | Delete
def median_low(data):
    """Return the low median of numeric data.

    With an odd number of data points the middle value is returned; with
    an even number, the smaller of the two middle values is returned.

    >>> median_low([1, 3, 5])
    3
    >>> median_low([1, 3, 5, 7])
    3

    StatisticsError is raised if ``data`` is empty.
    """
    ordered = sorted(data)
    size = len(ordered)
    if size == 0:
        raise StatisticsError("no median for empty data")
    # (size - 1)//2 is the middle index for odd sizes and the lower of the
    # two middle indices for even sizes.
    return ordered[(size - 1)//2]
[406] Fix | Delete
[407] Fix | Delete
[408] Fix | Delete
def median_high(data):
    """Return the high median of data.

    With an odd number of data points the middle value is returned; with
    an even number, the larger of the two middle values is returned.

    >>> median_high([1, 3, 5])
    3
    >>> median_high([1, 3, 5, 7])
    5

    StatisticsError is raised if ``data`` is empty.
    """
    ordered = sorted(data)
    if not ordered:
        raise StatisticsError("no median for empty data")
    middle = len(ordered)//2
    return ordered[middle]
[425] Fix | Delete
[426] Fix | Delete
[427] Fix | Delete
def median_grouped(data, interval=1):
    """Return the 50th percentile (median) of grouped continuous data.

    >>> median_grouped([1, 2, 2, 3, 4, 4, 4, 4, 4, 5])
    3.7
    >>> median_grouped([52, 52, 53, 54])
    52.5

    Use this when the data is continuous but has been grouped into
    classes.  In the first example above the values 1, 2, 3, etc. stand
    for the midpoints of the classes 0.5-1.5, 1.5-2.5, 2.5-3.5, etc.  The
    middle value lies somewhere in class 3.5-4.5 and is estimated by
    interpolation.

    Optional argument ``interval`` is the width of each class and
    defaults to 1.  A different width gives a different interpolated
    value:

    >>> median_grouped([1, 3, 3, 5, 7], interval=1)
    3.25
    >>> median_grouped([1, 3, 3, 5, 7], interval=2)
    3.5

    This function does not check whether the data points are at least
    ``interval`` apart.  StatisticsError is raised if ``data`` is empty,
    and TypeError if the middle value or ``interval`` is a str or bytes.
    """
    ordered = sorted(data)
    size = len(ordered)
    if size == 0:
        raise StatisticsError("no median for empty data")
    if size == 1:
        return ordered[0]
    # The value at the midpoint; it stands for the centre of its class.
    midpoint = ordered[size//2]
    for candidate in (midpoint, interval):
        if isinstance(candidate, (str, bytes)):
            raise TypeError('expected number but got %r' % candidate)
    try:
        # Lower limit of the class containing the median.
        lower = midpoint - interval/2
    except TypeError:
        # Mixed type. For now we just coerce to float.
        lower = float(midpoint) - float(interval)/2

    # Bisection locates the run of items equal to midpoint: first the
    # leftmost occurrence, then the rightmost one searching from there on.
    first = _find_lteq(ordered, midpoint)
    last = _find_rteq(ordered, first, midpoint)
    below = first  # number of items sorted before the run
    run_length = last - first + 1
    return lower + interval*(size/2 - below)/run_length
[480] Fix | Delete
[481] Fix | Delete
[482] Fix | Delete
def mode(data):
[483] Fix | Delete
"""Return the most common data point from discrete or nominal data.
[484] Fix | Delete
[485] Fix | Delete
``mode`` assumes discrete data, and returns a single value. This is the
[486] Fix | Delete
standard treatment of the mode as commonly taught in schools:
[487] Fix | Delete
[488] Fix | Delete
>>> mode([1, 1, 2, 3, 3, 3, 3, 4])
[489] Fix | Delete
3
[490] Fix | Delete
[491] Fix | Delete
This also works with nominal (non-numeric) data:
[492] Fix | Delete
[493] Fix | Delete
>>> mode(["red", "blue", "blue", "red", "green", "red", "red"])
[494] Fix | Delete
'red'
[495] Fix | Delete
[496] Fix | Delete
If there is not exactly one most common value, ``mode`` will raise
[497] Fix | Delete
StatisticsError.
[498] Fix | Delete
"""
[499] Fix | Delete
12
It is recommended that you Edit text format, this type of Fix handles quite a lot in one request
Function