#!/usr/bin/awk -f

### mean.awk
# calculate mean average of serialized numbers
#
# Scans every field of every input record, keeps the fields that match the
# decimal-number pattern built in BEGIN (optional sign, digits with an
# optional decimal point, optional exponent) and prints a header line
# followed by one line of statistics:
#
#     mean  std_err  std_dev  min  max  count
#
# Columns are joined with OFS, which is set to the input separator FS.
# std_dev is the sample standard deviation (divisor count - 1) and
# std_err is sqrt(variance / count).
#
# Provenance: post-image of commit ddcb1f73423a9b47d9b457cf3b19f622b0aa567f
# (wukong, Sat, 5 Jan 2019 16:33:14 -0800), "added min and max to mean and
# mean_avg, fixed regex bugs in mean".  That commit
#   - removed the '±' from the sign bracket expression ("[+-±]?" became
#     "[+-]?"; with a character after it the '-' reads as a range),
#   - made the bare-fraction form require a digit ("[.][0-9]*" became
#     "[.][0-9]+"),
#   - matched the field value ($n) instead of the loop index against the
#     number pattern,
#   - added the min and max columns.
# Review changes on top of that commit:
#   - END no longer divides by zero when no numeric field was seen,
#   - "nan" is printed explicitly when fewer than two samples exist,
#     instead of feeding the string "nan" through sqrt(),
#   - min/max are compared numerically but stored as the original text.

BEGIN {
    OFS = FS

    # Building blocks of the number pattern.
    sign     = "[+-]?"
    decimal  = "[0-9]+[.]?[0-9]*"         # 12   12.   12.5
    fraction = "[.][0-9]+"                # .5
    exponent = "([Ee]" sign "[0-9]+)?"    # optional e10, E-3, ...
    number   = "^" sign "(" decimal "|" fraction ")" exponent "$"
}

# Welford's 'online' algorithm for variance
{
    for (n = 1; n <= NF; n++) {
        if ($n ~ number) {
            x = $n + 0          # force a numeric value for all arithmetic
            count += 1

            # Store the field text (not x) so min/max are printed exactly
            # as they appeared in the input; compare numerically.
            if (count == 1 || x < min + 0)
                min = $n
            if (count == 1 || x > max + 0)
                max = $n

            delta0 = x - mean
            mean += delta0/count
            delta1 = x - mean
            M2 += delta0*delta1
        }
    }
}

END {
    print "mean", "std_err", "std_dev", "min", "max", "count"

    if (count > 1) {
        var = M2/(count - 1)
        print mean, sqrt(var/count), sqrt(var), min, max, count
    } else if (count == 1) {
        # A single sample has no sample variance.
        print mean, "nan", "nan", min, max, count
    } else {
        # No numeric field at all: nothing to average, and var/count
        # would be a division by zero.
        print "nan", "nan", "nan", "nan", "nan", 0
    }
}