summary refs log tree commit diff
path: root/mean.awk
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- mean.awk 29
1 files changed, 14 insertions, 15 deletions
diff --git a/mean.awk b/mean.awk
index 816208c..b6a794e 100644
--- a/mean.awk
+++ b/mean.awk
@@ -1,36 +1,35 @@
#!/usr/bin/awk -f
### mean.awk
-# calculate mean average
+# calculate mean average of serialized numbers
BEGIN {
OFS = FS
- sign = "[+-±]?"
+ sign = "[+-]?"
decimal = "[0-9]+[.]?[0-9]*"
- fraction = "[.][0-9]*"
+ fraction = "[.][0-9]+"
exponent = "([Ee]" sign "[0-9]+)?"
number = "^" sign "(" decimal "|" fraction ")" exponent "$"
}
# Welford's 'online' algorithm for variance
NF > 0 {
- for (m=1; m<=NF; m++) {
- if (m ~ number) {
+ for (n=1; n<=NF; n++) {
+ if ($n ~ number) {
count += 1
- delta0 = $m - mean
- mean = mean + delta0/count
- delta1 = $m - mean
- M2 = M2 + delta0*delta1
- if (count > 1)
- var = M2/(count - 1)
- else
- var = "nan"
+ (count == 1 || $n < min) ? min = $n : min = min
+ (count == 1 || $n > max) ? max = $n : max = max
+ delta0 = $n - mean
+ mean += delta0/count
+ delta1 = $n - mean
+ M2 += delta0*delta1
+ (count > 1) ? var = M2/(count - 1) : var = "nan"
}
}
}
END {
- print "mean", "std_err", "std_dev", "count"
- print mean, sqrt(var/count), sqrt(var), count
+ print "mean", "std_err", "std_dev", "min", "max", "count"
+ print mean, sqrt(var/count), sqrt(var), min, max, count
}