summaryrefslogtreecommitdiff
path: root/mean.awk
diff options
context:
space:
mode:
authorwukong <wukong@longaeva>2018-06-13 21:41:39 -0700
committerwukong <wukong@longaeva>2018-06-13 21:41:39 -0700
commit8875b420a873a360a9484778b487394add318a5e (patch)
treed357816354c5d9d4bc3b1db5c43ceda7933b5e3f /mean.awk
parent6bb6be845b6099550e0a530217633e30522093ad (diff)
added shebang, replaced if-elses with shortcut notation, ported online mean and variance calc to mean.awk
Diffstat (limited to '')
-rw-r--r--mean.awk37
1 files changed, 29 insertions, 8 deletions
diff --git a/mean.awk b/mean.awk
index 45bedee..c1d31a6 100644
--- a/mean.awk
+++ b/mean.awk
@@ -1,13 +1,34 @@
+#!/usr/bin/awk -f
+
### mean.awk
-# find mean average of a list of numbers.
+# calculate the mean, standard deviation, standard error and count of the numbers in the input
BEGIN {
- X = ARGV[1]
- inputsize = split(X, Xarr)
- Yarr[n] = 0
- SUM = 0
- for (m=1; m <= inputsize; m++) {
- SUM =+ Xarr[m]
+ sign = "[-+]?"  # '-' listed first so it is a literal; "+-x" inside brackets would be a character range
+ decimal = "[0-9]+[.]?[0-9]*"  # digits with an optional fractional part: 12, 12., 12.5
+ fraction = "[.][0-9]+"  # leading-dot form: .5 (at least one digit, so a lone "." is rejected)
+ exponent = "([Ee]" sign "[0-9]+)?"  # optional exponent: e10, E-3
+ number = "^" sign "(" decimal "|" fraction ")" exponent "$"  # anchored: the whole field must be a number
+}
+
+# Welford's 'online' algorithm for variance
+NF > 0 {
+ for (m=1; m<=NF; m++) {
+ if ($m ~ number) {  # match the field's text; the bare index m is always an integer and would always match
+ count += 1
+ delta0 = $m - mean  # deviation from the mean before this value
+ mean = mean + delta0/count
+ delta1 = $m - mean  # deviation from the mean after this value
+ M2 = M2 + delta0*delta1  # running sum of squared deviations
+ if (count > 1)
+ var = M2/(count - 1)  # sample variance (n - 1 denominator)
+ else
+ var = "nan"
+ }
}
- print SUM
+}
+
+END {
+ print "mean", "std_dev", "std_err", "count"
+ print (count ? mean : "nan"), (count > 1 ? sqrt(var) : "nan"), (count > 1 ? sqrt(var/count) : "nan"), count + 0  # guards: no division by zero on empty input, no sqrt("nan") with a single value
}