summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- mean_avg.awk 35
1 files changed, 15 insertions, 20 deletions
diff --git a/mean_avg.awk b/mean_avg.awk
index 799f96c..23b1e73 100644
--- a/mean_avg.awk
+++ b/mean_avg.awk
@@ -4,7 +4,7 @@
# average columns of numerical data
BEGIN {
- sign = "[+-]?"
+ sign = "[-+±]?"  # '-' is listed first so it is a literal; "+-±" inside brackets is parsed as a range from '+' to '±'
decimal = "[0-9]+[.]?[0-9]*"
fraction = "[.][0-9]*"
exponent = "([Ee]" sign "[0-9]+)?"
@@ -19,44 +19,39 @@ NR == 1 {
}
}
+# Welford's 'online' algorithm for variance: per-column running mean[] and M2[] (sum of squared deviations), updated one value at a time
NF != 0 {
if (NF > max_nf)
max_nf = NF
for (n=1; n <= NF; n++) {
- if ($n !~ number) {
- continue
- }
if ($n ~ number) {
count[n] += 1
- sum[n] += $n
- sum2[n] += $n*$n
- mean[n] = sum[n]/count[n]
- delta[n] = $n - mean[n]
- delta2[n] = delta[n]*delta[n]
- sum_delta[n] += delta[n]
- sum_delta2[n] += delta2[n]
- if ((count[n] - 1) != 0)
- var[n] = sum_delta2[n]/(count[n] - 1)
+ delta0[n] = $n - mean[n]  # deviation from the mean before this value is included
+ mean[n] = mean[n] + delta0[n]/count[n]  # running mean after including this value
+ delta1[n] = $n - mean[n]  # deviation from the updated mean
+ M2[n] = M2[n] + delta0[n]*delta1[n]  # accumulate the product of the two deviations
+ if (count[n] > 1)
+ var[n] = M2[n]/(count[n] - 1)  # sample variance (divisor is count - 1)
else
- var[n] = 0
+ var[n] = "0"  # NOTE(review): string constant rather than numeric 0; it still converts to 0 in arithmetic - confirm this is intended
}
}
}
END {
printf("\n")
- printf("%-6s\t%-6s %-6s %-6s\n", "col", "mean", "std_err", "count")
+ printf("%s, \t%s, %s, %s, %s\n", "col", "mean", "std_dev", "std_err", "count")  # header row: five comma-separated columns
for (n=1; n<=max_nf; n++) {
if (header[n])
- printf("%-6s\t", header[n])
+ printf("%s, \t", header[n])  # label taken from header[], which is filled in outside this hunk
else
- printf("%-6g\t", n)
+ printf("col_%g, \t", n)  # no header entry: fall back to a positional label
if (count[n]) {
- printf("%-6g ±%-6g %-6g\n",
- mean[n], 1.96*sqrt(var[n]/count[n]), count[n])
+ printf("%g, ±%g, ±%g, %g\n",
+ mean[n], sqrt(var[n]), 1.96*sqrt(var[n]/count[n]), count[n])  # std_dev is sqrt(var); the "std_err" column is 1.96 * sqrt(var/count)
}
else
- printf("%2s %2s %2s\n", " ", " ", " ")
+ printf(",,,\n")  # three separators after the label, so an empty column has the same five fields as the header and data rows
}
}