summaryrefslogtreecommitdiff
path: root/mean_avg.awk
diff options
context:
space:
mode:
authorwukong <wukong@longaeva>2018-06-05 22:34:51 -0700
committerwukong <wukong@longaeva>2018-06-05 22:34:51 -0700
commit8f263e859e0970ce87b77addc80dec28e8fc7e82 (patch)
treeb0bdc392230c9960f5e5f5b3dea979405334628f /mean_avg.awk
re-init
Diffstat (limited to '')
-rw-r--r--mean_avg.awk62
1 files changed, 62 insertions, 0 deletions
diff --git a/mean_avg.awk b/mean_avg.awk
new file mode 100644
index 0000000..799f96c
--- /dev/null
+++ b/mean_avg.awk
@@ -0,0 +1,62 @@
+#!/usr/bin/awk -f
+
+### mean_avg.awk
+# average columns of numerical data
+
+BEGIN {   # build `number`, the anchored regex that decides whether a field is numeric
+    sign = "[+-]?"
+    decimal = "[0-9]+[.]?[0-9]*"         # 12  12.  12.5
+    fraction = "[.][0-9]+"               # .5 -- needs a digit, so a lone "." is not a number
+    exponent = "([Ee]" sign "[0-9]+)?"   # optional e10, E-3
+    number = "^" sign "(" decimal "|" fraction ")" exponent "$"
+}
+
+NR == 1 {   # first record: keep every non-numeric field as that column's label
+    header_nf = NF                       # field count of the first record
+    n = 0
+    while (++n <= NF) {
+        if (!($n ~ number)) header[n] = $n
+    }
+}
+
+# Accumulate per-column statistics from every numeric field of a non-empty
+# record; fields that do not match `number` are skipped.
+# The variance uses Welford's update: the squared-deviation sum grows by
+# (x - old_mean) * (x - new_mean). Squaring only the deviation from the
+# updated mean would underestimate the spread.
+NF != 0 {
+    if (NF > max_nf)
+        max_nf = NF                      # widest record seen so far; read by END
+    for (n = 1; n <= NF; n++) {
+        if ($n !~ number)
+            continue
+        count[n] += 1
+        sum[n] += $n
+        delta[n] = $n - mean[n]          # deviation from the mean before this value
+        mean[n] = sum[n]/count[n]
+        sum_delta2[n] += delta[n]*($n - mean[n])
+        # sample variance (count - 1 denominator); set to 0 for a single value
+        if (count[n] > 1)
+            var[n] = sum_delta2[n]/(count[n] - 1)
+        else
+            var[n] = 0
+    }
+}
+
+END {   # print one summary row per column, up to the widest record seen
+    printf("\n")
+    printf("%-6s\t%-6s %-6s %-6s\n", "col", "mean", "std_err", "count")
+    n = 0
+    while (++n <= max_nf) {
+        # row label: text kept from the first record, otherwise the column number
+        if (!header[n]) printf("%-6g\t", n)
+        else printf("%-6s\t", header[n])
+        if (!count[n]) {   # no numeric values in this column: print blank cells
+            printf("%2s %2s %2s\n", " ", " ", " ")
+            continue
+        }
+        half_width = 1.96*sqrt(var[n]/count[n])   # value shown under "std_err"
+        printf("%-6g ±%-6g %-6g\n", mean[n], half_width, count[n])
+    }
+}
+