diff options
Diffstat (limited to '')
| -rw-r--r-- | mean_avg.awk | 62 |
1 files changed, 62 insertions, 0 deletions
#!/usr/bin/awk -f

### mean_avg.awk
# Average columns of numerical data.
#
# For every column (field position) this script reports the mean of the
# numeric fields seen in that column, 1.96 times the standard error of
# that mean, and the number of numeric fields that contributed.
#
# Non-numeric fields are ignored, except that non-numeric fields on the
# very first input record are remembered and used as column labels.
# Empty records (NF == 0) are skipped.

BEGIN {
    # Regular expression describing what counts as a number.
    sign     = "[+-]?"
    decimal  = "[0-9]+[.]?[0-9]*"
    # A leading-dot number needs at least one digit after the dot;
    # otherwise a lone "." would be counted as a numeric field.
    fraction = "[.][0-9]+"
    exponent = "([Ee]" sign "[0-9]+)?"
    number   = "^" sign "(" decimal "|" fraction ")" exponent "$"
}

# First record: any non-numeric field becomes the label of its column.
# Numeric fields on this record are still accumulated by the rule below.
NR == 1 {
    for (n = 1; n <= NF; n++) {
        if ($n !~ number)
            header[n] = $n
    }
}

# Every non-empty record: update the running statistics per column.
NF != 0 {
    if (NF > max_nf)
        max_nf = NF    # widest record seen so far; bounds the report

    for (n = 1; n <= NF; n++) {
        if ($n !~ number)
            continue

        # Welford's online update. m2[n] accumulates the sum of squared
        # deviations from the *current* mean, so m2/(count-1) is the
        # sample variance. Squaring (x - updated mean) alone, without the
        # (x - previous mean) factor, underestimates the variance.
        count[n]++
        delta = $n - mean[n]
        mean[n] += delta / count[n]
        m2[n] += delta * ($n - mean[n])
    }
}

END {
    printf("\n")
    printf("%-6s\t%-6s %-6s %-6s\n", "col", "mean", "std_err", "count")

    for (n = 1; n <= max_nf; n++) {
        # Column label: header text if one was captured, else the index.
        if (n in header)
            printf("%-6s\t", header[n])
        else
            printf("%-6g\t", n)

        if (n in count) {
            # Sample variance is undefined for a single value; report 0.
            if (count[n] > 1)
                var = m2[n] / (count[n] - 1)
            else
                var = 0

            # The "std_err" column holds 1.96 * sqrt(var / count).
            printf("%-6g ±%-6g %-6g\n",
                   mean[n], 1.96 * sqrt(var / count[n]), count[n])
        }
        else
            printf("%2s %2s %2s\n", " ", " ", " ")
    }
}
