#!/usr/bin/awk -f
### mean_avg.awk
# Average columns of numerical data.
#
# Usage: awk -f mean_avg.awk [file ...]
#
# For every whitespace-separated column this prints the mean of the numeric
# fields, an error estimate and the number of numeric fields seen.
#   * Non-numeric fields on the first record are used as column labels;
#     columns without a label are identified by their index.
#   * Non-numeric fields on any record are ignored (not counted as zero).
#   * Blank records are skipped.
# The value printed under "std_err" is 1.96 * sqrt(variance / count), i.e.
# the half-width of a normal-approximation 95% confidence interval on the
# mean.  The column label is kept as "std_err" for output compatibility.

BEGIN {
    # Building blocks of the pattern that decides whether a field is a number.
    sign     = "[+-]?"
    decimal  = "[0-9]+[.]?[0-9]*"       # 12   12.   12.5
    fraction = "[.][0-9]+"              # .5   (at least one digit, so a bare
                                        #       "." is NOT treated as a number)
    exponent = "([Ee]" sign "[0-9]+)?"  # optional e+10 / E-3 suffix
    number   = "^" sign "(" decimal "|" fraction ")" exponent "$"
}

# First record: remember every non-numeric field as that column's label.
# Numeric fields on the first record are treated as data by the rule below.
NR == 1 {
    for (n = 1; n <= NF; n++) {
        if ($n !~ number)
            header[n] = $n
    }
}

# Every non-blank record: fold each numeric field into its column's running
# statistics using Welford's online algorithm, which yields the exact sample
# variance in a single pass and is numerically stable.
NF != 0 {
    if (NF > max_nf)
        max_nf = NF

    for (n = 1; n <= NF; n++) {
        if ($n !~ number)
            continue

        x = $n + 0                      # force numeric value of the field
        count[n]++
        delta    = x - mean[n]          # deviation from the previous mean
        mean[n] += delta / count[n]     # updated running mean
        m2[n]   += delta * (x - mean[n])  # sum of squared deviations so far
    }
}

END {
    printf("\n")
    printf("%-6s\t%-6s %-6s %-6s\n", "col", "mean", "std_err", "count")

    for (n = 1; n <= max_nf; n++) {
        # Row label: header text when available, otherwise the column index.
        if (n in header)
            printf("%-6s\t", header[n])
        else
            printf("%-6g\t", n)

        if (n in count) {
            # Sample variance (n - 1 denominator); defined as 0 for a single
            # observation to avoid dividing by zero.
            variance = (count[n] > 1) ? m2[n] / (count[n] - 1) : 0
            printf("%-6g ±%-6g %-6g\n", mean[n], 1.96*sqrt(variance/count[n]), count[n])
        } else {
            # Column contained no numeric data: emit blank placeholders.
            printf("%2s %2s %2s\n", " ", " ", " ")
        }
    }
}