#!/usr/bin/awk -f
### mean_avg.awk
# Average columns of numerical data.
#
# Usage: awk -f mean_avg.awk [file ...]
#
# Output: a title line, then one line per input column giving
#   name  mean  std_dev (sample, n-1)  std_err  count
# A column's name is taken from the first record when that field is not a
# number; otherwise it is "col_<n>".  Fields that do not look like numbers
# are ignored.  A column with no numeric fields is reported with count 0
# and "NA" for the statistics.

BEGIN {
    # '-' must not sit between two characters inside a bracket expression,
    # or it denotes a range ("[+-±]" matches digits and letters too).
    # '±' is written as an alternative rather than a bracket member so that
    # byte-oriented awks, which see it as two bytes, still match it.
    sign     = "([-+]|±)?"
    decimal  = "[0-9]+[.]?[0-9]*"
    fraction = "[.][0-9]+"              # at least one digit: a lone "." is not a number
    exponent = "([Ee][-+]?[0-9]+)?"
    number   = "^" sign "(" decimal "|" fraction ")" exponent "$"
}

# First record: remember non-numeric fields as column names.
# (Numeric fields on the first record are still counted as data below.)
NR == 1 {
    for (n = 1; n <= NF; n++) {
        if ($n !~ number)
            header[n] = $n
    }
}

# Welford's 'online' algorithm for variance, applied per column.
NF > 0 {
    if (NF > max_nf)
        max_nf = NF

    for (n = 1; n <= NF; n++) {
        if ($n !~ number)
            continue

        # awk's string-to-number conversion stops at '±' and would yield 0,
        # so drop the marker and use the magnitude that follows it.
        value = $n
        sub(/^±/, "", value)
        value += 0

        count[n]++
        delta    = value - mean[n]
        mean[n] += delta / count[n]
        M2[n]   += delta * (value - mean[n])

        # Sample variance is undefined for a single observation; report 0.
        if (count[n] > 1)
            var[n] = M2[n] / (count[n] - 1)
        else
            var[n] = 0
    }
}

END {
    print "col", "mean", "std_dev", "std_err", "count"
    for (n = 1; n <= max_nf; n++) {
        name = (n in header) ? header[n] : "col_" n

        if (count[n] > 0)
            print name, mean[n], sqrt(var[n]), sqrt(var[n] / count[n]), count[n]
        else
            # No numeric data in this column: avoid dividing by zero.
            print name, "NA", "NA", "NA", 0
    }
}