diff options
Diffstat (limited to 'mean_avg.awk')
| -rw-r--r-- | mean_avg.awk | 27 |
1 files changed, 12 insertions, 15 deletions
diff --git a/mean_avg.awk b/mean_avg.awk index 6f5a270..565dcc2 100644 --- a/mean_avg.awk +++ b/mean_avg.awk @@ -4,20 +4,17 @@ # average columns of numerical data BEGIN { - OFS = "%.18g" - sign = "[+-±]?" - decimal = "[0-9]+[.]?[0-9]*" - fraction = "[.][0-9]*" - exponent = "([Ee]" sign "[0-9]+)?" - number = "^" sign "(" decimal "|" fraction ")" exponent "$" + OFMT = "%.18g" + #sign = "[+-]?" + #decimal = "[0-9]+[.]?[0-9]*" + #fraction = "[.][0-9]*" + #exponent = "([Ee]" "[+-]?" "[0-9]+)?" + number = "^[+-]?([0-9]+[.]?[0-9]*|[.][0-9]*)([Ee][+-]?[0-9]+)?$" } NR == 1 { - header_nf = NF - for (n=1; n<=NF; n++) { - if ($n !~ number) - header[n] = $n - } + for (n=1; n<=NF; n++) + ($n ~ number) ? header[n] = "col" n : header[n] = $n } # Welford's 'online' algorithm for variance @@ -28,10 +25,10 @@ NF > 0 { if ($n ~ number) { count[n] += 1 delta0[n] = $n - mean[n] - mean[n] = mean[n] + delta0[n]/count[n] + mean[n] += delta0[n]/count[n] delta1[n] = $n - mean[n] - M2[n] = M2[n] + delta0[n]*delta1[n] - (count[n] > 1) ? var[n] = M2[n]/(count[n] - 1) : var[n] = "0" + M2[n] += delta0[n]*delta1[n] + (count[n] > 1) ? var[n] = M2[n]/(count[n] - 1) : var[n] = "" } } } @@ -42,7 +39,7 @@ END { if (header[n]) print header[n], mean[n], sqrt(var[n]), sqrt(var[n]/count[n]), count[n] else - print "col_" n, mean[n], sqrt(var[n]), sqrt(var[n]/count[n]), count[n] + print "col" n, mean[n], sqrt(var[n]), sqrt(var[n]/count[n]), count[n] } } |
