From 8875b420a873a360a9484778b487394add318a5e Mon Sep 17 00:00:00 2001
From: wukong
Date: Wed, 13 Jun 2018 21:41:39 -0700
Subject: added shebang, replaced if-elses with shortcut notation, ported online mean and variance calc to mean.awk

---
Review notes (text between "---" and the diff is not part of the commit):
  * the field test used the loop index (m ~ number) instead of the field
    text ($m ~ number), so every field was accepted and words counted as 0
  * the sign class "[+-±]" was a range from "+" to "±"; it is now "[-+]"
    ("±" cannot be converted to a number by awk anyway)
  * a lone "." no longer counts as a number
  * END no longer divides by zero when no number was read, and prints
    "nan" instead of relying on sqrt("nan") when fewer than two were read
  * the index line below is kept from the original posting; its post-image
    blob id does not describe this revised content
  * indentation of context/removed lines was reconstructed as four spaces;
    use "git apply --ignore-whitespace" if the target file differs

 mean.awk | 51 +++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 43 insertions(+), 8 deletions(-)

(limited to 'mean.awk')

diff --git a/mean.awk b/mean.awk
index 45bedee..c1d31a6 100644
--- a/mean.awk
+++ b/mean.awk
@@ -1,13 +1,48 @@
+#!/usr/bin/awk -f
+
 ### mean.awk
-# find mean average of a list of numbers.
+# calculate mean average
+#
+# Reads whitespace-separated fields from the input, keeps the ones that look
+# like decimal numbers, and prints their mean, sample standard deviation,
+# standard error of the mean and count.  Non-numeric fields are skipped.
+#
+# usage: awk -f mean.awk [file ...]
 
 BEGIN {
-    X = ARGV[1]
-    inputsize = split(X, Xarr)
-    Yarr[n] = 0
-    SUM = 0
-    for (m=1; m <= inputsize; m++) {
-        SUM =+ Xarr[m]
+    # '-' goes first in the bracket so it is a literal, not a range operator
+    sign = "[-+]?"
+    decimal = "[0-9]+[.]?[0-9]*"
+    # at least one digit is required so that a lone "." is not a number
+    fraction = "[.][0-9]+"
+    exponent = "([Ee]" sign "[0-9]+)?"
+    number = "^" sign "(" decimal "|" fraction ")" exponent "$"
+}
+
+# Welford's 'online' algorithm for variance
+NF > 0 {
+    for (m=1; m<=NF; m++) {
+        # test the field's text ($m), not the loop index (m)
+        if ($m ~ number) {
+            count += 1
+            delta0 = $m - mean
+            mean = mean + delta0/count
+            delta1 = $m - mean
+            M2 = M2 + delta0*delta1
+        }
     }
-    print SUM
+}
+
+END {
+    print "mean", "std_dev", "std_err", "count"
+    if (count == 0) {
+        # no numeric input at all: nothing is defined
+        print "nan", "nan", "nan", 0
+    } else if (count == 1) {
+        # the sample variance needs at least two values
+        print mean, "nan", "nan", count
+    } else {
+        var = M2/(count - 1)
+        print mean, sqrt(var), sqrt(var/count), count
+    }
 }
-- 
cgit v1.2.3