summary refs log tree commit diff
path: root/mean_avg.awk
diff options
context:
space:
mode:
author wukong <wukong@longaeva> 2018-06-17 16:38:38 -0700
committer wukong <wukong@longaeva> 2018-06-17 16:44:59 -0700
commit 2482727a6902e44e6a68236f878f5f9bf7947bd2 (patch)
tree 1849d67ab3f730d4ddecbfbe5486d618ff3ac5ec /mean_avg.awk
parent 4916e9b13765de970deff094abb3eb50c663834a (diff)
added covariance matrix calculation
Diffstat (limited to 'mean_avg.awk')
-rw-r--r-- mean_avg.awk 27
1 files changed, 12 insertions, 15 deletions
diff --git a/mean_avg.awk b/mean_avg.awk
index 6f5a270..565dcc2 100644
--- a/mean_avg.awk
+++ b/mean_avg.awk
@@ -4,20 +4,17 @@
# average columns of numerical data
BEGIN {
- OFS = "%.18g"
- sign = "[+-±]?"
- decimal = "[0-9]+[.]?[0-9]*"
- fraction = "[.][0-9]*"
- exponent = "([Ee]" sign "[0-9]+)?"
- number = "^" sign "(" decimal "|" fraction ")" exponent "$"
+ OFMT = "%.18g"
+ #sign = "[+-]?"
+ #decimal = "[0-9]+[.]?[0-9]*"
+ #fraction = "[.][0-9]*"
+ #exponent = "([Ee]" "[+-]?" "[0-9]+)?"
+ number = "^[+-]?([0-9]+[.]?[0-9]*|[.][0-9]*)([Ee][+-]?[0-9]+)?$"
}
NR == 1 {
- header_nf = NF
- for (n=1; n<=NF; n++) {
- if ($n !~ number)
- header[n] = $n
- }
+ for (n=1; n<=NF; n++)
+ ($n ~ number) ? header[n] = "col" n : header[n] = $n
}
# Welford's 'online' algorithm for variance
@@ -28,10 +25,10 @@ NF > 0 {
if ($n ~ number) {
count[n] += 1
delta0[n] = $n - mean[n]
- mean[n] = mean[n] + delta0[n]/count[n]
+ mean[n] += delta0[n]/count[n]
delta1[n] = $n - mean[n]
- M2[n] = M2[n] + delta0[n]*delta1[n]
- (count[n] > 1) ? var[n] = M2[n]/(count[n] - 1) : var[n] = "0"
+ M2[n] += delta0[n]*delta1[n]
+ (count[n] > 1) ? var[n] = M2[n]/(count[n] - 1) : var[n] = ""
}
}
}
@@ -42,7 +39,7 @@ END {
if (header[n])
print header[n], mean[n], sqrt(var[n]), sqrt(var[n]/count[n]), count[n]
else
- print "col_" n, mean[n], sqrt(var[n]), sqrt(var[n]/count[n]), count[n]
+ print "col" n, mean[n], sqrt(var[n]), sqrt(var[n]/count[n]), count[n]
}
}