summaryrefslogtreecommitdiff
path: root/cov.awk
diff options
context:
space:
mode:
Diffstat (limited to 'cov.awk')
-rw-r--r--cov.awk61
1 files changed, 61 insertions, 0 deletions
diff --git a/cov.awk b/cov.awk
new file mode 100644
index 0000000..176fd96
--- /dev/null
+++ b/cov.awk
@@ -0,0 +1,61 @@
+#!/usr/bin/awk -f
+
+### cov.awk
+# online covariance algorithm
+
+BEGIN {
+ OFMT = "%.18g"
+ sign = "[+-]?"
+ decimal = "[0-9]+[.]?[0-9]*"
+ fraction = "[.][0-9]*"
+ exponent = "([Ee]" sign "[0-9]+)?"
+ number = "^" sign "(" decimal "|" fraction ")" exponent "$"
+}
+
+NR == 1 {
+ for (y=1; y<=NF; y++)
+ ($y ~ number) ? header[y] = "col" y : header[y] = $y
+ printf(header[y])
+}
+
+NF > 0 {
+ if (NF > nf_max)
+ nf_max = NF
+
+ ### columns
+ for (y=1; y<=nf_max; y++) {
+ if ($y == header[y])
+ continue
+ ### rows
+ for (x=1; x<=nf_max; x++) {
+ count[x,y]++
+ dx[x,y] = $x - meanx[x,y]
+ meanx[x,y] += dx[x,y]/count[x,y]
+ meany[x,y] += ($y - meany[x,y])/count[x,y]
+ C[x,y] += dx[x,y]*($y - meany[x,y])
+ cov_pop[x,y] = C[x,y]/count[x,y]
+ (count[x,y] > 1) ? cov_samp[x,y] = C[x,y]/(count[x,y] - 1) : cov_samp[x,y] = ""
+ }
+ }
+}
+
+END {
+ ### column headers
+ printf("cov")
+ for (y=1; y<=nf_max; y++) {
+ printf(OFS header[y])
+ }
+ printf(ORS)
+
+ ### columns
+ for (y=1; y<=nf_max; y++) {
+ printf(header[y] OFS)
+ ### rows
+ for (x=1; x<=nf_max; x++) {
+ printf("%.18g", cov_samp[x,y])
+ if (x < nf_max)
+ printf(OFS)
+ }
+ printf(ORS)
+ }
+}