From 2482727a6902e44e6a68236f878f5f9bf7947bd2 Mon Sep 17 00:00:00 2001 From: wukong Date: Sun, 17 Jun 2018 16:38:38 -0700 Subject: added covariance matrix calculation --- cov.awk | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 cov.awk (limited to 'cov.awk') diff --git a/cov.awk b/cov.awk new file mode 100644 index 0000000..176fd96 --- /dev/null +++ b/cov.awk @@ -0,0 +1,61 @@ +#!/usr/bin/awk -f + +### cov.awk +# online covariance algorithm + +BEGIN { + OFMT = "%.18g" + sign = "[+-]?" + decimal = "[0-9]+[.]?[0-9]*" + fraction = "[.][0-9]*" + exponent = "([Ee]" sign "[0-9]+)?" + number = "^" sign "(" decimal "|" fraction ")" exponent "$" +} + +NR == 1 { + for (y=1; y<=NF; y++) + ($y ~ number) ? header[y] = "col" y : header[y] = $y + printf(header[y]) +} + +NF > 0 { + if (NF > nf_max) + nf_max = NF + + ### columns + for (y=1; y<=nf_max; y++) { + if ($y == header[y]) + continue + ### rows + for (x=1; x<=nf_max; x++) { + count[x,y]++ + dx[x,y] = $x - meanx[x,y] + meanx[x,y] += dx[x,y]/count[x,y] + meany[x,y] += ($y - meany[x,y])/count[x,y] + C[x,y] += dx[x,y]*($y - meany[x,y]) + cov_pop[x,y] = C[x,y]/count[x,y] + (count[x,y] > 1) ? cov_samp[x,y] = C[x,y]/(count[x,y] - 1) : cov_samp[x,y] = "" + } + } +} + +END { + ### column headers + printf("cov") + for (y=1; y<=nf_max; y++) { + printf(OFS header[y]) + } + printf(ORS) + + ### columns + for (y=1; y<=nf_max; y++) { + printf(header[y] OFS) + ### rows + for (x=1; x<=nf_max; x++) { + printf("%.18g", cov_samp[x,y]) + if (x < nf_max) + printf(OFS) + } + printf(ORS) + } +} -- cgit v1.2.3