diff options
| author | wukong <wukong@longaeva> | 2018-06-17 16:38:38 -0700 |
|---|---|---|
| committer | wukong <wukong@longaeva> | 2018-06-17 16:44:59 -0700 |
| commit | 2482727a6902e44e6a68236f878f5f9bf7947bd2 (patch) | |
| tree | 1849d67ab3f730d4ddecbfbe5486d618ff3ac5ec /cov.awk | |
| parent | 4916e9b13765de970deff094abb3eb50c663834a (diff) | |
added covariance matrix calculation
Diffstat (limited to 'cov.awk')
| -rw-r--r-- | cov.awk | 61 |
1 files changed, 61 insertions, 0 deletions
#!/usr/bin/awk -f

### cov.awk
# Online (single-pass) covariance matrix of the columns of a table.
#
# Usage:   awk -f cov.awk data.txt
#
# Input:   one observation per record, one variable per field (split by FS).
#          If a field of the first record is not a number it is taken as
#          that column's label; numeric first-record fields are data and
#          the column is labelled "col<N>".
# Output:  a header line "cov<OFS>label1<OFS>label2..." followed by one line
#          per column holding the sample covariances (divisor n - 1) with
#          every column, printed with 18 significant digits.
#          NOTE: an entry with fewer than two observations is undefined and
#          is printed as 0.

BEGIN {
    OFMT = "%.18g"
    sign = "[+-]?"
    decimal = "[0-9]+[.]?[0-9]*"
    # At least one digit must follow a leading point, so that a lone "."
    # is not classified as a number.
    fraction = "[.][0-9]+"
    exponent = "([Ee]" sign "[0-9]+)?"
    number = "^" sign "(" decimal "|" fraction ")" exponent "$"
}

# Label of column i: the text taken from the first record, or "col<i>" for
# a column that only appeared in later records.
function label(i) {
    return (i in header) ? header[i] : "col" i
}

# True when field i of the current record is the column's label text rather
# than a data value (the header record itself, or a repeated header line).
function is_label(i) {
    return (i in header) && ($i == header[i])
}

### first record: decide the label of every column
NR == 1 {
    for (y = 1; y <= NF; y++) {
        if ($y ~ number)
            header[y] = "col" y
        else
            header[y] = $y
    }
}

### every non-empty record: update the running co-moments
NF > 0 {
    if (NF > nf_max)
        nf_max = NF

    # Only fields present in this record take part; looping up to nf_max
    # would feed missing fields of short records into the sums as 0.
    ### columns
    for (y = 1; y <= NF; y++) {
        if (is_label(y))
            continue
        ### rows
        for (x = 1; x <= NF; x++) {
            # Label text must not be accumulated as the number 0.
            if (is_label(x))
                continue
            n = ++count[x,y]
            # dx uses the mean of x *before* this observation, while the
            # y deviation below uses the mean of y *after* it; this pairing
            # is what makes the one-pass co-moment update exact.
            dx = $x - meanx[x,y]
            meanx[x,y] += dx / n
            meany[x,y] += ($y - meany[x,y]) / n
            C[x,y] += dx * ($y - meany[x,y])
        }
    }
}

END {
    # Labels come from the input, so they are always printed through "%s"
    # and never used as the printf format string themselves.

    ### column headers
    printf("%s", "cov")
    for (y = 1; y <= nf_max; y++)
        printf("%s%s", OFS, label(y))
    printf("%s", ORS)

    ### columns
    for (y = 1; y <= nf_max; y++) {
        printf("%s%s", label(y), OFS)
        ### rows
        for (x = 1; x <= nf_max; x++) {
            # Sample covariance needs at least two observations; otherwise
            # the empty string is formatted by %.18g as 0.
            printf("%.18g", (count[x,y] > 1) ? C[x,y] / (count[x,y] - 1) : "")
            if (x < nf_max)
                printf("%s", OFS)
        }
        printf("%s", ORS)
    }
}
