#!/usr/bin/awk -f

### cov.awk
# Online (single-pass) sample covariance matrix of the numeric columns of a
# delimited text stream.
#
# Example:  awk -F, -f cov.awk data.csv
#
# Input : records split by FS.  Non-numeric fields on the first line are used
#         as column labels; every other column is labelled "col<N>".
# Output: a table whose first line is "cov" followed by the column labels and
#         whose following lines are a label followed by one sample covariance
#         per column.  Fields are joined with OFS (set equal to FS), numbers
#         are formatted with OFMT, and a cell is left empty when fewer than
#         two observations were seen for that pair of columns.
#
# Statistics are kept per (x, y) column pair and a record contributes to a
# pair only when BOTH fields are numeric, so a missing or non-numeric value in
# one column does not distort the pairs that involve it.

BEGIN {
    # Write the result with the same separator the input was read with.
    OFS = FS

    # Anchored pattern for a decimal literal with an optional exponent.
    sign     = "[+-]?"
    decimal  = "[0-9]+[.]?[0-9]*"
    fraction = "[.][0-9]+"          # at least one digit: a lone "." is not a number
    exponent = "([Ee]" sign "[0-9]+)?"
    number   = "^" sign "(" decimal "|" fraction ")" exponent "$"
}

# Return 1 when string s is a complete numeric literal, 0 otherwise.
function is_number(s) {
    return (s ~ number) ? 1 : 0
}

# Return the label of column y: the header captured from line 1, or "col<y>"
# for columns that had no header (e.g. they first appeared on a later line).
function label(y) {
    return (y in header) ? header[y] : "col" y
}

# Fold one observation (vx, vy) into the running statistics of pair (x, y).
# The co-moment is advanced with the deviation of vx from the OLD mean of x
# times the deviation of vy from the NEW mean of y; mixing old and new means
# this way is what makes the one-pass update exact.
function observe(x, y, vx, vy,    n, dx) {
    n = ++count[x, y]
    dx = vx - meanx[x, y]
    meanx[x, y] += dx / n
    meany[x, y] += (vy - meany[x, y]) / n
    C[x, y] += dx * (vy - meany[x, y])
}

# Return the sample covariance (divisor n - 1) of pair (x, y), or "" when the
# pair has fewer than two observations.
function sample_cov(x, y) {
    if (!((x, y) in count) || count[x, y] < 2)
        return ""
    return C[x, y] / (count[x, y] - 1)
}

# Return the printable cell for pair (x, y): OFMT-formatted, or empty when no
# estimate exists (so "no data" is not shown as a covariance of 0).
function format_cov(x, y,    v) {
    v = sample_cov(x, y)
    return (v == "") ? "" : sprintf(OFMT, v)
}

# First line: a numeric field means the column has no header text.
NR == 1 {
    for (y = 1; y <= NF; y++) {
        if (is_number($y))
            header[y] = "col" y
        else
            header[y] = $y
    }
}

# Every non-empty line (including line 1, whose header text is non-numeric
# and therefore ignored below) updates the pairwise statistics.
NF > 0 {
    if (NF > nf_max)
        nf_max = NF

    # Classify each field once per record instead of once per pair.
    for (x = 1; x <= NF; x++)
        numeric[x] = is_number($x)

    ### columns
    for (y = 1; y <= NF; y++) {
        if (!numeric[y])
            continue
        ### rows
        for (x = 1; x <= NF; x++) {
            if (numeric[x])
                observe(x, y, $x, $y)
        }
    }
}

END {
    # Labels and separators are always passed as arguments, never as the
    # printf format, so a "%" in the data cannot be misread as a conversion.

    ### column headers
    printf("%s", "cov")
    for (y = 1; y <= nf_max; y++)
        printf("%s%s", OFS, label(y))
    printf("%s", ORS)

    ### columns
    for (y = 1; y <= nf_max; y++) {
        printf("%s%s", label(y), OFS)
        ### rows
        for (x = 1; x <= nf_max; x++) {
            printf("%s", format_cov(x, y))
            if (x < nf_max)
                printf("%s", OFS)
        }
        printf("%s", ORS)
    }
}