diff options
| author | wukong <wukong@longaeva> | 2018-06-17 16:38:38 -0700 |
|---|---|---|
| committer | wukong <wukong@longaeva> | 2018-06-17 16:44:59 -0700 |
| commit | 2482727a6902e44e6a68236f878f5f9bf7947bd2 (patch) | |
| tree | 1849d67ab3f730d4ddecbfbe5486d618ff3ac5ec /lin_reg.awk | |
| parent | 4916e9b13765de970deff094abb3eb50c663834a (diff) | |
added covariance matrix calculation
Diffstat (limited to 'lin_reg.awk')
| -rw-r--r-- | lin_reg.awk | 34 |
1 file changed, 17 insertions, 17 deletions
diff --git a/lin_reg.awk b/lin_reg.awk
index 8622289..56114a2 100644
--- a/lin_reg.awk
+++ b/lin_reg.awk
@@ -4,7 +4,7 @@
 # simple linear regression between columns
 
 BEGIN {
-    OFS = "%.18g"
+    OFMT = "%.18g"
     sign = "[+-]?"
     decimal = "[0-9]+[.]?[0-9]*"
     fraction = "[.][0-9]*"
@@ -13,13 +13,11 @@ BEGIN {
 }
 
 NR == 1 {
-    header_nf = NF
-    for (n=1; n<=NF; n++) {
-        ($n !~ number) ? header[n] = $n : header[n] = "col" n
-    }
+    for (n=1; n<=NF; n++)
+        ($n ~ number) ? header[n] = "col" n : header[n] = $n
 }
 
-NF != 0 {
+NF > 0 {
     if (NF > nf_max)
         nf_max = NF
 
@@ -31,15 +29,14 @@ NF != 0 {
         count[y] += 1
         sum[y] += $y
         sum2[y] += $y*$y
-        mean[y] = sum[y]/count[y]
-
-        ### difference from the mean
-        delta[y] = $y - mean[y]
-        sum_delta[y] += delta[y]
-        sum_delta2[y] += delta[y]*delta[y]
+        delta0[y] = $y - mean[y]
+        mean[y] = mean[y] + delta0[y]/count[y]
+        delta1[y] = $y - mean[y]
+        sum_delta[y] += delta1[y]
+        sum_delta2[y] += delta0[y]*delta1[y]
 
         ### sample variance
-        (count[y] > 1) ? var[y] = sum_delta2[y]/(count[y] - 1) : var[y] = 0
+        (count[y] > 1) ? var[y] = sum_delta2[y]/(count[y] - 1) : var[y] = ""
 
         # x = row, y = col
         for (x=1; x<=nf_max; x++) {
@@ -47,9 +44,12 @@ NF != 0 {
             sum_xy[x,y] += $x*$y
             sum_delta_xy[x,y] += delta[x]*delta[y]
 
+            # covariance
+            #(count[x,y] > 1) ? cov[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1) : cov[x,y] = ""
+
             # correlation
             r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y])
-            (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1
+            (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1
 
             ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x])
             if (ab_den[x,y]) {
@@ -84,9 +84,9 @@ END {
     for (y=1; y<=nf_max; y++) {
         for (x=1; x<=nf_max; x++) {
             if (x != y && r[x,y]) {
-                printf("\n %.18g \t (%s) \t = (%.18g +/- %.18g)(%s) \t + (%.18g +/- %.18g)",
-                    10.0*log(r[x,y]*r[x,y])/log(10), header[y], b[x,y], b_err[x,y], header[x],
-                    a[x,y], a_err[x,y])
+                printf("%.9g "OFS" (%s) "OFS" = (%.9g +/- %.9g)(%s) "OFS" + (%.9g +/- %.9g)%s",
+                    10.0*log(r[x,y]*r[x,y])/log(10.0), header[y], b[x,y],
+                    b_err[x,y], header[x], a[x,y], a_err[x,y], ORS)
             }
         }
     }
