diff options
Diffstat (limited to 'quad_reg.awk')
| -rw-r--r-- | quad_reg.awk | 27 |
1 files changed, 16 insertions, 11 deletions
diff --git a/quad_reg.awk b/quad_reg.awk
index 1b30afd..0620808 100644
--- a/quad_reg.awk
+++ b/quad_reg.awk
@@ -1,7 +1,7 @@
  #!/usr/bin/awk -f
  ### quad_reg.awk
- # simple linear regression between columns
+ # quadratic regression along columns
  BEGIN {
  OFMT="%.9g"
@@ -36,8 +36,10 @@ NF > 0 {
  ### difference from the mean
  delta[y] = $y - mean[y]
+ delta2[y] = $y*$y - mean2[y]
  sum_delta[y] += delta[y]
- sum_delta2[y] += delta[y]*delta[y]
+ sum2_delta[y] += delta[y]*delta[y]
+ sum_delta2[y] += delta2[y]
  ### sample variance
  (count[y] > 1) ? var[y] = sum_delta2[y]/(count[y] - 1) : var[y] = ""
@@ -48,14 +50,17 @@ NF > 0 {
  sum_xy[x,y] += $x*$y
  sum_x2y[x,y] += $x*$x*$y
  sum_delta_xy[x,y] += delta[x]*delta[y]
+ sum_delta_xx2[x,y] += delta[x]*delta2[x]
+ sum_delta_x2y[x,y] += delta2[x]*delta[y]
+ sum_delta_x2x2[x,y] += delta2[x]*delta2[x]
  # covariances
  if (count[x,y] > 1) {
- s_xx[x,y] = sum2[x]/(count[x,y]) - mean[x]*mean[x]
- s_xy[x,y] = sum_xy[x,y]/(count[x,y]) - mean[x]*mean[y]
- s_xx2[x,y] = sum3[x]/(count[x,y]) - mean[x]*mean2[x]
- s_x2x2[x,y] = sum4[x]/(count[x,y]) - mean2[x]*mean2[x]
- s_x2y[x,y] = sum_x2y[x]/(count[x,y]) - mean2[x]*mean[y]
+ s_xx[x,y] = sum2_delta[x]/(count[x,y] - 1)
+ s_xy[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1)
+ s_xx2[x,y] = sum_delta_xx2[x,y]/(count[x,y] - 1)
+ s_x2x2[x,y] = sum_delta_x2x2[x,y]/(count[x,y] - 1)
+ s_x2y[x,y] = sum_delta_x2y[x,y]/(count[x,y] - 1)
  }
  bc_den[x,y] = (s_xx[x,y]*s_x2x2[x,y] - s_xx2[x,y]*s_xx2[x,y])
@@ -67,14 +72,14 @@ NF > 0 {
  c[x,y] = 0
  b[x,y] = 0
  }
- a[x,y] = mean[y] - b[x,y]*mean[x] - c[x,y]*mean[x]*mean[x]
+ a[x,y] = mean[y] - b[x,y]*mean[x] - c[x,y]*mean2[x]
  # error estimate
  err[x,y] = ($y - (a[x,y] + b[x,y]*$x + c[x,y]*$x*$x))
  sum_err2[x,y] += err[x,y]*err[x,y]
  # correlation
- sum_delta2[y] ? r2[x,y] = sum_err2[x,y]/sum_delta2[y] : r2[x,y] = 1
+ sum_delta2[y] ? r[x,y] = sqrt(1 - sum_err2[x,y]/sum_delta2[y]) : r[x,y] = 0
  }
  }
  else
@@ -85,9 +90,9 @@ NF > 0 {
  END {
  for (x=1; x<=nf_max; x++) {
  for (y=1; y<=nf_max; y++) {
- if (x != y && r2[x,y]) {
+ if (x != y && r[x,y]) {
  printf(OFMT OFS "(%s)" OFS " = (" OFMT ")(%s)^2" OFS " + (" OFMT ")(%s)" OFS " + (" OFMT ")" ORS,
- 10.0*log(r2[x,y])/log(10), header[y], c[x,y], header[x], b[x,y], header[x], a[x,y])
+ 10.0*log(r[x,y]*r[x,y])/log(10), header[y], c[x,y], header[x], b[x,y], header[x], a[x,y])
  }
  }
  }
