diff options
Diffstat (limited to '')
| -rw-r--r-- | quad_reg.awk | 62 |
1 files changed, 33 insertions, 29 deletions
diff --git a/quad_reg.awk b/quad_reg.awk index 8939947..1b30afd 100644 --- a/quad_reg.awk +++ b/quad_reg.awk @@ -1,9 +1,10 @@ #!/usr/bin/awk -f -### lin_reg2.awk +### quad_reg.awk # simple linear regression between columns BEGIN { + OFMT="%.9g" sign = "[+-]?" decimal = "[0-9]+[.]?[0-9]*" fraction = "[.][0-9]*" @@ -28,7 +29,10 @@ NF > 0 { count[y] += 1 sum[y] += $y sum2[y] += $y*$y + sum3[y] += $y*$y*$y + sum4[y] += $y*$y*$y*$y mean[y] = sum[y]/count[y] + mean2[y] = sum2[y]/count[y] ### difference from the mean delta[y] = $y - mean[y] @@ -38,38 +42,39 @@ NF > 0 { ### sample variance (count[y] > 1) ? var[y] = sum_delta2[y]/(count[y] - 1) : var[y] = "" - # x = row, y = col + # x = row, y = col, trendline: y = A + Bx + Cx^2 for (x=1; x<=nf_max; x++) { count[x,y] += 1 sum_xy[x,y] += $x*$y + sum_x2y[x,y] += $x*$x*$y sum_delta_xy[x,y] += delta[x]*delta[y] - # correlation - r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y]) - (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1 + # covariances + if (count[x,y] > 1) { + s_xx[x,y] = sum2[x]/(count[x,y]) - mean[x]*mean[x] + s_xy[x,y] = sum_xy[x,y]/(count[x,y]) - mean[x]*mean[y] + s_xx2[x,y] = sum3[x]/(count[x,y]) - mean[x]*mean2[x] + s_x2x2[x,y] = sum4[x]/(count[x,y]) - mean2[x]*mean2[x] + s_x2y[x,y] = sum_x2y[x]/(count[x,y]) - mean2[x]*mean[y] + } - ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x]) - if (ab_den[x,y]) { - a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y] - b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y] + bc_den[x,y] = (s_xx[x,y]*s_x2x2[x,y] - s_xx2[x,y]*s_xx2[x,y]) + if (bc_den[x,y]) { + c[x,y] = (s_x2y[x,y]*s_xx[x,y] - s_xy[x,y]*s_xx2[x,y])/bc_den[x,y] + b[x,y] = (s_xy[x,y]*s_x2x2[x,y] - s_x2y[x,y]*s_xx2[x,y])/bc_den[x,y] } else { - a[x,y] = 0 - b[x,y] = 1 + c[x,y] = 0 + b[x,y] = 0 } + a[x,y] = mean[y] - b[x,y]*mean[x] - c[x,y]*mean[x]*mean[x] - ### error estimate - err_den[x,y] = count[x,y]*(count[x,y] - 2) - if (count[x,y] > 2) { - err[x,y] = $y - (a[x,y] + b[x,y]*$x) - sum_err2[x,y] += err[x,y]*err[x,y] - } - b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x] - if (b_err_den[x,y]) - b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y]) - a_err_den[x,y] = count[x,y]*b_err_den[x,y] - if (a_err_den[x,y]) - a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y] + # error estimate + err[x,y] = ($y - (a[x,y] + b[x,y]*$x + c[x,y]*$x*$x)) + sum_err2[x,y] += err[x,y]*err[x,y] + + # correlation + sum_delta2[y] ? r2[x,y] = sum_err2[x,y]/sum_delta2[y] : r2[x,y] = 1 } } else @@ -78,12 +83,11 @@ NF > 0 { } END { - for (y=1; y<=nf_max; y++) { - for (x=1; x<=nf_max; x++) { - if (x != y && r[x,y]) { - printf(OFMT OFS "(%s)" OFS " = (" OFMT " +/- " OFMT ")(%s)" OFS " + (" OFMT " +/- " OFMT ")" ORS, - 10.0*log(r[x,y]*r[x,y])/log(10), header[y], b[x,y], b_err[x,y], header[x], - a[x,y], a_err[x,y]) + for (x=1; x<=nf_max; x++) { + for (y=1; y<=nf_max; y++) { + if (x != y && r2[x,y]) { + printf(OFMT OFS "(%s)" OFS " = (" OFMT ")(%s)^2" OFS " + (" OFMT ")(%s)" OFS " + (" OFMT ")" ORS, + 10.0*log(r2[x,y])/log(10), header[y], c[x,y], header[x], b[x,y], header[x], a[x,y]) } } } |
