diff options
Diffstat (limited to '')
| -rw-r--r-- | lin_reg.awk | 15 |
1 files changed, 8 insertions, 7 deletions
diff --git a/lin_reg.awk b/lin_reg.awk index 16fda52..e0d67ca 100644 --- a/lin_reg.awk +++ b/lin_reg.awk @@ -31,7 +31,7 @@ NF > 0 { sum[y] += $y sum2[y] += $y*$y delta0[y] = $y - mean[y] - mean[y] = mean[y] + delta0[y]/count[y] + mean[y] += delta0[y]/count[y] delta1[y] = $y - mean[y] sum_delta[y] += delta1[y] sum_delta2[y] += delta0[y]*delta1[y] @@ -39,7 +39,7 @@ NF > 0 { ### sample variance (count[y] > 1) ? var[y] = sum_delta2[y]/(count[y] - 1) : var[y] = "" - # x = row, y = col + # x = row, y = col, trendline: y = A + Bx for (x=1; x<=nf_max; x++) { count[x,y] += 1 sum_xy[x,y] += $x*$y @@ -48,10 +48,6 @@ NF > 0 { # covariance (count[x,y] > 1) ? cov[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1) : cov[x,y] = "" - # correlation - r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y]) - (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1 - ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x]) if (ab_den[x,y]) { a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y] @@ -62,7 +58,7 @@ NF > 0 { b[x,y] = 1 } - ### error estimate + # error estimate err_den[x,y] = count[x,y]*(count[x,y] - 2) if (count[x,y] > 2) { err[x,y] = $y - (a[x,y] + b[x,y]*$x) @@ -74,6 +70,11 @@ NF > 0 { a_err_den[x,y] = count[x,y]*b_err_den[x,y] if (a_err_den[x,y]) a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y] + + # correlation + r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y]) + (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1 + } } else |
