diff options
Diffstat (limited to 'lin_reg.awk')
| -rw-r--r-- | lin_reg.awk | 73 |
1 files changed, 38 insertions, 35 deletions
diff --git a/lin_reg.awk b/lin_reg.awk index 52e55d3..0213c7f 100644 --- a/lin_reg.awk +++ b/lin_reg.awk @@ -17,7 +17,7 @@ NR == 1 { ($n ~ number) ? header[n] = "col" n : header[n] = $n } -NF > 0 { +NF { if (NF > nf_max) nf_max = NF @@ -25,6 +25,7 @@ NF > 0 { for (y=1; y<=nf_max; y++) { if ($y == header[n]) continue + if ($y ~ number) { ### mean @@ -42,45 +43,47 @@ NF > 0 { # x = row, y = col, trendline: y = A + Bx for (x=1; x<=nf_max; x++) { - count[x,y] += 1 - sum_xy[x,y] += $x*$y - sum_delta_xy[x,y] += delta0[x]*delta1[y] + if ($x ~ number) { + count[x,y] += 1 + sum_xy[x,y] += $x*$y + sum_delta_xy[x,y] += delta0[x]*delta1[y] - # covariance - #(count[x,y] > 1) ? cov[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1) : cov[x,y] = "" + # covariance + #(count[x,y] > 1) ? cov[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1) : cov[x,y] = "" - # correlation - r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y]) - (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1 + # correlation + r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y]) + (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1 - ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x]) - if (ab_den[x,y]) { - a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y] - b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y] - } - else { - a[x,y] = 0 - b[x,y] = 1 - } + ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x]) + if (ab_den[x,y]) { + a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y] + b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y] + } + else { + a[x,y] = 0 + b[x,y] = 1 + } + + # error estimate + err_den[x,y] = count[x,y]*(count[x,y] - 2) + if (count[x,y] > 2) { + err[x,y] = $y - (a[x,y] + b[x,y]*$x) + sum_err2[x,y] += err[x,y]*err[x,y] + } + b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x] + if (b_err_den[x,y]) + b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y]) + a_err_den[x,y] = count[x,y]*b_err_den[x,y] + if (a_err_den[x,y]) + a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y] - # error estimate - err_den[x,y] = count[x,y]*(count[x,y] - 2) - if (count[x,y] > 2) { - err[x,y] = $y - (a[x,y] + b[x,y]*$x) - sum_err2[x,y] += err[x,y]*err[x,y] + # weighted mean, from HP-20S manual, pg 60 + xw[x,y] = sum_xy[x,y]/sum[y] + yw[x,y] = b[x,y]*xw[x,y] + a[x,y] + xw_dist[x,y] = (xw[x,y] - mean[x]) + yw_dist[x,y] = b[x,y]*(xw[x,y] - mean[x]) } - b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x] - if (b_err_den[x,y]) - b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y]) - a_err_den[x,y] = count[x,y]*b_err_den[x,y] - if (a_err_den[x,y]) - a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y] - - # weighted mean, from HP-20S manual, pg 60 - xw[x,y] = sum_xy[x,y]/sum[y] - yw[x,y] = b[x,y]*xw[x,y] + a[x,y] - xw_dist[x,y] = (xw[x,y] - mean[x]) - yw_dist[x,y] = b[x,y]*(xw[x,y] - mean[x]) } } else |
