diff options
Diffstat (limited to '')
| -rw-r--r-- | diff.awk | 39 | ||||
| -rw-r--r-- | gaussian.awk | 5 | ||||
| -rw-r--r-- | lin_reg.awk | 73 |
3 files changed, 63 insertions, 54 deletions
@@ -5,6 +5,7 @@ BEGIN { OFS = FS + # OFMT = "%.9g" sign = "[+-]?" decimal = "[0-9]+[.]?[0-9]*" fraction = "[.][0-9]*" @@ -13,7 +14,7 @@ BEGIN { } NR == 1 { - ### orig data columns + # orig data columns for (n=1; n<=NF; n++) { if ($n !~ number) { header[n] = $n @@ -27,7 +28,7 @@ NR == 1 { printf(OFS) } - ### diff columns + # diff columns for (n=1; n<=NF; n++) { printf(dheader[n]) printf(n < NF ? OFS : ORS) @@ -38,29 +39,31 @@ NF { if (NF > nf_max) nf_max = NF - ### data columns - for (y=1; y<=nf_max; y++) { - if ($y == header[y] || $y == dheader[y]) + # data columns + for (n=1; n<=nf_max; n++) { + if ($n == header[n] || $n == dheader[n]) continue - if ($y ~ number) - printf(OFMT, $y) + if ($n ~ number) + printf(OFMT, $n) else printf("") printf(OFS) } - ### diff columns - for (y=1; y<=nf_max; y++) { - if ($y == header[y] || $y == dheader[y]) + # diff columns + for (n=1; n<=nf_max; n++) { + if ($n == header[n] || $n == dheader[n]) continue - if ($y ~ number) { - data[y] = $y - diff[y] = data[y] - data_prev[y] - data_prev[y] = data[y] - printf(OFMT, diff[y]) + if ($n ~ number) { + data[n] = $n + if (data_prev[n] ~ number) + diff[n] = data[n] - data_prev[n] + else + diff[n] = "" + data_prev[n] = data[n] } - else - diff[y] = "" - printf(y < nf_max ? OFS : ORS) + printf(diff[n]) + printf(n < nf_max ? OFS : ORS) } } + diff --git a/gaussian.awk b/gaussian.awk index dfb5fb4..dc62d37 100644 --- a/gaussian.awk +++ b/gaussian.awk @@ -25,7 +25,10 @@ BEGIN { for (n=0; n<N; n++) {
if (N > 1 && M > 0) {
- w[n] = gaussian(n)
+ # sigma <= 0.5
+ sigma = 0.4
+ en[n] = (n - M)/(sigma*M)
+ w[n] = exp(-0.5*en[n]*en[n])
print n, w[n]/M
}
else {
diff --git a/lin_reg.awk b/lin_reg.awk index 52e55d3..0213c7f 100644 --- a/lin_reg.awk +++ b/lin_reg.awk @@ -17,7 +17,7 @@ NR == 1 { ($n ~ number) ? header[n] = "col" n : header[n] = $n } -NF > 0 { +NF { if (NF > nf_max) nf_max = NF @@ -25,6 +25,7 @@ NF > 0 { for (y=1; y<=nf_max; y++) { if ($y == header[n]) continue + if ($y ~ number) { ### mean @@ -42,45 +43,47 @@ NF > 0 { # x = row, y = col, trendline: y = A + Bx for (x=1; x<=nf_max; x++) { - count[x,y] += 1 - sum_xy[x,y] += $x*$y - sum_delta_xy[x,y] += delta0[x]*delta1[y] + if ($x ~ number) { + count[x,y] += 1 + sum_xy[x,y] += $x*$y + sum_delta_xy[x,y] += delta0[x]*delta1[y] - # covariance - #(count[x,y] > 1) ? cov[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1) : cov[x,y] = "" + # covariance + #(count[x,y] > 1) ? cov[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1) : cov[x,y] = "" - # correlation - r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y]) - (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1 + # correlation + r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y]) + (r_den[x,y]) ? 
r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1 - ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x]) - if (ab_den[x,y]) { - a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y] - b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y] - } - else { - a[x,y] = 0 - b[x,y] = 1 - } + ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x]) + if (ab_den[x,y]) { + a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y] + b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y] + } + else { + a[x,y] = 0 + b[x,y] = 1 + } + + # error estimate + err_den[x,y] = count[x,y]*(count[x,y] - 2) + if (count[x,y] > 2) { + err[x,y] = $y - (a[x,y] + b[x,y]*$x) + sum_err2[x,y] += err[x,y]*err[x,y] + } + b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x] + if (b_err_den[x,y]) + b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y]) + a_err_den[x,y] = count[x,y]*b_err_den[x,y] + if (a_err_den[x,y]) + a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y] - # error estimate - err_den[x,y] = count[x,y]*(count[x,y] - 2) - if (count[x,y] > 2) { - err[x,y] = $y - (a[x,y] + b[x,y]*$x) - sum_err2[x,y] += err[x,y]*err[x,y] + # weighted mean, from HP-20S manual, pg 60 + xw[x,y] = sum_xy[x,y]/sum[y] + yw[x,y] = b[x,y]*xw[x,y] + a[x,y] + xw_dist[x,y] = (xw[x,y] - mean[x]) + yw_dist[x,y] = b[x,y]*(xw[x,y] - mean[x]) } - b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x] - if (b_err_den[x,y]) - b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y]) - a_err_den[x,y] = count[x,y]*b_err_den[x,y] - if (a_err_den[x,y]) - a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y] - - # weighted mean, from HP-20S manual, pg 60 - xw[x,y] = sum_xy[x,y]/sum[y] - yw[x,y] = b[x,y]*xw[x,y] + a[x,y] - xw_dist[x,y] = (xw[x,y] - mean[x]) - yw_dist[x,y] = b[x,y]*(xw[x,y] - mean[x]) } } else |
