summary | refs | log | tree | commit | diff
path: root/quad_reg.awk
diff options
context:
space:
mode:
Diffstat (limited to 'quad_reg.awk')
-rw-r--r-- quad_reg.awk 27
1 files changed, 16 insertions, 11 deletions
diff --git a/quad_reg.awk b/quad_reg.awk
index 1b30afd..0620808 100644
--- a/quad_reg.awk
+++ b/quad_reg.awk
@@ -1,7 +1,7 @@
#!/usr/bin/awk -f
### quad_reg.awk
-# simple linear regression between columns
+# quadratic regression along columns
BEGIN {
OFMT="%.9g"
@@ -36,8 +36,10 @@ NF > 0 {
### difference from the mean
delta[y] = $y - mean[y]
+ delta2[y] = $y*$y - mean2[y]
sum_delta[y] += delta[y]
- sum_delta2[y] += delta[y]*delta[y]
+ sum2_delta[y] += delta[y]*delta[y]
+ sum_delta2[y] += delta2[y]
### sample variance
(count[y] > 1) ? var[y] = sum_delta2[y]/(count[y] - 1) : var[y] = ""
@@ -48,14 +50,17 @@ NF > 0 {
sum_xy[x,y] += $x*$y
sum_x2y[x,y] += $x*$x*$y
sum_delta_xy[x,y] += delta[x]*delta[y]
+ sum_delta_xx2[x,y] += delta[x]*delta2[x]
+ sum_delta_x2y[x,y] += delta2[x]*delta[y]
+ sum_delta_x2x2[x,y] += delta2[x]*delta2[x]
# covariances
if (count[x,y] > 1) {
- s_xx[x,y] = sum2[x]/(count[x,y]) - mean[x]*mean[x]
- s_xy[x,y] = sum_xy[x,y]/(count[x,y]) - mean[x]*mean[y]
- s_xx2[x,y] = sum3[x]/(count[x,y]) - mean[x]*mean2[x]
- s_x2x2[x,y] = sum4[x]/(count[x,y]) - mean2[x]*mean2[x]
- s_x2y[x,y] = sum_x2y[x]/(count[x,y]) - mean2[x]*mean[y]
+ s_xx[x,y] = sum2_delta[x]/(count[x,y] - 1)
+ s_xy[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1)
+ s_xx2[x,y] = sum_delta_xx2[x,y]/(count[x,y] - 1)
+ s_x2x2[x,y] = sum_delta_x2x2[x,y]/(count[x,y] - 1)
+ s_x2y[x,y] = sum_delta_x2y[x,y]/(count[x,y] - 1)
}
bc_den[x,y] = (s_xx[x,y]*s_x2x2[x,y] - s_xx2[x,y]*s_xx2[x,y])
@@ -67,14 +72,14 @@ NF > 0 {
c[x,y] = 0
b[x,y] = 0
}
- a[x,y] = mean[y] - b[x,y]*mean[x] - c[x,y]*mean[x]*mean[x]
+ a[x,y] = mean[y] - b[x,y]*mean[x] - c[x,y]*mean2[x]
# error estimate
err[x,y] = ($y - (a[x,y] + b[x,y]*$x + c[x,y]*$x*$x))
sum_err2[x,y] += err[x,y]*err[x,y]
# correlation
- sum_delta2[y] ? r2[x,y] = sum_err2[x,y]/sum_delta2[y] : r2[x,y] = 1
+ sum_delta2[y] ? r[x,y] = sqrt(1 - sum_err2[x,y]/sum_delta2[y]) : r[x,y] = 0
}
}
else
@@ -85,9 +90,9 @@ NF > 0 {
END {
for (x=1; x<=nf_max; x++) {
for (y=1; y<=nf_max; y++) {
- if (x != y && r2[x,y]) {
+ if (x != y && r[x,y]) {
printf(OFMT OFS "(%s)" OFS " = (" OFMT ")(%s)^2" OFS " + (" OFMT ")(%s)" OFS " + (" OFMT ")" ORS,
- 10.0*log(r2[x,y])/log(10), header[y], c[x,y], header[x], b[x,y], header[x], a[x,y])
+ 10.0*log(r[x,y]*r[x,y])/log(10), header[y], c[x,y], header[x], b[x,y], header[x], a[x,y])
}
}
}