summaryrefslogtreecommitdiff
path: root/quad_reg.awk
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--quad_reg.awk62
1 files changed, 33 insertions, 29 deletions
diff --git a/quad_reg.awk b/quad_reg.awk
index 8939947..1b30afd 100644
--- a/quad_reg.awk
+++ b/quad_reg.awk
@@ -1,9 +1,10 @@
#!/usr/bin/awk -f
-### lin_reg2.awk
+### quad_reg.awk
# simple linear regression between columns
BEGIN {
+ OFMT="%.9g"
sign = "[+-]?"
decimal = "[0-9]+[.]?[0-9]*"
fraction = "[.][0-9]*"
@@ -28,7 +29,10 @@ NF > 0 {
count[y] += 1
sum[y] += $y
sum2[y] += $y*$y
+ sum3[y] += $y*$y*$y
+ sum4[y] += $y*$y*$y*$y
mean[y] = sum[y]/count[y]
+ mean2[y] = sum2[y]/count[y]
### difference from the mean
delta[y] = $y - mean[y]
@@ -38,38 +42,39 @@ NF > 0 {
### sample variance
(count[y] > 1) ? var[y] = sum_delta2[y]/(count[y] - 1) : var[y] = ""
- # x = row, y = col
+ # x = row, y = col, trendline: y = A + Bx + Cx^2
for (x=1; x<=nf_max; x++) {
count[x,y] += 1
sum_xy[x,y] += $x*$y
+ sum_x2y[x,y] += $x*$x*$y
sum_delta_xy[x,y] += delta[x]*delta[y]
- # correlation
- r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y])
- (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1
+ # covariances
+ if (count[x,y] > 1) {
+ s_xx[x,y] = sum2[x]/(count[x,y]) - mean[x]*mean[x]
+ s_xy[x,y] = sum_xy[x,y]/(count[x,y]) - mean[x]*mean[y]
+ s_xx2[x,y] = sum3[x]/(count[x,y]) - mean[x]*mean2[x]
+ s_x2x2[x,y] = sum4[x]/(count[x,y]) - mean2[x]*mean2[x]
+ s_x2y[x,y] = sum_x2y[x]/(count[x,y]) - mean2[x]*mean[y]
+ }
- ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x])
- if (ab_den[x,y]) {
- a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y]
- b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y]
+ bc_den[x,y] = (s_xx[x,y]*s_x2x2[x,y] - s_xx2[x,y]*s_xx2[x,y])
+ if (bc_den[x,y]) {
+ c[x,y] = (s_x2y[x,y]*s_xx[x,y] - s_xy[x,y]*s_xx2[x,y])/bc_den[x,y]
+ b[x,y] = (s_xy[x,y]*s_x2x2[x,y] - s_x2y[x,y]*s_xx2[x,y])/bc_den[x,y]
}
else {
- a[x,y] = 0
- b[x,y] = 1
+ c[x,y] = 0
+ b[x,y] = 0
}
+ a[x,y] = mean[y] - b[x,y]*mean[x] - c[x,y]*mean[x]*mean[x]
- ### error estimate
- err_den[x,y] = count[x,y]*(count[x,y] - 2)
- if (count[x,y] > 2) {
- err[x,y] = $y - (a[x,y] + b[x,y]*$x)
- sum_err2[x,y] += err[x,y]*err[x,y]
- }
- b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x]
- if (b_err_den[x,y])
- b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y])
- a_err_den[x,y] = count[x,y]*b_err_den[x,y]
- if (a_err_den[x,y])
- a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y]
+ # error estimate
+ err[x,y] = ($y - (a[x,y] + b[x,y]*$x + c[x,y]*$x*$x))
+ sum_err2[x,y] += err[x,y]*err[x,y]
+
+ # correlation
+ sum_delta2[y] ? r2[x,y] = sum_err2[x,y]/sum_delta2[y] : r2[x,y] = 1
}
}
else
@@ -78,12 +83,11 @@ NF > 0 {
}
END {
- for (y=1; y<=nf_max; y++) {
- for (x=1; x<=nf_max; x++) {
- if (x != y && r[x,y]) {
- printf(OFMT OFS "(%s)" OFS " = (" OFMT " +/- " OFMT ")(%s)" OFS " + (" OFMT " +/- " OFMT ")" ORS,
- 10.0*log(r[x,y]*r[x,y])/log(10), header[y], b[x,y], b_err[x,y], header[x],
- a[x,y], a_err[x,y])
+ for (x=1; x<=nf_max; x++) {
+ for (y=1; y<=nf_max; y++) {
+ if (x != y && r2[x,y]) {
+ printf(OFMT OFS "(%s)" OFS " = (" OFMT ")(%s)^2" OFS " + (" OFMT ")(%s)" OFS " + (" OFMT ")" ORS,
+ 10.0*log(r2[x,y])/log(10), header[y], c[x,y], header[x], b[x,y], header[x], a[x,y])
}
}
}