#!/usr/bin/awk -f
### quad_reg.awk
# Quadratic regression along columns: for each pair of numeric columns
# (x, y) a trendline y = A + B*x + C*x^2 is fitted and reported.
# Set the numeric output format and build the anchored regular expression
# ("number") that the other rules use to decide whether a field is numeric.
BEGIN {
    # OFMT is also spliced into the printf formats of the END rule.
    OFMT = "%.9g"
    sign = "[+-]?"
    # digits with an optional fractional part: 12  12.  12.5
    decimal = "[0-9]+[.]?[0-9]*"
    # leading-dot form: .5 -- at least one digit is required, otherwise a
    # lone "." (or "+.", "-.", ".e5") would be accepted as a number
    fraction = "[.][0-9]+"
    # optional exponent: e5  E-3  e+10
    exponent = "([Ee]" sign "[0-9]+)?"
    number = "^" sign "(" decimal "|" fraction ")" exponent "$"
}
# First record: choose a display name for every column.  A field that looks
# like a number is assumed to be data, so the column gets the synthetic name
# "col<N>"; any other field is taken as the column's header text.
NR == 1 {
    for (n = 1; n <= NF; n++) {
        if ($n ~ number)
            header[n] = "col" n
        else
            header[n] = $n
    }
}
# Per-record accumulation and fitting (runs for every non-empty record).
#
# 1. Tracks the widest record seen in nf_max.
# 2. Updates per-column statistics for every numeric field:
#    count[col], sum[col], sum2[col], mean[col], mean2[col], var[col].
# 3. For every ordered pair of columns (x, y) that are BOTH numeric in this
#    record, updates pair-specific power sums and refits the parabola
#    y = A + B*x + C*x^2, leaving the current results in
#    a[x,y], b[x,y], c[x,y], r[x,y], xv[x,y], yv[x,y], rx0[x,y], rx1[x,y].
#
# All statistics are derived from raw power sums, so they are exact
# least-squares values for the rows seen so far (up to rounding) and do not
# depend on the order in which columns are visited.
NF > 0 {
    if (NF > nf_max)
        nf_max = NF

    ### pass 1: classify fields and update per-column statistics
    for (y = 1; y <= NF; y++) {
        isnum[y] = ($y ~ number)
        if (!isnum[y])
            continue
        yval = $y + 0
        count[y] += 1
        sum[y] += yval
        sum2[y] += yval*yval
        mean[y] = sum[y]/count[y]
        mean2[y] = sum2[y]/count[y]
        ### sample variance: (sum(y^2) - n*mean^2)/(n - 1)
        if (count[y] > 1) {
            var[y] = (sum2[y] - sum[y]*mean[y])/(count[y] - 1)
            # rounding can push a zero variance slightly negative
            if (var[y] < 0)
                var[y] = 0
        }
        else
            var[y] = ""
    }

    ### pass 2: x = predictor column, y = response column
    # Only entries 1..NF of isnum are read, so values left over from a
    # wider earlier record are never consulted.
    for (y = 1; y <= NF; y++) {
        if (!isnum[y])
            continue
        yval = $y + 0
        for (x = 1; x <= NF; x++) {
            # a row contributes to a pair only when both cells are numeric
            if (!isnum[x])
                continue
            xval = $x + 0
            x2val = xval*xval
            count[x,y] += 1
            sum_x[x,y] += xval
            sum_x2[x,y] += x2val
            sum_x3[x,y] += x2val*xval
            sum_x4[x,y] += x2val*x2val
            sum_y[x,y] += yval
            sum_y2[x,y] += yval*yval
            sum_xy[x,y] += xval*yval
            sum_x2y[x,y] += x2val*yval
            quad_fit(x, y)
        }
    }
}

# Refit y = A + B*x + C*x^2 for the column pair (x, y) from the pair's
# running power sums and store the results in the global arrays
# a, b, c, r, xv, yv, rx0, rx1 (all keyed by [x,y]).
# Everything after "y" in the parameter list is a local variable.
function quad_fit(x, y,    k, n, mx, mx2, my, sxx, sxy, sxx2, sx2x2, sx2y, syy, den, r2, disc, root)
{
    k = x SUBSEP y
    n = count[k]
    mx = sum_x[k]/n
    mx2 = sum_x2[k]/n
    my = sum_y[k]/n

    # centred sums of squares / cross products over the pair's rows
    sxx = sum_x2[k] - sum_x[k]*mx
    sxy = sum_xy[k] - sum_x[k]*my
    sxx2 = sum_x3[k] - sum_x[k]*mx2
    sx2x2 = sum_x4[k] - sum_x2[k]*mx2
    sx2y = sum_x2y[k] - sum_x2[k]*my
    syy = sum_y2[k] - sum_y[k]*my

    # A parabola needs at least three points and a non-singular system;
    # otherwise fall back to the constant fit y = mean(y).
    den = sxx*sx2x2 - sxx2*sxx2
    if (n > 2 && den != 0) {
        c[k] = (sx2y*sxx - sxy*sxx2)/den
        b[k] = (sxy*sx2x2 - sx2y*sxx2)/den
    }
    else {
        c[k] = 0
        b[k] = 0
    }
    a[k] = my - b[k]*mx - c[k]*mx2

    # correlation: R^2 = explained / total sum of squares, clamped to
    # [0, 1] so rounding can never hand sqrt() a negative argument
    r2 = 0
    if (syy > 0)
        r2 = (b[k]*sxy + c[k]*sx2y)/syy
    if (r2 < 0)
        r2 = 0
    if (r2 > 1)
        r2 = 1
    r[k] = sqrt(r2)

    # vertex and real roots exist only for a genuine parabola; entries are
    # removed when they do not apply so no stale values survive
    delete xv[k]
    delete yv[k]
    delete rx0[k]
    delete rx1[k]
    if (c[k] != 0) {
        xv[k] = -1.0*b[k]/(2.0*c[k])
        yv[k] = a[k] - (b[k]*b[k])/(4.0*c[k])
        disc = b[k]*b[k] - 4.0*a[k]*c[k]
        # negative discriminant: the parabola never crosses y = 0
        if (disc >= 0) {
            root = sqrt(disc)
            rx0[k] = (-1.0*b[k] - root)/(2.0*c[k])
            rx1[k] = (-1.0*b[k] + root)/(2.0*c[k])
        }
    }
}
# Report one line per ordered column pair (x, y), x != y, that has a
# non-zero correlation r[x,y]:
#
#   <10*log10(r^2)> (<y name>) = (<C>)(<x name>)^2 + (<B>)(<x name>) + (<A>)
#   \t[<root0>,0][<root1>,0][0,<A>] [<vertex x>,<vertex y>]
#
# Numbers are formatted with OFMT.  Columns that have no header entry are
# named "col<N>".  A root or vertex that was never computed for the pair is
# printed as "nan" rather than as a misleading 0.
END {
    for (x = 1; x <= nf_max; x++) {
        for (y = 1; y <= nf_max; y++) {
            if (x == y || !((x,y) in r) || !r[x,y])
                continue
            # columns that first appear after record 1 have no header
            xname = ((x in header) && header[x] != "") ? header[x] : "col" x
            yname = ((y in header) && header[y] != "") ? header[y] : "col" y
            printf(OFMT OFS "(%s)" OFS " = (" OFMT ")(%s)^2" OFS " + (" OFMT ")(%s)" OFS " + (" OFMT ")",
                10.0*log(r[x,y]*r[x,y])/log(10), yname, c[x,y], xname, b[x,y], xname, a[x,y])
            # x-intercepts, y-intercept, then the vertex
            printf("\t[%s," OFMT "][%s," OFMT "][" OFMT "," OFMT "]" OFS " [%s,%s]" ORS,
                shown_or_nan(rx0, x, y), 0, shown_or_nan(rx1, x, y), 0, 0, a[x,y],
                shown_or_nan(xv, x, y), shown_or_nan(yv, x, y))
        }
    }
}

# Format arr[x,y] with OFMT, or return "nan" when no such element exists.
# The membership test does not create the element.
function shown_or_nan(arr, x, y)
{
    if ((x,y) in arr)
        return sprintf(OFMT, arr[x,y])
    return "nan"
}
### end of quad_reg.awk