1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
|
#!/usr/bin/awk -f
### lin_reg1.awk
# simple linear regression between columns
# Set-up: numeric output format and the regular expression used by the other
# rules to decide whether a field is a number.
BEGIN {
    # Default format for numbers written by `print`.  (The format string was
    # previously assigned to OFS, the output *field separator*, by mistake.)
    OFMT = "%.9g"

    # Building blocks of the number pattern.
    sign     = "[+-]?"
    decimal  = "[0-9]+[.]?[0-9]*"          # 12   12.   12.5
    fraction = "[.][0-9]+"                 # .5   (at least one digit, so a lone "." is rejected)
    exponent = "([Ee]" sign "[0-9]+)?"     # optional e-3, E+10, e7

    # Anchored on both ends: the whole field has to be a number.
    number = "^" sign "(" decimal "|" fraction ")" exponent "$"
}
# First record only: derive a display name for every column.
NR == 1 {
    # Remember the width of the first record.
    header_nf = NF

    # A field that is not a number is taken as that column's title;
    # a numeric field means there is no title, so "col<index>" is used.
    idx = 1
    while (idx <= NF) {
        header[idx] = ($idx !~ number) ? $idx : "col" idx
        idx++
    }
}
# Every non-empty record: update the running regression statistics for each
# ordered pair (x, y) of columns that are BOTH numeric in this record.
#
# Maintained for the END rule:
#   nf_max      widest record seen so far
#   count[x,y]  number of records in which columns x and y were both numeric
#   r[x,y]      correlation coefficient of the pair (0 when undefined)
#   b[x,y]      slope     of the least-squares fit  y = b*x + a
#   a[x,y]      intercept of that fit
#
# Statistics are kept per pair (not per column) so that a record with a
# non-numeric cell only affects the pairs that involve that cell.  Means and
# centred sums are updated incrementally (Welford-style), which gives the
# exact centred sums without a second pass over the data.
NF != 0 {
    # Track the widest record; columns are iterated as 1..nf_max.
    if (NF > nf_max)
        nf_max = NF

    # Pass 1: classify every field of this record before touching any sums,
    # so the pair loop below never depends on the order columns are visited.
    for (y = 1; y <= nf_max; y++)
        is_num[y] = ((y <= NF) && ($y ~ number))

    # Pass 2: x is the predictor column, y the response column.
    for (y = 1; y <= nf_max; y++) {
        if (!is_num[y])
            continue
        for (x = 1; x <= nf_max; x++) {
            if (!is_num[x])
                continue

            cnt = ++count[x,y]

            # Deviations from the means *before* this record is included.
            dx = $x - mean_x[x,y]
            dy = $y - mean_y[x,y]

            # Running means of x and y over the records of this pair.
            mean_x[x,y] += dx / cnt
            mean_y[x,y] += dy / cnt

            # Centred sums: old deviation times new deviation.
            dy_new = $y - mean_y[x,y]
            sxx[x,y] += dx * ($x - mean_x[x,y])
            syy[x,y] += dy * dy_new
            sxy[x,y] += dx * dy_new

            # Correlation coefficient; defined only when both columns vary.
            den = sxx[x,y] * syy[x,y]
            r[x,y] = (den > 0) ? sxy[x,y] / sqrt(den) : 0

            # Least-squares line y = b*x + a.  When x has no spread the fit
            # is undefined and the identity line (a = 0, b = 1) is stored.
            if (sxx[x,y] > 0) {
                b[x,y] = sxy[x,y] / sxx[x,y]
                a[x,y] = mean_y[x,y] - b[x,y] * mean_x[x,y]
            }
            else {
                a[x,y] = 0
                b[x,y] = 1
            }
        }
    }
}
# Report: one line per ordered pair of distinct columns (x, y) whose
# correlation coefficient is non-zero, in the form
#
#     <10*log10(r^2)>   (<y name>)   = <slope>(<x name>)   + <intercept>
#
# The first value is r squared expressed in decibels.  Each line is written
# with a leading newline, so the output does not end with a newline.
END {
    for (y = 1; y <= nf_max; y++) {
        for (x = 1; x <= nf_max; x++) {
            # Skip the diagonal and pairs with no (or zero) correlation.
            # The membership test avoids creating empty r[] entries.
            if (x == y || !((x,y) in r) || !r[x,y])
                continue

            # Column names are collected from the first record only; a column
            # that first appears in a wider, later record has none, so fall
            # back to the same "col<index>" scheme used for unnamed columns.
            y_name = ((y in header) && header[y] != "") ? header[y] : "col" y
            x_name = ((x in header) && header[x] != "") ? header[x] : "col" x

            r2[x,y] = r[x,y]*r[x,y]
            printf("\n %.9g \t (%s) \t = %.9g(%s) \t + %.9g",
                   10.0*log(r2[x,y])/log(10), y_name, b[x,y], x_name, a[x,y])
        }
    }
}
|