blob: aeee3db44611baa025409340456c8d9ca07f3c51 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
|
#!/usr/bin/awk -f
### mean_avg.awk
# average columns of numerical data
BEGIN {
    # Anchored regex deciding whether a field is treated as numeric data.
    # It is assembled from the pieces below (kept as reference comments):
    #sign = "[+-]?"
    #decimal = "[0-9]+[.]?[0-9]*"
    #fraction = "[.][0-9]+"
    #exponent = "([Ee]" "[+-]?" "[0-9]+)?"
    # The fraction form requires at least one digit after the dot, so a
    # lone "." (often used as a missing-value marker) is NOT counted as a
    # number and therefore does not enter the statistics as 0.
    number = "^[+-]?([0-9]+[.]?[0-9]*|[.][0-9]+)([Ee][+-]?[0-9]+)?$"
}
# First record: choose a label for every column. A field that matches the
# `number` pattern is data rather than a heading, so the column gets the
# generic label "col<N>"; any other field is used verbatim as the label.
NR == 1 {
    for (n = 1; n <= NF; n++) {
        if ($n ~ number)
            header[n] = "col" n
        else
            header[n] = $n
    }
}
# Per-column running statistics using Welford's 'online' variance algorithm.
# Runs on every non-empty record; fields that do not match `number` are
# skipped, so each column keeps its own independent sample count.
NF > 0 {
    # Remember the widest record seen so END knows how many columns to report.
    if (NF > max_nf) {
        max_nf = NF
    }

    for (n = 1; n <= NF; n++) {
        if ($n !~ number)
            continue

        count[n]++
        delta0[n] = $n - mean[n]            # deviation from the previous mean
        mean[n] += delta0[n]/count[n]
        delta1[n] = $n - mean[n]            # deviation from the updated mean
        M2[n] += delta0[n]*delta1[n]        # running sum of squared deviations

        # The sample variance needs at least two observations; until then
        # it is stored as an empty string.
        if (count[n] > 1)
            var[n] = M2[n]/(count[n] - 1)
        else
            var[n] = ""
    }
}
# Final report: one line per column with its label, mean, sample standard
# deviation, standard error of the mean, and number of numeric samples.
END {
    print "col", "mean", "std_dev", "std_err", "count"
    for (n = 1; n <= max_nf; n++) {
        # Fall back to a generic label when the first record had no entry
        # for this column (e.g. it was shorter than later records).
        label = (header[n] != "") ? header[n] : "col" n

        if (count[n] > 0) {
            # With a single sample var[n] is "", so both sqrt() calls yield 0.
            print label, mean[n], sqrt(var[n]), sqrt(var[n]/count[n]), count[n]
        } else {
            # The column never held a numeric value. Dividing by its zero
            # count would abort the script with a division-by-zero error,
            # so report the statistics as undefined instead.
            print label, "NaN", "NaN", "NaN", 0
        }
    }
}
|