summaryrefslogtreecommitdiff
path: root/mean.awk
blob: b6a794ea4098ac3816e898bb2c195843cc4fdb40 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/awk -f

### mean.awk
# calculate summary statistics (mean, standard error, standard deviation,
# min, max, count) over every numeric field found in the input

BEGIN {
    # write the results with the same separator the input is split on
    OFS = FS

    # Assemble an anchored pattern that accepts a whole field only when it
    # is a decimal literal: optional sign, then either digits with an
    # optional point and optional trailing digits ("12", "12.", "12.5") or
    # a bare fraction (".5"), then an optional exponent ("e-3", "E+10").
    sign = "[+-]?"
    digits = "[0-9]+"
    mantissa = digits "[.]?[0-9]*" "|" "[.]" digits
    exponent = "([Ee]" sign digits ")?"
    number = "^" sign "(" mantissa ")" exponent "$"
}

# Fold every numeric field of the record into the running statistics, one
# value at a time (Welford's 'online' algorithm for the variance).
NF > 0 {
    for (i = 1; i <= NF; i++) {
        # ignore fields that are not well-formed numbers
        if ($i !~ number)
            continue

        count++

        # the first accepted value seeds both extremes
        if (count == 1 || $i < min)
            min = $i
        if (count == 1 || $i > max)
            max = $i

        # d_old: deviation from the mean before this value was included
        # d_new: deviation from the mean after it was included
        d_old = $i - mean
        mean += d_old / count
        d_new = $i - mean
        M2 += d_old * d_new

        # the sample variance needs at least two values; until then keep
        # a "nan" placeholder
        if (count > 1) {
            var = M2 / (count - 1)
        } else {
            var = "nan"
        }
    }
}

# Print a header line followed by one line of results, separated by OFS:
# mean, standard error of the mean, sample standard deviation, minimum,
# maximum and the number of values that were counted.
#
# Statistics that are undefined for the amount of data seen are printed as
# the literal text "nan" instead of being computed:
#   - no values at all: everything except the count (printed as 0);
#   - a single value:   std_err and std_dev.
END {
    print "mean", "std_err", "std_dev", "min", "max", "count"

    if (count < 1) {
        # nothing was counted: var/count would divide by zero
        print "nan", "nan", "nan", "nan", "nan", 0
    } else if (count < 2) {
        # with one value var holds the placeholder string "nan"; its
        # numeric value is not guaranteed to be NaN in every awk, so
        # print the placeholder rather than passing it through sqrt()
        print mean, "nan", "nan", min, max, count
    } else {
        print mean, sqrt(var/count), sqrt(var), min, max, count
    }
}