summaryrefslogtreecommitdiff
path: root/cov.awk
blob: ffac77009242603e06d5c5dbd6e82f4a6c6a447d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/awk -f

### cov.awk
# Online (single-pass) covariance: prints the sample covariance matrix of the input columns.

# Build the anchored regular expression `number`, used to decide whether a
# field of the first record is numeric data or a textual column label.
#
# Accepted forms: optional sign, then either digits with an optional point and
# optional trailing digits ("12", "12.", "12.5") or a point followed by at
# least one digit (".5"), then an optional exponent ("e10", "E-3").
BEGIN {
    sign = "[+-]?"
    decimal = "[0-9]+[.]?[0-9]*"
    # Require a digit after the point so that a lone "." is not a number.
    fraction = "[.][0-9]+"
    exponent = "([Ee]" sign "[0-9]+)?"
    number = "^" sign "(" decimal "|" fraction ")" exponent "$"
}

# First record: choose a label for every column.
# A field that matches the `number` regex is data, so the column receives the
# synthetic label "col<N>"; any other field is taken verbatim as the label.
# Nothing is printed here; labels are only stored in header[].
NR == 1 {
    for (y = 1; y <= NF; y++) {
        if ($y ~ number)
            header[y] = "col" y
        else
            header[y] = $y
    }
}

# Every non-empty record: update the running co-moment of each column pair.
#
# For each pair (x, y) the rule keeps, indexed by [x,y]:
#   count     number of observations folded in so far
#   meanx     running mean of column x
#   meany     running mean of column y
#   C         running sum of (x - old meanx) * (y - new meany)
#   cov_pop   C / count
#   cov_samp  C / (count - 1), or "" while count is 1
#
# A field contributes no observation when it is empty or missing (short
# record) or when it equals the column's label in header[]. The test is
# applied to both members of a pair so that [x,y] and [y,x] always see the
# same set of observations.
NF > 0 {
    if (NF > nf_max)
        nf_max = NF

    ### flag fields of this record that carry no observation
    for (i = 1; i <= nf_max; i++)
        skip[i] = ($i == "" || $i == header[i])

    ### columns
    for (y = 1; y <= nf_max; y++) {
        if (skip[y])
            continue
        ### rows
        for (x = 1; x <= nf_max; x++) {
            if (skip[x])
                continue
            n = ++count[x,y]
            # delta uses the mean of x *before* this observation is added,
            # while the y term below uses the mean of y *after* the update.
            delta = $x - meanx[x,y]
            meanx[x,y] += delta / n
            meany[x,y] += ($y - meany[x,y]) / n
            C[x,y] += delta * ($y - meany[x,y])
            cov_pop[x,y] = C[x,y] / n
            cov_samp[x,y] = (n > 1) ? C[x,y] / (n - 1) : ""
        }
    }
}

# After the last record: print the matrix held in cov_samp[].
#
# Layout: a heading line "cov" followed by every column label, then one line
# per column y consisting of its label and the values cov_samp[x,y] for
# x = 1..nf_max. Fields are joined with OFS, lines end with ORS, and each value
# is formatted with OFMT. Labels and separators are always passed as printf
# arguments, never as the format, so a "%" inside them is printed literally.
END {
    ### column headers
    printf("%s", "cov")
    for (y = 1; y <= nf_max; y++)
        printf("%s%s", OFS, header[y])
    printf("%s", ORS)

    ### columns
    for (y = 1; y <= nf_max; y++) {
        printf("%s%s", header[y], OFS)
        ### rows
        for (x = 1; x <= nf_max; x++) {
            # NOTE: an empty or unset entry is converted by a numeric OFMT
            # (such as the default "%.6g") and therefore prints as 0.
            printf(OFMT, cov_samp[x,y])
            if (x < nf_max)
                printf("%s", OFS)
        }
        printf("%s", ORS)
    }
}