diff options
| -rw-r--r-- | ABOUT.TXT | 2 |
| -rw-r--r-- | diff.awk | 2 |
| -rw-r--r-- | diff1.awk | 30 |
| -rw-r--r-- | lin_reg1.awk | 1 |
| -rw-r--r-- | lin_reg2.awk | 1 |
| -rw-r--r-- | mean.awk | 17 |
| -rw-r--r-- | mean_avg.awk | 9 |
7 files changed, 33 insertions, 29 deletions
@@ -1,2 +1,2 @@
repo: awk;
-desc: experiments in awk, etc.;
+desc: experiments in using awk for mathematical and statistical calculations;
@@ -1,7 +1,7 @@
#!/usr/bin/awk -f
### diff.awk
-# numerical diff along columns
+# print numerical diff along columns
BEGIN {
OFS = FS
@@ -1,9 +1,11 @@
#!/usr/bin/awk -f
### diff1.awk
-# numerical diff along columns
+# print numerical diff along columns
BEGIN {
+ OFS = FS
+ # OFMT = "%.9g"
sign = "[+-]?"
decimal = "[0-9]+[.]?[0-9]*"
fraction = "[.][0-9]*"
@@ -12,7 +14,7 @@ BEGIN {
}
NR == 1 {
- ### orig data columns
+ # orig data columns
for (n=1; n<=NF; n++) {
if ($n !~ number) {
header[n] = $n
@@ -31,18 +33,20 @@ NF {
if (NF > nf_max)
nf_max = NF
- ### data columns
- for (y=1; y<=nf_max; y++) {
- if ($y == header[y] || $y == dheader[y])
+ # data columns
+ for (n=1; n<=nf_max; n++) {
+ if ($n == header[n] || $n == dheader[n])
continue
- if ($y ~ number) {
- data[y] = $y
- (data_prev[y] ~ number) ? diff[y] = data[y] - data_prev[y] : diff[y] = ""
- data_prev[y] = data[y]
- printf(OFMT, diff[y])
+ if ($n ~ number) {
+ data[n] = $n
+ if (data_prev[n] ~ number)
+ diff[n] = data[n] - data_prev[n]
+ else
+ diff[n] = ""
+ data_prev[n] = data[n]
}
- else
- diff[y] = ""
- printf(y < nf_max ? OFS : ORS)
+ printf(diff[n])
+ printf(n < nf_max ? OFS : ORS)
}
}
+
diff --git a/lin_reg1.awk b/lin_reg1.awk
index ae59c77..c657035 100644
--- a/lin_reg1.awk
+++ b/lin_reg1.awk
@@ -36,7 +36,6 @@ NF > 0 {
sum_delta[y] += delta1[y]
sum_delta2[y] += delta0[y]*delta1[y]
- # x = row, y = col, trendline: y = A + Bx
for (x=1; x<=nf_max; x++) {
count[x,y] += 1
diff --git a/lin_reg2.awk b/lin_reg2.awk
index f06d890..450f0f0 100644
--- a/lin_reg2.awk
+++ b/lin_reg2.awk
@@ -36,7 +36,6 @@ NF > 0 {
sum_delta[y] += delta1[y]
sum_delta2[y] += delta0[y]*delta1[y]
- # x = row, y = col, trendline: y = A + Bx
for (x=1; x<=nf_max; x++) {
count[x,y] += 1
@@ -5,20 +5,21 @@ BEGIN {
OFS = FS
- sign = "[+-]?"
- decimal = "[0-9]+[.]?[0-9]*"
- fraction = "[.][0-9]+"
- exponent = "([Ee]" sign "[0-9]+)?"
- number = "^" sign "(" decimal "|" fraction ")" exponent "$"
+ #sign = "[+-]?"
+ #decimal = "[0-9]+[.]?[0-9]*"
+ #fraction = "[.][0-9]*"
+ #exponent = "([Ee]" "[+-]?" "[0-9]+)?"
+ number = "^[+-]?([0-9]+[.]?[0-9]*|[.][0-9]*)([Ee][+-]?[0-9]+)?$"
}
# Welford's 'online' algorithm for variance
-NF > 0 {
+NF {
for (n=1; n<=NF; n++) {
if ($n ~ number) {
count += 1
(count == 1 || $n < min) ? min = $n : min = min
(count == 1 || $n > max) ? max = $n : max = max
+ range = max - min
delta0 = $n - mean
mean += delta0/count
delta1 = $n - mean
@@ -29,7 +30,7 @@ NF > 0 { }
END {
- print "mean", "std_err", "std_dev", "min", "max", "total", "count"
- print mean, sqrt(var/count), sqrt(var), min, max, (mean*count), count
+ print "mean", "std_err", "std_dev", "range", "min", "max", "total", "count"
+ print mean, sqrt(var/count), sqrt(var), range, min, max, (mean*count), count
}
diff --git a/mean_avg.awk b/mean_avg.awk
index e4596b0..bef984d 100644
--- a/mean_avg.awk
+++ b/mean_avg.awk
@@ -20,7 +20,7 @@ NR == 1 {
}
# Welford's 'online' algorithm for variance
-NF > 0 {
+NF {
if (NF > max_nf)
max_nf = NF
for (n=1; n <= NF; n++) {
@@ -28,6 +28,7 @@ NF > 0 {
count[n] += 1
(count[n] == 1 || $n < min[n]) ? min[n] = $n : min[n] = min[n]
(count[n] == 1 || $n > max[n]) ? max[n] = $n : max[n] = max[n]
+ range[n] = max[n] - min[n]
delta0[n] = $n - mean[n]
mean[n] += delta0[n]/count[n]
delta1[n] = $n - mean[n]
@@ -38,12 +39,12 @@ NF > 0 {
}
END {
- print "col", "mean", "std_err", "std_dev", "min", "max", "total", "count"
+ print "col", "mean", "std_err", "std_dev", "range", "min", "max", "total", "count"
for (n=1; n<=max_nf; n++) {
if (header[n])
- print header[n], mean[n], sqrt(var[n]/count[n]), sqrt(var[n]), min[n], max[n], mean[n]*count[n], count[n]
+ print header[n], mean[n], sqrt(var[n]/count[n]), sqrt(var[n]), range[n], min[n], max[n], mean[n]*count[n], count[n]
else
- print "col" n, mean[n], sqrt(var[n]/count[n]), sqrt(var[n]), min[n], max[n], mean[n]*count[n], count[n]
+ print "col" n, mean[n], sqrt(var[n]/count[n]), sqrt(var[n]), range[n], min[n], max[n], mean[n]*count[n], count[n]
}
}
|
