summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--  ABOUT.TXT     2
-rw-r--r--  diff.awk      2
-rw-r--r--  diff1.awk     30
-rw-r--r--  lin_reg1.awk  1
-rw-r--r--  lin_reg2.awk  1
-rw-r--r--  mean.awk      17
-rw-r--r--  mean_avg.awk  9
7 files changed, 33 insertions, 29 deletions
diff --git a/ABOUT.TXT b/ABOUT.TXT
index b16ad44..07ab7ec 100644
--- a/ABOUT.TXT
+++ b/ABOUT.TXT
@@ -1,2 +1,2 @@
repo: awk;
-desc: experiments in awk, etc.;
+desc: experiments in using awk for mathematical and statistical calculations;
diff --git a/diff.awk b/diff.awk
index ba452c7..bf8b472 100644
--- a/diff.awk
+++ b/diff.awk
@@ -1,7 +1,7 @@
#!/usr/bin/awk -f
### diff.awk
-# numerical diff along columns
+# print numerical diff along columns
BEGIN {
OFS = FS
diff --git a/diff1.awk b/diff1.awk
index cf1f2ff..7322e98 100644
--- a/diff1.awk
+++ b/diff1.awk
@@ -1,9 +1,11 @@
#!/usr/bin/awk -f
### diff1.awk
-# numerical diff along columns
+# print numerical diff along columns
BEGIN {
+ OFS = FS
+ # OFMT = "%.9g"
sign = "[+-]?"
decimal = "[0-9]+[.]?[0-9]*"
fraction = "[.][0-9]*"
@@ -12,7 +14,7 @@ BEGIN {
}
NR == 1 {
- ### orig data columns
+ # orig data columns
for (n=1; n<=NF; n++) {
if ($n !~ number) {
header[n] = $n
@@ -31,18 +33,20 @@ NF {
if (NF > nf_max)
nf_max = NF
- ### data columns
- for (y=1; y<=nf_max; y++) {
- if ($y == header[y] || $y == dheader[y])
+ # data columns
+ for (n=1; n<=nf_max; n++) {
+ if ($n == header[n] || $n == dheader[n])
continue
- if ($y ~ number) {
- data[y] = $y
- (data_prev[y] ~ number) ? diff[y] = data[y] - data_prev[y] : diff[y] = ""
- data_prev[y] = data[y]
- printf(OFMT, diff[y])
+ if ($n ~ number) {
+ data[n] = $n
+ if (data_prev[n] ~ number)
+ diff[n] = data[n] - data_prev[n]
+ else
+ diff[n] = ""
+ data_prev[n] = data[n]
}
- else
- diff[y] = ""
- printf(y < nf_max ? OFS : ORS)
+ printf(diff[n])
+ printf(n < nf_max ? OFS : ORS)
}
}
+
diff --git a/lin_reg1.awk b/lin_reg1.awk
index ae59c77..c657035 100644
--- a/lin_reg1.awk
+++ b/lin_reg1.awk
@@ -36,7 +36,6 @@ NF > 0 {
sum_delta[y] += delta1[y]
sum_delta2[y] += delta0[y]*delta1[y]
-
# x = row, y = col, trendline: y = A + Bx
for (x=1; x<=nf_max; x++) {
count[x,y] += 1
diff --git a/lin_reg2.awk b/lin_reg2.awk
index f06d890..450f0f0 100644
--- a/lin_reg2.awk
+++ b/lin_reg2.awk
@@ -36,7 +36,6 @@ NF > 0 {
sum_delta[y] += delta1[y]
sum_delta2[y] += delta0[y]*delta1[y]
-
# x = row, y = col, trendline: y = A + Bx
for (x=1; x<=nf_max; x++) {
count[x,y] += 1
diff --git a/mean.awk b/mean.awk
index 754acfe..069db4c 100644
--- a/mean.awk
+++ b/mean.awk
@@ -5,20 +5,21 @@
BEGIN {
OFS = FS
- sign = "[+-]?"
- decimal = "[0-9]+[.]?[0-9]*"
- fraction = "[.][0-9]+"
- exponent = "([Ee]" sign "[0-9]+)?"
- number = "^" sign "(" decimal "|" fraction ")" exponent "$"
+ #sign = "[+-]?"
+ #decimal = "[0-9]+[.]?[0-9]*"
+ #fraction = "[.][0-9]*"
+ #exponent = "([Ee]" "[+-]?" "[0-9]+)?"
+ number = "^[+-]?([0-9]+[.]?[0-9]*|[.][0-9]*)([Ee][+-]?[0-9]+)?$"
}
# Welford's 'online' algorithm for variance
-NF > 0 {
+NF {
for (n=1; n<=NF; n++) {
if ($n ~ number) {
count += 1
(count == 1 || $n < min) ? min = $n : min = min
(count == 1 || $n > max) ? max = $n : max = max
+ range = max - min
delta0 = $n - mean
mean += delta0/count
delta1 = $n - mean
@@ -29,7 +30,7 @@ NF > 0 {
}
END {
- print "mean", "std_err", "std_dev", "min", "max", "total", "count"
- print mean, sqrt(var/count), sqrt(var), min, max, (mean*count), count
+ print "mean", "std_err", "std_dev", "range", "min", "max", "total", "count"
+ print mean, sqrt(var/count), sqrt(var), range, min, max, (mean*count), count
}
diff --git a/mean_avg.awk b/mean_avg.awk
index e4596b0..bef984d 100644
--- a/mean_avg.awk
+++ b/mean_avg.awk
@@ -20,7 +20,7 @@ NR == 1 {
}
# Welford's 'online' algorithm for variance
-NF > 0 {
+NF {
if (NF > max_nf)
max_nf = NF
for (n=1; n <= NF; n++) {
@@ -28,6 +28,7 @@ NF > 0 {
count[n] += 1
(count[n] == 1 || $n < min[n]) ? min[n] = $n : min[n] = min[n]
(count[n] == 1 || $n > max[n]) ? max[n] = $n : max[n] = max[n]
+ range[n] = max[n] - min[n]
delta0[n] = $n - mean[n]
mean[n] += delta0[n]/count[n]
delta1[n] = $n - mean[n]
@@ -38,12 +39,12 @@ NF > 0 {
}
END {
- print "col", "mean", "std_err", "std_dev", "min", "max", "total", "count"
+ print "col", "mean", "std_err", "std_dev", "range", "min", "max", "total", "count"
for (n=1; n<=max_nf; n++) {
if (header[n])
- print header[n], mean[n], sqrt(var[n]/count[n]), sqrt(var[n]), min[n], max[n], mean[n]*count[n], count[n]
+ print header[n], mean[n], sqrt(var[n]/count[n]), sqrt(var[n]), range[n], min[n], max[n], mean[n]*count[n], count[n]
else
- print "col" n, mean[n], sqrt(var[n]/count[n]), sqrt(var[n]), min[n], max[n], mean[n]*count[n], count[n]
+ print "col" n, mean[n], sqrt(var[n]/count[n]), sqrt(var[n]), range[n], min[n], max[n], mean[n]*count[n], count[n]
}
}