summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorwukong <wukong@longaeva>2018-10-14 16:00:25 -0700
committerwukong <wukong@longaeva>2018-10-14 16:00:25 -0700
commit4b4f50352d061c569fe448e72aaa47ee54d138bc (patch)
tree4d818fd18caa59fb8d5843b6344bc5ea497a8363
parent97304f19e701d18ca753b90b993fffa1c58c5f9a (diff)
restructured conditional prints in diff, added parabolic focus solution to quad_reg output
-rw-r--r--diff.awk8
-rw-r--r--diff1.awk11
-rw-r--r--fib.awk2
-rw-r--r--lin_reg.awk15
-rw-r--r--quad_reg.awk27
-rw-r--r--sterling_approx.awk17
6 files changed, 51 insertions, 29 deletions
diff --git a/diff.awk b/diff.awk
index 3bb58c0..721aac6 100644
--- a/diff.awk
+++ b/diff.awk
@@ -30,10 +30,8 @@ NR == 1 {
### diff columns
for (n=1; n<=NF; n++) {
printf(dheader[n])
- if (n < NF)
- printf(OFS)
+ printf(n < NF ? OFS : ORS)
}
- printf(ORS)
}
NF {
@@ -63,8 +61,6 @@ NF {
}
else
diff[y] = ""
- if (y < nf_max)
- printf(OFS)
+ printf(y < nf_max ? OFS : ORS)
}
- printf(ORS)
}
diff --git a/diff1.awk b/diff1.awk
index db7d2bc..cf1f2ff 100644
--- a/diff1.awk
+++ b/diff1.awk
@@ -1,6 +1,6 @@
-#!usr/bin/awk -f
+#!/usr/bin/awk -f
-### diff.awk
+### diff1.awk
# numerical diff along columns
BEGIN {
@@ -23,8 +23,7 @@ NR == 1 {
dheader[n] = "dcol" n
}
printf(dheader[n])
- if (n < NF)
- printf(OFS)
+ printf(n < NF ? OFS : ORS)
}
}
@@ -44,8 +43,6 @@ NF {
}
else
diff[y] = ""
- if (y < nf_max)
- printf(OFS)
+ printf(y < nf_max ? OFS : ORS)
}
- printf(ORS)
}
diff --git a/fib.awk b/fib.awk
index 6107c4c..07e8bab 100644
--- a/fib.awk
+++ b/fib.awk
@@ -6,5 +6,5 @@
BEGIN {
n = ARGV[1]
printf(OFMT ORS,
- (1/sqrt(5))*((1.0 + sqrt(5))/2.0)^n - (1/sqrt(5))*((1.0 - sqrt(5))/2.0)^n)
+ (1.0/sqrt(5.0))*((1.0 + sqrt(5.0))/2.0)^n - (1.0/sqrt(5.0))*((1.0 - sqrt(5.0))/2.0)^n)
}
diff --git a/lin_reg.awk b/lin_reg.awk
index 16fda52..e0d67ca 100644
--- a/lin_reg.awk
+++ b/lin_reg.awk
@@ -31,7 +31,7 @@ NF > 0 {
sum[y] += $y
sum2[y] += $y*$y
delta0[y] = $y - mean[y]
- mean[y] = mean[y] + delta0[y]/count[y]
+ mean[y] += delta0[y]/count[y]
delta1[y] = $y - mean[y]
sum_delta[y] += delta1[y]
sum_delta2[y] += delta0[y]*delta1[y]
@@ -39,7 +39,7 @@ NF > 0 {
### sample variance
(count[y] > 1) ? var[y] = sum_delta2[y]/(count[y] - 1) : var[y] = ""
- # x = row, y = col
+ # x = row, y = col, trendline: y = A + Bx
for (x=1; x<=nf_max; x++) {
count[x,y] += 1
sum_xy[x,y] += $x*$y
@@ -48,10 +48,6 @@ NF > 0 {
# covariance
(count[x,y] > 1) ? cov[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1) : cov[x,y] = ""
- # correlation
- r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y])
- (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1
-
ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x])
if (ab_den[x,y]) {
a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y]
@@ -62,7 +58,7 @@ NF > 0 {
b[x,y] = 1
}
- ### error estimate
+ # error estimate
err_den[x,y] = count[x,y]*(count[x,y] - 2)
if (count[x,y] > 2) {
err[x,y] = $y - (a[x,y] + b[x,y]*$x)
@@ -74,6 +70,11 @@ NF > 0 {
a_err_den[x,y] = count[x,y]*b_err_den[x,y]
if (a_err_den[x,y])
a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y]
+
+ # correlation
+ r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y])
+ (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1
+
}
}
else
diff --git a/quad_reg.awk b/quad_reg.awk
index 7d9304a..4267412 100644
--- a/quad_reg.awk
+++ b/quad_reg.awk
@@ -4,7 +4,6 @@
# quadratic regression along columns
BEGIN {
- OFMT="%.9g"
sign = "[+-]?"
decimal = "[0-9]+[.]?[0-9]*"
fraction = "[.][0-9]*"
@@ -23,6 +22,8 @@ NF > 0 {
### iterate over columns
for (y=1; y<=nf_max; y++) {
+ if ($y == header[n])
+ continue
if ($y ~ number) {
### mean
@@ -77,7 +78,7 @@ NF > 0 {
}
else {
c[x,y] = 0
- b[x,y] = 0
+ b[x,y] = 1
}
a[x,y] = mean[y] - b[x,y]*mean[x] - c[x,y]*mean2[x]
@@ -88,16 +89,24 @@ NF > 0 {
# correlation
sum_delta2[y] ? r[x,y] = sqrt(1 - sum_err2[x,y]/sum_delta2[y]) : r[x,y] = 0
+ D[x,y] = b[x,y]*b[x,y] - 4.0*a[x,y]*c[x,y]
+
+ # quadratic roots (x-intercepts)
+ if (c[x,y]) {
+ rx0[x,y] = (-1.0*b[x,y] - sqrt(D[x,y])/(2.0*c[x,y]))
+ rx1[x,y] = (-1.0*b[x,y] + sqrt(D[x,y])/(2.0*c[x,y]))
+ }
+
# vertex of parabola
if (c[x,y]) {
- xv[x,y] = -1.0*b[x,y]/(2.0*c[x,y])
- yv[x,y] = -1.0*(b[x,y]*b[x,y])/(4.0*c[x,y]) + a[x,y]
+ xv[x,y] = -0.5*b[x,y]/c[x,y]
+ yv[x,y] = -0.25*(b[x,y]*b[x,y])/c[x,y] + a[x,y]
}
- # roots (x-intercept)
+ # focus of parabola
if (c[x,y]) {
- rx0[x,y] = (-1.0*b[x,y] - sqrt(b[x,y]*b[x,y] - 4.0*a[x,y]*c[x,y]))/(2.0*c[x,y])
- rx1[x,y] = (-1.0*b[x,y] + sqrt(b[x,y]*b[x,y] - 4.0*a[x,y]*c[x,y]))/(2.0*c[x,y])
+ xf[x,y] = -0.5*b[x,y]/c[x,y]
+ yf[x,y] = (4.0*a[x,y]*c[x,y] - b[x,y]*b[x,y] + 1.0)/(4.0*c[x,y])
}
}
}
@@ -112,8 +121,10 @@ END {
if (x != y && r[x,y]) {
printf(OFMT OFS "(%s)" OFS " = (" OFMT ")(%s)^2" OFS " + (" OFMT ")(%s)" OFS " + (" OFMT ")",
10.0*log(r[x,y]*r[x,y])/log(10), header[y], c[x,y], header[x], b[x,y], header[x], a[x,y])
- printf("\t[" OFMT "," OFMT "][" OFMT "," OFMT "][" OFMT "," OFMT "]" OFS" [" OFMT "," OFMT "]" ORS,
+ printf(" [" OFMT "," OFMT "][" OFMT "," OFMT "][" OFMT "," OFMT "]" OFS" [" OFMT "," OFMT "]",
rx0[x,y], 0, rx1[x,y], 0, 0, a[x,y], xv[x,y], yv[x,y])
+ printf(" [" OFMT "," OFMT "]", xf[x,y], yf[x,y])
+ printf(" [" OFMT "]" ORS, sqrt((yf[x,y] - yv[x,y])*(yf[x,y] - yv[x,y])))
}
}
}
diff --git a/sterling_approx.awk b/sterling_approx.awk
new file mode 100644
index 0000000..5abe41d
--- /dev/null
+++ b/sterling_approx.awk
@@ -0,0 +1,17 @@
+#!/usr/bin/awk -f
+
+### sterling_approx.awk
+# https://en.wikipedia.org/wiki/Stirling%27s_approximation
+
+BEGIN {
+ ARGV[1] ? n = ARGV[1] : n = 0
+ pi = 4*atan2(1,1)
+ p = 0
+ if (n > 0) {
+ p = 1
+ for (m=n; m>0; m--)
+ p *= n*exp(-1)
+ p = sqrt(2*pi*n)*p*(1 + 1/(12*n) + 1/(288*n*n) - 139/(51840*n*n*n) - 571/(2488320*n*n*n*n))
+ }
+ printf(OFMT ORS, p)
+}