summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorwukong <wukong@longaeva>2023-12-18 12:37:31 -0800
committerwukong <wukong@longaeva>2023-12-18 12:37:31 -0800
commit309c5d8f7ff8c36235222d079955cd3783bb7ad0 (patch)
tree22fa0994657cee27292ef1531211a47293afab5f
parent0c1d68ee8cc2a631d6029285e771ebbfe119995d (diff)
Added a partially working function performing LU decomposition on a square
matrix, ludcmp.awk. This is providing correct answers on _some_ test data, further testing debugging required here.
Diffstat (limited to '')
-rw-r--r--binom_coeff.awk18
-rw-r--r--det.awk57
-rw-r--r--em.awk13
-rw-r--r--exp_reg.awk113
-rw-r--r--fib.awk13
-rw-r--r--gaussian.awk2
-rw-r--r--ludcmp.awk105
-rw-r--r--lupd.awk111
8 files changed, 427 insertions, 5 deletions
diff --git a/binom_coeff.awk b/binom_coeff.awk
new file mode 100644
index 0000000..a19f464
--- /dev/null
+++ b/binom_coeff.awk
@@ -0,0 +1,18 @@
+#! /usr/bin/awk -f
+
+### binomial coeffecient
+# https://rosettacode.org/wiki/Evaluate_binomial_coefficients
+function binom(n, k) {
+ b = 1
+ for (i=1; i<(k+1); i++) {
+ b *= (n - i + 1) / i
+ }
+ return b
+}
+
+BEGIN {
+ ARGV[1] ? N = ARGV[1] : N = 1
+ ARGV[2] ? K = ARGV[2] : K = 1
+ ARGV[3] ? OFMT = "%." ARGV[3] "g" : OFMT = "%g"
+ printf(OFMT ORS, binom(N, K))
+}
diff --git a/det.awk b/det.awk
new file mode 100644
index 0000000..b3e5c1f
--- /dev/null
+++ b/det.awk
@@ -0,0 +1,57 @@
+#!/usr/bin/awk -f
+
+### det.awk
+# determinant via LU
+# input: square array as delimited text
+# output: (scalar) determinant
+
+BEGIN {
+ OFS = "\t"
+ sign = "[+-]?"
+ decimal = "[0-9]+[.]?[0-9]*"
+ fraction = "[.][0-9]*"
+ exponent = "([Ee]" sign "[0-9]+)?"
+ number = "^" sign "(" decimal "|" fraction ")" exponent "$"
+}
+
+NR == 1 {
+ for (y=1; y<=NF; y++)
+ ($y ~ number) ? header[y] = "col" y : header[y] = $y
+}
+
+NF > 0 {
+ (NF > nf_max) ? nf_max = NF : nf_max = nf_max
+
+ ### columns
+ for (y=1; y<=nf_max; y++) {
+ ### rows
+ if ($y !~ number) {
+ continue
+ }
+ else {
+ read_data[NR,y] = $y
+ col_sum[y] += $y
+ row_sum[NR] += $y
+ }
+ }
+}
+
+END {
+ print length(row_sum), length(col_sum)
+ ### columns
+ for (y=1; y<=nf_max; y++) {
+ if (y in col_sum) {
+ ### rows
+ for (x=1; x<=NR; x++) {
+ if (x in row_sum) {
+ printf("[" OFMT "," OFMT "]" OFS OFMT OFS OFMT,
+ x, y, read_data[x,y], col_sum[y])
+ if (x < nf_max)
+ printf(OFS)
+ }
+ }
+ }
+ printf(ORS)
+ }
+}
+
diff --git a/em.awk b/em.awk
new file mode 100644
index 0000000..1297153
--- /dev/null
+++ b/em.awk
@@ -0,0 +1,13 @@
+#!/usr/bin/awk -f
+
+BEGIN {
+ ARGV[1] ? OFMT = "%." ARGV[1] "g" : OFMT = "%.12g"
+
+ ### constants
+ pi = 4.0*atan2(1,1) # rad
+ c0 = 299792458E0 # m/s, exact
+ mu0 = (4.0E-7)*pi # H/m
+ epsilon0 = (mu0*c0^2)^-1 # F/m
+
+ print pi, c0, mu0, epsilon0
+}
diff --git a/exp_reg.awk b/exp_reg.awk
new file mode 100644
index 0000000..f4ea12c
--- /dev/null
+++ b/exp_reg.awk
@@ -0,0 +1,113 @@
+#!/usr/bin/awk -f
+
+### lin_reg.awk
+# simple linear regression between columns
+
+BEGIN {
+ OFS = ":"
+ sign = "[+-]?"
+ decimal = "[0-9]+[.]?[0-9]*"
+ fraction = "[.][0-9]*"
+ exponent = "([Ee]" sign "[0-9]+)?"
+ number = "^" sign "(" decimal "|" fraction ")" exponent "$"
+}
+
+NR == 1 {
+ for (n=1; n<=NF; n++)
+ ($n ~ number) ? header[n] = "col" n : header[n] = $n
+}
+
+NF {
+ if (NF > nf_max)
+ nf_max = NF
+
+ ### iterate over columns
+ for (y=1; y<=nf_max; y++) {
+ if ($y == header[n])
+ continue
+
+ if ($y ~ number) {
+
+ ### mean
+ count[y] += 1
+ data[y] = log($y)
+ sum[y] += data[y]
+ sum2[y] += data[y]*data[y]
+ delta0[y] = data[y]y - mean[y]
+ mean[y] += delta0[y]/count[y]
+ delta1[y] = data[y]y - mean[y]
+ sum_delta[y] += delta1[y]
+ sum_delta2[y] += delta0[y]*delta1[y]
+
+ ### sample variance
+ #(count[y] > 1) ? var[y] = sum_delta2[y]/(count[y] - 1) : var[y] = ""
+
+ # x = row, y = col, trendline: y = A + Bx
+ for (x=1; x<=nf_max; x++) {
+ if ($x ~ number) {
+ count[x,y] += 1
+ sum_xy[x,y] += $x*data[y]
+ sum_delta_xy[x,y] += delta0[x]*delta1[y]
+
+ # covariance
+ #(count[x,y] > 1) ? cov[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1) : cov[x,y] = ""
+
+ # correlation
+ r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y])
+ (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1
+
+ ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x])
+ if (ab_den[x,y]) {
+ a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y]
+ b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y]
+ }
+ else {
+ a[x,y] = 0
+ b[x,y] = 1
+ }
+
+ # error estimate
+ err_den[x,y] = count[x,y]*(count[x,y] - 2)
+ if (count[x,y] > 2) {
+ err[x,y] = data[y] - (a[x,y] + b[x,y]*$x)
+ sum_err2[x,y] += err[x,y]*err[x,y]
+ }
+ b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x]
+ if (b_err_den[x,y])
+ b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y])
+ a_err_den[x,y] = count[x,y]*b_err_den[x,y]
+ if (a_err_den[x,y])
+ a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y]
+
+ # weighted mean, from HP-20S manual, pg 60
+ xw[x,y] = sum_xy[x,y]/sum[y]
+ yw[x,y] = b[x,y]*xw[x,y] + a[x,y]
+ xw_dist[x,y] = (xw[x,y] - mean[x])
+ yw_dist[x,y] = b[x,y]*(xw[x,y] - mean[x])
+ }
+ }
+ }
+ else
+ continue
+ }
+}
+
+END {
+ for (y=1; y<=nf_max; y++) {
+ for (x=1; x<=nf_max; x++) {
+ # if (x != y && r[x,y]) {
+ if (r[x,y]) {
+ printf(OFMT OFS "(%s)" OFS " = (" OFMT " +/- " OFMT ")(%s) + (" OFMT " +/- " OFMT ")" OFS,
+ (r[x,y]*r[x,y]),
+ header[x], a[x,y], a_err[x,y],
+ header[y], log(b[x,y]), log(b_err[x,y]))
+ printf("[" OFMT "," OFMT "][" OFMT "," OFMT "]" OFS "[" OFMT "," OFMT "]" OFS,
+ 0, a[x,y], (-1.0*a[x,y]/b[x,y]), 0,
+ mean[x], b[x,y]*(mean[x]) + a[x,y])
+ printf("[" OFMT "," OFMT "]" OFS, xw[x,y], yw[x,y])
+ printf("[" OFMT "]" ORS, sqrt(xw_dist[x,y]*xw_dist[x,y] + yw_dist[x,y]*yw_dist[x,y]))
+ }
+ }
+ }
+}
+
diff --git a/fib.awk b/fib.awk
index 0d46479..7baef76 100644
--- a/fib.awk
+++ b/fib.awk
@@ -1,13 +1,18 @@
#!/usr/bin/awk -f
### fib.awk
-# fib sol'n from Hamming
BEGIN {
ARGV[1] ? n = ARGV[1] : n = 0
ARGV[2] ? OFMT = "%." ARGV[2] "g" : OFMT = "%g"
- C1 = 1.0/sqrt(5)
- C2 = -1.0/sqrt(5)
- print C1*(0.5*(1 + sqrt(5)))^n + C2*(0.5*(1 - sqrt(5)))^n
+
+ # fib sol'n from Hamming
+ C1 = 1.0/sqrt(5.0)
+ C2 = -1.0/sqrt(5.0)
+ print C1*(0.5*(1 + sqrt(5.0)))^n + C2*(0.5*(1 - sqrt(5.0)))^n
+
+ phi = 0.5*(1 + sqrt(5.0))
+ binet = (phi^n - (-1.0/phi)^n)/sqrt(5.0)
+ print binet
}
diff --git a/gaussian.awk b/gaussian.awk
index dc62d37..402ea10 100644
--- a/gaussian.awk
+++ b/gaussian.awk
@@ -27,7 +27,7 @@ BEGIN {
if (N > 1 && M > 0) {
# sigma <= 0.5
sigma = 0.4
- en[n] = (n - M)/(sigma*M)
+ e[n] = (n - M)/(sigma*M)
w[n] = exp(-0.5*e[n]*e[n])
print n, w[n]/M
}
diff --git a/ludcmp.awk b/ludcmp.awk
new file mode 100644
index 0000000..6c7807a
--- /dev/null
+++ b/ludcmp.awk
@@ -0,0 +1,105 @@
+#!/usr/bin/awk -f
+
+### ludcmp.awk
+# LU decomposition on square matrix, based on example code from,
+# https://en.wikipedia.org/wiki/LU_decomposition
+# INPUT:
+# A (N-by-N matrix)
+# right_part (N-element vector)
+# OUTPUT:
+# x (N-element vector)
+# note, matrix indexing begins with 1 and uses i=row, j=col
+# lu = L + U - I
+
+# decomposition of A
+function lu_decomp(A, n) {
+
+ for (i=1; i<=n; i++) {
+
+ for (j=i; j<=n; j++) {
+ sum = 0.0
+ for (k=1; k<=i; k++)
+ sum += lu[i,k] * lu[k,j]
+ lu[i,j] = A[i,j] - sum
+ }
+
+ for (j=i+1; j<=n; j++) {
+ sum = 0.0
+ for (k=1; k<=i; k++)
+ sum += lu[j,k] * lu[k,i]
+
+ # check for div by zero
+ if (lu[i,i] != 0.0)
+ lu[j,i] = (A[j,i] - sum) / lu[i,i]
+ else
+ lu[j,i] = "nan"
+ }
+
+ # debug
+ for (j=1; j<=n; j++) {
+ printf(lu[i,j])
+ printf( j==n ? ORS : OFS )
+ }
+ }
+}
+
+# regex to identify strings that look like numbers
+BEGIN {
+ OFS = FS
+ sign = "[+-]?"
+ decimal = "[0-9]+[.]?[0-9]*"
+ fraction = "[.][0-9]*"
+ exponent = "([Ee]" sign "[0-9]+)?"
+ number = "^" sign "(" decimal "|" fraction ")" exponent "$"
+}
+
+# column headers
+NR == 1 {
+ for (n=1; n<=NF; n++)
+ ($n ~ number) ? head[n] = "col" n : head[n] = $n
+}
+
+# read input data
+NF {
+
+ if (NF > max_nf)
+ (NF > nf_max) ? nf_max = NF : nf_max = nf_max
+
+ for (n=1; n<=NF; n++) {
+ if ($n ~ number) {
+ count[n] += 1
+ (count[n] == 1 || max_nf > size) ? size = max_nf : size = size
+ (count[n] == 1 || count[n] > size) ? size = count[n] : size = size
+ matrix[count[n],n] = $n
+ print(matrix[count[n],n])
+ }
+ }
+
+}
+
+END {
+ printf(ORS)
+ print(NR, max_nf, size)
+ printf(ORS)
+ lu_decomp(matrix, size)
+
+}
+
+## find solution of Ly = b, for y
+#for (i=0; i<n;, i++) {
+# sum = 0
+# for (k=0; k<i; k++)
+# sum += lu[i,k] * y[k]
+# y[i] = right_part[i] - sum
+#}
+#
+## find solution of Ux = y, for x
+#for (i=n-1; i>=0; i--) {
+# sum = 0;
+# for (k=i+1; k<n; k++)
+# sum += lu[i,k] * x[k];
+# x[i] = (1 / lu[i,i]) * (y[i] - sum);
+#}
+#
+#return x
+
diff --git a/lupd.awk b/lupd.awk
new file mode 100644
index 0000000..39bf00c
--- /dev/null
+++ b/lupd.awk
@@ -0,0 +1,111 @@
+#!/usr/bin/awk -f
+
+### lupd.awk
+# LU Decomposition example
+# https://en.wikipedia.org/wiki/LU_decomposition
+# input: square array as delimited text
+# output:
+#
+# note, matrix indexing begins with 1 and uses i=row, j=col
+# lu = L + U - I
+
+# solve A[n,n] * x[n] = b[n]
+function lu_solve(A, b, n) {
+
+ # decomposition of A
+ for (i=1; i<=n; i++) {
+
+ for (j=i; j<=n; j++) {
+ sum = 0.0
+ for (k=1; k<=i; k++)
+ sum += lu[i,k] * lu[k,j]
+ lu[i,j] = A[i,j] - sum
+ }
+
+ for (j=i+1; j<=n; j++) {
+ sum = 0.0
+ for (k=1; k<=i; k++)
+ sum += lu[j,k] * lu[k,i]
+
+ # check for div by zero
+ if (lu[i,i] != 0.0)
+ lu[j,i] = (A[j,i] - sum) / lu[i,i]
+ else
+ lu[j,i] = "nan"
+ }
+
+ }
+
+ # find solution of Ly = b
+ for (i = 0; i < n; i++) {
+ sum = 0;
+ for (k = 0; k < i; k++)
+ sum += lu[i,k] * y[k];
+ y[i] = b[i] - sum;
+ }
+
+ # find solution of Ux = y
+ for (i = n - 1; i = 0; i--) {
+ sum = 0;
+ for (k = i + 1; k < n; k++)
+ sum += lu[i, k] * x[k];
+ x[i] = (1.0 / lu[i,i]) * (y[i] - sum);
+ }
+
+ return x;
+
+}
+
+# regex to identify strings that look like numbers
+BEGIN {
+ OFS = FS
+ sign = "[+-]?"
+ decimal = "[0-9]+[.]?[0-9]*"
+ fraction = "[.][0-9]*"
+ exponent = "([Ee]" sign "[0-9]+)?"
+ number = "^" sign "(" decimal "|" fraction ")" exponent "$"
+}
+
+# column headers
+NR == 1 {
+ for (y=1; y<=NF; y++)
+ ($y ~ number) ? header[y] = "col" y : header[y] = $y
+}
+
+# read input data
+NF > 0 {
+ (NF > nf_max) ? nf_max = NF : nf_max = nf_max
+
+ ### columns
+ for (y=1; y<=nf_max; y++) {
+ ### rows
+ if ($y !~ number) {
+ continue
+ }
+ else {
+ read_data[NR,y] = $y
+ col_sum[y] += $y
+ row_sum[NR] += $y
+ }
+ }
+}
+
+END {
+ print length(row_sum), length(col_sum)
+ ### columns
+ for (y=1; y<=nf_max; y++) {
+ if (y in col_sum) {
+ ### rows
+ for (x=1; x<=NR; x++) {
+ if (x in row_sum) {
+ printf("[" OFMT "," OFMT "]" OFS OFMT OFS OFMT,
+ x, y, read_data[x,y], col_sum[y])
+ if (x < nf_max)
+ printf(OFS)
+ }
+ }
+ }
+ printf(ORS)
+ }
+}
+