From 309c5d8f7ff8c36235222d079955cd3783bb7ad0 Mon Sep 17 00:00:00 2001 From: wukong Date: Mon, 18 Dec 2023 12:37:31 -0800 Subject: Added a partially working function performing LU decomposition on a square matrix, ludcmp.awk. This is providing correct answers on _some_ test data, further testing debugging required here. --- binom_coeff.awk | 18 +++++++++ det.awk | 57 ++++++++++++++++++++++++++++ em.awk | 13 +++++++ exp_reg.awk | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fib.awk | 13 +++++-- gaussian.awk | 2 +- ludcmp.awk | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++ lupd.awk | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 427 insertions(+), 5 deletions(-) create mode 100644 binom_coeff.awk create mode 100644 det.awk create mode 100644 em.awk create mode 100644 exp_reg.awk create mode 100644 ludcmp.awk create mode 100644 lupd.awk diff --git a/binom_coeff.awk b/binom_coeff.awk new file mode 100644 index 0000000..a19f464 --- /dev/null +++ b/binom_coeff.awk @@ -0,0 +1,18 @@ +#! /usr/bin/awk -f + +### binomial coeffecient +# https://rosettacode.org/wiki/Evaluate_binomial_coefficients +function binom(n, k) { + b = 1 + for (i=1; i<(k+1); i++) { + b *= (n - i + 1) / i + } + return b +} + +BEGIN { + ARGV[1] ? N = ARGV[1] : N = 1 + ARGV[2] ? K = ARGV[2] : K = 1 + ARGV[3] ? OFMT = "%." ARGV[3] "g" : OFMT = "%g" + printf(OFMT ORS, binom(N, K)) +} diff --git a/det.awk b/det.awk new file mode 100644 index 0000000..b3e5c1f --- /dev/null +++ b/det.awk @@ -0,0 +1,57 @@ +#!/usr/bin/awk -f + +### det.awk +# determinant via LU +# input: square array as delimited text +# output: (scalar) determinant + +BEGIN { + OFS = "\t" + sign = "[+-]?" + decimal = "[0-9]+[.]?[0-9]*" + fraction = "[.][0-9]*" + exponent = "([Ee]" sign "[0-9]+)?" + number = "^" sign "(" decimal "|" fraction ")" exponent "$" +} + +NR == 1 { + for (y=1; y<=NF; y++) + ($y ~ number) ? header[y] = "col" y : header[y] = $y +} + +NF > 0 { + (NF > nf_max) ? nf_max = NF : nf_max = nf_max + + ### columns + for (y=1; y<=nf_max; y++) { + ### rows + if ($y !~ number) { + continue + } + else { + read_data[NR,y] = $y + col_sum[y] += $y + row_sum[NR] += $y + } + } +} + +END { + print length(row_sum), length(col_sum) + ### columns + for (y=1; y<=nf_max; y++) { + if (y in col_sum) { + ### rows + for (x=1; x<=NR; x++) { + if (x in row_sum) { + printf("[" OFMT "," OFMT "]" OFS OFMT OFS OFMT, + x, y, read_data[x,y], col_sum[y]) + if (x < nf_max) + printf(OFS) + } + } + } + printf(ORS) + } +} + diff --git a/em.awk b/em.awk new file mode 100644 index 0000000..1297153 --- /dev/null +++ b/em.awk @@ -0,0 +1,13 @@ +#!/usr/bin/awk -f + +BEGIN { + ARGV[1] ? OFMT = "%." ARGV[1] "g" : OFMT = "%.12g" + + ### constants + pi = 4.0*atan2(1,1) # rad + c0 = 299792458E0 # m/s, exact + mu0 = (4.0E-7)*pi # H/m + epsilon0 = (mu0*c0^2)^-1 # F/m + + print pi, c0, mu0, epsilon0 +} diff --git a/exp_reg.awk b/exp_reg.awk new file mode 100644 index 0000000..f4ea12c --- /dev/null +++ b/exp_reg.awk @@ -0,0 +1,113 @@ +#!/usr/bin/awk -f + +### lin_reg.awk +# simple linear regression between columns + +BEGIN { + OFS = ":" + sign = "[+-]?" + decimal = "[0-9]+[.]?[0-9]*" + fraction = "[.][0-9]*" + exponent = "([Ee]" sign "[0-9]+)?" + number = "^" sign "(" decimal "|" fraction ")" exponent "$" +} + +NR == 1 { + for (n=1; n<=NF; n++) + ($n ~ number) ? header[n] = "col" n : header[n] = $n +} + +NF { + if (NF > nf_max) + nf_max = NF + + ### iterate over columns + for (y=1; y<=nf_max; y++) { + if ($y == header[n]) + continue + + if ($y ~ number) { + + ### mean + count[y] += 1 + data[y] = log($y) + sum[y] += data[y] + sum2[y] += data[y]*data[y] + delta0[y] = data[y]y - mean[y] + mean[y] += delta0[y]/count[y] + delta1[y] = data[y]y - mean[y] + sum_delta[y] += delta1[y] + sum_delta2[y] += delta0[y]*delta1[y] + + ### sample variance + #(count[y] > 1) ? var[y] = sum_delta2[y]/(count[y] - 1) : var[y] = "" + + # x = row, y = col, trendline: y = A + Bx + for (x=1; x<=nf_max; x++) { + if ($x ~ number) { + count[x,y] += 1 + sum_xy[x,y] += $x*data[y] + sum_delta_xy[x,y] += delta0[x]*delta1[y] + + # covariance + #(count[x,y] > 1) ? cov[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1) : cov[x,y] = "" + + # correlation + r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y]) + (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1 + + ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x]) + if (ab_den[x,y]) { + a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y] + b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y] + } + else { + a[x,y] = 0 + b[x,y] = 1 + } + + # error estimate + err_den[x,y] = count[x,y]*(count[x,y] - 2) + if (count[x,y] > 2) { + err[x,y] = data[y] - (a[x,y] + b[x,y]*$x) + sum_err2[x,y] += err[x,y]*err[x,y] + } + b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x] + if (b_err_den[x,y]) + b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y]) + a_err_den[x,y] = count[x,y]*b_err_den[x,y] + if (a_err_den[x,y]) + a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y] + + # weighted mean, from HP-20S manual, pg 60 + xw[x,y] = sum_xy[x,y]/sum[y] + yw[x,y] = b[x,y]*xw[x,y] + a[x,y] + xw_dist[x,y] = (xw[x,y] - mean[x]) + yw_dist[x,y] = b[x,y]*(xw[x,y] - mean[x]) + } + } + } + else + continue + } +} + +END { + for (y=1; y<=nf_max; y++) { + for (x=1; x<=nf_max; x++) { + # if (x != y && r[x,y]) { + if (r[x,y]) { + printf(OFMT OFS "(%s)" OFS " = (" OFMT " +/- " OFMT ")(%s) + (" OFMT " +/- " OFMT ")" OFS, + (r[x,y]*r[x,y]), + header[x], a[x,y], a_err[x,y], + header[y], log(b[x,y]), log(b_err[x,y])) + printf("[" OFMT "," OFMT "][" OFMT "," OFMT "]" OFS "[" OFMT "," OFMT "]" OFS, + 0, a[x,y], (-1.0*a[x,y]/b[x,y]), 0, + mean[x], b[x,y]*(mean[x]) + a[x,y]) + printf("[" OFMT "," OFMT "]" OFS, xw[x,y], yw[x,y]) + printf("[" OFMT "]" ORS, sqrt(xw_dist[x,y]*xw_dist[x,y] + yw_dist[x,y]*yw_dist[x,y])) + } + } + } +} + diff --git a/fib.awk b/fib.awk index 0d46479..7baef76 100644 --- a/fib.awk +++ b/fib.awk @@ -1,13 +1,18 @@ #!/usr/bin/awk -f ### fib.awk -# fib sol'n from Hamming BEGIN { ARGV[1] ? n = ARGV[1] : n = 0 ARGV[2] ? OFMT = "%." ARGV[2] "g" : OFMT = "%g" - C1 = 1.0/sqrt(5) - C2 = -1.0/sqrt(5) - print C1*(0.5*(1 + sqrt(5)))^n + C2*(0.5*(1 - sqrt(5)))^n + + # fib sol'n from Hamming + C1 = 1.0/sqrt(5.0) + C2 = -1.0/sqrt(5.0) + print C1*(0.5*(1 + sqrt(5.0)))^n + C2*(0.5*(1 - sqrt(5.0)))^n + + phi = 0.5*(1 + sqrt(5.0)) + binet = (phi^n - (-1.0/phi)^n)/sqrt(5.0) + print binet } diff --git a/gaussian.awk b/gaussian.awk index dc62d37..402ea10 100644 --- a/gaussian.awk +++ b/gaussian.awk @@ -27,7 +27,7 @@ BEGIN { if (N > 1 && M > 0) { # sigma <= 0.5 sigma = 0.4 - en[n] = (n - M)/(sigma*M) + e[n] = (n - M)/(sigma*M) w[n] = exp(-0.5*e[n]*e[n]) print n, w[n]/M } diff --git a/ludcmp.awk b/ludcmp.awk new file mode 100644 index 0000000..6c7807a --- /dev/null +++ b/ludcmp.awk @@ -0,0 +1,105 @@ +#!/usr/bin/awk -f + +### ludcmp.awk +# LU decomposition on square matrix, based on example code from, +# https://en.wikipedia.org/wiki/LU_decomposition +# INPUT: +# A (N-by-N matrix) +# right_part (N-element vector) +# OUTPUT: +# x (N-element vector) +# note, matrix indexing begins with 1 and uses i=row, j=col +# lu = L + U - I + +# decomposition of A +function lu_decomp(A, n) { + + for (i=1; i<=n; i++) { + + for (j=i; j<=n; j++) { + sum = 0.0 + for (k=1; k<=i; k++) + sum += lu[i,k] * lu[k,j] + lu[i,j] = A[i,j] - sum + } + + for (j=i+1; j<=n; j++) { + sum = 0.0 + for (k=1; k<=i; k++) + sum += lu[j,k] * lu[k,i] + + # check for div by zero + if (lu[i,i] != 0.0) + lu[j,i] = (A[j,i] - sum) / lu[i,i] + else + lu[j,i] = "nan" + } + + # debug + for (j=1; j<=n; j++) { + printf(lu[i,j]) + printf( j==n ? ORS : OFS ) + } + } +} + +# regex to identify strings that look like numbers +BEGIN { + OFS = FS + sign = "[+-]?" + decimal = "[0-9]+[.]?[0-9]*" + fraction = "[.][0-9]*" + exponent = "([Ee]" sign "[0-9]+)?" + number = "^" sign "(" decimal "|" fraction ")" exponent "$" +} + +# column headers +NR == 1 { + for (n=1; n<=NF; n++) + ($n ~ number) ? head[n] = "col" n : head[n] = $n +} + +# read input data +NF { + + if (NF > max_nf) + (NF > nf_max) ? nf_max = NF : nf_max = nf_max + + for (n=1; n<=NF; n++) { + if ($n ~ number) { + count[n] += 1 + (count[n] == 1 || max_nf > size) ? size = max_nf : size = size + (count[n] == 1 || count[n] > size) ? size = count[n] : size = size + matrix[count[n],n] = $n + print(matrix[count[n],n]) + } + } + +} + +END { + printf(ORS) + print(NR, max_nf, size) + printf(ORS) + lu_decomp(matrix, size) + +} + +## find solution of Ly = b, for y +#for (i=0; i=0; i--) { +# sum = 0; +# for (k=i+1; k 0 { + (NF > nf_max) ? nf_max = NF : nf_max = nf_max + + ### columns + for (y=1; y<=nf_max; y++) { + ### rows + if ($y !~ number) { + continue + } + else { + read_data[NR,y] = $y + col_sum[y] += $y + row_sum[NR] += $y + } + } +} + +END { + print length(row_sum), length(col_sum) + ### columns + for (y=1; y<=nf_max; y++) { + if (y in col_sum) { + ### rows + for (x=1; x<=NR; x++) { + if (x in row_sum) { + printf("[" OFMT "," OFMT "]" OFS OFMT OFS OFMT, + x, y, read_data[x,y], col_sum[y]) + if (x < nf_max) + printf(OFS) + } + } + } + printf(ORS) + } +} + -- cgit v1.2.3