From 309c5d8f7ff8c36235222d079955cd3783bb7ad0 Mon Sep 17 00:00:00 2001 From: wukong Date: Mon, 18 Dec 2023 12:37:31 -0800 Subject: Added a partially working function performing LU decomposition on a square matrix, ludcmp.awk. This is providing correct answers on _some_ test data, further testing debugging required here. --- exp_reg.awk | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 exp_reg.awk (limited to 'exp_reg.awk') diff --git a/exp_reg.awk b/exp_reg.awk new file mode 100644 index 0000000..f4ea12c --- /dev/null +++ b/exp_reg.awk @@ -0,0 +1,113 @@ +#!/usr/bin/awk -f + +### lin_reg.awk +# simple linear regression between columns + +BEGIN { + OFS = ":" + sign = "[+-]?" + decimal = "[0-9]+[.]?[0-9]*" + fraction = "[.][0-9]*" + exponent = "([Ee]" sign "[0-9]+)?" + number = "^" sign "(" decimal "|" fraction ")" exponent "$" +} + +NR == 1 { + for (n=1; n<=NF; n++) + ($n ~ number) ? header[n] = "col" n : header[n] = $n +} + +NF { + if (NF > nf_max) + nf_max = NF + + ### iterate over columns + for (y=1; y<=nf_max; y++) { + if ($y == header[n]) + continue + + if ($y ~ number) { + + ### mean + count[y] += 1 + data[y] = log($y) + sum[y] += data[y] + sum2[y] += data[y]*data[y] + delta0[y] = data[y]y - mean[y] + mean[y] += delta0[y]/count[y] + delta1[y] = data[y]y - mean[y] + sum_delta[y] += delta1[y] + sum_delta2[y] += delta0[y]*delta1[y] + + ### sample variance + #(count[y] > 1) ? var[y] = sum_delta2[y]/(count[y] - 1) : var[y] = "" + + # x = row, y = col, trendline: y = A + Bx + for (x=1; x<=nf_max; x++) { + if ($x ~ number) { + count[x,y] += 1 + sum_xy[x,y] += $x*data[y] + sum_delta_xy[x,y] += delta0[x]*delta1[y] + + # covariance + #(count[x,y] > 1) ? cov[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1) : cov[x,y] = "" + + # correlation + r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y]) + (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1 + + ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x]) + if (ab_den[x,y]) { + a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y] + b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y] + } + else { + a[x,y] = 0 + b[x,y] = 1 + } + + # error estimate + err_den[x,y] = count[x,y]*(count[x,y] - 2) + if (count[x,y] > 2) { + err[x,y] = data[y] - (a[x,y] + b[x,y]*$x) + sum_err2[x,y] += err[x,y]*err[x,y] + } + b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x] + if (b_err_den[x,y]) + b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y]) + a_err_den[x,y] = count[x,y]*b_err_den[x,y] + if (a_err_den[x,y]) + a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y] + + # weighted mean, from HP-20S manual, pg 60 + xw[x,y] = sum_xy[x,y]/sum[y] + yw[x,y] = b[x,y]*xw[x,y] + a[x,y] + xw_dist[x,y] = (xw[x,y] - mean[x]) + yw_dist[x,y] = b[x,y]*(xw[x,y] - mean[x]) + } + } + } + else + continue + } +} + +END { + for (y=1; y<=nf_max; y++) { + for (x=1; x<=nf_max; x++) { + # if (x != y && r[x,y]) { + if (r[x,y]) { + printf(OFMT OFS "(%s)" OFS " = (" OFMT " +/- " OFMT ")(%s) + (" OFMT " +/- " OFMT ")" OFS, + (r[x,y]*r[x,y]), + header[x], a[x,y], a_err[x,y], + header[y], log(b[x,y]), log(b_err[x,y])) + printf("[" OFMT "," OFMT "][" OFMT "," OFMT "]" OFS "[" OFMT "," OFMT "]" OFS, + 0, a[x,y], (-1.0*a[x,y]/b[x,y]), 0, + mean[x], b[x,y]*(mean[x]) + a[x,y]) + printf("[" OFMT "," OFMT "]" OFS, xw[x,y], yw[x,y]) + printf("[" OFMT "]" ORS, sqrt(xw_dist[x,y]*xw_dist[x,y] + yw_dist[x,y]*yw_dist[x,y])) + } + } + } +} + -- cgit v1.2.3