summaryrefslogtreecommitdiff
path: root/exp_reg.awk
diff options
context:
space:
mode:
authorwukong <wukong@longaeva>2023-12-18 12:37:31 -0800
committerwukong <wukong@longaeva>2023-12-18 12:37:31 -0800
commit309c5d8f7ff8c36235222d079955cd3783bb7ad0 (patch)
tree22fa0994657cee27292ef1531211a47293afab5f /exp_reg.awk
parent0c1d68ee8cc2a631d6029285e771ebbfe119995d (diff)
Added a partially working function performing LU decomposition on a square
matrix, ludcmp.awk. This is providing correct answers on _some_ test data, further testing debugging required here.
Diffstat (limited to '')
-rw-r--r--exp_reg.awk113
1 files changed, 113 insertions, 0 deletions
diff --git a/exp_reg.awk b/exp_reg.awk
new file mode 100644
index 0000000..f4ea12c
--- /dev/null
+++ b/exp_reg.awk
@@ -0,0 +1,113 @@
+#!/usr/bin/awk -f
+
+### lin_reg.awk
+# simple linear regression between columns
+
+BEGIN {
+ OFS = ":"
+ sign = "[+-]?"
+ decimal = "[0-9]+[.]?[0-9]*"
+ fraction = "[.][0-9]*"
+ exponent = "([Ee]" sign "[0-9]+)?"
+ number = "^" sign "(" decimal "|" fraction ")" exponent "$"
+}
+
+NR == 1 {
+ for (n=1; n<=NF; n++)
+ ($n ~ number) ? header[n] = "col" n : header[n] = $n
+}
+
+NF {
+ if (NF > nf_max)
+ nf_max = NF
+
+ ### iterate over columns
+ for (y=1; y<=nf_max; y++) {
+ if ($y == header[n])
+ continue
+
+ if ($y ~ number) {
+
+ ### mean
+ count[y] += 1
+ data[y] = log($y)
+ sum[y] += data[y]
+ sum2[y] += data[y]*data[y]
+ delta0[y] = data[y]y - mean[y]
+ mean[y] += delta0[y]/count[y]
+ delta1[y] = data[y]y - mean[y]
+ sum_delta[y] += delta1[y]
+ sum_delta2[y] += delta0[y]*delta1[y]
+
+ ### sample variance
+ #(count[y] > 1) ? var[y] = sum_delta2[y]/(count[y] - 1) : var[y] = ""
+
+ # x = row, y = col, trendline: y = A + Bx
+ for (x=1; x<=nf_max; x++) {
+ if ($x ~ number) {
+ count[x,y] += 1
+ sum_xy[x,y] += $x*data[y]
+ sum_delta_xy[x,y] += delta0[x]*delta1[y]
+
+ # covariance
+ #(count[x,y] > 1) ? cov[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1) : cov[x,y] = ""
+
+ # correlation
+ r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y])
+ (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1
+
+ ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x])
+ if (ab_den[x,y]) {
+ a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y]
+ b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y]
+ }
+ else {
+ a[x,y] = 0
+ b[x,y] = 1
+ }
+
+ # error estimate
+ err_den[x,y] = count[x,y]*(count[x,y] - 2)
+ if (count[x,y] > 2) {
+ err[x,y] = data[y] - (a[x,y] + b[x,y]*$x)
+ sum_err2[x,y] += err[x,y]*err[x,y]
+ }
+ b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x]
+ if (b_err_den[x,y])
+ b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y])
+ a_err_den[x,y] = count[x,y]*b_err_den[x,y]
+ if (a_err_den[x,y])
+ a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y]
+
+ # weighted mean, from HP-20S manual, pg 60
+ xw[x,y] = sum_xy[x,y]/sum[y]
+ yw[x,y] = b[x,y]*xw[x,y] + a[x,y]
+ xw_dist[x,y] = (xw[x,y] - mean[x])
+ yw_dist[x,y] = b[x,y]*(xw[x,y] - mean[x])
+ }
+ }
+ }
+ else
+ continue
+ }
+}
+
+END {
+ for (y=1; y<=nf_max; y++) {
+ for (x=1; x<=nf_max; x++) {
+ # if (x != y && r[x,y]) {
+ if (r[x,y]) {
+ printf(OFMT OFS "(%s)" OFS " = (" OFMT " +/- " OFMT ")(%s) + (" OFMT " +/- " OFMT ")" OFS,
+ (r[x,y]*r[x,y]),
+ header[x], a[x,y], a_err[x,y],
+ header[y], log(b[x,y]), log(b_err[x,y]))
+ printf("[" OFMT "," OFMT "][" OFMT "," OFMT "]" OFS "[" OFMT "," OFMT "]" OFS,
+ 0, a[x,y], (-1.0*a[x,y]/b[x,y]), 0,
+ mean[x], b[x,y]*(mean[x]) + a[x,y])
+ printf("[" OFMT "," OFMT "]" OFS, xw[x,y], yw[x,y])
+ printf("[" OFMT "]" ORS, sqrt(xw_dist[x,y]*xw_dist[x,y] + yw_dist[x,y]*yw_dist[x,y]))
+ }
+ }
+ }
+}
+