#!/usr/bin/awk -f ### lin_reg.awk # simple linear regression between columns BEGIN { OFS = ":" sign = "[+-]?" decimal = "[0-9]+[.]?[0-9]*" fraction = "[.][0-9]*" exponent = "([Ee]" sign "[0-9]+)?" number = "^" sign "(" decimal "|" fraction ")" exponent "$" } NR == 1 { for (n=1; n<=NF; n++) ($n ~ number) ? header[n] = "col" n : header[n] = $n } NF > 0 { if (NF > nf_max) nf_max = NF ### iterate over columns for (y=1; y<=nf_max; y++) { if ($y == header[n]) continue if ($y ~ number) { ### mean count[y] += 1 sum[y] += $y sum2[y] += $y*$y delta0[y] = $y - mean[y] mean[y] += delta0[y]/count[y] delta1[y] = $y - mean[y] sum_delta[y] += delta1[y] sum_delta2[y] += delta0[y]*delta1[y] ### sample variance #(count[y] > 1) ? var[y] = sum_delta2[y]/(count[y] - 1) : var[y] = "" # x = row, y = col, trendline: y = A + Bx for (x=1; x<=nf_max; x++) { count[x,y] += 1 sum_xy[x,y] += $x*$y sum_delta_xy[x,y] += delta0[x]*delta1[y] # covariance #(count[x,y] > 1) ? cov[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1) : cov[x,y] = "" # correlation r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y]) (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1 ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x]) if (ab_den[x,y]) { a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y] b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y] } else { a[x,y] = 0 b[x,y] = 1 } # error estimate err_den[x,y] = count[x,y]*(count[x,y] - 2) if (count[x,y] > 2) { err[x,y] = $y - (a[x,y] + b[x,y]*$x) sum_err2[x,y] += err[x,y]*err[x,y] } b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x] if (b_err_den[x,y]) b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y]) a_err_den[x,y] = count[x,y]*b_err_den[x,y] if (a_err_den[x,y]) a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y] # weighted mean, from HP-20S manual, pg 60 xw[x,y] = sum_xy[x,y]/sum[y] yw[x,y] = b[x,y]*xw[x,y] + a[x,y] xw_dist[x,y] = (xw[x,y] - mean[x]) yw_dist[x,y] = b[x,y]*(xw[x,y] - mean[x]) } } else continue } } END { for (y=1; y<=nf_max; y++) { for (x=1; x<=nf_max; x++) { if (x != y && r[x,y]) { printf(OFMT OFS "(%s)" OFS " = (" OFMT " +/- " OFMT ")(%s) + (" OFMT " +/- " OFMT ")" OFS, (r[x,y]*r[x,y]), header[y], b[x,y], b_err[x,y], header[x], a[x,y], a_err[x,y]) printf("[" OFMT "," OFMT "][" OFMT "," OFMT "]" OFS "[" OFMT "," OFMT "]" OFS, 0, a[x,y], (-1.0*a[x,y]/b[x,y]), 0, mean[x], b[x,y]*(mean[x]) + a[x,y]) printf("[" OFMT "," OFMT "]" OFS, xw[x,y], yw[x,y]) printf("[" OFMT "]" ORS, sqrt(xw_dist[x,y]*xw_dist[x,y] + yw_dist[x,y]*yw_dist[x,y])) } } } }