#!/usr/bin/awk -f
### quad_reg.awk
# Quadratic least-squares regression between every ordered pair of columns.
#
# For each pair of columns (x, y) with x != y, fits the trendline
#     y = A + B*x + C*x^2
# from running sums that are updated on every input record, and prints one
# line per pair at end of input:
#     <10*log10(r2)> (<y name>)  = (C)(<x name>)^2  + (B)(<x name>)  + (A)
# r2 is the accumulated squared fit error divided by the accumulated squared
# deviation of y from its running mean; both are accumulated record by record
# with the coefficients/mean available at that record.
#
# Column names come from the first record; a first-record field that looks
# like a number is named "col<N>" instead.
#
# NOTE: the per-column sums are divided by the pair count, so the fit assumes
# that every record with a numeric y also has a numeric x. A non-numeric x
# field is used as 0, exactly as awk evaluates it.

BEGIN {
    OFMT = "%.9g"

    # Pieces of the anchored regex that decides whether a field is numeric.
    sign     = "[+-]?"
    decimal  = "[0-9]+[.]?[0-9]*"
    # Require a digit after the point so a lone "." is not taken as a number.
    fraction = "[.][0-9]+"
    exponent = "([Ee]" sign "[0-9]+)?"
    number   = "^" sign "(" decimal "|" fraction ")" exponent "$"
}

# First record: remember a display name for every column.
NR == 1 {
    for (n = 1; n <= NF; n++) {
        if ($n ~ number)
            header[n] = "col" n
        else
            header[n] = $n
    }
}

NF > 0 {
    if (NF > nf_max)
        nf_max = NF

    ### Pass 1: per-column running sums.
    # All columns are updated before any pair is evaluated, so the pair
    # computation below never sees a column that is one record behind.
    for (k = 1; k <= nf_max; k++) {
        if ($k !~ number)
            continue

        v = $k + 0
        count[k] += 1
        sum[k]   += v
        sum2[k]  += v*v
        sum3[k]  += v*v*v
        sum4[k]  += v*v*v*v
        mean[k]   = sum[k]/count[k]
        mean2[k]  = sum2[k]/count[k]

        # squared difference from the (already updated) running mean
        dev = v - mean[k]
        sum_delta2[k] += dev*dev
    }

    ### Pass 2: pairwise fit. x = predictor column, y = response column.
    for (y = 1; y <= nf_max; y++) {
        if ($y !~ number)
            continue

        for (x = 1; x <= nf_max; x++) {
            pair_n[x,y]  += 1
            sum_xy[x,y]  += $x*$y
            sum_x2y[x,y] += $x*$x*$y
            m = pair_n[x,y]

            # covariances (need at least two records)
            if (m > 1) {
                s_xx[x,y]   = sum2[x]/m - mean[x]*mean[x]
                s_xy[x,y]   = sum_xy[x,y]/m - mean[x]*mean[y]
                s_xx2[x,y]  = sum3[x]/m - mean[x]*mean2[x]
                s_x2x2[x,y] = sum4[x]/m - mean2[x]*mean2[x]
                s_x2y[x,y]  = sum_x2y[x,y]/m - mean2[x]*mean[y]
            }

            # Solve the 2x2 normal equations for B and C; a zero determinant
            # (too few distinct x values) falls back to the constant model.
            bc_den[x,y] = (s_xx[x,y]*s_x2x2[x,y] - s_xx2[x,y]*s_xx2[x,y])
            if (bc_den[x,y]) {
                c[x,y] = (s_x2y[x,y]*s_xx[x,y] - s_xy[x,y]*s_xx2[x,y])/bc_den[x,y]
                b[x,y] = (s_xy[x,y]*s_x2x2[x,y] - s_x2y[x,y]*s_xx2[x,y])/bc_den[x,y]
            } else {
                c[x,y] = 0
                b[x,y] = 0
            }
            # Intercept: mean(y) = A + B*mean(x) + C*mean(x^2)
            a[x,y] = mean[y] - b[x,y]*mean[x] - c[x,y]*mean2[x]

            # error estimate for this record under the current coefficients
            resid = $y - (a[x,y] + b[x,y]*$x + c[x,y]*$x*$x)
            sum_err2[x,y] += resid*resid

            # ratio of fit error to deviation from the mean
            if (sum_delta2[y])
                r2[x,y] = sum_err2[x,y]/sum_delta2[y]
            else
                r2[x,y] = 1
        }
    }
}

END {
    for (x = 1; x <= nf_max; x++) {
        for (y = 1; y <= nf_max; y++) {
            # Skip self-pairs and pairs with zero/unset r2 (log(0) is undefined).
            if (x != y && r2[x,y]) {
                printf(OFMT OFS "(%s)" OFS " = (" OFMT ")(%s)^2" OFS " + (" OFMT ")(%s)" OFS " + (" OFMT ")" ORS,
                       10.0*log(r2[x,y])/log(10),
                       header[y],
                       c[x,y], header[x],
                       b[x,y], header[x],
                       a[x,y])
            }
        }
    }
}