#!/usr/bin/awk -f

### quad_reg.awk
# Quadratic regression along columns.
#
# For every ordered pair of distinct columns (x, y) the script fits the
# trendline
#
#     y = A + B*x + C*x^2
#
# by least squares over the rows in which both fields are numeric, and
# prints one line per pair in the form
#
#     <10*log10(r^2)> (<y header>) = (<C>)(<x header>)^2 + (<B>)(<x header>) + (<A>)
#
# where r^2 = 1 - SSE/SST is the coefficient of determination of the fit.
# Pairs whose r^2 is not positive (including degenerate fits) are not printed.
#
# The first record supplies the column labels: a non-numeric field is used
# verbatim, a numeric field is labelled "col<N>".

BEGIN {
    OFMT = "%.9g"

    # Pieces of the regular expression used to recognise a numeric field.
    sign     = "[+-]?"
    decimal  = "[0-9]+[.]?[0-9]*"
    # At least one digit is required after the point so that a lone "."
    # is not mistaken for a number.
    fraction = "[.][0-9]+"
    exponent = "([Ee]" sign "[0-9]+)?"
    number   = "^" sign "(" decimal "|" fraction ")" exponent "$"
}

# Return the label of column `col`; columns that did not exist in the first
# record fall back to "col<N>".
function label(col) {
    return (col in header) ? header[col] : "col" col
}

# Column labels come from the first record.
NR == 1 {
    for (n = 1; n <= NF; n++) {
        if ($n ~ number)
            header[n] = "col" n
        else
            header[n] = $n
    }
}

# Accumulate, per (x, y) pair, the running means and centred co-moments of
# u = x, v = x^2 and w = y.
NF > 0 {
    if (NF > nf_max)
        nf_max = NF

    for (y = 1; y <= NF; y++) {
        if ($y !~ number)
            continue

        for (x = 1; x <= NF; x++) {
            # A pair contributes only when both of its fields are numeric.
            if (x == y || $x !~ number)
                continue

            u = $x + 0
            v = u * u
            w = $y + 0

            pairs = ++cnt[x,y]

            # Deviations from the means *before* this row is included ...
            du = u - mean_u[x,y]
            dv = v - mean_v[x,y]
            dw = w - mean_w[x,y]

            mean_u[x,y] += du / pairs
            mean_v[x,y] += dv / pairs
            mean_w[x,y] += dw / pairs

            # ... and from the means *after* it is included.
            eu = u - mean_u[x,y]
            ev = v - mean_v[x,y]
            ew = w - mean_w[x,y]

            # Welford-style update: old deviation times new deviation keeps
            # each co-moment equal to the exact centred sum of products.
            m_uu[x,y] += du * eu
            m_uv[x,y] += du * ev
            m_vv[x,y] += dv * ev
            m_uw[x,y] += du * ew
            m_vw[x,y] += dv * ew
            m_ww[x,y] += dw * ew
        }
    }
}

END {
    for (x = 1; x <= nf_max; x++) {
        for (y = 1; y <= nf_max; y++) {
            if (x == y || !((x,y) in cnt))
                continue

            # SST == 0 means y is constant: r is undefined, nothing to report.
            sst = m_ww[x,y]
            if (sst <= 0)
                continue

            # Solve the 2x2 normal equations for B and C. The common
            # 1/(n-1) factor of the covariances cancels, so the raw
            # co-moments are used directly.
            den = m_uu[x,y] * m_vv[x,y] - m_uv[x,y] * m_uv[x,y]
            if (den > 0) {
                coef_c = (m_vw[x,y] * m_uu[x,y] - m_uw[x,y] * m_uv[x,y]) / den
                coef_b = (m_uw[x,y] * m_vv[x,y] - m_vw[x,y] * m_uv[x,y]) / den
            } else {
                # x and x^2 are collinear (e.g. x is constant): no fit.
                coef_c = 0
                coef_b = 0
            }
            coef_a = mean_w[x,y] - coef_b * mean_u[x,y] - coef_c * mean_v[x,y]

            # Residual sum of squares of the least-squares fit.
            sse = sst - coef_b * m_uw[x,y] - coef_c * m_vw[x,y]

            # Clamp against rounding so log() never sees a value above 1
            # and non-positive r^2 is skipped instead of producing NaN.
            r2 = 1 - sse / sst
            if (r2 > 1)
                r2 = 1
            if (r2 <= 0)
                continue

            printf(OFMT OFS "(%s)" OFS " = (" OFMT ")(%s)^2" OFS " + (" OFMT ")(%s)" OFS " + (" OFMT ")" ORS,
                   10.0*log(r2)/log(10), label(y), coef_c, label(x), coef_b, label(x), coef_a)
        }
    }
}