diff options
| author | wukong <wukong@longaeva> | 2018-06-20 20:52:04 -0700 |
|---|---|---|
| committer | wukong <wukong@longaeva> | 2018-06-20 20:52:04 -0700 |
| commit | e42cee748f5bc38d11742739b5e2cad4b6a07c43 (patch) | |
| tree | d8158336637af6d2d02e6a766f4d4f57986cfadf /quad_reg.awk | |
| parent | b65bcc4d948ef1cdabb4267dea6238da9c79042b (diff) | |
moved example data into data folder; added initial version of quad_reg.awk;
Diffstat (limited to 'quad_reg.awk')
| -rw-r--r-- | quad_reg.awk | 91 |
1 files changed, 91 insertions, 0 deletions
diff --git a/quad_reg.awk b/quad_reg.awk new file mode 100644 index 0000000..8939947 --- /dev/null +++ b/quad_reg.awk @@ -0,0 +1,91 @@ +#!/usr/bin/awk -f + +### lin_reg2.awk +# simple linear regression between columns + +BEGIN { + sign = "[+-]?" + decimal = "[0-9]+[.]?[0-9]*" + fraction = "[.][0-9]*" + exponent = "([Ee]" sign "[0-9]+)?" + number = "^" sign "(" decimal "|" fraction ")" exponent "$" +} + +NR == 1 { + for (n=1; n<=NF; n++) + ($n ~ number) ? header[n] = "col" n : header[n] = $n +} + +NF > 0 { + if (NF > nf_max) + nf_max = NF + + ### iterate over columns + for (y=1; y<=nf_max; y++) { + if ($y ~ number) { + + ### mean + count[y] += 1 + sum[y] += $y + sum2[y] += $y*$y + mean[y] = sum[y]/count[y] + + ### difference from the mean + delta[y] = $y - mean[y] + sum_delta[y] += delta[y] + sum_delta2[y] += delta[y]*delta[y] + + ### sample variance + (count[y] > 1) ? var[y] = sum_delta2[y]/(count[y] - 1) : var[y] = "" + + # x = row, y = col + for (x=1; x<=nf_max; x++) { + count[x,y] += 1 + sum_xy[x,y] += $x*$y + sum_delta_xy[x,y] += delta[x]*delta[y] + + # correlation + r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y]) + (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1 + + ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x]) + if (ab_den[x,y]) { + a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y] + b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y] + } + else { + a[x,y] = 0 + b[x,y] = 1 + } + + ### error estimate + err_den[x,y] = count[x,y]*(count[x,y] - 2) + if (count[x,y] > 2) { + err[x,y] = $y - (a[x,y] + b[x,y]*$x) + sum_err2[x,y] += err[x,y]*err[x,y] + } + b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x] + if (b_err_den[x,y]) + b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y]) + a_err_den[x,y] = count[x,y]*b_err_den[x,y] + if (a_err_den[x,y]) + a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y] + } + } + else + continue + } +} + +END { + for (y=1; y<=nf_max; y++) { + for (x=1; x<=nf_max; x++) { + if (x != y && r[x,y]) { + printf(OFMT OFS "(%s)" OFS " = (" OFMT " +/- " OFMT ")(%s)" OFS " + (" OFMT " +/- " OFMT ")" ORS, + 10.0*log(r[x,y]*r[x,y])/log(10), header[y], b[x,y], b_err[x,y], header[x], + a[x,y], a_err[x,y]) + } + } + } +} + |
