From d1f6c89be163d9399d569e01458242d8ce15e041 Mon Sep 17 00:00:00 2001 From: wukong Date: Sun, 9 Sep 2018 23:48:31 -0700 Subject: added summations to quad_reg added in-progress lpf.awk (low pass filter), an adaptation of convolution script (conv.awk) to use delmitied columns as input additional tweaking of OFMT, OFS, and conditional print statements --- ABOUT.TXT | 5 +-- diff.awk | 1 + lpf.awk | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ mean.awk | 1 + mean_avg.awk | 1 + quad_reg.awk | 62 +++++++++++++++-------------- sum1.awk | 7 +++- sum2.awk | 6 ++- sum3.awk | 4 +- sum4.awk | 28 ++++++++----- 10 files changed, 194 insertions(+), 46 deletions(-) create mode 100644 lpf.awk diff --git a/ABOUT.TXT b/ABOUT.TXT index f471f31..b16ad44 100644 --- a/ABOUT.TXT +++ b/ABOUT.TXT @@ -1,3 +1,2 @@ -repo: awk - -desc: experiments in awk, etc. +repo: awk; +desc: experiments in awk, etc.; diff --git a/diff.awk b/diff.awk index c90a0e0..015669a 100644 --- a/diff.awk +++ b/diff.awk @@ -4,6 +4,7 @@ # numerical diff along columns BEGIN { + OFS = FS sign = "[+-]?" decimal = "[0-9]+[.]?[0-9]*" fraction = "[.][0-9]*" diff --git a/lpf.awk b/lpf.awk new file mode 100644 index 0000000..814020f --- /dev/null +++ b/lpf.awk @@ -0,0 +1,125 @@ +#!/usr/bin/awk -f + +### lpf.awk +# Low Pass Filter with Hardcoded FIR Window + +BEGIN { + OFS = FS + sign = "[+-]?" + decimal = "[0-9]+[.]?[0-9]*" + fraction = "[.][0-9]*" + exponent = "([Ee]" sign "[0-9]+)?" + number = "^" sign "(" decimal "|" fraction ")" exponent "$" + + #H = ARGV[1] + #H = 1.0 + #H = "1.00 1.00 1.00" # rect + #H = "0.25 0.50 0.25" # von Hann + H = "0.23 0.54 0.23" # Hamming + window_size = split(H, H_arr, "[ ]*") +} + +NR == 1 { + for (y=1; y<=NF; y++) + ($y ~ number) ? header[y] = "col" y : header[y] = $y +} + +NF > 0 { + if (NF > nf_max) + nf_max = NF + + input_size = window_size + output_size = (input_size + window_size - 1) + + ### columns + for (y=1; y<=nf_max; y++) { + if ($y == header[y]) + printf(header[y] OFS header[y] "_lpf") + if ($y ~ number) { + count[y]++ + + # rotate input buffer + for (n=1; n<=input_size; n++) { + X_arr[y,n] = X_arr[y,n+1] + } + X_arr[y,input_size] = $y + + Y[y] = 0 + for (n=1; n<=window_size; n++) { + for (m=1; m<=input_size; m++) { + if (n <= window_size) { + Y[y] += H_arr[n-m+1]*X_arr[y,m] + continue + } + if ((n > window_size) && (n <= input_size)) { + Y[y] += H_arr[n-m+1]*X_arr[y,m] + continue + } + if ((n > window_size) && (n > input_size)) { + Y[y] += H_arr[n-m+1]*X_arr[y,m] + continue + } + else { + Y[y] += 0 + continue + } + } + } + printf(X_arr[y,input_size] OFS Y[y]) + } + + if (y < nf_max) + printf(OFS) + else + printf(ORS) + + } + + + +} + +END { + ### rows + for (x=1; x<=window_size; x++) { + ### columns + for (y=1; y<=nf_max; y++) { + # rotate input buffer + for (n=1; n<=input_size; n++) { + X_arr[y,n] = X_arr[y,n+1] + #print X_arr[y,n] + } + #delete X_arr[y,input_size] + #input_size = length(X_arr) + #print length(X_arr) + + Y[y] = 0 + for (n=1; n<=window_size; n++) { + for (m=1; m<=input_size; m++) { + if (n <= window_size) { + Y[y] += H_arr[n-m+1]*X_arr[y,m] + continue + } + if ((n > window_size) && (n <= input_size)) { + Y[y] += H_arr[n-m+1]*X_arr[y,m] + continue + } + if ((n > window_size) && (n > input_size)) { + Y[y] += H_arr[n-m+1]*X_arr[y,m] + continue + } + else { + Y[y] += 0 + continue + } + } + } + printf(X_arr[y,input_size] OFS Y[y]) + if (y < nf_max) + printf(OFS) + else + printf(ORS) + X_arr[input_size] = 0 + } + } +} diff --git a/mean.awk b/mean.awk index aa5ec8e..b509473 100644 --- a/mean.awk +++ b/mean.awk @@ -4,6 +4,7 @@ # calculate mean average BEGIN { + OFS = FS sign = "[+-±]?" decimal = "[0-9]+[.]?[0-9]*" fraction = "[.][0-9]*" diff --git a/mean_avg.awk b/mean_avg.awk index aeee3db..8a5a3c5 100644 --- a/mean_avg.awk +++ b/mean_avg.awk @@ -4,6 +4,7 @@ # average columns of numerical data BEGIN { + OFS = FS #sign = "[+-]?" #decimal = "[0-9]+[.]?[0-9]*" #fraction = "[.][0-9]*" diff --git a/quad_reg.awk b/quad_reg.awk index 8939947..1b30afd 100644 --- a/quad_reg.awk +++ b/quad_reg.awk @@ -1,9 +1,10 @@ #!/usr/bin/awk -f -### lin_reg2.awk +### quad_reg.awk # simple linear regression between columns BEGIN { + OFMT="%.9g" sign = "[+-]?" decimal = "[0-9]+[.]?[0-9]*" fraction = "[.][0-9]*" @@ -28,7 +29,10 @@ NF > 0 { count[y] += 1 sum[y] += $y sum2[y] += $y*$y + sum3[y] += $y*$y*$y + sum4[y] += $y*$y*$y*$y mean[y] = sum[y]/count[y] + mean2[y] = sum2[y]/count[y] ### difference from the mean delta[y] = $y - mean[y] @@ -38,38 +42,39 @@ NF > 0 { ### sample variance (count[y] > 1) ? var[y] = sum_delta2[y]/(count[y] - 1) : var[y] = "" - # x = row, y = col + # x = row, y = col, trendline: y = A + Bx + Cx^2 for (x=1; x<=nf_max; x++) { count[x,y] += 1 sum_xy[x,y] += $x*$y + sum_x2y[x,y] += $x*$x*$y sum_delta_xy[x,y] += delta[x]*delta[y] - # correlation - r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y]) - (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1 + # covariances + if (count[x,y] > 1) { + s_xx[x,y] = sum2[x]/(count[x,y]) - mean[x]*mean[x] + s_xy[x,y] = sum_xy[x,y]/(count[x,y]) - mean[x]*mean[y] + s_xx2[x,y] = sum3[x]/(count[x,y]) - mean[x]*mean2[x] + s_x2x2[x,y] = sum4[x]/(count[x,y]) - mean2[x]*mean2[x] + s_x2y[x,y] = sum_x2y[x]/(count[x,y]) - mean2[x]*mean[y] + } - ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x]) - if (ab_den[x,y]) { - a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y] - b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y] + bc_den[x,y] = (s_xx[x,y]*s_x2x2[x,y] - s_xx2[x,y]*s_xx2[x,y]) + if (bc_den[x,y]) { + c[x,y] = (s_x2y[x,y]*s_xx[x,y] - s_xy[x,y]*s_xx2[x,y])/bc_den[x,y] + b[x,y] = (s_xy[x,y]*s_x2x2[x,y] - s_x2y[x,y]*s_xx2[x,y])/bc_den[x,y] } else { - a[x,y] = 0 - b[x,y] = 1 + c[x,y] = 0 + b[x,y] = 0 } + a[x,y] = mean[y] - b[x,y]*mean[x] - c[x,y]*mean[x]*mean[x] - ### error estimate - err_den[x,y] = count[x,y]*(count[x,y] - 2) - if (count[x,y] > 2) { - err[x,y] = $y - (a[x,y] + b[x,y]*$x) - sum_err2[x,y] += err[x,y]*err[x,y] - } - b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x] - if (b_err_den[x,y]) - b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y]) - a_err_den[x,y] = count[x,y]*b_err_den[x,y] - if (a_err_den[x,y]) - a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y] + # error estimate + err[x,y] = ($y - (a[x,y] + b[x,y]*$x + c[x,y]*$x*$x)) + sum_err2[x,y] += err[x,y]*err[x,y] + + # correlation + sum_delta2[y] ? r2[x,y] = sum_err2[x,y]/sum_delta2[y] : r2[x,y] = 1 } } else @@ -78,12 +83,11 @@ NF > 0 { } END { - for (y=1; y<=nf_max; y++) { - for (x=1; x<=nf_max; x++) { - if (x != y && r[x,y]) { - printf(OFMT OFS "(%s)" OFS " = (" OFMT " +/- " OFMT ")(%s)" OFS " + (" OFMT " +/- " OFMT ")" ORS, - 10.0*log(r[x,y]*r[x,y])/log(10), header[y], b[x,y], b_err[x,y], header[x], - a[x,y], a_err[x,y]) + for (x=1; x<=nf_max; x++) { + for (y=1; y<=nf_max; y++) { + if (x != y && r2[x,y]) { + printf(OFMT OFS "(%s)" OFS " = (" OFMT ")(%s)^2" OFS " + (" OFMT ")(%s)" OFS " + (" OFMT ")" ORS, + 10.0*log(r2[x,y])/log(10), header[y], c[x,y], header[x], b[x,y], header[x], a[x,y]) } } } diff --git a/sum1.awk b/sum1.awk index 61d96e0..8f1419b 100644 --- a/sum1.awk +++ b/sum1.awk @@ -5,11 +5,13 @@ # output: sum of each column # missing entries are treated as zeros +BEGIN { OFS = FS } + { - for (i=1; i<=NF; i++) - sum[i] += $i if (NF > nf_max) nf_max = NF + for (i=1; i<=NF; i++) + sum[i] += $i } END { @@ -18,3 +20,4 @@ END { printf((i < nf_max) ? OFS : ORS) } } + diff --git a/sum2.awk b/sum2.awk index 979d133..694c047 100644 --- a/sum2.awk +++ b/sum2.awk @@ -3,7 +3,9 @@ ### sum2.awk, print column sums # check that each line has the same number of fields as line one -NR==1 { nf_max = NF } +BEGIN { OFS = FS } + +NR == 1 { nf_max = NF } { for (i=1; i<=NF; i++) @@ -13,6 +15,6 @@ NR==1 { nf_max = NF } } END { - for (i=1; i<=NF; i++) + for (i=1; i<=nf_max; i++) printf(OFMT "%s", sum[i], i < nf_max ? OFS : ORS) } diff --git a/sum3.awk b/sum3.awk index bca92e3..3e4661f 100644 --- a/sum3.awk +++ b/sum3.awk @@ -11,7 +11,9 @@ function isnum(n) { } -NR==1 { +BEGIN { OFS = FS } + +NR == 1 { nfld = NF for (i=1; i<=NF; i++) numcol[i] = isnum($i) diff --git a/sum4.awk b/sum4.awk index 6a06455..aa3f044 100644 --- a/sum4.awk +++ b/sum4.awk @@ -4,6 +4,7 @@ # input: rows of integers and strings # output: sums of numeric columns + function isnum(n) { sign = "[+-]?" decimal = "[0-9]+[.]?[0-9]*" @@ -13,24 +14,33 @@ function isnum(n) { return n ~ number } -NR==1 { + +BEGIN { OFS = FS } + +NR == 1 { nf_max = NF - for (i=1; i<=NF; i++) { - (!isnum($i)) ? header[i] = $i : header[i] = "col" i - } + for (i=1; i<=NF; i++) + isnum($i) ? header[i] = "col" i : header[i] = $i } { + if (NF > nf_max) + nf_max = NF for (i=1; i<=NF; i++) { - sum[i] += $i - count[i]++ + if ($i == header[i]) + continue + if (isnum($i)) { + count[i]++ + sum[i] += $i + } } } END { for (i=1; i<=nf_max; i++) { - if (header[i]) - printf("%s:" OFS, header[i]) - printf(OFMT ORS, sum[i]) + printf((header[i]) ? header[i] OFS : OFS) + printf((count[i]) ? count[i] OFS sum[i] : OFS) + printf(ORS) } } + -- cgit v1.2.3