diff options
| -rw-r--r-- | ABOUT.TXT | 5 | ||||
| -rw-r--r-- | diff.awk | 1 | ||||
| -rw-r--r-- | lpf.awk | 125 | ||||
| -rw-r--r-- | mean.awk | 1 | ||||
| -rw-r--r-- | mean_avg.awk | 1 | ||||
| -rw-r--r-- | quad_reg.awk | 62 | ||||
| -rw-r--r-- | sum1.awk | 7 | ||||
| -rw-r--r-- | sum2.awk | 6 | ||||
| -rw-r--r-- | sum3.awk | 4 | ||||
| -rw-r--r-- | sum4.awk | 28 |
10 files changed, 194 insertions, 46 deletions
@@ -1,3 +1,2 @@ -repo: awk - -desc: experiments in awk, etc. +repo: awk; +desc: experiments in awk, etc.; @@ -4,6 +4,7 @@ # numerical diff along columns BEGIN { + OFS = FS sign = "[+-]?" decimal = "[0-9]+[.]?[0-9]*" fraction = "[.][0-9]*" @@ -0,0 +1,125 @@ +#!/usr/bin/awk -f
+
+### lpf.awk
+# Low Pass Filter with Hardcoded FIR Window
+
+# BEGIN: runs once before any input is read.  Sets the output separator,
+# builds the pattern used to recognise numeric fields, and loads the
+# hardcoded FIR window coefficients into H_arr / window_size.
+BEGIN {
+    # write fields with the same separator they are read with
+    OFS = FS
+
+    # pieces of an anchored pattern for a whole numeric field:
+    # optional sign, then digits with an optional fractional part or a
+    # leading-dot fraction, then an optional exponent
+    sign = "[+-]?"
+    decimal = "[0-9]+[.]?[0-9]*"
+    fraction = "[.][0-9]*"
+    exponent = "([Ee]" sign "[0-9]+)?"
+    number = "^" sign "(" decimal "|" fraction ")" exponent "$"
+
+    # window coefficients as a space-separated list; the commented-out
+    # lines are alternative windows kept for reference
+    #H = ARGV[1]
+    #H = 1.0
+    #H = "1.00 1.00 1.00" # rect
+    #H = "0.25 0.50 0.25" # von Hann
+    H = "0.23 0.54 0.23" # Hamming
+
+    # Split on runs of one or more spaces.  A separator such as "[ ]*" can
+    # match the empty string, and awk implementations do not agree on how
+    # to split in that case; "[ ]+" yields the same coefficients everywhere.
+    window_size = split(H, H_arr, "[ ]+")
+}
+
+# First record only: pick a name for every column.
+NR == 1 {
+    for (y=1; y<=NF; y++) {
+        # a numeric field means the file has no header row for this
+        # column, so generate "colN"; otherwise the field is the name
+        if ($y ~ number)
+            header[y] = "col" y
+        else
+            header[y] = $y
+    }
+}
+
+# Every non-empty record: for each column, echo the header name on the
+# header row, or push a numeric field into that column's input buffer and
+# print the field followed by the filtered value.
+NF > 0 {
+    # widest record seen so far; the END rule iterates over the same range
+    if (NF > nf_max)
+        nf_max = NF
+
+    # the input buffer holds one window's worth of samples per column
+    input_size = window_size
+    output_size = (input_size + window_size - 1)
+
+    ### columns
+    for (y=1; y<=nf_max; y++) {
+        # header row: print the column name and the name of its filtered twin
+        # ("%s" keeps a '%' inside a header from being read as a format)
+        if ($y == header[y])
+            printf("%s", header[y] OFS header[y] "_lpf")
+        if ($y ~ number) {
+            count[y]++
+
+            # rotate input buffer: drop the oldest sample, append the new one
+            for (n=1; n<input_size; n++)
+                X_arr[y,n] = X_arr[y,n+1]
+            X_arr[y,input_size] = $y
+
+            # Accumulate H_arr[n-m+1]*X_arr[y,m].  Terms with m > n would
+            # index H_arr below 1, where no coefficient exists (value 0),
+            # so they are skipped.
+            # NOTE(review): the sum runs over every n in 1..window_size
+            # rather than producing a single output sample -- confirm this
+            # is the intended filter response.
+            Y[y] = 0
+            for (n=1; n<=window_size; n++)
+                for (m=1; m<=n && m<=input_size; m++)
+                    Y[y] += H_arr[n-m+1]*X_arr[y,m]
+
+            printf("%s", X_arr[y,input_size] OFS Y[y])
+        }
+
+        # separator between columns, record terminator after the last one
+        if (y < nf_max)
+            printf("%s", OFS)
+        else
+            printf("%s", ORS)
+    }
+}
+
+# END: after the last record, emit window_size further rows so the samples
+# still held in each column's input buffer are shifted out through the
+# filter.
+END {
+    ### rows
+    for (x=1; x<=window_size; x++) {
+        ### columns
+        for (y=1; y<=nf_max; y++) {
+            # rotate input buffer: slot input_size+1 is never assigned, so
+            # an empty value (numerically 0) moves into the newest position
+            for (n=1; n<=input_size; n++)
+                X_arr[y,n] = X_arr[y,n+1]
+
+            # Accumulate H_arr[n-m+1]*X_arr[y,m].  Terms with m > n would
+            # index H_arr below 1, where no coefficient exists (value 0),
+            # so they are skipped.
+            # NOTE(review): the sum runs over every n in 1..window_size
+            # rather than producing a single output sample -- confirm this
+            # is the intended filter response.
+            Y[y] = 0
+            for (n=1; n<=window_size; n++)
+                for (m=1; m<=n && m<=input_size; m++)
+                    Y[y] += H_arr[n-m+1]*X_arr[y,m]
+
+            # the input column is empty on these rows; "%s" keeps a '%' in
+            # the separators from being read as a format directive
+            printf("%s", X_arr[y,input_size] OFS Y[y])
+            if (y < nf_max)
+                printf("%s", OFS)
+            else
+                printf("%s", ORS)
+        }
+    }
+}
@@ -4,6 +4,7 @@ # calculate mean average
BEGIN {
+ OFS = FS
sign = "[+-±]?"
decimal = "[0-9]+[.]?[0-9]*"
fraction = "[.][0-9]*"
diff --git a/mean_avg.awk b/mean_avg.awk index aeee3db..8a5a3c5 100644 --- a/mean_avg.awk +++ b/mean_avg.awk @@ -4,6 +4,7 @@ # average columns of numerical data BEGIN { + OFS = FS #sign = "[+-]?" #decimal = "[0-9]+[.]?[0-9]*" #fraction = "[.][0-9]*" diff --git a/quad_reg.awk b/quad_reg.awk index 8939947..1b30afd 100644 --- a/quad_reg.awk +++ b/quad_reg.awk @@ -1,9 +1,10 @@ #!/usr/bin/awk -f -### lin_reg2.awk +### quad_reg.awk # simple linear regression between columns BEGIN { + OFMT="%.9g" sign = "[+-]?" decimal = "[0-9]+[.]?[0-9]*" fraction = "[.][0-9]*" @@ -28,7 +29,10 @@ NF > 0 { count[y] += 1 sum[y] += $y sum2[y] += $y*$y + sum3[y] += $y*$y*$y + sum4[y] += $y*$y*$y*$y mean[y] = sum[y]/count[y] + mean2[y] = sum2[y]/count[y] ### difference from the mean delta[y] = $y - mean[y] @@ -38,38 +42,39 @@ NF > 0 { ### sample variance (count[y] > 1) ? var[y] = sum_delta2[y]/(count[y] - 1) : var[y] = "" - # x = row, y = col + # x = row, y = col, trendline: y = A + Bx + Cx^2 for (x=1; x<=nf_max; x++) { count[x,y] += 1 sum_xy[x,y] += $x*$y + sum_x2y[x,y] += $x*$x*$y sum_delta_xy[x,y] += delta[x]*delta[y] - # correlation - r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y]) - (r_den[x,y]) ? 
r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1 + # covariances + if (count[x,y] > 1) { + s_xx[x,y] = sum2[x]/(count[x,y]) - mean[x]*mean[x] + s_xy[x,y] = sum_xy[x,y]/(count[x,y]) - mean[x]*mean[y] + s_xx2[x,y] = sum3[x]/(count[x,y]) - mean[x]*mean2[x] + s_x2x2[x,y] = sum4[x]/(count[x,y]) - mean2[x]*mean2[x] + s_x2y[x,y] = sum_x2y[x]/(count[x,y]) - mean2[x]*mean[y] + } - ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x]) - if (ab_den[x,y]) { - a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y] - b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y] + bc_den[x,y] = (s_xx[x,y]*s_x2x2[x,y] - s_xx2[x,y]*s_xx2[x,y]) + if (bc_den[x,y]) { + c[x,y] = (s_x2y[x,y]*s_xx[x,y] - s_xy[x,y]*s_xx2[x,y])/bc_den[x,y] + b[x,y] = (s_xy[x,y]*s_x2x2[x,y] - s_x2y[x,y]*s_xx2[x,y])/bc_den[x,y] } else { - a[x,y] = 0 - b[x,y] = 1 + c[x,y] = 0 + b[x,y] = 0 } + a[x,y] = mean[y] - b[x,y]*mean[x] - c[x,y]*mean[x]*mean[x] - ### error estimate - err_den[x,y] = count[x,y]*(count[x,y] - 2) - if (count[x,y] > 2) { - err[x,y] = $y - (a[x,y] + b[x,y]*$x) - sum_err2[x,y] += err[x,y]*err[x,y] - } - b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x] - if (b_err_den[x,y]) - b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y]) - a_err_den[x,y] = count[x,y]*b_err_den[x,y] - if (a_err_den[x,y]) - a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y] + # error estimate + err[x,y] = ($y - (a[x,y] + b[x,y]*$x + c[x,y]*$x*$x)) + sum_err2[x,y] += err[x,y]*err[x,y] + + # correlation + sum_delta2[y] ? 
r2[x,y] = sum_err2[x,y]/sum_delta2[y] : r2[x,y] = 1 } } else @@ -78,12 +83,11 @@ NF > 0 { } END { - for (y=1; y<=nf_max; y++) { - for (x=1; x<=nf_max; x++) { - if (x != y && r[x,y]) { - printf(OFMT OFS "(%s)" OFS " = (" OFMT " +/- " OFMT ")(%s)" OFS " + (" OFMT " +/- " OFMT ")" ORS, - 10.0*log(r[x,y]*r[x,y])/log(10), header[y], b[x,y], b_err[x,y], header[x], - a[x,y], a_err[x,y]) + for (x=1; x<=nf_max; x++) { + for (y=1; y<=nf_max; y++) { + if (x != y && r2[x,y]) { + printf(OFMT OFS "(%s)" OFS " = (" OFMT ")(%s)^2" OFS " + (" OFMT ")(%s)" OFS " + (" OFMT ")" ORS, + 10.0*log(r2[x,y])/log(10), header[y], c[x,y], header[x], b[x,y], header[x], a[x,y]) } } } @@ -5,11 +5,13 @@ # output: sum of each column # missing entries are treated as zeros +BEGIN { OFS = FS } + { - for (i=1; i<=NF; i++) - sum[i] += $i if (NF > nf_max) nf_max = NF + for (i=1; i<=NF; i++) + sum[i] += $i } END { @@ -18,3 +20,4 @@ END { printf((i < nf_max) ? OFS : ORS) } } + @@ -3,7 +3,9 @@ ### sum2.awk, print column sums # check that each line has the same number of fields as line one -NR==1 { nf_max = NF } +BEGIN { OFS = FS } + +NR == 1 { nf_max = NF } { for (i=1; i<=NF; i++) @@ -13,6 +15,6 @@ NR==1 { nf_max = NF } } END { - for (i=1; i<=NF; i++) + for (i=1; i<=nf_max; i++) printf(OFMT "%s", sum[i], i < nf_max ? OFS : ORS) } @@ -11,7 +11,9 @@ function isnum(n) { } -NR==1 { +BEGIN { OFS = FS } + +NR == 1 { nfld = NF for (i=1; i<=NF; i++) numcol[i] = isnum($i) @@ -4,6 +4,7 @@ # input: rows of integers and strings # output: sums of numeric columns + function isnum(n) { sign = "[+-]?" decimal = "[0-9]+[.]?[0-9]*" @@ -13,24 +14,33 @@ function isnum(n) { return n ~ number } -NR==1 { + +BEGIN { OFS = FS } + +NR == 1 { nf_max = NF - for (i=1; i<=NF; i++) { - (!isnum($i)) ? header[i] = $i : header[i] = "col" i - } + for (i=1; i<=NF; i++) + isnum($i) ? 
header[i] = "col" i : header[i] = $i } { + if (NF > nf_max) + nf_max = NF for (i=1; i<=NF; i++) { - sum[i] += $i - count[i]++ + if ($i == header[i]) + continue + if (isnum($i)) { + count[i]++ + sum[i] += $i + } } } END { for (i=1; i<=nf_max; i++) { - if (header[i]) - printf("%s:" OFS, header[i]) - printf(OFMT ORS, sum[i]) + printf((header[i]) ? header[i] OFS : OFS) + printf((count[i]) ? count[i] OFS sum[i] : OFS) + printf(ORS) } } + |
