summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorwukong <wukong@longaeva>2018-09-09 23:48:31 -0700
committerwukong <wukong@longaeva>2018-09-09 23:48:31 -0700
commitd1f6c89be163d9399d569e01458242d8ce15e041 (patch)
treea263822af5cb1532f9eb1abe7b7ae6334c27d170
parente42cee748f5bc38d11742739b5e2cad4b6a07c43 (diff)
added summations to quad_reg
added in-progress lpf.awk (low pass filter), an adaptation of the convolution script (conv.awk) to use delimited columns as input; additional tweaking of OFMT, OFS, and conditional print statements
-rw-r--r--ABOUT.TXT5
-rw-r--r--diff.awk1
-rw-r--r--lpf.awk125
-rw-r--r--mean.awk1
-rw-r--r--mean_avg.awk1
-rw-r--r--quad_reg.awk62
-rw-r--r--sum1.awk7
-rw-r--r--sum2.awk6
-rw-r--r--sum3.awk4
-rw-r--r--sum4.awk28
10 files changed, 194 insertions, 46 deletions
diff --git a/ABOUT.TXT b/ABOUT.TXT
index f471f31..b16ad44 100644
--- a/ABOUT.TXT
+++ b/ABOUT.TXT
@@ -1,3 +1,2 @@
-repo: awk
-
-desc: experiments in awk, etc.
+repo: awk;
+desc: experiments in awk, etc.;
diff --git a/diff.awk b/diff.awk
index c90a0e0..015669a 100644
--- a/diff.awk
+++ b/diff.awk
@@ -4,6 +4,7 @@
# numerical diff along columns
BEGIN {
+ OFS = FS
sign = "[+-]?"
decimal = "[0-9]+[.]?[0-9]*"
fraction = "[.][0-9]*"
diff --git a/lpf.awk b/lpf.awk
new file mode 100644
index 0000000..814020f
--- /dev/null
+++ b/lpf.awk
@@ -0,0 +1,125 @@
+#!/usr/bin/awk -f
+
+### lpf.awk
+# Low Pass Filter with Hardcoded FIR Window
+
+BEGIN {
+    # write output with the same separator the input is read with
+    OFS = FS
+
+    # building blocks of an anchored regex that recognises a numeric field:
+    # optional sign, digits with optional fraction (or a bare fraction),
+    # and an optional exponent
+    sign = "[+-]?"
+    decimal = "[0-9]+[.]?[0-9]*"
+    fraction = "[.][0-9]*"
+    exponent = "([Ee]" sign "[0-9]+)?"
+    number = "^" sign "(" decimal "|" fraction ")" exponent "$"
+
+    # FIR window coefficients, blank separated; the alternatives are kept
+    # here so the window can be switched by moving the comment marker
+    #H = ARGV[1]
+    #H = 1.0
+    #H = "1.00 1.00 1.00" # rect
+    #H = "0.25 0.50 0.25" # von Hann
+    H = "0.23 0.54 0.23" # Hamming
+
+    # A single-space separator selects awk's default blank splitting.
+    # A pattern such as "[ ]*" can match the empty string, and the result
+    # of splitting on zero-length matches differs between awk implementations.
+    window_size = split(H, H_arr, " ")
+}
+
+NR == 1 {
+    # Give every column of the first record a name: a numeric cell gets the
+    # generated name "col<N>", any other cell is taken as the column's header.
+    for (y = 1; y <= NF; y++) {
+        if ($y ~ number)
+            header[y] = "col" y
+        else
+            header[y] = $y
+    }
+}
+
+### push_sample(col, value)
+# Shift the sample buffer of column `col` one slot towards index 1,
+# dropping the oldest sample, and store `value` as the newest sample
+# in slot input_size.
+function push_sample(col, value,    i) {
+    for (i = 1; i < input_size; i++)
+        X_arr[col,i] = X_arr[col,i+1]
+    X_arr[col,input_size] = value
+}
+
+### fir_output(col)
+# Return the filter output for the current buffer of column `col`:
+# the sum over m of H_arr[window_size-m+1] * X_arr[col,m].
+# Only this fully overlapped convolution term is one output sample;
+# summing the partially overlapped terms as well inflates the gain
+# (a constant input would come out doubled with a unit-sum window).
+function fir_output(col,    m, acc) {
+    acc = 0
+    for (m = 1; m <= input_size; m++)
+        acc += H_arr[window_size - m + 1] * X_arr[col,m]
+    return acc
+}
+
+NF > 0 {
+    if (NF > nf_max)
+        nf_max = NF
+
+    # the sample buffer holds exactly one window's worth of samples
+    input_size = window_size
+
+    ### columns
+    for (y = 1; y <= nf_max; y++) {
+        # header cell: echo the name and add a name for the filtered column;
+        # the text is passed as an argument so a "%" in it is printed literally
+        if ($y == header[y])
+            printf("%s%s%s_lpf", header[y], OFS, header[y])
+
+        # numeric cell: print the raw sample followed by its filtered value
+        if ($y ~ number) {
+            count[y]++
+            push_sample(y, $y)
+            Y[y] = fir_output(y)
+            printf("%s", X_arr[y,input_size] OFS Y[y])
+        }
+
+        printf("%s", (y < nf_max) ? OFS : ORS)
+    }
+}
+
+END {
+    ### rows: flush the samples still held in the buffers by shifting in
+    ### empty samples (which count as 0 in the arithmetic)
+    # NOTE(review): on the last pass every slot is empty, so the filtered
+    # value is always 0 -- confirm whether window_size-1 passes was intended.
+    for (x = 1; x <= window_size; x++) {
+        ### columns
+        for (y = 1; y <= nf_max; y++) {
+            push_sample(y, "")
+            Y[y] = fir_output(y)
+            # the newest slot is empty here, so the raw-sample cell is blank
+            printf("%s", X_arr[y,input_size] OFS Y[y])
+            printf("%s", (y < nf_max) ? OFS : ORS)
+        }
+    }
+}
diff --git a/mean.awk b/mean.awk
index aa5ec8e..b509473 100644
--- a/mean.awk
+++ b/mean.awk
@@ -4,6 +4,7 @@
# calculate mean average
BEGIN {
+ OFS = FS
sign = "[+-±]?"
decimal = "[0-9]+[.]?[0-9]*"
fraction = "[.][0-9]*"
diff --git a/mean_avg.awk b/mean_avg.awk
index aeee3db..8a5a3c5 100644
--- a/mean_avg.awk
+++ b/mean_avg.awk
@@ -4,6 +4,7 @@
# average columns of numerical data
BEGIN {
+ OFS = FS
#sign = "[+-]?"
#decimal = "[0-9]+[.]?[0-9]*"
#fraction = "[.][0-9]*"
diff --git a/quad_reg.awk b/quad_reg.awk
index 8939947..1b30afd 100644
--- a/quad_reg.awk
+++ b/quad_reg.awk
@@ -1,9 +1,10 @@
#!/usr/bin/awk -f
-### lin_reg2.awk
+### quad_reg.awk
# quadratic regression between columns
BEGIN {
+ OFMT="%.9g"
sign = "[+-]?"
decimal = "[0-9]+[.]?[0-9]*"
fraction = "[.][0-9]*"
@@ -28,7 +29,10 @@ NF > 0 {
count[y] += 1
sum[y] += $y
sum2[y] += $y*$y
+ sum3[y] += $y*$y*$y
+ sum4[y] += $y*$y*$y*$y
mean[y] = sum[y]/count[y]
+ mean2[y] = sum2[y]/count[y]
### difference from the mean
delta[y] = $y - mean[y]
@@ -38,38 +42,39 @@ NF > 0 {
### sample variance
(count[y] > 1) ? var[y] = sum_delta2[y]/(count[y] - 1) : var[y] = ""
- # x = row, y = col
+ # x = row, y = col, trendline: y = A + Bx + Cx^2
for (x=1; x<=nf_max; x++) {
count[x,y] += 1
sum_xy[x,y] += $x*$y
+ sum_x2y[x,y] += $x*$x*$y
sum_delta_xy[x,y] += delta[x]*delta[y]
- # correlation
- r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y])
- (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1
+ # covariances, each formed as mean(u*v) - mean(u)*mean(v)
+ # for u, v taken from x, x^2 and y
+ if (count[x,y] > 1) {
+   s_xx[x,y] = sum2[x]/(count[x,y]) - mean[x]*mean[x]
+   s_xy[x,y] = sum_xy[x,y]/(count[x,y]) - mean[x]*mean[y]
+   s_xx2[x,y] = sum3[x]/(count[x,y]) - mean[x]*mean2[x]
+   s_x2x2[x,y] = sum4[x]/(count[x,y]) - mean2[x]*mean2[x]
+   # sum_x2y is accumulated per (x,y) pair, so it has to be read with
+   # both subscripts; sum_x2y[x] alone is never assigned and reads as 0
+   s_x2y[x,y] = sum_x2y[x,y]/(count[x,y]) - mean2[x]*mean[y]
+ }
- ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x])
- if (ab_den[x,y]) {
- a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y]
- b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y]
+ bc_den[x,y] = (s_xx[x,y]*s_x2x2[x,y] - s_xx2[x,y]*s_xx2[x,y])
+ if (bc_den[x,y]) {
+ c[x,y] = (s_x2y[x,y]*s_xx[x,y] - s_xy[x,y]*s_xx2[x,y])/bc_den[x,y]
+ b[x,y] = (s_xy[x,y]*s_x2x2[x,y] - s_x2y[x,y]*s_xx2[x,y])/bc_den[x,y]
}
else {
- a[x,y] = 0
- b[x,y] = 1
+ c[x,y] = 0
+ b[x,y] = 0
}
+ a[x,y] = mean[y] - b[x,y]*mean[x] - c[x,y]*mean[x]*mean[x]
- ### error estimate
- err_den[x,y] = count[x,y]*(count[x,y] - 2)
- if (count[x,y] > 2) {
- err[x,y] = $y - (a[x,y] + b[x,y]*$x)
- sum_err2[x,y] += err[x,y]*err[x,y]
- }
- b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x]
- if (b_err_den[x,y])
- b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y])
- a_err_den[x,y] = count[x,y]*b_err_den[x,y]
- if (a_err_den[x,y])
- a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y]
+ # error estimate
+ err[x,y] = ($y - (a[x,y] + b[x,y]*$x + c[x,y]*$x*$x))
+ sum_err2[x,y] += err[x,y]*err[x,y]
+
+ # correlation
+ sum_delta2[y] ? r2[x,y] = sum_err2[x,y]/sum_delta2[y] : r2[x,y] = 1
}
}
else
@@ -78,12 +83,11 @@ NF > 0 {
}
END {
- for (y=1; y<=nf_max; y++) {
- for (x=1; x<=nf_max; x++) {
- if (x != y && r[x,y]) {
- printf(OFMT OFS "(%s)" OFS " = (" OFMT " +/- " OFMT ")(%s)" OFS " + (" OFMT " +/- " OFMT ")" ORS,
- 10.0*log(r[x,y]*r[x,y])/log(10), header[y], b[x,y], b_err[x,y], header[x],
- a[x,y], a_err[x,y])
+ for (x=1; x<=nf_max; x++) {
+ for (y=1; y<=nf_max; y++) {
+ if (x != y && r2[x,y]) {
+ printf(OFMT OFS "(%s)" OFS " = (" OFMT ")(%s)^2" OFS " + (" OFMT ")(%s)" OFS " + (" OFMT ")" ORS,
+ 10.0*log(r2[x,y])/log(10), header[y], c[x,y], header[x], b[x,y], header[x], a[x,y])
}
}
}
diff --git a/sum1.awk b/sum1.awk
index 61d96e0..8f1419b 100644
--- a/sum1.awk
+++ b/sum1.awk
@@ -5,11 +5,13 @@
# output: sum of each column
# missing entries are treated as zeros
+BEGIN { OFS = FS }
+
{
- for (i=1; i<=NF; i++)
- sum[i] += $i
if (NF > nf_max)
nf_max = NF
+ for (i=1; i<=NF; i++)
+ sum[i] += $i
}
END {
@@ -18,3 +20,4 @@ END {
printf((i < nf_max) ? OFS : ORS)
}
}
+
diff --git a/sum2.awk b/sum2.awk
index 979d133..694c047 100644
--- a/sum2.awk
+++ b/sum2.awk
@@ -3,7 +3,9 @@
### sum2.awk, print column sums
# check that each line has the same number of fields as line one
-NR==1 { nf_max = NF }
+BEGIN { OFS = FS }
+
+NR == 1 { nf_max = NF }
{
for (i=1; i<=NF; i++)
@@ -13,6 +15,6 @@ NR==1 { nf_max = NF }
}
END {
- for (i=1; i<=NF; i++)
+ for (i=1; i<=nf_max; i++)
printf(OFMT "%s", sum[i], i < nf_max ? OFS : ORS)
}
diff --git a/sum3.awk b/sum3.awk
index bca92e3..3e4661f 100644
--- a/sum3.awk
+++ b/sum3.awk
@@ -11,7 +11,9 @@ function isnum(n) {
}
-NR==1 {
+BEGIN { OFS = FS }
+
+NR == 1 {
nfld = NF
for (i=1; i<=NF; i++)
numcol[i] = isnum($i)
diff --git a/sum4.awk b/sum4.awk
index 6a06455..aa3f044 100644
--- a/sum4.awk
+++ b/sum4.awk
@@ -4,6 +4,7 @@
# input: rows of integers and strings
# output: sums of numeric columns
+
function isnum(n) {
sign = "[+-]?"
decimal = "[0-9]+[.]?[0-9]*"
@@ -13,24 +14,33 @@ function isnum(n) {
return n ~ number
}
-NR==1 {
+
+BEGIN { OFS = FS }
+
+NR == 1 {
nf_max = NF
- for (i=1; i<=NF; i++) {
- (!isnum($i)) ? header[i] = $i : header[i] = "col" i
- }
+ for (i=1; i<=NF; i++)
+ isnum($i) ? header[i] = "col" i : header[i] = $i
}
{
+ if (NF > nf_max)
+ nf_max = NF
for (i=1; i<=NF; i++) {
- sum[i] += $i
- count[i]++
+ if ($i == header[i])
+ continue
+ if (isnum($i)) {
+ count[i]++
+ sum[i] += $i
+ }
}
}
END {
for (i=1; i<=nf_max; i++) {
- if (header[i])
- printf("%s:" OFS, header[i])
- printf(OFMT ORS, sum[i])
+ printf((header[i]) ? header[i] OFS : OFS)
+ printf((count[i]) ? count[i] OFS sum[i] : OFS)
+ printf(ORS)
}
}
+