summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ABOUT.TXT5
-rw-r--r--LICENSE.TXT29
-rw-r--r--conv.awk36
-rw-r--r--diff.awk37
-rw-r--r--example.csv11
-rw-r--r--example_4-61.csv5
-rw-r--r--hamming.awk29
-rw-r--r--hamming.sh30
-rw-r--r--kalman.awk114
-rw-r--r--kalman_init.awk97
-rw-r--r--lin_reg.awk101
-rw-r--r--lin_reg.py50
-rw-r--r--lin_reg1.awk80
-rw-r--r--lin_reg2.awk101
-rw-r--r--lin_reg_example.py44
-rw-r--r--mean.awk13
-rw-r--r--mean_avg.awk62
-rw-r--r--pi.awk24
-rw-r--r--pi.sh24
-rw-r--r--sum1.awk21
-rw-r--r--sum2.awk16
-rw-r--r--sum3.awk31
-rw-r--r--sum4.awk37
-rw-r--r--table_4-3.csv11
24 files changed, 1008 insertions, 0 deletions
diff --git a/ABOUT.TXT b/ABOUT.TXT
new file mode 100644
index 0000000..9ecb91e
--- /dev/null
+++ b/ABOUT.TXT
@@ -0,0 +1,5 @@
+
+ repo: awk
+
+ desc: experiments in awk, etc.
+
diff --git a/LICENSE.TXT b/LICENSE.TXT
new file mode 100644
index 0000000..5817b3d
--- /dev/null
+++ b/LICENSE.TXT
@@ -0,0 +1,29 @@
+
+ Copyright (c) 2016-2018 <0x09c5>. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from this
+ software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/conv.awk b/conv.awk
new file mode 100644
index 0000000..10c99c7
--- /dev/null
+++ b/conv.awk
@@ -0,0 +1,36 @@
+### conv.awk
+# [PoC] linear convolution (with hardcoded IR window).
+
+
+BEGIN {
+ X = ARGV[1]
+ input_size = split(X, X_arr)
+ #H = "1.0 1.0 1.0" # rect
+ #H = "0.25 0.50 0.25" # von Hann
+ H = "0.23 0.54 0.23" # Hamming
+ window_size = split(H, H_arr)
+ output_size = (input_size + window_size - 1)
+ for (n=1; n <= output_size; n++) {
+ Y_arr[n] = 0
+ for (m=1; m <= input_size; m++) {
+ if (n <= window_size) {
+ Y_arr[n] = Y_arr[n] + H_arr[n-m+1]*X_arr[m]
+ continue
+ }
+ if ((n > window_size) && (n <= input_size)) {
+ Y_arr[n] = Y_arr[n] + H_arr[n-m+1]*X_arr[m]
+ continue
+ }
+ if ((n > window_size) && (n > input_size)) {
+ Y_arr[n] = Y_arr[n] + H_arr[n-m+1]*X_arr[m]
+ continue
+ }
+ else {
+ Y_arr[n] = Y_arr[n] + 0
+ continue
+ }
+ }
+ print Y_arr[n]
+ }
+}
+
diff --git a/diff.awk b/diff.awk
new file mode 100644
index 0000000..3923f8b
--- /dev/null
+++ b/diff.awk
@@ -0,0 +1,37 @@
+
+BEGIN {
+ sign = "[+-]?"
+ decimal = "[0-9]+[.]?[0-9]*"
+ fraction = "[.][0-9]*"
+ exponent = "([Ee]" sign "[0-9]+)?"
+ number = "^" sign "(" decimal "|" fraction ")" exponent "$"
+}
+
+NR == 1 {
+ header_nf = NF
+ for (n=1; n<=NF; n++) {
+ if ($n !~ number)
+ header[n] = $n
+ else
+ header[n] = n
+ }
+}
+
+NF != 0 {
+ printf("\n%s: %s\n", NR, $0)
+ if (NF > max_nf)
+ max_nf = NF
+
+ ### iterate over columns
+ for (y=1; y<=max_nf; y++) {
+ if ($y ~ number) {
+ count[y] += 1
+ data[y] = $y
+ sum[y] += $y
+ mean[y] = data[y]
+ diff[y] = data[y] - data_prev[y]
+ print header[y], data[y], sum[y], diff[y]
+ data_prev[y] = data[y]
+ }
+ }
+}
diff --git a/example.csv b/example.csv
new file mode 100644
index 0000000..d325f03
--- /dev/null
+++ b/example.csv
@@ -0,0 +1,11 @@
+correct,attitude
+17,94
+13,73
+12,59
+15,80
+16,93
+14,85
+16,66
+16,79
+18,77
+19,91
diff --git a/example_4-61.csv b/example_4-61.csv
new file mode 100644
index 0000000..b3e1e10
--- /dev/null
+++ b/example_4-61.csv
@@ -0,0 +1,5 @@
+V,Hrs
+105,1200
+110,1000
+115,920
+120,750
diff --git a/hamming.awk b/hamming.awk
new file mode 100644
index 0000000..30140a5
--- /dev/null
+++ b/hamming.awk
@@ -0,0 +1,29 @@
+### hamming.awk
+# generate a Hamming window
+# https://en.wikipedia.org/wiki/Window_function provides a few values for the
+# 'a0' and 'a1' parameters of the raised cosine.
+
+a1EGIN {
+
+ N = ARGV[1]
+
+ pi = 4*(4*atan2(1,5) - atan2(1,239))
+ #a0 = (25.0/46.0)
+ #a1 = (21.0/46.0)
+
+ ### optimal values for equal-ripple
+ a0 = 0.53836
+ a1 = 0.46164
+
+ ### R.W. Hamming, "Digital Filters"
+ # H = "0.23 0.54 0.23"
+
+ for (n=0; n<N; n++) {
+ if (N > 1) {
+ w[n] = 0.5*a0 - 0.5*a1*cos((2*pi*n)/(N - 1))
+ sum_w += w[n]
+ }
+ printf("%f %f\n", w[n], w[n]/sum_w)
+ }
+
+}
diff --git a/hamming.sh b/hamming.sh
new file mode 100644
index 0000000..fdd6ba6
--- /dev/null
+++ b/hamming.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+### hamming.sh
+# Convolve input vector with 3-element Hamming window [0.23, 0.54, 0.23]
+# note: bash built-in $(( )) constructs only support integer arithmetic
+# floating point operation will require another language such as awk, perl, or
+# python.
+
+### input vector as array
+X=($1)
+echo ${#X[@]}
+
+### Hamming window vector
+H=(0.23 0.54 0.23)
+echo ${#H[@]}
+
+### length of output
+L=$(( ${#X[@]} + ${#H[@]} - 1 ))
+
+### iterate over both vectors
+for N in $( seq 0 1 $L ) ; do
+ for M in $( seq 0 1 $(( ${#H[@]} - 1 )) ) ; do
+ if [ $(( $N >= $M )) ] ; then
+ printf "N=$N\tM=$M\tN-M=$(( $N - $M ))\n"
+ else
+ continue
+ fi
+ done
+done
+
diff --git a/kalman.awk b/kalman.awk
new file mode 100644
index 0000000..9eb6f79
--- /dev/null
+++ b/kalman.awk
@@ -0,0 +1,114 @@
+### lin_reg.awk
+# simple linear regression between columns
+
+BEGIN {
+ sign = "[+-]?"
+ decimal = "[0-9]+[.]?[0-9]*"
+ fraction = "[.][0-9]*"
+ exponent = "([Ee]" sign "[0-9]+)?"
+ number = "^" sign "(" decimal "|" fraction ")" exponent "$"
+}
+
+NR == 1 {
+ header_nf = NF
+ for (n=1; n<=NF; n++) {
+ if ($n !~ number)
+ header[n] = $n
+ else
+ header[n] = n
+ }
+}
+
+NF != 0 {
+ #printf("\n%s: %s", NR, $0)
+ if (NF > max_nf)
+ max_nf = NF
+
+ ### iterate over columns
+ for (y=1; y<= NF; y++) {
+ if ($y ~ number) {
+
+ ### mean
+ count[y] += 1
+ sum[y] += $y
+ sum2[y] += $y*$y
+ mean[y] = sum[y]/count[y]
+
+ ### difference from the mean
+ delta[y] = $y - mean[y]
+ sum_delta[y] += delta[y]
+ sum_delta2[y] += delta[y]*delta[y]
+
+ ### sample variance
+ if (count[y] - 1)
+ var[y] = sum_delta2[y]/(count[y] - 1)
+ else
+ var[y] = 0
+
+ # x = row, y = col
+ for (x=1; x<=max_nf; x++) {
+ count[x,y] += 1
+ sum_xy[x,y] += $x*$y
+ sum_delta_xy[x,y] += delta[x]*delta[y]
+
+ # covariance
+ if (count[x,y] > 1) {
+ KG = 1.0
+ meas_cov[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1)
+ est_err_cov[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1)
+ }
+ else {
+ meas_cov[x,y] = 0.0
+ est_err_cov[x,y] = (1.0 - KG)*est_err_cov_last[x,y]
+ }
+
+ # correlation
+ r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y])
+ if (r_den[x,y])
+ r[x,y] = sum_delta_xy[x,y]/r_den[x,y]
+ else
+ r[x,y] = 1
+
+ ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x])
+ if (ab_den[x,y]) {
+ a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y]
+ b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y]
+ }
+ else {
+ a[x,y] = 0
+ b[x,y] = 1
+ }
+
+ ### error estimate
+ err_den[x,y] = count[x,y]*(count[x,y] - 2)
+ if (count[x,y] > 2) {
+ err[x,y] = $y - (a[x,y] + b[x,y]*$x)
+ sum_err2[x,y] += err[x,y]*err[x,y]
+ }
+ b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x]
+ if (b_err_den[x,y])
+ b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y])
+ a_err_den[x,y] = count[x,y]*b_err_den[x,y]
+ if (a_err_den[x,y])
+ a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y]
+
+ est_err_cov_last[x,y] = est_err_cov[x,y]
+ }
+ }
+ else
+ continue
+ }
+}
+
+END {
+ for (y=1; y<=max_nf; y++) {
+ for (x=1; x<=max_nf; x++) {
+ if (x != y && r[x,y]) {
+ printf("\n %g %g \t (%s) \t = (%g +/- %g)(%s) \t + (%g +/- %g)",
+ meas_cov[x,y], est_err_cov[x,y], header[y], b[x,y], b_err[x,y], header[x],
+ a[x,y], a_err[x,y])
+ }
+ }
+ }
+}
+
diff --git a/kalman_init.awk b/kalman_init.awk
new file mode 100644
index 0000000..a63a864
--- /dev/null
+++ b/kalman_init.awk
@@ -0,0 +1,97 @@
+#!/usr/bin/awk
+
+### kalman.awk
+# kalman csv experiemnt
+
+BEGIN {
+ sign = "[+-]?"
+ decimal = "[0-9]+[.]?[0-9]*"
+ fraction = "[.][0-9]*"
+ exponent = "([Ee]" sign "[0-9]+)?"
+ number = "^" sign "(" decimal "|" fraction ")" exponent "$"
+}
+
+NR == 1 {
+ header_nf = NF
+ for (n=1; n<=NF; n++) {
+ if ($n !~ number)
+ header[n] = $n
+ }
+}
+
+NF != 0 {
+ #print "\n"
+ print "\n" NR ": \t" $0
+ if (NF > max_nf)
+ max_nf = NF
+
+ for (n=1; n <= NF; n++) {
+ if ($n ~ number) {
+ count[n] = ($n ~ number) + (last[n] ~ number) + (last2[n] ~ number)
+ diff[n] = $n - last[n]
+ diff2[n] = 0.5*($n - 2.0*last[n] + last2[n])
+ sum[n] = $n + last[n] + last2[n]
+ if (count[n])
+ mean[n] = sum[n]/count[n]
+ else
+ mean[n] = sum[n]
+
+ delta[n] = $n - mean[n]
+ delta2[n] = delta[n]*delta[n]
+ sum_delta[n] = delta[n] + delta_last[n] + delta_last2[n]
+ sum_delta2[n] = delta2[n] + delta2_last[n] + delta2_last2[n]
+ if (count[n] > 1)
+ var[n] = sum_delta2[n]/(count[n] - 1)
+ else
+ var[n] = 0
+
+ if (count[n]) {
+ meas_err[n] = sqrt(var[n]/count[n])
+ }
+
+ ### error covariance
+ for (m=1; m<=max_nf; m++) {
+ count[m,n]++
+ sum_xy[m,n] += 1
+ delta_xy[m,n] = delta[m]*delta[n]
+ if (count[m,n] > 1)
+ meas_cov[m,n] = delta[m]*delta[n]/(count[m,n] - 1)
+ else
+ meas_cov[m,n] = 0
+ est_err_last_cov[m,n] = 1
+ print meas_cov[m,n], est_err_last_cov[m,n]
+ }
+
+ ### kalman gain
+ if (count[n] <= 1) {
+ KG[n] = 1
+ est_err[n] = sqrt(diff[n]*diff[n]) + 2.0*sqrt(diff2[n]*diff2[n])
+ est_err_last[n] = sqrt(diff[n]*diff[n]) + 2.0*sqrt(diff2[n]*diff2[n])
+ }
+ else
+ KG[n] = est_err_last[n]/(est_err_last[n] + meas_err[n])
+
+ ### update estimate and estimated error
+ est[n] = est_last[n] + KG[n]*($n - est_last[n])
+ est_err[n] = (1.0 - KG[n])*est_err_last[n]
+
+ ### visual check
+ printf "meas: \t%g %g %g", $n, meas_err[n], count[n]
+ printf "\nd1: \t%g", diff[n]
+ printf "\nd2: \t%g", diff2[n]
+ printf "\nest: \t%g %g %g", est[n], est_err[n], KG[n]
+
+ last2[n] = last[n]
+ last[n] = $n
+ diff_last2[n] = diff_last[n]
+ diff_last[n] = diff[n]
+ delta_last2[n] = delta_last[n]
+ delta_last[n] = delta[n]
+ delta2_last2[n] = delta2_last[n]
+ delta2_last[n] = delta2[n]
+ est_last[n] = est[n]
+ est_error_last[n] = est_error[n]
+ printf "\n"
+ }
+ }
+}
diff --git a/lin_reg.awk b/lin_reg.awk
new file mode 100644
index 0000000..d950f80
--- /dev/null
+++ b/lin_reg.awk
@@ -0,0 +1,101 @@
+### lin_reg.awk
+# simple linear regression between columns
+
+BEGIN {
+ sign = "[+-]?"
+ decimal = "[0-9]+[.]?[0-9]*"
+ fraction = "[.][0-9]*"
+ exponent = "([Ee]" sign "[0-9]+)?"
+ number = "^" sign "(" decimal "|" fraction ")" exponent "$"
+}
+
+NR == 1 {
+ header_nf = NF
+ for (n=1; n<=NF; n++) {
+ if ($n !~ number)
+ header[n] = $n
+ else
+ header[n] = n
+ }
+}
+
+NF != 0 {
+ #printf("\n%s: %s", NR, $0)
+ if (NF > max_nf)
+ max_nf = NF
+
+ ### iterate over columns
+ for (y=1; y<=NF; y++) {
+ if ($y ~ number) {
+
+ ### mean
+ count[y] += 1
+ sum[y] += $y
+ sum2[y] += $y*$y
+ mean[y] = sum[y]/count[y]
+
+ ### difference from the mean
+ delta[y] = $y - mean[y]
+ sum_delta[y] += delta[y]
+ sum_delta2[y] += delta[y]*delta[y]
+
+ ### sample variance
+ if (count[y] - 1)
+ var[y] = sum_delta2[y]/(count[y] - 1)
+ else
+ var[y] = 0
+
+ # x = row, y = col
+ for (x=1; x<=max_nf; x++) {
+ count[x,y] += 1
+ sum_xy[x,y] += $x*$y
+ sum_delta_xy[x,y] += delta[x]*delta[y]
+
+ # correlation
+ r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y])
+ if (r_den[x,y])
+ r[x,y] = sum_delta_xy[x,y]/r_den[x,y]
+ else
+ r[x,y] = 1
+
+ ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x])
+ if (ab_den[x,y]) {
+ a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y]
+ b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y]
+ }
+ else {
+ a[x,y] = 0
+ b[x,y] = 1
+ }
+
+ ### error estimate
+ err_den[x,y] = count[x,y]*(count[x,y] - 2)
+ if (count[x,y] > 2) {
+ err[x,y] = $y - (a[x,y] + b[x,y]*$x)
+ sum_err2[x,y] += err[x,y]*err[x,y]
+ }
+ b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x]
+ if (b_err_den[x,y])
+ b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y])
+ a_err_den[x,y] = count[x,y]*b_err_den[x,y]
+ if (a_err_den[x,y])
+ a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y]
+ }
+ }
+ else
+ continue
+ }
+}
+
+END {
+ for (y=1; y<=max_nf; y++) {
+ for (x=1; x<=max_nf; x++) {
+ if (x != y && r[x,y]) {
+ printf("\n %g \t (%s) \t = (%g +/- %g)(%s) \t + (%g +/- %g)",
+ 10.0*log(r[x,y]*r[x,y])/log(10), header[y], b[x,y], b_err[x,y], header[x],
+ a[x,y], a_err[x,y])
+ }
+ }
+ }
+}
+
diff --git a/lin_reg.py b/lin_reg.py
new file mode 100644
index 0000000..8fb88b2
--- /dev/null
+++ b/lin_reg.py
@@ -0,0 +1,50 @@
+#!/usr/bin/python
+#/usr/local/bin/python2.7
+
+import numpy as np
+
+
+def main():
+
+ dataz = np.genfromtxt('example.csv', delimiter=',', names=True)
+
+ y = dataz['attitude']
+ x = dataz['correct']
+ print 'x = {} {}'.format(x, np.mean(x))
+ print 'y = {} {}'.format(y, np.mean(y))
+
+ delta_x = x - np.mean(x)
+ delta_y = y - np.mean(y)
+ delta_xy = delta_x*delta_y
+ delta2_x = delta_x**2
+ delta2_y = delta_y**2
+ #print 'delta_x = {} {}'.format(delta_x, np.sum(delta_x))
+ #print 'delta_y = {} {}'.format(delta_y, np.sum(delta_y))
+ #print 'delta_xy = {} {}'.format(delta_xy, np.sum(delta_xy))
+ #print 'delta2_x = {} {}'.format(delta2_x, np.sum(delta2_x))
+ #print 'delta2_y = {} {}'.format(delta2_y, np.sum(delta2_y))
+
+ sx = np.sqrt(np.sum(delta2_x)/(delta2_x.size - 1))
+ sy = np.sqrt(np.sum(delta2_y)/(delta2_y.size - 1))
+ r = np.sum(delta_xy)/np.sqrt(np.sum(delta2_x)*np.sum(delta2_y))
+ #print 'sx = {}'.format(sx)
+ #print 'sy = {}'.format(sy)
+ print 'r = {}'.format(r)
+ print 'r^2 = {}'.format(r**2)
+
+ b = r*(sy/sx)
+ a = np.mean(y) - b*np.mean(x)
+ print 'b = {}'.format(b)
+ print 'a = {}'.format(a)
+
+ err = y - (a + b*x)
+ b_err = np.sqrt(np.sum(err**2.0)/((x.size - 2)*np.sum(delta2_x)))
+ a_err = np.sqrt(np.sum(x**2.0)*np.sum(err**2.0)/(x.size*(x.size - 2)*np.sum(delta2_x)))
+ print 'b_err = {}'.format(b_err)
+ print 'a_err = {}'.format(a_err)
+
+ print ' (attitude) \t= {}(correct) + {}'.format(b, a)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/lin_reg1.awk b/lin_reg1.awk
new file mode 100644
index 0000000..866cab8
--- /dev/null
+++ b/lin_reg1.awk
@@ -0,0 +1,80 @@
+### lin_reg1.awk
+# simple linear regression between individual text columns
+
+BEGIN {
+ sign = "[+-]?"
+ decimal = "[0-9]+[.]?[0-9]*"
+ fraction = "[.][0-9]*"
+ exponent = "([Ee]" sign "[0-9]+)?"
+ number = "^" sign "(" decimal "|" fraction ")" exponent "$"
+}
+
+NR == 1 {
+ header_nf = NF
+ for (n=1; n<=NF; n++) {
+ if ($n !~ number)
+ header[n] = $n
+ else
+ header[n] = sprintf("col_%g", n)
+ }
+}
+
+NF != 0 {
+ if (NF > max_nf)
+ max_nf = NF
+
+ ### iterate over columns
+ for (y=1; y<= NF; y++) {
+ if ($y ~ number) {
+
+ ### mean
+ count[y] += 1
+ sum[y] += $y
+ sum2[y] += $y*$y
+ mean[y] = sum[y]/count[y]
+
+ ### difference from the mean
+ delta[y] = $y - mean[y]
+ sum_delta[y] += delta[y]
+ sum_delta2[y] += delta[y]*delta[y]
+
+ # x = row, y = col
+ for (x=1; x<=max_nf; x++) {
+ count[x,y] += 1
+ sum_xy[x,y] += $x*$y
+ sum_delta_xy[x,y] += delta[x]*delta[y]
+
+ r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y])
+ if (r_den[x,y])
+ r[x,y] = sum_delta_xy[x,y]/r_den[x,y]
+ else
+ r[x,y] = 0
+
+ ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x])
+ if (ab_den[x,y]) {
+ a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y]
+ b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y]
+ }
+ else {
+ a[x,y] = 0
+ b[x,y] = 1
+ }
+ }
+ }
+ else
+ continue
+ }
+}
+
+END {
+ for (y=1; y<=max_nf; y++) {
+ for (x=1; x<=max_nf; x++) {
+ if (x != y && r[x,y]) {
+ r2[x,y] = r[x,y]*r[x,y]
+ printf("\n %f \t (%s) \t = %g(%s) \t + %g",
+ 10.0*log(r2[x,y])/log(10), header[y], b[x,y], header[x], a[x,y])
+ }
+ }
+ }
+}
+
diff --git a/lin_reg2.awk b/lin_reg2.awk
new file mode 100644
index 0000000..a66c78f
--- /dev/null
+++ b/lin_reg2.awk
@@ -0,0 +1,101 @@
+### lin_reg.awk
+# simple linear regression between columns
+
+BEGIN {
+ sign = "[+-]?"
+ decimal = "[0-9]+[.]?[0-9]*"
+ fraction = "[.][0-9]*"
+ exponent = "([Ee]" sign "[0-9]+)?"
+ number = "^" sign "(" decimal "|" fraction ")" exponent "$"
+}
+
+NR == 1 {
+ header_nf = NF
+ for (n=1; n<=NF; n++) {
+ if ($n !~ number)
+ header[n] = $n
+ else
+ header[n] = n
+ }
+}
+
+NF != 0 {
+ #printf("\n%s: %s", NR, $0)
+ if (NF > max_nf)
+ max_nf = NF
+
+ ### iterate over columns
+ for (y=1; y<= NF; y++) {
+ if ($y ~ number) {
+
+ ### mean
+ count[y] += 1
+ sum[y] += $y
+ sum2[y] += $y*$y
+ mean[y] = sum[y]/count[y]
+
+ ### difference from the mean
+ delta[y] = $y - mean[y]
+ sum_delta[y] += delta[y]
+ sum_delta2[y] += delta[y]*delta[y]
+
+ ### sample variance
+ if (count[y] - 1)
+ var[y] = sum_delta2[y]/(count[y] - 1)
+ else
+ var[y] = 0
+
+ # x = row, y = col
+ for (x=1; x<=max_nf; x++) {
+ count[x,y] += 1
+ sum_xy[x,y] += $x*$y
+ sum_delta_xy[x,y] += delta[x]*delta[y]
+
+ # correlation
+ r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y])
+ if (r_den[x,y])
+ r[x,y] = sum_delta_xy[x,y]/r_den[x,y]
+ else
+ r[x,y] = 1
+
+ ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x])
+ if (ab_den[x,y]) {
+ a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y]
+ b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y]
+ }
+ else {
+ a[x,y] = 0
+ b[x,y] = 1
+ }
+
+ ### error estimate
+ err_den[x,y] = count[x,y]*(count[x,y] - 2)
+ if (count[x,y] > 2) {
+ err[x,y] = $y - (a[x,y] + b[x,y]*$x)
+ sum_err2[x,y] += err[x,y]*err[x,y]
+ }
+ b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x]
+ if (b_err_den[x,y])
+ b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y])
+ a_err_den[x,y] = count[x,y]*b_err_den[x,y]
+ if (a_err_den[x,y])
+ a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y]
+ }
+ }
+ else
+ continue
+ }
+}
+
+END {
+ for (y=1; y<=max_nf; y++) {
+ for (x=1; x<=max_nf; x++) {
+ if (x != y && r[x,y]) {
+ printf("\n %g \t (%s) \t = (%g +/- %g)(%s) \t + (%g +/- %g)",
+ 10.0*log(r[x,y]*r[x,y])/log(10), header[y], b[x,y], b_err[x,y], header[x],
+ a[x,y], a_err[x,y])
+ }
+ }
+ }
+}
+
diff --git a/lin_reg_example.py b/lin_reg_example.py
new file mode 100644
index 0000000..3a2043e
--- /dev/null
+++ b/lin_reg_example.py
@@ -0,0 +1,44 @@
+#!/usr/bin/python
+#/usr/local/bin/python2.7
+
+import numpy as np
+
+
+def main():
+
+ dataz = np.genfromtxt('example.csv', delimiter=',', names=True)
+
+ y = dataz['attitude']
+ x = dataz['correct']
+ print 'x = {} {}'.format(x, np.mean(x))
+ print 'y = {} {}'.format(y, np.mean(y))
+
+ delta_x = x - np.mean(x)
+ delta_y = y - np.mean(y)
+ delta_xy = delta_x*delta_y
+ delta2_x = delta_x**2
+ delta2_y = delta_y**2
+ print 'delta_x = {} {}'.format(delta_x, np.sum(delta_x))
+ print 'delta_y = {} {}'.format(delta_y, np.sum(delta_y))
+ print 'delta_xy = {} {}'.format(delta_xy, np.sum(delta_xy))
+ print 'delta2_x = {} {}'.format(delta2_x, np.sum(delta2_x))
+ print 'delta2_y = {} {}'.format(delta2_y, np.sum(delta2_y))
+
+ Sx = np.sqrt(np.sum(delta2_x)/(delta2_x.size - 1))
+ Sy = np.sqrt(np.sum(delta2_y)/(delta2_y.size - 1))
+ r = np.sum(delta_xy)/np.sqrt(np.sum(delta2_x)*np.sum(delta2_y))
+ print 'Sx = {}'.format(Sx)
+ print 'Sy = {}'.format(Sy)
+ print 'r = {}'.format(r)
+ print 'r^2 = {}'.format(r**2)
+
+ b = r*(Sy/Sx)
+ a = np.mean(y) - b*np.mean(x)
+ print 'b = {}'.format(b)
+ print 'a = {}'.format(a)
+
+ print ' (attitude) \t= {}(correct) + {}'.format(b, a)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/mean.awk b/mean.awk
new file mode 100644
index 0000000..45bedee
--- /dev/null
+++ b/mean.awk
@@ -0,0 +1,13 @@
+### mean.awk
+# find mean average of a list of numbers.
+
+BEGIN {
+ X = ARGV[1]
+ inputsize = split(X, Xarr)
+ Yarr[n] = 0
+ SUM = 0
+ for (m=1; m <= inputsize; m++) {
+ SUM =+ Xarr[m]
+ }
+ print SUM
+}
diff --git a/mean_avg.awk b/mean_avg.awk
new file mode 100644
index 0000000..799f96c
--- /dev/null
+++ b/mean_avg.awk
@@ -0,0 +1,62 @@
+#!/usr/bin/awk
+
+### mean_avg.awk
+# average columns of numerical data
+
+BEGIN {
+ sign = "[+-]?"
+ decimal = "[0-9]+[.]?[0-9]*"
+ fraction = "[.][0-9]*"
+ exponent = "([Ee]" sign "[0-9]+)?"
+ number = "^" sign "(" decimal "|" fraction ")" exponent "$"
+}
+
+NR == 1 {
+ header_nf = NF
+ for (n=1; n<=NF; n++) {
+ if ($n !~ number)
+ header[n] = $n
+ }
+}
+
+NF != 0 {
+ if (NF > max_nf)
+ max_nf = NF
+ for (n=1; n <= NF; n++) {
+ if ($n !~ number) {
+ continue
+ }
+ if ($n ~ number) {
+ count[n] += 1
+ sum[n] += $n
+ sum2[n] += $n*$n
+ mean[n] = sum[n]/count[n]
+ delta[n] = $n - mean[n]
+ delta2[n] = delta[n]*delta[n]
+ sum_delta[n] += delta[n]
+ sum_delta2[n] += delta2[n]
+ if ((count[n] - 1) != 0)
+ var[n] = sum_delta2[n]/(count[n] - 1)
+ else
+ var[n] = 0
+ }
+ }
+}
+
+END {
+ printf("\n")
+ printf("%-6s\t%-6s %-6s %-6s\n", "col", "mean", "std_err", "count")
+ for (n=1; n<=max_nf; n++) {
+ if (header[n])
+ printf("%-6s\t", header[n])
+ else
+ printf("%-6g\t", n)
+ if (count[n]) {
+ printf("%-6g ±%-6g %-6g\n",
+ mean[n], 1.96*sqrt(var[n]/count[n]), count[n])
+ }
+ else
+ printf("%2s %2s %2s\n", " ", " ", " ")
+ }
+}
+
diff --git a/pi.awk b/pi.awk
new file mode 100644
index 0000000..7ecc4ab
--- /dev/null
+++ b/pi.awk
@@ -0,0 +1,24 @@
+
+### pi.awk, https://en.wikipedia.org/wiki/Pi
+# In 1706 John Machin used the Gregory–Leibniz series to produce an algorithm
+# that converged much faster. Machin reached 100 digits of π with this
+# formula. Other mathematicians created variants, now known as Machin-like
+# formulae, that were used to set several successive records for calculating
+# digits of π. Machin-like formulae remained the best-known method for
+# calculating π well into the age of computers, and were used to set records
+# for 250 years, culminating in a 620-digit approximation in 1946 by Daniel
+# Ferguson–the best approximation achieved without the aid of a calculating
+# device.
+
+function pi() {
+ return 4*(4*atan2(1,5) - atan2(1,239))
+}
+
+BEGIN {
+ if (ARGV[1] > 0)
+ fig = ARGV[1]
+ else
+ fig = 6
+ str = "%." fig "g\n"
+ printf(str, pi())
+}
diff --git a/pi.sh b/pi.sh
new file mode 100644
index 0000000..83af08a
--- /dev/null
+++ b/pi.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+### pi.sh, https://en.wikipedia.org/wiki/Pi
+# In 1706 John Machin used the Gregory–Leibniz series to produce an algorithm
+# that converged much faster. Machin reached 100 digits of π with this
+# formula. Other mathematicians created variants, now known as Machin-like
+# formulae, that were used to set several successive records for calculating
+# digits of π. Machin-like formulae remained the best-known method for
+# calculating π well into the age of computers, and were used to set records
+# for 250 years, culminating in a 620-digit approximation in 1946 by Daniel
+# Ferguson–the best approximation achieved without the aid of a calculating
+# device.
+
+awk -v fig=${1} '
+ function pi() {
+ return 4*(4*atan2(1,5) - atan2(1,239))
+ }
+ BEGIN {
+ if (fig <= 0)
+ fig = 6
+ str = "%." fig "g\n"
+ printf(str, pi())
+ }
+'
diff --git a/sum1.awk b/sum1.awk
new file mode 100644
index 0000000..02b7d6a
--- /dev/null
+++ b/sum1.awk
@@ -0,0 +1,21 @@
+### sum1.awk, print column sums
+# input: rows of numbers
+# output: sum of each column
+# missing entries are treated as zeros
+
+{
+ for (i=1; i<=NF; i++)
+ sum[i] += $i
+ if (NF > maxfld)
+ maxfld = NF
+}
+
+END {
+ for (i=1; i<=maxfld; i++) {
+ printf("%g", sum[i])
+ if (i < maxfld)
+ printf(" ")
+ else
+ printf("\n")
+ }
+}
diff --git a/sum2.awk b/sum2.awk
new file mode 100644
index 0000000..0b2aad3
--- /dev/null
+++ b/sum2.awk
@@ -0,0 +1,16 @@
+### sum2.awk, print column sums
+# check that each line has the same number of fields as line one
+
+NR==1 { nfld = NF }
+
+{
+ for (i=1; i<=NF; i++)
+ sum[i] += $i
+ if (NF != nfld)
+ print "line " NR " has " NF " entries, not " nfld
+}
+
+END {
+ for (i=1; i<=NF; i++)
+ printf("%g%s", sum[i], i < nfld ? " " : "\n")
+}
diff --git a/sum3.awk b/sum3.awk
new file mode 100644
index 0000000..b4100a7
--- /dev/null
+++ b/sum3.awk
@@ -0,0 +1,31 @@
+### sum3.awk, print sums of numeric columns
+# input: rows of integers and strings
+# output: sums of numeric columns
+# assumes every line has same layout
+
+NR==1 {
+ nfld = NF
+ for (i=1; i<=NF; i++)
+ numcol[i] = isnum($i)
+}
+
+{
+ for (i=1; i<=NF; i++)
+ if (numcol[i])
+ sum[i] += $i
+}
+
+END {
+ for (i=1; i<=nfld; i++) {
+ if (numcol[i])
+ printf("%g", sum[i])
+ else
+ printf("--")
+ printf(i < nfld ? " " : "\n")
+ }
+}
+
+function isnum(n) {
+ return n ~ /^[+-]?[0-9]+$/
+}
+
diff --git a/sum4.awk b/sum4.awk
new file mode 100644
index 0000000..c592409
--- /dev/null
+++ b/sum4.awk
@@ -0,0 +1,37 @@
+### sum4.awk, print sums of numeric columns
+# input: rows of integers and strings
+# output: sums of numeric columns
+
+function isnum(n) {
+ sign = "[+-]?"
+ decimal = "[0-9]+[.]?[0-9]*"
+ fraction = "[.][0-9]+"
+ exponent = "([Ee]" sign "[0-9]+)?"
+ number = "^" sign "(" decimal "|" fraction ")" exponent "$"
+ return n ~ number
+}
+
+NR==1 {
+ nfldz = NF
+ for (i=1; i<=NF; i++) {
+ if (!isnum($i))
+ header[i] = $i
+ }
+}
+
+{
+ for (i=1; i<=NF; i++) {
+ sum[i] += $i
+ count[i]++
+ }
+}
+
+END {
+ for (i=1; i<=nfldz; i++) {
+ if (header[i])
+ printf("%s: \t", header[i])
+ #if (numcol[i])
+ printf("%g\n", sum[i])
+ printf(i < nfldz ? "" : "\n")
+ }
+}
diff --git a/table_4-3.csv b/table_4-3.csv
new file mode 100644
index 0000000..63c0b59
--- /dev/null
+++ b/table_4-3.csv
@@ -0,0 +1,11 @@
+Tc,Vb
+10,420
+20,410
+30,360
+40,360
+50,340
+60,290
+70,300
+80,270
+90,210
+100,200