From 8f263e859e0970ce87b77addc80dec28e8fc7e82 Mon Sep 17 00:00:00 2001 From: wukong Date: Tue, 5 Jun 2018 22:34:51 -0700 Subject: re-init --- ABOUT.TXT | 5 +++ LICENSE.TXT | 29 ++++++++++++++ conv.awk | 36 +++++++++++++++++ diff.awk | 37 +++++++++++++++++ example.csv | 11 ++++++ example_4-61.csv | 5 +++ hamming.awk | 29 ++++++++++++++ hamming.sh | 30 ++++++++++++++ kalman.awk | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++ kalman_init.awk | 97 +++++++++++++++++++++++++++++++++++++++++++++ lin_reg.awk | 101 +++++++++++++++++++++++++++++++++++++++++++++++ lin_reg.py | 50 +++++++++++++++++++++++ lin_reg1.awk | 80 +++++++++++++++++++++++++++++++++++++ lin_reg2.awk | 101 +++++++++++++++++++++++++++++++++++++++++++++++ lin_reg_example.py | 44 +++++++++++++++++++++ mean.awk | 13 ++++++ mean_avg.awk | 62 +++++++++++++++++++++++++++++ pi.awk | 24 +++++++++++ pi.sh | 24 +++++++++++ sum1.awk | 21 ++++++++++ sum2.awk | 16 ++++++++ sum3.awk | 31 +++++++++++++++ sum4.awk | 37 +++++++++++++++++ table_4-3.csv | 11 ++++++ 24 files changed, 1008 insertions(+) create mode 100644 ABOUT.TXT create mode 100644 LICENSE.TXT create mode 100644 conv.awk create mode 100644 diff.awk create mode 100644 example.csv create mode 100644 example_4-61.csv create mode 100644 hamming.awk create mode 100644 hamming.sh create mode 100644 kalman.awk create mode 100644 kalman_init.awk create mode 100644 lin_reg.awk create mode 100644 lin_reg.py create mode 100644 lin_reg1.awk create mode 100644 lin_reg2.awk create mode 100644 lin_reg_example.py create mode 100644 mean.awk create mode 100644 mean_avg.awk create mode 100644 pi.awk create mode 100644 pi.sh create mode 100644 sum1.awk create mode 100644 sum2.awk create mode 100644 sum3.awk create mode 100644 sum4.awk create mode 100644 table_4-3.csv diff --git a/ABOUT.TXT b/ABOUT.TXT new file mode 100644 index 0000000..9ecb91e --- /dev/null +++ b/ABOUT.TXT @@ -0,0 +1,5 @@ + + repo: awk + + desc: experiments in awk, etc. + diff --git a/LICENSE.TXT b/LICENSE.TXT new file mode 100644 index 0000000..5817b3d --- /dev/null +++ b/LICENSE.TXT @@ -0,0 +1,29 @@ + + Copyright (c) 2016-2018 <0x09c5>. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + diff --git a/conv.awk b/conv.awk new file mode 100644 index 0000000..10c99c7 --- /dev/null +++ b/conv.awk @@ -0,0 +1,36 @@ +### conv.awk +# [PoC] linear convolution (with hardcoded IR window). + + +BEGIN { + X = ARGV[1] + input_size = split(X, X_arr) + #H = "1.0 1.0 1.0" # rect + #H = "0.25 0.50 0.25" # von Hann + H = "0.23 0.54 0.23" # Hamming + window_size = split(H, H_arr) + output_size = (input_size + window_size - 1) + for (n=1; n <= output_size; n++) { + Y_arr[n] = 0 + for (m=1; m <= input_size; m++) { + if (n <= window_size) { + Y_arr[n] = Y_arr[n] + H_arr[n-m+1]*X_arr[m] + continue + } + if ((n > window_size) && (n <= input_size)) { + Y_arr[n] = Y_arr[n] + H_arr[n-m+1]*X_arr[m] + continue + } + if ((n > window_size) && (n > input_size)) { + Y_arr[n] = Y_arr[n] + H_arr[n-m+1]*X_arr[m] + continue + } + else { + Y_arr[n] = Y_arr[n] + 0 + continue + } + } + print Y_arr[n] + } +} + diff --git a/diff.awk b/diff.awk new file mode 100644 index 0000000..3923f8b --- /dev/null +++ b/diff.awk @@ -0,0 +1,37 @@ + +BEGIN { + sign = "[+-]?" + decimal = "[0-9]+[.]?[0-9]*" + fraction = "[.][0-9]*" + exponent = "([Ee]" sign "[0-9]+)?" + number = "^" sign "(" decimal "|" fraction ")" exponent "$" +} + +NR == 1 { + header_nf = NF + for (n=1; n<=NF; n++) { + if ($n !~ number) + header[n] = $n + else + header[n] = n + } +} + +NF != 0 { + printf("\n%s: %s\n", NR, $0) + if (NF > max_nf) + max_nf = NF + + ### iterate over columns + for (y=1; y<=max_nf; y++) { + if ($y ~ number) { + count[y] += 1 + data[y] = $y + sum[y] += $y + mean[y] = data[y] + diff[y] = data[y] - data_prev[y] + print header[y], data[y], sum[y], diff[y] + data_prev[y] = data[y] + } + } +} diff --git a/example.csv b/example.csv new file mode 100644 index 0000000..d325f03 --- /dev/null +++ b/example.csv @@ -0,0 +1,11 @@ +correct,attitude +17,94 +13,73 +12,59 +15,80 +16,93 +14,85 +16,66 +16,79 +18,77 +19,91 diff --git a/example_4-61.csv b/example_4-61.csv new file mode 100644 index 0000000..b3e1e10 --- /dev/null +++ b/example_4-61.csv @@ -0,0 +1,5 @@ +V,Hrs +105,1200 +110,1000 +115,920 +120,750 diff --git a/hamming.awk b/hamming.awk new file mode 100644 index 0000000..30140a5 --- /dev/null +++ b/hamming.awk @@ -0,0 +1,29 @@ +### hamming.awk +# generate a Hamming window +# https://en.wikipedia.org/wiki/Window_function provides a few values for the +# 'a0' and 'a1' parameters of the raised cosine. + +a1EGIN { + + N = ARGV[1] + + pi = 4*(4*atan2(1,5) - atan2(1,239)) + #a0 = (25.0/46.0) + #a1 = (21.0/46.0) + + ### optimal values for equal-ripple + a0 = 0.53836 + a1 = 0.46164 + + ### R.W. Hamming, "Digital Filters" + # H = "0.23 0.54 0.23" + + for (n=0; n 1) { + w[n] = 0.5*a0 - 0.5*a1*cos((2*pi*n)/(N - 1)) + sum_w += w[n] + } + printf("%f %f\n", w[n], w[n]/sum_w) + } + +} diff --git a/hamming.sh b/hamming.sh new file mode 100644 index 0000000..fdd6ba6 --- /dev/null +++ b/hamming.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +### hamming.sh +# Convolve input vector with 3-element Hamming window [0.23, 0.54, 0.23] +# note: bash built-in $(( )) constructs only support integer arithmetic +# floating point operation will require another language such as awk, perl, or +# python. + +### input vector as array +X=($1) +echo ${#X[@]} + +### Hamming window vector +H=(0.23 0.54 0.23) +echo ${#H[@]} + +### length of output +L=$(( ${#X[@]} + ${#H[@]} - 1 )) + +### iterate over both vectors +for N in $( seq 0 1 $L ) ; do + for M in $( seq 0 1 $(( ${#H[@]} - 1 )) ) ; do + if [ $(( $N >= $M )) ] ; then + printf "N=$N\tM=$M\tN-M=$(( $N - $M ))\n" + else + continue + fi + done +done + diff --git a/kalman.awk b/kalman.awk new file mode 100644 index 0000000..9eb6f79 --- /dev/null +++ b/kalman.awk @@ -0,0 +1,114 @@ +### lin_reg.awk +# simple linear regression between columns + +BEGIN { + sign = "[+-]?" + decimal = "[0-9]+[.]?[0-9]*" + fraction = "[.][0-9]*" + exponent = "([Ee]" sign "[0-9]+)?" + number = "^" sign "(" decimal "|" fraction ")" exponent "$" +} + +NR == 1 { + header_nf = NF + for (n=1; n<=NF; n++) { + if ($n !~ number) + header[n] = $n + else + header[n] = n + } +} + +NF != 0 { + #printf("\n%s: %s", NR, $0) + if (NF > max_nf) + max_nf = NF + + ### iterate over columns + for (y=1; y<= NF; y++) { + if ($y ~ number) { + + ### mean + count[y] += 1 + sum[y] += $y + sum2[y] += $y*$y + mean[y] = sum[y]/count[y] + + ### difference from the mean + delta[y] = $y - mean[y] + sum_delta[y] += delta[y] + sum_delta2[y] += delta[y]*delta[y] + + ### sample variance + if (count[y] - 1) + var[y] = sum_delta2[y]/(count[y] - 1) + else + var[y] = 0 + + # x = row, y = col + for (x=1; x<=max_nf; x++) { + count[x,y] += 1 + sum_xy[x,y] += $x*$y + sum_delta_xy[x,y] += delta[x]*delta[y] + + # covariance + if (count[x,y] > 1) { + KG = 1.0 + meas_cov[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1) + est_err_cov[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1) + } + else { + meas_cov[x,y] = 0.0 + est_err_cov[x,y] = (1.0 - KG)*est_err_cov_last[x,y] + } + + # correlation + r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y]) + if (r_den[x,y]) + r[x,y] = sum_delta_xy[x,y]/r_den[x,y] + else + r[x,y] = 1 + + ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x]) + if (ab_den[x,y]) { + a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y] + b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y] + } + else { + a[x,y] = 0 + b[x,y] = 1 + } + + ### error estimate + err_den[x,y] = count[x,y]*(count[x,y] - 2) + if (count[x,y] > 2) { + err[x,y] = $y - (a[x,y] + b[x,y]*$x) + sum_err2[x,y] += err[x,y]*err[x,y] + } + b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x] + if (b_err_den[x,y]) + b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y]) + a_err_den[x,y] = count[x,y]*b_err_den[x,y] + if (a_err_den[x,y]) + a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y] + + est_err_cov_last[x,y] = est_err_cov[x,y] + } + } + else + continue + } +} + +END { + for (y=1; y<=max_nf; y++) { + for (x=1; x<=max_nf; x++) { + if (x != y && r[x,y]) { + printf("\n %g %g \t (%s) \t = (%g +/- %g)(%s) \t + (%g +/- %g)", + meas_cov[x,y], est_err_cov[x,y], header[y], b[x,y], b_err[x,y], header[x], + a[x,y], a_err[x,y]) + } + } + } +} + diff --git a/kalman_init.awk b/kalman_init.awk new file mode 100644 index 0000000..a63a864 --- /dev/null +++ b/kalman_init.awk @@ -0,0 +1,97 @@ +#!/usr/bin/awk + +### kalman.awk +# kalman csv experiemnt + +BEGIN { + sign = "[+-]?" + decimal = "[0-9]+[.]?[0-9]*" + fraction = "[.][0-9]*" + exponent = "([Ee]" sign "[0-9]+)?" + number = "^" sign "(" decimal "|" fraction ")" exponent "$" +} + +NR == 1 { + header_nf = NF + for (n=1; n<=NF; n++) { + if ($n !~ number) + header[n] = $n + } +} + +NF != 0 { + #print "\n" + print "\n" NR ": \t" $0 + if (NF > max_nf) + max_nf = NF + + for (n=1; n <= NF; n++) { + if ($n ~ number) { + count[n] = ($n ~ number) + (last[n] ~ number) + (last2[n] ~ number) + diff[n] = $n - last[n] + diff2[n] = 0.5*($n - 2.0*last[n] + last2[n]) + sum[n] = $n + last[n] + last2[n] + if (count[n]) + mean[n] = sum[n]/count[n] + else + mean[n] = sum[n] + + delta[n] = $n - mean[n] + delta2[n] = delta[n]*delta[n] + sum_delta[n] = delta[n] + delta_last[n] + delta_last2[n] + sum_delta2[n] = delta2[n] + delta2_last[n] + delta2_last2[n] + if (count[n] > 1) + var[n] = sum_delta2[n]/(count[n] - 1) + else + var[n] = 0 + + if (count[n]) { + meas_err[n] = sqrt(var[n]/count[n]) + } + + ### error covariance + for (m=1; m<=max_nf; m++) { + count[m,n]++ + sum_xy[m,n] += 1 + delta_xy[m,n] = delta[m]*delta[n] + if (count[m,n] > 1) + meas_cov[m,n] = delta[m]*delta[n]/(count[m,n] - 1) + else + meas_cov[m,n] = 0 + est_err_last_cov[m,n] = 1 + print meas_cov[m,n], est_err_last_cov[m,n] + } + + ### kalman gain + if (count[n] <= 1) { + KG[n] = 1 + est_err[n] = sqrt(diff[n]*diff[n]) + 2.0*sqrt(diff2[n]*diff2[n]) + est_err_last[n] = sqrt(diff[n]*diff[n]) + 2.0*sqrt(diff2[n]*diff2[n]) + } + else + KG[n] = est_err_last[n]/(est_err_last[n] + meas_err[n]) + + ### update estimate and estimated error + est[n] = est_last[n] + KG[n]*($n - est_last[n]) + est_err[n] = (1.0 - KG[n])*est_err_last[n] + + ### visual check + printf "meas: \t%g %g %g", $n, meas_err[n], count[n] + printf "\nd1: \t%g", diff[n] + printf "\nd2: \t%g", diff2[n] + printf "\nest: \t%g %g %g", est[n], est_err[n], KG[n] + + last2[n] = last[n] + last[n] = $n + diff_last2[n] = diff_last[n] + diff_last[n] = diff[n] + delta_last2[n] = delta_last[n] + delta_last[n] = delta[n] + delta2_last2[n] = delta2_last[n] + delta2_last[n] = delta2[n] + est_last[n] = est[n] + est_error_last[n] = est_error[n] + printf "\n" + } + } +} diff --git a/lin_reg.awk b/lin_reg.awk new file mode 100644 index 0000000..d950f80 --- /dev/null +++ b/lin_reg.awk @@ -0,0 +1,101 @@ +### lin_reg.awk +# simple linear regression between columns + +BEGIN { + sign = "[+-]?" + decimal = "[0-9]+[.]?[0-9]*" + fraction = "[.][0-9]*" + exponent = "([Ee]" sign "[0-9]+)?" + number = "^" sign "(" decimal "|" fraction ")" exponent "$" +} + +NR == 1 { + header_nf = NF + for (n=1; n<=NF; n++) { + if ($n !~ number) + header[n] = $n + else + header[n] = n + } +} + +NF != 0 { + #printf("\n%s: %s", NR, $0) + if (NF > max_nf) + max_nf = NF + + ### iterate over columns + for (y=1; y<=NF; y++) { + if ($y ~ number) { + + ### mean + count[y] += 1 + sum[y] += $y + sum2[y] += $y*$y + mean[y] = sum[y]/count[y] + + ### difference from the mean + delta[y] = $y - mean[y] + sum_delta[y] += delta[y] + sum_delta2[y] += delta[y]*delta[y] + + ### sample variance + if (count[y] - 1) + var[y] = sum_delta2[y]/(count[y] - 1) + else + var[y] = 0 + + # x = row, y = col + for (x=1; x<=max_nf; x++) { + count[x,y] += 1 + sum_xy[x,y] += $x*$y + sum_delta_xy[x,y] += delta[x]*delta[y] + + # correlation + r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y]) + if (r_den[x,y]) + r[x,y] = sum_delta_xy[x,y]/r_den[x,y] + else + r[x,y] = 1 + + ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x]) + if (ab_den[x,y]) { + a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y] + b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y] + } + else { + a[x,y] = 0 + b[x,y] = 1 + } + + ### error estimate + err_den[x,y] = count[x,y]*(count[x,y] - 2) + if (count[x,y] > 2) { + err[x,y] = $y - (a[x,y] + b[x,y]*$x) + sum_err2[x,y] += err[x,y]*err[x,y] + } + b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x] + if (b_err_den[x,y]) + b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y]) + a_err_den[x,y] = count[x,y]*b_err_den[x,y] + if (a_err_den[x,y]) + a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y] + } + } + else + continue + } +} + +END { + for (y=1; y<=max_nf; y++) { + for (x=1; x<=max_nf; x++) { + if (x != y && r[x,y]) { + printf("\n %g \t (%s) \t = (%g +/- %g)(%s) \t + (%g +/- %g)", + 10.0*log(r[x,y]*r[x,y])/log(10), header[y], b[x,y], b_err[x,y], header[x], + a[x,y], a_err[x,y]) + } + } + } +} + diff --git a/lin_reg.py b/lin_reg.py new file mode 100644 index 0000000..8fb88b2 --- /dev/null +++ b/lin_reg.py @@ -0,0 +1,50 @@ +#!/usr/bin/python +#/usr/local/bin/python2.7 + +import numpy as np + + +def main(): + + dataz = np.genfromtxt('example.csv', delimiter=',', names=True) + + y = dataz['attitude'] + x = dataz['correct'] + print 'x = {} {}'.format(x, np.mean(x)) + print 'y = {} {}'.format(y, np.mean(y)) + + delta_x = x - np.mean(x) + delta_y = y - np.mean(y) + delta_xy = delta_x*delta_y + delta2_x = delta_x**2 + delta2_y = delta_y**2 + #print 'delta_x = {} {}'.format(delta_x, np.sum(delta_x)) + #print 'delta_y = {} {}'.format(delta_y, np.sum(delta_y)) + #print 'delta_xy = {} {}'.format(delta_xy, np.sum(delta_xy)) + #print 'delta2_x = {} {}'.format(delta2_x, np.sum(delta2_x)) + #print 'delta2_y = {} {}'.format(delta2_y, np.sum(delta2_y)) + + sx = np.sqrt(np.sum(delta2_x)/(delta2_x.size - 1)) + sy = np.sqrt(np.sum(delta2_y)/(delta2_y.size - 1)) + r = np.sum(delta_xy)/np.sqrt(np.sum(delta2_x)*np.sum(delta2_y)) + #print 'sx = {}'.format(sx) + #print 'sy = {}'.format(sy) + print 'r = {}'.format(r) + print 'r^2 = {}'.format(r**2) + + b = r*(sy/sx) + a = np.mean(y) - b*np.mean(x) + print 'b = {}'.format(b) + print 'a = {}'.format(a) + + err = y - (a + b*x) + b_err = np.sqrt(np.sum(err**2.0)/((x.size - 2)*np.sum(delta2_x))) + a_err = np.sqrt(np.sum(x**2.0)*np.sum(err**2.0)/(x.size*(x.size - 2)*np.sum(delta2_x))) + print 'b_err = {}'.format(b_err) + print 'a_err = {}'.format(a_err) + + print ' (attitude) \t= {}(correct) + {}'.format(b, a) + + +if __name__ == '__main__': + main() diff --git a/lin_reg1.awk b/lin_reg1.awk new file mode 100644 index 0000000..866cab8 --- /dev/null +++ b/lin_reg1.awk @@ -0,0 +1,80 @@ +### lin_reg1.awk +# simple linear regression between individual text columns + +BEGIN { + sign = "[+-]?" + decimal = "[0-9]+[.]?[0-9]*" + fraction = "[.][0-9]*" + exponent = "([Ee]" sign "[0-9]+)?" + number = "^" sign "(" decimal "|" fraction ")" exponent "$" +} + +NR == 1 { + header_nf = NF + for (n=1; n<=NF; n++) { + if ($n !~ number) + header[n] = $n + else + header[n] = sprintf("col_%g", n) + } +} + +NF != 0 { + if (NF > max_nf) + max_nf = NF + + ### iterate over columns + for (y=1; y<= NF; y++) { + if ($y ~ number) { + + ### mean + count[y] += 1 + sum[y] += $y + sum2[y] += $y*$y + mean[y] = sum[y]/count[y] + + ### difference from the mean + delta[y] = $y - mean[y] + sum_delta[y] += delta[y] + sum_delta2[y] += delta[y]*delta[y] + + # x = row, y = col + for (x=1; x<=max_nf; x++) { + count[x,y] += 1 + sum_xy[x,y] += $x*$y + sum_delta_xy[x,y] += delta[x]*delta[y] + + r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y]) + if (r_den[x,y]) + r[x,y] = sum_delta_xy[x,y]/r_den[x,y] + else + r[x,y] = 0 + + ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x]) + if (ab_den[x,y]) { + a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y] + b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y] + } + else { + a[x,y] = 0 + b[x,y] = 1 + } + } + } + else + continue + } +} + +END { + for (y=1; y<=max_nf; y++) { + for (x=1; x<=max_nf; x++) { + if (x != y && r[x,y]) { + r2[x,y] = r[x,y]*r[x,y] + printf("\n %f \t (%s) \t = %g(%s) \t + %g", + 10.0*log(r2[x,y])/log(10), header[y], b[x,y], header[x], a[x,y]) + } + } + } +} + diff --git a/lin_reg2.awk b/lin_reg2.awk new file mode 100644 index 0000000..a66c78f --- /dev/null +++ b/lin_reg2.awk @@ -0,0 +1,101 @@ +### lin_reg.awk +# simple linear regression between columns + +BEGIN { + sign = "[+-]?" + decimal = "[0-9]+[.]?[0-9]*" + fraction = "[.][0-9]*" + exponent = "([Ee]" sign "[0-9]+)?" + number = "^" sign "(" decimal "|" fraction ")" exponent "$" +} + +NR == 1 { + header_nf = NF + for (n=1; n<=NF; n++) { + if ($n !~ number) + header[n] = $n + else + header[n] = n + } +} + +NF != 0 { + #printf("\n%s: %s", NR, $0) + if (NF > max_nf) + max_nf = NF + + ### iterate over columns + for (y=1; y<= NF; y++) { + if ($y ~ number) { + + ### mean + count[y] += 1 + sum[y] += $y + sum2[y] += $y*$y + mean[y] = sum[y]/count[y] + + ### difference from the mean + delta[y] = $y - mean[y] + sum_delta[y] += delta[y] + sum_delta2[y] += delta[y]*delta[y] + + ### sample variance + if (count[y] - 1) + var[y] = sum_delta2[y]/(count[y] - 1) + else + var[y] = 0 + + # x = row, y = col + for (x=1; x<=max_nf; x++) { + count[x,y] += 1 + sum_xy[x,y] += $x*$y + sum_delta_xy[x,y] += delta[x]*delta[y] + + # correlation + r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y]) + if (r_den[x,y]) + r[x,y] = sum_delta_xy[x,y]/r_den[x,y] + else + r[x,y] = 1 + + ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x]) + if (ab_den[x,y]) { + a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y] + b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y] + } + else { + a[x,y] = 0 + b[x,y] = 1 + } + + ### error estimate + err_den[x,y] = count[x,y]*(count[x,y] - 2) + if (count[x,y] > 2) { + err[x,y] = $y - (a[x,y] + b[x,y]*$x) + sum_err2[x,y] += err[x,y]*err[x,y] + } + b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x] + if (b_err_den[x,y]) + b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y]) + a_err_den[x,y] = count[x,y]*b_err_den[x,y] + if (a_err_den[x,y]) + a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y] + } + } + else + continue + } +} + +END { + for (y=1; y<=max_nf; y++) { + for (x=1; x<=max_nf; x++) { + if (x != y && r[x,y]) { + printf("\n %g \t (%s) \t = (%g +/- %g)(%s) \t + (%g +/- %g)", + 10.0*log(r[x,y]*r[x,y])/log(10), header[y], b[x,y], b_err[x,y], header[x], + a[x,y], a_err[x,y]) + } + } + } +} + diff --git a/lin_reg_example.py b/lin_reg_example.py new file mode 100644 index 0000000..3a2043e --- /dev/null +++ b/lin_reg_example.py @@ -0,0 +1,44 @@ +#!/usr/bin/python +#/usr/local/bin/python2.7 + +import numpy as np + + +def main(): + + dataz = np.genfromtxt('example.csv', delimiter=',', names=True) + + y = dataz['attitude'] + x = dataz['correct'] + print 'x = {} {}'.format(x, np.mean(x)) + print 'y = {} {}'.format(y, np.mean(y)) + + delta_x = x - np.mean(x) + delta_y = y - np.mean(y) + delta_xy = delta_x*delta_y + delta2_x = delta_x**2 + delta2_y = delta_y**2 + print 'delta_x = {} {}'.format(delta_x, np.sum(delta_x)) + print 'delta_y = {} {}'.format(delta_y, np.sum(delta_y)) + print 'delta_xy = {} {}'.format(delta_xy, np.sum(delta_xy)) + print 'delta2_x = {} {}'.format(delta2_x, np.sum(delta2_x)) + print 'delta2_y = {} {}'.format(delta2_y, np.sum(delta2_y)) + + Sx = np.sqrt(np.sum(delta2_x)/(delta2_x.size - 1)) + Sy = np.sqrt(np.sum(delta2_y)/(delta2_y.size - 1)) + r = np.sum(delta_xy)/np.sqrt(np.sum(delta2_x)*np.sum(delta2_y)) + print 'Sx = {}'.format(Sx) + print 'Sy = {}'.format(Sy) + print 'r = {}'.format(r) + print 'r^2 = {}'.format(r**2) + + b = r*(Sy/Sx) + a = np.mean(y) - b*np.mean(x) + print 'b = {}'.format(b) + print 'a = {}'.format(a) + + print ' (attitude) \t= {}(correct) + {}'.format(b, a) + + +if __name__ == '__main__': + main() diff --git a/mean.awk b/mean.awk new file mode 100644 index 0000000..45bedee --- /dev/null +++ b/mean.awk @@ -0,0 +1,13 @@ +### mean.awk +# find mean average of a list of numbers. + +BEGIN { + X = ARGV[1] + inputsize = split(X, Xarr) + Yarr[n] = 0 + SUM = 0 + for (m=1; m <= inputsize; m++) { + SUM =+ Xarr[m] + } + print SUM +} diff --git a/mean_avg.awk b/mean_avg.awk new file mode 100644 index 0000000..799f96c --- /dev/null +++ b/mean_avg.awk @@ -0,0 +1,62 @@ +#!/usr/bin/awk + +### mean_avg.awk +# average columns of numerical data + +BEGIN { + sign = "[+-]?" + decimal = "[0-9]+[.]?[0-9]*" + fraction = "[.][0-9]*" + exponent = "([Ee]" sign "[0-9]+)?" + number = "^" sign "(" decimal "|" fraction ")" exponent "$" +} + +NR == 1 { + header_nf = NF + for (n=1; n<=NF; n++) { + if ($n !~ number) + header[n] = $n + } +} + +NF != 0 { + if (NF > max_nf) + max_nf = NF + for (n=1; n <= NF; n++) { + if ($n !~ number) { + continue + } + if ($n ~ number) { + count[n] += 1 + sum[n] += $n + sum2[n] += $n*$n + mean[n] = sum[n]/count[n] + delta[n] = $n - mean[n] + delta2[n] = delta[n]*delta[n] + sum_delta[n] += delta[n] + sum_delta2[n] += delta2[n] + if ((count[n] - 1) != 0) + var[n] = sum_delta2[n]/(count[n] - 1) + else + var[n] = 0 + } + } +} + +END { + printf("\n") + printf("%-6s\t%-6s %-6s %-6s\n", "col", "mean", "std_err", "count") + for (n=1; n<=max_nf; n++) { + if (header[n]) + printf("%-6s\t", header[n]) + else + printf("%-6g\t", n) + if (count[n]) { + printf("%-6g ±%-6g %-6g\n", + mean[n], 1.96*sqrt(var[n]/count[n]), count[n]) + } + else + printf("%2s %2s %2s\n", " ", " ", " ") + } +} + diff --git a/pi.awk b/pi.awk new file mode 100644 index 0000000..7ecc4ab --- /dev/null +++ b/pi.awk @@ -0,0 +1,24 @@ + +### pi.awk, https://en.wikipedia.org/wiki/Pi +# In 1706 John Machin used the Gregory–Leibniz series to produce an algorithm +# that converged much faster. Machin reached 100 digits of π with this +# formula. Other mathematicians created variants, now known as Machin-like +# formulae, that were used to set several successive records for calculating +# digits of π. Machin-like formulae remained the best-known method for +# calculating π well into the age of computers, and were used to set records +# for 250 years, culminating in a 620-digit approximation in 1946 by Daniel +# Ferguson–the best approximation achieved without the aid of a calculating +# device. + +function pi() { + return 4*(4*atan2(1,5) - atan2(1,239)) +} + +BEGIN { + if (ARGV[1] > 0) + fig = ARGV[1] + else + fig = 6 + str = "%." fig "g\n" + printf(str, pi()) +} diff --git a/pi.sh b/pi.sh new file mode 100644 index 0000000..83af08a --- /dev/null +++ b/pi.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +### pi.sh, https://en.wikipedia.org/wiki/Pi +# In 1706 John Machin used the Gregory–Leibniz series to produce an algorithm +# that converged much faster. Machin reached 100 digits of π with this +# formula. Other mathematicians created variants, now known as Machin-like +# formulae, that were used to set several successive records for calculating +# digits of π. Machin-like formulae remained the best-known method for +# calculating π well into the age of computers, and were used to set records +# for 250 years, culminating in a 620-digit approximation in 1946 by Daniel +# Ferguson–the best approximation achieved without the aid of a calculating +# device. + +awk -v fig=${1} ' + function pi() { + return 4*(4*atan2(1,5) - atan2(1,239)) + } + BEGIN { + if (fig <= 0) + fig = 6 + str = "%." fig "g\n" + printf(str, pi()) + } +' diff --git a/sum1.awk b/sum1.awk new file mode 100644 index 0000000..02b7d6a --- /dev/null +++ b/sum1.awk @@ -0,0 +1,21 @@ +### sum1.awk, print column sums +# input: rows of numbers +# output: sum of each column +# missing entries are treated as zeros + +{ + for (i=1; i<=NF; i++) + sum[i] += $i + if (NF > maxfld) + maxfld = NF +} + +END { + for (i=1; i<=maxfld; i++) { + printf("%g", sum[i]) + if (i < maxfld) + printf(" ") + else + printf("\n") + } +} diff --git a/sum2.awk b/sum2.awk new file mode 100644 index 0000000..0b2aad3 --- /dev/null +++ b/sum2.awk @@ -0,0 +1,16 @@ +### sum2.awk, print column sums +# check that each line has the same number of fields as line one + +NR==1 { nfld = NF } + +{ + for (i=1; i<=NF; i++) + sum[i] += $i + if (NF != nfld) + print "line " NR " has " NF " entries, not " nfld +} + +END { + for (i=1; i<=NF; i++) + printf("%g%s", sum[i], i < nfld ? " " : "\n") +} diff --git a/sum3.awk b/sum3.awk new file mode 100644 index 0000000..b4100a7 --- /dev/null +++ b/sum3.awk @@ -0,0 +1,31 @@ +### sum3.awk, print sums of numeric columns +# input: rows of integers and strings +# output: sums of numeric columns +# assumes every line has same layout + +NR==1 { + nfld = NF + for (i=1; i<=NF; i++) + numcol[i] = isnum($i) +} + +{ + for (i=1; i<=NF; i++) + if (numcol[i]) + sum[i] += $i +} + +END { + for (i=1; i<=nfld; i++) { + if (numcol[i]) + printf("%g", sum[i]) + else + printf("--") + printf(i < nfld ? " " : "\n") + } +} + +function isnum(n) { + return n ~ /^[+-]?[0-9]+$/ +} + diff --git a/sum4.awk b/sum4.awk new file mode 100644 index 0000000..c592409 --- /dev/null +++ b/sum4.awk @@ -0,0 +1,37 @@ +### sum4.awk, print sums of numeric columns +# input: rows of integers and strings +# output: sums of numeric columns + +function isnum(n) { + sign = "[+-]?" + decimal = "[0-9]+[.]?[0-9]*" + fraction = "[.][0-9]+" + exponent = "([Ee]" sign "[0-9]+)?" + number = "^" sign "(" decimal "|" fraction ")" exponent "$" + return n ~ number +} + +NR==1 { + nfldz = NF + for (i=1; i<=NF; i++) { + if (!isnum($i)) + header[i] = $i + } +} + +{ + for (i=1; i<=NF; i++) { + sum[i] += $i + count[i]++ + } +} + +END { + for (i=1; i<=nfldz; i++) { + if (header[i]) + printf("%s: \t", header[i]) + #if (numcol[i]) + printf("%g\n", sum[i]) + printf(i < nfldz ? "" : "\n") + } +} diff --git a/table_4-3.csv b/table_4-3.csv new file mode 100644 index 0000000..63c0b59 --- /dev/null +++ b/table_4-3.csv @@ -0,0 +1,11 @@ +Tc,Vb +10,420 +20,410 +30,360 +40,360 +50,340 +60,290 +70,300 +80,270 +90,210 +100,200 -- cgit v1.2.3