summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: wukong <wukong@longaeva> 2019-10-22 19:57:00 -0700
committer: wukong <wukong@longaeva> 2019-10-22 19:57:00 -0700
commit: c1ec2518f46cd011ef79c7e7b08265a9053c92b6 (patch)
tree: b6cc20703fa62640e7171c4b20ad9d2efca0a30a
parent: 9efa8c077a4f9cea4b057f8b98104a34e38bcbfd (diff)
added (data_prev ~ number) check to diff;
added ($x ~ number) check to lin_reg; added gaussian expressions to gaussian;
Diffstat (limited to '')
-rw-r--r-- diff.awk 39
-rw-r--r-- gaussian.awk 5
-rw-r--r-- lin_reg.awk 73
3 files changed, 63 insertions, 54 deletions
diff --git a/diff.awk b/diff.awk
index 1d1f72d..ba452c7 100644
--- a/diff.awk
+++ b/diff.awk
@@ -5,6 +5,7 @@
BEGIN {
OFS = FS
+ # OFMT = "%.9g"
sign = "[+-]?"
decimal = "[0-9]+[.]?[0-9]*"
fraction = "[.][0-9]*"
@@ -13,7 +14,7 @@ BEGIN {
}
NR == 1 {
- ### orig data columns
+ # orig data columns
for (n=1; n<=NF; n++) {
if ($n !~ number) {
header[n] = $n
@@ -27,7 +28,7 @@ NR == 1 {
printf(OFS)
}
- ### diff columns
+ # diff columns
for (n=1; n<=NF; n++) {
printf(dheader[n])
printf(n < NF ? OFS : ORS)
@@ -38,29 +39,31 @@ NF {
if (NF > nf_max)
nf_max = NF
- ### data columns
- for (y=1; y<=nf_max; y++) {
- if ($y == header[y] || $y == dheader[y])
+ # data columns
+ for (n=1; n<=nf_max; n++) {
+ if ($n == header[n] || $n == dheader[n])
continue
- if ($y ~ number)
- printf(OFMT, $y)
+ if ($n ~ number)
+ printf(OFMT, $n)
else
printf("")
printf(OFS)
}
- ### diff columns
- for (y=1; y<=nf_max; y++) {
- if ($y == header[y] || $y == dheader[y])
+ # diff columns
+ for (n=1; n<=nf_max; n++) {
+ if ($n == header[n] || $n == dheader[n])
continue
- if ($y ~ number) {
- data[y] = $y
- diff[y] = data[y] - data_prev[y]
- data_prev[y] = data[y]
- printf(OFMT, diff[y])
+ if ($n ~ number) {
+ data[n] = $n
+ if (data_prev[n] ~ number)
+ diff[n] = data[n] - data_prev[n]
+ else
+ diff[n] = ""
+ data_prev[n] = data[n]
}
- else
- diff[y] = ""
- printf(y < nf_max ? OFS : ORS)
+ printf(diff[n])
+ printf(n < nf_max ? OFS : ORS)
}
}
+
diff --git a/gaussian.awk b/gaussian.awk
index dfb5fb4..dc62d37 100644
--- a/gaussian.awk
+++ b/gaussian.awk
@@ -25,7 +25,10 @@ BEGIN {
for (n=0; n<N; n++) {
if (N > 1 && M > 0) {
- w[n] = gaussian(n)
+ # sigma <= 0.5
+ sigma = 0.4
+ en[n] = (n - M)/(sigma*M)
+ w[n] = exp(-0.5*e[n]*e[n])
print n, w[n]/M
}
else {
diff --git a/lin_reg.awk b/lin_reg.awk
index 52e55d3..0213c7f 100644
--- a/lin_reg.awk
+++ b/lin_reg.awk
@@ -17,7 +17,7 @@ NR == 1 {
($n ~ number) ? header[n] = "col" n : header[n] = $n
}
-NF > 0 {
+NF {
if (NF > nf_max)
nf_max = NF
@@ -25,6 +25,7 @@ NF > 0 {
for (y=1; y<=nf_max; y++) {
if ($y == header[n])
continue
+
if ($y ~ number) {
### mean
@@ -42,45 +43,47 @@ NF > 0 {
# x = row, y = col, trendline: y = A + Bx
for (x=1; x<=nf_max; x++) {
- count[x,y] += 1
- sum_xy[x,y] += $x*$y
- sum_delta_xy[x,y] += delta0[x]*delta1[y]
+ if ($x ~ number) {
+ count[x,y] += 1
+ sum_xy[x,y] += $x*$y
+ sum_delta_xy[x,y] += delta0[x]*delta1[y]
- # covariance
- #(count[x,y] > 1) ? cov[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1) : cov[x,y] = ""
+ # covariance
+ #(count[x,y] > 1) ? cov[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1) : cov[x,y] = ""
- # correlation
- r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y])
- (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1
+ # correlation
+ r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y])
+ (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1
- ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x])
- if (ab_den[x,y]) {
- a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y]
- b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y]
- }
- else {
- a[x,y] = 0
- b[x,y] = 1
- }
+ ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x])
+ if (ab_den[x,y]) {
+ a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y]
+ b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y]
+ }
+ else {
+ a[x,y] = 0
+ b[x,y] = 1
+ }
+
+ # error estimate
+ err_den[x,y] = count[x,y]*(count[x,y] - 2)
+ if (count[x,y] > 2) {
+ err[x,y] = $y - (a[x,y] + b[x,y]*$x)
+ sum_err2[x,y] += err[x,y]*err[x,y]
+ }
+ b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x]
+ if (b_err_den[x,y])
+ b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y])
+ a_err_den[x,y] = count[x,y]*b_err_den[x,y]
+ if (a_err_den[x,y])
+ a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y]
- # error estimate
- err_den[x,y] = count[x,y]*(count[x,y] - 2)
- if (count[x,y] > 2) {
- err[x,y] = $y - (a[x,y] + b[x,y]*$x)
- sum_err2[x,y] += err[x,y]*err[x,y]
+ # weighted mean, from HP-20S manual, pg 60
+ xw[x,y] = sum_xy[x,y]/sum[y]
+ yw[x,y] = b[x,y]*xw[x,y] + a[x,y]
+ xw_dist[x,y] = (xw[x,y] - mean[x])
+ yw_dist[x,y] = b[x,y]*(xw[x,y] - mean[x])
}
- b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x]
- if (b_err_den[x,y])
- b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y])
- a_err_den[x,y] = count[x,y]*b_err_den[x,y]
- if (a_err_den[x,y])
- a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y]
-
- # weighted mean, from HP-20S manual, pg 60
- xw[x,y] = sum_xy[x,y]/sum[y]
- yw[x,y] = b[x,y]*xw[x,y] + a[x,y]
- xw_dist[x,y] = (xw[x,y] - mean[x])
- yw_dist[x,y] = b[x,y]*(xw[x,y] - mean[x])
}
}
else