summaryrefslogtreecommitdiff
path: root/lin_reg.awk
diff options
context:
space:
mode:
authorwukong <wukong@longaeva>2019-10-22 19:57:00 -0700
committerwukong <wukong@longaeva>2019-10-22 19:57:00 -0700
commitc1ec2518f46cd011ef79c7e7b08265a9053c92b6 (patch)
treeb6cc20703fa62640e7171c4b20ad9d2efca0a30a /lin_reg.awk
parent9efa8c077a4f9cea4b057f8b98104a34e38bcbfd (diff)
added (prev_data ~ number) check to diff;
added ($x ~ number) check to lin_reg; added gaussian expressions gaussian;
Diffstat (limited to '')
-rw-r--r--lin_reg.awk73
1 files changed, 38 insertions, 35 deletions
diff --git a/lin_reg.awk b/lin_reg.awk
index 52e55d3..0213c7f 100644
--- a/lin_reg.awk
+++ b/lin_reg.awk
@@ -17,7 +17,7 @@ NR == 1 {
($n ~ number) ? header[n] = "col" n : header[n] = $n
}
-NF > 0 {
+NF {
if (NF > nf_max)
nf_max = NF
@@ -25,6 +25,7 @@ NF > 0 {
for (y=1; y<=nf_max; y++) {
if ($y == header[n])
continue
+
if ($y ~ number) {
### mean
@@ -42,45 +43,47 @@ NF > 0 {
# x = row, y = col, trendline: y = A + Bx
for (x=1; x<=nf_max; x++) {
- count[x,y] += 1
- sum_xy[x,y] += $x*$y
- sum_delta_xy[x,y] += delta0[x]*delta1[y]
+ if ($x ~ number) {
+ count[x,y] += 1
+ sum_xy[x,y] += $x*$y
+ sum_delta_xy[x,y] += delta0[x]*delta1[y]
- # covariance
- #(count[x,y] > 1) ? cov[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1) : cov[x,y] = ""
+ # covariance
+ #(count[x,y] > 1) ? cov[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1) : cov[x,y] = ""
- # correlation
- r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y])
- (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1
+ # correlation
+ r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y])
+ (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1
- ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x])
- if (ab_den[x,y]) {
- a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y]
- b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y]
- }
- else {
- a[x,y] = 0
- b[x,y] = 1
- }
+ ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x])
+ if (ab_den[x,y]) {
+ a[x,y] = (sum[y]*sum2[x] - sum[x]*sum_xy[x,y])/ab_den[x,y]
+ b[x,y] = (count[x,y]*sum_xy[x,y] - sum[x]*sum[y])/ab_den[x,y]
+ }
+ else {
+ a[x,y] = 0
+ b[x,y] = 1
+ }
+
+ # error estimate
+ err_den[x,y] = count[x,y]*(count[x,y] - 2)
+ if (count[x,y] > 2) {
+ err[x,y] = $y - (a[x,y] + b[x,y]*$x)
+ sum_err2[x,y] += err[x,y]*err[x,y]
+ }
+ b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x]
+ if (b_err_den[x,y])
+ b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y])
+ a_err_den[x,y] = count[x,y]*b_err_den[x,y]
+ if (a_err_den[x,y])
+ a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y]
- # error estimate
- err_den[x,y] = count[x,y]*(count[x,y] - 2)
- if (count[x,y] > 2) {
- err[x,y] = $y - (a[x,y] + b[x,y]*$x)
- sum_err2[x,y] += err[x,y]*err[x,y]
+ # weighted mean, from HP-20S manual, pg 60
+ xw[x,y] = sum_xy[x,y]/sum[y]
+ yw[x,y] = b[x,y]*xw[x,y] + a[x,y]
+ xw_dist[x,y] = (xw[x,y] - mean[x])
+ yw_dist[x,y] = b[x,y]*(xw[x,y] - mean[x])
}
- b_err_den[x,y] = (count[x,y] - 2)*sum_delta2[x]
- if (b_err_den[x,y])
- b_err[x,y] = sqrt(sum_err2[x,y]/b_err_den[x,y])
- a_err_den[x,y] = count[x,y]*b_err_den[x,y]
- if (a_err_den[x,y])
- a_err[x,y] = sqrt(sum2[x]/count[x,y])*b_err[x,y]
-
- # weighted mean, from HP-20S manual, pg 60
- xw[x,y] = sum_xy[x,y]/sum[y]
- yw[x,y] = b[x,y]*xw[x,y] + a[x,y]
- xw_dist[x,y] = (xw[x,y] - mean[x])
- yw_dist[x,y] = b[x,y]*(xw[x,y] - mean[x])
}
}
else