summary | refs | log | tree | commit | diff
path: root/lin_reg.awk
diff options
context:
space:
mode:
author wukong <wukong@longaeva> 2018-06-13 21:41:39 -0700
committer wukong <wukong@longaeva> 2018-06-13 21:41:39 -0700
commit 8875b420a873a360a9484778b487394add318a5e (patch)
tree d357816354c5d9d4bc3b1db5c43ceda7933b5e3f /lin_reg.awk
parent 6bb6be845b6099550e0a530217633e30522093ad (diff)
added shebang, replaced if-elses with shortcut notation, ported online mean and variance calc to mean.awk
Diffstat (limited to 'lin_reg.awk')
-rw-r--r-- lin_reg.awk 17
1 files changed, 5 insertions, 12 deletions
diff --git a/lin_reg.awk b/lin_reg.awk
index 66d8aa6..e1cb722 100644
--- a/lin_reg.awk
+++ b/lin_reg.awk
@@ -1,3 +1,5 @@
+#!/usr/bin/awk -f
+
### lin_reg.awk
# simple linear regression between columns
@@ -12,10 +14,7 @@ BEGIN {
NR == 1 {
header_nf = NF
for (n=1; n<=NF; n++) {
- if ($n !~ number)
- header[n] = $n
- else
- header[n] = "col_" n
+ ($n !~ number) ? header[n] = $n : header[n] = "col_" n
}
}
@@ -40,10 +39,7 @@ NF != 0 {
sum_delta2[y] += delta[y]*delta[y]
### sample variance
- if (count[y] - 1)
- var[y] = sum_delta2[y]/(count[y] - 1)
- else
- var[y] = 0
+ (count[y] - 1) ? var[y] = sum_delta2[y]/(count[y] - 1) : var[y] = 0
# x = row, y = col
for (x=1; x<=max_nf; x++) {
@@ -53,10 +49,7 @@ NF != 0 {
# correlation
r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y])
- if (r_den[x,y])
- r[x,y] = sum_delta_xy[x,y]/r_den[x,y]
- else
- r[x,y] = 1
+ (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1
ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x])
if (ab_den[x,y]) {