summary | refs | log | tree | commit | diff
path: root/lin_reg.awk
diff options
context:
space:
mode:
Diffstat (limited to 'lin_reg.awk')
-rw-r--r-- lin_reg.awk 34
1 files changed, 17 insertions, 17 deletions
diff --git a/lin_reg.awk b/lin_reg.awk
index 8622289..56114a2 100644
--- a/lin_reg.awk
+++ b/lin_reg.awk
@@ -4,7 +4,7 @@
# simple linear regression between columns
BEGIN {
- OFS = "%.18g"
+ OFMT = "%.18g"
sign = "[+-]?"
decimal = "[0-9]+[.]?[0-9]*"
fraction = "[.][0-9]*"
@@ -13,13 +13,11 @@ BEGIN {
}
NR == 1 {
- header_nf = NF
- for (n=1; n<=NF; n++) {
- ($n !~ number) ? header[n] = $n : header[n] = "col" n
- }
+ for (n=1; n<=NF; n++)
+ ($n ~ number) ? header[n] = "col" n : header[n] = $n
}
-NF != 0 {
+NF > 0 {
if (NF > nf_max)
nf_max = NF
@@ -31,15 +29,14 @@ NF != 0 {
count[y] += 1
sum[y] += $y
sum2[y] += $y*$y
- mean[y] = sum[y]/count[y]
-
- ### difference from the mean
- delta[y] = $y - mean[y]
- sum_delta[y] += delta[y]
- sum_delta2[y] += delta[y]*delta[y]
+ delta0[y] = $y - mean[y]
+ mean[y] = mean[y] + delta0[y]/count[y]
+ delta1[y] = $y - mean[y]
+ sum_delta[y] += delta1[y]
+ sum_delta2[y] += delta0[y]*delta1[y]
### sample variance
- (count[y] > 1) ? var[y] = sum_delta2[y]/(count[y] - 1) : var[y] = 0
+ (count[y] > 1) ? var[y] = sum_delta2[y]/(count[y] - 1) : var[y] = ""
# x = row, y = col
for (x=1; x<=nf_max; x++) {
@@ -47,9 +44,12 @@ NF != 0 {
sum_xy[x,y] += $x*$y
sum_delta_xy[x,y] += delta[x]*delta[y]
+ # covariance
+ #(count[x,y] > 1) ? cov[x,y] = sum_delta_xy[x,y]/(count[x,y] - 1) : cov[x,y] = ""
+
# correlation
r_den[x,y] = sqrt(sum_delta2[x]*sum_delta2[y])
- (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1
+ (r_den[x,y]) ? r[x,y] = sum_delta_xy[x,y]/r_den[x,y] : r[x,y] = 1
ab_den[x,y] = (count[x,y]*sum2[x] - sum[x]*sum[x])
if (ab_den[x,y]) {
@@ -84,9 +84,9 @@ END {
for (y=1; y<=nf_max; y++) {
for (x=1; x<=nf_max; x++) {
if (x != y && r[x,y]) {
- printf("\n %.18g \t (%s) \t = (%.18g +/- %.18g)(%s) \t + (%.18g +/- %.18g)",
- 10.0*log(r[x,y]*r[x,y])/log(10), header[y], b[x,y], b_err[x,y], header[x],
- a[x,y], a_err[x,y])
+ printf("%.9g "OFS" (%s) "OFS" = (%.9g +/- %.9g)(%s) "OFS" + (%.9g +/- %.9g)%s",
+ 10.0*log(r[x,y]*r[x,y])/log(10.0), header[y], b[x,y],
+ b_err[x,y], header[x], a[x,y], a_err[x,y], ORS)
}
}
}