#!/usr/local/bin/wermit +

; Linear regression using S-Expressions.
;
; Input:
;   A data file containing two columns of numbers.
; Output:
;   Means, minima, maxima, variances, standard deviations,
;   and linear correlation coefficient.
; Illustrates:
;   S-Expressions, floating-point arithmetic, file i/o, field extraction
; Requires:
;   C-Kermit 7.1 or K95 1.1.21 or later.
; Author:
;   F. da Cruz, Columbia University, Nov 2000
;
if < \v(argc) 2 exit 1 Usage: \%0 filename
if LLT \v(version) 701199 exit 1 Insufficient Kermit version

fopen /read \%c \fcontents(\%1)                ; Open the data file
if fail exit 1                                 ; Check

xecho Reading...

(setq xsum 0 ysum 0 xsum2 0 ysum2 0 xysum 0 n 0)
(setq t1 \v(ftime))

; Loop through elements and accumulate sums, maxima, and minima

while not \f_eof(\%c) {
    fread \%c line                              ; Read a line (record)
    if fail break                               ; Check for EOF
    if not def line continue                    ; Ignore empty lines
    .x := \fword(\m(line),1)                    ; Extract X and Y
    .y := \fword(\m(line),2)
    if ( not float \m(x) || not float \m(y) ) { ; Verify record
        exit 1 BAD DATA (line \m(n)): [\m(line)]
    }
    (if (== n 0) (setq xmin x ymin y xmax x ymax y)) ; Init max and mins
    (++ n)                                      ; Count record
    (setq xmax (max xmax x) ymax (max ymax y))  ; X and Y maxima
    (setq xmin (min xmin x) ymin (min ymin y))  ; X and Y minima
    (++ xsum x ysum y)                          ; X and Y sums
    (++ xsum2 (^ x 2) ysum2 (^ y 2))            ; Sum of X and Y squares
    (++ xysum (* x y))                          ; Sum of XY products
}
fclose \%c                                      ; Done reading - close file

; Calculate results

(setq xmean (/ xsum n) ymean (/ ysum n))        ; Mean X and Y
(setq xss (- xsum2 (/ (^ xsum 2) n)))           ; Intermediate values
(setq yss (- ysum2 (/ (^ ysum 2) n)))
(setq xvar (/ xss n) yvar (/ yss n))            ; X and Y variance
(setq sdx (sqrt xvar) sdy (sqrt yvar))          ; Std deviation in X and Y
(setq tmp (* xss yss))                          ; More intermediate values
(setq xyss (- xysum (/ (* xsum ysum) n)))
(setq cc (if tmp (/ xyss (sqrt tmp)) 1.0))      ; Correlation coefficient

; Display the results

(setq t2 (- \v(ftime) t1))
echo Done (\ffpround(t2,2) sec)
echo Points: \m(n)

define xxout {
  echo {\frpad(\%1:,16)\flpad(\ffprou(\%2,2),10)\%9\flpad(\ffprou(\%3,2),10)}
}
echo {                       X         Y}
xxout Miminum xmin ymin
xxout Maximum xmax ymax
xxout Mean  xmean ymean
xxout Variance xvar yvar
xxout {Std Deviation} sdx sdy
echo
echo  Correlation coefficient: \flpad(\ffpround(cc,2),11)
exit 0