Article 5571 of comp.lang.perl: Xref: feenix.metronet.com comp.lang.perl:5571 Newsgroups: comp.lang.perl Path: feenix.metronet.com!news.utdallas.edu!corpgate!bnrgate!bnr.co.uk!pipex!uunet!spool.mu.edu!sol.ctr.columbia.edu!destroyer!nntp.cs.ubc.ca!uw-beaver!fluke!dbc From: dbc@tc.fluke.COM (Dan Carson) Subject: Re: Perl Statistical scripts Message-ID: Organization: Fluke Corporation, Everett, WA References: <1993Aug27.152525.19641@softwords.bc.ca> <1993Aug30.152853.114491@embl-heidelberg.de> Date: Wed, 1 Sep 1993 18:06:58 GMT Lines: 149 In article <1993Aug27.152525.19641@softwords.bc.ca>, mark@softwords.bc.ca (Mark Perrin) writes: > I'm a newbie to PERL and since I haven't been able to find the FAQ > I have to ask: does anyone have some basic statistical scripts for > mean, min, max, regression analysis? Here's a weighted least squares polynomial curve fit in perl. I'd appreciate hints to make it go faster. Dan Carson Analog Guy ----------------------------------------------------------------------------- #!/usr/local/perl # lsq [-wc][-o order] infile Calculates least-squares fit coefficients. # options: -o order Specifies order of fit (default is 1). # -w Specifies weighted fit. # -c Calculates fit & error for each input value. # Takes input file of the form: x y # Output to stdout. # 7/18/91 dbc Added command line switches. require 'getopts.pl'; &Getopts("o:wc"); # -o3 = 3rd order, -w = weighted, -c = calculate error $order = $opt_o ? $opt_o : 1; # Get order from command line. # Read the input file. while(<>) { s/#.*//; # Toss comments. s/^[, \t]+//; # Toss leading garbage. ($x, $y, $w) = split(/[, \t\n]+/, $_); # Read data line. next if $x eq ""; # Skip lines with no data. if($opt_c) { push(@x,$x); # Save data for error calculation push(@y,$y); } # Save Sum(X**n) and Sum(Y * X**n) $xn = $opt_w ? $w : 1; # Weight data point if desired for($j = 0; $j <= $order; $j++, $xn *= $x) { $s_xn[$j] += $xn; $s_yxn[$j] += $xn * $y; } for(; $j <= 2 * $order; $j++, $xn *= $x) { $s_xn[$j] += $xn; } } # Load the matrix. for($i = 0; $i <= $order; $i++) { for($j = 0; $j <= $order; $j++) { $matrix{$i, $j} = $s_xn[$i + $j]; } } @coefficient = &solve_matrix_eq(*matrix, *s_yxn); for($i = 0; $i <= $order; $i++) { printf "A%d = %.6e\n",$i,$coefficient[$i]; } if($opt_c) { printf "\n%12s %12s %12s %12s\n","x","y","y fit","fit error"; for($i = 0; $i <= $#x; $i++) { $y1 = &calc($x[$i],*coefficient); printf "%12.2g %12.2g %12.2g %12.2g\n", $x[$i],$y[$i],$y1,$y1-$y[$i]; } } exit 0; sub solve_matrix_eq # pass *matrix and *vector { # returns @x solution of [matrix]*[@x]=[vector] local(*a,*b) = @_; local(%mat) = %a; local($size) = $#b + 1; local($i,$j,$k,$f); @mat{grep($_ .= "$;$size", (0 .. $#b))} = @b; # Augment the matrix for($i = 0; $i < $#b; $i++) { for($j = $i + 1; $j <= $#b; $j++) { $f = $mat{$i, $i} / $mat{$j, $i}; for($k = 0; $k <= $size; $k++) { $mat{$j, $k} = $mat{$j, $k} * $f - $mat{$i, $k}; } } } for($i = $#b; $i > 0; $i--) { for($j = $i - 1; $j >= 0; $j--) { $f = $mat{$i, $i} / $mat{$j, $i}; for($k = $j; $k <= $size; $k++) { $mat{$j, $k} = $mat{$j, $k} * $f - $mat{$i, $k}; } } } for($i = 0; $i <= $#b; $i++) # Normalize the diagonal { $mat{$i, $size} /= $mat{$i, $i}; $mat{$i, $i} = 1.0; } @mat{grep($_ .= "$;$size", (0 .. $#b))}; # Answer is in augmented column } sub calc # Pass $x and *a (array of coefficients) { # Returns Sum($a[$i] * $x**$i) local($x, *a) = @_; local($y,$xn); $xn = 1; foreach(@a) { $y += $_ * $xn; $xn *= $x; } $y; } ------------------------------------------------------------------- -- Dan Carson dbc@tc.fluke.COM Fluke Corporation Everett, WA .