news.utdallas.edu!wupost!uunet!caen!uvaarpa!mmdf Tue Dec 15 10:08:50 CST 1992 Article: 29 of comp.lang.perl Xref: feenix.metronet.com comp.lang.perl:29 Newsgroups: comp.lang.perl Path: feenix.metronet.com!news.utdallas.edu!wupost!uunet!caen!uvaarpa!mmdf From: Dov Grobgeld #Subject: Re: Statistic routines ? Message-ID: <1992Dec14.075602.5410@uvaarpa.Virginia.EDU> Sender: mmdf@uvaarpa.Virginia.EDU (Mail System) Reply-To: dov@menora.weizmann.ac.il Organization: The Internet Date: Mon, 14 Dec 1992 07:56:02 GMT Lines: 270 For all it's worth. Here are a couple of routines that I have written that I find very useful in my data manipulations. <> #/usr/local/bin/perl ############################################################################# # histbin # # version 0.1 # # Bin data for a histogram. # # Dov Grobgeld # Department of Chemical Physics # The Weizmann Institute of Science # Rehovot, Israel # Email: dov@menora.weizmann.ac.il # # 1992-06-16 ############################################################################# # Get options while (($_ = $ARGV[0]) =~ /^-/) { shift; if (/-nBins$/i) { $nBins= $ARGV[0]; shift; } elsif (/-intv$/i) { $binMin= $ARGV[0]; shift; $binMax= $ARGV[0]; shift; } elsif (/-n(orm)?/i) { $normalize++; } elsif (/-h(elp)?$|-u(sage)?$/i) { print< where is a text file containing a column of values to bin. Options: -nBins n Number of bins. Default is n=100. -intv min max Interval for binning. Default is min=0 and max=1. -norm Normalize the histogram. EOH ; exit; } } # Set default values $binMin=0.0 unless defined $binMin; $binMax=1.0 unless defined $binMax; $nBins=100 unless defined $nBins; # Construct index array... for $i (0..$nBins-1) { $xBins[$i]= $binMin + (1.0 * $i / $nBins) * ($binMax - $binMin); } # Clear the bin array for $i (0..$nBins-1) { $binCount[$i]=0; } # Read the data file while (<>) { next if (/^#/); # Skip comments ($x=$_) =~ s/^\s+//; $x=~ s/\n//; next unless $x=~ /^\d+(\.\d*)?([de][\+\-]?\d+)?$/o; if (++$numEntries % 100 == 0) {printf STDERR ("%5d\r",$numEntries); } $binIdx= int(1.0 * ($x-$binMin) * $nBins / ($binMax - $binMin)); $binCount[$binIdx]++ unless $binIdx < 0; if ($binIdx >= $nBins || $binIdx < 0) { printf STDERR "Warning! Value $x outside of binning range\n"; } } # print the data for $i (0..$nBins-1) { if ($normalize) {$binCount[$i] = $binCount[$i]/$numEntries; } printf ("%12.8f %15.8g\n",$xBins[$i], $binCount[$i]); } print STDERR "Done binning $numEntries values!\n"; <> #/usr/local/bin/perl ########################################################################## # histrebin # # version 0.1 # # Reads a file of bin values and frequency count and rebins it by # a user defined factor. Optionally normalizes the histogram. # # Dov Grobgeld # Department of Chemical Physics # The Weizmann Institute of Science # Rehovot, Israel # Email: dov@menora.weizmann.ac.il # # 1992-06-16 ########################################################################## # Get options while (($_ = $ARGV[0]) =~ /^-/) { shift; if (/-n(orm)?$/) { $normalize++; } elsif (/-s(trip)?$/) { $stripComment++; } elsif (/-h(elp)?$|-u(sage)$/) { print < where is the file to rebin by a a factor of . will default to 1, if left out. histrebin will sum the values of each bins and create one new bin with the accumulated sum. Options: -n will cause the histogram to be normalized. -s will strip comments. Examples: 1. Rebin a histogram by a factor of 4. This would e.g. create 25 bins instead of 100. histrebin binned100.dat 4 > binned25.dat 2. Sum all the 100 bins in the file binned100.dat histrebin binned100.dat 100 3. Normalize the histogram in binned100.dat histrebin -norm binned100.dat > binned100norm.dat EOHELP ; exit; } else { die "Unknown option $_!\n"; } } $infile=shift; open (IN, $infile) || die "No such file $infile!\n"; ($factor=shift) || ($factor=1); if ($factor==0) { die "Invalid factor!\n"; } # If normalize then go through the whole file and sum the histogram... $totalsum=0.0; if ($normalize) { while () { next if (/^#/); if (/^\s*(\S+)\s+(\S+)/) { $totalsum += $2; } } close(IN); open(IN, $infile); } $bunchindex=0; # Number of bins summed into the bunch $bunchsum=0; # Sum of all entries in the bin $binlow=0; # Lower limit of the bin while () { if (/^#/) { print unless ($stripComment); } elsif (/^\s*(\S+)\s+(\S+)/) { # Assume naively that these are numbers... $bunchindex++; $binlow = 0.0 + $1 if ($bunchindex == 1); # Convert to floating point $bunchsum += $2; if ($bunchindex == $factor) { $bunchsum= 1.0 * $bunchsum/$totalsum if $normalize; printf("%8.5f %10.8g\n", $binlow, $bunchsum); $bunchsum=0; $bunchindex=0; } } } <> #/usr/local/bin/perl ############################################################################## # cutcol # # Version 0.1 # # Cut columns out of a data file. Great as a prefilter for xgraph. # # Dov Grobgeld # Email: dov@menora.weizmann.ac.il # 1992-06-16 ############################################################################### while (($_= $ARGV[0]) =~ /^-/) { shift; if (/-c$/) { # Other columns than default ones $col1= $ARGV[0]; shift; $col2= $ARGV[0]; shift; } elsif (/-s$/) { $strip++; } elsif (/^-h(elp)?$|^-u(sage)?$/) { print <) { if (/^\#/) { print unless $strip; next; } s/^\s+//; # Strip leading spaces @columns= split; print "$columns[$col1] $columns[$col2]\n"; } -- ___ ___ / o \ o \ Dov Grobgeld ( o o ) o | The Weizmann Institute of Science, Israel \ o /o o / "Where the tree of wisdom carries oranges" | | | | _| |_ _| |_ .