[system] / trunk / pg / lib / Distributions.pm Repository:
ViewVC logotype

View of /trunk/pg/lib/Distributions.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1050 - (download) (as text) (annotate)
Fri Jun 6 21:39:42 2003 UTC (16 years, 8 months ago) by sh002i
File size: 11736 byte(s)
moved PG modules and macro files from webwork-modperl to pg
-sam

    1 #package Statistics::Distributions;
    2 package Distributions;
    3 
    4 # In order to use the functions from this module in your WebWork problems, you have to load PGstatisticsmacros.pl
    5 
    6 # The documentation at the end of this file is slightly modified for WebWork by Maria Voloshina, University of Rochester.
    7 
    8 use strict;
    9 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
   10 use constant PI => 3.1415926536;
   11 use constant SIGNIFICANT => 6; # number of significant digits to be returned
   12 
   13 require Exporter;
   14 
   15 @ISA = qw(Exporter AutoLoader);
   16 # Items to export into callers namespace by default. Note: do not export
   17 # names by default without a very good reason. Use EXPORT_OK instead.
   18 # Do not simply export all your public functions/methods/constants.
   19 @EXPORT_OK = qw(chisqrdistr tdistr fdistr udistr uprob chisqrprob tprob fprob);
   20 $VERSION = '0.07';
   21 
   22 # Preloaded methods go here.
   23 
   24 sub chisqrdistr { # Percentage points  X^2(x^2,n)
   25   my ($n, $p) = @_;
   26   if ($n <= 0 || abs($n) - abs(int($n)) != 0) {
   27     die "Invalid n: $n\n"; # degree of freedom
   28   }
   29   if ($p <= 0 || $p > 1) {
   30     die "Invalid p: $p\n";
   31   }
   32   return precision_string(_subchisqr($n, $p));
   33 }
   34 
   35 sub udistr { # Percentage points   N(0,1^2)
   36   my ($p) = (@_);
   37   if ($p > 1 || $p <= 0) {
   38     die "Invalid p: $p\n";
   39   }
   40   return precision_string(_subu($p));
   41 }
   42 
   43 sub tdistr { # Percentage points   t(x,n)
   44   my ($n, $p) = @_;
   45   if ($n <= 0 || abs($n) - abs(int($n)) != 0) {
   46     die "Invalid n: $n\n";
   47   }
   48   if ($p <= 0 || $p >= 1) {
   49     die "Invalid p: $p\n";
   50   }
   51   return precision_string(_subt($n, $p));
   52 }
   53 
   54 sub fdistr { # Percentage points  F(x,n1,n2)
   55   my ($n, $m, $p) = @_;
   56   if (($n<=0) || ((abs($n)-(abs(int($n))))!=0)) {
   57     die "Invalid n: $n\n"; # first degree of freedom
   58   }
   59   if (($m<=0) || ((abs($m)-(abs(int($m))))!=0)) {
   60     die "Invalid m: $m\n"; # second degree of freedom
   61   }
   62   if (($p<=0) || ($p>1)) {
   63     die "Invalid p: $p\n";
   64   }
   65   return precision_string(_subf($n, $m, $p));
   66 }
   67 
   68 sub uprob { # Upper probability   N(0,1^2)
   69   my ($x) = @_;
   70   return precision_string(_subuprob($x));
   71 }
   72 
   73 sub chisqrprob { # Upper probability   X^2(x^2,n)
   74   my ($n,$x) = @_;
   75   if (($n <= 0) || ((abs($n) - (abs(int($n)))) != 0)) {
   76     die "Invalid n: $n\n"; # degree of freedom
   77   }
   78   return precision_string(_subchisqrprob($n, $x));
   79 }
   80 
   81 sub tprob { # Upper probability   t(x,n)
   82   my ($n, $x) = @_;
   83   if (($n <= 0) || ((abs($n) - abs(int($n))) !=0)) {
   84     die "Invalid n: $n\n"; # degree of freedom
   85   }
   86   return precision_string(_subtprob($n, $x));
   87 }
   88 
   89 sub fprob { # Upper probability   F(x,n1,n2)
   90   my ($n, $m, $x) = @_;
   91   if (($n<=0) || ((abs($n)-(abs(int($n))))!=0)) {
   92     die "Invalid n: $n\n"; # first degree of freedom
   93   }
   94   if (($m<=0) || ((abs($m)-(abs(int($m))))!=0)) {
   95     die "Invalid m: $m\n"; # second degree of freedom
   96   }
   97   return precision_string(_subfprob($n, $m, $x));
   98 }
   99 
  100 
  101 sub _subfprob {
  102   my ($n, $m, $x) = @_;
  103   my $p;
  104 
  105   if ($x<=0) {
  106     $p=1;
  107   } elsif ($m % 2 == 0) {
  108     my $z = $m / ($m + $n * $x);
  109     my $a = 1;
  110     for (my $i = $m - 2; $i >= 2; $i -= 2) {
  111       $a = 1 + ($n + $i - 2) / $i * $z * $a;
  112     }
  113     $p = 1 - ((1 - $z) ** ($n / 2) * $a);
  114   } elsif ($n % 2 == 0) {
  115     my $z = $n * $x / ($m + $n * $x);
  116     my $a = 1;
  117     for (my $i = $n - 2; $i >= 2; $i -= 2) {
  118       $a = 1 + ($m + $i - 2) / $i * $z * $a;
  119     }
  120     $p = (1 - $z) ** ($m / 2) * $a;
  121   } else {
  122     my $y = atan2(sqrt($n * $x / $m), 1);
  123     my $z = sin($y) ** 2;
  124     my $a = ($n == 1) ? 0 : 1;
  125     for (my $i = $n - 2; $i >= 3; $i -= 2) {
  126       $a = 1 + ($m + $i - 2) / $i * $z * $a;
  127     }
  128     my $b = PI;
  129     for (my $i = 2; $i <= $m - 1; $i += 2) {
  130       $b *= ($i - 1) / $i;
  131     }
  132     my $p1 = 2 / $b * sin($y) * cos($y) ** $m * $a;
  133 
  134     $z = cos($y) ** 2;
  135     $a = ($m == 1) ? 0 : 1;
  136     for (my $i = $m-2; $i >= 3; $i -= 2) {
  137       $a = 1 + ($i - 1) / $i * $z * $a;
  138     }
  139     $p = max(0, $p1 + 1 - 2 * $y / PI
  140       - 2 / PI * sin($y) * cos($y) * $a);
  141   }
  142   return $p;
  143 }
  144 
  145 
  146 sub _subchisqrprob {
  147   my ($n,$x) = @_;
  148   my $p;
  149 
  150   if ($x <= 0) {
  151     $p = 1;
  152   } elsif ($n > 100) {
  153     $p = _subuprob((($x / $n) ** (1/3)
  154         - (1 - 2/9/$n)) / sqrt(2/9/$n));
  155   } elsif ($x > 400) {
  156     $p = 0;
  157   } else {
  158     my ($a, $i, $i1);
  159     if (($n % 2) != 0) {
  160       $p = 2 * _subuprob(sqrt($x));
  161       $a = sqrt(2/PI) * exp(-$x/2) / sqrt($x);
  162       $i1 = 1;
  163     } else {
  164       $p = $a = exp(-$x/2);
  165       $i1 = 2;
  166     }
  167 
  168     for ($i = $i1; $i <= ($n-2); $i += 2) {
  169       $a *= $x / $i;
  170       $p += $a;
  171     }
  172   }
  173   return $p;
  174 }
  175 
  176 sub _subu {
  177   my ($p) = @_;
  178   my $y = -log(4 * $p * (1 - $p));
  179   my $x = sqrt(
  180     $y * (1.570796288
  181       + $y * (.03706987906
  182         + $y * (-.8364353589E-3
  183         + $y *(-.2250947176E-3
  184           + $y * (.6841218299E-5
  185           + $y * (0.5824238515E-5
  186           + $y * (-.104527497E-5
  187             + $y * (.8360937017E-7
  188             + $y * (-.3231081277E-8
  189               + $y * (.3657763036E-10
  190               + $y *.6936233982E-12)))))))))));
  191   $x = -$x if ($p>.5);
  192   return $x;
  193 }
  194 
  195 sub _subuprob {
  196   my ($x) = @_;
  197   my $p = 0; # if ($absx > 100)
  198   my $absx = abs($x);
  199 
  200   if ($absx < 1.9) {
  201     $p = (1 +
  202       $absx * (.049867347
  203         + $absx * (.0211410061
  204           + $absx * (.0032776263
  205           + $absx * (.0000380036
  206           + $absx * (.0000488906
  207             + $absx * .000005383)))))) ** -16/2;
  208   } elsif ($absx <= 100) {
  209     for (my $i = 18; $i >= 1; $i--) {
  210       $p = $i / ($absx + $p);
  211     }
  212     $p = exp(-.5 * $absx * $absx)
  213       / sqrt(2 * PI) / ($absx + $p);
  214   }
  215 
  216   $p = 1 - $p if ($x<0);
  217   return $p;
  218 }
  219 
  220 
  221 sub _subt {
  222   my ($n, $p) = @_;
  223 
  224   if ($p >= 1 || $p <= 0) {
  225     die "Invalid p: $p\n";
  226   }
  227 
  228   if ($p == 0.5) {
  229     return 0;
  230   } elsif ($p < 0.5) {
  231     return - _subt($n, 1 - $p);
  232   }
  233 
  234   my $u = _subu($p);
  235   my $u2 = $u ** 2;
  236 
  237   my $a = ($u2 + 1) / 4;
  238   my $b = ((5 * $u2 + 16) * $u2 + 3) / 96;
  239   my $c = (((3 * $u2 + 19) * $u2 + 17) * $u2 - 15) / 384;
  240   my $d = ((((79 * $u2 + 776) * $u2 + 1482) * $u2 - 1920) * $u2 - 945)
  241         / 92160;
  242   my $e = (((((27 * $u2 + 339) * $u2 + 930) * $u2 - 1782) * $u2 - 765) * $u2
  243       + 17955) / 368640;
  244 
  245   my $x = $u * (1 + ($a + ($b + ($c + ($d + $e / $n) / $n) / $n) / $n) / $n);
  246 
  247   if ($n <= log10($p) ** 2 + 3) {
  248     my $round;
  249     do {
  250       my $p1 = _subtprob($n, $x);
  251       my $n1 = $n + 1;
  252       my $delta = ($p1 - $p)
  253         / exp(($n1 * log($n1 / ($n + $x * $x))
  254           + log($n/$n1/2/PI) - 1
  255           + (1/$n1 - 1/$n) / 6) / 2);
  256       $x += $delta;
  257       $round = sprintf("%.".abs(int(log10(abs $x)-4))."f",$delta);
  258     } while (($x) && ($round != 0));
  259   }
  260   return $x;
  261 }
  262 
  263 sub _subtprob {
  264   my ($n, $x) = @_;
  265 
  266   my ($a,$b);
  267   my $w = atan2($x / sqrt($n), 1);
  268   my $z = cos($w) ** 2;
  269   my $y = 1;
  270 
  271   for (my $i = $n-2; $i >= 2; $i -= 2) {
  272     $y = 1 + ($i-1) / $i * $z * $y;
  273   }
  274 
  275   if ($n % 2 == 0) {
  276     $a = sin($w)/2;
  277     $b = .5;
  278   } else {
  279     $a = ($n == 1) ? 0 : sin($w)*cos($w)/PI;
  280     $b= .5 + $w/PI;
  281   }
  282   return max(0, 1 - $b - $a * $y);
  283 }
  284 
  285 sub _subf {
  286   my ($n, $m, $p) = @_;
  287   my $x;
  288 
  289   if ($p >= 1 || $p <= 0) {
  290     die "Invalid p: $p\n";
  291   }
  292 
  293   if ($p == 1) {
  294     $x = 0;
  295   } elsif ($m == 1) {
  296     $x = 1 / (_subt($n, 0.5 - $p / 2) ** 2);
  297   } elsif ($n == 1) {
  298     $x = _subt($m, $p/2) ** 2;
  299   } elsif ($m == 2) {
  300     my $u = _subchisqr($m, 1 - $p);
  301     my $a = $m - 2;
  302     $x = 1 / ($u / $m * (1 +
  303       (($u - $a) / 2 +
  304         (((4 * $u - 11 * $a) * $u + $a * (7 * $m - 10)) / 24 +
  305           (((2 * $u - 10 * $a) * $u + $a * (17 * $m - 26)) * $u
  306             - $a * $a * (9 * $m - 6)
  307           )/48/$n
  308         )/$n
  309       )/$n));
  310   } elsif ($n > $m) {
  311     $x = 1 / _subf2($m, $n, 1 - $p)
  312   } else {
  313     $x = _subf2($n, $m, $p)
  314   }
  315   return $x;
  316 }
  317 
  318 sub _subf2 {
  319   my ($n, $m, $p) = @_;
  320   my $u = _subchisqr($n, $p);
  321   my $n2 = $n - 2;
  322   my $x = $u / $n *
  323     (1 +
  324       (($u - $n2) / 2 +
  325         (((4 * $u - 11 * $n2) * $u + $n2 * (7 * $n - 10)) / 24 +
  326           (((2 * $u - 10 * $n2) * $u + $n2 * (17 * $n - 26)) * $u
  327             - $n2 * $n2 * (9 * $n - 6)) / 48 / $m) / $m) / $m);
  328   my $delta;
  329   do {
  330     my $z = exp(
  331       (($n+$m) * log(($n+$m) / ($n * $x + $m))
  332         + ($n - 2) * log($x)
  333         + log($n * $m / ($n+$m))
  334         - log(4 * PI)
  335         - (1/$n  + 1/$m - 1/($n+$m))/6
  336       )/2);
  337     $delta = (_subfprob($n, $m, $x) - $p) / $z;
  338     $x += $delta;
  339   } while (abs($delta)>3e-4);
  340   return $x;
  341 }
  342 
  343 sub _subchisqr {
  344   my ($n, $p) = @_;
  345   my $x;
  346 
  347   if (($p > 1) || ($p <= 0)) {
  348     die "Invalid p: $p\n";
  349   } elsif ($p == 1){
  350     $x = 0;
  351   } elsif ($n == 1) {
  352     $x = _subu($p / 2) ** 2;
  353   } elsif ($n == 2) {
  354     $x = -2 * log($p);
  355   } else {
  356     my $u = _subu($p);
  357     my $u2 = $u * $u;
  358 
  359     $x = max(0, $n + sqrt(2 * $n) * $u
  360       + 2/3 * ($u2 - 1)
  361       + $u * ($u2 - 7) / 9 / sqrt(2 * $n)
  362       - 2/405 / $n * ($u2 * (3 *$u2 + 7) - 16));
  363 
  364     if ($n <= 100) {
  365       my ($x0, $p1, $z);
  366       do {
  367         $x0 = $x;
  368         if ($x < 0) {
  369           $p1 = 1;
  370         } elsif ($n>100) {
  371           $p1 = _subuprob((($x / $n)**(1/3) - (1 - 2/9/$n))
  372             / sqrt(2/9/$n));
  373         } elsif ($x>400) {
  374           $p1 = 0;
  375         } else {
  376           my ($i0, $a);
  377           if (($n % 2) != 0) {
  378             $p1 = 2 * _subuprob(sqrt($x));
  379             $a = sqrt(2/PI) * exp(-$x/2) / sqrt($x);
  380             $i0 = 1;
  381           } else {
  382             $p1 = $a = exp(-$x/2);
  383             $i0 = 2;
  384           }
  385 
  386           for (my $i = $i0; $i <= $n-2; $i += 2) {
  387             $a *= $x / $i;
  388             $p1 += $a;
  389           }
  390         }
  391         $z = exp((($n-1) * log($x/$n) - log(4*PI*$x)
  392           + $n - $x - 1/$n/6) / 2);
  393         $x += ($p1 - $p) / $z;
  394         $x = sprintf("%.5f", $x);
  395       } while (($n < 31) && (abs($x0 - $x) > 1e-4));
  396     }
  397   }
  398   return $x;
  399 }
  400 
  401 sub log10 {
  402   my $n = shift;
  403   return log($n) / log(10);
  404 }
  405 
  406 sub max {
  407   my $max = shift;
  408   my $next;
  409   while (@_) {
  410     $next = shift;
  411     $max = $next if ($next > $max);
  412   }
  413   return $max;
  414 }
  415 
  416 sub min {
  417   my $min = shift;
  418   my $next;
  419   while (@_) {
  420     $next = shift;
  421     $min = $next if ($next < $min);
  422   }
  423   return $min;
  424 }
  425 
  426 sub precision {
  427   my ($x) = @_;
  428   return abs int(log10(abs $x) - SIGNIFICANT);
  429 }
  430 
  431 sub precision_string {
  432   my ($x) = @_;
  433   if ($x) {
  434     return sprintf "%." . precision($x) . "f", $x;
  435   } else {
  436     return "0";
  437   }
  438 }
  439 
  440 
  441 # Autoload methods go after =cut, and are processed by the autosplit program.
  442 
  443 1;
  444 __END__
  445 
  446 # Below is the stub of documentation for your module.
  447 
  448 =head1 NAME
  449 
  450 Statistics::Distributions - Perl module for calculating probabilities and critical values of common statistical distributions
  451 
  452 =head1 SYNOPSIS
  453 
  454   use Statistics::Distributions;
  455 
  456   $uprob=uprob(-0.85);
  457   print "upper probability of the u distribution: Q(u) = 1-G(u) (u=1.43) = $uprob";
  458 
  459   $tprob=tprob(3, 6.251);
  460   print "upper probability of the t distribution: Q = 1-G (3 degrees of freedom  , t = 6.251) = $tprob";
  461 
  462   $chisprob=chisqrprob(3, 6.25);
  463   print "upper probability of the chi-square distribution: Q = 1-G (3 degrees of freedom, chi-squared = 6.25) = $chisprob";
  464 
  465   $fprob=fprob(3,5, .625);
  466   print "upper probability of the F distribution: Q = 1-G (3 degrees of freedom  in numerator, 5 degrees of freedom in denominator, F $
  467 
  468   $u=udistr(.05);
  469   print "u-crit (95th percentile = 0.05 level) = $u";
  470 
  471   $t=tdistr(1, .005);
  472   print "t-crit (1 degree of freedom, 99.5th percentile = 0.005 level) =$t";
  473 
  474   $chis=chisqrdistr(2, .05);
  475   print "Chi-squared-crit (2 degrees of freedom, 95th percentile = 0.05 level) = $chis";
  476 
  477   $f=fdistr(1,3, .01);
  478   print "F-crit (1 degree of freedom in numerator, 3 degrees of freedom in denominator, 99th percentile = 0.01 level) = $f";
  479 
  480 =head1 DESCRIPTION
  481 
  482 This Perl module calulates percentage points (6 significant digits) of the u (standard normal) distribution,
  483 the student's t distribution, the chi-square distribution and the F distribution.
  484 
  485 It can also calculate the upper probability (6 significant digits) of the u (standard normal),
  486 the chi-square, the t and the F distribution.
  487 
  488 These critical values are needed to perform statistical tests, like the u test, the t test, the chi-squared test,
  489 and the F test, and to calculate confidence intervals.
  490 
  491 If you are interested in more precise algorithms you could look at:
  492  StatLib: http://lib.stat.cmu.edu/apstat/
  493  Applied Statistics Algorithms by Griffiths, P. and Hill, I.D., Ellis Horwood: Chichester (1985)
  494 
  495 =head1 AUTHOR
  496 
  497 Michael Kospach, mike.perl@gmx.at
  498 Nice formating, simplification and bug repair by Matthias Trautner Kromann, mtk@id.cbs.dk
  499 
  500 =cut
  501 
  502 
  503 
  504 
  505 
  506 
  507 
  508 
  509 

aubreyja at gmail dot com
ViewVC Help
Powered by ViewVC 1.0.9