[system] / trunk / webwork / system / courseScripts / Distributions.pm Repository:
ViewVC logotype

View of /trunk/webwork/system/courseScripts/Distributions.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 56 - (download) (as text) (annotate)
Fri Jun 22 19:31:16 2001 UTC (18 years, 5 months ago) by maria
File size: 11718 byte(s)
initial report

    1 #package Statistics::Distributions;
    2 package Distributions;
    3 
    4 use strict;
    5 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
    6 use constant PI => 3.1415926536;
    7 use constant SIGNIFICANT => 5; # number of significant digits to be returned
    8 
    9 require Exporter;
   10 
   11 @ISA = qw(Exporter AutoLoader);
   12 # Items to export into callers namespace by default. Note: do not export
   13 # names by default without a very good reason. Use EXPORT_OK instead.
   14 # Do not simply export all your public functions/methods/constants.
   15 @EXPORT_OK = qw(chisqrdistr tdistr fdistr udistr uprob chisqrprob tprob fprob);
   16 $VERSION = '0.06';
   17 
   18 # Preloaded methods go here.
   19 
   20 sub chisqrdistr { # Percentage points  X^2(x^2,n)
   21   my ($n, $p) = @_;
   22   if ($n <= 0 || abs($n) - abs(int($n)) != 0) {
   23     die "Invalid n: $n\n"; # degree of freedom
   24   }
   25   if ($p <= 0 || $p > 1) {
   26     die "Invalid p: $p\n";
   27   }
   28   return precision_string(_subchisqr($n, $p));
   29 }
   30 
   31 sub udistr { # Percentage points   N(0,1^2)
   32   my ($p) = (@_);
   33   if ($p > 1 || $p <= 0) {
   34     die "Invalid p: $p\n";
   35   }
   36   return precision_string(_subu($p));
   37 }
   38 
   39 sub tdistr { # Percentage points   t(x,n)
   40   my ($n, $p) = @_;
   41   if ($n <= 0 || abs($n) - abs(int($n)) != 0) {
   42     die "Invalid n: $n\n";
   43   }
   44   if ($p <= 0 || $p >= 1) {
   45     die "Invalid p: $p\n";
   46   }
   47   return precision_string(_subt($n, $p));
   48 }
   49 
   50 sub fdistr { # Percentage points  F(x,n1,n2)
   51   my ($n, $m, $p) = @_;
   52   if (($n<=0) || ((abs($n)-(abs(int($n))))!=0)) {
   53     die "Invalid n: $n\n"; # first degree of freedom
   54   }
   55   if (($m<=0) || ((abs($m)-(abs(int($m))))!=0)) {
   56     die "Invalid m: $m\n"; # second degree of freedom
   57   }
   58   if (($p<=0) || ($p>1)) {
   59     die "Invalid p: $p\n";
   60   }
   61   return precision_string(_subf($n, $m, $p));
   62 }
   63 
   64 sub uprob { # Upper probability   N(0,1^2)
   65   my ($x) = @_;
   66   return precision_string(_subuprob($x));
   67 }
   68 
   69 sub chisqrprob { # Upper probability   X^2(x^2,n)
   70   my ($n,$x) = @_;
   71   if (($n <= 0) || ((abs($n) - (abs(int($n)))) != 0)) {
   72     die "Invalid n: $n\n"; # degree of freedom
   73   }
   74   return precision_string(_subchisqrprob($n, $x));
   75 }
   76 
   77 sub tprob { # Upper probability   t(x,n)
   78   my ($n, $x) = @_;
   79   if (($n <= 0) || ((abs($n) - abs(int($n))) !=0)) {
   80     die "Invalid n: $n\n"; # degree of freedom
   81   }
   82   return precision_string(_subtprob($n, $x));
   83 }
   84 
   85 sub fprob { # Upper probability   F(x,n1,n2)
   86   my ($n, $m, $x) = @_;
   87   if (($n<=0) || ((abs($n)-(abs(int($n))))!=0)) {
   88     die "Invalid n: $n\n"; # first degree of freedom
   89   }
   90   if (($m<=0) || ((abs($m)-(abs(int($m))))!=0)) {
   91     die "Invalid m: $m\n"; # second degree of freedom
   92   }
   93   return precision_string(_subfprob($n, $m, $x));
   94 }
   95 
   96 
   97 sub _subfprob {
   98   my ($n, $m, $x) = @_;
   99   my $p;
  100 
  101   if ($x<=0) {
  102     $p=1;
  103   } elsif ($m % 2 == 0) {
  104     my $z = $m / ($m + $n * $x);
  105     my $a = 1;
  106     for (my $i = $m - 2; $i >= 2; $i -= 2) {
  107       $a = 1 + ($n + $i - 2) / $i * $z * $a;
  108     }
  109     $p = 1 - ((1 - $z) ** ($n / 2) * $a);
  110   } elsif ($n % 2 == 0) {
  111     my $z = $n * $x / ($m + $n * $x);
  112     my $a = 1;
  113     for (my $i = $n - 2; $i >= 2; $i -= 2) {
  114       $a = 1 + ($m + $i - 2) / $i * $z * $a;
  115     }
  116     $p = (1 - $z) ** ($m / 2) * $a;
  117   } else {
  118     my $y = atan2(sqrt($n * $x / $m), 1);
  119     my $z = sin($y) ** 2;
  120     my $a = ($n == 1) ? 0 : 1;
  121     for (my $i = $n - 2; $i >= 3; $i -= 2) {
  122       $a = 1 + ($m + $i - 2) / $i * $z * $a;
  123     }
  124     my $b = PI;
  125     for (my $i = 2; $i <= $m - 1; $i += 2) {
  126       $b *= ($i - 1) / $i;
  127     }
  128     my $p1 = 2 / $b * sin($y) * cos($y) ** $m * $a;
  129 
  130     $z = cos($y) ** 2;
  131     $a = ($m == 1) ? 0 : 1;
  132     for (my $i = $m-2; $i >= 3; $i -= 2) {
  133       $a = 1 + ($i - 1) / $i * $z * $a;
  134     }
  135     $p = max(0, $p1 + 1 - 2 * $y / PI
  136       - 2 / PI * sin($y) * cos $y * $a);
  137   }
  138   return $p;
  139 }
  140 
  141 
  142 sub _subchisqrprob {
  143   my ($n,$x) = @_;
  144   my $p;
  145 
  146   if ($x <= 0) {
  147     $p = 1;
  148   } elsif ($n > 100) {
  149     $p = _subuprob((($x / $n) ** (1/3)
  150         - (1 - 2/9/$n)) / sqrt(2/9/$n));
  151   } elsif ($x > 400) {
  152     $p = 0;
  153   } else {
  154     my ($a, $i, $i1);
  155     if (($n % 2) != 0) {
  156       $p = 2 * _subuprob(sqrt($x));
  157       $a = sqrt(2/PI) * exp(-$x/2) / sqrt($x);
  158       $i1 = 1;
  159     } else {
  160       $p = $a = exp(-$x/2);
  161       $i1 = 2;
  162     }
  163 
  164     for ($i = $i1; $i <= ($n-2); $i += 2) {
  165       $a *= $x / $i;
  166       $p += $a;
  167     }
  168   }
  169   return $p;
  170 }
  171 
  172 sub _subu {
  173   my ($p) = @_;
  174   my $y = -log(4 * $p * (1 - $p));
  175   my $x = sqrt(
  176     $y * (1.570796288
  177       + $y * (.03706987906
  178         + $y * (-.8364353589E-3
  179         + $y *(-.2250947176E-3
  180           + $y * (.6841218299E-5
  181           + $y * (0.5824238515E-5
  182           + $y * (-.104527497E-5
  183             + $y * (.8360937017E-7
  184             + $y * (-.3231081277E-8
  185               + $y * (.3657763036E-10
  186               + $y *.6936233982E-12)))))))))));
  187   $x = -$x if ($p>.5);
  188   return $x;
  189 }
  190 
  191 sub _subuprob {
  192   my ($x) = @_;
  193   my $p = 0; # if ($absx > 100)
  194   my $absx = abs($x);
  195 
  196   if ($absx < 1.9) {
  197     $p = (1 +
  198       $absx * (.049867347
  199         + $absx * (.0211410061
  200           + $absx * (.0032776263
  201           + $absx * (.0000380036
  202           + $absx * (.0000488906
  203             + $absx * .000005383)))))) ** -16/2;
  204   } elsif ($absx <= 100) {
  205     for (my $i = 18; $i >= 1; $i--) {
  206       $p = $i / ($absx + $p);
  207     }
  208     $p = exp(-.5 * $absx * $absx)
  209       / sqrt(2 * PI) / ($absx + $p);
  210   }
  211 
  212   $p = 1 - $p if ($x<0);
  213   return $p;
  214 }
  215 
  216 
  217 sub _subt {
  218   my ($n, $p) = @_;
  219 
  220   if ($p >= 1 || $p <= 0) {
  221     die "Invalid p: $p\n";
  222   }
  223 
  224   if ($p == 0.5) {
  225     return 0;
  226   } elsif ($p < 0.5) {
  227     return - _subt($n, 1 - $p);
  228   }
  229 
  230   my $u = _subu($p);
  231   my $u2 = $u ** 2;
  232 
  233   my $a = ($u2 + 1) / 4;
  234   my $b = ((5 * $u2 + 16) * $u2 + 3) / 96;
  235   my $c = (((3 * $u2 + 19) * $u2 + 17) * $u2 - 15) / 384;
  236   my $d = ((((79 * $u2 + 776) * $u2 + 1482) * $u2 - 1920) * $u2 - 945)
  237         / 92160;
  238   my $e = (((((27 * $u2 + 339) * $u2 + 930) * $u2 - 1782) * $u2 - 765) * $u2
  239       + 17955) / 368640;
  240 
  241   my $x = $u * (1 + ($a + ($b + ($c + ($d + $e / $n) / $n) / $n) / $n) / $n);
  242 
  243   if ($n <= log10($p) ** 2 + 3) {
  244     my $round;
  245     do {
  246       my $p1 = _subtprob($n, $x);
  247       my $n1 = $n + 1;
  248       my $delta = ($p1 - $p)
  249         / exp(($n1 * log($n1 / ($n + $x * $x))
  250           + log($n/$n1/2/PI) - 1
  251           + (1/$n1 - 1/$n) / 6) / 2);
  252       $x += $delta;
  253       $round = sprintf("%.".abs(int(log10(abs $x)-4))."f",$delta);
  254     } while (($x) && ($round != 0));
  255   }
  256   return $x;
  257 }
  258 
  259 sub _subtprob {
  260   my ($n, $x) = @_;
  261 
  262   my ($a,$b);
  263   my $w = atan2($x / sqrt($n), 1);
  264   my $z = cos $w ** 2;
  265   my $y = 1;
  266 
  267   for (my $i = $n-2; $i >= 2; $i -= 2) {
  268     $y = 1 + ($i-1) / $i * $z * $y;
  269   }
  270 
  271   if ($n % 2 == 0) {
  272     $a = sin($w)/2;
  273     $b = .5;
  274   } else {
  275     $a = ($n == 1) ? 0 : sin($w)*cos($w)/PI;
  276     $b= .5 + $w/PI;
  277   }
  278   return max(0, 1 - $b - $a * $y);
  279 }
  280 
  281 sub _subf {
  282   my ($n, $m, $p) = @_;
  283   my $x;
  284 
  285   if ($p >= 1 || $p <= 0) {
  286     die "Invalid p: $p\n";
  287   }
  288 
  289   if ($p == 1) {
  290     $x = 0;
  291   } elsif ($m == 1) {
  292     $x = 1 / (_subt($n, 0.5 - $p / 2) ** 2);
  293   } elsif ($n == 1) {
  294     $x = _subt($m, $p/2) ** 2;
  295   } elsif ($m == 2) {
  296     my $u = _subchisqr($m, 1 - $p);
  297     my $a = $m - 2;
  298     $x = 1 / ($u / $m * (1 +
  299       (($u - $a) / 2 +
  300         (((4 * $u - 11 * $a) * $u + $a * (7 * $m - 10)) / 24 +
  301           (((2 * $u - 10 * $a) * $u + $a * (17 * $m - 26)) * $u
  302             - $a * $a * (9 * $m - 6)
  303           )/48/$n
  304         )/$n
  305       )/$n));
  306   } elsif ($n > $m) {
  307     $x = 1 / _subf2($m, $n, 1 - $p)
  308   } else {
  309     $x = _subf2($n, $m, $p)
  310   }
  311   return $x;
  312 }
  313 
  314 sub _subf2 {
  315   my ($n, $m, $p) = @_;
  316   my $u = _subchisqr($n, $p);
  317   my $n2 = $n - 2;
  318   my $x = $u / $n *
  319     (1 +
  320       (($u - $n2) / 2 +
  321         (((4 * $u - 11 * $n2) * $u + $n2 * (7 * $n - 10)) / 24 +
  322           (((2 * $u - 10 * $n2) * $u + $n2 * (17 * $n - 26)) * $u
  323             - $n2 * $n2 * (9 * $n - 6)) / 48 / $m) / $m) / $m);
  324   my $delta;
  325   do {
  326     my $z = exp(
  327       (($n+$m) * log(($n+$m) / ($n * $x + $m))
  328         + ($n - 2) * log($x)
  329         + log($n * $m / ($n+$m))
  330         - log(4 * PI)
  331         - (1/$n  + 1/$m - 1/($n+$m))/6
  332       )/2);
  333     $delta = (_subfprob($n, $m, $x) - $p) / $z;
  334     $x += $delta;
  335   } while (abs($delta)>3e-4);
  336   return $x;
  337 }
  338 
  339 sub _subchisqr {
  340   my ($n, $p) = @_;
  341   my $x;
  342 
  343   if (($p > 1) || ($p <= 0)) {
  344     die "Invalid p: $p\n";
  345   } elsif ($p == 1){
  346     $x = 0;
  347   } elsif ($n == 1) {
  348     $x = _subu($p / 2) ** 2;
  349   } elsif ($n == 2) {
  350     $x = -2 * log($p);
  351   } else {
  352     my $u = _subu($p);
  353     my $u2 = $u * $u;
  354 
  355     $x = max(0, $n + sqrt(2 * $n) * $u
  356       + 2/3 * ($u2 - 1)
  357       + $u * ($u2 - 7) / 9 / sqrt(2 * $n)
  358       - 2/405 / $n * ($u2 * (3 *$u2 + 7) - 16));
  359 
  360     if ($n <= 100) {
  361       my ($x0, $p1, $z);
  362       do {
  363         $x0 = $x;
  364         if ($x < 0) {
  365           $p1 = 1;
  366         } elsif ($n>100) {
  367           $p1 = _subuprob((($x / $n)**(1/3) - (1 - 2/9/$n))
  368             / sqrt(2/9/$n));
  369         } elsif ($x>400) {
  370           $p1 = 0;
  371         } else {
  372           my ($i0, $a);
  373           if (($n % 2) != 0) {
  374             $p1 = 2 * _subuprob(sqrt($x));
  375             $a = sqrt(2/PI) * exp(-$x/2) / sqrt($x);
  376             $i0 = 1;
  377           } else {
  378             $p1 = $a = exp(-$x/2);
  379             $i0 = 2;
  380           }
  381 
  382           for (my $i = $i0; $i <= $n-2; $i += 2) {
  383             $a *= $x / $i;
  384             $p1 += $a;
  385           }
  386         }
  387         $z = exp((($n-1) * log($x/$n) - log(4*PI*$x)
  388           + $n - $x - 1/$n/6) / 2);
  389         $x += ($p1 - $p) / $z;
  390         $x = sprintf("%.5f", $x);
  391       } while (($n < 31) && (abs($x0 - $x) > 1e-4));
  392     }
  393   }
  394   return $x;
  395 }
  396 
  397 sub log10 {
  398   my $n = shift;
  399   return log($n) / log(10);
  400 }
  401 
  402 sub max {
  403   my $max = shift;
  404   my $next;
  405   while (@_) {
  406     $next = shift;
  407     $max = $next if ($next > $max);
  408   }
  409   return $max;
  410 }
  411 
  412 sub min {
  413   my $min = shift;
  414   my $next;
  415   while (@_) {
  416     $next = shift;
  417     $min = $next if ($next < $min);
  418   }
  419   return $min;
  420 }
  421 
  422 sub precision {
  423   my ($x) = @_;
  424   return abs int(log10(abs $x) - SIGNIFICANT);
  425 }
  426 
  427 sub precision_string {
  428   my ($x) = @_;
  429   if ($x) {
  430     return sprintf "%." . precision($x) . "f", $x;
  431   } else {
  432     return "0";
  433   }
  434 }
  435 
  436 
  437 # Autoload methods go after =cut, and are processed by the autosplit program.
  438 
  439 1;
  440 __END__
  441 # Below is the stub of documentation for your module. You better edit it!
  442 
  443 =head1 NAME
  444 
  445 Statistics::Distributions - Perl module for calculating critical values of common statistical distributions
  446 
  447 =head1 SYNOPSIS
  448 
  449   use Statistics::Distributions;
  450 
  451   $chis=chisqrdistr (2,.05);
  452   print "Chi-squared-crit (2 degrees of freedom, 95th percentile = 0.05 level) = $chis\n";
  453   $u=udistr (.05);
  454   print "u-crit (95th percentile = 0.05 level) = $u\n";
  455   $t=tdistr (1,.005);
  456   print "t-crit (1 degree of freedom, 99.5th percentile = 0.005 level) =$t\n";
  457   $f=fdistr (1,3,.01);
  458   print "F-crit (1 degree of freedom in numerator, 3 degrees of freedom in denominator, 99th percentile = 0.01 level) = $f\n";
  459   $uprob=Statistics::Distributions::uprob (-0.85);
  460   print "upper probability of the u distribution: Q(u) = 1-G(u) (u=1.43) = $uprob\n";
  461   $chisprob=Statistics::Distributions::chisqrprob (3,6.25);
  462   print "upper probability of the chi-square distribution: Q = 1-G (3 degrees of freedom, chi-squared = 6.25) = $chisprob\n";
  463   $tprob=Statistics::Distributions::tprob (3,6.251);
  464   print "upper probability of the t distribution: Q = 1-G (3 degrees of freedom  , t = 6.251) = $tprob\n";
  465   $fprob=Statistics::Distributions::fprob (3,5,.625);
  466   print "upper probability of the F distribution: Q = 1-G (3 degrees of freedom  in numerator, 5 degrees of freedom in denominator, F = 6.25) = $fprob\n";
  467 
  468 =head1 DESCRIPTION
  469 
  470 This Perl module calulates percentage points (5 significant digits) of the u (standard normal) distribution, the student's t distribution, the chi-square distribution and the F distribution. It can also calculate the upper probability (5 significant digits) of the u (standard normal), the chi-square, the t and the F distribution.
  471 These critical values are needed to perform statistical tests, like the u test, the t test, the F test and the chi-squared test, and to calculate confidence intervals.
  472 If you are interested in more precise algorithms you could look at:
  473  StatLib: http://lib.stat.cmu.edu/apstat/
  474  Applied Statistics Algorithms by Griffiths, P. and Hill, I.D., Ellis Horwood: Chichester (1985)
  475 
  476 =head1 AUTHOR
  477 
  478 Michael Kospach, mike.perl@gmx.at
  479 Nice formating, simplification and bug repair by Matthias Trautner Kromann, mtk@id.cbs.dk
  480 
  481 =head1 SEE ALSO
  482 
  483 Statistics::ChiSquare, Statistics::Table::t, Statistics::Table::F, perl(1).
  484 
  485 =cut
  486 
  487 
  488 
  489 
  490 
  491 
  492 
  493 
  494 

aubreyja at gmail dot com
ViewVC Help
Powered by ViewVC 1.0.9