## DESCRIPTION
## Statistics
## ENDDESCRIPTION

## KEYWORDS('Statistics','Distribution')
## Tagged by
## DBsubject('Statistics')
## DBchapter('Chi-squared')
## DBsection('Contingency Table')
## Date('April 2012')
## Author('Hedley Pinsent')
## Institution('CNA')
## TitleText1('')
## EditionText1('')
## AuthorText1('')
## Section1('')
## Problem1('')

#
# First comes some stuff that appears at the beginning of every problem
#

DOCUMENT();        # This should be the first executable line in the problem.

# Macro libraries used by this problem.  The opening "loadMacros(" had been
# lost, which left a dangling list of strings followed by ");" (a syntax error).
# PGanswermacros.pl is listed explicitly because the answer checkers used at
# the end of the problem (num_cmp, radio_cmp) are defined there.
loadMacros(
"PGbasicmacros.pl",
"PGchoicemacros.pl",
"PGanswermacros.pl",
"unionMacros.pl",
"unionTables.pl",
"PGstatisticsmacros.pl",
);

TEXT($BEGIN_ONE_COLUMN,beginproblem()) ;

# vvvvvvvvvvvvvvvvv Defining Categories
# Draw the table dimensions from {2,3,4} until rows + columns is at least 5
# (this rules out a 2x2 table).  Both counters start out undefined, i.e.
# numerically 0, so the body always runs at least once.
until ($nRows + $nColumns >= 5) {
    $nRows    = list_random(2, 3, 4);
    $nColumns = list_random(2, 3, 4);
}

# Candidate labels: colours head the columns, colas head the rows.
@colourList = ("Red", "Green", "Yellow", "Blue", "Orange", "Purple");
@colaList   = ("Inca", "Pepsi", "Coca-Cola", "Mountain Dew", "Lime", "Frosty");

# Keep a random subset of each list, sized to the table
# (the colas are drawn first, then the colours).
@colaList   = @colaList[ NchooseK(6, $nRows) ];
@colourList = @colourList[ NchooseK(6, $nColumns) ];
# ^^^^^^^^^^^^^^^^^ Defining Categories

## Constructing Differences vvvvvvvvvvvvvvvvvvvvvv
# @D holds (observed - expected) for every cell, stored row-major:
# cell (row, col) lives at index row*$nColumns + col.  Start from all zeros.
@D = (0) x ($nRows * $nColumns);

#adjust below to get balance between reject and do not reject
$tries = random(1, 5, 1);

# Each pass picks two rows and two columns and pushes $diff around the
# corners of that rectangle with alternating sign, so every row sum and
# column sum of @D stays at zero.  The range 0..$tries runs $tries + 1 passes.
# NOTE(review): the accumulated differences are not bounded against the
# expected counts assigned later, so a very small or non-positive observed
# count looks possible in rare cases -- confirm whether that matters.
for my $pass (0 .. $tries) {
    $diff = list_random(1, 2, 3, 4, 5, -5, -4, -3, -2, -1);

    @colPair = NchooseK($nColumns, 2);
    @rowPair = NchooseK($nRows, 2);

    # Row-major offsets of the two chosen rows.
    my $firstRow  = $rowPair[0] * $nColumns;
    my $secondRow = $rowPair[1] * $nColumns;

    $D[ $firstRow  + $colPair[0] ] += $diff;
    $D[ $firstRow  + $colPair[1] ] -= $diff;
    $D[ $secondRow + $colPair[1] ] += $diff;
    $D[ $secondRow + $colPair[0] ] -= $diff;
}
## Constructing Differences ^^^^^^^^^^^^^^^^^^^^^^

# Assigning Values (Expected/Observed) vvvvvvvvvvvvvvvvvvvvvvv
# Each expected count is base * rowFactor * columnFactor; the observed count
# is the expected count plus the difference built earlier in @D.
$base = random(1, 3, 1);

# One random factor per row, then one per column (same draw order as before).
for my $r (0 .. $nRows - 1) {
    $rowFact[$r] = list_random(4, 5, 6, 7, 8, 9);
}
for my $c (0 .. $nColumns - 1) {
    $colFact[$c] = list_random(4, 5, 6, 7, 8, 9);
}

for my $r (0 .. $nRows - 1) {
    for my $c (0 .. $nColumns - 1) {
        my $cell = $r * $nColumns + $c;                     # row-major index
        $E[$cell] = $base * $rowFact[$r] * $colFact[$c];    # Expected
        $O[$cell] = $E[$cell] + $D[$cell];                  # Observed
    }
}
# Assigning Values (Expected/Observed)^^^^^^^^^^^^^^^^^^^^^^

##Calculating chi-squared vvvvvvvvvvvvvvvvvvvvvvvvv
# Sum over all cells of (observed - expected)^2 / expected.
$CALC = 0;
for my $cell (0 .. $nRows * $nColumns - 1) {
    $CALC += ($D[$cell]**2) / $E[$cell];
}
##Calculating chi-squared ^^^^^^^^^^^^^^^^^^^^^^^^

##Constructing the data table vvvvvvvvvvvvvvvvvvvvvvvvvvvvv
# Build the table of observed counts shown to the student: a header row of
# colours, then one row per cola holding that row's slice of @O (row-major).
$datatable = BeginTable(spacing => 3);
$datatable .= AlignedRow(["Cola-Colour", @colourList]);
foreach $i (0 .. $nRows - 1) {
    # Observed counts for row $i.
    @row = @O[ $i * $nColumns .. ($i + 1) * $nColumns - 1 ];
    $datatable .= AlignedRow([$colaList[$i], @row]);
}
$datatable .= EndTable();
##Constructing the data table ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# BEGIN_TEXT must start its own line; it had been folded into the comment
# above, which left the problem text to be parsed as Perl code.
BEGIN_TEXT
Follows is a contingency table for colour preference and cola preference.$BR
For example $O[0 *$nColumns + 0] people in the survey preferred the colour $colourList[0] and their favourite beverage was $colaList[0] $PAR
$datatable

END_TEXT

##Constructing the answer table vvvvvvvvvvvvvvvvvvvvvvvvvvvvv
# Build a table of named answer blanks with the same layout as the data table
# plus a "Totals" column and a "Totals" row.  For every blank, a
# (name => checker) pair is appended to @namedCmp; the list is meant to be
# handed to NAMED_ANS once all of the problem text has been emitted.

$answertable = BeginTable(spacing => 3);
$answertable .= AlignedRow(["Cola-Colour", @colourList, "Totals"]);
foreach $i (0 .. $nRows - 1) {
    $rowTotal = 0;
    foreach $j (0 .. $nColumns - 1) {
        # Blank for the expected value of cell ($i, $j), named "<row>and<col>".
        $name = $i . "and" . $j;
        $answerRule[$j] = NAMED_ANS_RULE($name, 6);
        push @namedCmp, $name => num_cmp($E[$i * $nColumns + $j]);

        # Accumulate the marginal totals from the expected values.
        $rowTotal     += $E[$i * $nColumns + $j];
        $colTotal[$j] += $E[$i * $nColumns + $j];
    }

    # The last blank in the row is that row's total.
    $name = "rowTotal" . $i;
    $answerRule[$nColumns] = NAMED_ANS_RULE($name, 6);
    push @namedCmp, $name => num_cmp($rowTotal);

    $answertable .= AlignedRow([$colaList[$i], @answerRule]);
}

# Last row: the column totals followed by the grand total.
foreach $k (0 .. $nColumns - 1) {
    $name = "colTotal" . $k;
    $answerRule[$k] = NAMED_ANS_RULE($name, 6);
    push @namedCmp, $name => num_cmp($colTotal[$k]);
}

$grandTotal = 0;
foreach $k (0 .. $nColumns - 1) {
    $grandTotal += $colTotal[$k];
}
$name = "grand";
$answerRule[$nColumns] = NAMED_ANS_RULE($name, 6);
push @namedCmp, $name => num_cmp($grandTotal);

$answertable .= AlignedRow(["Totals", @answerRule]);
$answertable .= EndTable();
##Constructing the answer table ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# BEGIN_TEXT must start its own line; it had been folded into the comment
# above, so the instructions and the answer table were never displayed.
BEGIN_TEXT
For the above table calculate the row totals, column totals grand total, and the expected values.$BR
Enter these values in the appropriate spaces below.$PAR
$answertable

END_TEXT

## Setting up significance test vvvvvvvvvvvvvvvvvvvvvvvvvvv
# Significance level, degrees of freedom (rows - 1)(columns - 1), and the
# critical value returned by chisqrdistr for that df and alpha.
$alpha = list_random(0.01, 0.05);
$df    = ($nColumns - 1) * ($nRows - 1);
$CRIT  = chisqrdistr($df, $alpha);
##Setting up significance test ^^^^^^^^^^^^^^^^^^^^^^^^^^^

# NOTE: the order of the print_a() calls below must match the order of the
# unnamed ANS() calls at the end of the problem:
# H0, H1, test statistic, comparison operator, decision.
#
# Math is written as \( ... \) inside single quotes.  The earlier "$$...$$"
# form sat inside double-quoted strings, where Perl interpolates $$ instead
# of passing it through as text.

# Null hypothesis choice.  (The constructor call had been folded into the
# comment line above, so $mcnull was never created.)
$mcnull = new_multiple_choice();
$mcnull->qa("", "There is no relationship");
$mcnull->extra("There is a relationship");

# Alternative hypothesis choice.
$mcalt = new_multiple_choice();
$mcalt->qa("", "There is a relationship");
$mcalt->extra("There is no relationship");

##SETTING UP THE TABLE
# (The @r1 assignment had also been folded into a comment.)
@r1 = ('\(H_0\)', $mcnull->print_a());
@r2 = ('\(H_1\)', $mcalt->print_a());

# Test Statistics choices
$mcTestStatistic = new_multiple_choice();
$mcTestStatistic->qa("The test statistic is", '\(\chi^2\)');
$mcTestStatistic->extra('\(z\)', '\(t\)', '\(F\)');

# Direction of the rejection rule.
$mctestEQ = new_multiple_choice();
$mctestEQ->qa("", '\(\gt\)');
$mctestEQ->extra('\(\lt or \gt\)' . " -/+", '\(\lt\)');

@r3 = (
    "We reject if: ",
    $mcTestStatistic->print_a(),
    $mctestEQ->print_a(),
    NAMED_ANS_RULE("CRIT", 6),
);

# Decision.  The chi-squared test rejects only in the upper tail; $CALC is a
# sum of squares divided by positive expected counts, so it is never negative
# and needs no abs().
$mcAR = new_multiple_choice();
if ($CALC > $CRIT) {
    $mcAR->qa("We", "reject");
    $mcAR->extra("do not reject");
}
else {
    $mcAR->qa("We", "do not reject");
    $mcAR->extra("reject");
}

BEGIN_TEXT
$HR At a $alpha significance level test whether there is a relationship between favourite colour and favourite cola.
$PAR$PAR

\{begintable(4)\}

\{row( @r1)\}
\{row( @r2)\}

\{endtable()\}

$PAR$PAR
\(\alpha\) = \{NAMED_ANS_RULE("alpha",6)\} $PERCENT$BR

$BR \{begintable(4)\} \{row( @r3)\} \{endtable()\}$PAR
The value of the test statistic is \{NAMED_ANS_RULE("Ftest",6)\} $PAR
\{$mcAR->print_q()\}
\{$mcAR->print_a()\}
END_TEXT

# END_TEXT has to sit alone on its line.  It had been merged with the
# statements below, and several of those statements were in turn folded into
# comments, so the matching answer blanks were left without a checker.

##contingency table check vvvvvvvvvvvvvvvvvv
# @namedCmp holds the (name => checker) pairs collected while the answer
# table was being built.
NAMED_ANS(@namedCmp);
##contingency table check ^^^^^^^^^^^^^^^^^^

# The unnamed ANS() calls must follow the order in which the radio-button
# groups were printed: H0, H1, test statistic, comparison operator, decision.

#CHECKING FIRST ROW H0
ANS( radio_cmp( $mcnull->correct_ans() ) );

#CHECKING SECOND ROW H1
ANS( radio_cmp( $mcalt->correct_ans() ) );

#checking alpha in percent
NAMED_ANS(alpha=>num_cmp( 100 *$alpha, mode=>"arith", reltol=>.01));

#CHECKING THIRD ROW
ANS( radio_cmp( $mcTestStatistic->correct_ans() ) );
ANS( radio_cmp( $mctestEQ->correct_ans() ) );
NAMED_ANS(CRIT=>num_cmp( $CRIT, mode=>"arith", reltol=>2));

#CHECKING FOURTH ROW
NAMED_ANS(Ftest=>num_cmp( $CALC, mode=>"arith", reltol=>2));

ANS( radio_cmp( $mcAR->correct_ans() ) );

TEXT($END_ONE_COLUMN);

ENDDOCUMENT(); # This should be the last executable line in the problem.