## WeBWorK Problems

### Re: estimating equation of a line

by Paul Pearson -
Hi Bruce,

http://webwork.maa.org/wiki/FormulaTestPoints

Statisticians would probably prefer an approach that uses a linear equation that best fits the data using a least squares method, and then compares the variance of the best fit line to the variance of the student answer. (I'm not a statistician, so anyone please correct me if I'm using incorrect terminology or a faulty method.) Such an approach is given below. If your data sets are large, let me know and I'll email you a version that uses plain Perl functions (instead of MathObject functions) and should be much faster.

Best Regards,

Paul Pearson

##################
# Initialization

DOCUMENT();

# Load the macro files this problem relies on.  The list must be wrapped in
# a loadMacros(...) call; a bare list followed by ");" is a syntax error.
loadMacros(
"PGstandard.pl",
"MathObjects.pl",
);

TEXT(beginproblem());

##################
# Setup

# Select the Numeric context for the MathObjects created below.
Context("Numeric");

# Sample data set: the i-th data point is ($x[$i], $y[$i]).
# These are package globals on purpose; the fitting call, the problem text
# and the answer checker further down all read them.
@x = (1 .. 4);
@y = (3, 6, 7, 9);

# least_squares_linear_fit(\@xs, \@ys)
#
# Computes the least squares best-fit line  y = b + m x  for the data points
# (x_i, y_i) given as two array references of equal length.
#
# Returns the list ($m, $b, $correlation, $variance):
#   $m           - slope of the best-fit line
#   $b           - y-intercept of the best-fit line
#   $correlation - correlation coefficient of the data
#   $variance    - sqrt( (sum of squared residuals) / (n - 2) ); note that
#                  this is the square root of the residual variance, and the
#                  name "variance" is kept so existing callers still work
#
# Dies if the two arrays differ in length, if fewer than three points are
# given (the n - 2 divisor would be zero or negative), or if all x values
# coincide (the normal equations are then singular).  If all y values are
# equal the correlation denominator is zero and Perl dies with a division
# by zero error.
sub least_squares_linear_fit {

    # Unpack the two array references explicitly.  Writing "@{shift}" does
    # not dereference the argument: Perl parses it as the array @shift.
    my ($x_ref, $y_ref) = @_;
    my @x = @{$x_ref};
    my @y = @{$y_ref};

    my $n = scalar(@x);
    die "least_squares_linear_fit: x and y must contain the same number of points"
        unless $n == scalar(@y);
    die "least_squares_linear_fit: at least three data points are required"
        unless $n >= 3;

    # From equation 10 at http://mathworld.wolfram.com/LeastSquaresFitting.html
    #
    # The best fit line b + m x for n data points (x_i,y_i) satisfies the
    # matrix equation
    #
    #   [b]   [ n        Sum x_i   ]^{-1} [ Sum y_i     ]
    #   [m] = [ Sum x_i  Sum x_i^2 ]      [ Sum x_i*y_i ]

    # Accumulate every sum that the fit and the correlation need.
    my ($sumx, $sumxx, $sumy, $sumxy, $sumyy) = (0, 0, 0, 0, 0);
    foreach my $i (0 .. $n - 1) {
        $sumx  += $x[$i];
        $sumxx += ($x[$i])**2;
        $sumy  += $y[$i];
        $sumxy += ($x[$i]) * ($y[$i]);
        $sumyy += ($y[$i]) * ($y[$i]);
    }

    # Determinant of the 2x2 matrix; zero exactly when all x_i are equal.
    my $det = ($n) * ($sumxx) - ($sumx)**2;
    die "least_squares_linear_fit: all x values are equal, so no unique line exists"
        if $det == 0;

    # Solve the 2x2 system with the explicit inverse.
    my $intercept = 1 / $det * ($sumxx * $sumy - ($sumx) * $sumxy);
    my $slope     = 1 / $det * (-($sumx) * $sumy + $n * $sumxy);

    # correlation coefficient from
    # http://mathbits.com/mathbits/tisection/statistics2/correlation.htm
    my $correlation =
        ($n * ($sumxy) - ($sumx * $sumy))
        / (sqrt($n * $sumxx - ($sumx)**2) * sqrt($n * $sumyy - ($sumy)**2));

    # from equation 32 at http://mathworld.wolfram.com/LeastSquaresFitting.html
    # Sum of squared vertical distances from each data point to the line.
    my $sumee = 0;
    foreach my $i (0 .. $n - 1) {
        my $residual = $y[$i] - ($intercept + $slope * $x[$i]);
        $sumee += $residual**2;
    }
    my $variance = sqrt($sumee / ($n - 2));

    return ($slope, $intercept, $correlation, $variance);
}

# Fit the sample data; @fit holds (slope, intercept, correlation, variance).
@fit = least_squares_linear_fit(\@x, \@y);

# Best-fit line as a MathObject Formula, used as the "correct" answer.
$f = Formula("$fit[0] * x + $fit[1]")->reduce;

# Wrap the statistics as MathObject Reals; the problem text displays both
# and the answer checker compares against $var.
$cor = Real("$fit[2]");
$var = Real("$fit[3]");

#########################
# Main text

# Switch the context to TeX string output while the problem text is built,
# and back to normal strings afterwards.  The data points, the fitted line
# $f and the statistics $cor and $var are interpolated inside \( ... \)
# math delimiters.
Context()->texStrings;
BEGIN_TEXT
The least squares best linear fit for
\( ($x[0],$y[0]) \),
\( ($x[1],$y[1]) \),
\( ($x[2],$y[2]) \),
\( ($x[3],$y[3]) \)
is \( y = $f \) with correlation \( $cor \)
and variance \( $var \).
$PAR Enter your own linear approximation.$BR
y = \{ ans_rule(20) \}
END_TEXT
Context()->normalStrings;

#########################

$showPartialCorrectAnswers = 1;

# Custom checker: instead of comparing the student's formula with $f, it
# measures how well the student's line fits the data.  The same
# sqrt( (sum of squared residuals) / (n - 2) ) statistic that produced $var
# for the best-fit line is computed for the student's line, and the answer
# is accepted when the two statistics differ by less than 0.1.
ANS($f->cmp( checker => sub {
    my ($correct, $student, $ansHash) = @_;

    my $n = scalar(@x);

    # Sum of the squares of the student error, where the error at each data
    # point is the height difference between the point and the student's line.
    my $sumeestu = 0;
    foreach my $i (0 .. $n - 1) {
        my $residual = $y[$i] - ($student->eval(x => $x[$i]));
        $sumeestu += $residual**2;
    }

    # a variance calculation for the student's answer
    my $varstu = sqrt($sumeestu / ($n - 2));

    # compare the variance $var of the best fit line to the variance
    # $varstu of the student's answer (the values, not a reference)
    my $tolerance = 0.1;
    return (abs($var - $varstu) < $tolerance) ? 1 : 0;
})
);

ENDDOCUMENT();