Parent Directory
|
Revision Log
The functionality of loadDB2 and create_tables2.sql are not handled by .../webwork2/bin/NPL-update. This reverts recent changes in create_tables2.sql so that it does things the old way, puts the old loadDB2 in loadDB2-deprecated, and loadDB2 just prints a help message about the changes.
1 #!/usr/bin/perl 2 3 #use strict; 4 use File::Find; 5 use File::Basename; 6 use Cwd; 7 use DBI; 8 9 if (! -e "create_tables2.sql") { 10 print "Cannot find file create_tables2.sql.\n"; 11 print "This needs to be run from the directory it was unpacked to,\n"; 12 print "in which case \"create_tables2.sql\" should be in this directory.\n"; 13 exit; 14 } 15 16 my $passwd; 17 18 if (@ARGV != 1) { 19 print "What is the root password for mysql? "; 20 $passwd = <STDIN>; 21 chomp($passwd); 22 } else { 23 $passwd = $ARGV[0]; 24 } 25 26 my $user = 'root'; 27 my $libraryRoot = Cwd::cwd(); 28 my $verbose =0; 29 my $cnt = 0; 30 my $cnt2 = 0; 31 32 $| = 1; # autoflush output 33 34 sub dbug { 35 my $msg = shift; 36 $cnt++; 37 print $msg if($verbose>1); 38 # if($cnt==5000) { print "\n"; $cnt=0;} 39 print "." if($verbose==1 and ($cnt % 100 == 0)); 40 } 41 42 system("mysql -uroot -p$passwd -B ProblemLibrary <./create_tables2.sql"); 43 print "Mysql database reinitialized.\n"; 44 45 my $mydb = "dbi:mysql:ProblemLibrary"; 46 47 # From pgfile 48 ## DBchapter('Limits and Derivatives') 49 ## DBsection('Calculating Limits using the Limit Laws') 50 ## Date('6/3/2002') 51 ## Author('Tangan Gao') 52 ## Institution('csulb') 53 ## TitleText1('Calculus Early Transcendentals') 54 ## EditionText1('4') 55 ## AuthorText1('Stewart') 56 ## Section1('2.3') 57 ## Problem1('7') 58 # 59 # The database structure is in the file create_tables2.sql and its ER-graph is 60 # the file wwdb_er_graph.pdf. 61 62 my ($name,$pgfile,$pgpath); 63 64 #recursive search for all pg files 65 my $dbh = DBI->connect('dbi:mysql:ProblemLibrary', $user, $passwd); 66 67 $dbh->{RaiseError} = 1; #bail if anything fails via die 68 69 #### First read in textbook information 70 71 if(open(IN, "Textbooks")) { 72 print "Reading in textbook data.\n"; 73 my %textinfo = ( TitleText => '', EditionText =>'', AuthorText=>''); 74 my $bookid = undef; 75 while (my $line = <IN>) { 76 $line =~ s|#*$||; 77 if($line =~ /^\s*(.*?)\s*>>>\s*(.*?)\s*$/) { # Should have chapter or section information 78 my $chapsec = $1; 79 my $title = $2; 80 if($chapsec=~ /(\d+)\.(\d+)/) { # We have a section 81 if(defined($bookid)) { 82 my $query = "SELECT chapter_id FROM chapter WHERE textbook_id = \"$bookid\" AND number = \"$1\""; 83 my $chapid = $dbh->selectrow_array($query); 84 if(defined($chapid)) { 85 $query = "SELECT section_id FROM section WHERE chapter_id = \"$chapid\" AND name = \"$title\""; 86 my $sectid = $dbh->selectrow_array($query); 87 if (!defined($sectid)) { 88 $dbh->do("INSERT INTO section 89 VALUES( 90 \"\", 91 \"$chapid\", 92 \"$2\", 93 \"$title\", 94 \"\" 95 )" 96 ); 97 dbug "INSERT INTO section VALUES(\"\", \"$chapid\", \"$2\", \"$title\", \"\" )\n"; 98 } 99 } else { 100 print "Cannot enter section $chapsec because textbook information is missing the chapter entry\n"; 101 } 102 } else { 103 print "Cannot enter section $chapsec because textbook information is incomplete\n"; 104 } 105 } else { # We have a chapter entry 106 if(defined($bookid)) { 107 my $query = "SELECT chapter_id FROM chapter WHERE textbook_id = \"$bookid\" AND name = \"$chapsec\""; 108 my $chapid = $dbh->selectrow_array($query); 109 if (!defined($chapid)) { 110 $dbh->do("INSERT INTO chapter 111 VALUES( 112 \"\", 113 \"$bookid\", 114 \"".$chapsec."\", 115 \"$title\", 116 \"\" 117 )" 118 ); 119 dbug "INSERT INTO chapter VALUES(\"\", \"$bookid\", \"".$chapsec."\", \"$title\", \"\" )\n"; 120 } 121 } else { 122 print "Cannot enter chapter $chapsec because textbook information is incomplete\n"; 123 } 124 } 125 } elsif($line =~ /^\s*(TitleText|EditionText|AuthorText)\(\s*'(.*?)'\s*\)/) { 126 # Textbook information, maybe new 127 my $type = $1; 128 if(defined($textinfo{$type})) { # signals new text 129 %textinfo = ( TitleText => undef, 130 EditionText =>undef, 131 AuthorText=> undef); 132 $textinfo{$type} = $2; 133 $bookid = undef; 134 } else { 135 $textinfo{$type} = $2; 136 if(defined($textinfo{TitleText}) and 137 defined($textinfo{AuthorText}) and 138 defined($textinfo{EditionText})) { 139 my $query = "SELECT textbook_id FROM textbook WHERE title = \"$textinfo{TitleText}\" AND edition = \"$textinfo{EditionText}\" AND author=\"$textinfo{AuthorText}\""; 140 $bookid = $dbh->selectrow_array($query); 141 if (!defined($bookid)) { 142 $dbh->do("INSERT INTO textbook 143 VALUES( 144 \"\", 145 \"$textinfo{TitleText}\", 146 \"$textinfo{EditionText}\", 147 \"$textinfo{AuthorText}\", 148 \"\", 149 \"\", 150 \"\" 151 )" 152 ); 153 dbug "INSERT INTO textbook VALUES( \"\", \"$textinfo{TitleText}\", \"$textinfo{EditionText}\", \"$textinfo{AuthorText}\", \"\", \"\", \"\" )\n"; 154 $bookid = $dbh->selectrow_array($query); 155 } 156 } 157 } 158 } 159 } 160 close(IN); 161 } else{ 162 print "Textbooks file was not found. Updating from cvs should fix this problem.\n"; 163 } 164 165 print "Converting data from tagged pgfiles into mysql.\n"; 166 print "Number of files processed:\n"; 167 168 #### Now search for tagged problems 169 170 find({ wanted => \&pgfiles, follow_fast=> 1}, $libraryRoot); 171 172 sub kwtidy { 173 my $s = shift; 174 $s =~ s/\W//g; 175 $s =~ s/_//g; 176 $s = lc($s); 177 return($s); 178 } 179 180 sub keywordcleaner { 181 my $string = shift; 182 my @spl1 = split /,/, $string; 183 my @spl2 = map(kwtidy($_), @spl1); 184 return(@spl2); 185 } 186 187 # Save on passing these values around 188 my %textinfo; 189 190 # Initialize, if needed more text-info information; 191 sub maybenewtext { 192 my $textno = shift; 193 return if defined($textinfo{$textno}); 194 # So, not defined yet 195 $textinfo{$textno} = { title => '', author =>'', edition =>'', 196 section => '', chapter =>'', problems => [] }; 197 } 198 199 # process each file returned by the find command. 200 sub pgfiles { 201 my $name = $File::Find::name; 202 my ($subject, $chapter, $section, $date, $institution, $author, $text); 203 my ($edition, $textauthor, $textsection, $textproblem, $tagged); 204 %textinfo=(); 205 my @textproblems = (-1); 206 if ($name =~ /pg$/) { 207 $pgfile = basename($name); 208 $pgpath = dirname($name); 209 $cnt2++; 210 printf("%6d", $cnt2) if(($cnt2 % 100) == 0); 211 print "\n" if(($cnt2 % 1000) == 0); 212 $pgpath =~ s|^$libraryRoot/||; 213 open(IN,"$name") or die "can not open $name: $!"; 214 $tagged = 0; 215 while (<IN>) { 216 SWITCH: { 217 if (/KEYWORDS\((.*)\)/i) { 218 @keyword = keywordcleaner($1); 219 last SWITCH; 220 } 221 if (/DBsubject\(\s*'(.*?)'\s*\)/) { 222 $subject = $1; 223 $subject =~ s/'/\'/g; 224 last SWITCH; 225 } 226 if (/DBchapter\(\s*'(.*?)'\s*\)/) { 227 $chapter = $1; 228 $chapter =~ s/'/\'/g; 229 $tagged = 1; 230 last SWITCH; 231 } 232 if (/DBsection\(\s*'(.*?)'\s*\)/) { 233 $section = $1; 234 $section =~ s/'/\'/g; 235 last SWITCH; 236 } 237 if (/Date\(\s*'(.*?)'\s*\)/) { 238 $date = $1; 239 $date =~ s/'/\'/g; 240 last SWITCH; 241 } 242 if (/Institution\(\s*'(.*?)'\s*\)/) { 243 $institution = $1; 244 $institution =~ s/'/\'/g; 245 last SWITCH; 246 } 247 if (/Author\(\s*'(.*?)'\s*\)/) { 248 $author = $1; 249 $author =~ s/'/\'/g; 250 last SWITCH; 251 } 252 if (/TitleText(\d+)\(\s*'(.*?)'\s*\)/) { 253 $textno = $1; 254 $text = $2; 255 $text =~ s/'/\'/g; 256 if ($text =~ /\S/) { 257 maybenewtext($textno); 258 $textinfo{$textno}->{title} = $text; 259 } 260 last SWITCH; 261 } 262 if (/EditionText(\d+)\(\s*'(.*?)'\s*\)/) { 263 $textno = $1; 264 $edition = $2; 265 $edition =~ s/'/\'/g; 266 if ($edition =~ /\S/) { 267 maybenewtext($textno); 268 $textinfo{$textno}->{edition} = $edition; 269 } 270 last SWITCH; 271 } 272 if (/AuthorText(\d+)\(\s*'(.*?)'\s*\)/) { 273 $textno = $1; 274 $textauthor = $2; 275 $textauthor =~ s/'/\'/g; 276 if ($textauthor =~ /\S/) { 277 maybenewtext($textno); 278 $textinfo{$textno}->{author} = $textauthor; 279 } 280 last SWITCH; 281 } 282 if (/Section(\d+)\(\s*'(.*?)'\s*\)/) { 283 $textno = $1; 284 $textsection = $2; 285 $textsection =~ s/'/\'/g; 286 if ($textsection =~ /\S/) { 287 maybenewtext($textno); 288 if ($textsection =~ /(\d*?)\.(\d*)/) { 289 $textinfo{$textno}->{chapter} = $1; 290 $textinfo{$textno}->{section} = $2; 291 } else { 292 $textinfo{$textno}->{chapter} = $textsection; 293 $textinfo{$textno}->{section} = ''; 294 # used to be: $chapnum = undef; #no valid number assigned 295 } 296 297 } 298 last SWITCH; 299 } 300 if (/Problem(\d+)\(\s*(.*?)\s*\)/) { 301 $textno = $1; 302 $textproblem = $2; 303 $textproblem =~ s/\D/ /g; 304 @textproblems = split /\s+/, $textproblem; 305 @textproblems = grep { $_ =~ /\S/ } @textproblems; 306 if (scalar(@textproblems) or defined($textinfo{$textno})) { 307 @textproblems = (-1) unless(scalar(@textproblems)); 308 maybenewtext($textno); 309 $textinfo{$textno}->{problems} = \@textproblems; 310 } 311 #print "$textproblem\n" if ($textproblem !~ /^[\s\d]+$/); 312 # $textproblem =~ s/'/\'/g; 313 last SWITCH; 314 } 315 } 316 } #end of SWITCH and while 317 if ($tagged and $chapter eq 'ZZZ-Inserted Text') { 318 $tagged=0; 319 } 320 if ($tagged) { 321 # 322 # kludge to fix the omission of a subject field 323 unless($subject) { 324 if ($text =~ /precalculus/i) { 325 $subject = "Precalculus"; 326 } elsif ($text =~ /calculus/i) { 327 $subject = "Calculus"; 328 } elsif ($text =~ /linear/i) { 329 $subject = "Linear Algebra"; 330 } elsif ($text =~ /algebra/i) { 331 $subject = "Algebra"; 332 } elsif ($text =~ /statistic/i) { 333 $subject = "Statistics"; 334 } elsif ($text =~ /financial/i) { 335 $subject = "Financial Mathematics"; 336 } else { 337 $subject = "Misc"; 338 } 339 } 340 # From the pgfile we just looked at, 341 ## DBchapter('Limits and Derivatives') in $chapter 342 ## DBsection('Calculating Limits using the Limit Laws') in $section 343 ## Date('6/3/2002') in $date 344 ## Author('Tangan Gao') in $author 345 ## Institution('csulb') in $institution 346 ## TitleText1('Calculus Early Transcendentals') in $text 347 ## EditionText1('4') in $edition 348 ## AuthorText1('Stewart') in $textauthor 349 ## Section1('2.3') in $textsection 350 ## Problem1('7') in $textproblem 351 # 352 # The database structure is in the file create_tables2.sql and its ER-graph 353 # is the file wwdb_er_graph.pdf. Insert, in order, into the tables 354 # in that file. 355 # 356 357 #selectrow_array returns first field of first row in scalar context or undef 358 # undef for failure also, $dbh->{RaiseError} = 1 should catch that case. 359 # 360 # DBsubject table 361 # 362 $query = "SELECT DBsubject_id FROM DBsubject WHERE name = \"$subject\""; 363 my $DBsubject_id = $dbh->selectrow_array($query); 364 if (!defined($DBsubject_id)) { 365 $dbh->do( 366 "INSERT INTO DBsubject 367 VALUES( 368 \"\", 369 \"$subject\" 370 )" 371 ); 372 dbug "INSERT INTO DBsubject VALUES(\"\",\"$subject\")\n"; 373 $DBsubject_id = $dbh->selectrow_array($query); 374 } 375 376 # DBchapter table 377 # 378 $query = "SELECT DBchapter_id FROM DBchapter WHERE name = \"$chapter\" and DBsubject_id = $DBsubject_id"; 379 my $DBchapter_id = $dbh->selectrow_array($query); 380 if (!defined($DBchapter_id)) { 381 $dbh->do("INSERT INTO DBchapter 382 VALUES( 383 \"\", 384 \"$chapter\", 385 \"$DBsubject_id\" 386 )" 387 ); 388 dbug "INSERT INTO DBchapter VALUES( \"\", \"$chapter\", \"$DBsubject_id\)\n"; 389 $DBchapter_id = $dbh->selectrow_array($query); 390 } 391 392 # DBsection table 393 # 394 $query = "SELECT DBsection_id FROM DBsection WHERE name = \"$section\" AND DBchapter_id = $DBchapter_id"; 395 my $DBsection_id = $dbh->selectrow_array($query); 396 if (!defined($DBsection_id)) { 397 $dbh->do("INSERT INTO DBsection 398 VALUES( 399 \"\", 400 \"$section\", 401 \"$DBchapter_id\" 402 )" 403 ); 404 dbug "INSERT INTO DBsection VALUES( \"\", \"$section\", \"$DBchapter_id\", \"$DBsubject_id\" )\n"; 405 $DBsection_id = $dbh->selectrow_array($query); 406 } 407 408 # institution table 409 # 410 $query = "SELECT institution_id FROM institution WHERE name = \"$institution\""; 411 my $institution_id = $dbh->selectrow_array($query); 412 if (!defined($institution_id)) { 413 $dbh->do("INSERT INTO institution 414 VALUES( 415 \"\", 416 \"$institution\", 417 \"\", 418 \"\", 419 \"\", 420 \"\", 421 \"\", 422 \"\" 423 )" 424 ); 425 dbug "INSERT INTO institution VALUES( \"\", \"$institution\", \"\", \"\", \"\", \"\", \"\", \"\")\n"; 426 $institution_id = $dbh->selectrow_array($query); 427 } 428 429 # author table 430 # 431 $author =~ /(.*?)\s(\w+)\s*$/; 432 my $firstname = $1; 433 my $lastname = $2; 434 #remove leading and trailing spaces from firstname, which includes any middle name too. 435 $firstname =~ s/^\s*//; 436 $firstname =~ s/\s*$//; 437 $query = "SELECT author_id FROM author WHERE lastname = \"$lastname\" AND firstname=\"$firstname\""; 438 my $author_id = $dbh->selectrow_array($query); 439 if (!defined($author_id)) { 440 $dbh->do("INSERT INTO author 441 VALUES( 442 \"\", 443 \"$institution_id\", 444 \"$lastname\", 445 \"$firstname\", 446 \"\" 447 )" 448 ); 449 dbug "INSERT INTO author VALUES( \"\", \"$institution_id\", \"$lastname\", \"$firstname\", \"\" )\n"; 450 $author_id = $dbh->selectrow_array($query); 451 } 452 453 # path table 454 # 455 $query = "SELECT path_id FROM path WHERE path = \"$pgpath\""; 456 my $path_id = $dbh->selectrow_array($query); 457 if (!defined($path_id)) { 458 $dbh->do("INSERT INTO path 459 VALUES( 460 \"\", 461 \"$pgpath\", 462 \"\", 463 \"\" 464 )" 465 ); 466 dbug "INSERT INTO path VALUES( \"\", \"$path\", \"\", \"\" )\n"; 467 $path_id = $dbh->selectrow_array($query); 468 } 469 470 # pgfile table 471 # 472 my $pgfile_id; 473 $dbh->do("INSERT INTO pgfile 474 VALUES( 475 \"\", 476 \"$DBsection_id\", 477 \"$author_id\", 478 \"$institution_id\", 479 \"$path_id\", 480 \"$pgfile\", 481 \"\" 482 )" 483 ); 484 dbug "INSERT INTO pgfile VALUES( \"\", \"$DBsection_id\", \"$author_id\", \"$institution_id\", \"$path_id\", \"$pgfile\", \"\" )\n"; 485 $query = "SELECT pgfile_id FROM pgfile WHERE filename = \"$pgfile\" and path_id=$path_id"; 486 $pgfile_id = $dbh->selectrow_array($query); 487 488 # keyword table, and problem_keyword many-many table 489 # 490 foreach my $keyword (@keyword) { 491 $keyword =~ s/[\'\"]//g; 492 $query = "SELECT keyword_id FROM keyword WHERE keyword = \"$keyword\""; 493 my $keyword_id = $dbh->selectrow_array($query); 494 if (!defined($keyword_id)) { 495 $dbh->do("INSERT INTO keyword 496 VALUES( 497 \"\", 498 \"$keyword\" 499 )" 500 ); 501 dbug "INSERT INTO keyword VALUES( \"\", \"$keyword\")\n"; 502 $keyword_id = $dbh->selectrow_array($query); 503 } 504 505 $query = "SELECT pgfile_id FROM pgfile_keyword WHERE keyword_id = \"$keyword_id\" and pgfile_id=\"$pgfile_id\""; 506 my $ok = $dbh->selectrow_array($query); 507 if (!defined($ok)) { 508 $dbh->do("INSERT INTO pgfile_keyword 509 VALUES( 510 \"$pgfile_id\", 511 \"$keyword_id\" 512 )" 513 ); 514 dbug "INSERT INTO pgfile_keyword VALUES( \"$pgfile_id\", \"$keyword_id\" )\n"; 515 } 516 } #end foreach keyword 517 518 # pgfile_institution table 519 # 520 $query = "SELECT institution_id FROM pgfile_institution WHERE pgfile_id = \"$pgfile_id\""; 521 my $pg_inst_id = $dbh->selectrow_array($query); 522 if (!defined($pg_inst_id)) { 523 $dbh->do("INSERT INTO pgfile_institution 524 VALUES( 525 \"$pgfile_id\", 526 \"$institution_id\" 527 )" 528 ); 529 dbug "INSERT INTO pgfile_institution VALUES( \"$pgfile_id\", \"$institution_id\" )\n"; 530 $pg_inst_id = $dbh->selectrow_array($query); 531 } 532 533 # Textbook section 534 # problem table contains textbook problems 535 # 536 for my $textno (keys %textinfo) { 537 my $texthashref = $textinfo{$textno}; 538 539 # textbook table 540 # 541 $text = $texthashref->{title}; 542 $edition = $texthashref->{edition}; 543 $textauthor = $texthashref->{author}; 544 my $chapnum = $texthashref->{chapter}; 545 my $secnum = $texthashref->{section}; 546 $query = "SELECT textbook_id FROM textbook WHERE title = \"$text\" AND edition = \"$edition\" AND author=\"$textauthor\""; 547 my $textbook_id = $dbh->selectrow_array($query); 548 if (!defined($textbook_id)) { 549 $dbh->do("INSERT INTO textbook 550 VALUES( 551 \"\", 552 \"$text\", 553 \"$edition\", 554 \"$textauthor\", 555 \"\", 556 \"\", 557 \"\" 558 )" 559 ); 560 dbug "INSERT INTO textbook VALUES( \"\", \"$text\", \"$edition\", \"$textauthor\", \"\", \"\", \"\" )\n"; 561 print "Late add into textbook \"$text\", \"$edition\", \"$textauthor\"\n"; 562 $textbook_id = $dbh->selectrow_array($query); 563 } 564 565 # chapter weak table of textbook 566 # 567 $query = "SELECT chapter_id FROM chapter WHERE textbook_id = \"$textbook_id\" AND name = \"$chapter\""; 568 my $chapter_id = $dbh->selectrow_array($query); 569 if (!defined($chapter_id)) { 570 $dbh->do("INSERT INTO chapter 571 VALUES( 572 \"\", 573 \"$textbook_id\", 574 \"".$chapnum."\", 575 \"$chapter\", 576 \"\" 577 )" 578 ); 579 dbug "INSERT INTO chapter VALUES(\"\", \"$textbook_id\", \"".$chapnum."\", \"$chapter\", \"\" )\n"; 580 $chapter_id = $dbh->selectrow_array($query); 581 } 582 583 # section weak table of textbook 584 # 585 $query = "SELECT section_id FROM section WHERE chapter_id = \"$chapter_id\" AND name = \"$section\""; 586 my $section_id = $dbh->selectrow_array($query); 587 if (!defined($section_id)) { 588 $dbh->do("INSERT INTO section 589 VALUES( 590 \"\", 591 \"$chapter_id\", 592 \"$secnum\", 593 \"$section\", 594 \"\" 595 )" 596 ); 597 dbug "INSERT INTO section VALUES(\"\", \"$textbook_id\", \"$secnum\", \"$section\", \"\" )\n"; 598 $section_id = $dbh->selectrow_array($query); 599 } 600 601 @textproblems = @{$texthashref->{problems}}; 602 for my $tp (@textproblems) { 603 $query = "SELECT problem_id FROM problem WHERE section_id = \"$section_id\" AND number = \"$tp\""; 604 my $problem_id = $dbh->selectrow_array($query); 605 if (!defined($problem_id)) { 606 $dbh->do("INSERT INTO problem 607 VALUES( 608 \"\", 609 \"$section_id\", 610 \"$tp\", 611 \"\" 612 )" 613 ); 614 dbug "INSERT INTO problem VALUES( \"\", \"$section_id\", \"$tp\", \"\" )\n"; 615 $problem_id = $dbh->selectrow_array($query); 616 } 617 618 # pgfile_problem table associates pgfiles with textbook problems 619 # 620 $query = "SELECT problem_id FROM pgfile_problem WHERE problem_id = \"$problem_id\" AND pgfile_id = \"$pgfile_id\""; 621 my $pg_problem_id = $dbh->selectrow_array($query); 622 if (!defined($pg_problem_id)) { 623 $dbh->do("INSERT INTO pgfile_problem 624 VALUES( 625 \"$pgfile_id\", 626 \"$problem_id\" 627 )" 628 ); 629 dbug "INSERT INTO pgfile_problem VALUES( \"$pgfile_id\", \"$problem_id\" )\n"; 630 } 631 } 632 633 } 634 #reset tag vars, they may not match the next file 635 $date =""; $textauthor=""; $textsection=""; 636 $chapter=""; $section=""; 637 } 638 close(IN) or die "can not close: $!"; 639 } 640 } 641 642 643 $dbh->disconnect; 644 645 print "\nDone.\n"; 646
| aubreyja at gmail dot com | ViewVC Help |
| Powered by ViewVC 1.0.9 |