Parent Directory
|
Revision Log
Testing commit access.
1 #!/usr/bin/perl 2 3 #use strict; 4 use File::Find; 5 use File::Basename; 6 use Cwd; 7 use DBI; 8 9 if (! -e "create_tables2.sql") { 10 print "Cannot find file create_tables2.sql.\n"; 11 print "This needs to be run from the directory it was unpacked to,\n"; 12 print "in which case \"create_tables2.sql\" should be in this directory.\n"; 13 exit; 14 } 15 16 my $passwd; 17 18 # Innocuous change to check on commit 19 20 if (@ARGV != 1) { 21 print "What is the root password for mysql? "; 22 $passwd = <STDIN>; 23 chomp($passwd); 24 } else { 25 $passwd = $ARGV[0]; 26 } 27 28 my $user = 'root'; 29 my $libraryRoot = Cwd::cwd(); 30 my $verbose =0; 31 my $cnt = 0; 32 my $cnt2 = 0; 33 34 $| = 1; # autoflush output 35 36 sub dbug { 37 my $msg = shift; 38 $cnt++; 39 print $msg if($verbose>1); 40 # if($cnt==5000) { print "\n"; $cnt=0;} 41 print "." if($verbose==1 and ($cnt % 100 == 0)); 42 } 43 44 system("mysql -uroot -p$passwd -B ProblemLibrary <./create_tables2.sql"); 45 print "Mysql database reinitialized.\n"; 46 47 my $mydb = "dbi:mysql:ProblemLibrary"; 48 49 # From pgfile 50 ## DBchapter('Limits and Derivatives') 51 ## DBsection('Calculating Limits using the Limit Laws') 52 ## Date('6/3/2002') 53 ## Author('Tangan Gao') 54 ## Institution('csulb') 55 ## TitleText1('Calculus Early Transcendentals') 56 ## EditionText1('4') 57 ## AuthorText1('Stewart') 58 ## Section1('2.3') 59 ## Problem1('7') 60 # 61 # The database structure is in the file create_tables2.sql and its ER-graph is 62 # the file wwdb_er_graph.pdf. 63 64 my ($name,$pgfile,$pgpath); 65 66 #recursive search for all pg files 67 my $dbh = DBI->connect('dbi:mysql:ProblemLibrary', $user, $passwd); 68 69 $dbh->{RaiseError} = 1; #bail if anything fails via die 70 71 #### First read in textbook information 72 73 if(open(IN, "Textbooks")) { 74 print "Reading in textbook data.\n"; 75 my %textinfo = ( TitleText => '', EditionText =>'', AuthorText=>''); 76 my $bookid = undef; 77 while (my $line = <IN>) { 78 $line =~ s|#*$||; 79 if($line =~ /^\s*(.*?)\s*>>>\s*(.*?)\s*$/) { # Should have chapter or section information 80 my $chapsec = $1; 81 my $title = $2; 82 if($chapsec=~ /(\d+)\.(\d+)/) { # We have a section 83 if(defined($bookid)) { 84 my $query = "SELECT chapter_id FROM chapter WHERE textbook_id = \"$bookid\" AND number = \"$1\""; 85 my $chapid = $dbh->selectrow_array($query); 86 if(defined($chapid)) { 87 $query = "SELECT section_id FROM section WHERE chapter_id = \"$chapid\" AND name = \"$title\""; 88 my $sectid = $dbh->selectrow_array($query); 89 if (!defined($sectid)) { 90 $dbh->do("INSERT INTO section 91 VALUES( 92 \"\", 93 \"$chapid\", 94 \"$2\", 95 \"$title\", 96 \"\" 97 )" 98 ); 99 dbug "INSERT INTO section VALUES(\"\", \"$chapid\", \"$2\", \"$title\", \"\" )\n"; 100 } 101 } else { 102 print "Cannot enter section $chapsec because textbook information is missing the chapter entry\n"; 103 } 104 } else { 105 print "Cannot enter section $chapsec because textbook information is incomplete\n"; 106 } 107 } else { # We have a chapter entry 108 if(defined($bookid)) { 109 my $query = "SELECT chapter_id FROM chapter WHERE textbook_id = \"$bookid\" AND name = \"$chapsec\""; 110 my $chapid = $dbh->selectrow_array($query); 111 if (!defined($chapid)) { 112 $dbh->do("INSERT INTO chapter 113 VALUES( 114 \"\", 115 \"$bookid\", 116 \"".$chapsec."\", 117 \"$title\", 118 \"\" 119 )" 120 ); 121 dbug "INSERT INTO chapter VALUES(\"\", \"$bookid\", \"".$chapsec."\", \"$title\", \"\" )\n"; 122 } 123 } else { 124 print "Cannot enter chapter $chapsec because textbook information is incomplete\n"; 125 } 126 } 127 } elsif($line =~ /^\s*(TitleText|EditionText|AuthorText)\(\s*'(.*?)'\s*\)/) { 128 # Textbook information, maybe new 129 my $type = $1; 130 if(defined($textinfo{$type})) { # signals new text 131 %textinfo = ( TitleText => undef, 132 EditionText =>undef, 133 AuthorText=> undef); 134 $textinfo{$type} = $2; 135 $bookid = undef; 136 } else { 137 $textinfo{$type} = $2; 138 if(defined($textinfo{TitleText}) and 139 defined($textinfo{AuthorText}) and 140 defined($textinfo{EditionText})) { 141 my $query = "SELECT textbook_id FROM textbook WHERE title = \"$textinfo{TitleText}\" AND edition = \"$textinfo{EditionText}\" AND author=\"$textinfo{AuthorText}\""; 142 $bookid = $dbh->selectrow_array($query); 143 if (!defined($bookid)) { 144 $dbh->do("INSERT INTO textbook 145 VALUES( 146 \"\", 147 \"$textinfo{TitleText}\", 148 \"$textinfo{EditionText}\", 149 \"$textinfo{AuthorText}\", 150 \"\", 151 \"\", 152 \"\" 153 )" 154 ); 155 dbug "INSERT INTO textbook VALUES( \"\", \"$textinfo{TitleText}\", \"$textinfo{EditionText}\", \"$textinfo{AuthorText}\", \"\", \"\", \"\" )\n"; 156 $bookid = $dbh->selectrow_array($query); 157 } 158 } 159 } 160 } 161 } 162 close(IN); 163 } else{ 164 print "Textbooks file was not found. Updating from cvs should fix this problem.\n"; 165 } 166 167 print "Converting data from tagged pgfiles into mysql.\n"; 168 print "Number of files processed:\n"; 169 170 #### Now search for tagged problems 171 172 find({ wanted => \&pgfiles, follow_fast=> 1}, $libraryRoot); 173 174 sub kwtidy { 175 my $s = shift; 176 $s =~ s/\W//g; 177 $s =~ s/_//g; 178 $s = lc($s); 179 return($s); 180 } 181 182 sub keywordcleaner { 183 my $string = shift; 184 my @spl1 = split /,/, $string; 185 my @spl2 = map(kwtidy($_), @spl1); 186 return(@spl2); 187 } 188 189 # Save on passing these values around 190 my %textinfo; 191 192 # Initialize, if needed more text-info information; 193 sub maybenewtext { 194 my $textno = shift; 195 return if defined($textinfo{$textno}); 196 # So, not defined yet 197 $textinfo{$textno} = { title => '', author =>'', edition =>'', 198 section => '', chapter =>'', problems => [] }; 199 } 200 201 # process each file returned by the find command. 202 sub pgfiles { 203 my $name = $File::Find::name; 204 my ($subject, $chapter, $section, $date, $institution, $author, $text); 205 my ($edition, $textauthor, $textsection, $textproblem, $tagged); 206 %textinfo=(); 207 my @textproblems = (-1); 208 if ($name =~ /pg$/) { 209 $pgfile = basename($name); 210 $pgpath = dirname($name); 211 $cnt2++; 212 printf("%6d", $cnt2) if(($cnt2 % 100) == 0); 213 print "\n" if(($cnt2 % 1000) == 0); 214 $pgpath =~ s|^$libraryRoot/||; 215 open(IN,"$name") or die "can not open $name: $!"; 216 $tagged = 0; 217 while (<IN>) { 218 SWITCH: { 219 if (/KEYWORDS\((.*)\)/i) { 220 @keyword = keywordcleaner($1); 221 last SWITCH; 222 } 223 if (/DBsubject\(\s*'(.*?)'\s*\)/) { 224 $subject = $1; 225 $subject =~ s/'/\'/g; 226 last SWITCH; 227 } 228 if (/DBchapter\(\s*'(.*?)'\s*\)/) { 229 $chapter = $1; 230 $chapter =~ s/'/\'/g; 231 $tagged = 1; 232 last SWITCH; 233 } 234 if (/DBsection\(\s*'(.*?)'\s*\)/) { 235 $section = $1; 236 $section =~ s/'/\'/g; 237 last SWITCH; 238 } 239 if (/Date\(\s*'(.*?)'\s*\)/) { 240 $date = $1; 241 $date =~ s/'/\'/g; 242 last SWITCH; 243 } 244 if (/Institution\(\s*'(.*?)'\s*\)/) { 245 $institution = $1; 246 $institution =~ s/'/\'/g; 247 last SWITCH; 248 } 249 if (/Author\(\s*'(.*?)'\s*\)/) { 250 $author = $1; 251 $author =~ s/'/\'/g; 252 last SWITCH; 253 } 254 if (/TitleText(\d+)\(\s*'(.*?)'\s*\)/) { 255 $textno = $1; 256 $text = $2; 257 $text =~ s/'/\'/g; 258 if ($text =~ /\S/) { 259 maybenewtext($textno); 260 $textinfo{$textno}->{title} = $text; 261 } 262 last SWITCH; 263 } 264 if (/EditionText(\d+)\(\s*'(.*?)'\s*\)/) { 265 $textno = $1; 266 $edition = $2; 267 $edition =~ s/'/\'/g; 268 if ($edition =~ /\S/) { 269 maybenewtext($textno); 270 $textinfo{$textno}->{edition} = $edition; 271 } 272 last SWITCH; 273 } 274 if (/AuthorText(\d+)\(\s*'(.*?)'\s*\)/) { 275 $textno = $1; 276 $textauthor = $2; 277 $textauthor =~ s/'/\'/g; 278 if ($textauthor =~ /\S/) { 279 maybenewtext($textno); 280 $textinfo{$textno}->{author} = $textauthor; 281 } 282 last SWITCH; 283 } 284 if (/Section(\d+)\(\s*'(.*?)'\s*\)/) { 285 $textno = $1; 286 $textsection = $2; 287 $textsection =~ s/'/\'/g; 288 if ($textsection =~ /\S/) { 289 maybenewtext($textno); 290 if ($textsection =~ /(\d*?)\.(\d*)/) { 291 $textinfo{$textno}->{chapter} = $1; 292 $textinfo{$textno}->{section} = $2; 293 } else { 294 $textinfo{$textno}->{chapter} = $textsection; 295 $textinfo{$textno}->{section} = ''; 296 # used to be: $chapnum = undef; #no valid number assigned 297 } 298 299 } 300 last SWITCH; 301 } 302 if (/Problem(\d+)\(\s*(.*?)\s*\)/) { 303 $textno = $1; 304 $textproblem = $2; 305 $textproblem =~ s/\D/ /g; 306 @textproblems = split /\s+/, $textproblem; 307 @textproblems = grep { $_ =~ /\S/ } @textproblems; 308 if (scalar(@textproblems) or defined($textinfo{$textno})) { 309 @textproblems = (-1) unless(scalar(@textproblems)); 310 maybenewtext($textno); 311 $textinfo{$textno}->{problems} = \@textproblems; 312 } 313 #print "$textproblem\n" if ($textproblem !~ /^[\s\d]+$/); 314 # $textproblem =~ s/'/\'/g; 315 last SWITCH; 316 } 317 } 318 } #end of SWITCH and while 319 if ($tagged and $chapter eq 'ZZZ-Inserted Text') { 320 $tagged=0; 321 } 322 if ($tagged) { 323 # 324 # kludge to fix the omission of a subject field 325 unless($subject) { 326 if ($text =~ /precalculus/i) { 327 $subject = "Precalculus"; 328 } elsif ($text =~ /calculus/i) { 329 $subject = "Calculus"; 330 } elsif ($text =~ /linear/i) { 331 $subject = "Linear Algebra"; 332 } elsif ($text =~ /algebra/i) { 333 $subject = "Algebra"; 334 } elsif ($text =~ /statistic/i) { 335 $subject = "Statistics"; 336 } elsif ($text =~ /financial/i) { 337 $subject = "Financial Mathematics"; 338 } else { 339 $subject = "Misc"; 340 } 341 } 342 # From the pgfile we just looked at, 343 ## DBchapter('Limits and Derivatives') in $chapter 344 ## DBsection('Calculating Limits using the Limit Laws') in $section 345 ## Date('6/3/2002') in $date 346 ## Author('Tangan Gao') in $author 347 ## Institution('csulb') in $institution 348 ## TitleText1('Calculus Early Transcendentals') in $text 349 ## EditionText1('4') in $edition 350 ## AuthorText1('Stewart') in $textauthor 351 ## Section1('2.3') in $textsection 352 ## Problem1('7') in $textproblem 353 # 354 # The database structure is in the file create_tables2.sql and its ER-graph 355 # is the file wwdb_er_graph.pdf. Insert, in order, into the tables 356 # in that file. 357 # 358 359 #selectrow_array returns first field of first row in scalar context or undef 360 # undef for failure also, $dbh->{RaiseError} = 1 should catch that case. 361 # 362 # DBsubject table 363 # 364 $query = "SELECT DBsubject_id FROM DBsubject WHERE name = \"$subject\""; 365 my $DBsubject_id = $dbh->selectrow_array($query); 366 if (!defined($DBsubject_id)) { 367 $dbh->do( 368 "INSERT INTO DBsubject 369 VALUES( 370 \"\", 371 \"$subject\" 372 )" 373 ); 374 dbug "INSERT INTO DBsubject VALUES(\"\",\"$subject\")\n"; 375 $DBsubject_id = $dbh->selectrow_array($query); 376 } 377 378 # DBchapter table 379 # 380 $query = "SELECT DBchapter_id FROM DBchapter WHERE name = \"$chapter\" and DBsubject_id = $DBsubject_id"; 381 my $DBchapter_id = $dbh->selectrow_array($query); 382 if (!defined($DBchapter_id)) { 383 $dbh->do("INSERT INTO DBchapter 384 VALUES( 385 \"\", 386 \"$chapter\", 387 \"$DBsubject_id\" 388 )" 389 ); 390 dbug "INSERT INTO DBchapter VALUES( \"\", \"$chapter\", \"$DBsubject_id\)\n"; 391 $DBchapter_id = $dbh->selectrow_array($query); 392 } 393 394 # DBsection table 395 # 396 $query = "SELECT DBsection_id FROM DBsection WHERE name = \"$section\" AND DBchapter_id = $DBchapter_id"; 397 my $DBsection_id = $dbh->selectrow_array($query); 398 if (!defined($DBsection_id)) { 399 $dbh->do("INSERT INTO DBsection 400 VALUES( 401 \"\", 402 \"$section\", 403 \"$DBchapter_id\" 404 )" 405 ); 406 dbug "INSERT INTO DBsection VALUES( \"\", \"$section\", \"$DBchapter_id\", \"$DBsubject_id\" )\n"; 407 $DBsection_id = $dbh->selectrow_array($query); 408 } 409 410 # institution table 411 # 412 $query = "SELECT institution_id FROM institution WHERE name = \"$institution\""; 413 my $institution_id = $dbh->selectrow_array($query); 414 if (!defined($institution_id)) { 415 $dbh->do("INSERT INTO institution 416 VALUES( 417 \"\", 418 \"$institution\", 419 \"\", 420 \"\", 421 \"\", 422 \"\", 423 \"\", 424 \"\" 425 )" 426 ); 427 dbug "INSERT INTO institution VALUES( \"\", \"$institution\", \"\", \"\", \"\", \"\", \"\", \"\")\n"; 428 $institution_id = $dbh->selectrow_array($query); 429 } 430 431 # author table 432 # 433 $author =~ /(.*?)\s(\w+)\s*$/; 434 my $firstname = $1; 435 my $lastname = $2; 436 #remove leading and trailing spaces from firstname, which includes any middle name too. 437 $firstname =~ s/^\s*//; 438 $firstname =~ s/\s*$//; 439 $query = "SELECT author_id FROM author WHERE lastname = \"$lastname\" AND firstname=\"$firstname\""; 440 my $author_id = $dbh->selectrow_array($query); 441 if (!defined($author_id)) { 442 $dbh->do("INSERT INTO author 443 VALUES( 444 \"\", 445 \"$institution_id\", 446 \"$lastname\", 447 \"$firstname\", 448 \"\" 449 )" 450 ); 451 dbug "INSERT INTO author VALUES( \"\", \"$institution_id\", \"$lastname\", \"$firstname\", \"\" )\n"; 452 $author_id = $dbh->selectrow_array($query); 453 } 454 455 # path table 456 # 457 $query = "SELECT path_id FROM path WHERE path = \"$pgpath\""; 458 my $path_id = $dbh->selectrow_array($query); 459 if (!defined($path_id)) { 460 $dbh->do("INSERT INTO path 461 VALUES( 462 \"\", 463 \"$pgpath\", 464 \"\", 465 \"\" 466 )" 467 ); 468 dbug "INSERT INTO path VALUES( \"\", \"$path\", \"\", \"\" )\n"; 469 $path_id = $dbh->selectrow_array($query); 470 } 471 472 # pgfile table 473 # 474 my $pgfile_id; 475 $dbh->do("INSERT INTO pgfile 476 VALUES( 477 \"\", 478 \"$DBsection_id\", 479 \"$author_id\", 480 \"$institution_id\", 481 \"$path_id\", 482 \"$pgfile\", 483 \"\" 484 )" 485 ); 486 dbug "INSERT INTO pgfile VALUES( \"\", \"$DBsection_id\", \"$author_id\", \"$institution_id\", \"$path_id\", \"$pgfile\", \"\" )\n"; 487 $query = "SELECT pgfile_id FROM pgfile WHERE filename = \"$pgfile\" and path_id=$path_id"; 488 $pgfile_id = $dbh->selectrow_array($query); 489 490 # keyword table, and problem_keyword many-many table 491 # 492 foreach my $keyword (@keyword) { 493 $keyword =~ s/[\'\"]//g; 494 $query = "SELECT keyword_id FROM keyword WHERE keyword = \"$keyword\""; 495 my $keyword_id = $dbh->selectrow_array($query); 496 if (!defined($keyword_id)) { 497 $dbh->do("INSERT INTO keyword 498 VALUES( 499 \"\", 500 \"$keyword\" 501 )" 502 ); 503 dbug "INSERT INTO keyword VALUES( \"\", \"$keyword\")\n"; 504 $keyword_id = $dbh->selectrow_array($query); 505 } 506 507 $query = "SELECT pgfile_id FROM pgfile_keyword WHERE keyword_id = \"$keyword_id\" and pgfile_id=\"$pgfile_id\""; 508 my $ok = $dbh->selectrow_array($query); 509 if (!defined($ok)) { 510 $dbh->do("INSERT INTO pgfile_keyword 511 VALUES( 512 \"$pgfile_id\", 513 \"$keyword_id\" 514 )" 515 ); 516 dbug "INSERT INTO pgfile_keyword VALUES( \"$pgfile_id\", \"$keyword_id\" )\n"; 517 } 518 } #end foreach keyword 519 520 # pgfile_institution table 521 # 522 $query = "SELECT institution_id FROM pgfile_institution WHERE pgfile_id = \"$pgfile_id\""; 523 my $pg_inst_id = $dbh->selectrow_array($query); 524 if (!defined($pg_inst_id)) { 525 $dbh->do("INSERT INTO pgfile_institution 526 VALUES( 527 \"$pgfile_id\", 528 \"$institution_id\" 529 )" 530 ); 531 dbug "INSERT INTO pgfile_institution VALUES( \"$pgfile_id\", \"$institution_id\" )\n"; 532 $pg_inst_id = $dbh->selectrow_array($query); 533 } 534 535 # Textbook section 536 # problem table contains textbook problems 537 # 538 for my $textno (keys %textinfo) { 539 my $texthashref = $textinfo{$textno}; 540 541 # textbook table 542 # 543 $text = $texthashref->{title}; 544 $edition = $texthashref->{edition}; 545 $textauthor = $texthashref->{author}; 546 my $chapnum = $texthashref->{chapter}; 547 my $secnum = $texthashref->{section}; 548 $query = "SELECT textbook_id FROM textbook WHERE title = \"$text\" AND edition = \"$edition\" AND author=\"$textauthor\""; 549 my $textbook_id = $dbh->selectrow_array($query); 550 if (!defined($textbook_id)) { 551 $dbh->do("INSERT INTO textbook 552 VALUES( 553 \"\", 554 \"$text\", 555 \"$edition\", 556 \"$textauthor\", 557 \"\", 558 \"\", 559 \"\" 560 )" 561 ); 562 dbug "INSERT INTO textbook VALUES( \"\", \"$text\", \"$edition\", \"$textauthor\", \"\", \"\", \"\" )\n"; 563 print "Late add into textbook \"$text\", \"$edition\", \"$textauthor\"\n"; 564 $textbook_id = $dbh->selectrow_array($query); 565 } 566 567 # chapter weak table of textbook 568 # 569 $query = "SELECT chapter_id FROM chapter WHERE textbook_id = \"$textbook_id\" AND name = \"$chapter\""; 570 my $chapter_id = $dbh->selectrow_array($query); 571 if (!defined($chapter_id)) { 572 $dbh->do("INSERT INTO chapter 573 VALUES( 574 \"\", 575 \"$textbook_id\", 576 \"".$chapnum."\", 577 \"$chapter\", 578 \"\" 579 )" 580 ); 581 dbug "INSERT INTO chapter VALUES(\"\", \"$textbook_id\", \"".$chapnum."\", \"$chapter\", \"\" )\n"; 582 $chapter_id = $dbh->selectrow_array($query); 583 } 584 585 # section weak table of textbook 586 # 587 $query = "SELECT section_id FROM section WHERE chapter_id = \"$chapter_id\" AND name = \"$section\""; 588 my $section_id = $dbh->selectrow_array($query); 589 if (!defined($section_id)) { 590 $dbh->do("INSERT INTO section 591 VALUES( 592 \"\", 593 \"$chapter_id\", 594 \"$secnum\", 595 \"$section\", 596 \"\" 597 )" 598 ); 599 dbug "INSERT INTO section VALUES(\"\", \"$textbook_id\", \"$secnum\", \"$section\", \"\" )\n"; 600 $section_id = $dbh->selectrow_array($query); 601 } 602 603 @textproblems = @{$texthashref->{problems}}; 604 for my $tp (@textproblems) { 605 $query = "SELECT problem_id FROM problem WHERE section_id = \"$section_id\" AND number = \"$tp\""; 606 my $problem_id = $dbh->selectrow_array($query); 607 if (!defined($problem_id)) { 608 $dbh->do("INSERT INTO problem 609 VALUES( 610 \"\", 611 \"$section_id\", 612 \"$tp\", 613 \"\" 614 )" 615 ); 616 dbug "INSERT INTO problem VALUES( \"\", \"$section_id\", \"$tp\", \"\" )\n"; 617 $problem_id = $dbh->selectrow_array($query); 618 } 619 620 # pgfile_problem table associates pgfiles with textbook problems 621 # 622 $query = "SELECT problem_id FROM pgfile_problem WHERE problem_id = \"$problem_id\" AND pgfile_id = \"$pgfile_id\""; 623 my $pg_problem_id = $dbh->selectrow_array($query); 624 if (!defined($pg_problem_id)) { 625 $dbh->do("INSERT INTO pgfile_problem 626 VALUES( 627 \"$pgfile_id\", 628 \"$problem_id\" 629 )" 630 ); 631 dbug "INSERT INTO pgfile_problem VALUES( \"$pgfile_id\", \"$problem_id\" )\n"; 632 } 633 } 634 635 } 636 #reset tag vars, they may not match the next file 637 $date =""; $textauthor=""; $textsection=""; 638 $chapter=""; $section=""; 639 } 640 close(IN) or die "can not close: $!"; 641 } 642 } 643 644 645 $dbh->disconnect; 646 647 print "\nDone.\n"; 648
| aubreyja at gmail dot com | ViewVC Help |
| Powered by ViewVC 1.0.9 |