[system] / trunk / webwork2 / bin / NPL-update Repository:
ViewVC logotype

View of /trunk/webwork2/bin/NPL-update

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6169 - (download) (annotate)
Fri Dec 4 18:34:26 2009 UTC (3 years, 5 months ago) by jj
File size: 22436 byte(s)
Fixed problem when reading tags so that it will do a better job at skipping
extraneous tags, like HHSection.

    1 #!/usr/bin/perl
    2 
    3 # This is the script formerly known as loadDB2.  It is used to update
    4 # the database when it comes to the National Problem Library (NPL).
    5 # This should be run after doing a cvs checkout or update for the NPL
    6 # files.
    7 
    8 # In order for this script to work:
    9 #   1) The NPL downloaded to your machine (the .pg files)
   10 #   2) The environment variable WEBWORK_ROOT needs to be 
   11 #      correctly defined (as with other scripts here).
   12 #   3) Configuration for the NPL in global.conf needs to be
   13 #      done (basically just setting the path to the NPL files.
   14 
   15 #use strict;
   16 use File::Find;
   17 use File::Basename;
   18 use Cwd;
   19 use DBI;
   20 
   21  #(maximum varchar length is 255 for mysql version < 5.0.3.  
   22  #You can increase path length to  4096 for mysql > 5.0.3)
   23 
   24 BEGIN {
   25         die "WEBWORK_ROOT not found in environment.\n"
   26                 unless exists $ENV{WEBWORK_ROOT};
   27 }
   28 
   29 ### Data for creating the database tables
   30 
   31 @create_tables = (
   32 ['NPL-DBsubject', '
   33 	DBsubject_id int(15) NOT NULL auto_increment,
   34 	name varchar(127) NOT NULL,
   35 	KEY DBsubject (name),
   36 	PRIMARY KEY (DBsubject_id)
   37 '],
   38 ['NPL-DBchapter', '
   39 	DBchapter_id int(15) NOT NULL auto_increment,
   40 	name varchar(127) NOT NULL,
   41 	DBsubject_id int(15) DEFAULT 0 NOT NULL,
   42 	KEY DBchapter (name),
   43 	KEY (DBsubject_id),
   44 	PRIMARY KEY (DBchapter_id)
   45 '],
   46 ['NPL-DBsection', '
   47 	DBsection_id int(15) NOT NULL auto_increment,
   48 	name varchar(255) NOT NULL,
   49 	DBchapter_id int(15) DEFAULT 0 NOT NULL,
   50 	KEY DBsection (name),
   51 	KEY (DBchapter_id),
   52 	PRIMARY KEY (DBsection_id)
   53 '],
   54 ['NPL-author', '
   55 	author_id int (15) NOT NULL auto_increment,
   56 	institution tinyblob,
   57 	lastname varchar (100) NOT NULL,
   58 	firstname varchar (100) NOT NULL,
   59 	email varchar (255),
   60 	KEY author (lastname, firstname),
   61 	PRIMARY KEY (author_id)
   62 '],
   63 ['NPL-path', '
   64 	path_id int(15) NOT NULL auto_increment,
   65 	path varchar(255) NOT NULL,
   66 	machine varchar(127),
   67 	user varchar(127),
   68 	KEY (path),
   69 	PRIMARY KEY (path_id)
   70 '],
   71 ['NPL-pgfile', '
   72 	pgfile_id int(15) NOT NULL auto_increment,
   73 	DBsection_id int(15) NOT NULL,
   74 	author_id int(15),
   75 	institution tinyblob,
   76 	path_id int(15) NOT NULL,
   77 	filename varchar(255) NOT NULL,
   78 	PRIMARY KEY (pgfile_id)
   79 '],
   80 ['NPL-keyword', '
   81 	keyword_id int(15) NOT NULL auto_increment,
   82 	keyword varchar(65) NOT NULL,
   83 	KEY (keyword),
   84 	PRIMARY KEY (keyword_id)
   85 '],
   86 ['NPL-pgfile-keyword', '
   87 	pgfile_id int(15) DEFAULT 0 NOT NULL,
   88 	keyword_id int(15) DEFAULT 0 NOT NULL,
   89 	KEY pgfile_keyword (keyword_id, pgfile_id),
   90 	KEY pgfile (pgfile_id)
   91 '],
   92 ['NPL-textbook', '
   93 	textbook_id int (15) NOT NULL auto_increment,
   94 	title varchar (255) NOT NULL,
   95 	edition int (3) DEFAULT 0 NOT NULL,
   96 	author varchar (63) NOT NULL,
   97 	publisher varchar (127),
   98 	isbn char (15),
   99 	pubdate varchar (27),
  100 	PRIMARY KEY (textbook_id)
  101 '],
  102 ['NPL-chapter', '
  103 	chapter_id int (15) NOT NULL auto_increment,
  104 	textbook_id int (15),
  105 	number int(3),
  106 	name varchar(127) NOT NULL,
  107 	page int(4),
  108 	KEY (textbook_id, name),
  109 	KEY (number),
  110 	PRIMARY KEY (chapter_id)
  111 '],
  112 ['NPL-section', '
  113 	section_id int(15) NOT NULL auto_increment,
  114 	chapter_id int (15),
  115 	number int(3),
  116 	name varchar(127) NOT NULL,
  117 	page int(4),
  118 	KEY (chapter_id, name),
  119 	KEY (number),
  120 	PRIMARY KEY section (section_id)
  121 '],
  122 ['NPL-problem', '
  123 	problem_id int(15) NOT NULL auto_increment,
  124 	section_id int(15),
  125 	number int(4) NOT NULL,
  126 	page int(4),
  127 	#KEY (page, number),
  128 	KEY (section_id),
  129 	PRIMARY KEY (problem_id)
  130 '],
  131 ['NPL-pgfile-problem', '
  132 	pgfile_id int(15) DEFAULT 0 NOT NULL,
  133 	problem_id int(15) DEFAULT 0 NOT NULL,
  134 	PRIMARY KEY (pgfile_id, problem_id)
  135 ']);
  136 
  137 ### End of database data
  138 
  139 # Get database connection
  140 
  141 use lib "$ENV{WEBWORK_ROOT}/lib";
  142 use WeBWorK::CourseEnvironment;
  143 
  144 my $ce = new WeBWorK::CourseEnvironment({webwork_dir=>$ENV{WEBWORK_ROOT}});
  145 my $dbh = DBI->connect(
  146         $ce->{database_dsn},
  147         $ce->{database_username},
  148         $ce->{database_password},
  149         {
  150                 PrintError => 0,
  151                 RaiseError => 1,
  152         },
  153 );
  154 
  155 my $passwd = $ce->{database_password};
  156 my $user = $ce->{database_username};
  157 my $libraryRoot = $ce->{problemLibrary}->{root};
  158 my $verbose = 0;
  159 my $cnt2 = 0;
  160 
  161 $| = 1; # autoflush output
  162 
  163 sub dbug {
  164 	my $msg = shift;
  165 	my $insignificance = shift || 2;
  166 	print $msg if($verbose>=$insignificance);
  167 }
  168 
  169 ## Resetting the database tables.
  170 #  First take care of tables which are no longer used
  171 
  172 $dbh->do("DROP TABLE IF EXISTS `NPL-institution`");
  173 $dbh->do("DROP TABLE IF EXISTS `NPL-pgfile-institution`");
  174 
  175 for my $tableinfo (@create_tables) {
  176     my $tabname = $tableinfo->[0];
  177     my $tabinit = $tableinfo->[1];
  178 	my $query = "DROP TABLE IF EXISTS `$tabname`";
  179 	$dbh->do($query);
  180 	$query = "CREATE TABLE `$tabname` ( $tabinit )";
  181 	$dbh->do($query);
  182 }
  183 
  184 print "Mysql database reinitialized.\n";
  185 
  186 # From pgfile
  187 ## DBchapter('Limits and Derivatives')
  188 ## DBsection('Calculating Limits using the Limit Laws')
  189 ## Date('6/3/2002')
  190 ## Author('Tangan Gao')
  191 ## Institution('csulb')
  192 ## TitleText1('Calculus Early Transcendentals')
  193 ## EditionText1('4')
  194 ## AuthorText1('Stewart')
  195 ## Section1('2.3')
  196 ## Problem1('7')
  197 #
  198 # The database structure is in the file create_tables2.sql and its ER-graph is 
  199 #    the file wwdb_er_graph.pdf. 
  200 
  201 my ($name,$pgfile,$pgpath);
  202 
  203 #### First read in textbook information
  204 
  205 if(open(IN, "$libraryRoot/Textbooks")) {
  206 	print "Reading in textbook data from Textbooks in the library $libraryRoot.\n";
  207 	my %textinfo = ( TitleText => '', EditionText =>'', AuthorText=>'');
  208 	my $bookid = undef;
  209 	while (my $line = <IN>) {
  210 		$line =~ s|#*$||;
  211 		if($line =~ /^\s*(.*?)\s*>>>\s*(.*?)\s*$/) { # Should have chapter or section information
  212 			my $chapsec = $1;
  213 			my $title = $2;
  214 			if($chapsec=~ /(\d+)\.(\d+)/) { # We have a section
  215 				if(defined($bookid)) {
  216 					my $query = "SELECT chapter_id FROM `NPL-chapter` WHERE textbook_id = \"$bookid\" AND number = \"$1\"";
  217 					my $chapid = $dbh->selectrow_array($query);
  218 					if(defined($chapid)) {
  219 						$query = "SELECT section_id FROM `NPL-section` WHERE chapter_id = \"$chapid\" AND name = \"$title\"";
  220 						my $sectid = $dbh->selectrow_array($query);
  221 						if (!defined($sectid)) {
  222 							$dbh->do("INSERT INTO `NPL-section`
  223 					VALUES(
  224 						\"\",
  225 						\"$chapid\",
  226 						\"$2\",
  227 						\"$title\",
  228 						\"\"
  229 					)"
  230 								);
  231 							dbug "INSERT INTO section VALUES(\"\", \"$chapid\", \"$2\", \"$title\", \"\" )\n";
  232 						}
  233 					} else {
  234 						print "Cannot enter section $chapsec because textbook information is missing the chapter entry\n";
  235 					}
  236 				} else {
  237 					print "Cannot enter section $chapsec because textbook information is incomplete\n";
  238 				}
  239 			} else { # We have a chapter entry
  240 				if(defined($bookid)) {
  241 					my $query = "SELECT chapter_id FROM `NPL-chapter` WHERE textbook_id = \"$bookid\" AND number = \"$chapsec\"";
  242 					my $chapid = $dbh->selectrow_array($query);
  243 					if (!defined($chapid)) {
  244 						$dbh->do("INSERT INTO `NPL-chapter`
  245 					VALUES(
  246 						\"\",
  247 						\"$bookid\",
  248 						\"".$chapsec."\",
  249 						\"$title\",
  250 						\"\"
  251 					)"
  252 							);
  253 						$chapid = $dbh->selectrow_array($query);
  254 
  255 						# Add dummy section entry for problems tagged to the chapter
  256 						# without a section
  257 						$query = "SELECT section_id FROM `NPL-section` WHERE chapter_id = \"$chapid\" AND number = -1";
  258 						my $sectid = $dbh->selectrow_array($query);
  259 						if (!defined($sectid)) {
  260 							$dbh->do("INSERT INTO `NPL-section`
  261 						VALUES(
  262 							\"\",
  263 							\"$chapid\",
  264 							\"-1\",
  265 							\"\",
  266 							\"\"
  267 						)"
  268 								);
  269 						dbug "INSERT INTO section VALUES(\"\", \"$chapid\", \"-1\", \"\", \"\" )\n";
  270 						}
  271 					}
  272 				} else {
  273 					print "Cannot enter chapter $chapsec because textbook information is incomplete\n";
  274 				}
  275 			}
  276 		} elsif($line =~ /^\s*(TitleText|EditionText|AuthorText)\(\s*'(.*?)'\s*\)/) {
  277 			# Textbook information, maybe new
  278 			my $type = $1;
  279 			if(defined($textinfo{$type})) { # signals new text
  280 				%textinfo = ( TitleText => undef, 
  281 							  EditionText =>undef, 
  282 							  AuthorText=> undef);
  283 				$textinfo{$type} = $2;
  284 				$bookid = undef;
  285 			} else {
  286 				$textinfo{$type} = $2;
  287 				if(defined($textinfo{TitleText}) and 
  288 				   defined($textinfo{AuthorText}) and 
  289 				   defined($textinfo{EditionText})) {
  290 					my $query = "SELECT textbook_id FROM `NPL-textbook` WHERE title = \"$textinfo{TitleText}\" AND edition = \"$textinfo{EditionText}\" AND author=\"$textinfo{AuthorText}\"";
  291 					$bookid = $dbh->selectrow_array($query);
  292 					if (!defined($bookid)) {
  293 						$dbh->do("INSERT INTO `NPL-textbook`
  294 					VALUES(
  295 						\"\",
  296 						\"$textinfo{TitleText}\",
  297 						\"$textinfo{EditionText}\",
  298 						\"$textinfo{AuthorText}\",
  299 						\"\",
  300 						\"\",
  301 						\"\"
  302 					)"
  303 							);
  304 						dbug "INSERT INTO textbook VALUES( \"\", \"$textinfo{TitleText}\", \"$textinfo{EditionText}\", \"$textinfo{AuthorText}\", \"\", \"\", \"\" )\n";
  305 						$bookid = $dbh->selectrow_array($query);
  306 					}
  307 				}
  308 			}
  309 		}
  310 	}
  311 	close(IN);
  312 } else{
  313 	print "Textbooks file was not found in library $libraryRoot. If the path to the problem library doesn't seem
  314 	correct, make modifications in webwork2/conf/global.conf (\$problemLibrary{root}).  If that is correct then
  315 	updating from cvs should download the Textbooks file.\n";
  316 }
  317 
  318 print "Converting data from tagged pgfiles into mysql.\n";
  319 print "Number of files processed:\n";
  320 
  321 #### Now search for tagged problems
  322 #recursive search for all pg files
  323 
  324 find({ wanted => \&pgfiles, follow_fast=> 1}, $libraryRoot);
  325 
  326 sub kwtidy {
  327 	my $s = shift;
  328 	$s =~ s/\W//g;
  329 	$s =~ s/_//g;
  330 	$s = lc($s);
  331 	return($s);
  332 }
  333 
  334 sub keywordcleaner {
  335 	my $string = shift;
  336 	my @spl1 = split /,/, $string;
  337 	my @spl2 = map(kwtidy($_), @spl1);
  338 	return(@spl2);
  339 }
  340 
  341 # Save on passing these values around
  342 my %textinfo;
  343 
  344 # Initialize, if needed more text-info information;
  345 sub maybenewtext {
  346 	my $textno = shift;
  347 	return if defined($textinfo{$textno});
  348 	# So, not defined yet
  349 	$textinfo{$textno} = { title => '', author =>'', edition =>'',
  350 						   section => '', chapter =>'', problems => [] };
  351 }
  352 
  353 # process each file returned by the find command.
  354 sub pgfiles {
  355 	my $name = $File::Find::name;
  356 	my ($subject, $chapter, $section, $date, $institution, $author, $text);
  357 	my ($edition, $textauthor, $textsection, $textproblem, $tagged);
  358 	%textinfo=();
  359 	my @textproblems = (-1);
  360 	if ($name =~ /pg$/) {
  361 		$pgfile = basename($name);
  362 		$pgpath = dirname($name);
  363 		$cnt2++;
  364 		printf("%6d", $cnt2) if(($cnt2 % 100) == 0);
  365 		print "\n" if(($cnt2 % 1000) == 0);
  366 		$pgpath =~ s|^$libraryRoot/||;
  367 		open(IN,"$name") or die "can not open $name: $!";
  368 		$tagged = 0;
  369 		while (<IN>) {
  370 		SWITCH: {
  371 				if (/\bKEYWORDS\((.*)\)/i) {
  372 					@keyword = keywordcleaner($1);
  373 					last SWITCH;
  374 				}
  375 				if (/\bDBsubject\(\s*'(.*?)'\s*\)/) {
  376 					$subject = $1;
  377 					$subject =~ s/'/\'/g;
  378 					last SWITCH;
  379 				}
  380 				if (/\bDBchapter\(\s*'(.*?)'\s*\)/) {
  381 					$chapter = $1;
  382 					$chapter =~ s/'/\'/g;
  383 					$tagged = 1;
  384 					last SWITCH;
  385 				}
  386 				if (/\bDBsection\(\s*'(.*?)'\s*\)/) {
  387 					$section = $1;
  388 					$section =~ s/'/\'/g;
  389 					last SWITCH;
  390 				}
  391 				if (/\bDate\(\s*'(.*?)'\s*\)/) {
  392 					$date = $1;
  393 					$date =~ s/'/\'/g;
  394 					last SWITCH;
  395 				}
  396 				if (/\bInstitution\(\s*'(.*?)'\s*\)/) {
  397 					$institution = $1;
  398 					$institution =~ s/'/\'/g;
  399 					last SWITCH;
  400 				}
  401 				if (/\bAuthor\(\s*'(.*?)'\s*\)/) {
  402 					$author = $1;
  403 					$author =~ s/'/\'/g;
  404 					last SWITCH;
  405 				}
  406 				if (/\bTitleText(\d+)\(\s*'(.*?)'\s*\)/) {
  407 					$textno = $1;
  408 					$text = $2;
  409 					$text =~ s/'/\'/g;
  410 					if ($text =~ /\S/) {
  411 						maybenewtext($textno);
  412 						$textinfo{$textno}->{title} = $text;
  413 					}
  414 					last SWITCH;
  415 				}
  416 				if (/\bEditionText(\d+)\(\s*'(.*?)'\s*\)/) {
  417 					$textno = $1;	
  418 					$edition = $2;
  419 					$edition =~ s/'/\'/g;
  420 					if ($edition =~ /\S/) {
  421 						maybenewtext($textno);
  422 						$textinfo{$textno}->{edition} = $edition;
  423 					}
  424 					last SWITCH;
  425 				}
  426 				if (/\bAuthorText(\d+)\(\s*'(.*?)'\s*\)/) {
  427 					$textno = $1;
  428 					$textauthor = $2;
  429 					$textauthor =~ s/'/\'/g;
  430 					if ($textauthor =~ /\S/) {
  431 						maybenewtext($textno);
  432 						$textinfo{$textno}->{author} = $textauthor;
  433 					}
  434 					last SWITCH;
  435 				}
  436 				if (/\bSection(\d+)\(\s*'(.*?)'\s*\)/) {
  437 					$textno = $1;
  438 					$textsection = $2;
  439 					$textsection =~ s/'/\'/g;
  440 					if ($textsection =~ /\S/) {
  441 						maybenewtext($textno);
  442 						if ($textsection =~ /(\d*?)\.(\d*)/) {
  443 							$textinfo{$textno}->{chapter} = $1;
  444 							$textinfo{$textno}->{section} = $2;
  445 						} else {
  446 							$textinfo{$textno}->{chapter} = $textsection;
  447 							$textinfo{$textno}->{section} = -1;
  448 						}
  449 
  450 					}
  451 					last SWITCH;
  452 				}
  453 				if (/\bProblem(\d+)\(\s*(.*?)\s*\)/) {
  454 					$textno = $1;
  455 					$textproblem = $2;
  456 					$textproblem =~ s/\D/ /g;
  457 					@textproblems = split /\s+/, $textproblem;
  458 					@textproblems = grep { $_ =~ /\S/ } @textproblems;
  459 					if (scalar(@textproblems) or defined($textinfo{$textno})) {
  460 						@textproblems = (-1) unless(scalar(@textproblems));
  461 						maybenewtext($textno);
  462 						$textinfo{$textno}->{problems} = \@textproblems;
  463 					}
  464 					#print "$textproblem\n" if ($textproblem !~ /^[\s\d]+$/);
  465 					#		$textproblem =~ s/'/\'/g;
  466 					last SWITCH;
  467 				}
  468 			}
  469 		}						#end of SWITCH and while
  470 		if ($tagged and $chapter eq 'ZZZ-Inserted Text') {
  471 			$tagged=0;
  472 		}
  473 		if ($tagged) {
  474 			#
  475 			# kludge to fix the omission of a subject field
  476 			unless($subject) {
  477 				if ($text =~ /precalculus/i) {
  478 					$subject = "Precalculus";
  479 				} elsif ($text =~ /calculus/i) {
  480 					$subject = "Calculus";
  481 				} elsif ($text =~ /linear/i) {
  482 					$subject = "Linear Algebra";
  483 				} elsif ($text =~ /algebra/i) {
  484 					$subject = "Algebra";
  485 				} elsif ($text =~ /statistic/i) {
  486 					$subject = "Statistics";
  487 				} elsif ($text =~ /financial/i) {
  488 					$subject = "Financial Mathematics";
  489 				} else {
  490 					$subject = "Misc";
  491 				}
  492 			}
  493 			# From the pgfile we just looked at,
  494 			## DBchapter('Limits and Derivatives') 		in $chapter
  495 			## DBsection('Calculating Limits using the Limit Laws') in $section
  496 			## Date('6/3/2002') 				in $date
  497 			## Author('Tangan Gao') 				in $author
  498 			## Institution('csulb') 				in $institution
  499 			## TitleText1('Calculus Early Transcendentals')	in $text
  500 			## EditionText1('4') 				in $edition
  501 			## AuthorText1('Stewart') 				in $textauthor
  502 			## Section1('2.3') 					in $textsection
  503 			## Problem1('7') 					in $textproblem
  504 			#
  505 			# The database structure is in the file create_tables2.sql and its ER-graph
  506 			#    is the file wwdb_er_graph.pdf. Insert, in order, into the tables
  507 			#    in that file.
  508 			#
  509 
  510 			#selectrow_array returns first field of first row in scalar context or undef 
  511 			# undef for failure also, $dbh->{RaiseError} = 1 should catch that case.
  512 			#
  513 			# DBsubject table
  514 			#
  515 			$query = "SELECT DBsubject_id FROM `NPL-DBsubject` WHERE name = \"$subject\"";
  516 			my $DBsubject_id = $dbh->selectrow_array($query);
  517 			if (!defined($DBsubject_id)) {
  518 				$dbh->do(
  519 						 "INSERT INTO `NPL-DBsubject`
  520 					VALUES(
  521 						\"\",
  522 						\"$subject\"
  523 					)"
  524 						);
  525 				dbug "INSERT INTO DBsubject VALUES(\"\",\"$subject\")\n";
  526 				$DBsubject_id = $dbh->selectrow_array($query);
  527 			}
  528 
  529 			# DBchapter table
  530 			#
  531 			$query = "SELECT DBchapter_id FROM `NPL-DBchapter` WHERE name = \"$chapter\" and DBsubject_id = $DBsubject_id";
  532 			my $DBchapter_id = $dbh->selectrow_array($query);  
  533 			if (!defined($DBchapter_id)) {
  534 				$dbh->do("INSERT INTO `NPL-DBchapter` 
  535 					VALUES(
  536 						\"\",
  537 						\"$chapter\",
  538 						\"$DBsubject_id\"
  539 					)"
  540 						);
  541 				dbug "INSERT INTO DBchapter VALUES(  \"\", \"$chapter\", \"$DBsubject_id\)\n";
  542 				$DBchapter_id = $dbh->selectrow_array($query);
  543 			}
  544 
  545 			# DBsection table
  546 			#
  547 			$query = "SELECT DBsection_id FROM `NPL-DBsection` WHERE name = \"$section\" AND DBchapter_id = $DBchapter_id";
  548 			my $DBsection_id = $dbh->selectrow_array($query);
  549 			if (!defined($DBsection_id)) {
  550 				$dbh->do("INSERT INTO `NPL-DBsection`
  551 					VALUES(
  552 						\"\",
  553 						\"$section\",
  554 						\"$DBchapter_id\"
  555 					)"
  556 						);
  557 				dbug "INSERT INTO DBsection VALUES( \"\", \"$section\", \"$DBchapter_id\", \"$DBsubject_id\" )\n";
  558 				$DBsection_id = $dbh->selectrow_array($query);
  559 			}
  560 
  561 			# author table
  562 			#
  563 			$author =~ /(.*?)\s(\w+)\s*$/;
  564 			my $firstname = $1;
  565 			my $lastname = $2;
  566 			#remove leading and trailing spaces from firstname, which includes any middle name too.
  567 			$firstname =~ s/^\s*//;
  568 			$firstname =~ s/\s*$//;
  569 			$query = "SELECT author_id FROM `NPL-author` WHERE lastname = \"$lastname\" AND firstname=\"$firstname\"";
  570 			my $author_id = $dbh->selectrow_array($query);
  571 			if (!defined($author_id)) {
  572 				$dbh->do("INSERT INTO `NPL-author`
  573 					VALUES(
  574 						\"\",
  575 						\"$institution\",
  576 						\"$lastname\",
  577 						\"$firstname\",
  578 						\"\"
  579 					)"
  580 						);
  581 				dbug "INSERT INTO author VALUES( \"\", \"$institution\", \"$lastname\", \"$firstname\", \"\" )\n";
  582 				$author_id = $dbh->selectrow_array($query);
  583 			}
  584 
  585 			# path table
  586 			#
  587 			$query = "SELECT path_id FROM `NPL-path` WHERE path = \"$pgpath\"";
  588 			my $path_id = $dbh->selectrow_array($query);
  589 			if (!defined($path_id)) {
  590 				$dbh->do("INSERT INTO `NPL-path`
  591 					VALUES(
  592 						\"\",
  593 						\"$pgpath\",
  594 						\"\",
  595 						\"\"
  596 					)"
  597 						);
  598 				dbug "INSERT INTO path VALUES( \"\", \"$path\", \"\", \"\" )\n";
  599 				$path_id = $dbh->selectrow_array($query);
  600 			}
  601 
  602 			# pgfile table
  603 			#
  604 			my $pgfile_id;
  605 			$dbh->do("INSERT INTO `NPL-pgfile`
  606 					VALUES(
  607 						\"\",
  608 						\"$DBsection_id\",
  609 						\"$author_id\",
  610 						\"$institution\",
  611 						\"$path_id\",
  612 						\"$pgfile\"
  613 					)"
  614 					);
  615 			dbug "INSERT INTO pgfile VALUES( \"\", \"$DBsection_id\", \"$author_id\", \"$institution\", \"$path_id\", \"$pgfile\", \"\" )\n";
  616 			$query = "SELECT pgfile_id FROM `NPL-pgfile` WHERE filename = \"$pgfile\" and path_id=$path_id";
  617 			$pgfile_id = $dbh->selectrow_array($query);
  618 
  619 			# keyword table, and problem_keyword many-many table
  620 			#
  621 			foreach my $keyword (@keyword) {
  622 				$keyword =~ s/[\'\"]//g;
  623 				$query = "SELECT keyword_id FROM `NPL-keyword` WHERE keyword = \"$keyword\"";
  624 				my $keyword_id = $dbh->selectrow_array($query);
  625 				if (!defined($keyword_id)) {
  626 					$dbh->do("INSERT INTO `NPL-keyword`
  627 						VALUES(
  628 							\"\",
  629 							\"$keyword\"
  630 						)"
  631 							);
  632 					dbug "INSERT INTO keyword VALUES( \"\", \"$keyword\")\n";
  633 					$keyword_id = $dbh->selectrow_array($query);
  634 				}
  635 
  636 				$query = "SELECT pgfile_id FROM `NPL-pgfile-keyword` WHERE keyword_id = \"$keyword_id\" and pgfile_id=\"$pgfile_id\"";
  637 				my $ok = $dbh->selectrow_array($query);
  638 				if (!defined($ok)) {
  639 					$dbh->do("INSERT INTO `NPL-pgfile-keyword`
  640 						VALUES(
  641 							\"$pgfile_id\",
  642 							\"$keyword_id\"
  643 						)"
  644 							);
  645 					dbug "INSERT INTO pgfile_keyword VALUES( \"$pgfile_id\", \"$keyword_id\" )\n";
  646 				}
  647 			}					#end foreach keyword
  648 
  649 			# Textbook section
  650 			# problem table contains textbook problems
  651 			#
  652 			for my $textno (keys %textinfo) {
  653 				my $texthashref = $textinfo{$textno};
  654 
  655 				# textbook table
  656 				#
  657 				$text = $texthashref->{title};
  658 				$edition = $texthashref->{edition};
  659 				$textauthor = $texthashref->{author};
  660 				next unless($text and $textauthor);
  661 				my $chapnum = $texthashref->{chapter};
  662 				my $secnum = $texthashref->{section};
  663 				$query = "SELECT textbook_id FROM `NPL-textbook` WHERE title = \"$text\" AND edition = \"$edition\" AND author=\"$textauthor\"";
  664 				my $textbook_id = $dbh->selectrow_array($query);
  665 				if (!defined($textbook_id)) {
  666 					$dbh->do("INSERT INTO `NPL-textbook`
  667 					VALUES(
  668 						\"\",
  669 						\"$text\",
  670 						\"$edition\",
  671 						\"$textauthor\",
  672 						\"\",
  673 						\"\",
  674 						\"\"
  675 					)"
  676 							);
  677 					dbug "INSERT INTO textbook VALUES( \"\", \"$text\", \"$edition\", \"$textauthor\", \"\", \"\", \"\" )\n";
  678 					dbug "\nLate add into NPL-textbook \"$text\", \"$edition\", \"$textauthor\"\n", 1;
  679 					$textbook_id = $dbh->selectrow_array($query);
  680 				}
  681 
  682 				# chapter weak table of textbook
  683 				#
  684 				$query = "SELECT chapter_id FROM `NPL-chapter` WHERE textbook_id = \"$textbook_id\" AND number = \"$chapnum\"";
  685 				my $chapter_id = $dbh->selectrow_array($query);
  686 				if (!defined($chapter_id)) {
  687 					$dbh->do("INSERT INTO `NPL-chapter`
  688 					VALUES(
  689 						\"\",
  690 						\"$textbook_id\",
  691 						\"".$chapnum."\",
  692 						\"$chapter\",
  693 						\"\"
  694 					)"
  695 							);
  696 					dbug "\nLate add into NPL-textchapter \"$text\", \"$edition\", \"$textauthor\", $chapnum $chapter from $name\n", 1;
  697 					dbug "INSERT INTO chapter VALUES(\"\", \"$textbook_id\", \"".$chapnum."\", \"$chapter\", \"\" )\n";
  698 					$chapter_id = $dbh->selectrow_array($query);
  699 				}
  700 
  701 				# section weak table of textbook
  702 				#
  703 				$section = '' if ($secnum < 0);
  704 				$query = "SELECT section_id FROM `NPL-section` WHERE chapter_id = \"$chapter_id\" AND number = \"$secnum\"";
  705 				my $section_id = $dbh->selectrow_array($query);
  706 				if (!defined($section_id)) {
  707 					$dbh->do("INSERT INTO `NPL-section`
  708 					VALUES(
  709 						\"\",
  710 						\"$chapter_id\",
  711 						\"$secnum\",
  712 						\"$section\",
  713 						\"\"
  714 					)"
  715 							);
  716 					dbug "INSERT INTO section VALUES(\"\", \"$textbook_id\", \"$secnum\", \"$section\", \"\" )\n";
  717 					dbug "\nLate add into NPL-textsection \"$text\", \"$edition\", \"$textauthor\", $secnum $section from $name\n", 1;
  718 					$section_id = $dbh->selectrow_array($query);
  719 				}
  720 
  721 				@textproblems = @{$texthashref->{problems}};
  722 				for my $tp (@textproblems) {
  723 					$query = "SELECT problem_id FROM `NPL-problem` WHERE section_id = \"$section_id\" AND number = \"$tp\"";
  724 					my $problem_id = $dbh->selectrow_array($query);
  725 					if (!defined($problem_id)) {
  726 						$dbh->do("INSERT INTO `NPL-problem`
  727 					VALUES(
  728 						\"\",
  729 						\"$section_id\",
  730 						\"$tp\",
  731 						\"\"
  732 					)"
  733 								);
  734 						dbug "INSERT INTO problem VALUES( \"\", \"$section_id\", \"$tp\", \"\" )\n";
  735 						$problem_id = $dbh->selectrow_array($query);
  736 					}
  737 
  738 					# pgfile_problem table associates pgfiles with textbook problems
  739 					#
  740 					$query = "SELECT problem_id FROM `NPL-pgfile-problem` WHERE problem_id = \"$problem_id\" AND pgfile_id = \"$pgfile_id\"";
  741 					my $pg_problem_id = $dbh->selectrow_array($query);  
  742 					if (!defined($pg_problem_id)) {
  743 						$dbh->do("INSERT INTO `NPL-pgfile-problem`
  744 					VALUES(
  745 						\"$pgfile_id\",
  746 						\"$problem_id\"
  747 					)"
  748 								);
  749 						dbug "INSERT INTO pgfile_problem VALUES( \"$pgfile_id\", \"$problem_id\" )\n";
  750 					}
  751 				}
  752 
  753 				#reset tag vars, they may not match the next text/file
  754 				$date =""; $textauthor=""; $textsection="";
  755 				$chapter=""; $section="";
  756 			}
  757 		}
  758 		close(IN) or die "can not close: $!";
  759 	}
  760 }
  761 
  762 
  763 $dbh->disconnect;
  764 
  765 print "\nDone.\n";

aubreyja at gmail dot com
ViewVC Help
Powered by ViewVC 1.0.9