Parent Directory
|
Revision Log
adding missing files
1 ################################################################################ 2 # WeBWorK Online Homework Delivery System 3 # Copyright © 2000-2007 The WeBWorK Project, http://openwebwork.sf.net/ 4 # $CVSHeader: webwork2/lib/WeBWorK/NPL.pm,v 1.2 2008/02/04 22:56:23 sh002i Exp $ 5 # 6 # This program is free software; you can redistribute it and/or modify it under 7 # the terms of either: (a) the GNU General Public License as published by the 8 # Free Software Foundation; either version 2, or (at your option) any later 9 # version, or (b) the "Artistic License" which comes with this package. 10 # 11 # This program is distributed in the hope that it will be useful, but WITHOUT 12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 13 # FOR A PARTICULAR PURPOSE. See either the GNU General Public License or the 14 # Artistic License for more details. 15 # 16 # Contributed by W.H. Freeman; Bedford, Freeman, and Worth Publishing Group. 17 ################################################################################ 18 19 package WeBWorK::NPL; 20 use base 'Exporter'; 21 22 =head1 NAME 23 24 WeBWorK::NPL - Parse formats used by the National Problem Library. 25 26 =head1 SYNOPSIS 27 28 use WeBWorK::NPL qw/read_textbooks read_tags format_tags gen_find_tags/; 29 30 open TEXTS, "<", "Textbooks"; 31 my $textbooks = []; 32 read_textbooks(\*TEXTS, $textbooks); 33 34 open PGFILE, "<", "file.pg"; 35 my $tags = {}; 36 read_tags(\*PGFILE, $tags); 37 38 foreach my $string (format_tags($tags)) { 39 $string =~ s/^/## /gm; 40 print "TAG: $string\n"; 41 } 42 43 use File::Find; 44 my $process = sub { print "Found: $_[0]\n" }; 45 my $wanted = gen_find_tags({author=>'Rogawski'}, $process); 46 find({wanted=>$wanted}, @ARGV); 47 48 =head1 DESCRIPTION 49 50 This package contains parsing routines for the various data formats associated 51 with the National Problem Library. 52 53 =cut 54 55 use strict; 56 use warnings; 57 use Data::Dumper; 58 59 our @EXPORT_OK = qw( 60 read_textbooks 61 read_tags 62 format_tags 63 gen_find_tags 64 ); 65 66 our @global_fields = qw(DESCRIPTION KEYWORDS DBsubject DBchapter DBsection Date 67 Institution Author UsesAuxiliaryFiles); 68 our @textbook_fields = qw(title edition author chapter section problem); 69 70 our %tag2field = ( TitleText => "title", EditionText => "edition", 71 AuthorText => "author", Section => "section", Problem => "problem", ); 72 our %field2tag = reverse %tag2field; 73 74 =head1 FUNCTIONS 75 76 =head2 read_textbooks 77 78 read_textbooks($fh, $arrayref) 79 80 Reads a Textbooks file opened for reading on $fh and appends its contents to 81 $arrayref. Each item appended to $arrayref is a reference to a hash containing 82 the following keys: 83 84 _title The title of the textbook 85 _edition The edition of the textbook 86 _author The author of the textbook 87 1 The name of chapter 1 88 1.1 The name of section 1.1 89 1.2 The name of section 1.2 90 ... 91 2 The name of chapter 2 92 2.1 The name of section 2.1 93 ... 94 95 Since the number of sections in a textbook is typically small, it is not terribly 96 inefficient to pull chapters or sections out: 97 98 @chapters = grep { /^\d+$/ } keys %textbook; 99 @sections = grep { /^\d+\.\d+$/ } keys %textbook; 100 101 =cut 102 103 sub read_textbooks { 104 my ($fh, $result) = @_; 105 106 my %curr_textbook; 107 108 while (<$fh>) { 109 s/#.*$//g; 110 next unless /\S/; 111 s/^\s*//; 112 s/\s*$//; 113 114 if (/^(TitleText|EditionText|AuthorText)\(\s*'(.*?)'\s*\)/) { 115 my $field = $tag2field{$1}; 116 my $value = $2; 117 if (exists $curr_textbook{"_$field"}) { 118 # repeated tag -- this is a new textbook 119 push @$result, {%curr_textbook}; 120 %curr_textbook = (); 121 } 122 $curr_textbook{"_$field"} = $value; 123 } elsif (/^(\d+)(?:\.(\d+))?\s*>>>\s*(.*)$/) { 124 my $chapter = $1; 125 my $section = $2; 126 my $name = $3; 127 if (defined $section and length $section > 0) { 128 $curr_textbook{"$chapter.$section"} = $name; 129 } else { 130 $curr_textbook{$chapter} = $name; 131 } 132 } 133 } 134 push @$result, {%curr_textbook}; 135 } 136 137 =head2 read_tags 138 139 read_tags($fh, $hashref, $extra_editing_info); 140 141 Reads the NPL tags from a PG file opened for reading on $fh and stores the tags 142 in %$hashref. The following keys may be added to %$hashref: 143 144 DESCRIPTION 145 KEYWORDS 146 DBsubject 147 DBchapter 148 DBsection 149 Date 150 Institution 151 Author 152 UsesAuxiliaryFiles (experimental, subject to change) 153 textbooks (arrayref) 154 155 The value for the C<textbooks> key will be a reference to an array of textbook 156 hashes containing the textbook tags from the source file. In each textbook hash, 157 entries with empty values (e.g. C<TitleText1('')>) will be omitted. This is to 158 deal with the large number of empty-valued tags in the NPL. The keys of each 159 textbook hash will be among: 160 161 title 162 edition 163 author 164 chapter 165 section 166 problem (arrayref) 167 168 The value for the C<problem> key will be a reference to an array of problem 169 numbers. 170 171 If $extra_editing_info is true, special hash items _pos, _rest, and _maxtextbook 172 will also be added to %$hashref. 173 174 _pos will contain the position of the first byte of the next line after the last 175 tag in the file. _rest will contain the bytes of the "rest" of the file, after 176 all tags, starting at _pos. _maxtextbook will contain the highest number used to 177 identify a textbook in the file. (e.g. If TitleText1 and TitleText3 appear in 178 the file, there will only be two items in the textbooks array, but _maxtextbook 179 will be 3.) 180 181 This is useful for appending tags to a file which contains existing tags, where 182 the new tags should appear immediately after the existing tags: 183 184 open PGFILE, "+<", "file.pg"; 185 my $tags = {}; 186 read_tags(\*PGFILE, $tags, 1); 187 my $pos = $tags{_pos}; 188 my $rest = $tags{_rest}; 189 seek PGFILE, $pos, 0; 190 print PGFILE "## SomeNewTag('foo','bar')\n"; 191 print PGFILE $rest; 192 close PGFILE; 193 194 =cut 195 196 sub read_tags { 197 my ($file, $result, $extra_editing_info) = @_; 198 199 my $fh; 200 if (ref $file) { 201 $fh = $file; 202 } elsif (defined $file and not ref $file) { 203 $fh = new IO::File($file, 'r'); 204 } 205 206 my $pos; 207 my $rest = ''; 208 my $maxtextbook; 209 while (<$fh>) { 210 #if (0) { 211 if (/^(.*?\#.*?)(\s*)DESCRIPTION/) { 212 my $prefix = $1; 213 my $whitespace = $2; 214 my $description = ''; 215 while (<$fh>) { 216 if (/\#.*ENDDESCRIPTION/) { 217 chomp $description; 218 $result->{DESCRIPTION} = $description if length $description > 0; 219 last; 220 } else { 221 # handle prefix and whitespace separately so that we can still 222 # chop the prefix off even if people are being careless about 223 # whitespace. :P 224 s/^$prefix//; 225 s/^$whitespace//; 226 $description .= $_; 227 } 228 } 229 if ($extra_editing_info) { 230 $pos = tell $fh; 231 $rest = ''; 232 } 233 } elsif (/\#.*KEYWORDS\((.*)\)/) { 234 my $keywords = $1; 235 push @{$result->{KEYWORDS}}, parse_keywords($keywords); 236 if ($extra_editing_info) { 237 $pos = tell $fh; 238 $rest = ''; 239 } 240 } elsif (/\#.*(DBsubject|DBchapter|DBsection|Date|Institution|Author)\(\s*(.*?)\s*\)/) { 241 my $field = $1; 242 my $value = $2; 243 my ($parsed_value, $parse_errors) = parse_normal_value($field, $value); 244 if (@$parse_errors) { 245 warn "error while parsing value \"$value\" in field $field:\n" 246 . join('', @$parse_errors) 247 . "value may be incomplete. use with caution.\n" 248 . "(line $. of file $file)\n"; 249 } 250 $result->{$field} = $parsed_value; 251 if ($extra_editing_info) { 252 $pos = tell $fh; 253 $rest = ''; 254 } 255 } elsif (/\#.*(TitleText|EditionText|AuthorText|Section|Problem)(\d+)\(\s*'(.*?)'\s*\)/) { 256 my $field = $tag2field{$1}; 257 my $num = $2; 258 my $value = $3; 259 next unless $value =~ /\S/; 260 $value = [ parse_problems($value) ] if $field eq "problem"; 261 if ($field eq "section") { 262 my ($ch, $sec) = split /\./, $value; 263 $result->{textbooks}[$num]{chapter} = $ch; 264 $result->{textbooks}[$num]{section} = $sec if defined $sec and length $sec > 0; 265 } else { 266 $result->{textbooks}[$num]{$field} = $value; 267 } 268 if ($extra_editing_info) { 269 $pos = tell $fh; 270 $rest = ''; 271 $maxtextbook = $num if not defined $maxtextbook or $num > $maxtextbook; 272 } 273 } elsif (/\#.*(UsesAuxiliaryFiles)\(\s*(.*?)\s*\)/) { 274 my $field = $1; 275 my $value = $2; 276 my ($parsed_value, $parse_errors) = parse_normal_list($field, $value); 277 if (@$parse_errors) { 278 warn "error while parsing list value \"$value\" in field $field:\n" 279 . join('', @$parse_errors) 280 . "value may be incomplete. use with caution.\n" 281 . "(line $. of file $file)\n"; 282 } 283 $result->{$field} = $parsed_value; 284 if ($extra_editing_info) { 285 $pos = tell $fh; 286 $rest = ''; 287 } 288 } else { 289 if ($extra_editing_info) { 290 $rest .= $_; 291 } 292 } 293 } 294 295 # remove holes in textbook numbering 296 @{$result->{textbooks}} = grep { defined } @{$result->{textbooks}}; 297 delete $result->{textbooks} unless @{$result->{textbooks}}; 298 299 if ($extra_editing_info) { 300 $result->{_pos} = $pos; 301 $result->{_rest} = $rest; 302 $result->{_maxtextbook} = $maxtextbook; 303 } 304 } 305 306 sub parse_normal_list { 307 my ($name, $string) = @_; 308 309 use constant NRM=>0; 310 use constant STR=>1; 311 use constant ESC=>2; 312 use constant STP=>3; 313 my $state = NRM; 314 my @errors; 315 my @items; 316 my $curr_item = ''; 317 my $next_item = 0; 318 foreach my $i (0 .. length($string)-1) { 319 my $c = substr($string,$i,1); 320 #print "i=$i c=$c state=$state curr_item=$curr_item next_item=$next_item\n"; 321 # state changes 322 if ($state == NRM) { 323 if ($c eq "'") { 324 $state = STR; 325 } elsif ($c eq ',' or $c eq ' ') { 326 # do nothing -- closequote already consumed curr_item 327 } else { 328 push @errors, 329 "illegal char '$c' in state NRM while parsing value for $name.\n" 330 . " $string\n" 331 . ' ' . ' 'x$i . "^\n"; 332 $next_item = 1; 333 $state = STP; 334 } 335 } elsif ($state == STR) { 336 if ($c eq "'") { 337 $state = NRM; 338 $next_item = 1; 339 } elsif ($c eq '\\') { 340 $state = ESC; 341 } else { 342 $curr_item .= $c; 343 } 344 } elsif ($state == ESC) { 345 $curr_item .= $c; 346 $state = STR; 347 } elsif ($state == STP) { 348 last; 349 } else { 350 die "unexpected state $state while parsing value for $name.\n"; 351 } 352 #print "i=$i c=$c state=$state curr_item=$curr_item next_item=$next_item\n"; 353 # actions 354 if ($next_item) { 355 push @items, $curr_item; 356 $curr_item = ''; 357 $next_item = 0; 358 #print "stored item to list\n"; 359 } 360 } 361 362 return \@items, \@errors; 363 } 364 365 sub parse_normal_value { 366 my ($name, $string) = @_; 367 my ($items, $errors) = parse_normal_list($name, $string); 368 push @$errors, "only one item allowed in value for $name.\n" if @$items > 1; 369 return shift @$items, $errors; 370 } 371 372 # this now works for keywords is embedded spaces (which are later stripped out 373 # by kwtidy) but now it doesn't work for values with double quotes or no quotes! 374 sub parse_keywords { 375 my $string = shift; 376 my ($items, $errors) = parse_normal_list('KEYWORDS', $string); 377 if (@$errors) { 378 warn "errors while parsing KEYWORDS list:\n@$errors\n" 379 . "Partially-parsed KEYWORDS: @$items\n" 380 . "Resorting to old-style KEYWORDS parsing...\n"; 381 @$items = split /(?:,|\s)+/, $string; 382 warn "Old-style parse result: ", join('|', @$items), "\n"; 383 } 384 return map { kwtidy($_) } @$items; 385 } 386 387 sub kwtidy { 388 my $keyword = shift; 389 $keyword =~ s/\W//g; 390 $keyword =~ s/_//g; 391 return lc $keyword; 392 } 393 394 sub parse_problems { 395 my $string = shift; 396 $string =~ s/\D/ /g; 397 return grep { /\S/ } split /\s+/, $string; 398 } 399 400 =head2 format_tags 401 402 format_tags($tags, $mintextbook); 403 404 Given a reference to a hash of tags, return a list of strings representing said 405 tags. The strings do not begin with the standard NPL comment prefix ("## ") or 406 end with newlines. These must be added by the caller if the strings are to be 407 inserted into a PG source file. 408 409 One complication is the DESCRIPTION field, which contains embedded newlines. If 410 a DESCRIPTION tag occurs in %$tags, it will be formatted with embedded newlines 411 but without a trailing newline. For example, after this code executes, 412 413 $tags = { DESCRIPTION => "line one\nline two\nline three" }; 414 ($desc) = format_tags($tags); 415 416 $desc will contain the string: 417 418 "DESCRIPTION\nline one\nline two\nline three\nENDDESCRIPTION" 419 420 To account for this when writing to a PG file, you could use: 421 422 foreach my $string (format_tags($tags)) { 423 $string =~ s/^/## /gm; 424 print PGFILE "$string\n"; 425 } 426 427 =cut 428 429 sub format_tags { 430 my ($tags, $mintextbook) = @_; 431 $mintextbook ||= 1; 432 my @result; 433 my @ordered_fields = grep { exists $tags->{$_} } @global_fields, "textbooks"; 434 foreach my $field (@ordered_fields) { 435 my $value = $tags->{$field}; 436 if ($field eq "DESCRIPTION") { 437 push @result, format_description($value); 438 } elsif ($field eq "textbooks") { 439 push @result, format_textbooks($value, $mintextbook); 440 } else { 441 push @result, format_tag($field, $value); 442 } 443 } 444 return @result; 445 } 446 447 sub format_tag { 448 my ($field, $value, $n) = @_; 449 my $tag = $field2tag{$field} || $field; 450 451 # problems are always listed in a single string in the tag. 452 if ($field eq "problem") { 453 $value = format_problems($value); 454 } 455 456 # if we have an arrayref, we represent it as multiple strings in one tag. 457 if (ref $value) { 458 $value = join(',', map { "'$_'" } @$value); 459 } elsif (defined $value) { 460 $value = "'$value'"; 461 } else { 462 warn "value is not defined for field $field!\n"; 463 $value = "''"; 464 } 465 466 if (defined $n) { 467 return "$tag$n($value)"; 468 } else { 469 return "$tag($value)"; 470 } 471 } 472 473 sub format_description { 474 my $value = shift; 475 return "DESCRIPTION\n$value\nENDDESCRIPTION"; 476 } 477 478 sub format_textbooks { 479 my ($textbook, $n) = @_; 480 my @textbooks = @$textbook; 481 my @result; 482 foreach my $textbook (@textbooks) { 483 push @result, format_textbook($textbook, $n); 484 $n++; 485 } 486 return @result; 487 } 488 489 sub format_textbook { 490 my ($textbook, $n) = @_; 491 492 # combine chapter/section into single section tag 493 my $chapter = $textbook->{chapter}; 494 my $section = $textbook->{section}; 495 if (defined $chapter or defined $section) { 496 $section = ".$section" if defined $section; 497 $section = "$chapter$section" if defined $chapter; 498 delete $textbook->{chapter}; 499 $textbook->{section} = $section; 500 } 501 502 my @result; 503 my @ordered_fields = grep { exists $textbook->{$_} } @textbook_fields; 504 foreach my $field (@ordered_fields) { 505 my $value = $textbook->{$field}; 506 push @result, format_tag($field, $value, $n); 507 } 508 return @result; 509 } 510 511 sub format_problems { 512 my $first = shift; 513 my @problems; 514 if (ref $first) { 515 @problems = @$first; 516 } else { 517 @problems = ($first, @_); 518 } 519 520 return join(',', @problems); 521 } 522 523 =head2 gen_find_tags 524 525 gen_find_tags($pattern, $action, $extra_editing_info); 526 527 Generates an anonymous subroutine suitable for passing the the find() function 528 of the File::Find module. The no_chdir=>1 option must be passed to find() for 529 the generated subroutine to operate properly. 530 531 $pattern is a reference to a hash describing the fields that must match. $action 532 is a reference to a subroutine that will be called if all fields match. 533 $extra_editing_info is passed to read_tags(). 534 535 Legal fields for $pattern are as follows: 536 537 B<Global fields:> DESCRIPTION, KEYWORDS, DBsubject, DBchapter, DBsection, Date, 538 Institution, Author. (The experimental UsesAuxiliaryFiles field may be supported 539 in the future.) 540 541 B<Text-specific fields:> title, edition, author, chapter, section, problem. 542 543 If multiple text-specific keys are given, then all must match for a single 544 textbook. 545 546 $action is called as follows: 547 548 $action->($path, $tags, $text_index) 549 550 There $path the path to the matching file, $tags a reference to the tag hash for 551 the matching file, and $text_index the index into the @{$tags->{textbooks}} array 552 if $pattern included textbook-specific tags. 553 554 =cut 555 556 sub gen_find_tags { 557 my ($pattern, $action, $extra_editing_info) = @_; 558 return sub { 559 return unless /\.pg$/ and -f $File::Find::name; 560 561 my $name = $File::Find::name; 562 #my $relpath = $name; 563 #$relpath =~ s/^$src\///; 564 565 my %tags; 566 567 open my $fh, "<", $name or do { 568 warn "skipping $name: $!\n"; 569 return; 570 }; 571 read_tags($fh, \%tags, $extra_editing_info); 572 close $fh; 573 574 my (%global_pattern, %textbook_pattern); 575 foreach my $field (@global_fields) { 576 $global_pattern{$field} = $pattern->{$field} if exists $pattern->{$field}; 577 } 578 foreach my $field (@textbook_fields) { 579 $textbook_pattern{$field} = $pattern->{$field} if exists $pattern->{$field}; 580 } 581 582 if (%global_pattern) { 583 return unless match_global(\%tags, \%global_pattern); 584 } 585 my $text_index; 586 if (%textbook_pattern) { 587 $text_index = match_textbook(\%tags, \%textbook_pattern); 588 return unless $text_index >= 0; 589 } 590 591 $action->($name, \%tags, $text_index); 592 }; 593 } 594 595 sub match_global { 596 my ($tags, $matches) = @_; 597 foreach my $field (keys %$matches) { 598 return 0 unless $tags->{$field} eq $matches->{$field}; 599 } 600 return 1; 601 } 602 603 sub match_textbook { 604 my ($tags, $matches) = @_; 605 return -1 unless defined $tags->{textbooks}; 606 my @textbooks = @{$tags->{textbooks}}; 607 608 #textbook: foreach my $textbook (@{$tags->{textbooks}}) { 609 textbook: foreach my $i (0 .. $#{$tags->{textbooks}}) { 610 my $textbook = $tags->{textbooks}[$i]; 611 foreach my $field (keys %$matches) { 612 next if $field !~ /^(title|edition|author|chapter|section|problem)$/; 613 next textbook unless $textbook->{$field} eq $matches->{$field}; 614 } 615 #warn "matched text i=$i: ", Dumper($textbook); 616 return $i; 617 } 618 return -1; 619 } 620 621 =back 622 623 =cut 624 625 1;
| aubreyja at gmail dot com | ViewVC Help |
| Powered by ViewVC 1.0.9 |