[system] / branches / rel-2-4-patches / webwork2 / lib / WeBWorK / NPL.pm Repository:
ViewVC logotype

View of /branches/rel-2-4-patches/webwork2/lib/WeBWorK/NPL.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5763 - (download) (as text) (annotate)
Tue Jun 24 23:02:16 2008 UTC (4 years, 10 months ago) by gage
File size: 17088 byte(s)
adding missing files

    1 ################################################################################
    2 # WeBWorK Online Homework Delivery System
    3 # Copyright © 2000-2007 The WeBWorK Project, http://openwebwork.sf.net/
    4 # $CVSHeader: webwork2/lib/WeBWorK/NPL.pm,v 1.2 2008/02/04 22:56:23 sh002i Exp $
    5 #
    6 # This program is free software; you can redistribute it and/or modify it under
    7 # the terms of either: (a) the GNU General Public License as published by the
    8 # Free Software Foundation; either version 2, or (at your option) any later
    9 # version, or (b) the "Artistic License" which comes with this package.
   10 #
   11 # This program is distributed in the hope that it will be useful, but WITHOUT
   12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   13 # FOR A PARTICULAR PURPOSE.  See either the GNU General Public License or the
   14 # Artistic License for more details.
   15 #
   16 # Contributed by W.H. Freeman; Bedford, Freeman, and Worth Publishing Group.
   17 ################################################################################
   18 
   19 package WeBWorK::NPL;
   20 use base 'Exporter';
   21 
   22 =head1 NAME
   23 
   24 WeBWorK::NPL - Parse formats used by the National Problem Library.
   25 
   26 =head1 SYNOPSIS
   27 
   28   use WeBWorK::NPL qw/read_textbooks read_tags format_tags gen_find_tags/;
   29 
   30   open TEXTS, "<", "Textbooks";
   31   my $textbooks = [];
   32   read_textbooks(\*TEXTS, $textbooks);
   33 
   34   open PGFILE, "<", "file.pg";
   35   my $tags = {};
   36   read_tags(\*PGFILE, $tags);
   37 
   38   foreach my $string (format_tags($tags)) {
   39     $string =~ s/^/## /gm;
   40     print "TAG: $string\n";
   41   }
   42 
   43   use File::Find;
   44   my $process = sub { print "Found: $_[0]\n" };
   45   my $wanted = gen_find_tags({author=>'Rogawski'}, $process);
   46   find({wanted=>$wanted}, @ARGV);
   47 
   48 =head1 DESCRIPTION
   49 
   50 This package contains parsing routines for the various data formats associated
   51 with the National Problem Library.
   52 
   53 =cut
   54 
   55 use strict;
   56 use warnings;
   57 use Data::Dumper;
   58 
   59 our @EXPORT_OK = qw(
   60   read_textbooks
   61   read_tags
   62   format_tags
   63   gen_find_tags
   64 );
   65 
   66 our @global_fields = qw(DESCRIPTION KEYWORDS DBsubject DBchapter DBsection Date
   67 Institution Author UsesAuxiliaryFiles);
   68 our @textbook_fields = qw(title edition author chapter section problem);
   69 
   70 our %tag2field = ( TitleText => "title", EditionText => "edition",
   71 AuthorText => "author", Section => "section", Problem => "problem", );
   72 our %field2tag = reverse %tag2field;
   73 
   74 =head1 FUNCTIONS
   75 
   76 =head2 read_textbooks
   77 
   78   read_textbooks($fh, $arrayref)
   79 
   80 Reads a Textbooks file opened for reading on $fh and appends its contents to
   81 $arrayref. Each item appended to $arrayref is a reference to a hash containing
   82 the following keys:
   83 
   84   _title      The title of the textbook
   85   _edition    The edition of the textbook
   86   _author     The author of the textbook
   87   1           The name of chapter 1
   88   1.1         The name of section 1.1
   89   1.2         The name of section 1.2
   90   ...
   91   2           The name of chapter 2
   92   2.1         The name of section 2.1
   93   ...
   94 
   95 Since the number of sections in a textbook is typically small, it is not terribly
   96 inefficient to pull chapters or sections out:
   97 
   98   @chapters = grep { /^\d+$/ } keys %textbook;
   99   @sections = grep { /^\d+\.\d+$/ } keys %textbook;
  100 
  101 =cut
  102 
  103 sub read_textbooks {
  104   my ($fh, $result) = @_;
  105 
  106   my %curr_textbook;
  107 
  108   while (<$fh>) {
  109     s/#.*$//g;
  110     next unless /\S/;
  111     s/^\s*//;
  112     s/\s*$//;
  113 
  114     if (/^(TitleText|EditionText|AuthorText)\(\s*'(.*?)'\s*\)/) {
  115       my $field = $tag2field{$1};
  116       my $value = $2;
  117       if (exists $curr_textbook{"_$field"}) {
  118         # repeated tag -- this is a new textbook
  119         push @$result, {%curr_textbook};
  120         %curr_textbook = ();
  121       }
  122       $curr_textbook{"_$field"} = $value;
  123     } elsif (/^(\d+)(?:\.(\d+))?\s*>>>\s*(.*)$/) {
  124       my $chapter = $1;
  125       my $section = $2;
  126       my $name = $3;
  127       if (defined $section and length $section > 0) {
  128         $curr_textbook{"$chapter.$section"} = $name;
  129       } else {
  130         $curr_textbook{$chapter} = $name;
  131       }
  132     }
  133   }
  134   push @$result, {%curr_textbook};
  135 }
  136 
  137 =head2 read_tags
  138 
  139   read_tags($fh, $hashref, $extra_editing_info);
  140 
  141 Reads the NPL tags from a PG file opened for reading on $fh and stores the tags
  142 in %$hashref. The following keys may be added to %$hashref:
  143 
  144   DESCRIPTION
  145   KEYWORDS
  146   DBsubject
  147   DBchapter
  148   DBsection
  149   Date
  150   Institution
  151   Author
  152   UsesAuxiliaryFiles      (experimental, subject to change)
  153   textbooks               (arrayref)
  154 
  155 The value for the C<textbooks> key will be a reference to an array of textbook
  156 hashes containing the textbook tags from the source file. In each textbook hash,
  157 entries with empty values (e.g. C<TitleText1('')>) will be omitted. This is to
  158 deal with the large number of empty-valued tags in the NPL. The keys of each
  159 textbook hash will be among:
  160 
  161   title
  162   edition
  163   author
  164   chapter
  165   section
  166   problem        (arrayref)
  167 
  168 The value for the C<problem> key will be a reference to an array of problem
  169 numbers.
  170 
  171 If $extra_editing_info is true, special hash items _pos, _rest, and _maxtextbook
  172 will also be added to %$hashref.
  173 
  174 _pos will contain the position of the first byte of the next line after the last
  175 tag in the file. _rest will contain the bytes of the "rest" of the file, after
  176 all tags, starting at _pos. _maxtextbook will contain the highest number used to
  177 identify a textbook in the file. (e.g. If TitleText1 and TitleText3 appear in
  178 the  file, there will only be two items in the textbooks array, but _maxtextbook
  179 will be 3.)
  180 
  181 This is useful for appending tags to a file which contains existing tags, where
  182 the new tags should appear immediately after the existing tags:
  183 
  184   open PGFILE, "+<", "file.pg";
  185   my $tags = {};
  186   read_tags(\*PGFILE, $tags, 1);
  187   my $pos = $tags{_pos};
  188   my $rest = $tags{_rest};
  189   seek PGFILE, $pos, 0;
  190   print PGFILE "## SomeNewTag('foo','bar')\n";
  191   print PGFILE $rest;
  192   close PGFILE;
  193 
  194 =cut
  195 
  196 sub read_tags {
  197   my ($file, $result, $extra_editing_info) = @_;
  198 
  199   my $fh;
  200   if (ref $file) {
  201     $fh = $file;
  202   } elsif (defined $file and not ref $file) {
  203     $fh = new IO::File($file, 'r');
  204   }
  205 
  206   my $pos;
  207   my $rest = '';
  208   my $maxtextbook;
  209   while (<$fh>) {
  210     #if (0) {
  211     if (/^(.*?\#.*?)(\s*)DESCRIPTION/) {
  212       my $prefix = $1;
  213       my $whitespace = $2;
  214       my $description = '';
  215       while (<$fh>) {
  216         if (/\#.*ENDDESCRIPTION/) {
  217           chomp $description;
  218           $result->{DESCRIPTION} = $description if length $description > 0;
  219           last;
  220         } else {
  221           # handle prefix and whitespace separately so that we can still
  222           # chop the prefix off even if people are being careless about
  223           # whitespace. :P
  224           s/^$prefix//;
  225           s/^$whitespace//;
  226           $description .= $_;
  227         }
  228       }
  229       if ($extra_editing_info) {
  230         $pos = tell $fh;
  231         $rest = '';
  232       }
  233     } elsif (/\#.*KEYWORDS\((.*)\)/) {
  234       my $keywords = $1;
  235       push @{$result->{KEYWORDS}}, parse_keywords($keywords);
  236       if ($extra_editing_info) {
  237         $pos = tell $fh;
  238         $rest = '';
  239       }
  240     } elsif (/\#.*(DBsubject|DBchapter|DBsection|Date|Institution|Author)\(\s*(.*?)\s*\)/) {
  241       my $field = $1;
  242       my $value = $2;
  243       my ($parsed_value, $parse_errors) = parse_normal_value($field, $value);
  244       if (@$parse_errors) {
  245         warn "error while parsing value \"$value\" in field $field:\n"
  246           . join('', @$parse_errors)
  247           . "value may be incomplete. use with caution.\n"
  248           . "(line $. of file $file)\n";
  249       }
  250       $result->{$field} = $parsed_value;
  251       if ($extra_editing_info) {
  252         $pos = tell $fh;
  253         $rest = '';
  254       }
  255     } elsif (/\#.*(TitleText|EditionText|AuthorText|Section|Problem)(\d+)\(\s*'(.*?)'\s*\)/) {
  256       my $field = $tag2field{$1};
  257       my $num = $2;
  258       my $value = $3;
  259       next unless $value =~ /\S/;
  260       $value = [ parse_problems($value) ] if $field eq "problem";
  261       if ($field eq "section") {
  262         my ($ch, $sec) = split /\./, $value;
  263         $result->{textbooks}[$num]{chapter} = $ch;
  264         $result->{textbooks}[$num]{section} = $sec if defined $sec and length $sec > 0;
  265       } else {
  266         $result->{textbooks}[$num]{$field} = $value;
  267       }
  268       if ($extra_editing_info) {
  269         $pos = tell $fh;
  270         $rest = '';
  271         $maxtextbook = $num if not defined $maxtextbook or $num > $maxtextbook;
  272       }
  273     } elsif (/\#.*(UsesAuxiliaryFiles)\(\s*(.*?)\s*\)/) {
  274       my $field = $1;
  275       my $value = $2;
  276       my ($parsed_value, $parse_errors) = parse_normal_list($field, $value);
  277       if (@$parse_errors) {
  278         warn "error while parsing list value \"$value\" in field $field:\n"
  279           . join('', @$parse_errors)
  280           . "value may be incomplete. use with caution.\n"
  281           . "(line $. of file $file)\n";
  282       }
  283       $result->{$field} = $parsed_value;
  284       if ($extra_editing_info) {
  285         $pos = tell $fh;
  286         $rest = '';
  287       }
  288     } else {
  289       if ($extra_editing_info) {
  290         $rest .= $_;
  291       }
  292     }
  293   }
  294 
  295   # remove holes in textbook numbering
  296   @{$result->{textbooks}} = grep { defined } @{$result->{textbooks}};
  297   delete $result->{textbooks} unless @{$result->{textbooks}};
  298 
  299   if ($extra_editing_info) {
  300     $result->{_pos} = $pos;
  301     $result->{_rest} = $rest;
  302     $result->{_maxtextbook} = $maxtextbook;
  303   }
  304 }
  305 
  306 sub parse_normal_list {
  307   my ($name, $string) = @_;
  308 
  309   use constant NRM=>0;
  310   use constant STR=>1;
  311   use constant ESC=>2;
  312   use constant STP=>3;
  313   my $state = NRM;
  314   my @errors;
  315   my @items;
  316   my $curr_item = '';
  317   my $next_item = 0;
  318   foreach my $i (0 .. length($string)-1) {
  319     my $c = substr($string,$i,1);
  320     #print "i=$i c=$c state=$state curr_item=$curr_item next_item=$next_item\n";
  321     # state changes
  322     if ($state == NRM) {
  323       if ($c eq "'") {
  324         $state = STR;
  325       } elsif ($c eq ',' or $c eq ' ') {
  326         # do nothing -- closequote already consumed curr_item
  327       } else {
  328         push @errors,
  329           "illegal char '$c' in state NRM while parsing value for $name.\n"
  330           . "    $string\n"
  331           . '    ' . ' 'x$i . "^\n";
  332         $next_item = 1;
  333         $state = STP;
  334       }
  335     } elsif ($state == STR) {
  336       if ($c eq "'") {
  337         $state = NRM;
  338         $next_item = 1;
  339       } elsif ($c eq '\\') {
  340         $state = ESC;
  341       } else {
  342         $curr_item .= $c;
  343       }
  344     } elsif ($state == ESC) {
  345       $curr_item .= $c;
  346       $state = STR;
  347     } elsif ($state == STP) {
  348       last;
  349     } else {
  350       die "unexpected state $state while parsing value for $name.\n";
  351     }
  352     #print "i=$i c=$c state=$state curr_item=$curr_item next_item=$next_item\n";
  353     # actions
  354     if ($next_item) {
  355       push @items, $curr_item;
  356       $curr_item = '';
  357       $next_item = 0;
  358       #print "stored item to list\n";
  359     }
  360   }
  361 
  362   return \@items, \@errors;
  363 }
  364 
  365 sub parse_normal_value {
  366   my ($name, $string) = @_;
  367   my ($items, $errors) = parse_normal_list($name, $string);
  368   push @$errors, "only one item allowed in value for $name.\n" if @$items > 1;
  369   return shift @$items, $errors;
  370 }
  371 
  372 # this now works for keywords is embedded spaces (which are later stripped out
  373 # by kwtidy) but now it doesn't work for values with double quotes or no quotes!
  374 sub parse_keywords {
  375   my $string = shift;
  376   my ($items, $errors) = parse_normal_list('KEYWORDS', $string);
  377   if (@$errors) {
  378     warn "errors while parsing KEYWORDS list:\n@$errors\n"
  379       . "Partially-parsed KEYWORDS: @$items\n"
  380       . "Resorting to old-style KEYWORDS parsing...\n";
  381     @$items = split /(?:,|\s)+/, $string;
  382     warn "Old-style parse result: ", join('|', @$items), "\n";
  383   }
  384   return map { kwtidy($_) } @$items;
  385 }
  386 
  387 sub kwtidy {
  388   my $keyword = shift;
  389   $keyword =~ s/\W//g;
  390   $keyword =~ s/_//g;
  391   return lc $keyword;
  392 }
  393 
  394 sub parse_problems {
  395   my $string = shift;
  396   $string =~ s/\D/ /g;
  397   return grep { /\S/ } split /\s+/, $string;
  398 }
  399 
  400 =head2 format_tags
  401 
  402   format_tags($tags, $mintextbook);
  403 
  404 Given a reference to a hash of tags, return a list of strings representing said
  405 tags. The strings do not begin with the standard NPL comment prefix ("## ") or
  406 end with newlines. These must be added by the caller if the strings are to be
  407 inserted into a PG source file.
  408 
  409 One complication is the DESCRIPTION field, which contains embedded newlines. If
  410 a DESCRIPTION tag occurs in %$tags, it will be formatted with embedded newlines
  411 but without a trailing newline. For example, after this code executes,
  412 
  413   $tags = { DESCRIPTION => "line one\nline two\nline three" };
  414   ($desc) = format_tags($tags);
  415 
  416 $desc will contain the string:
  417 
  418   "DESCRIPTION\nline one\nline two\nline three\nENDDESCRIPTION"
  419 
  420 To account for this when writing to a PG file, you could use:
  421 
  422   foreach my $string (format_tags($tags)) {
  423     $string =~ s/^/## /gm;
  424     print PGFILE "$string\n";
  425   }
  426 
  427 =cut
  428 
  429 sub format_tags {
  430   my ($tags, $mintextbook) = @_;
  431   $mintextbook ||= 1;
  432   my @result;
  433   my @ordered_fields = grep { exists $tags->{$_} } @global_fields, "textbooks";
  434   foreach my $field (@ordered_fields) {
  435     my $value = $tags->{$field};
  436     if ($field eq "DESCRIPTION") {
  437       push @result, format_description($value);
  438     } elsif ($field eq "textbooks") {
  439       push @result, format_textbooks($value, $mintextbook);
  440     } else {
  441       push @result, format_tag($field, $value);
  442     }
  443   }
  444   return @result;
  445 }
  446 
  447 sub format_tag {
  448   my ($field, $value, $n) = @_;
  449   my $tag = $field2tag{$field} || $field;
  450 
  451   # problems are always listed in a single string in the tag.
  452   if ($field eq "problem") {
  453     $value = format_problems($value);
  454   }
  455 
  456   # if we have an arrayref, we represent it as multiple strings in one tag.
  457   if (ref $value) {
  458     $value = join(',', map { "'$_'" } @$value);
  459   } elsif (defined $value) {
  460     $value = "'$value'";
  461   } else {
  462     warn "value is not defined for field $field!\n";
  463     $value = "''";
  464   }
  465 
  466   if (defined $n) {
  467     return "$tag$n($value)";
  468   } else {
  469     return "$tag($value)";
  470   }
  471 }
  472 
  473 sub format_description {
  474   my $value = shift;
  475   return "DESCRIPTION\n$value\nENDDESCRIPTION";
  476 }
  477 
  478 sub format_textbooks {
  479   my ($textbook, $n) = @_;
  480   my @textbooks = @$textbook;
  481   my @result;
  482   foreach my $textbook (@textbooks) {
  483     push @result, format_textbook($textbook, $n);
  484     $n++;
  485   }
  486   return @result;
  487 }
  488 
  489 sub format_textbook {
  490   my ($textbook, $n) = @_;
  491 
  492   # combine chapter/section into single section tag
  493   my $chapter = $textbook->{chapter};
  494   my $section = $textbook->{section};
  495   if (defined $chapter or defined $section) {
  496     $section = ".$section" if defined $section;
  497     $section = "$chapter$section" if defined $chapter;
  498     delete $textbook->{chapter};
  499     $textbook->{section} = $section;
  500   }
  501 
  502   my @result;
  503   my @ordered_fields = grep { exists $textbook->{$_} } @textbook_fields;
  504   foreach my $field (@ordered_fields) {
  505     my $value = $textbook->{$field};
  506     push @result, format_tag($field, $value, $n);
  507   }
  508   return @result;
  509 }
  510 
  511 sub format_problems {
  512   my $first = shift;
  513   my @problems;
  514   if (ref $first) {
  515     @problems = @$first;
  516   } else {
  517     @problems = ($first, @_);
  518   }
  519 
  520   return join(',', @problems);
  521 }
  522 
  523 =head2 gen_find_tags
  524 
  525   gen_find_tags($pattern, $action, $extra_editing_info);
  526 
  527 Generates an anonymous subroutine suitable for passing the the find() function
  528 of the File::Find module. The no_chdir=>1 option must be passed to find() for
  529 the generated subroutine to operate properly.
  530 
  531 $pattern is a reference to a hash describing the fields that must match. $action
  532 is a reference to a subroutine that will be called if all fields match.
  533 $extra_editing_info is passed to read_tags().
  534 
  535 Legal fields for $pattern are as follows:
  536 
  537 B<Global fields:> DESCRIPTION, KEYWORDS, DBsubject, DBchapter, DBsection, Date,
  538 Institution, Author. (The experimental UsesAuxiliaryFiles field may be supported
  539 in the future.)
  540 
  541 B<Text-specific fields:> title, edition, author, chapter, section, problem.
  542 
  543 If multiple text-specific keys are given, then all must match for a single
  544 textbook.
  545 
  546 $action is called as follows:
  547 
  548   $action->($path, $tags, $text_index)
  549 
  550 There $path the path to the matching file, $tags a reference to the tag hash for
  551 the matching file, and $text_index the index into the @{$tags->{textbooks}} array
  552 if $pattern included textbook-specific tags.
  553 
  554 =cut
  555 
  556 sub gen_find_tags {
  557   my ($pattern, $action, $extra_editing_info) = @_;
  558   return sub {
  559     return unless /\.pg$/ and -f $File::Find::name;
  560 
  561     my $name = $File::Find::name;
  562     #my $relpath = $name;
  563     #$relpath =~ s/^$src\///;
  564 
  565     my %tags;
  566 
  567     open my $fh, "<", $name or do {
  568       warn "skipping $name: $!\n";
  569       return;
  570     };
  571     read_tags($fh, \%tags, $extra_editing_info);
  572     close $fh;
  573 
  574     my (%global_pattern, %textbook_pattern);
  575     foreach my $field (@global_fields) {
  576       $global_pattern{$field} = $pattern->{$field} if exists $pattern->{$field};
  577     }
  578     foreach my $field (@textbook_fields) {
  579       $textbook_pattern{$field} = $pattern->{$field} if exists $pattern->{$field};
  580     }
  581 
  582     if (%global_pattern) {
  583       return unless match_global(\%tags, \%global_pattern);
  584     }
  585     my $text_index;
  586     if (%textbook_pattern) {
  587       $text_index = match_textbook(\%tags, \%textbook_pattern);
  588       return unless $text_index >= 0;
  589     }
  590 
  591     $action->($name, \%tags, $text_index);
  592   };
  593 }
  594 
  595 sub match_global {
  596   my ($tags, $matches) = @_;
  597   foreach my $field (keys %$matches) {
  598     return 0 unless $tags->{$field} eq $matches->{$field};
  599   }
  600   return 1;
  601 }
  602 
  603 sub match_textbook {
  604   my ($tags, $matches) = @_;
  605   return -1 unless defined $tags->{textbooks};
  606   my @textbooks = @{$tags->{textbooks}};
  607 
  608   #textbook: foreach my $textbook (@{$tags->{textbooks}}) {
  609   textbook: foreach my $i (0 .. $#{$tags->{textbooks}}) {
  610     my $textbook = $tags->{textbooks}[$i];
  611     foreach my $field (keys %$matches) {
  612       next if $field !~ /^(title|edition|author|chapter|section|problem)$/;
  613       next textbook unless $textbook->{$field} eq $matches->{$field};
  614     }
  615     #warn "matched text i=$i: ", Dumper($textbook);
  616     return $i;
  617   }
  618   return -1;
  619 }
  620 
  621 =back
  622 
  623 =cut
  624 
  625 1;

aubreyja at gmail dot com
ViewVC Help
Powered by ViewVC 1.0.9