[system] / trunk / xmlrpc / RPC / RPC-XML-0.25 / lib / RPC / XML / Parser.pm Repository:
ViewVC logotype

View of /trunk/xmlrpc/RPC/RPC-XML-0.25/lib/RPC/XML/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 279 - (download) (as text) (annotate)
Fri May 17 21:44:04 2002 UTC (17 years, 6 months ago) by gage
File size: 18976 byte(s)
Experimental xmlrpc WeBWorK webservices

    1 ###############################################################################
    2 #
    3 # This file copyright (c) 2001 by Randy J. Ray, all rights reserved
    4 #
    5 # Copying and distribution are permitted under the terms of the Artistic
    6 # License as distributed with Perl versions 5.005 and later. See
    7 # http://language.perl.com/misc/Artistic.html
    8 #
    9 ###############################################################################
   10 #
   11 #   $Id$
   12 #
   13 #   Description:    This is the RPC::XML::Parser class, a container for the
   14 #                   XML::Parser class. It was moved here from RPC::XML in
   15 #                   order to reduce the weight of that module.
   16 #
   17 #   Functions:      new
   18 #                   parse
   19 #                   message_init
   20 #                   tag_start
   21 #                   error
   22 #                   stack_error
   23 #                   tag_end
   24 #                   char_data
   25 #
   26 #   Libraries:      RPC::XML
   27 #                   XML::Parser
   28 #
   29 #   Global Consts:  Uses $RPC::XML::ERROR
   30 #
   31 #   Environment:    None.
   32 #
   33 ###############################################################################
   34 
   35 package RPC::XML::Parser;
   36 
   37 use 5.005;
   38 use strict;
   39 use vars qw($VERSION @ISA);
   40 use subs qw(error stack_error new message_init message_end tag_start tag_end
   41             char_data parse);
   42 
   43 # These constants are only used by the internal stack machine
   44 use constant PARSE_ERROR => 0;
   45 use constant METHOD      => 1;
   46 use constant METHODSET   => 2;
   47 use constant RESPONSE    => 3;
   48 use constant RESPONSESET => 4;
   49 use constant STRUCT      => 5;
   50 use constant ARRAY       => 6;
   51 use constant DATATYPE    => 7;
   52 use constant ATTR_SET    => 8;
   53 use constant METHODNAME  => 9;
   54 use constant VALUEMARKER => 10;
   55 use constant PARAMSTART  => 11;
   56 use constant PARAM       => 12;
   57 use constant STRUCTMEM   => 13;
   58 use constant STRUCTNAME  => 14;
   59 use constant DATAOBJECT  => 15;
   60 use constant PARAMLIST   => 16;
   61 use constant NAMEVAL     => 17;
   62 use constant MEMBERENT   => 18;
   63 use constant METHODENT   => 19;
   64 use constant RESPONSEENT => 20;
   65 use constant FAULTENT    => 21;
   66 use constant FAULTSTART  => 22;
   67 
   68 # This is to identify valid types
   69 use constant VALIDTYPES  => { map { $_, 1 } qw(int i4 string double reference
   70                                                boolean dateTime.iso8601
   71                                                base64) };
   72 # This maps XML tags to stack-machine tokens
   73 use constant TAG2TOKEN   => { methodCall        => METHOD,
   74                               methodResponse    => RESPONSE,
   75                               methodName        => METHODNAME,
   76                               params            => PARAMSTART,
   77                               param             => PARAM,
   78                               value             => VALUEMARKER,
   79                               fault             => FAULTSTART,
   80                               array             => ARRAY,
   81                               struct            => STRUCT,
   82                               member            => STRUCTMEM,
   83                               name              => STRUCTNAME  };
   84 
   85 use XML::Parser;
   86 
   87 require RPC::XML;
   88 
   89 $VERSION = do { my @r=(q$Revision$=~/\d+/g); sprintf "%d."."%02d"x$#r,@r };
   90 
   91 1;
   92 
   93 ###############################################################################
   94 #
   95 #   Sub Name:       new
   96 #
   97 #   Description:    Constructor. Save any important attributes, leave the
   98 #                   heavy lifting for the parse() routine and XML::Parser.
   99 #
  100 #   Arguments:      NAME      IN/OUT  TYPE      DESCRIPTION
  101 #                   $class    in      scalar    Class we're initializing
  102 #                   %attr     in      hash      Any extras the caller wants
  103 #
  104 #   Globals:        $RPC::XML::ERROR
  105 #
  106 #   Environment:    None.
  107 #
  108 #   Returns:        Success:    object ref
  109 #                   Failure:    undef
  110 #
  111 ###############################################################################
  112 sub new
  113 {
  114     my $class = shift;
  115     my %attrs = @_;
  116 
  117     my $self = {};
  118     if (keys %attrs)
  119     {
  120         for (keys %attrs) { $self->{$_} = $attrs{$_} }
  121     }
  122 
  123     bless $self, $class;
  124 }
  125 
  126 ###############################################################################
  127 #
  128 #   Sub Name:       parse
  129 #
  130 #   Description:    Parse the requested string or stream. This behaves mostly
  131 #                   like parse() in the XML::Parser namespace, but does some
  132 #                   extra, as well.
  133 #
  134 #   Arguments:      NAME      IN/OUT  TYPE      DESCRIPTION
  135 #                   $self     in      ref       Object of this class
  136 #                   $stream   in      scalar    Either the string to parse or
  137 #                                                 an open filehandle of sorts
  138 #
  139 #   Globals:        None.
  140 #
  141 #   Environment:    None.
  142 #
  143 #   Returns:        Success:    ref to request or response object
  144 #                   Failure:    error string
  145 #
  146 ###############################################################################
  147 sub parse
  148 {
  149     my $self = shift;
  150     my $stream = shift;
  151 
  152     my $parser = XML::Parser->new(Namespaces => 0, ParseParamEnt => 0,
  153                                   Handlers =>
  154                                   {
  155                                    Init  => sub { message_init $self, @_ },
  156                                    Start => sub { tag_start $self, @_ },
  157                                    End   => sub { tag_end $self, @_ },
  158                                    Char  => sub { char_data $self, @_ },
  159                                   });
  160 
  161     $parser->parse($stream);
  162     # Look at the top-most marker, it'll need to be one of the end cases
  163     my $marker = pop(@{$self->{stack}});
  164     # There should be only on item on the stack after it
  165     my $retval = pop(@{$self->{stack}});
  166     # If the top-most marker isn't the error marker, check the stack
  167     $retval = 'RPC::XML Error: Extra data on parse stack at document end'
  168         if ($marker != PARSE_ERROR and (@{$self->{stack}}));
  169 
  170     $retval;
  171 }
  172 
  173 # This is called when a new document is about to start parsing
  174 sub message_init
  175 {
  176     my $robj = shift;
  177     my $self = shift;
  178 
  179     $robj->{stack} = [];
  180     $self;
  181 }
  182 
  183 # This gets called each time an opening tag is parsed
  184 sub tag_start
  185 {
  186     my $robj = shift;
  187     my $self = shift;
  188     my $elem = shift;
  189     my %attr = @_;
  190 
  191     $robj->{cdata} = '';
  192     return if ($elem eq 'data');
  193     if (TAG2TOKEN->{$elem})
  194     {
  195         push(@{$robj->{stack}}, TAG2TOKEN->{$elem});
  196     }
  197     elsif (VALIDTYPES->{$elem})
  198     {
  199         # All datatypes are represented on the stack by this generic token
  200         push(@{$robj->{stack}}, DATATYPE);
  201     }
  202     else
  203     {
  204         push(@{$robj->{stack}},
  205              "Unknown tag encountered: $elem", PARSE_ERROR);
  206         $self->finish;
  207     }
  208 }
  209 
  210 # Very simple error-text generator, just to eliminate heavy reduncancy in the
  211 # next sub:
  212 sub error
  213 {
  214     my $robj = shift;
  215     my $self = shift;
  216     my $mesg = shift;
  217     my $elem = shift || '';
  218 
  219     my $fmt = $elem ?
  220         '%s at document line %d, column %d (byte %d, closing tag %s)' :
  221         '%s at document line %d, column %d (byte %d)';
  222 
  223     push(@{$robj->{stack}},
  224          sprintf($fmt, $mesg, $self->current_line, $self->current_column,
  225                  $self->current_byte, $elem),
  226          PARSE_ERROR);
  227     $self->finish;
  228 }
  229 
  230 # A shorter-cut for stack integrity errors
  231 sub stack_error
  232 {
  233     my $robj = shift;
  234     my $self = shift;
  235     my $elem = shift;
  236 
  237     error($robj, $self, 'Stack corruption detected', $elem);
  238 }
  239 
  240 # This is a hairy subroutine-- what to do at the end-tag. The actions range
  241 # from simply new-ing a datatype all the way to building the final object.
  242 sub tag_end
  243 {
  244     my $robj = shift;
  245     my $self = shift;
  246     my $elem = shift;
  247 
  248     my ($op, $attr, $obj, $class, $list, $name, $err);
  249 
  250     return if ($elem eq 'data');
  251     # This should always be one of the stack machine ops defined above
  252     $op = pop(@{$robj->{stack}});
  253 
  254     # Decide what to do from here
  255     if (VALIDTYPES->{$elem})
  256     {
  257         # This is the closing tag of one of the data-types.
  258         ($class = lc $elem) =~ s/\./_/;
  259         # Some minimal data-integrity checking
  260         if ($class eq 'int' or $class eq 'i4')
  261         {
  262             return error($robj, $self, 'Bad integer data read')
  263                 unless ($robj->{cdata} =~ /^[-+]?\d+$/);
  264         }
  265         elsif ($class eq 'double')
  266         {
  267             return error($robj, $self, 'Bad floating-point data read')
  268                 unless ($robj->{cdata} =~
  269                         # Taken from perldata(1)
  270                         /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/);
  271         }
  272 
  273         $class = "RPC::XML::$class";
  274         $obj = $class->new($robj->{cdata});
  275         return error($robj, $self, 'Error instantiating data object: ' .
  276                             $RPC::XML::ERROR)
  277             unless ($obj);
  278         push(@{$robj->{stack}}, $obj, DATAOBJECT);
  279     }
  280     elsif ($elem eq 'value')
  281     {
  282         # For <value></value>, there should already be a dataobject, or else
  283         # the marker token in which case the CDATA is used as a string value.
  284         if ($op == DATAOBJECT)
  285         {
  286             ($op, $obj) = splice(@{$robj->{stack}}, -2);
  287             return stack_error($robj, $self, $elem)
  288                 unless ($op == VALUEMARKER);
  289         }
  290         elsif ($op == VALUEMARKER)
  291         {
  292             $obj = RPC::XML::string->new($robj->{cdata});
  293         }
  294         else
  295         {
  296             return error($robj, $self,
  297                          'No datatype found within <value> container');
  298         }
  299 
  300         push(@{$robj->{stack}}, $obj, DATAOBJECT);
  301     }
  302     elsif ($elem eq 'param')
  303     {
  304         # Almost like above, since this is really a NOP anyway
  305         return error($robj, $self, 'No <value> found within <param> container')
  306             unless ($op == DATAOBJECT);
  307         ($op, $obj) = splice(@{$robj->{stack}}, -2);
  308         return stack_error($robj, $self, $elem) unless ($op == PARAM);
  309         push(@{$robj->{stack}}, $obj, DATAOBJECT);
  310     }
  311     elsif ($elem eq 'params')
  312     {
  313         # At this point, there should be zero or more DATAOBJECT tokens on the
  314         # stack, each with a data object right below it.
  315         $list = [];
  316         return stack_error($robj, $self, $elem)
  317             unless ($op == DATAOBJECT or $op == PARAMSTART);
  318         while ($op == DATAOBJECT)
  319         {
  320             unshift(@$list, pop(@{$robj->{stack}}));
  321             $op = pop(@{$robj->{stack}});
  322         }
  323         # Now that we see something ! DATAOBJECT, it needs to be PARAMSTART
  324         return stack_error($robj, $self, $elem) unless ($op == PARAMSTART);
  325         push(@{$robj->{stack}}, $list, PARAMLIST);
  326     }
  327     elsif ($elem eq 'fault')
  328     {
  329         # If we're finishing up a fault definition, there needs to be a struct
  330         # on the stack.
  331         return stack_error($robj, $self, $elem) unless ($op == DATAOBJECT);
  332         ($op, $obj) = splice(@{$robj->{stack}}, -2);
  333         return error($robj, $self,
  334                      'Only a <struct> value may be within a <fault>')
  335             unless ($obj->isa('RPC::XML::struct'));
  336 
  337         $obj = new RPC::XML::fault $obj;
  338         return error($robj, $self, 'Unable to instantiate fault object: ' .
  339                             $RPC::XML::ERROR)
  340             unless $obj;
  341         push(@{$robj->{stack}}, $obj, FAULTENT);
  342     }
  343     elsif ($elem eq 'member')
  344     {
  345         # We need to see a DATAOBJECT followed by a STRUCTNAME
  346         return stack_error($robj, $self, $elem) unless ($op == DATAOBJECT);
  347         ($op, $obj) = splice(@{$robj->{stack}}, -2);
  348         return stack_error($robj, $self, $elem) unless ($op == STRUCTNAME);
  349         # Get the name off the stack to clear the way for the STRUCTMEM marker
  350         # under it
  351         ($op, $name) = splice(@{$robj->{stack}}, -2);
  352         # Push the name back on, with the value and the new marker (STRUCTMEM)
  353         push(@{$robj->{stack}}, $name, $obj, STRUCTMEM);
  354     }
  355     elsif ($elem eq 'name')
  356     {
  357         # Fairly simple: just push the current content of CDATA on w/ a marker
  358         push(@{$robj->{stack}}, $robj->{cdata}, STRUCTNAME);
  359     }
  360     elsif ($elem eq 'struct')
  361     {
  362         # Create the hash table in-place, then pass the ref to the constructor
  363         $list = {};
  364         # First off the stack needs to be STRUCTMEM or STRUCT
  365         return stack_error($robj, $self, $elem)
  366             unless ($op == STRUCTMEM or $op == STRUCT);
  367         while ($op == STRUCTMEM)
  368         {
  369             # Next on stack (in list-order): name, value
  370             ($name, $obj) = splice(@{$robj->{stack}}, -2);
  371             $list->{$name} = $obj;
  372             $op = pop(@{$robj->{stack}});
  373         }
  374         # Now that we see something ! STRUCTMEM, it needs to be STRUCT
  375         return stack_error($robj, $self, $elem) unless ($op == STRUCT);
  376         $obj = RPC::XML::struct->new($list);
  377         return error($robj, $self,
  378                      'Error creating a RPC::XML::struct object: ' .
  379                      $RPC::XML::ERROR)
  380             unless $obj;
  381         push(@{$robj->{stack}}, $obj, DATAOBJECT);
  382     }
  383     elsif ($elem eq 'array')
  384     {
  385         # This is similar in most ways to struct creation, save for the lack
  386         # of naming for the elements.
  387         # Create the list in-place, then pass the ref to the constructor
  388         $list = [];
  389         # Only DATAOBJECT or ARRAY should be visible
  390         return stack_error($robj, $self, $elem)
  391             unless ($op == DATAOBJECT or $op == ARRAY);
  392         while ($op == DATAOBJECT)
  393         {
  394             unshift(@$list, pop(@{$robj->{stack}}));
  395             $op = pop(@{$robj->{stack}});
  396         }
  397         # Now that we see something ! DATAOBJECT, it needs to be ARRAY
  398         return stack_error($robj, $self, $elem) unless ($op == ARRAY);
  399         $obj = RPC::XML::array->new($list);
  400         return error($robj, $self,
  401                      'Error creating a RPC::XML::array object: ' .
  402                      $RPC::XML::ERROR)
  403             unless $obj;
  404         push(@{$robj->{stack}}, $obj, DATAOBJECT);
  405     }
  406     elsif ($elem eq 'methodName')
  407     {
  408         return error($robj, $self,
  409                      "<$elem> tag must immediately follow a <methodCall> tag")
  410             unless ($robj->{stack}->[$#{$robj->{stack}}] == METHOD);
  411         push(@{$robj->{stack}}, $robj->{cdata}, NAMEVAL);
  412     }
  413     elsif ($elem eq 'methodCall')
  414     {
  415         # A methodCall closing should have on the stack an optional PARAMLIST
  416         # marker, a NAMEVAL marker, then the METHOD token from the
  417         # opening tag. An ATTR_SET may follow the METHOD token.
  418         if ($op == PARAMLIST)
  419         {
  420             ($op, $list) = splice(@{$robj->{stack}}, -2);
  421         }
  422         else
  423         {
  424             $list = [];
  425         }
  426         if ($op == NAMEVAL)
  427         {
  428             ($op, $name) = splice(@{$robj->{stack}}, -2);
  429         }
  430         return error($robj, $self,
  431                      "No methodName tag detected during methodCall parsing")
  432             unless $name;
  433         return stack_error($robj, $self, $elem) unless ($op == METHOD);
  434         # Create the request object and push it on the stack
  435         $obj = RPC::XML::request->new($name, @$list);
  436         return error($robj, $self,
  437                      "Error creating request object: $RPC::XML::ERROR")
  438             unless $obj;
  439         push(@{$robj->{stack}}, $obj, METHODENT);
  440     }
  441     elsif ($elem eq 'methodResponse')
  442     {
  443         # A methodResponse closing should have on the stack only the
  444         # DATAOBJECT marker, then the RESPONSE token from the opening tag.
  445         if ($op == PARAMLIST)
  446         {
  447             # To my knowledge, the XML-RPC spec limits the params list for
  448             # a response to exactly one object. Extract it from the listref
  449             # and put it back.
  450             $list = pop(@{$robj->{stack}});
  451             return error($robj, $self,
  452                          "Params list for <$elem> tag invalid")
  453                 unless (@$list == 1);
  454             $obj = $list->[0];
  455             return error($robj, $self,
  456                          "Returned value on stack not a type reference")
  457                 unless (ref $obj and $obj->isa('RPC::XML::datatype'));
  458             push(@{$robj->{stack}}, $obj);
  459         }
  460         elsif (! ($op == DATAOBJECT or $op == FAULTENT))
  461         {
  462             return error($robj, $self,
  463                          "No parameter was declared for the <$elem> tag");
  464         }
  465         ($op, $list) = splice(@{$robj->{stack}}, -2);
  466         return stack_error($robj, $self, $elem) unless ($op == RESPONSE);
  467         # Create the response object and push it on the stack
  468         $obj = RPC::XML::response->new($list);
  469         return error($robj, $self,
  470                      "Error creating response object: $RPC::XML::ERROR")
  471             unless $obj;
  472         push(@{$robj->{stack}}, $obj, RESPONSEENT);
  473     }
  474 }
  475 
  476 # This just spools the character data until a closing tag makes use of it
  477 sub char_data
  478 {
  479     my $robj = shift;
  480     my $self = shift;
  481     my $data = shift;
  482 
  483     $robj->{cdata} .= $data;
  484 }
  485 
  486 __END__
  487 
  488 =head1 NAME
  489 
  490 RPC::XML::Parser - A container class for XML::Parser
  491 
  492 =head1 SYNOPSIS
  493 
  494     use RPC::XML::Parser;
  495     ...
  496     $P = new RPC::XML::Parser;
  497     $P->parse($message);
  498 
  499 =head1 DESCRIPTION
  500 
  501 The B<RPC::XML::Parser> class encapsulates the parsing process, for turning a
  502 string or an input stream into a B<RPC::XML::request> or B<RPC::XML::response>
  503 object. The B<XML::Parser> class is used internally, with a new instance
  504 created for each call to C<parse> (detailed below). This allows the
  505 B<RPC::XML::Parser> object to be reusable, even though the B<XML::Parser>
  506 objects are not. The methods are:
  507 
  508 =over 4
  509 
  510 =item new
  511 
  512 Create a new instance of the class. Any extra data passed to the constructor
  513 is taken as key/value pairs (B<not> a hash reference) and attached to the
  514 object.
  515 
  516 =item parse { STRING | STREAM }
  517 
  518 Parse the XML document specified in either a string or a stream. The stream
  519 may be any file descriptor, derivative of B<IO::Handle>, etc. The return
  520 value is either an object reference (to one of B<RPC::XML::request> or
  521 B<RPC::XML::response>) or an error string. Any non-reference return value
  522 should be treated as an error condition.
  523 
  524 =back
  525 
  526 =head1 DIAGNOSTICS
  527 
  528 The constructor returns C<undef> upon failure, with the error message available
  529 in the global variable B<C<$RPC::XML::ERROR>>.
  530 
  531 =head1 CAVEATS
  532 
  533 This is part of a reference implementation in which clarity of process and
  534 readability of the code take precedence over general efficiency. Much, if not
  535 all, of this can be written more compactly and/or efficiently.
  536 
  537 =head1 CREDITS
  538 
  539 The B<XML-RPC> standard is Copyright (c) 1998-2001, UserLand Software, Inc.
  540 See <http://www.xmlrpc.com> for more information about the B<XML-RPC>
  541 specification.
  542 
  543 =head1 LICENSE
  544 
  545 This module is licensed under the terms of the Artistic License that covers
  546 Perl itself. See <http://language.perl.com/misc/Artistic.html> for the
  547 license itself.
  548 
  549 =head1 SEE ALSO
  550 
  551 L<RPC::XML>, L<RPC::XML::Client>, L<RPC::XML::Server>, L<XML::Parser>
  552 
  553 =head1 AUTHOR
  554 
  555 Randy J. Ray <rjray@blackperl.com>
  556 
  557 =cut

aubreyja at gmail dot com
ViewVC Help
Powered by ViewVC 1.0.9