[system] / trunk / xmlrpc / RPC / RPC-XML-0.25 / lib / RPC / XML / Parser.pm Repository:
ViewVC logotype

Annotation of /trunk/xmlrpc/RPC/RPC-XML-0.25/lib/RPC/XML/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 279 - (view) (download) (as text)

1 : gage 279 ###############################################################################
2 :     #
3 :     # This file copyright (c) 2001 by Randy J. Ray, all rights reserved
4 :     #
5 :     # Copying and distribution are permitted under the terms of the Artistic
6 :     # License as distributed with Perl versions 5.005 and later. See
7 :     # http://language.perl.com/misc/Artistic.html
8 :     #
9 :     ###############################################################################
10 :     #
11 :     # $Id$
12 :     #
13 :     # Description: This is the RPC::XML::Parser class, a container for the
14 :     # XML::Parser class. It was moved here from RPC::XML in
15 :     # order to reduce the weight of that module.
16 :     #
17 :     # Functions: new
18 :     # parse
19 :     # message_init
20 :     # tag_start
21 :     # error
22 :     # stack_error
23 :     # tag_end
24 :     # char_data
25 :     #
26 :     # Libraries: RPC::XML
27 :     # XML::Parser
28 :     #
29 :     # Global Consts: Uses $RPC::XML::ERROR
30 :     #
31 :     # Environment: None.
32 :     #
33 :     ###############################################################################
34 :    
35 :     package RPC::XML::Parser;
36 :    
37 :     use 5.005;
38 :     use strict;
39 :     use vars qw($VERSION @ISA);
40 :     use subs qw(error stack_error new message_init message_end tag_start tag_end
41 :     char_data parse);
42 :    
43 :     # These constants are only used by the internal stack machine
44 :     use constant PARSE_ERROR => 0;
45 :     use constant METHOD => 1;
46 :     use constant METHODSET => 2;
47 :     use constant RESPONSE => 3;
48 :     use constant RESPONSESET => 4;
49 :     use constant STRUCT => 5;
50 :     use constant ARRAY => 6;
51 :     use constant DATATYPE => 7;
52 :     use constant ATTR_SET => 8;
53 :     use constant METHODNAME => 9;
54 :     use constant VALUEMARKER => 10;
55 :     use constant PARAMSTART => 11;
56 :     use constant PARAM => 12;
57 :     use constant STRUCTMEM => 13;
58 :     use constant STRUCTNAME => 14;
59 :     use constant DATAOBJECT => 15;
60 :     use constant PARAMLIST => 16;
61 :     use constant NAMEVAL => 17;
62 :     use constant MEMBERENT => 18;
63 :     use constant METHODENT => 19;
64 :     use constant RESPONSEENT => 20;
65 :     use constant FAULTENT => 21;
66 :     use constant FAULTSTART => 22;
67 :    
68 :     # This is to identify valid types
69 :     use constant VALIDTYPES => { map { $_, 1 } qw(int i4 string double reference
70 :     boolean dateTime.iso8601
71 :     base64) };
72 :     # This maps XML tags to stack-machine tokens
73 :     use constant TAG2TOKEN => { methodCall => METHOD,
74 :     methodResponse => RESPONSE,
75 :     methodName => METHODNAME,
76 :     params => PARAMSTART,
77 :     param => PARAM,
78 :     value => VALUEMARKER,
79 :     fault => FAULTSTART,
80 :     array => ARRAY,
81 :     struct => STRUCT,
82 :     member => STRUCTMEM,
83 :     name => STRUCTNAME };
84 :    
85 :     use XML::Parser;
86 :    
87 :     require RPC::XML;
88 :    
89 :     $VERSION = do { my @r=(q$Revision$=~/\d+/g); sprintf "%d."."%02d"x$#r,@r };
90 :    
91 :     1;
92 :    
93 :     ###############################################################################
94 :     #
95 :     # Sub Name: new
96 :     #
97 :     # Description: Constructor. Save any important attributes, leave the
98 :     # heavy lifting for the parse() routine and XML::Parser.
99 :     #
100 :     # Arguments: NAME IN/OUT TYPE DESCRIPTION
101 :     # $class in scalar Class we're initializing
102 :     # %attr in hash Any extras the caller wants
103 :     #
104 :     # Globals: $RPC::XML::ERROR
105 :     #
106 :     # Environment: None.
107 :     #
108 :     # Returns: Success: object ref
109 :     # Failure: undef
110 :     #
111 :     ###############################################################################
112 :     sub new
113 :     {
114 :     my $class = shift;
115 :     my %attrs = @_;
116 :    
117 :     my $self = {};
118 :     if (keys %attrs)
119 :     {
120 :     for (keys %attrs) { $self->{$_} = $attrs{$_} }
121 :     }
122 :    
123 :     bless $self, $class;
124 :     }
125 :    
126 :     ###############################################################################
127 :     #
128 :     # Sub Name: parse
129 :     #
130 :     # Description: Parse the requested string or stream. This behaves mostly
131 :     # like parse() in the XML::Parser namespace, but does some
132 :     # extra, as well.
133 :     #
134 :     # Arguments: NAME IN/OUT TYPE DESCRIPTION
135 :     # $self in ref Object of this class
136 :     # $stream in scalar Either the string to parse or
137 :     # an open filehandle of sorts
138 :     #
139 :     # Globals: None.
140 :     #
141 :     # Environment: None.
142 :     #
143 :     # Returns: Success: ref to request or response object
144 :     # Failure: error string
145 :     #
146 :     ###############################################################################
147 :     sub parse
148 :     {
149 :     my $self = shift;
150 :     my $stream = shift;
151 :    
152 :     my $parser = XML::Parser->new(Namespaces => 0, ParseParamEnt => 0,
153 :     Handlers =>
154 :     {
155 :     Init => sub { message_init $self, @_ },
156 :     Start => sub { tag_start $self, @_ },
157 :     End => sub { tag_end $self, @_ },
158 :     Char => sub { char_data $self, @_ },
159 :     });
160 :    
161 :     $parser->parse($stream);
162 :     # Look at the top-most marker, it'll need to be one of the end cases
163 :     my $marker = pop(@{$self->{stack}});
164 :     # There should be only on item on the stack after it
165 :     my $retval = pop(@{$self->{stack}});
166 :     # If the top-most marker isn't the error marker, check the stack
167 :     $retval = 'RPC::XML Error: Extra data on parse stack at document end'
168 :     if ($marker != PARSE_ERROR and (@{$self->{stack}}));
169 :    
170 :     $retval;
171 :     }
172 :    
173 :     # This is called when a new document is about to start parsing
174 :     sub message_init
175 :     {
176 :     my $robj = shift;
177 :     my $self = shift;
178 :    
179 :     $robj->{stack} = [];
180 :     $self;
181 :     }
182 :    
183 :     # This gets called each time an opening tag is parsed
184 :     sub tag_start
185 :     {
186 :     my $robj = shift;
187 :     my $self = shift;
188 :     my $elem = shift;
189 :     my %attr = @_;
190 :    
191 :     $robj->{cdata} = '';
192 :     return if ($elem eq 'data');
193 :     if (TAG2TOKEN->{$elem})
194 :     {
195 :     push(@{$robj->{stack}}, TAG2TOKEN->{$elem});
196 :     }
197 :     elsif (VALIDTYPES->{$elem})
198 :     {
199 :     # All datatypes are represented on the stack by this generic token
200 :     push(@{$robj->{stack}}, DATATYPE);
201 :     }
202 :     else
203 :     {
204 :     push(@{$robj->{stack}},
205 :     "Unknown tag encountered: $elem", PARSE_ERROR);
206 :     $self->finish;
207 :     }
208 :     }
209 :    
210 :     # Very simple error-text generator, just to eliminate heavy reduncancy in the
211 :     # next sub:
212 :     sub error
213 :     {
214 :     my $robj = shift;
215 :     my $self = shift;
216 :     my $mesg = shift;
217 :     my $elem = shift || '';
218 :    
219 :     my $fmt = $elem ?
220 :     '%s at document line %d, column %d (byte %d, closing tag %s)' :
221 :     '%s at document line %d, column %d (byte %d)';
222 :    
223 :     push(@{$robj->{stack}},
224 :     sprintf($fmt, $mesg, $self->current_line, $self->current_column,
225 :     $self->current_byte, $elem),
226 :     PARSE_ERROR);
227 :     $self->finish;
228 :     }
229 :    
230 :     # A shorter-cut for stack integrity errors
231 :     sub stack_error
232 :     {
233 :     my $robj = shift;
234 :     my $self = shift;
235 :     my $elem = shift;
236 :    
237 :     error($robj, $self, 'Stack corruption detected', $elem);
238 :     }
239 :    
240 :     # This is a hairy subroutine-- what to do at the end-tag. The actions range
241 :     # from simply new-ing a datatype all the way to building the final object.
242 :     sub tag_end
243 :     {
244 :     my $robj = shift;
245 :     my $self = shift;
246 :     my $elem = shift;
247 :    
248 :     my ($op, $attr, $obj, $class, $list, $name, $err);
249 :    
250 :     return if ($elem eq 'data');
251 :     # This should always be one of the stack machine ops defined above
252 :     $op = pop(@{$robj->{stack}});
253 :    
254 :     # Decide what to do from here
255 :     if (VALIDTYPES->{$elem})
256 :     {
257 :     # This is the closing tag of one of the data-types.
258 :     ($class = lc $elem) =~ s/\./_/;
259 :     # Some minimal data-integrity checking
260 :     if ($class eq 'int' or $class eq 'i4')
261 :     {
262 :     return error($robj, $self, 'Bad integer data read')
263 :     unless ($robj->{cdata} =~ /^[-+]?\d+$/);
264 :     }
265 :     elsif ($class eq 'double')
266 :     {
267 :     return error($robj, $self, 'Bad floating-point data read')
268 :     unless ($robj->{cdata} =~
269 :     # Taken from perldata(1)
270 :     /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/);
271 :     }
272 :    
273 :     $class = "RPC::XML::$class";
274 :     $obj = $class->new($robj->{cdata});
275 :     return error($robj, $self, 'Error instantiating data object: ' .
276 :     $RPC::XML::ERROR)
277 :     unless ($obj);
278 :     push(@{$robj->{stack}}, $obj, DATAOBJECT);
279 :     }
280 :     elsif ($elem eq 'value')
281 :     {
282 :     # For <value></value>, there should already be a dataobject, or else
283 :     # the marker token in which case the CDATA is used as a string value.
284 :     if ($op == DATAOBJECT)
285 :     {
286 :     ($op, $obj) = splice(@{$robj->{stack}}, -2);
287 :     return stack_error($robj, $self, $elem)
288 :     unless ($op == VALUEMARKER);
289 :     }
290 :     elsif ($op == VALUEMARKER)
291 :     {
292 :     $obj = RPC::XML::string->new($robj->{cdata});
293 :     }
294 :     else
295 :     {
296 :     return error($robj, $self,
297 :     'No datatype found within <value> container');
298 :     }
299 :    
300 :     push(@{$robj->{stack}}, $obj, DATAOBJECT);
301 :     }
302 :     elsif ($elem eq 'param')
303 :     {
304 :     # Almost like above, since this is really a NOP anyway
305 :     return error($robj, $self, 'No <value> found within <param> container')
306 :     unless ($op == DATAOBJECT);
307 :     ($op, $obj) = splice(@{$robj->{stack}}, -2);
308 :     return stack_error($robj, $self, $elem) unless ($op == PARAM);
309 :     push(@{$robj->{stack}}, $obj, DATAOBJECT);
310 :     }
311 :     elsif ($elem eq 'params')
312 :     {
313 :     # At this point, there should be zero or more DATAOBJECT tokens on the
314 :     # stack, each with a data object right below it.
315 :     $list = [];
316 :     return stack_error($robj, $self, $elem)
317 :     unless ($op == DATAOBJECT or $op == PARAMSTART);
318 :     while ($op == DATAOBJECT)
319 :     {
320 :     unshift(@$list, pop(@{$robj->{stack}}));
321 :     $op = pop(@{$robj->{stack}});
322 :     }
323 :     # Now that we see something ! DATAOBJECT, it needs to be PARAMSTART
324 :     return stack_error($robj, $self, $elem) unless ($op == PARAMSTART);
325 :     push(@{$robj->{stack}}, $list, PARAMLIST);
326 :     }
327 :     elsif ($elem eq 'fault')
328 :     {
329 :     # If we're finishing up a fault definition, there needs to be a struct
330 :     # on the stack.
331 :     return stack_error($robj, $self, $elem) unless ($op == DATAOBJECT);
332 :     ($op, $obj) = splice(@{$robj->{stack}}, -2);
333 :     return error($robj, $self,
334 :     'Only a <struct> value may be within a <fault>')
335 :     unless ($obj->isa('RPC::XML::struct'));
336 :    
337 :     $obj = new RPC::XML::fault $obj;
338 :     return error($robj, $self, 'Unable to instantiate fault object: ' .
339 :     $RPC::XML::ERROR)
340 :     unless $obj;
341 :     push(@{$robj->{stack}}, $obj, FAULTENT);
342 :     }
343 :     elsif ($elem eq 'member')
344 :     {
345 :     # We need to see a DATAOBJECT followed by a STRUCTNAME
346 :     return stack_error($robj, $self, $elem) unless ($op == DATAOBJECT);
347 :     ($op, $obj) = splice(@{$robj->{stack}}, -2);
348 :     return stack_error($robj, $self, $elem) unless ($op == STRUCTNAME);
349 :     # Get the name off the stack to clear the way for the STRUCTMEM marker
350 :     # under it
351 :     ($op, $name) = splice(@{$robj->{stack}}, -2);
352 :     # Push the name back on, with the value and the new marker (STRUCTMEM)
353 :     push(@{$robj->{stack}}, $name, $obj, STRUCTMEM);
354 :     }
355 :     elsif ($elem eq 'name')
356 :     {
357 :     # Fairly simple: just push the current content of CDATA on w/ a marker
358 :     push(@{$robj->{stack}}, $robj->{cdata}, STRUCTNAME);
359 :     }
360 :     elsif ($elem eq 'struct')
361 :     {
362 :     # Create the hash table in-place, then pass the ref to the constructor
363 :     $list = {};
364 :     # First off the stack needs to be STRUCTMEM or STRUCT
365 :     return stack_error($robj, $self, $elem)
366 :     unless ($op == STRUCTMEM or $op == STRUCT);
367 :     while ($op == STRUCTMEM)
368 :     {
369 :     # Next on stack (in list-order): name, value
370 :     ($name, $obj) = splice(@{$robj->{stack}}, -2);
371 :     $list->{$name} = $obj;
372 :     $op = pop(@{$robj->{stack}});
373 :     }
374 :     # Now that we see something ! STRUCTMEM, it needs to be STRUCT
375 :     return stack_error($robj, $self, $elem) unless ($op == STRUCT);
376 :     $obj = RPC::XML::struct->new($list);
377 :     return error($robj, $self,
378 :     'Error creating a RPC::XML::struct object: ' .
379 :     $RPC::XML::ERROR)
380 :     unless $obj;
381 :     push(@{$robj->{stack}}, $obj, DATAOBJECT);
382 :     }
383 :     elsif ($elem eq 'array')
384 :     {
385 :     # This is similar in most ways to struct creation, save for the lack
386 :     # of naming for the elements.
387 :     # Create the list in-place, then pass the ref to the constructor
388 :     $list = [];
389 :     # Only DATAOBJECT or ARRAY should be visible
390 :     return stack_error($robj, $self, $elem)
391 :     unless ($op == DATAOBJECT or $op == ARRAY);
392 :     while ($op == DATAOBJECT)
393 :     {
394 :     unshift(@$list, pop(@{$robj->{stack}}));
395 :     $op = pop(@{$robj->{stack}});
396 :     }
397 :     # Now that we see something ! DATAOBJECT, it needs to be ARRAY
398 :     return stack_error($robj, $self, $elem) unless ($op == ARRAY);
399 :     $obj = RPC::XML::array->new($list);
400 :     return error($robj, $self,
401 :     'Error creating a RPC::XML::array object: ' .
402 :     $RPC::XML::ERROR)
403 :     unless $obj;
404 :     push(@{$robj->{stack}}, $obj, DATAOBJECT);
405 :     }
406 :     elsif ($elem eq 'methodName')
407 :     {
408 :     return error($robj, $self,
409 :     "<$elem> tag must immediately follow a <methodCall> tag")
410 :     unless ($robj->{stack}->[$#{$robj->{stack}}] == METHOD);
411 :     push(@{$robj->{stack}}, $robj->{cdata}, NAMEVAL);
412 :     }
413 :     elsif ($elem eq 'methodCall')
414 :     {
415 :     # A methodCall closing should have on the stack an optional PARAMLIST
416 :     # marker, a NAMEVAL marker, then the METHOD token from the
417 :     # opening tag. An ATTR_SET may follow the METHOD token.
418 :     if ($op == PARAMLIST)
419 :     {
420 :     ($op, $list) = splice(@{$robj->{stack}}, -2);
421 :     }
422 :     else
423 :     {
424 :     $list = [];
425 :     }
426 :     if ($op == NAMEVAL)
427 :     {
428 :     ($op, $name) = splice(@{$robj->{stack}}, -2);
429 :     }
430 :     return error($robj, $self,
431 :     "No methodName tag detected during methodCall parsing")
432 :     unless $name;
433 :     return stack_error($robj, $self, $elem) unless ($op == METHOD);
434 :     # Create the request object and push it on the stack
435 :     $obj = RPC::XML::request->new($name, @$list);
436 :     return error($robj, $self,
437 :     "Error creating request object: $RPC::XML::ERROR")
438 :     unless $obj;
439 :     push(@{$robj->{stack}}, $obj, METHODENT);
440 :     }
441 :     elsif ($elem eq 'methodResponse')
442 :     {
443 :     # A methodResponse closing should have on the stack only the
444 :     # DATAOBJECT marker, then the RESPONSE token from the opening tag.
445 :     if ($op == PARAMLIST)
446 :     {
447 :     # To my knowledge, the XML-RPC spec limits the params list for
448 :     # a response to exactly one object. Extract it from the listref
449 :     # and put it back.
450 :     $list = pop(@{$robj->{stack}});
451 :     return error($robj, $self,
452 :     "Params list for <$elem> tag invalid")
453 :     unless (@$list == 1);
454 :     $obj = $list->[0];
455 :     return error($robj, $self,
456 :     "Returned value on stack not a type reference")
457 :     unless (ref $obj and $obj->isa('RPC::XML::datatype'));
458 :     push(@{$robj->{stack}}, $obj);
459 :     }
460 :     elsif (! ($op == DATAOBJECT or $op == FAULTENT))
461 :     {
462 :     return error($robj, $self,
463 :     "No parameter was declared for the <$elem> tag");
464 :     }
465 :     ($op, $list) = splice(@{$robj->{stack}}, -2);
466 :     return stack_error($robj, $self, $elem) unless ($op == RESPONSE);
467 :     # Create the response object and push it on the stack
468 :     $obj = RPC::XML::response->new($list);
469 :     return error($robj, $self,
470 :     "Error creating response object: $RPC::XML::ERROR")
471 :     unless $obj;
472 :     push(@{$robj->{stack}}, $obj, RESPONSEENT);
473 :     }
474 :     }
475 :    
476 :     # This just spools the character data until a closing tag makes use of it
477 :     sub char_data
478 :     {
479 :     my $robj = shift;
480 :     my $self = shift;
481 :     my $data = shift;
482 :    
483 :     $robj->{cdata} .= $data;
484 :     }
485 :    
486 :     __END__
487 :    
488 :     =head1 NAME
489 :    
490 :     RPC::XML::Parser - A container class for XML::Parser
491 :    
492 :     =head1 SYNOPSIS
493 :    
494 :     use RPC::XML::Parser;
495 :     ...
496 :     $P = new RPC::XML::Parser;
497 :     $P->parse($message);
498 :    
499 :     =head1 DESCRIPTION
500 :    
501 :     The B<RPC::XML::Parser> class encapsulates the parsing process, for turning a
502 :     string or an input stream into a B<RPC::XML::request> or B<RPC::XML::response>
503 :     object. The B<XML::Parser> class is used internally, with a new instance
504 :     created for each call to C<parse> (detailed below). This allows the
505 :     B<RPC::XML::Parser> object to be reusable, even though the B<XML::Parser>
506 :     objects are not. The methods are:
507 :    
508 :     =over 4
509 :    
510 :     =item new
511 :    
512 :     Create a new instance of the class. Any extra data passed to the constructor
513 :     is taken as key/value pairs (B<not> a hash reference) and attached to the
514 :     object.
515 :    
516 :     =item parse { STRING | STREAM }
517 :    
518 :     Parse the XML document specified in either a string or a stream. The stream
519 :     may be any file descriptor, derivative of B<IO::Handle>, etc. The return
520 :     value is either an object reference (to one of B<RPC::XML::request> or
521 :     B<RPC::XML::response>) or an error string. Any non-reference return value
522 :     should be treated as an error condition.
523 :    
524 :     =back
525 :    
526 :     =head1 DIAGNOSTICS
527 :    
528 :     The constructor returns C<undef> upon failure, with the error message available
529 :     in the global variable B<C<$RPC::XML::ERROR>>.
530 :    
531 :     =head1 CAVEATS
532 :    
533 :     This is part of a reference implementation in which clarity of process and
534 :     readability of the code take precedence over general efficiency. Much, if not
535 :     all, of this can be written more compactly and/or efficiently.
536 :    
537 :     =head1 CREDITS
538 :    
539 :     The B<XML-RPC> standard is Copyright (c) 1998-2001, UserLand Software, Inc.
540 :     See <http://www.xmlrpc.com> for more information about the B<XML-RPC>
541 :     specification.
542 :    
543 :     =head1 LICENSE
544 :    
545 :     This module is licensed under the terms of the Artistic License that covers
546 :     Perl itself. See <http://language.perl.com/misc/Artistic.html> for the
547 :     license itself.
548 :    
549 :     =head1 SEE ALSO
550 :    
551 :     L<RPC::XML>, L<RPC::XML::Client>, L<RPC::XML::Server>, L<XML::Parser>
552 :    
553 :     =head1 AUTHOR
554 :    
555 :     Randy J. Ray <rjray@blackperl.com>
556 :    
557 :     =cut

aubreyja at gmail dot com
ViewVC Help
Powered by ViewVC 1.0.9