###############################################################################
#
# This file copyright (c) 2001 by Randy J. Ray, all rights reserved
#
# Copying and distribution are permitted under the terms of the Artistic
# License as distributed with Perl versions 5.005 and later. See
# http://language.perl.com/misc/Artistic.html
#
###############################################################################
#
# $Id$
#
# Description: This is the RPC::XML::Parser class, a container for the
# XML::Parser class. It was moved here from RPC::XML in
# order to reduce the weight of that module.
#
# Functions: new
# parse
# message_init
# tag_start
# error
# stack_error
# tag_end
# char_data
#
# Libraries: RPC::XML
# XML::Parser
#
# Global Consts: Uses $RPC::XML::ERROR
#
# Environment: None.
#
###############################################################################
package RPC::XML::Parser;
use 5.005;
use strict;
use vars qw($VERSION @ISA);
use subs qw(error stack_error new message_init message_end tag_start tag_end
char_data parse);
# These constants are only used by the internal stack machine
use constant PARSE_ERROR => 0;
use constant METHOD => 1;
use constant METHODSET => 2;
use constant RESPONSE => 3;
use constant RESPONSESET => 4;
use constant STRUCT => 5;
use constant ARRAY => 6;
use constant DATATYPE => 7;
use constant ATTR_SET => 8;
use constant METHODNAME => 9;
use constant VALUEMARKER => 10;
use constant PARAMSTART => 11;
use constant PARAM => 12;
use constant STRUCTMEM => 13;
use constant STRUCTNAME => 14;
use constant DATAOBJECT => 15;
use constant PARAMLIST => 16;
use constant NAMEVAL => 17;
use constant MEMBERENT => 18;
use constant METHODENT => 19;
use constant RESPONSEENT => 20;
use constant FAULTENT => 21;
use constant FAULTSTART => 22;
# This is to identify valid types
use constant VALIDTYPES => { map { $_, 1 } qw(int i4 string double reference
boolean dateTime.iso8601
base64) };
# This maps XML tags to stack-machine tokens
use constant TAG2TOKEN => { methodCall => METHOD,
methodResponse => RESPONSE,
methodName => METHODNAME,
params => PARAMSTART,
param => PARAM,
value => VALUEMARKER,
fault => FAULTSTART,
array => ARRAY,
struct => STRUCT,
member => STRUCTMEM,
name => STRUCTNAME };
use XML::Parser;
require RPC::XML;
$VERSION = do { my @r=(q$Revision$=~/\d+/g); sprintf "%d."."%02d"x$#r,@r };
1;
###############################################################################
#
# Sub Name: new
#
# Description: Constructor. Save any important attributes, leave the
# heavy lifting for the parse() routine and XML::Parser.
#
# Arguments: NAME IN/OUT TYPE DESCRIPTION
# $class in scalar Class we're initializing
# %attr in hash Any extras the caller wants
#
# Globals: $RPC::XML::ERROR
#
# Environment: None.
#
# Returns: Success: object ref
# Failure: undef
#
###############################################################################
sub new
{
my $class = shift;
my %attrs = @_;
my $self = {};
if (keys %attrs)
{
for (keys %attrs) { $self->{$_} = $attrs{$_} }
}
bless $self, $class;
}
###############################################################################
#
# Sub Name: parse
#
# Description: Parse the requested string or stream. This behaves mostly
# like parse() in the XML::Parser namespace, but does some
# extra, as well.
#
# Arguments: NAME IN/OUT TYPE DESCRIPTION
# $self in ref Object of this class
# $stream in scalar Either the string to parse or
# an open filehandle of sorts
#
# Globals: None.
#
# Environment: None.
#
# Returns: Success: ref to request or response object
# Failure: error string
#
###############################################################################
sub parse
{
my $self = shift;
my $stream = shift;
my $parser = XML::Parser->new(Namespaces => 0, ParseParamEnt => 0,
Handlers =>
{
Init => sub { message_init $self, @_ },
Start => sub { tag_start $self, @_ },
End => sub { tag_end $self, @_ },
Char => sub { char_data $self, @_ },
});
$parser->parse($stream);
# Look at the top-most marker, it'll need to be one of the end cases
my $marker = pop(@{$self->{stack}});
# There should be only on item on the stack after it
my $retval = pop(@{$self->{stack}});
# If the top-most marker isn't the error marker, check the stack
$retval = 'RPC::XML Error: Extra data on parse stack at document end'
if ($marker != PARSE_ERROR and (@{$self->{stack}}));
$retval;
}
# This is called when a new document is about to start parsing
sub message_init
{
my $robj = shift;
my $self = shift;
$robj->{stack} = [];
$self;
}
# This gets called each time an opening tag is parsed
sub tag_start
{
my $robj = shift;
my $self = shift;
my $elem = shift;
my %attr = @_;
$robj->{cdata} = '';
return if ($elem eq 'data');
if (TAG2TOKEN->{$elem})
{
push(@{$robj->{stack}}, TAG2TOKEN->{$elem});
}
elsif (VALIDTYPES->{$elem})
{
# All datatypes are represented on the stack by this generic token
push(@{$robj->{stack}}, DATATYPE);
}
else
{
push(@{$robj->{stack}},
"Unknown tag encountered: $elem", PARSE_ERROR);
$self->finish;
}
}
# Very simple error-text generator, just to eliminate heavy reduncancy in the
# next sub:
sub error
{
my $robj = shift;
my $self = shift;
my $mesg = shift;
my $elem = shift || '';
my $fmt = $elem ?
'%s at document line %d, column %d (byte %d, closing tag %s)' :
'%s at document line %d, column %d (byte %d)';
push(@{$robj->{stack}},
sprintf($fmt, $mesg, $self->current_line, $self->current_column,
$self->current_byte, $elem),
PARSE_ERROR);
$self->finish;
}
# A shorter-cut for stack integrity errors
sub stack_error
{
my $robj = shift;
my $self = shift;
my $elem = shift;
error($robj, $self, 'Stack corruption detected', $elem);
}
# This is a hairy subroutine-- what to do at the end-tag. The actions range
# from simply new-ing a datatype all the way to building the final object.
sub tag_end
{
my $robj = shift;
my $self = shift;
my $elem = shift;
my ($op, $attr, $obj, $class, $list, $name, $err);
return if ($elem eq 'data');
# This should always be one of the stack machine ops defined above
$op = pop(@{$robj->{stack}});
# Decide what to do from here
if (VALIDTYPES->{$elem})
{
# This is the closing tag of one of the data-types.
($class = lc $elem) =~ s/\./_/;
# Some minimal data-integrity checking
if ($class eq 'int' or $class eq 'i4')
{
return error($robj, $self, 'Bad integer data read')
unless ($robj->{cdata} =~ /^[-+]?\d+$/);
}
elsif ($class eq 'double')
{
return error($robj, $self, 'Bad floating-point data read')
unless ($robj->{cdata} =~
# Taken from perldata(1)
/^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/);
}
$class = "RPC::XML::$class";
$obj = $class->new($robj->{cdata});
return error($robj, $self, 'Error instantiating data object: ' .
$RPC::XML::ERROR)
unless ($obj);
push(@{$robj->{stack}}, $obj, DATAOBJECT);
}
elsif ($elem eq 'value')
{
# For , there should already be a dataobject, or else
# the marker token in which case the CDATA is used as a string value.
if ($op == DATAOBJECT)
{
($op, $obj) = splice(@{$robj->{stack}}, -2);
return stack_error($robj, $self, $elem)
unless ($op == VALUEMARKER);
}
elsif ($op == VALUEMARKER)
{
$obj = RPC::XML::string->new($robj->{cdata});
}
else
{
return error($robj, $self,
'No datatype found within container');
}
push(@{$robj->{stack}}, $obj, DATAOBJECT);
}
elsif ($elem eq 'param')
{
# Almost like above, since this is really a NOP anyway
return error($robj, $self, 'No found within container')
unless ($op == DATAOBJECT);
($op, $obj) = splice(@{$robj->{stack}}, -2);
return stack_error($robj, $self, $elem) unless ($op == PARAM);
push(@{$robj->{stack}}, $obj, DATAOBJECT);
}
elsif ($elem eq 'params')
{
# At this point, there should be zero or more DATAOBJECT tokens on the
# stack, each with a data object right below it.
$list = [];
return stack_error($robj, $self, $elem)
unless ($op == DATAOBJECT or $op == PARAMSTART);
while ($op == DATAOBJECT)
{
unshift(@$list, pop(@{$robj->{stack}}));
$op = pop(@{$robj->{stack}});
}
# Now that we see something ! DATAOBJECT, it needs to be PARAMSTART
return stack_error($robj, $self, $elem) unless ($op == PARAMSTART);
push(@{$robj->{stack}}, $list, PARAMLIST);
}
elsif ($elem eq 'fault')
{
# If we're finishing up a fault definition, there needs to be a struct
# on the stack.
return stack_error($robj, $self, $elem) unless ($op == DATAOBJECT);
($op, $obj) = splice(@{$robj->{stack}}, -2);
return error($robj, $self,
'Only a value may be within a ')
unless ($obj->isa('RPC::XML::struct'));
$obj = new RPC::XML::fault $obj;
return error($robj, $self, 'Unable to instantiate fault object: ' .
$RPC::XML::ERROR)
unless $obj;
push(@{$robj->{stack}}, $obj, FAULTENT);
}
elsif ($elem eq 'member')
{
# We need to see a DATAOBJECT followed by a STRUCTNAME
return stack_error($robj, $self, $elem) unless ($op == DATAOBJECT);
($op, $obj) = splice(@{$robj->{stack}}, -2);
return stack_error($robj, $self, $elem) unless ($op == STRUCTNAME);
# Get the name off the stack to clear the way for the STRUCTMEM marker
# under it
($op, $name) = splice(@{$robj->{stack}}, -2);
# Push the name back on, with the value and the new marker (STRUCTMEM)
push(@{$robj->{stack}}, $name, $obj, STRUCTMEM);
}
elsif ($elem eq 'name')
{
# Fairly simple: just push the current content of CDATA on w/ a marker
push(@{$robj->{stack}}, $robj->{cdata}, STRUCTNAME);
}
elsif ($elem eq 'struct')
{
# Create the hash table in-place, then pass the ref to the constructor
$list = {};
# First off the stack needs to be STRUCTMEM or STRUCT
return stack_error($robj, $self, $elem)
unless ($op == STRUCTMEM or $op == STRUCT);
while ($op == STRUCTMEM)
{
# Next on stack (in list-order): name, value
($name, $obj) = splice(@{$robj->{stack}}, -2);
$list->{$name} = $obj;
$op = pop(@{$robj->{stack}});
}
# Now that we see something ! STRUCTMEM, it needs to be STRUCT
return stack_error($robj, $self, $elem) unless ($op == STRUCT);
$obj = RPC::XML::struct->new($list);
return error($robj, $self,
'Error creating a RPC::XML::struct object: ' .
$RPC::XML::ERROR)
unless $obj;
push(@{$robj->{stack}}, $obj, DATAOBJECT);
}
elsif ($elem eq 'array')
{
# This is similar in most ways to struct creation, save for the lack
# of naming for the elements.
# Create the list in-place, then pass the ref to the constructor
$list = [];
# Only DATAOBJECT or ARRAY should be visible
return stack_error($robj, $self, $elem)
unless ($op == DATAOBJECT or $op == ARRAY);
while ($op == DATAOBJECT)
{
unshift(@$list, pop(@{$robj->{stack}}));
$op = pop(@{$robj->{stack}});
}
# Now that we see something ! DATAOBJECT, it needs to be ARRAY
return stack_error($robj, $self, $elem) unless ($op == ARRAY);
$obj = RPC::XML::array->new($list);
return error($robj, $self,
'Error creating a RPC::XML::array object: ' .
$RPC::XML::ERROR)
unless $obj;
push(@{$robj->{stack}}, $obj, DATAOBJECT);
}
elsif ($elem eq 'methodName')
{
return error($robj, $self,
"<$elem> tag must immediately follow a tag")
unless ($robj->{stack}->[$#{$robj->{stack}}] == METHOD);
push(@{$robj->{stack}}, $robj->{cdata}, NAMEVAL);
}
elsif ($elem eq 'methodCall')
{
# A methodCall closing should have on the stack an optional PARAMLIST
# marker, a NAMEVAL marker, then the METHOD token from the
# opening tag. An ATTR_SET may follow the METHOD token.
if ($op == PARAMLIST)
{
($op, $list) = splice(@{$robj->{stack}}, -2);
}
else
{
$list = [];
}
if ($op == NAMEVAL)
{
($op, $name) = splice(@{$robj->{stack}}, -2);
}
return error($robj, $self,
"No methodName tag detected during methodCall parsing")
unless $name;
return stack_error($robj, $self, $elem) unless ($op == METHOD);
# Create the request object and push it on the stack
$obj = RPC::XML::request->new($name, @$list);
return error($robj, $self,
"Error creating request object: $RPC::XML::ERROR")
unless $obj;
push(@{$robj->{stack}}, $obj, METHODENT);
}
elsif ($elem eq 'methodResponse')
{
# A methodResponse closing should have on the stack only the
# DATAOBJECT marker, then the RESPONSE token from the opening tag.
if ($op == PARAMLIST)
{
# To my knowledge, the XML-RPC spec limits the params list for
# a response to exactly one object. Extract it from the listref
# and put it back.
$list = pop(@{$robj->{stack}});
return error($robj, $self,
"Params list for <$elem> tag invalid")
unless (@$list == 1);
$obj = $list->[0];
return error($robj, $self,
"Returned value on stack not a type reference")
unless (ref $obj and $obj->isa('RPC::XML::datatype'));
push(@{$robj->{stack}}, $obj);
}
elsif (! ($op == DATAOBJECT or $op == FAULTENT))
{
return error($robj, $self,
"No parameter was declared for the <$elem> tag");
}
($op, $list) = splice(@{$robj->{stack}}, -2);
return stack_error($robj, $self, $elem) unless ($op == RESPONSE);
# Create the response object and push it on the stack
$obj = RPC::XML::response->new($list);
return error($robj, $self,
"Error creating response object: $RPC::XML::ERROR")
unless $obj;
push(@{$robj->{stack}}, $obj, RESPONSEENT);
}
}
# This just spools the character data until a closing tag makes use of it
sub char_data
{
my $robj = shift;
my $self = shift;
my $data = shift;
$robj->{cdata} .= $data;
}
__END__
=head1 NAME
RPC::XML::Parser - A container class for XML::Parser
=head1 SYNOPSIS
use RPC::XML::Parser;
...
$P = new RPC::XML::Parser;
$P->parse($message);
=head1 DESCRIPTION
The B class encapsulates the parsing process, for turning a
string or an input stream into a B or B
object. The B class is used internally, with a new instance
created for each call to C (detailed below). This allows the
B object to be reusable, even though the B
objects are not. The methods are:
=over 4
=item new
Create a new instance of the class. Any extra data passed to the constructor
is taken as key/value pairs (B a hash reference) and attached to the
object.
=item parse { STRING | STREAM }
Parse the XML document specified in either a string or a stream. The stream
may be any file descriptor, derivative of B, etc. The return
value is either an object reference (to one of B or
B) or an error string. Any non-reference return value
should be treated as an error condition.
=back
=head1 DIAGNOSTICS
The constructor returns C upon failure, with the error message available
in the global variable B>.
=head1 CAVEATS
This is part of a reference implementation in which clarity of process and
readability of the code take precedence over general efficiency. Much, if not
all, of this can be written more compactly and/or efficiently.
=head1 CREDITS
The B standard is Copyright (c) 1998-2001, UserLand Software, Inc.
See for more information about the B
specification.
=head1 LICENSE
This module is licensed under the terms of the Artistic License that covers
Perl itself. See for the
license itself.
=head1 SEE ALSO
L, L, L, L
=head1 AUTHOR
Randy J. Ray
=cut