| Filename | /usr/lib/x86_64-linux-gnu/perl5/5.20/XML/Parser/Expat.pm |
| Statements | Executed 0 statements in 0s |
| Line | Statements | Time on line | Calls | Time in subs | Code |
|---|---|---|---|---|---|
| 1 | package XML::Parser::Expat; | ||||
| 2 | |||||
| 3 | require 5.004; | ||||
| 4 | |||||
| 5 | use strict; | ||||
| 6 | use vars qw($VERSION @ISA %Handler_Setters %Encoding_Table @Encoding_Path | ||||
| 7 | $have_File_Spec); | ||||
| 8 | use Carp; | ||||
| 9 | |||||
| 10 | require DynaLoader; | ||||
| 11 | |||||
| 12 | @ISA = qw(DynaLoader); | ||||
| 13 | $VERSION = "2.41"; | ||||
| 14 | |||||
| 15 | $have_File_Spec = $INC{'File/Spec.pm'} || do 'File/Spec.pm'; | ||||
| 16 | |||||
# Start with an empty encoding table, then build the search path for
# encoding maps: every XML/Parser/Encodings directory that exists under an
# @INC entry, followed by the current directory.  File::Spec is used to
# build the names when it could be loaded; otherwise plain '/' joining.
%Encoding_Table = ();
@Encoding_Path = $have_File_Spec
    ? (
        grep(-d $_,
             map(File::Spec->catdir($_, qw(XML Parser Encodings)), @INC)),
        File::Spec->curdir
      )
    : (
        grep(-d $_, map($_ . '/XML/Parser/Encodings', @INC)),
        '.'
      );
| 27 | |||||
| 28 | |||||
| 29 | bootstrap XML::Parser::Expat $VERSION; | ||||
| 30 | |||||
# Maps each handler type name accepted by setHandlers to the function that
# installs a handler of that type on a parser.
%Handler_Setters = (
    Start        => \&SetStartElementHandler,
    End          => \&SetEndElementHandler,
    Char         => \&SetCharacterDataHandler,
    Proc         => \&SetProcessingInstructionHandler,
    Comment      => \&SetCommentHandler,
    CdataStart   => \&SetStartCdataHandler,
    CdataEnd     => \&SetEndCdataHandler,
    Default      => \&SetDefaultHandler,
    Unparsed     => \&SetUnparsedEntityDeclHandler,
    Notation     => \&SetNotationDeclHandler,
    ExternEnt    => \&SetExternalEntityRefHandler,
    ExternEntFin => \&SetExtEntFinishHandler,
    Entity       => \&SetEntityDeclHandler,
    Element      => \&SetElementDeclHandler,
    Attlist      => \&SetAttListDeclHandler,
    Doctype      => \&SetDoctypeHandler,
    DoctypeFin   => \&SetEndDoctypeHandler,
    XMLDecl      => \&SetXMLDeclHandler
);
| 51 | |||||
# Constructor.  Takes the class name followed by key/value options; the
# option hash itself becomes the object.  Resets the parse state and the
# element context, supplies defaults for Namespaces and ErrorMessage,
# creates the namespace bookkeeping fields when Namespaces is true, and
# stores the handle returned by ParserCreate in the Parser field.
# Returns the new object.
sub new {
    my ($class, %args) = @_;

    # \%args is blessed directly, so every assignment to %args below is
    # also visible through $self.
    my $self = bless \%args, $class;

    $args{_State_}        = 0;
    $args{Context}        = [];
    $args{Namespaces}   ||= 0;
    $args{ErrorMessage} ||= '';

    if ($args{Namespaces}) {
        $args{Namespace_Table} = {};
        $args{Namespace_List}  = [undef];
        $args{Prefix_Table}    = {};
        $args{New_Prefixes}    = [];
    }

    $args{_Setters} = \%Handler_Setters;
    $args{Parser}   = ParserCreate($self, $args{ProtocolEncoding},
                                   $args{Namespaces});

    return $self;
}
| 70 | |||||
# Loads an encoding map file and hands its contents to LoadEncoding.
#
# The last path component of FILE is lower-cased and ".enc" is appended
# when missing.  A name that does not start with "/" is looked up in each
# directory of @Encoding_Path; the first existing file wins.
#
# Returns the name reported by LoadEncoding.  Croaks when the file cannot
# be opened or read, or when LoadEncoding returns undef for its contents.
sub load_encoding {
    my ($file) = @_;

    $file =~ s!([^/]+)$!\L$1\E!;
    $file .= '.enc' unless $file =~ /\.enc$/;

    unless ($file =~ m!^/!) {
        foreach my $dir (@Encoding_Path) {
            my $candidate = $have_File_Spec
                ? File::Spec->catfile($dir, $file)
                : "$dir/$file";
            if (-e $candidate) {
                $file = $candidate;
                last;
            }
        }
    }

    # Three-argument open on a lexical handle: the file name can never be
    # interpreted as an open mode or a command.
    open(my $enc, '<', $file) or croak("Couldn't open encmap $file:\n$!\n");
    binmode($enc);

    my $data;
    my $br = sysread($enc, $data, -s $enc);
    croak("Trouble reading $file:\n$!\n")
        unless defined($br);
    close($enc);

    my $name = LoadEncoding($data, $br);
    croak("$file isn't an encmap file")
        unless defined($name);

    return $name;
}    # End load_encoding
| 103 | |||||
# Installs handlers given as TYPE => HANDLER pairs.
#
# Each HANDLER must be a code reference or a false value.  The setter for
# TYPE is looked up in the object's _Setters table and called with the
# Parser field and the handler.
#
# Returns a flat list of (TYPE, value returned by the setter) pairs, in
# argument order.  Croaks on an odd number of arguments, on a true handler
# that is not a code reference, and on an unknown TYPE.
sub setHandlers {
    my ($self, @handler_pairs) = @_;

    croak("Uneven number of arguments to setHandlers method")
        if @handler_pairs % 2;

    my @ret;

    while (@handler_pairs) {
        my ($type, $handler) = splice(@handler_pairs, 0, 2);

        # undef and other false values are allowed through to the setter.
        croak "Handler for $type not a Code ref"
            if $handler and ref($handler) ne 'CODE';

        my $setter = $self->{_Setters}->{$type};

        unless (defined($setter)) {
            my @types = sort keys %{$self->{_Setters}};
            croak("Unknown Expat handler type: $type\n Valid types: @types");
        }

        my $old = $setter->($self->{Parser}, $handler);
        push(@ret, $type, $old);
    }

    return @ret;
}
| 131 | |||||
# Croaks with MESSAGE followed by " at line N", where N comes from
# GetCurrentLineNumber.  When the ErrorContext field is defined, the output
# of position_in_context for that many lines is appended after ":\n".
sub xpcroak {
    my ($self, $message) = @_;

    my $context_lines = $self->{ErrorContext};

    $message .= ' at line ' . GetCurrentLineNumber($self->{Parser});
    if (defined($context_lines)) {
        $message .= ":\n" . $self->position_in_context($context_lines);
    }

    croak $message;
}
| 143 | |||||
# Warns (via carp) with MESSAGE followed by " at line N", where N comes
# from GetCurrentLineNumber.  When the ErrorContext field is defined, the
# output of position_in_context for that many lines is appended after
# ":\n".
sub xpcarp {
    my ($self, $message) = @_;

    my $context_lines = $self->{ErrorContext};

    $message .= ' at line ' . GetCurrentLineNumber($self->{Parser});
    if (defined($context_lines)) {
        $message .= ":\n" . $self->position_in_context($context_lines);
    }

    carp $message;
}
| 154 | |||||
# Calls DefaultCurrent on the underlying parser, but only while the
# object's _State_ field is 1.
sub default_current {
    my ($self) = @_;
    return DefaultCurrent($self->{Parser}) if $self->{_State_} == 1;
}
| 161 | |||||
# Returns RecognizedString for the underlying parser, but only while the
# object's _State_ field is 1.
sub recognized_string {
    my ($self) = @_;
    return RecognizedString($self->{Parser}) if $self->{_State_} == 1;
}
| 168 | |||||
# Returns OriginalString for the underlying parser, but only while the
# object's _State_ field is 1.
sub original_string {
    my ($self) = @_;
    return OriginalString($self->{Parser}) if $self->{_State_} == 1;
}
| 175 | |||||
# Returns GetCurrentLineNumber for the underlying parser, but only while
# the object's _State_ field is 1.
sub current_line {
    my ($self) = @_;
    return GetCurrentLineNumber($self->{Parser}) if $self->{_State_} == 1;
}
| 182 | |||||
# Returns GetCurrentColumnNumber for the underlying parser, but only while
# the object's _State_ field is 1.
sub current_column {
    my ($self) = @_;
    return GetCurrentColumnNumber($self->{Parser}) if $self->{_State_} == 1;
}
| 189 | |||||
# Returns GetCurrentByteIndex for the underlying parser, but only while
# the object's _State_ field is 1.
sub current_byte {
    my ($self) = @_;
    return GetCurrentByteIndex($self->{Parser}) if $self->{_State_} == 1;
}
| 196 | |||||
# Returns the parser's current base (GetBase).  When a second argument is
# supplied -- even an undefined one -- it is installed as the new base
# with SetBase after the old value has been read.
sub base {
    my ($self, $newbase) = @_;

    my $parser   = $self->{Parser};
    my $previous = GetBase($parser);

    if (@_ > 1) {
        SetBase($parser, $newbase);
    }

    return $previous;
}
| 204 | |||||
# Returns the contents of the Context array (its element count in scalar
# context).
sub context {
    my ($self) = @_;
    return @{ $self->{Context} };
}
| 209 | |||||
# Returns the last entry of the Context array, or undef when it is empty.
sub current_element {
    my ($self) = @_;

    my $stack = $self->{Context};
    return undef unless @$stack;
    return $stack->[-1];
}
| 214 | |||||
# Compares ELEMENT with the last entry of the Context array using eq_name.
# Returns undef when the Context array is empty.
sub in_element {
    my ($self, $element) = @_;

    my $stack = $self->{Context};
    return undef unless @$stack;
    return $self->eq_name($stack->[-1], $element);
}
| 220 | |||||
# Returns how many entries of the Context array match ELEMENT according to
# eq_name.
sub within_element {
    my ($self, $element) = @_;
    return scalar grep { $self->eq_name($_, $element) } @{$self->{Context}};
}
| 229 | |||||
# Returns the number of entries in the Context array.
sub depth {
    my ($self) = @_;
    return scalar @{ $self->{Context} };
}
| 234 | |||||
# Returns ElementIndex for the underlying parser, but only while the
# object's _State_ field is 1.
sub element_index {
    my ($self) = @_;
    return ElementIndex($self->{Parser}) if $self->{_State_} == 1;
}
| 242 | |||||
| 243 | ################ | ||||
| 244 | # Namespace methods | ||||
| 245 | |||||
# Looks NAME up in Namespace_List, using the integer value of NAME as the
# index.  $^W is switched off locally for the numeric conversion.
# NOTE(review): presumably names produced by generate_ns_name carry that
# index as their numeric value -- confirm against the XS code.
sub namespace {
    my ($self, $name) = @_;

    local $^W = 0;
    my $index = int($name);

    return $self->{Namespace_List}->[$index];
}
| 251 | |||||
# Two names are equal when both their integer values and their string
# values are equal.  $^W is switched off locally for the numeric
# conversion.
sub eq_name {
    my ($self, $nm1, $nm2) = @_;

    local $^W = 0;

    return (int($nm1) == int($nm2) and $nm1 eq $nm2);
}
| 258 | |||||
# Returns NAME unchanged when NAMESPACE is false.  Otherwise returns the
# result of GenerateNSName, which is given the name, the namespace and the
# object's Namespace_Table and Namespace_List.
sub generate_ns_name {
    my ($self, $name, $namespace) = @_;

    return $name unless $namespace;

    return GenerateNSName($name, $namespace,
                          $self->{Namespace_Table},
                          $self->{Namespace_List});
}
| 267 | |||||
# Returns the contents of New_Prefixes when the Namespaces field is true,
# and the empty list otherwise.
sub new_ns_prefixes {
    my ($self) = @_;
    return $self->{Namespaces} ? @{ $self->{New_Prefixes} } : ();
}
| 275 | |||||
# Returns the last entry of the Prefix_Table stack for PREFIX.  Returns
# undef when the Namespaces field is false, or when PREFIX has no stack or
# an empty one.
sub expand_ns_prefix {
    my ($self, $prefix) = @_;

    return undef unless $self->{Namespaces};

    my $stack = $self->{Prefix_Table}->{$prefix};
    return undef unless defined($stack) and @$stack;

    return $stack->[-1];
}
| 286 | |||||
# Returns the keys of Prefix_Table, leaving out '#default' when the last
# entry of its stack is undefined.  Returns the empty list when the
# Namespaces field is false.
sub current_ns_prefixes {
    my ($self) = @_;

    return () unless $self->{Namespaces};

    # Work on a shallow copy so the object's own table keeps every key.
    my %bound = %{$self->{Prefix_Table}};

    delete $bound{'#default'}
        if exists $bound{'#default'}
        and not defined($bound{'#default'}->[-1]);

    return keys %bound;
}
| 302 | |||||
| 303 | |||||
| 304 | ################################################################ | ||||
| 305 | # Namespace declaration handlers | ||||
| 306 | # | ||||
| 307 | |||||
# Records that PREFIX is bound to URI: pushes URI on the Prefix_Table
# stack for PREFIX (creating the stack when needed) and appends PREFIX to
# New_Prefixes.  An undefined PREFIX is recorded as '#default'.
sub NamespaceStart {
    my ($self, $prefix, $uri) = @_;

    $prefix = '#default' unless defined $prefix;

    if (defined $self->{Prefix_Table}->{$prefix}) {
        push(@{ $self->{Prefix_Table}->{$prefix} }, $uri);
    }
    else {
        $self->{Prefix_Table}->{$prefix} = [$uri];
    }

    # The New_Prefixes list gets emptied at end of startElement function
    # in Expat.xs

    push(@{$self->{New_Prefixes}}, $prefix);
}
| 326 | |||||
# Undoes the most recent binding of PREFIX: pops the Prefix_Table stack
# for PREFIX when it holds more than one entry, and removes the table
# entry otherwise.  An undefined PREFIX means '#default'.
#
# A prefix with no stack at all is treated like an exhausted one (the
# delete is then a no-op) instead of dereferencing an undefined value.
sub NamespaceEnd {
    my ($self, $prefix) = @_;

    $prefix = '#default' unless defined $prefix;

    my $stack = $self->{Prefix_Table}->{$prefix};
    if (defined($stack) and @$stack > 1) {
        pop(@$stack);
    }
    else {
        delete $self->{Prefix_Table}->{$prefix};
    }
}
| 340 | |||||
| 341 | ################ | ||||
| 342 | |||||
# Returns GetSpecifiedAttributeCount for the underlying parser, but only
# while the object's _State_ field is 1.
sub specified_attr {
    my ($self) = @_;
    return GetSpecifiedAttributeCount($self->{Parser})
        if $self->{_State_} == 1;
}
| 350 | |||||
# Calls UnsetAllHandlers on the underlying parser, but only while the
# object's _State_ field is 1.
sub finish {
    my ($self) = @_;
    UnsetAllHandlers($self->{Parser}) if $self->{_State_} == 1;
}
| 358 | |||||
# Builds a text excerpt around the current parse position with a marker
# line ("=" repeated up to the current column, then "^") inserted into it.
# LINES is passed through to PositionContext, which supplies the excerpt
# and the offset at which the marker line belongs.
#
# Only acts while the object's _State_ field is 1.  Returns '' when
# PositionContext yields no string.
sub position_in_context {
    my ($self, $lines) = @_;

    if ($self->{_State_} == 1) {
        my $parser = $self->{Parser};
        my ($string, $linepos) = PositionContext($parser, $lines);

        return '' unless defined($string);

        my $marker = ('=' x (GetCurrentColumnNumber($parser) - 1)) . '^' . "\n";

        # Decide whether the marker goes inside the excerpt before a
        # trailing newline is appended below and changes its length.
        my $insert_inside = $linepos < length($string);

        $string .= "\n" unless $string =~ /\n$/;

        return $insert_inside
            ? substr($string, 0, $linepos) . $marker . substr($string, $linepos)
            : $string . $marker;
    }
}
| 384 | |||||
# Returns TEXT with "&" and "<" replaced by &amp; and &lt;.  Each extra
# single-character argument is escaped as well: ">", '"' and "'" become
# &gt;, &quot; and &apos;, and any other character becomes a hexadecimal
# character reference (&#xHH;).
#
# Every occurrence is replaced, and the replacement is done in a single
# pass so that characters inside the entities produced here are never
# escaped a second time.
#
# Croaks when an extra argument is not exactly one character long.
sub xml_escape {
    my ($self, $text, @extra) = @_;

    my %entity_for = ('&' => '&amp;', '<' => '&lt;');
    my %named      = ('>' => '&gt;', '"' => '&quot;', "'" => '&apos;');

    foreach my $char (@extra) {
        croak "xml_escape: '$char' isn't a single character"
            if length($char) != 1;

        # "&" and "<" keep their named entities even when listed again.
        next if exists $entity_for{$char};

        $entity_for{$char} = exists $named{$char}
            ? $named{$char}
            : '&#' . sprintf('x%X', ord($char)) . ';';
    }

    # quotemeta keeps characters such as "]", "^", "-" and "\" literal
    # inside the character class.
    my $pattern = '[' . join('', map { quotemeta($_) } keys %entity_for) . ']';
    $text =~ s/($pattern)/$entity_for{$1}/g;

    return $text;
}
| 417 | |||||
# Passes the first argument to SkipUntil on the underlying parser, as long
# as the object's _State_ field is at most 1.
sub skip_until {
    my $self = shift;
    SkipUntil($self->{Parser}, $_[0]) if $self->{_State_} <= 1;
}
| 424 | |||||
# Calls ParserRelease on the underlying parser.
sub release {
    my ($self) = @_;
    ParserRelease($self->{Parser});
}
| 429 | |||||
# Destructor: calls ParserFree on the underlying parser.
sub DESTROY {
    my ($self) = @_;
    ParserFree($self->{Parser});
}
| 434 | |||||
# Parses a document given either as a string or as a filehandle.
#
# Croaks when the object's _State_ field is already true.  Sets _State_ to
# 1 for the duration of the parse and to 2 afterwards.  When the argument
# can be resolved to an IO handle it is read through ParseStream (with the
# input record separator temporarily set to "\n<Stream_Delimiter>\n" when
# Stream_Delimiter is defined); otherwise it is handed to ParseString.
#
# Returns the parse result when it is true; croaks with the ErrorMessage
# field otherwise.
sub parse {
    my ($self, $arg) = @_;

    croak "Parse already in progress (Expat)" if $self->{_State_};
    $self->{_State_} = 1;

    my $parser = $self->{Parser};
    my $ioref;
    my $result = 0;

    if (defined $arg) {
        # Localising the *@ glob keeps the eval below from disturbing the
        # caller's $@.
        local *@;

        if (ref($arg) and UNIVERSAL::isa($arg, 'IO::Handle')) {
            $ioref = $arg;
        }
        elsif ($] < 5.008 and defined tied($arg)) {
            require IO::Handle;
            $ioref = $arg;
        }
        else {
            require IO::Handle;

            # Try to treat the argument as a glob (or glob name) and pull
            # out its IO slot; any failure leaves $ioref undefined.
            eval {
                no strict 'refs';
                $ioref = *{$arg}{IO} if defined *{$arg};
            };
        }
    }

    if (defined($ioref)) {
        my $delim = $self->{Stream_Delimiter};
        my $prev_rs;

        my $ioclass = ref $ioref;
        $ioclass = "IO::Handle" unless length $ioclass;

        $prev_rs = $ioclass->input_record_separator("\n$delim\n")
            if defined($delim);

        # Profiler annotation captured for this statement in the report
        # this listing was taken from: 1276 statements, 13.0ms, spent as
        #   6.90ms making 238 calls to XML::SAX::Expat::_handle_start, avg 29µs/call
        #   3.72ms making 764 calls to XML::SAX::Expat::_handle_char, avg 5µs/call
        #   2.18ms making 238 calls to XML::SAX::Expat::_handle_end, avg 9µs/call
        #   128µs making 32 calls to XML::SAX::Expat::_handle_comment, avg 4µs/call
        #   82µs making 4 calls to IO::Handle::read, avg 20µs/call
        $result = ParseStream($parser, $ioref, $delim);

        $ioclass->input_record_separator($prev_rs)
            if defined($delim);
    }
    else {
        $result = ParseString($parser, $arg);
    }

    $self->{_State_} = 2;
    $result or croak $self->{ErrorMessage};
}
| 481 | |||||
# Forwards all of its arguments to the parse method.
sub parsestring {
    my ($self, @args) = @_;
    return $self->parse(@args);
}
| 486 | |||||
# Opens the named file for reading in binary mode, parses it through the
# parse method and returns that method's result.
#
# Croaks when the object's _State_ field is already true, or when the file
# cannot be opened.
sub parsefile {
    my $self = shift;

    croak "Parser has already been used" if $self->{_State_};

    local(*FILE);

    # Three-argument open: the argument is always taken as a file name to
    # read, never as an open mode or a command to run.
    open(FILE, '<', $_[0]) or croak "Couldn't open $_[0]:\n$!";
    binmode(FILE);

    my $ret = $self->parse(*FILE);
    close(FILE);

    return $ret;
}
| 497 | |||||
| 498 | ################################################################ | ||||
package #hide from PAUSE
  XML::Parser::ContentModel;
use overload '""' => \&asString, 'eq' => \&thiseq;

# Codes stored in the Type field of a content model object.
sub EMPTY  () {1}
sub ANY    () {2}
sub MIXED  () {3}
sub NAME   () {4}
sub CHOICE () {5}
sub SEQ    () {6}

# True when the model's Type is EMPTY.
sub isempty {
    my ($self) = @_;
    return $self->{Type} == EMPTY;
}

# True when the model's Type is ANY.
sub isany {
    my ($self) = @_;
    return $self->{Type} == ANY;
}

# True when the model's Type is MIXED.
sub ismixed {
    my ($self) = @_;
    return $self->{Type} == MIXED;
}

# True when the model's Type is NAME.
sub isname {
    my ($self) = @_;
    return $self->{Type} == NAME;
}

# Returns the Tag field.
sub name {
    my ($self) = @_;
    return $self->{Tag};
}

# True when the model's Type is CHOICE.
sub ischoice {
    my ($self) = @_;
    return $self->{Type} == CHOICE;
}

# True when the model's Type is SEQ.
sub isseq {
    my ($self) = @_;
    return $self->{Type} == SEQ;
}

# Returns the Quant field.
sub quant {
    my ($self) = @_;
    return $self->{Quant};
}

# Returns the entries of the Children array, or undef when the model has
# no Children field.
sub children {
    my ($self) = @_;

    my $kids = $self->{Children};
    return defined($kids) ? @$kids : undef;
}

# Renders the model as text: "EMPTY" or "ANY" for those types; the Tag for
# a NAME; "(#PCDATA|...)" for MIXED; otherwise the children joined by "|"
# (CHOICE) or "," inside parentheses.  The Quant field, when true, is
# appended to everything except EMPTY and ANY.
sub asString {
    my ($self) = @_;
    my $type = $self->{Type};

    return "EMPTY" if $type == EMPTY;
    return "ANY"   if $type == ANY;

    my $ret;

    if ($type == NAME) {
        $ret = $self->{Tag};
    }
    elsif ($type == MIXED) {
        $ret = '(#PCDATA';
        foreach (@{$self->{Children}}) {
            $ret .= '|' . $_;
        }
        $ret .= ')';
    }
    else {
        my $sep = $type == CHOICE ? '|' : ',';
        $ret = '(' . join($sep, map { $_->asString } @{$self->{Children}}) . ')';
    }

    $ret .= $self->{Quant} if $self->{Quant};
    return $ret;
}

# Implements the overloaded 'eq': compares the model's string form with
# the other operand.
sub thiseq {
    my $self = shift;

    return $self->asString eq $_[0];
}
| 585 | |||||
| 586 | ################################################################ | ||||
package #hide from PAUSE
  XML::Parser::ExpatNB;

use vars qw(@ISA);
use Carp;

@ISA = qw(XML::Parser::Expat);

# parse is not available in this class; always croaks.
sub parse {
    my ($self) = @_;
    my $class = ref($self);
    croak "parse method not supported in $class";
}

# parsestring is not available in this class; always croaks.
sub parsestring {
    my ($self) = @_;
    my $class = ref($self);
    croak "parsestring method not supported in $class";
}

# parsefile is not available in this class; always croaks.
sub parsefile {
    my ($self) = @_;
    my $class = ref($self);
    croak "parsefile method not supported in $class";
}

# Feeds DATA to the parser through ParsePartial after setting the
# _State_ field to 1.  Croaks with the ErrorMessage field when
# ParsePartial returns a false value.
sub parse_more {
    my ($self, $data) = @_;

    $self->{_State_} = 1;
    my $ret = XML::Parser::Expat::ParsePartial($self->{Parser}, $data);

    croak $self->{ErrorMessage} unless $ret;
}

# Finishes the parse through ParseDone.  On failure the parser is released
# and the ErrorMessage field is croaked.  On success _State_ becomes 2 and
# the FinalHandler field, when defined, is called with the object in the
# caller's context; the parser is then released.
#
# Returns the handler's result, or the ParseDone result when no handler is
# set (an empty list in list context in that case).
sub parse_done {
    my ($self) = @_;

    my $ret = XML::Parser::Expat::ParseDone($self->{Parser});
    unless ($ret) {
        # Read the message before release, then croak with it.
        my $msg = $self->{ErrorMessage};
        $self->release;
        croak $msg;
    }

    $self->{_State_} = 2;

    my $result = $ret;
    my @result = ();

    my $final = $self->{FinalHandler};
    if (defined $final) {
        if (wantarray) {
            @result = $final->($self);
        }
        else {
            $result = $final->($self);
        }
    }

    $self->release;

    return unless defined wantarray;
    return wantarray ? @result : $result;
}
| 651 | |||||
| 652 | ################################################################ | ||||
| 653 | |||||
package #hide from PAUSE
  XML::Parser::Encinfo;

# Destructor: passes the object to XML::Parser::Expat::FreeEncoding.
sub DESTROY {
    my ($self) = @_;
    XML::Parser::Expat::FreeEncoding($self);
}
| 661 | |||||
| 662 | 1; | ||||
| 663 | |||||
| 664 | __END__ | ||||
| 665 | |||||
| 666 | =head1 NAME | ||||
| 667 | |||||
| 668 | XML::Parser::Expat - Lowlevel access to James Clark's expat XML parser | ||||
| 669 | |||||
| 670 | =head1 SYNOPSIS | ||||
| 671 | |||||
| 672 | use XML::Parser::Expat; | ||||
| 673 | |||||
| 674 | $parser = XML::Parser::Expat->new; | ||||
| 675 | $parser->setHandlers('Start' => \&sh, | ||||
| 676 | 'End' => \&eh, | ||||
| 677 | 'Char' => \&ch); | ||||
| 678 | open(FOO, '<', 'info.xml') or die "Couldn't open"; | ||||
| 679 | $parser->parse(*FOO); | ||||
| 680 | close(FOO); | ||||
| 681 | # $parser->parse('<foo id="me"> here <em>we</em> go </foo>'); | ||||
| 682 | |||||
| 683 | sub sh | ||||
| 684 | { | ||||
| 685 | my ($p, $el, %atts) = @_; | ||||
| 686 | $p->setHandlers('Char' => \&spec) | ||||
| 687 | if ($el eq 'special'); | ||||
| 688 | ... | ||||
| 689 | } | ||||
| 690 | |||||
| 691 | sub eh | ||||
| 692 | { | ||||
| 693 | my ($p, $el) = @_; | ||||
| 694 | $p->setHandlers('Char' => \&ch) # Special elements won't contain | ||||
| 695 | if ($el eq 'special'); # other special elements | ||||
| 696 | ... | ||||
| 697 | } | ||||
| 698 | |||||
| 699 | =head1 DESCRIPTION | ||||
| 700 | |||||
| 701 | This module provides an interface to James Clark's XML parser, expat. As in | ||||
| 702 | expat, a single instance of the parser can only parse one document. Calls | ||||
| 703 | to parsestring after the first for a given instance will die. | ||||
| 704 | |||||
| 705 | Expat (and XML::Parser::Expat) are event based. As the parser recognizes | ||||
| 706 | parts of the document (say the start or end of an XML element), then any | ||||
| 707 | handlers registered for that type of an event are called with suitable | ||||
| 708 | parameters. | ||||
| 709 | |||||
| 710 | =head1 METHODS | ||||
| 711 | |||||
| 712 | =over 4 | ||||
| 713 | |||||
| 714 | =item new | ||||
| 715 | |||||
| 716 | This is a class method, the constructor for XML::Parser::Expat. Options are | ||||
| 717 | passed as keyword value pairs. The recognized options are: | ||||
| 718 | |||||
| 719 | =over 4 | ||||
| 720 | |||||
| 721 | =item * ProtocolEncoding | ||||
| 722 | |||||
| 723 | The protocol encoding name. The default is none. The expat built-in | ||||
| 724 | encodings are: C<UTF-8>, C<ISO-8859-1>, C<UTF-16>, and C<US-ASCII>. | ||||
| 725 | Other encodings may be used if they have encoding maps in one of the | ||||
| 726 | directories in the @Encoding_Path list. Setting the protocol encoding | ||||
| 727 | overrides any encoding in the XML declaration. | ||||
| 728 | |||||
| 729 | =item * Namespaces | ||||
| 730 | |||||
| 731 | When this option is given with a true value, then the parser does namespace | ||||
| 732 | processing. By default, namespace processing is turned off. When it is | ||||
| 733 | turned on, the parser consumes I<xmlns> attributes and strips off prefixes | ||||
| 734 | from element and attributes names where those prefixes have a defined | ||||
| 735 | namespace. A name's namespace can be found using the L<"namespace"> method | ||||
| 736 | and two names can be checked for absolute equality with the L<"eq_name"> | ||||
| 737 | method. | ||||
| 738 | |||||
| 739 | =item * NoExpand | ||||
| 740 | |||||
| 741 | Normally, the parser will try to expand references to entities defined in | ||||
| 742 | the internal subset. If this option is set to a true value, and a default | ||||
| 743 | handler is also set, then the default handler will be called when an | ||||
| 744 | entity reference is seen in text. This has no effect if a default handler | ||||
| 745 | has not been registered, and it has no effect on the expansion of entity | ||||
| 746 | references inside attribute values. | ||||
| 747 | |||||
| 748 | =item * Stream_Delimiter | ||||
| 749 | |||||
| 750 | This option takes a string value. When this string is found alone on a line | ||||
| 751 | while parsing from a stream, then the parse is ended as if it saw an end of | ||||
| 752 | file. The intended use is with a stream of xml documents in a MIME multipart | ||||
| 753 | format. The string should not contain a trailing newline. | ||||
| 754 | |||||
| 755 | =item * ErrorContext | ||||
| 756 | |||||
| 757 | When this option is defined, errors are reported in context. The value | ||||
| 758 | of ErrorContext should be the number of lines to show on either side of | ||||
| 759 | the line in which the error occurred. | ||||
| 760 | |||||
| 761 | =item * ParseParamEnt | ||||
| 762 | |||||
| 763 | Unless standalone is set to "yes" in the XML declaration, setting this to | ||||
| 764 | a true value allows the external DTD to be read, and parameter entities | ||||
| 765 | to be parsed and expanded. | ||||
| 766 | |||||
| 767 | =item * Base | ||||
| 768 | |||||
| 769 | The base to use for relative pathnames or URLs. This can also be done by | ||||
| 770 | using the base method. | ||||
| 771 | |||||
| 772 | =back | ||||
| 773 | |||||
| 774 | =item setHandlers(TYPE, HANDLER [, TYPE, HANDLER [...]]) | ||||
| 775 | |||||
| 776 | This method registers handlers for the various events. If no handlers are | ||||
| 777 | registered, then a call to parsestring or parsefile will only determine if | ||||
| 778 | the corresponding XML document is well formed (by returning without error.) | ||||
| 779 | This may be called from within a handler, after the parse has started. | ||||
| 780 | |||||
| 781 | Setting a handler to something that evaluates to false unsets that | ||||
| 782 | handler. | ||||
| 783 | |||||
| 784 | This method returns a list of type, handler pairs corresponding to the | ||||
| 785 | input. The handlers returned are the ones that were in effect before the | ||||
| 786 | call to setHandlers. | ||||
| 787 | |||||
| 788 | The recognized events and the parameters passed to the corresponding | ||||
| 789 | handlers are: | ||||
| 790 | |||||
| 791 | =over 4 | ||||
| 792 | |||||
| 793 | =item * Start (Parser, Element [, Attr, Val [,...]]) | ||||
| 794 | |||||
| 795 | This event is generated when an XML start tag is recognized. Parser is | ||||
| 796 | an XML::Parser::Expat instance. Element is the name of the XML element that | ||||
| 797 | is opened with the start tag. The Attr & Val pairs are generated for each | ||||
| 798 | attribute in the start tag. | ||||
| 799 | |||||
| 800 | =item * End (Parser, Element) | ||||
| 801 | |||||
| 802 | This event is generated when an XML end tag is recognized. Note that | ||||
| 803 | an XML empty tag (<foo/>) generates both a start and an end event. | ||||
| 804 | |||||
| 805 | There is always a lower level start and end handler installed that wrap | ||||
| 806 | the corresponding callbacks. This is to handle the context mechanism. | ||||
| 807 | A consequence of this is that the default handler (see below) will not | ||||
| 808 | see a start tag or end tag unless the default_current method is called. | ||||
| 809 | |||||
| 810 | =item * Char (Parser, String) | ||||
| 811 | |||||
| 812 | This event is generated when non-markup is recognized. The non-markup | ||||
| 813 | sequence of characters is in String. A single non-markup sequence of | ||||
| 814 | characters may generate multiple calls to this handler. Whatever the | ||||
| 815 | encoding of the string in the original document, this is given to the | ||||
| 816 | handler in UTF-8. | ||||
| 817 | |||||
| 818 | =item * Proc (Parser, Target, Data) | ||||
| 819 | |||||
| 820 | This event is generated when a processing instruction is recognized. | ||||
| 821 | |||||
| 822 | =item * Comment (Parser, String) | ||||
| 823 | |||||
| 824 | This event is generated when a comment is recognized. | ||||
| 825 | |||||
| 826 | =item * CdataStart (Parser) | ||||
| 827 | |||||
| 828 | This is called at the start of a CDATA section. | ||||
| 829 | |||||
| 830 | =item * CdataEnd (Parser) | ||||
| 831 | |||||
| 832 | This is called at the end of a CDATA section. | ||||
| 833 | |||||
| 834 | =item * Default (Parser, String) | ||||
| 835 | |||||
| 836 | This is called for any characters that don't have a registered handler. | ||||
| 837 | This includes both characters that are part of markup for which no | ||||
| 838 | events are generated (markup declarations) and characters that | ||||
| 839 | could generate events, but for which no handler has been registered. | ||||
| 840 | |||||
| 841 | Whatever the encoding in the original document, the string is returned to | ||||
| 842 | the handler in UTF-8. | ||||
| 843 | |||||
| 844 | =item * Unparsed (Parser, Entity, Base, Sysid, Pubid, Notation) | ||||
| 845 | |||||
| 846 | This is called for a declaration of an unparsed entity. Entity is the name | ||||
| 847 | of the entity. Base is the base to be used for resolving a relative URI. | ||||
| 848 | Sysid is the system id. Pubid is the public id. Notation is the notation | ||||
| 849 | name. Base and Pubid may be undefined. | ||||
| 850 | |||||
| 851 | =item * Notation (Parser, Notation, Base, Sysid, Pubid) | ||||
| 852 | |||||
| 853 | This is called for a declaration of notation. Notation is the notation name. | ||||
| 854 | Base is the base to be used for resolving a relative URI. Sysid is the system | ||||
| 855 | id. Pubid is the public id. Base, Sysid, and Pubid may all be undefined. | ||||
| 856 | |||||
| 857 | =item * ExternEnt (Parser, Base, Sysid, Pubid) | ||||
| 858 | |||||
| 859 | This is called when an external entity is referenced. Base is the base to be | ||||
| 860 | used for resolving a relative URI. Sysid is the system id. Pubid is the public | ||||
| 861 | id. Base, and Pubid may be undefined. | ||||
| 862 | |||||
| 863 | This handler should either return a string, which represents the contents of | ||||
| 864 | the external entity, or return an open filehandle that can be read to obtain | ||||
| 865 | the contents of the external entity, or return undef, which indicates the | ||||
| 866 | external entity couldn't be found and will generate a parse error. | ||||
| 867 | |||||
| 868 | If an open filehandle is returned, it must be returned as either a glob | ||||
| 869 | (*FOO) or as a reference to a glob (e.g. an instance of IO::Handle). | ||||
| 870 | |||||
| 871 | =item * ExternEntFin (Parser) | ||||
| 872 | |||||
| 873 | This is called after an external entity has been parsed. It allows | ||||
| 874 | applications to perform cleanup on actions performed in the above | ||||
| 875 | ExternEnt handler. | ||||
| 876 | |||||
| 877 | =item * Entity (Parser, Name, Val, Sysid, Pubid, Ndata, IsParam) | ||||
| 878 | |||||
| 879 | This is called when an entity is declared. For internal entities, the Val | ||||
| 880 | parameter will contain the value and the remaining three parameters will | ||||
| 881 | be undefined. For external entities, the Val parameter | ||||
| 882 | will be undefined, the Sysid parameter will have the system id, the Pubid | ||||
| 883 | parameter will have the public id if it was provided (it will be undefined | ||||
| 884 | otherwise), the Ndata parameter will contain the notation for unparsed | ||||
| 885 | entities. If this is a parameter entity declaration, then the IsParam | ||||
| 886 | parameter is true. | ||||
| 887 | |||||
| 888 | Note that this handler and the Unparsed handler above overlap. If both are | ||||
| 889 | set, then this handler will not be called for unparsed entities. | ||||
| 890 | |||||
| 891 | =item * Element (Parser, Name, Model) | ||||
| 892 | |||||
| 893 | The element handler is called when an element declaration is found. Name is | ||||
| 894 | the element name, and Model is the content model as an | ||||
| 895 | XML::Parser::ContentModel object. See L<"XML::Parser::ContentModel Methods"> | ||||
| 896 | for methods available for this class. | ||||
| 897 | |||||
| 898 | =item * Attlist (Parser, Elname, Attname, Type, Default, Fixed) | ||||
| 899 | |||||
| 900 | This handler is called for each attribute in an ATTLIST declaration. | ||||
| 901 | So an ATTLIST declaration that has multiple attributes | ||||
| 902 | will generate multiple calls to this handler. The Elname parameter is the | ||||
| 903 | name of the element with which the attribute is being associated. The Attname | ||||
| 904 | parameter is the name of the attribute. Type is the attribute type, given as | ||||
| 905 | a string. Default is the default value, which will either be "#REQUIRED", | ||||
| 906 | "#IMPLIED" or a quoted string (i.e. the returned string will begin and end | ||||
| 907 | with a quote character). If Fixed is true, then this is a fixed attribute. | ||||
| 908 | |||||
| 909 | =item * Doctype (Parser, Name, Sysid, Pubid, Internal) | ||||
| 910 | |||||
| 911 | This handler is called for DOCTYPE declarations. Name is the document type | ||||
| 912 | name. Sysid is the system id of the document type, if it was provided, | ||||
| 913 | otherwise it's undefined. Pubid is the public id of the document type, | ||||
| 914 | which will be undefined if no public id was given. Internal will be | ||||
| 915 | true or false, indicating whether or not the doctype declaration contains | ||||
| 916 | an internal subset. | ||||
| 917 | |||||
| 918 | =item * DoctypeFin (Parser) | ||||
| 919 | |||||
| 920 | This handler is called after parsing of the DOCTYPE declaration has finished, | ||||
| 921 | including any internal or external DTD declarations. | ||||
| 922 | |||||
| 923 | =item * XMLDecl (Parser, Version, Encoding, Standalone) | ||||
| 924 | |||||
| 925 | This handler is called for XML declarations. Version is a string containing | ||||
| 926 | the version. Encoding is either undefined or contains an encoding string. | ||||
| 927 | Standalone is either undefined, or true or false. Undefined indicates | ||||
| 928 | that no standalone parameter was given in the XML declaration. True or | ||||
| 929 | false indicates "yes" or "no" respectively. | ||||
| 930 | |||||
| 931 | =back | ||||
| 932 | |||||
| 933 | =item namespace(name) | ||||
| 934 | |||||
| 935 | Return the URI of the namespace that the name belongs to. If the name doesn't | ||||
| 936 | belong to any namespace, an undef is returned. This is only valid on names | ||||
| 937 | received through the Start or End handlers from a single document, or through | ||||
| 938 | a call to the generate_ns_name method. In other words, don't use names | ||||
| 939 | generated from one instance of XML::Parser::Expat with other instances. | ||||
| 940 | |||||
| 941 | =item eq_name(name1, name2) | ||||
| 942 | |||||
| 943 | Return true if name1 and name2 are identical (i.e. same name and from | ||||
| 944 | the same namespace.) This is only meaningful if both names were obtained | ||||
| 945 | through the Start or End handlers from a single document, or through | ||||
| 946 | a call to the generate_ns_name method. | ||||
| 947 | |||||
| 948 | =item generate_ns_name(name, namespace) | ||||
| 949 | |||||
| 950 | Return a name, associated with a given namespace, good for using with the | ||||
| 951 | above 2 methods. The namespace argument should be the namespace URI, not | ||||
| 952 | a prefix. | ||||
| 953 | |||||
| 954 | =item new_ns_prefixes | ||||
| 955 | |||||
| 956 | When called from a start tag handler, returns namespace prefixes declared | ||||
| 957 | with this start tag. If called elsewhere (or if there were no namespace | ||||
| 958 | prefixes declared), it returns an empty list. Setting of the default | ||||
| 959 | namespace is indicated with '#default' as a prefix. | ||||
| 960 | |||||
| 961 | =item expand_ns_prefix(prefix) | ||||
| 962 | |||||
| 963 | Return the uri to which the given prefix is currently bound. Returns | ||||
| 964 | undef if the prefix isn't currently bound. Use '#default' to find the | ||||
| 965 | current binding of the default namespace (if any). | ||||
| 966 | |||||
| 967 | =item current_ns_prefixes | ||||
| 968 | |||||
| 969 | Return a list of currently bound namespace prefixes. The order of the | ||||
| 970 | prefixes in the list has no meaning. If the default namespace is | ||||
| 971 | currently bound, '#default' appears in the list. | ||||
| 972 | |||||
| 973 | =item recognized_string | ||||
| 974 | |||||
| 975 | Returns the string from the document that was recognized in order to call | ||||
| 976 | the current handler. For instance, when called from a start handler, it | ||||
| 977 | will give us the start-tag string. The string is encoded in UTF-8. | ||||
| 978 | This method doesn't return a meaningful string inside declaration handlers. | ||||
| 979 | |||||
| 980 | =item original_string | ||||
| 981 | |||||
| 982 | Returns the verbatim string from the document that was recognized in | ||||
| 983 | order to call the current handler. The string is in the original document | ||||
| 984 | encoding. This method doesn't return a meaningful string inside declaration | ||||
| 985 | handlers. | ||||
| 986 | |||||
| 987 | =item default_current | ||||
| 988 | |||||
| 989 | When called from a handler, causes the sequence of characters that generated | ||||
| 990 | the corresponding event to be sent to the default handler (if one is | ||||
| 991 | registered). Use of this method is deprecated in favor of the recognized_string | ||||
| 992 | method, which you can use without installing a default handler. This | ||||
| 993 | method doesn't deliver a meaningful string to the default handler when | ||||
| 994 | called from inside declaration handlers. | ||||
| 995 | |||||
| 996 | =item xpcroak(message) | ||||
| 997 | |||||
| 998 | Concatenate onto the given message the current line number within the | ||||
| 999 | XML document plus the message implied by ErrorContext. Then croak with | ||||
| 1000 | the formed message. | ||||
| 1001 | |||||
| 1002 | =item xpcarp(message) | ||||
| 1003 | |||||
| 1004 | Concatenate onto the given message the current line number within the | ||||
| 1005 | XML document plus the message implied by ErrorContext. Then carp with | ||||
| 1006 | the formed message. | ||||
| 1007 | |||||
| 1008 | =item current_line | ||||
| 1009 | |||||
| 1010 | Returns the line number of the current position of the parse. | ||||
| 1011 | |||||
| 1012 | =item current_column | ||||
| 1013 | |||||
| 1014 | Returns the column number of the current position of the parse. | ||||
| 1015 | |||||
| 1016 | =item current_byte | ||||
| 1017 | |||||
| 1018 | Returns the current position of the parse. | ||||
| 1019 | |||||
| 1020 | =item base([NEWBASE]) | ||||
| 1021 | |||||
| 1022 | Returns the current value of the base for resolving relative URIs. If | ||||
| 1023 | NEWBASE is supplied, changes the base to that value. | ||||
| 1024 | |||||
| 1025 | =item context | ||||
| 1026 | |||||
| 1027 | Returns a list of element names that represent open elements, with the | ||||
| 1028 | last one being the innermost. Inside start and end tag handlers, this | ||||
| 1029 | will be the tag of the parent element. | ||||
| 1030 | |||||
| 1031 | =item current_element | ||||
| 1032 | |||||
| 1033 | Returns the name of the innermost currently opened element. Inside | ||||
| 1034 | start or end handlers, returns the parent of the element associated | ||||
| 1035 | with those tags. | ||||
| 1036 | |||||
| 1037 | =item in_element(NAME) | ||||
| 1038 | |||||
| 1039 | Returns true if NAME is equal to the name of the innermost currently opened | ||||
| 1040 | element. If namespace processing is being used and you want to check | ||||
| 1041 | against a name that may be in a namespace, then use the generate_ns_name | ||||
| 1042 | method to create the NAME argument. | ||||
| 1043 | |||||
| 1044 | =item within_element(NAME) | ||||
| 1045 | |||||
| 1046 | Returns the number of times the given name appears in the context list. | ||||
| 1047 | If namespace processing is being used and you want to check | ||||
| 1048 | against a name that may be in a namespace, then use the generate_ns_name | ||||
| 1049 | method to create the NAME argument. | ||||
| 1050 | |||||
| 1051 | =item depth | ||||
| 1052 | |||||
| 1053 | Returns the size of the context list. | ||||
| 1054 | |||||
| 1055 | =item element_index | ||||
| 1056 | |||||
| 1057 | Returns an integer that is the depth-first visit order of the current | ||||
| 1058 | element. This will be zero outside of the root element. For example, | ||||
| 1059 | this will return 1 when called from the start handler for the root element | ||||
| 1060 | start tag. | ||||
| 1061 | |||||
| 1062 | =item skip_until(INDEX) | ||||
| 1063 | |||||
| 1064 | INDEX is an integer that represents an element index. When this method | ||||
| 1065 | is called, all handlers are suspended until the start tag for an element | ||||
| 1066 | that has an index number equal to INDEX is seen. If a start handler has | ||||
| 1067 | been set, then this is the first tag that the start handler will see | ||||
| 1068 | after skip_until has been called. | ||||
| 1069 | |||||
| 1070 | |||||
| 1071 | =item position_in_context(LINES) | ||||
| 1072 | |||||
| 1073 | Returns a string that shows the current parse position. LINES should be | ||||
| 1074 | an integer >= 0 that represents the number of lines on either side of the | ||||
| 1075 | current parse line to place into the returned string. | ||||
| 1076 | |||||
| 1077 | =item xml_escape(TEXT [, CHAR [, CHAR ...]]) | ||||
| 1078 | |||||
| 1079 | Returns TEXT with markup characters turned into character entities. Any | ||||
| 1080 | additional characters provided as arguments are also turned into character | ||||
| 1081 | references where found in TEXT. | ||||
| 1082 | |||||
| 1083 | =item parse (SOURCE) | ||||
| 1084 | |||||
| 1085 | The SOURCE parameter should either be a string containing the whole XML | ||||
| 1086 | document, or it should be an open IO::Handle. Only a single document | ||||
| 1087 | may be parsed for a given instance of XML::Parser::Expat, so this will croak | ||||
| 1088 | if it's been called previously for this instance. | ||||
| 1089 | |||||
| 1090 | =item parsestring(XML_DOC_STRING) | ||||
| 1091 | |||||
| 1092 | Parses the given string as an XML document. Only a single document may be | ||||
| 1093 | parsed for a given instance of XML::Parser::Expat, so this will die if either | ||||
| 1094 | parsestring or parsefile has been called for this instance previously. | ||||
| 1095 | |||||
| 1096 | This method is deprecated in favor of the parse method. | ||||
| 1097 | |||||
| 1098 | =item parsefile(FILENAME) | ||||
| 1099 | |||||
| 1100 | Parses the XML document in the given file. Will die if parsestring or | ||||
| 1101 | parsefile has been called previously for this instance. | ||||
| 1102 | |||||
| 1103 | =item is_defaulted(ATTNAME) | ||||
| 1104 | |||||
| 1105 | NO LONGER WORKS. To find out if an attribute is defaulted please use | ||||
| 1106 | the specified_attr method. | ||||
| 1107 | |||||
| 1108 | =item specified_attr | ||||
| 1109 | |||||
| 1110 | When the start handler receives lists of attributes and values, the | ||||
| 1111 | non-defaulted (i.e. explicitly specified) attributes occur in the list | ||||
| 1112 | first. This method returns the number of specified items in the list. | ||||
| 1113 | So if this number is equal to the length of the list, there were no | ||||
| 1114 | defaulted values. Otherwise the number points to the index of the | ||||
| 1115 | first defaulted attribute name. | ||||
| 1116 | |||||
| 1117 | =item finish | ||||
| 1118 | |||||
| 1119 | Unsets all handlers (including internal ones that set context), but expat | ||||
| 1120 | continues parsing to the end of the document or until it finds an error. | ||||
| 1121 | It should finish up a lot faster than with the handlers set. | ||||
| 1122 | |||||
| 1123 | =item release | ||||
| 1124 | |||||
| 1125 | There are data structures used by XML::Parser::Expat that have circular | ||||
| 1126 | references. This means that these structures will never be garbage | ||||
| 1127 | collected unless these references are explicitly broken. Calling this | ||||
| 1128 | method breaks those references (and makes the instance unusable.) | ||||
| 1129 | |||||
| 1130 | Normally, higher level calls handle this for you, but if you are using | ||||
| 1131 | XML::Parser::Expat directly, then it's your responsibility to call it. | ||||
| 1132 | |||||
| 1133 | =back | ||||
| 1134 | |||||
| 1135 | =head2 XML::Parser::ContentModel Methods | ||||
| 1136 | |||||
| 1137 | The element declaration handlers are passed objects of this class as the | ||||
| 1138 | content model of the element declaration. They also represent content | ||||
| 1139 | particles, components of a content model. | ||||
| 1140 | |||||
| 1141 | When referred to as a string, these objects are automagically converted to a | ||||
| 1142 | string representation of the model (or content particle). | ||||
| 1143 | |||||
| 1144 | =over 4 | ||||
| 1145 | |||||
| 1146 | =item isempty | ||||
| 1147 | |||||
| 1148 | This method returns true if the object is "EMPTY", false otherwise. | ||||
| 1149 | |||||
| 1150 | =item isany | ||||
| 1151 | |||||
| 1152 | This method returns true if the object is "ANY", false otherwise. | ||||
| 1153 | |||||
| 1154 | =item ismixed | ||||
| 1155 | |||||
| 1156 | This method returns true if the object is "(#PCDATA)" or "(#PCDATA|...)*", | ||||
| 1157 | false otherwise. | ||||
| 1158 | |||||
| 1159 | =item isname | ||||
| 1160 | |||||
| 1161 | This method returns true if the object is an element name. | ||||
| 1162 | |||||
| 1163 | =item ischoice | ||||
| 1164 | |||||
| 1165 | This method returns true if the object is a choice of content particles. | ||||
| 1166 | |||||
| 1167 | |||||
| 1168 | =item isseq | ||||
| 1169 | |||||
| 1170 | This method returns true if the object is a sequence of content particles. | ||||
| 1171 | |||||
| 1172 | =item quant | ||||
| 1173 | |||||
| 1174 | This method returns undef or a string representing the quantifier | ||||
| 1175 | ('?', '*', '+') associated with the model or particle. | ||||
| 1176 | |||||
| 1177 | =item children | ||||
| 1178 | |||||
| 1179 | This method returns undef or (for mixed, choice, and sequence types) | ||||
| 1180 | an array of component content particles. There will always be at least | ||||
| 1181 | one component for choices and sequences, but for a mixed content model | ||||
| 1182 | of pure PCDATA, "(#PCDATA)", an undef is returned. | ||||
| 1183 | |||||
| 1184 | =back | ||||
| 1185 | |||||
| 1186 | =head2 XML::Parser::ExpatNB Methods | ||||
| 1187 | |||||
| 1188 | The class XML::Parser::ExpatNB is a subclass of XML::Parser::Expat used | ||||
| 1189 | for non-blocking access to the expat library. It does not support the parse, | ||||
| 1190 | parsestring, or parsefile methods, but it does have these additional methods: | ||||
| 1191 | |||||
| 1192 | =over 4 | ||||
| 1193 | |||||
| 1194 | =item parse_more(DATA) | ||||
| 1195 | |||||
| 1196 | Feed expat more text to munch on. | ||||
| 1197 | |||||
| 1198 | =item parse_done | ||||
| 1199 | |||||
| 1200 | Tell expat that it's gotten the whole document. | ||||
| 1201 | |||||
| 1202 | =back | ||||
| 1203 | |||||
| 1204 | =head1 FUNCTIONS | ||||
| 1205 | |||||
| 1206 | =over 4 | ||||
| 1207 | |||||
| 1208 | =item XML::Parser::Expat::load_encoding(ENCODING) | ||||
| 1209 | |||||
| 1210 | Load an external encoding. ENCODING is either the name of an encoding or | ||||
| 1211 | the name of a file. The basename is converted to lowercase and a '.enc' | ||||
| 1212 | extension is appended unless there's one already there. Then, unless | ||||
| 1213 | it's an absolute pathname (i.e. begins with '/'), the first file by that | ||||
| 1214 | name discovered in the @Encoding_Path path list is used. | ||||
| 1215 | |||||
| 1216 | The encoding in the file is loaded and kept in the %Encoding_Table | ||||
| 1217 | table. Earlier encodings of the same name are replaced. | ||||
| 1218 | |||||
| 1219 | This function is automatically called by expat when it encounters an encoding | ||||
| 1220 | it doesn't know about. Expat shouldn't call this twice for the same | ||||
| 1221 | encoding name. The only reason users should use this function is to | ||||
| 1222 | explicitly load an encoding not contained in the @Encoding_Path list. | ||||
| 1223 | |||||
| 1224 | =back | ||||
| 1225 | |||||
| 1226 | =head1 AUTHORS | ||||
| 1227 | |||||
| 1228 | Larry Wall <F<larry@wall.org>> wrote version 1.0. | ||||
| 1229 | |||||
| 1230 | Clark Cooper <F<coopercc@netheaven.com>> picked up support, changed the API | ||||
| 1231 | for this version (2.x), provided documentation, and added some standard | ||||
| 1232 | package features. | ||||
| 1233 | |||||
| 1234 | =cut |