Filename | /usr/lib/x86_64-linux-gnu/perl5/5.20/XML/Parser/Expat.pm |
Statements | Executed 0 statements in 0s |
Line | Statements |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | package XML::Parser::Expat; | ||||
2 | |||||
3 | require 5.004; | ||||
4 | |||||
5 | use strict; | ||||
6 | use vars qw($VERSION @ISA %Handler_Setters %Encoding_Table @Encoding_Path | ||||
7 | $have_File_Spec); | ||||
8 | use Carp; | ||||
9 | |||||
10 | require DynaLoader; | ||||
11 | |||||
12 | @ISA = qw(DynaLoader); | ||||
13 | $VERSION = "2.41"; | ||||
14 | |||||
# File::Spec counts as available when it is already loaded (recorded in
# %INC) or when loading it right now with `do` yields a true value.
$have_File_Spec = $INC{'File/Spec.pm'} || do 'File/Spec.pm';

# Start with an empty encoding table.
%Encoding_Table = ();

# Search path for encoding maps: every existing XML/Parser/Encodings
# directory found below an @INC entry, followed by the current directory.
# Paths are built with File::Spec when it is available, otherwise with
# plain '/'-joined strings.
@Encoding_Path = $have_File_Spec
    ? ( ( grep { -d $_ }
          map  { File::Spec->catdir($_, qw(XML Parser Encodings)) } @INC ),
        File::Spec->curdir )
    : ( ( grep { -d $_ } map { $_ . '/XML/Parser/Encodings' } @INC ), '.' );
27 | |||||
28 | |||||
# Load the compiled half of this module (bootstrap is inherited from
# DynaLoader through @ISA).  The Set*Handler routines referenced below are
# not defined in this file; presumably they are provided by that loaded
# code -- confirm against Expat.xs.
XML::Parser::Expat->bootstrap($VERSION);

# Maps the handler type names accepted by setHandlers() to the routine
# that installs a handler of that type.
%Handler_Setters = (
    # document content
    Start        => \&SetStartElementHandler,
    End          => \&SetEndElementHandler,
    Char         => \&SetCharacterDataHandler,
    Proc         => \&SetProcessingInstructionHandler,
    Comment      => \&SetCommentHandler,
    CdataStart   => \&SetStartCdataHandler,
    CdataEnd     => \&SetEndCdataHandler,
    Default      => \&SetDefaultHandler,

    # declarations and external entities
    Unparsed     => \&SetUnparsedEntityDeclHandler,
    Notation     => \&SetNotationDeclHandler,
    ExternEnt    => \&SetExternalEntityRefHandler,
    ExternEntFin => \&SetExtEntFinishHandler,
    Entity       => \&SetEntityDeclHandler,
    Element      => \&SetElementDeclHandler,
    Attlist      => \&SetAttListDeclHandler,
    Doctype      => \&SetDoctypeHandler,
    DoctypeFin   => \&SetEndDoctypeHandler,
    XMLDecl      => \&SetXMLDeclHandler,
);
51 | |||||
# Constructor.  The key/value options passed by the caller become the
# object itself (a blessed hash), so option names double as instance
# fields.  On top of those, the parse state, the element context stack and
# (when Namespaces is true) the namespace bookkeeping tables are
# initialised, and the low-level parser created by ParserCreate is stored
# under the Parser key.
sub new {
    my ($class, %opts) = @_;
    my $self = bless \%opts, $class;

    $self->{_State_}      = 0;      # parse() moves this to 1, then 2
    $self->{Context}      = [];     # stack read by context(), depth(), ...
    $self->{Namespaces}   ||= 0;
    $self->{ErrorMessage} ||= '';

    if ($self->{Namespaces}) {
        @{$self}{qw(Namespace_Table Namespace_List Prefix_Table New_Prefixes)}
            = ({}, [undef], {}, []);
    }

    $self->{_Setters} = \%Handler_Setters;
    $self->{Parser}   = ParserCreate($self,
                                     $self->{ProtocolEncoding},
                                     $self->{Namespaces});
    return $self;
}
70 | |||||
# Load an encoding map file and return the encoding name reported by
# LoadEncoding for it.
#
# $file is the map's name or path.  Its last path component is
# lower-cased and '.enc' is appended when missing.  Unless the result
# starts with '/', each directory in @Encoding_Path is tried in order and
# the first existing file wins; if none exists the name is opened as given.
#
# Croaks when the file cannot be opened or read, or when LoadEncoding does
# not return a name for its contents.
#
# The file is opened with three-argument open on a lexical handle, so the
# name is always treated as a plain path to read: characters such as a
# leading '>' or a trailing '|' can no longer select a different open mode.
sub load_encoding {
    my ($file) = @_;

    $file =~ s!([^/]+)$!\L$1\E!;
    $file .= '.enc' unless $file =~ /\.enc$/;
    unless ($file =~ m!^/!) {
        foreach my $dir (@Encoding_Path) {
            my $candidate = $have_File_Spec
                ? File::Spec->catfile($dir, $file)
                : "$dir/$file";
            if (-e $candidate) {
                $file = $candidate;
                last;
            }
        }
    }

    open(my $enc, '<', $file) or croak("Couldn't open encmap $file:\n$!\n");
    binmode($enc);

    # Read the whole map in one call; the size comes from the open handle.
    my $data;
    my $br = sysread($enc, $data, -s $enc);
    croak("Trouble reading $file:\n$!\n")
        unless defined($br);
    close($enc);

    my $name = LoadEncoding($data, $br);
    croak("$file isn't an encmap file")
        unless defined($name);

    return $name;
}    # End load_encoding
103 | |||||
# Install handlers given as (type, handler) pairs and return a list of
# (type, previous handler) pairs in the same order, where "previous" is
# whatever the type's setter routine returned.
#
# A handler may be a code reference or any false value.  Croaks on an odd
# number of arguments, on a true handler that is not a CODE reference, and
# on a type that has no entry in the object's _Setters table.
sub setHandlers {
    my ($self, @handler_pairs) = @_;

    croak("Uneven number of arguments to setHandlers method")
        if @handler_pairs % 2;

    my @previous;
    while (my ($type, $handler) = splice(@handler_pairs, 0, 2)) {
        croak "Handler for $type not a Code ref"
            if $handler and ref($handler) ne 'CODE';

        my $setter = $self->{_Setters}->{$type};
        if (!defined $setter) {
            my @types = sort keys %{$self->{_Setters}};
            croak("Unknown Expat handler type: $type\n Valid types: @types");
        }

        my $old = $setter->($self->{Parser}, $handler);
        push @previous, $type, $old;
    }

    return @previous;
}
131 | |||||
# croak() with $message extended by " at line N", where N is the value
# GetCurrentLineNumber returns for this object's parser.  When the
# ErrorContext field is defined, the output of position_in_context() for
# that many lines is appended as well.
sub xpcroak {
    my ($self, $message) = @_;

    my $context_lines = $self->{ErrorContext};

    $message .= " at line " . GetCurrentLineNumber($self->{Parser});
    if (defined $context_lines) {
        $message .= ":\n" . $self->position_in_context($context_lines);
    }

    croak $message;
}
143 | |||||
# carp() with $message extended by " at line N", where N is the value
# GetCurrentLineNumber returns for this object's parser.  When the
# ErrorContext field is defined, the output of position_in_context() for
# that many lines is appended as well.
sub xpcarp {
    my ($self, $message) = @_;

    my $context_lines = $self->{ErrorContext};

    $message .= " at line " . GetCurrentLineNumber($self->{Parser});
    if (defined $context_lines) {
        $message .= ":\n" . $self->position_in_context($context_lines);
    }

    carp $message;
}
154 | |||||
# While a parse is running (_State_ == 1), hand back whatever
# DefaultCurrent returns for this parser; otherwise the false result of
# the state comparison is returned and nothing is called.
sub default_current {
    my ($self) = @_;
    return $self->{_State_} == 1 && DefaultCurrent($self->{Parser});
}
161 | |||||
# While a parse is running (_State_ == 1), hand back whatever
# RecognizedString returns for this parser; otherwise the false result of
# the state comparison is returned and nothing is called.
sub recognized_string {
    my ($self) = @_;
    return $self->{_State_} == 1 && RecognizedString($self->{Parser});
}
168 | |||||
# While a parse is running (_State_ == 1), hand back whatever
# OriginalString returns for this parser; otherwise the false result of
# the state comparison is returned and nothing is called.
sub original_string {
    my ($self) = @_;
    return $self->{_State_} == 1 && OriginalString($self->{Parser});
}
175 | |||||
# While a parse is running (_State_ == 1), hand back whatever
# GetCurrentLineNumber returns for this parser; otherwise the false result
# of the state comparison is returned and nothing is called.
sub current_line {
    my ($self) = @_;
    return $self->{_State_} == 1 && GetCurrentLineNumber($self->{Parser});
}
182 | |||||
# While a parse is running (_State_ == 1), hand back whatever
# GetCurrentColumnNumber returns for this parser; otherwise the false
# result of the state comparison is returned and nothing is called.
sub current_column {
    my ($self) = @_;
    return $self->{_State_} == 1 && GetCurrentColumnNumber($self->{Parser});
}
189 | |||||
# While a parse is running (_State_ == 1), hand back whatever
# GetCurrentByteIndex returns for this parser; otherwise the false result
# of the state comparison is returned and nothing is called.
sub current_byte {
    my ($self) = @_;
    return $self->{_State_} == 1 && GetCurrentByteIndex($self->{Parser});
}
196 | |||||
# Combined getter/setter for the parser's base.  Always returns the value
# GetBase reported before any change; when a second argument is supplied
# (even undef), it is passed to SetBase afterwards.
sub base {
    my ($self, $newbase) = @_;

    my $expat    = $self->{Parser};
    my $previous = GetBase($expat);

    # Decide on the argument count, so an explicit undef still sets.
    if (@_ > 1) {
        SetBase($expat, $newbase);
    }

    return $previous;
}
204 | |||||
# Return the contents of the Context stack (its element count in scalar
# context).
sub context {
    my ($self) = @_;
    my $stack = $self->{Context};
    return @$stack;
}
209 | |||||
# Return the last entry of the Context stack, or undef when it is empty.
sub current_element {
    my ($self) = @_;
    my $stack = $self->{Context};
    return @$stack ? $stack->[-1] : undef;
}
214 | |||||
# Compare the last entry of the Context stack with $element using
# eq_name() and return that result; returns undef when the stack is empty.
sub in_element {
    my ($self, $element) = @_;
    my $stack = $self->{Context};
    return @$stack ? $self->eq_name($stack->[-1], $element) : undef;
}
220 | |||||
# Count how many entries of the Context stack are equal to $element
# according to eq_name(), and return that count.
sub within_element {
    my ($self, $element) = @_;

    my $matches = 0;
    for (@{$self->{Context}}) {
        $matches += 1 if $self->eq_name($_, $element);
    }

    return $matches;
}
229 | |||||
# Return the number of entries on the Context stack.
sub depth {
    my ($self) = @_;
    return scalar @{$self->{Context}};
}
234 | |||||
# While a parse is running (_State_ == 1), hand back whatever ElementIndex
# returns for this parser; otherwise the false result of the state
# comparison is returned and nothing is called.
sub element_index {
    my ($self) = @_;
    return $self->{_State_} == 1 && ElementIndex($self->{Parser});
}
242 | |||||
243 | ################ | ||||
244 | # Namespace methods | ||||
245 | |||||
# Look up the namespace of $name: the numeric value of $name is used as an
# index into the Namespace_List array and that entry is returned.
sub namespace {
    my ($self, $name) = @_;
    local ($^W) = 0;    # $name is routinely a non-numeric string
    my $index = int($name);
    return $self->{Namespace_List}->[$index];
}
251 | |||||
# Two names are equal only when both their numeric values and their
# string values are equal.
sub eq_name {
    my ($self, $nm1, $nm2) = @_;
    local ($^W) = 0;    # the numeric comparison is applied to strings
    return (int($nm1) == int($nm2) && $nm1 eq $nm2);
}
258 | |||||
# Return $name unchanged when $namespace is false.  Otherwise return what
# GenerateNSName produces from the name, the namespace and this object's
# Namespace_Table and Namespace_List.
sub generate_ns_name {
    my ($self, $name, $namespace) = @_;

    return $name unless $namespace;

    return GenerateNSName($name, $namespace,
                          $self->{Namespace_Table},
                          $self->{Namespace_List});
}
267 | |||||
# Return the contents of the New_Prefixes list when namespace processing
# is enabled, and an empty list otherwise.
sub new_ns_prefixes {
    my ($self) = @_;
    return $self->{Namespaces} ? @{$self->{New_Prefixes}} : ();
}
275 | |||||
# Return the most recently pushed entry for $prefix in Prefix_Table.
# Returns undef when namespace processing is off, when the prefix has no
# entry, or when its stack is empty.
sub expand_ns_prefix {
    my ($self, $prefix) = @_;

    return undef unless $self->{Namespaces};

    my $bindings = $self->{Prefix_Table}->{$prefix};
    return (defined($bindings) and @$bindings) ? $bindings->[-1] : undef;
}
286 | |||||
# Return the prefixes that currently have an entry in Prefix_Table.  The
# '#default' entry is left out when the top of its stack is undefined.
# Returns an empty list when namespace processing is off.
sub current_ns_prefixes {
    my ($self) = @_;

    return () unless $self->{Namespaces};

    # Work on a shallow copy so the object's own table is never modified.
    my %active = %{$self->{Prefix_Table}};
    delete $active{'#default'}
        if exists $active{'#default'}
        and not defined($active{'#default'}->[-1]);

    return keys %active;
}
302 | |||||
303 | |||||
304 | ################################################################ | ||||
305 | # Namespace declaration handlers | ||||
306 | # | ||||
307 | |||||
# Record a namespace declaration: push $uri onto the stack kept for
# $prefix in Prefix_Table (creating the stack on first use) and append the
# prefix to New_Prefixes.  An undefined prefix is stored as '#default'.
sub NamespaceStart {
    my ($self, $prefix, $uri) = @_;

    $prefix = '#default' unless defined $prefix;

    # Dereferencing a missing entry here creates the array on demand.
    push @{$self->{Prefix_Table}->{$prefix}}, $uri;

    # The New_Prefixes list gets emptied at end of startElement function
    # in Expat.xs
    push @{$self->{New_Prefixes}}, $prefix;
}
326 | |||||
# Undo the latest declaration for $prefix: pop its stack in Prefix_Table
# when more than one entry is left, otherwise remove the prefix's entry
# altogether.  An undefined prefix is treated as '#default'.
sub NamespaceEnd {
    my ($self, $prefix) = @_;

    $prefix = '#default' unless defined $prefix;

    my $bindings = $self->{Prefix_Table}->{$prefix};
    return @$bindings > 1
        ? pop(@$bindings)
        : delete $self->{Prefix_Table}->{$prefix};
}
340 | |||||
341 | ################ | ||||
342 | |||||
# While a parse is running (_State_ == 1), hand back whatever
# GetSpecifiedAttributeCount returns for this parser; otherwise the false
# result of the state comparison is returned and nothing is called.
sub specified_attr {
    my ($self) = @_;
    return $self->{_State_} == 1
        && GetSpecifiedAttributeCount($self->{Parser});
}
350 | |||||
# While a parse is running (_State_ == 1), call UnsetAllHandlers on this
# object's parser; in any other state nothing is called.
sub finish {
    my ($self) = @_;
    return $self->{_State_} == 1 && UnsetAllHandlers($self->{Parser});
}
358 | |||||
# Build a printable excerpt of the input around the current parse position
# with a marker line ("====^") pointing at the current column.
#
# $lines is passed straight through to PositionContext, which supplies the
# excerpt and the offset inside it where the marker line is inserted
# (presumably the end of the current line -- confirm against Expat.xs).
# Returns '' when PositionContext gives no excerpt.  Only does any of this
# while a parse is running (_State_ == 1); otherwise the false result of
# the state comparison is returned.
sub position_in_context {
    my ($self, $lines) = @_;

    if ($self->{_State_} == 1) {
        my $expat = $self->{Parser};
        my ($excerpt, $split_at) = PositionContext($expat, $lines);

        return '' unless defined($excerpt);

        my $marker = ('=' x (GetCurrentColumnNumber($expat) - 1)) . "^\n";

        # Decide where the marker goes before the excerpt is lengthened.
        my $insert_inside = $split_at < length($excerpt);

        $excerpt .= "\n" if $excerpt !~ /\n$/;

        return $insert_inside
            ? substr($excerpt, 0, $split_at) . $marker . substr($excerpt, $split_at)
            : $excerpt . $marker;
    }
}
384 | |||||
# Return a copy of $text with XML markup characters replaced by entity or
# character references.
#
# '&' and '<' are always escaped (to '&amp;' and '&lt;').  Every further
# argument names one additional character to escape: '>', '"' and "'"
# become '&gt;', '&quot;' and '&apos;'; any other character becomes a
# hexadecimal character reference such as '&#x2E;'.  All occurrences are
# replaced.  Empty or undefined extra arguments are ignored.
#
# Croaks when an extra argument is longer than one character.  The first
# argument (the object) is not used.
sub xml_escape {
    my $self = shift;
    my $text = shift;

    # '&' must go first so the entities produced below are not re-escaped.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;

    my %named_entity = (
        '>' => '&gt;',
        '"' => '&quot;',
        "'" => '&apos;',
    );

    foreach my $char (@_) {
        # An empty pattern would make s///  reuse the last successful
        # match, so there is nothing sensible to do for an empty string.
        next unless defined $char and length $char;

        croak "xml_escape: '$char' isn't a single character"
            if length($char) > 1;

        my $replacement = exists $named_entity{$char}
            ? $named_entity{$char}
            : sprintf('&#x%X;', ord($char));

        # \Q..\E keeps regex metacharacters such as '.' or '|' literal.
        $text =~ s/\Q$char\E/$replacement/g;
    }

    return $text;
}
417 | |||||
# Before or during a parse (_State_ <= 1), pass the given argument on to
# SkipUntil for this parser; once the parse has finished nothing is called
# and the false result of the state comparison is returned.
sub skip_until {
    my $self = shift;
    return $self->{_State_} <= 1 && SkipUntil($self->{Parser}, $_[0]);
}
424 | |||||
# Hand this object's parser to ParserRelease and return its result.
sub release {
    my ($self) = @_;
    return ParserRelease($self->{Parser});
}
429 | |||||
# Destructor: hand this object's parser to ParserFree.
sub DESTROY {
    my ($self) = @_;
    ParserFree($self->{Parser});
}
434 | |||||
# Parse a document and return the (true) result of the low-level parse.
#
# $arg is either the document text or something to read it from: an
# IO::Handle object, or a value from which a file handle can be taken
# through its glob.  Anything that does not yield a handle is parsed as a
# string by ParseString; handles are read by ParseStream.
#
# When the Stream_Delimiter field is defined, the handle class's input
# record separator is set to "\n$delim\n" for the duration of the stream
# parse and put back afterwards.
#
# Croaks with "Parse already in progress (Expat)" when _State_ is already
# true, and with the ErrorMessage field when the parse result is false.
# _State_ is 1 while parsing and 2 once the parse call has returned.
sub parse {
    my ($self, $arg) = @_;

    croak "Parse already in progress (Expat)" if $self->{_State_};
    $self->{_State_} = 1;

    my $parser = $self->{Parser};
    my $result = 0;
    my $ioref;

    if (defined $arg) {
        # Keep the eval below from disturbing the caller's $@.
        local *@;

        if (ref($arg) and UNIVERSAL::isa($arg, 'IO::Handle')) {
            $ioref = $arg;
        }
        elsif ($] < 5.008 and defined tied($arg)) {
            require IO::Handle;
            $ioref = $arg;
        }
        else {
            require IO::Handle;
            eval {
                no strict 'refs';
                $ioref = *{$arg}{IO} if defined *{$arg};
            };
        }
    }

    if (defined $ioref) {
        my $delim   = $self->{Stream_Delimiter};
        my $ioclass = ref $ioref;
        $ioclass = "IO::Handle" unless length $ioclass;

        my $prev_rs;
        if (defined $delim) {
            $prev_rs = $ioclass->input_record_separator("\n$delim\n");
        }

        $result = ParseStream($parser, $ioref, $delim);

        if (defined $delim) {
            $ioclass->input_record_separator($prev_rs);
        }
    }
    else {
        $result = ParseString($parser, $arg);
    }

    $self->{_State_} = 2;
    croak $self->{ErrorMessage} unless $result;
    return $result;
}
481 | |||||
# Alias for parse(): forwards all arguments and returns its result.
sub parsestring {
    return shift->parse(@_);
}
486 | |||||
# Open the file named by the first argument, parse its contents with
# parse() and return that result.
#
# Croaks with "Parser has already been used" when _State_ is already true
# and with "Couldn't open ..." when the file cannot be opened.
#
# The file is opened read-only with three-argument open on a lexical
# handle, so the name is always taken as a literal path: leading or
# trailing characters such as '>', '<' or '|' can no longer select a
# different open mode or run a command.  The handle is put in binary mode
# and handed to parse() as a glob.
sub parsefile {
    my $self = shift;
    croak "Parser has already been used" if $self->{_State_};

    open(my $fh, '<', $_[0]) or croak "Couldn't open $_[0]:\n$!";
    binmode($fh);

    my $ret = $self->parse(*$fh);
    close($fh);

    return $ret;
}
497 | |||||
498 | ################################################################ | ||||
499 | package #hide from PAUSE | ||||
500 | XML::Parser::ContentModel; | ||||
501 | use overload '""' => \&asString, 'eq' => \&thiseq; | ||||
502 | |||||
# Numeric codes compared against a content model's Type field by the
# predicates and by asString() in this package.
sub EMPTY  () { 1 }
sub ANY    () { 2 }
sub MIXED  () { 3 }
sub NAME   () { 4 }
sub CHOICE () { 5 }
sub SEQ    () { 6 }
509 | |||||
510 | |||||
# True when this model's Type is EMPTY.
sub isempty {
    my ($model) = @_;
    return $model->{Type} == EMPTY;
}
514 | |||||
# True when this model's Type is ANY.
sub isany {
    my ($model) = @_;
    return $model->{Type} == ANY;
}
518 | |||||
# True when this model's Type is MIXED.
sub ismixed {
    my ($model) = @_;
    return $model->{Type} == MIXED;
}
522 | |||||
# True when this model's Type is NAME.
sub isname {
    my ($model) = @_;
    return $model->{Type} == NAME;
}
526 | |||||
# Return this model's Tag field.
sub name {
    my ($model) = @_;
    return $model->{Tag};
}
530 | |||||
# True when this model's Type is CHOICE.
sub ischoice {
    my ($model) = @_;
    return $model->{Type} == CHOICE;
}
534 | |||||
# True when this model's Type is SEQ.
sub isseq {
    my ($model) = @_;
    return $model->{Type} == SEQ;
}
538 | |||||
# Return this model's Quant field.
sub quant {
    my ($model) = @_;
    return $model->{Quant};
}
542 | |||||
# Return the entries of this model's Children array (their count in scalar
# context), or undef when the model has no Children field.
sub children {
    my ($model) = @_;
    my $kids = $model->{Children};
    return defined($kids) ? @$kids : undef;
}
550 | |||||
# Render this content model as text.
#
# EMPTY and ANY models give exactly "EMPTY" and "ANY".  A NAME model gives
# its Tag; a MIXED model gives "(#PCDATA|child|...)"; any other model
# gives its children's renderings in parentheses, joined by '|' for CHOICE
# and by ',' otherwise.  For all but EMPTY and ANY, a true Quant field is
# appended.
sub asString {
    my ($self) = @_;
    my $type = $self->{Type};

    return "EMPTY" if $type == EMPTY;
    return "ANY"   if $type == ANY;

    my $text;
    if ($type == NAME) {
        $text = $self->{Tag};
    }
    elsif ($type == MIXED) {
        $text = '(#PCDATA';
        $text .= '|' . $_ foreach @{$self->{Children}};
        $text .= ')';
    }
    else {
        my $joiner = $type == CHOICE ? '|' : ',';
        my @parts  = map { $_->asString } @{$self->{Children}};
        $text = '(' . join($joiner, @parts) . ')';
    }

    $text .= $self->{Quant} if $self->{Quant};
    return $text;
}
579 | |||||
# String-compare this model's asString() rendering with the other operand.
sub thiseq {
    my ($self, $other) = @_;
    return $self->asString eq $other;
}
585 | |||||
586 | ################################################################ | ||||
587 | package #hide from PAUSE | ||||
588 | XML::Parser::ExpatNB; | ||||
589 | |||||
590 | use vars qw(@ISA); | ||||
591 | use Carp; | ||||
592 | |||||
593 | @ISA = qw(XML::Parser::Expat); | ||||
594 | |||||
# Not available here: always croaks, naming the object's class.
sub parse {
    my ($self) = @_;
    croak sprintf("parse method not supported in %s", ref($self));
}
600 | |||||
# Not available here: always croaks, naming the object's class.
sub parsestring {
    my ($self) = @_;
    croak sprintf("parsestring method not supported in %s", ref($self));
}
606 | |||||
# Not available here: always croaks, naming the object's class.
sub parsefile {
    my ($self) = @_;
    croak sprintf("parsefile method not supported in %s", ref($self));
}
612 | |||||
# Feed another piece of the document ($data) to the parser through
# ParsePartial.  Marks the parse as running (_State_ = 1) first and croaks
# with the ErrorMessage field when ParsePartial returns a false value.
sub parse_more {
    my ($self, $data) = @_;

    $self->{_State_} = 1;

    my $ok = XML::Parser::Expat::ParsePartial($self->{Parser}, $data);
    $ok or croak $self->{ErrorMessage};
}
621 | |||||
# Finish an incremental parse.
#
# Calls ParseDone; on a false result the parser is released and the
# ErrorMessage field is croaked.  Otherwise _State_ becomes 2, the
# FinalHandler (when defined) is called with the object, the parser is
# released, and a value is returned according to the calling context:
#   void   - nothing
#   list   - the list returned by FinalHandler (empty without a handler)
#   scalar - FinalHandler's result, or ParseDone's result without a handler
sub parse_done {
    my ($self) = @_;

    my $ok = XML::Parser::Expat::ParseDone($self->{Parser});
    if (!$ok) {
        # Fetch the message before releasing the parser.
        my $msg = $self->{ErrorMessage};
        $self->release;
        croak $msg;
    }

    $self->{_State_} = 2;

    my $want_list     = wantarray;
    my $scalar_result = $ok;
    my @list_result;

    my $final = $self->{FinalHandler};
    if (defined $final) {
        # The handler runs in list context only when the caller wants a list.
        if ($want_list) {
            @list_result = $final->($self);
        }
        else {
            $scalar_result = $final->($self);
        }
    }

    $self->release;

    return unless defined $want_list;
    return $want_list ? @list_result : $scalar_result;
}
651 | |||||
652 | ################################################################ | ||||
653 | |||||
654 | package #hide from PAUSE | ||||
655 | XML::Parser::Encinfo; | ||||
656 | |||||
# Destructor: hand this object to XML::Parser::Expat::FreeEncoding.
sub DESTROY {
    my ($self) = @_;
    XML::Parser::Expat::FreeEncoding($self);
}
661 | |||||
662 | 1; | ||||
663 | |||||
664 | __END__ | ||||
665 | |||||
666 | =head1 NAME | ||||
667 | |||||
668 | XML::Parser::Expat - Lowlevel access to James Clark's expat XML parser | ||||
669 | |||||
670 | =head1 SYNOPSIS | ||||
671 | |||||
672 | use XML::Parser::Expat; | ||||
673 | |||||
674 | $parser = XML::Parser::Expat->new; | ||||
675 | $parser->setHandlers('Start' => \&sh, | ||||
676 | 'End' => \&eh, | ||||
677 | 'Char' => \&ch); | ||||
678 | open(FOO, '<', 'info.xml') or die "Couldn't open"; | ||||
679 | $parser->parse(*FOO); | ||||
680 | close(FOO); | ||||
681 | # $parser->parse('<foo id="me"> here <em>we</em> go </foo>'); | ||||
682 | |||||
683 | sub sh | ||||
684 | { | ||||
685 | my ($p, $el, %atts) = @_; | ||||
686 | $p->setHandlers('Char' => \&spec) | ||||
687 | if ($el eq 'special'); | ||||
688 | ... | ||||
689 | } | ||||
690 | |||||
691 | sub eh | ||||
692 | { | ||||
693 | my ($p, $el) = @_; | ||||
694 | $p->setHandlers('Char' => \&ch) # Special elements won't contain | ||||
695 | if ($el eq 'special'); # other special elements | ||||
696 | ... | ||||
697 | } | ||||
698 | |||||
699 | =head1 DESCRIPTION | ||||
700 | |||||
701 | This module provides an interface to James Clark's XML parser, expat. As in | ||||
702 | expat, a single instance of the parser can only parse one document. Calls | ||||
703 | to parsestring after the first for a given instance will die. | ||||
704 | |||||
705 | Expat (and XML::Parser::Expat) are event based. As the parser recognizes | ||||
706 | parts of the document (say the start or end of an XML element), then any | ||||
707 | handlers registered for that type of an event are called with suitable | ||||
708 | parameters. | ||||
709 | |||||
710 | =head1 METHODS | ||||
711 | |||||
712 | =over 4 | ||||
713 | |||||
714 | =item new | ||||
715 | |||||
716 | This is a class method, the constructor for XML::Parser::Expat. Options are | ||||
717 | passed as keyword value pairs. The recognized options are: | ||||
718 | |||||
719 | =over 4 | ||||
720 | |||||
721 | =item * ProtocolEncoding | ||||
722 | |||||
723 | The protocol encoding name. The default is none. The expat built-in | ||||
724 | encodings are: C<UTF-8>, C<ISO-8859-1>, C<UTF-16>, and C<US-ASCII>. | ||||
725 | Other encodings may be used if they have encoding maps in one of the | ||||
726 | directories in the @Encoding_Path list. Setting the protocol encoding | ||||
727 | overrides any encoding in the XML declaration. | ||||
728 | |||||
729 | =item * Namespaces | ||||
730 | |||||
731 | When this option is given with a true value, then the parser does namespace | ||||
732 | processing. By default, namespace processing is turned off. When it is | ||||
733 | turned on, the parser consumes I<xmlns> attributes and strips off prefixes | ||||
734 | from element and attributes names where those prefixes have a defined | ||||
735 | namespace. A name's namespace can be found using the L<"namespace"> method | ||||
736 | and two names can be checked for absolute equality with the L<"eq_name"> | ||||
737 | method. | ||||
738 | |||||
739 | =item * NoExpand | ||||
740 | |||||
741 | Normally, the parser will try to expand references to entities defined in | ||||
742 | the internal subset. If this option is set to a true value, and a default | ||||
743 | handler is also set, then the default handler will be called when an | ||||
744 | entity reference is seen in text. This has no effect if a default handler | ||||
745 | has not been registered, and it has no effect on the expansion of entity | ||||
746 | references inside attribute values. | ||||
747 | |||||
748 | =item * Stream_Delimiter | ||||
749 | |||||
750 | This option takes a string value. When this string is found alone on a line | ||||
751 | while parsing from a stream, then the parse is ended as if it saw an end of | ||||
752 | file. The intended use is with a stream of xml documents in a MIME multipart | ||||
753 | format. The string should not contain a trailing newline. | ||||
754 | |||||
755 | =item * ErrorContext | ||||
756 | |||||
757 | When this option is defined, errors are reported in context. The value | ||||
758 | of ErrorContext should be the number of lines to show on either side of | ||||
759 | the line in which the error occurred. | ||||
760 | |||||
761 | =item * ParseParamEnt | ||||
762 | |||||
763 | Unless standalone is set to "yes" in the XML declaration, setting this to | ||||
764 | a true value allows the external DTD to be read, and parameter entities | ||||
765 | to be parsed and expanded. | ||||
766 | |||||
767 | =item * Base | ||||
768 | |||||
769 | The base to use for relative pathnames or URLs. This can also be done by | ||||
770 | using the base method. | ||||
771 | |||||
772 | =back | ||||
773 | |||||
774 | =item setHandlers(TYPE, HANDLER [, TYPE, HANDLER [...]]) | ||||
775 | |||||
776 | This method registers handlers for the various events. If no handlers are | ||||
777 | registered, then a call to parsestring or parsefile will only determine if | ||||
778 | the corresponding XML document is well formed (by returning without error.) | ||||
779 | This may be called from within a handler, after the parse has started. | ||||
780 | |||||
781 | Setting a handler to something that evaluates to false unsets that | ||||
782 | handler. | ||||
783 | |||||
784 | This method returns a list of type, handler pairs corresponding to the | ||||
785 | input. The handlers returned are the ones that were in effect before the | ||||
786 | call to setHandlers. | ||||
787 | |||||
788 | The recognized events and the parameters passed to the corresponding | ||||
789 | handlers are: | ||||
790 | |||||
791 | =over 4 | ||||
792 | |||||
793 | =item * Start (Parser, Element [, Attr, Val [,...]]) | ||||
794 | |||||
795 | This event is generated when an XML start tag is recognized. Parser is | ||||
796 | an XML::Parser::Expat instance. Element is the name of the XML element that | ||||
797 | is opened with the start tag. The Attr & Val pairs are generated for each | ||||
798 | attribute in the start tag. | ||||
799 | |||||
800 | =item * End (Parser, Element) | ||||
801 | |||||
802 | This event is generated when an XML end tag is recognized. Note that | ||||
803 | an XML empty tag (<foo/>) generates both a start and an end event. | ||||
804 | |||||
805 | There is always a lower level start and end handler installed that wrap | ||||
806 | the corresponding callbacks. This is to handle the context mechanism. | ||||
807 | A consequence of this is that the default handler (see below) will not | ||||
808 | see a start tag or end tag unless the default_current method is called. | ||||
809 | |||||
810 | =item * Char (Parser, String) | ||||
811 | |||||
812 | This event is generated when non-markup is recognized. The non-markup | ||||
813 | sequence of characters is in String. A single non-markup sequence of | ||||
814 | characters may generate multiple calls to this handler. Whatever the | ||||
815 | encoding of the string in the original document, this is given to the | ||||
816 | handler in UTF-8. | ||||
817 | |||||
818 | =item * Proc (Parser, Target, Data) | ||||
819 | |||||
820 | This event is generated when a processing instruction is recognized. | ||||
821 | |||||
822 | =item * Comment (Parser, String) | ||||
823 | |||||
824 | This event is generated when a comment is recognized. | ||||
825 | |||||
826 | =item * CdataStart (Parser) | ||||
827 | |||||
828 | This is called at the start of a CDATA section. | ||||
829 | |||||
830 | =item * CdataEnd (Parser) | ||||
831 | |||||
832 | This is called at the end of a CDATA section. | ||||
833 | |||||
834 | =item * Default (Parser, String) | ||||
835 | |||||
836 | This is called for any characters that don't have a registered handler. | ||||
837 | This includes both characters that are part of markup for which no | ||||
838 | events are generated (markup declarations) and characters that | ||||
839 | could generate events, but for which no handler has been registered. | ||||
840 | |||||
841 | Whatever the encoding in the original document, the string is returned to | ||||
842 | the handler in UTF-8. | ||||
843 | |||||
844 | =item * Unparsed (Parser, Entity, Base, Sysid, Pubid, Notation) | ||||
845 | |||||
846 | This is called for a declaration of an unparsed entity. Entity is the name | ||||
847 | of the entity. Base is the base to be used for resolving a relative URI. | ||||
848 | Sysid is the system id. Pubid is the public id. Notation is the notation | ||||
849 | name. Base and Pubid may be undefined. | ||||
850 | |||||
851 | =item * Notation (Parser, Notation, Base, Sysid, Pubid) | ||||
852 | |||||
853 | This is called for a declaration of notation. Notation is the notation name. | ||||
854 | Base is the base to be used for resolving a relative URI. Sysid is the system | ||||
855 | id. Pubid is the public id. Base, Sysid, and Pubid may all be undefined. | ||||
856 | |||||
857 | =item * ExternEnt (Parser, Base, Sysid, Pubid) | ||||
858 | |||||
859 | This is called when an external entity is referenced. Base is the base to be | ||||
860 | used for resolving a relative URI. Sysid is the system id. Pubid is the public | ||||
861 | id. Base and Pubid may be undefined. | ||||
862 | |||||
863 | This handler should either return a string, which represents the contents of | ||||
864 | the external entity, or return an open filehandle that can be read to obtain | ||||
865 | the contents of the external entity, or return undef, which indicates the | ||||
866 | external entity couldn't be found and will generate a parse error. | ||||
867 | |||||
868 | If an open filehandle is returned, it must be returned as either a glob | ||||
869 | (*FOO) or as a reference to a glob (e.g. an instance of IO::Handle). | ||||
870 | |||||
871 | =item * ExternEntFin (Parser) | ||||
872 | |||||
873 | This is called after an external entity has been parsed. It allows | ||||
874 | applications to perform cleanup on actions performed in the above | ||||
875 | ExternEnt handler. | ||||
876 | |||||
877 | =item * Entity (Parser, Name, Val, Sysid, Pubid, Ndata, IsParam) | ||||
878 | |||||
879 | This is called when an entity is declared. For internal entities, the Val | ||||
880 | parameter will contain the value and the remaining three parameters will | ||||
881 | be undefined. For external entities, the Val parameter | ||||
882 | will be undefined, the Sysid parameter will have the system id, the Pubid | ||||
883 | parameter will have the public id if it was provided (it will be undefined | ||||
884 | otherwise), the Ndata parameter will contain the notation for unparsed | ||||
885 | entities. If this is a parameter entity declaration, then the IsParam | ||||
886 | parameter is true. | ||||
887 | |||||
888 | Note that this handler and the Unparsed handler above overlap. If both are | ||||
889 | set, then this handler will not be called for unparsed entities. | ||||
890 | |||||
891 | =item * Element (Parser, Name, Model) | ||||
892 | |||||
893 | The element handler is called when an element declaration is found. Name is | ||||
894 | the element name, and Model is the content model as an | ||||
895 | XML::Parser::ContentModel object. See L<"XML::Parser::ContentModel Methods"> | ||||
896 | for methods available for this class. | ||||
897 | |||||
898 | =item * Attlist (Parser, Elname, Attname, Type, Default, Fixed) | ||||
899 | |||||
900 | This handler is called for each attribute in an ATTLIST declaration. | ||||
901 | So an ATTLIST declaration that has multiple attributes | ||||
902 | will generate multiple calls to this handler. The Elname parameter is the | ||||
903 | name of the element with which the attribute is being associated. The Attname | ||||
904 | parameter is the name of the attribute. Type is the attribute type, given as | ||||
905 | a string. Default is the default value, which will either be "#REQUIRED", | ||||
906 | "#IMPLIED" or a quoted string (i.e. the returned string will begin and end | ||||
907 | with a quote character). If Fixed is true, then this is a fixed attribute. | ||||
908 | |||||
909 | =item * Doctype (Parser, Name, Sysid, Pubid, Internal) | ||||
910 | |||||
911 | This handler is called for DOCTYPE declarations. Name is the document type | ||||
912 | name. Sysid is the system id of the document type, if it was provided, | ||||
913 | otherwise it's undefined. Pubid is the public id of the document type, | ||||
914 | which will be undefined if no public id was given. Internal will be | ||||
915 | true or false, indicating whether or not the doctype declaration contains | ||||
916 | an internal subset. | ||||
917 | |||||
918 | =item * DoctypeFin (Parser) | ||||
919 | |||||
920 | This handler is called after parsing of the DOCTYPE declaration has finished, | ||||
921 | including any internal or external DTD declarations. | ||||
922 | |||||
923 | =item * XMLDecl (Parser, Version, Encoding, Standalone) | ||||
924 | |||||
925 | This handler is called for XML declarations. Version is a string containing | ||||
926 | the version. Encoding is either undefined or contains an encoding string. | ||||
927 | Standalone is either undefined, or true or false. Undefined indicates | ||||
928 | that no standalone parameter was given in the XML declaration. True or | ||||
929 | false indicates "yes" or "no" respectively. | ||||
930 | |||||
931 | =back | ||||
932 | |||||
933 | =item namespace(name) | ||||
934 | |||||
935 | Return the URI of the namespace that the name belongs to. If the name doesn't | ||||
936 | belong to any namespace, an undef is returned. This is only valid on names | ||||
937 | received through the Start or End handlers from a single document, or through | ||||
938 | a call to the generate_ns_name method. In other words, don't use names | ||||
939 | generated from one instance of XML::Parser::Expat with other instances. | ||||
940 | |||||
941 | =item eq_name(name1, name2) | ||||
942 | |||||
943 | Return true if name1 and name2 are identical (i.e. same name and from | ||||
944 | the same namespace.) This is only meaningful if both names were obtained | ||||
945 | through the Start or End handlers from a single document, or through | ||||
946 | a call to the generate_ns_name method. | ||||
947 | |||||
948 | =item generate_ns_name(name, namespace) | ||||
949 | |||||
950 | Return a name, associated with a given namespace, good for using with the | ||||
951 | above 2 methods. The namespace argument should be the namespace URI, not | ||||
952 | a prefix. | ||||
953 | |||||
954 | =item new_ns_prefixes | ||||
955 | |||||
956 | When called from a start tag handler, returns namespace prefixes declared | ||||
957 | with this start tag. If called elsewhere (or if there were no namespace | ||||
958 | prefixes declared), it returns an empty list. Setting of the default | ||||
959 | namespace is indicated with '#default' as a prefix. | ||||
960 | |||||
961 | =item expand_ns_prefix(prefix) | ||||
962 | |||||
963 | Return the uri to which the given prefix is currently bound. Returns | ||||
964 | undef if the prefix isn't currently bound. Use '#default' to find the | ||||
965 | current binding of the default namespace (if any). | ||||
966 | |||||
967 | =item current_ns_prefixes | ||||
968 | |||||
969 | Return a list of currently bound namespace prefixes. The order of the | ||||
970 | prefixes in the list has no meaning. If the default namespace is | ||||
971 | currently bound, '#default' appears in the list. | ||||
972 | |||||
973 | =item recognized_string | ||||
974 | |||||
975 | Returns the string from the document that was recognized in order to call | ||||
976 | the current handler. For instance, when called from a start handler, it | ||||
977 | will give us the start-tag string. The string is encoded in UTF-8. | ||||
978 | This method doesn't return a meaningful string inside declaration handlers. | ||||
979 | |||||
980 | =item original_string | ||||
981 | |||||
982 | Returns the verbatim string from the document that was recognized in | ||||
983 | order to call the current handler. The string is in the original document | ||||
984 | encoding. This method doesn't return a meaningful string inside declaration | ||||
985 | handlers. | ||||
986 | |||||
987 | =item default_current | ||||
988 | |||||
989 | When called from a handler, causes the sequence of characters that generated | ||||
990 | the corresponding event to be sent to the default handler (if one is | ||||
991 | registered). Use of this method is deprecated in favor of the recognized_string | ||||
992 | method, which you can use without installing a default handler. This | ||||
993 | method doesn't deliver a meaningful string to the default handler when | ||||
994 | called from inside declaration handlers. | ||||
995 | |||||
996 | =item xpcroak(message) | ||||
997 | |||||
998 | Concatenate onto the given message the current line number within the | ||||
999 | XML document plus the message implied by ErrorContext. Then croak with | ||||
1000 | the formed message. | ||||
1001 | |||||
1002 | =item xpcarp(message) | ||||
1003 | |||||
1004 | Concatenate onto the given message the current line number within the | ||||
1005 | XML document plus the message implied by ErrorContext. Then carp with | ||||
1006 | the formed message. | ||||
1007 | |||||
1008 | =item current_line | ||||
1009 | |||||
1010 | Returns the line number of the current position of the parse. | ||||
1011 | |||||
1012 | =item current_column | ||||
1013 | |||||
1014 | Returns the column number of the current position of the parse. | ||||
1015 | |||||
1016 | =item current_byte | ||||
1017 | |||||
1018 | Returns the current position of the parse, as a byte offset into the document. | ||||
1019 | |||||
1020 | =item base([NEWBASE]); | ||||
1021 | |||||
1022 | Returns the current value of the base for resolving relative URIs. If | ||||
1023 | NEWBASE is supplied, changes the base to that value. | ||||
1024 | |||||
1025 | =item context | ||||
1026 | |||||
1027 | Returns a list of element names that represent open elements, with the | ||||
1028 | last one being the innermost. Inside start and end tag handlers, this | ||||
1029 | will be the tag of the parent element. | ||||
1030 | |||||
1031 | =item current_element | ||||
1032 | |||||
1033 | Returns the name of the innermost currently opened element. Inside | ||||
1034 | start or end handlers, returns the parent of the element associated | ||||
1035 | with those tags. | ||||
1036 | |||||
1037 | =item in_element(NAME) | ||||
1038 | |||||
1039 | Returns true if NAME is equal to the name of the innermost currently opened | ||||
1040 | element. If namespace processing is being used and you want to check | ||||
1041 | against a name that may be in a namespace, then use the generate_ns_name | ||||
1042 | method to create the NAME argument. | ||||
1043 | |||||
1044 | =item within_element(NAME) | ||||
1045 | |||||
1046 | Returns the number of times the given name appears in the context list. | ||||
1047 | If namespace processing is being used and you want to check | ||||
1048 | against a name that may be in a namespace, then use the generate_ns_name | ||||
1049 | method to create the NAME argument. | ||||
1050 | |||||
1051 | =item depth | ||||
1052 | |||||
1053 | Returns the size of the context list. | ||||
1054 | |||||
1055 | =item element_index | ||||
1056 | |||||
1057 | Returns an integer that is the depth-first visit order of the current | ||||
1058 | element. This will be zero outside of the root element. For example, | ||||
1059 | this will return 1 when called from the start handler for the root element | ||||
1060 | start tag. | ||||
1061 | |||||
1062 | =item skip_until(INDEX) | ||||
1063 | |||||
1064 | INDEX is an integer that represents an element index. When this method | ||||
1065 | is called, all handlers are suspended until the start tag for an element | ||||
1066 | that has an index number equal to INDEX is seen. If a start handler has | ||||
1067 | been set, then this is the first tag that the start handler will see | ||||
1068 | after skip_until has been called. | ||||
1069 | |||||
1070 | |||||
1071 | =item position_in_context(LINES) | ||||
1072 | |||||
1073 | Returns a string that shows the current parse position. LINES should be | ||||
1074 | an integer >= 0 that represents the number of lines on either side of the | ||||
1075 | current parse line to place into the returned string. | ||||
1076 | |||||
1077 | =item xml_escape(TEXT [, CHAR [, CHAR ...]]) | ||||
1078 | |||||
1079 | Returns TEXT with markup characters turned into character entities. Any | ||||
1080 | additional characters provided as arguments are also turned into character | ||||
1081 | references where found in TEXT. | ||||
1082 | |||||
1083 | =item parse (SOURCE) | ||||
1084 | |||||
1085 | The SOURCE parameter should either be a string containing the whole XML | ||||
1086 | document, or it should be an open IO::Handle. Only a single document | ||||
1087 | may be parsed for a given instance of XML::Parser::Expat, so this will croak | ||||
1088 | if it's been called previously for this instance. | ||||
1089 | |||||
1090 | =item parsestring(XML_DOC_STRING) | ||||
1091 | |||||
1092 | Parses the given string as an XML document. Only a single document may be | ||||
1093 | parsed for a given instance of XML::Parser::Expat, so this will die if either | ||||
1094 | parsestring or parsefile has been called for this instance previously. | ||||
1095 | |||||
1096 | This method is deprecated in favor of the parse method. | ||||
1097 | |||||
1098 | =item parsefile(FILENAME) | ||||
1099 | |||||
1100 | Parses the XML document in the given file. Will die if parsestring or | ||||
1101 | parsefile has been called previously for this instance. | ||||
1102 | |||||
1103 | =item is_defaulted(ATTNAME) | ||||
1104 | |||||
1105 | NO LONGER WORKS. To find out if an attribute is defaulted please use | ||||
1106 | the specified_attr method. | ||||
1107 | |||||
1108 | =item specified_attr | ||||
1109 | |||||
1110 | When the start handler receives lists of attributes and values, the | ||||
1111 | non-defaulted (i.e. explicitly specified) attributes occur in the list | ||||
1112 | first. This method returns the number of specified items in the list. | ||||
1113 | So if this number is equal to the length of the list, there were no | ||||
1114 | defaulted values. Otherwise the number points to the index of the | ||||
1115 | first defaulted attribute name. | ||||
1116 | |||||
1117 | =item finish | ||||
1118 | |||||
1119 | Unsets all handlers (including internal ones that set context), but expat | ||||
1120 | continues parsing to the end of the document or until it finds an error. | ||||
1121 | It should finish up a lot faster than with the handlers set. | ||||
1122 | |||||
1123 | =item release | ||||
1124 | |||||
1125 | There are data structures used by XML::Parser::Expat that have circular | ||||
1126 | references. This means that these structures will never be garbage | ||||
1127 | collected unless these references are explicitly broken. Calling this | ||||
1128 | method breaks those references (and makes the instance unusable.) | ||||
1129 | |||||
1130 | Normally, higher level calls handle this for you, but if you are using | ||||
1131 | XML::Parser::Expat directly, then it's your responsibility to call it. | ||||
1132 | |||||
1133 | =back | ||||
1134 | |||||
1135 | =head2 XML::Parser::ContentModel Methods | ||||
1136 | |||||
1137 | The element declaration handlers are passed objects of this class as the | ||||
1138 | content model of the element declaration. They also represent content | ||||
1139 | particles, components of a content model. | ||||
1140 | |||||
1141 | When referred to as a string, these objects are automagically converted to a | ||||
1142 | string representation of the model (or content particle). | ||||
1143 | |||||
1144 | =over 4 | ||||
1145 | |||||
1146 | =item isempty | ||||
1147 | |||||
1148 | This method returns true if the object is "EMPTY", false otherwise. | ||||
1149 | |||||
1150 | =item isany | ||||
1151 | |||||
1152 | This method returns true if the object is "ANY", false otherwise. | ||||
1153 | |||||
1154 | =item ismixed | ||||
1155 | |||||
1156 | This method returns true if the object is "(#PCDATA)" or "(#PCDATA|...)*", | ||||
1157 | false otherwise. | ||||
1158 | |||||
1159 | =item isname | ||||
1160 | |||||
1161 | This method returns true if the object is an element name. | ||||
1162 | |||||
1163 | =item ischoice | ||||
1164 | |||||
1165 | This method returns true if the object is a choice of content particles. | ||||
1166 | |||||
1167 | |||||
1168 | =item isseq | ||||
1169 | |||||
1170 | This method returns true if the object is a sequence of content particles. | ||||
1171 | |||||
1172 | =item quant | ||||
1173 | |||||
1174 | This method returns undef or a string representing the quantifier | ||||
1175 | ('?', '*', '+') associated with the model or particle. | ||||
1176 | |||||
1177 | =item children | ||||
1178 | |||||
1179 | This method returns undef or (for mixed, choice, and sequence types) | ||||
1180 | an array of component content particles. There will always be at least | ||||
1181 | one component for choices and sequences, but for a mixed content model | ||||
1182 | of pure PCDATA, "(#PCDATA)", an undef is returned. | ||||
1183 | |||||
1184 | =back | ||||
1185 | |||||
1186 | =head2 XML::Parser::ExpatNB Methods | ||||
1187 | |||||
1188 | The class XML::Parser::ExpatNB is a subclass of XML::Parser::Expat used | ||||
1189 | for non-blocking access to the expat library. It does not support the parse, | ||||
1190 | parsestring, or parsefile methods, but it does have these additional methods: | ||||
1191 | |||||
1192 | =over 4 | ||||
1193 | |||||
1194 | =item parse_more(DATA) | ||||
1195 | |||||
1196 | Feed expat more text to munch on. | ||||
1197 | |||||
1198 | =item parse_done | ||||
1199 | |||||
1200 | Tell expat that it's gotten the whole document. | ||||
1201 | |||||
1202 | =back | ||||
1203 | |||||
1204 | =head1 FUNCTIONS | ||||
1205 | |||||
1206 | =over 4 | ||||
1207 | |||||
1208 | =item XML::Parser::Expat::load_encoding(ENCODING) | ||||
1209 | |||||
1210 | Load an external encoding. ENCODING is either the name of an encoding or | ||||
1211 | the name of a file. The basename is converted to lowercase and a '.enc' | ||||
1212 | extension is appended unless there's one already there. Then, unless | ||||
1213 | it's an absolute pathname (i.e. begins with '/'), the first file by that | ||||
1214 | name discovered in the @Encoding_Path path list is used. | ||||
1215 | |||||
1216 | The encoding in the file is loaded and kept in the %Encoding_Table | ||||
1217 | table. Earlier encodings of the same name are replaced. | ||||
1218 | |||||
1219 | This function is automatically called by expat when it encounters an encoding | ||||
1220 | it doesn't know about. Expat shouldn't call this twice for the same | ||||
1221 | encoding name. The only reason users should use this function is to | ||||
1222 | explicitly load an encoding not contained in the @Encoding_Path list. | ||||
1223 | |||||
1224 | =back | ||||
1225 | |||||
1226 | =head1 AUTHORS | ||||
1227 | |||||
1228 | Larry Wall <F<larry@wall.org>> wrote version 1.0. | ||||
1229 | |||||
1230 | Clark Cooper <F<coopercc@netheaven.com>> picked up support, changed the API | ||||
1231 | for this version (2.x), provided documentation, and added some standard | ||||
1232 | package features. | ||||
1233 | |||||
1234 | =cut |