package PerlIO;

our $VERSION = '1.09';

# Alias table: layer name => package that implements that layer.  A layer
# with no entry here is looked up as "<class>::<layer name>" by import().
our %alias;

# Load the module behind each requested layer.
#
# Invoked as PerlIO->import(@layer_names), i.e. what "use PerlIO 'foo'" does.
# Every name is translated to a package (through %alias when an entry exists,
# otherwise by prefixing the invoking class) and that package is require'd.
# A load failure is reported with warn() instead of being raised, so the
# remaining names are still attempted.  Nothing meaningful is returned.
sub import
{
    my ($class, @layers) = @_;

    for my $name (@layers)
    {
        my $package = exists $alias{$name}
                    ? $alias{$name}
                    : "${class}::$name";

        # require wants a file path here, not a package name:
        # Foo::Bar becomes Foo/Bar.pm
        my $file = ($package =~ s{::}{/}gr) . '.pm';

        eval { require $file };
        warn $@ if $@;
    }
}

# Numeric value of the UTF-8 flag bit.  The empty prototype allows perl to
# inline calls to this sub as a compile-time constant.
# NOTE(review): presumably mirrors the C-level PerlIO UTF-8 layer flag that
# get_layers(..., details => 1) reports - confirm against perliol.
sub F_UTF8 ()
{
    0x8000
}

# A module file must finish with a true value for require/use to succeed.
1;
30 | | | | | __END__ |
31 | | | | | |
32 | | | | | =head1 NAME |
33 | | | | | |
34 | | | | | PerlIO - On demand loader for PerlIO layers and root of PerlIO::* name space |
35 | | | | | |
36 | | | | | =head1 SYNOPSIS |
37 | | | | | |
38 | | | | | open($fh, "<:crlf", "my.txt"); # support platform-native and |
39 | | | | | # CRLF text files |
40 | | | | | |
41 | | | | | open($fh, "<", "his.jpg"); # portably open a binary file for reading |
42 | | | | | binmode($fh); |
43 | | | | | |
44 | | | | | Shell: |
45 | | | | | PERLIO=perlio perl .... |
46 | | | | | |
47 | | | | | =head1 DESCRIPTION |
48 | | | | | |
49 | | | | | When an undefined layer 'foo' is encountered in an C<open> or |
50 | | | | | C<binmode> layer specification then C code performs the equivalent of: |
51 | | | | | |
52 | | | | | use PerlIO 'foo'; |
53 | | | | | |
54 | | | | | The perl code in PerlIO.pm then attempts to locate a layer by doing |
55 | | | | | |
56 | | | | | require PerlIO::foo; |
57 | | | | | |
58 | | | | | Otherwise the C<PerlIO> package is a place holder for additional |
59 | | | | | PerlIO related functions. |
60 | | | | | |
61 | | | | | The following layers are currently defined: |
62 | | | | | |
63 | | | | | =over 4 |
64 | | | | | |
65 | | | | | =item :unix |
66 | | | | | |
67 | | | | | Lowest level layer which provides basic PerlIO operations in terms of |
68 | | | | | UNIX/POSIX numeric file descriptor calls |
69 | | | | | (open(), read(), write(), lseek(), close()). |
70 | | | | | |
71 | | | | | =item :stdio |
72 | | | | | |
73 | | | | | Layer which calls C<fread>, C<fwrite> and C<fseek>/C<ftell> etc. Note |
74 | | | | | that as this is "real" stdio it will ignore any layers beneath it and |
75 | | | | | go straight to the operating system via the C library as usual. |
76 | | | | | |
77 | | | | | =item :perlio |
78 | | | | | |
79 | | | | | A from scratch implementation of buffering for PerlIO. Provides fast |
80 | | | | | access to the buffer for C<sv_gets> which implements perl's readline/E<lt>E<gt> |
81 | | | | | and in general attempts to minimize data copying. |
82 | | | | | |
83 | | | | | C<:perlio> will insert a C<:unix> layer below itself to do low level IO. |
84 | | | | | |
85 | | | | | =item :crlf |
86 | | | | | |
87 | | | | | A layer that implements DOS/Windows like CRLF line endings. On read |
88 | | | | | converts pairs of CR,LF to a single "\n" newline character. On write |
89 | | | | | converts each "\n" to a CR,LF pair. Note that this layer will silently |
90 | | | | | refuse to be pushed on top of itself. |
91 | | | | | |
92 | | | | | It currently does I<not> mimic MS-DOS in treating Control-Z |
93 | | | | | as an end-of-file marker. |
94 | | | | | |
95 | | | | | Based on the C<:perlio> layer. |
96 | | | | | |
97 | | | | | =item :utf8 |
98 | | | | | |
99 | | | | | Declares that the stream accepts perl's I<internal> encoding of |
100 | | | | | characters. (Which really is UTF-8 on ASCII machines, but is |
101 | | | | | UTF-EBCDIC on EBCDIC machines.) This allows any character perl can |
102 | | | | | represent to be read from or written to the stream. The UTF-X encoding |
103 | | | | | is chosen to render simple text parts (i.e. non-accented letters, |
104 | | | | | digits and common punctuation) human readable in the encoded file. |
105 | | | | | |
106 | | | | | (B<CAUTION>: This layer does not validate byte sequences. For reading input, |
107 | | | | | you should use C<:encoding(UTF-8)> instead of bare C<:utf8>.) |
108 | | | | | |
109 | | | | | Here is how to write your native data out using UTF-8 (or UTF-EBCDIC) |
110 | | | | | and then read it back in. |
111 | | | | | |
112 | | | | | open(F, ">:utf8", "data.utf"); |
113 | | | | | print F $out; |
114 | | | | | close(F); |
115 | | | | | |
116 | | | | | open(F, "<:utf8", "data.utf"); |
117 | | | | | $in = <F>; |
118 | | | | | close(F); |
119 | | | | | |
120 | | | | | |
121 | | | | | =item :bytes |
122 | | | | | |
123 | | | | | This is the inverse of the C<:utf8> layer. It turns off the flag |
124 | | | | | on the layer below so that data read from it is considered to |
125 | | | | | be "octets" i.e. characters in the range 0..255 only. Likewise |
126 | | | | | on output perl will warn if a "wide" character is written |
127 | | | | | to such a stream. |
128 | | | | | |
129 | | | | | =item :raw |
130 | | | | | |
131 | | | | | The C<:raw> layer is I<defined> as being identical to calling |
132 | | | | | C<binmode($fh)> - the stream is made suitable for passing binary data, |
133 | | | | | i.e. each byte is passed as-is. The stream will still be |
134 | | | | | buffered. |
135 | | | | | |
136 | | | | | In Perl 5.6 and some books the C<:raw> layer (previously sometimes also |
137 | | | | | referred to as a "discipline") is documented as the inverse of the |
138 | | | | | C<:crlf> layer. That is no longer the case - other layers which would |
139 | | | | | alter the binary nature of the stream are also disabled. If you want UNIX |
140 | | | | | line endings on a platform that normally does CRLF translation, but still |
141 | | | | | want UTF-8 or encoding defaults, the appropriate thing to do is to add |
142 | | | | | C<:perlio> to the PERLIO environment variable. |
143 | | | | | |
144 | | | | | The implementation of C<:raw> is as a pseudo-layer which when "pushed" |
145 | | | | | pops itself and then any layers which do not declare themselves as suitable |
146 | | | | | for binary data. (Undoing C<:utf8> and C<:crlf> is implemented by clearing |
147 | | | | | flags rather than popping layers, but that is an implementation detail.) |
148 | | | | | |
149 | | | | | As a consequence of the fact that C<:raw> normally pops layers, |
150 | | | | | it usually only makes sense to have it as the only or first element in |
151 | | | | | a layer specification. When used as the first element it provides |
152 | | | | | a known base on which to build e.g. |
153 | | | | | |
154 | | | | | open($fh,":raw:utf8",...) |
155 | | | | | |
156 | | | | | will construct a "binary" stream, but then enable UTF-8 translation. |
157 | | | | | |
158 | | | | | =item :pop |
159 | | | | | |
160 | | | | | A pseudo layer that removes the top-most layer. Gives perl code a |
161 | | | | | way to manipulate the layer stack. Note that C<:pop> only works on |
162 | | | | | real layers and will not undo the effects of pseudo layers like |
163 | | | | | C<:utf8>. An example of a possible use might be: |
164 | | | | | |
165 | | | | | open($fh,...) |
166 | | | | | ... |
167 | | | | | binmode($fh,":encoding(...)"); # next chunk is encoded |
168 | | | | | ... |
169 | | | | | binmode($fh,":pop"); # back to un-encoded |
170 | | | | | |
171 | | | | | A more elegant (and safer) interface is needed. |
172 | | | | | |
173 | | | | | =item :win32 |
174 | | | | | |
175 | | | | | On Win32 platforms this I<experimental> layer uses the native "handle" IO |
176 | | | | | rather than the unix-like numeric file descriptor layer. Known to be |
177 | | | | | buggy as of perl 5.8.2. |
178 | | | | | |
179 | | | | | =back |
180 | | | | | |
181 | | | | | =head2 Custom Layers |
182 | | | | | |
183 | | | | | It is possible to write custom layers in addition to the above builtin |
184 | | | | | ones, both in C/XS and Perl. Two such layers (and one example written |
185 | | | | | in Perl using the latter) come with the Perl distribution. |
186 | | | | | |
187 | | | | | =over 4 |
188 | | | | | |
189 | | | | | =item :encoding |
190 | | | | | |
191 | | | | | Use C<:encoding(ENCODING)> either in open() or binmode() to install |
192 | | | | | a layer that transparently does character set and encoding transformations, |
193 | | | | | for example from Shift-JIS to Unicode. Note that under C<stdio> |
194 | | | | | an C<:encoding> also enables C<:utf8>. See L<PerlIO::encoding> |
195 | | | | | for more information. |
196 | | | | | |
197 | | | | | =item :mmap |
198 | | | | | |
199 | | | | | A layer which implements "reading" of files by using C<mmap()> to |
200 | | | | | make a (whole) file appear in the process's address space, and then |
201 | | | | | using that as PerlIO's "buffer". This I<may> be faster in certain |
202 | | | | | circumstances for large files, and may result in less physical memory |
203 | | | | | use when multiple processes are reading the same file. |
204 | | | | | |
205 | | | | | Files which are not C<mmap()>-able revert to behaving like the C<:perlio> |
206 | | | | | layer. Writes also behave like the C<:perlio> layer, as C<mmap()> for write |
207 | | | | | needs extra house-keeping (to extend the file) which negates any advantage. |
208 | | | | | |
209 | | | | | The C<:mmap> layer will not exist if the platform does not support C<mmap()>. |
210 | | | | | |
211 | | | | | =item :via |
212 | | | | | |
213 | | | | | Use C<:via(MODULE)> either in open() or binmode() to install a layer |
214 | | | | | that does whatever transformation (for example compression / |
215 | | | | | decompression, encryption / decryption) to the filehandle. |
216 | | | | | See L<PerlIO::via> for more information. |
217 | | | | | |
218 | | | | | =back |
219 | | | | | |
220 | | | | | =head2 Alternatives to raw |
221 | | | | | |
222 | | | | | To get a binary stream an alternate method is to use: |
223 | | | | | |
224 | | | | | open($fh,"whatever") |
225 | | | | | binmode($fh); |
226 | | | | | |
227 | | | | | this has the advantage of being backward compatible with how such things have |
228 | | | | | had to be coded on some platforms for years. |
229 | | | | | |
230 | | | | | To get an unbuffered stream specify an unbuffered layer (e.g. C<:unix>) |
231 | | | | | in the open call: |
232 | | | | | |
233 | | | | | open($fh,"<:unix",$path) |
234 | | | | | |
235 | | | | | =head2 Defaults and how to override them |
236 | | | | | |
237 | | | | | If the platform is MS-DOS like and normally does CRLF to "\n" |
238 | | | | | translation for text files then the default layers are: |
239 | | | | | |
240 | | | | | unix crlf |
241 | | | | | |
242 | | | | | (The low level "unix" layer may be replaced by a platform specific low |
243 | | | | | level layer.) |
244 | | | | | |
245 | | | | | Otherwise if C<Configure> found out how to do "fast" IO using the system's |
246 | | | | | stdio, then the default layers are: |
247 | | | | | |
248 | | | | | unix stdio |
249 | | | | | |
250 | | | | | Otherwise the default layers are |
251 | | | | | |
252 | | | | | unix perlio |
253 | | | | | |
254 | | | | | These defaults may change once perlio has been better tested and tuned. |
255 | | | | | |
256 | | | | | The default can be overridden by setting the environment variable |
257 | | | | | PERLIO to a space separated list of layers (C<unix> or platform low |
258 | | | | | level layer is always pushed first). |
259 | | | | | |
260 | | | | | This can be used to see the effect of/bugs in the various layers e.g. |
261 | | | | | |
262 | | | | | cd .../perl/t |
263 | | | | | PERLIO=stdio ./perl harness |
264 | | | | | PERLIO=perlio ./perl harness |
265 | | | | | |
266 | | | | | For the various values of PERLIO see L<perlrun/PERLIO>. |
267 | | | | | |
268 | | | | | =head2 Querying the layers of filehandles |
269 | | | | | |
270 | | | | | The following returns the B<names> of the PerlIO layers on a filehandle. |
271 | | | | | |
272 | | | | | my @layers = PerlIO::get_layers($fh); # Or FH, *FH, "FH". |
273 | | | | | |
274 | | | | | The layers are returned in the order an open() or binmode() call would |
275 | | | | | use them. Note that the "default stack" depends on the operating |
276 | | | | | system and on the Perl version, and both the compile-time and |
277 | | | | | runtime configurations of Perl. |
278 | | | | | |
279 | | | | | The following table summarizes the default layers on UNIX-like and |
280 | | | | | DOS-like platforms and depending on the setting of C<$ENV{PERLIO}>: |
281 | | | | | |
282 | | | | |  PERLIO      UNIX-like                DOS-like |
283 | | | | |  ------      ---------                -------- |
284 | | | | |  unset / ""  unix perlio / stdio [1]  unix crlf |
285 | | | | |  stdio       unix perlio / stdio [1]  stdio |
286 | | | | |  perlio      unix perlio              unix perlio |
287 | | | | | |
288 | | | | | # [1] "stdio" if Configure found out how to do "fast stdio" (depends |
289 | | | | | # on the stdio implementation) and in Perl 5.8, otherwise "unix perlio" |
290 | | | | | |
291 | | | | | By default the layers from the input side of the filehandle are |
292 | | | | | returned; to get the output side, use the optional C<output> argument: |
293 | | | | | |
294 | | | | | my @layers = PerlIO::get_layers($fh, output => 1); |
295 | | | | | |
296 | | | | | (Usually the layers are identical on either side of a filehandle but |
297 | | | | | for example with sockets there may be differences, or if you have |
298 | | | | | been using the C<open> pragma.) |
299 | | | | | |
300 | | | | | There is no set_layers(), nor does get_layers() return a tied array |
301 | | | | | mirroring the stack, or anything fancy like that. This is not |
302 | | | | | accidental or unintentional. The PerlIO layer stack is a bit more |
303 | | | | | complicated than just a stack (see for example the behaviour of C<:raw>). |
304 | | | | | You are supposed to use open() and binmode() to manipulate the stack. |
305 | | | | | |
306 | | | | | B<Implementation details follow, please close your eyes.> |
307 | | | | | |
308 | | | | | The arguments to layers are by default returned in parentheses after |
309 | | | | | the name of the layer, and certain layers (like C<utf8>) are not real |
310 | | | | | layers but instead flags on real layers; to get all of these returned |
311 | | | | | separately, use the optional C<details> argument: |
312 | | | | | |
313 | | | | | my @layer_and_args_and_flags = PerlIO::get_layers($fh, details => 1); |
314 | | | | | |
315 | | | | | The result will be up to three times the number of layers: |
316 | | | | | the first element will be a name, the second element the arguments |
317 | | | | | (unspecified arguments will be C<undef>), the third element the flags, |
318 | | | | | the fourth element a name again, and so forth. |
319 | | | | | |
320 | | | | | B<You may open your eyes now.> |
321 | | | | | |
322 | | | | | =head1 AUTHOR |
323 | | | | | |
324 | | | | | Nick Ing-Simmons E<lt>nick@ing-simmons.netE<gt> |
325 | | | | | |
326 | | | | | =head1 SEE ALSO |
327 | | | | | |
328 | | | | | L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<perliol>, |
329 | | | | | L<Encode> |
330 | | | | | |
331 | | | | | =cut |