← Index
NYTProf Performance Profile   « line view »
For starman worker -M FindBin --max-requests 50 --workers 2 --user=kohadev-koha --group kohadev-koha --pid /var/run/koha/kohadev/plack.pid --daemonize --access-log /var/log/koha/kohadev/plack.log --error-log /var/log/koha/kohadev/plack-error.log -E deployment --socket /var/run/koha/kohadev/plack.sock /etc/koha/sites/kohadev/plack.psgi
  Run on Fri Jan 8 14:31:06 2016
Reported on Fri Jan 8 14:31:39 2016

Filename/usr/lib/x86_64-linux-gnu/perl/5.20/Encode/Alias.pm
StatementsExecuted 0 statements in 145µs
Line State
ments
Time
on line
Calls Time
in subs
Code
1package Encode::Alias;
2use strict;
3use warnings;
4no warnings 'redefine';
5our $VERSION = do { my @r = ( q$Revision: 2.18 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
6use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
7
8use Exporter 'import';
9
10# Public, encouraged API is exported by default
11
12our @EXPORT =
13 qw (
14 define_alias
15 find_alias
16);
17
18our @Alias; # ordered matching list
19our %Alias; # cached known aliases
20
21sub find_alias {
22 require Encode;
23 my $class = shift;
24 my $find = shift;
25 unless ( exists $Alias{$find} ) {
26 $Alias{$find} = undef; # Recursion guard
27 for ( my $i = 0 ; $i < @Alias ; $i += 2 ) {
28 my $alias = $Alias[$i];
29 my $val = $Alias[ $i + 1 ];
30 my $new;
31 if ( ref($alias) eq 'Regexp' && $find =~ $alias ) {
32 DEBUG and warn "eval $val";
331145µs $new = eval $val;
# spent 3µs executing statements in string eval
34 DEBUG and $@ and warn "$val, $@";
35 }
36 elsif ( ref($alias) eq 'CODE' ) {
37 DEBUG and warn "$alias", "->", "($find)";
38 $new = $alias->($find);
39 }
40 elsif ( lc($find) eq lc($alias) ) {
41 $new = $val;
42 }
43 if ( defined($new) ) {
44 next if $new eq $find; # avoid (direct) recursion on bugs
45 DEBUG and warn "$alias, $new";
46 my $enc =
47 ( ref($new) ) ? $new : Encode::find_encoding($new);
48 if ($enc) {
49 $Alias{$find} = $enc;
50 last;
51 }
52 }
53 }
54
55 # case insensitive search when canonical is not in all lowercase
56 # RT ticket #7835
57 unless ( $Alias{$find} ) {
58 my $lcfind = lc($find);
59 for my $name ( keys %Encode::Encoding, keys %Encode::ExtModule )
60 {
61 $lcfind eq lc($name) or next;
62 $Alias{$find} = Encode::find_encoding($name);
63 DEBUG and warn "$find => $name";
64 }
65 }
66 }
67 if (DEBUG) {
68 my $name;
69 if ( my $e = $Alias{$find} ) {
70 $name = $e->name;
71 }
72 else {
73 $name = "";
74 }
75 warn "find_alias($class, $find)->name = $name";
76 }
77 return $Alias{$find};
78}
79
80sub define_alias {
81 while (@_) {
82 my ( $alias, $name ) = splice( @_, 0, 2 );
83 unshift( @Alias, $alias => $name ); # newer one has precedence
84 if ( ref($alias) ) {
85
86 # clear %Alias cache to allow overrides
87 my @a = keys %Alias;
88 for my $k (@a) {
89 if ( ref($alias) eq 'Regexp' && $k =~ $alias ) {
90 DEBUG and warn "delete \$Alias\{$k\}";
91 delete $Alias{$k};
92 }
93 elsif ( ref($alias) eq 'CODE' && $alias->($k) ) {
94 DEBUG and warn "delete \$Alias\{$k\}";
95 delete $Alias{$k};
96 }
97 }
98 }
99 else {
100 DEBUG and warn "delete \$Alias\{$alias\}";
101 delete $Alias{$alias};
102 }
103 }
104}
105
106# Allow latin-1 style names as well
107# 0 1 2 3 4 5 6 7 8 9 10
108our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 );
109
110# Allow winlatin1 style names as well
111our %Winlatin2cp = (
112 'latin1' => 1252,
113 'latin2' => 1250,
114 'cyrillic' => 1251,
115 'greek' => 1253,
116 'turkish' => 1254,
117 'hebrew' => 1255,
118 'arabic' => 1256,
119 'baltic' => 1257,
120 'vietnamese' => 1258,
121);
122
123init_aliases();
124
125sub undef_aliases {
126 @Alias = ();
127 %Alias = ();
128}
129
130sub init_aliases {
131 require Encode;
132 undef_aliases();
133
134 # Try all-lower-case version should all else fails
135 define_alias( qr/^(.*)$/ => '"\L$1"' );
136
137 # UTF/UCS stuff
138 define_alias( qr/^(unicode-1-1-)?UTF-?7$/i => '"UTF-7"' );
139 define_alias( qr/^UCS-?2-?LE$/i => '"UCS-2LE"' );
140 define_alias(
141 qr/^UCS-?2-?(BE)?$/i => '"UCS-2BE"',
142 qr/^UCS-?4-?(BE|LE)?$/i => 'uc("UTF-32$1")',
143 qr/^iso-10646-1$/i => '"UCS-2BE"'
144 );
145 define_alias(
146 qr/^UTF-?(16|32)-?BE$/i => '"UTF-$1BE"',
147 qr/^UTF-?(16|32)-?LE$/i => '"UTF-$1LE"',
148 qr/^UTF-?(16|32)$/i => '"UTF-$1"',
149 );
150
151 # ASCII
152 define_alias( qr/^(?:US-?)ascii$/i => '"ascii"' );
153 define_alias( 'C' => 'ascii' );
154 define_alias( qr/\b(?:ISO[-_]?)?646(?:[-_]?US)?$/i => '"ascii"' );
155
156 # Allow variants of iso-8859-1 etc.
157 define_alias( qr/\biso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' );
158
159 # At least HP-UX has these.
160 define_alias( qr/\biso8859(\d+)$/i => '"iso-8859-$1"' );
161
162 # More HP stuff.
163 define_alias(
164 qr/\b(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i =>
165 '"${1}8"' );
166
167 # The Official name of ASCII.
168 define_alias( qr/\bANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' );
169
170 # This is a font issue, not an encoding issue.
171 # (The currency symbol of the Latin 1 upper half
172 # has been redefined as the euro symbol.)
173 define_alias( qr/^(.+)\@euro$/i => '"$1"' );
174
175 define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i =>
176'defined $Encode::Alias::Latin2iso[$1] ? "iso-8859-$Encode::Alias::Latin2iso[$1]" : undef'
177 );
178
179 define_alias(
180 qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish|
181 hebrew|arabic|baltic|vietnamese)$/ix =>
182 '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}'
183 );
184
185 # Common names for non-latin preferred MIME names
186 define_alias(
187 'ascii' => 'US-ascii',
188 'cyrillic' => 'iso-8859-5',
189 'arabic' => 'iso-8859-6',
190 'greek' => 'iso-8859-7',
191 'hebrew' => 'iso-8859-8',
192 'thai' => 'iso-8859-11',
193 );
194 # RT #20781
195 define_alias(qr/\btis-?620\b/i => '"iso-8859-11"');
196
197 # At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
198 # And Microsoft has their own naming (again, surprisingly).
199 # And windows-* is registered in IANA!
200 define_alias(
201 qr/\b(?:cp|ibm|ms|windows)[-_ ]?(\d{2,4})$/i => '"cp$1"' );
202
203 # Sometimes seen with a leading zero.
204 # define_alias( qr/\bcp037\b/i => '"cp37"');
205
206 # Mac Mappings
207 # predefined in *.ucm; unneeded
208 # define_alias( qr/\bmacIcelandic$/i => '"macIceland"');
209 define_alias( qr/^(?:x[_-])?mac[_-](.*)$/i => '"mac$1"' );
210 # http://rt.cpan.org/Ticket/Display.html?id=36326
211 define_alias( qr/^macintosh$/i => '"MacRoman"' );
212 # https://rt.cpan.org/Ticket/Display.html?id=78125
213 define_alias( qr/^macce$/i => '"MacCentralEurRoman"' );
214 # Ououououou. gone. They are different!
215 # define_alias( qr/\bmacRomanian$/i => '"macRumanian"');
216
217 # Standardize on the dashed versions.
218 define_alias( qr/\bkoi8[\s\-_]*([ru])$/i => '"koi8-$1"' );
219
220 unless ($Encode::ON_EBCDIC) {
221
222 # for Encode::CN
223 define_alias( qr/\beuc.*cn$/i => '"euc-cn"' );
224 define_alias( qr/\bcn.*euc$/i => '"euc-cn"' );
225
226 # define_alias( qr/\bGB[- ]?(\d+)$/i => '"euc-cn"' )
227 # CP936 doesn't have vendor-addon for GBK, so they're identical.
228 define_alias( qr/^gbk$/i => '"cp936"' );
229
230 # This fixes gb2312 vs. euc-cn confusion, practically
231 define_alias( qr/\bGB[-_ ]?2312(?!-?raw)/i => '"euc-cn"' );
232
233 # for Encode::JP
234 define_alias( qr/\bjis$/i => '"7bit-jis"' );
235 define_alias( qr/\beuc.*jp$/i => '"euc-jp"' );
236 define_alias( qr/\bjp.*euc$/i => '"euc-jp"' );
237 define_alias( qr/\bujis$/i => '"euc-jp"' );
238 define_alias( qr/\bshift.*jis$/i => '"shiftjis"' );
239 define_alias( qr/\bsjis$/i => '"shiftjis"' );
240 define_alias( qr/\bwindows-31j$/i => '"cp932"' );
241
242 # for Encode::KR
243 define_alias( qr/\beuc.*kr$/i => '"euc-kr"' );
244 define_alias( qr/\bkr.*euc$/i => '"euc-kr"' );
245
246 # This fixes ksc5601 vs. euc-kr confusion, practically
247 define_alias( qr/(?:x-)?uhc$/i => '"cp949"' );
248 define_alias( qr/(?:x-)?windows-949$/i => '"cp949"' );
249 define_alias( qr/\bks_c_5601-1987$/i => '"cp949"' );
250
251 # for Encode::TW
252 define_alias( qr/\bbig-?5$/i => '"big5-eten"' );
253 define_alias( qr/\bbig5-?et(?:en)?$/i => '"big5-eten"' );
254 define_alias( qr/\btca[-_]?big5$/i => '"big5-eten"' );
255 define_alias( qr/\bbig5-?hk(?:scs)?$/i => '"big5-hkscs"' );
256 define_alias( qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"' );
257 }
258
259 # utf8 is blessed :)
260 define_alias( qr/\bUTF-8$/i => '"utf-8-strict"' );
261
262 # At last, Map white space and _ to '-'
263 define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' );
264}
265
2661;
267__END__
268
269# TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8
270# TODO: HP-UX '15' encodings japanese15 korean15 roi15
271# TODO: Cyrillic encoding ISO-IR-111 (useful?)
272# TODO: Armenian encoding ARMSCII-8
273# TODO: Hebrew encoding ISO-8859-8-1
274# TODO: Thai encoding TCVN
275# TODO: Vietnamese encodings VPS
276# TODO: Mac Asian+African encodings: Arabic Armenian Bengali Burmese
277# ChineseSimp ChineseTrad Devanagari Ethiopic ExtArabic
278# Farsi Georgian Gujarati Gurmukhi Hebrew Japanese
279# Kannada Khmer Korean Laotian Malayalam Mongolian
280# Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese
281
282=head1 NAME
283
284Encode::Alias - alias definitions to encodings
285
286=head1 SYNOPSIS
287
288 use Encode;
289 use Encode::Alias;
290 define_alias( "newName" => ENCODING);
291 define_alias( qr/.../ => ENCODING);
292 define_alias( sub { return ENCODING if ...; } );
293
294=head1 DESCRIPTION
295
296Allows newName to be used as an alias for ENCODING. ENCODING may be
297either the name of an encoding or an encoding object (as described
298in L<Encode>).
299
300Currently the first argument to define_alias() can be specified in the
301following ways:
302
303=over 4
304
305=item As a simple string.
306
307=item As a qr// compiled regular expression, e.g.:
308
309 define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' );
310
311In this case, if I<ENCODING> is not a reference, it is C<eval>-ed
312in order to allow C<$1> etc. to be substituted. The example is one
313way to alias names as used in X11 fonts to the MIME names for the
314iso-8859-* family. Note the double quotes inside the single quotes.
315
316(or, you don't have to do this yourself because this example is predefined)
317
318If you are using a regex here, you have to use the quotes as shown or
319it won't work. Also note that regex handling is tricky even for the
320experienced. Use this feature with caution.
321
322=item As a code reference, e.g.:
323
324 define_alias( sub {shift =~ /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
325
326The same effect as the example above in a different way. The coderef
327takes the alias name as an argument and returns a canonical name on
328success or undef if not. Note the second argument is ignored if provided.
329Use this with even more caution than the regex version.
330
331=back
332
333=head3 Changes in code reference aliasing
334
335As of Encode 1.87, the older form
336
337 define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
338
339no longer works.
340
341Encode up to 1.86 internally used "local $_" to implement this older
342form. But consider the code below;
343
344 use Encode;
345 $_ = "eeeee" ;
346 while (/(e)/g) {
347 my $utf = decode('aliased-encoding-name', $1);
348 print "position:",pos,"\n";
349 }
350
351Prior to Encode 1.86 this fails because of "local $_".
352
353=head2 Alias overloading
354
355You can override predefined aliases by simply applying define_alias().
356The new alias is always evaluated first, and when necessary,
357define_alias() flushes the internal cache to make the new definition
358available.
359
360 # redirect SHIFT_JIS to MS/IBM Code Page 932, which is a
361 # superset of SHIFT_JIS
362
363 define_alias( qr/shift.*jis$/i => '"cp932"' );
364 define_alias( qr/sjis$/i => '"cp932"' );
365
366If you want to zap all predefined aliases, you can use
367
368 Encode::Alias->undef_aliases;
369
370to do so. And
371
372 Encode::Alias->init_aliases;
373
374gets the factory settings back.
375
376Note that define_alias() will not be able to override the canonical name
377of encodings. Encodings are first looked up by canonical name before
378potential aliases are tried.
379
380=head1 SEE ALSO
381
382L<Encode>, L<Encode::Supported>
383
384=cut
385