Revision ad7353df
Von Moritz Bunkus vor mehr als 14 Jahren hinzugefügt
SL/Iconv.pm | ||
---|---|---|
1 | 1 |
package SL::Iconv; |
2 | 2 |
|
3 |
use Encode; |
|
4 |
use English qw(-no_match_vars); |
|
3 | 5 |
use Text::Iconv; |
4 | 6 |
|
5 | 7 |
use SL::Common; |
... | ... | |
8 | 10 |
|
9 | 11 |
use strict; |
10 | 12 |
|
13 |
# Constructor: SL::Iconv->new($from_charset, $to_charset).
# Blesses an empty hash into the invoking class and hands every remaining
# argument to _init, which fills in the converter state.
sub new {
  my ($class, @charsets) = @_;

  my $self = bless {}, $class;
  $self->_init(@charsets);

  return $self;
}
|
21 |
|
|
11 | 22 |
# Class function: returns the cached SL::Iconv object for the given pair of
# charsets, creating it on first use.
#
#   $from_charset - name of the source charset
#   $to_charset   - name of the target charset
#
# The cache key joins both names with $SUBSCRIPT_SEPARATOR ($;). Only the
# current revision of the cache fill is kept here; the superseded variant
# stored a raw Text::Iconv handle under a "::"-joined key and declared
# $index a second time.
sub get_converter {
  my ($from_charset, $to_charset) = @_;

  my $index = join $SUBSCRIPT_SEPARATOR, $from_charset, $to_charset;
  $converters{$index} ||= SL::Iconv->new($from_charset, $to_charset);

  return $converters{$index};
}
21 | 30 |
|
22 | 31 |
sub convert { |
32 |
return _convert(@_) if ref $_[0]; |
|
33 |
|
|
23 | 34 |
my ($from_charset, $to_charset, $text) = @_; |
24 | 35 |
|
25 | 36 |
$from_charset ||= Common::DEFAULT_CHARSET; |
... | ... | |
29 | 40 |
return $converter->convert($text); |
30 | 41 |
} |
31 | 42 |
|
43 |
# Instance-side implementation of convert().
#
#   $text - the string to convert
#
# Runs $text through the iconv handle stored in $self->{handle}. When the
# target charset is UTF-8, the result is additionally decoded so that it
# carries Perl's UTF8 flag. Text that already has the flag set is returned
# untouched in that case, so converting the same string twice is harmless.
sub _convert {
  my ($self, $text) = @_;

  my $wants_utf8 = $self->{to_is_utf8};

  # Already decoded and headed for UTF-8: nothing left to do.
  return $text if $wants_utf8 && Encode::is_utf8($text);

  $text = $self->{handle}->convert($text);
  return $text unless $wants_utf8;

  # Only decode if the handle did not already hand back a flagged string.
  $text = decode("utf-8-strict", $text) unless Encode::is_utf8($text);

  return $text;
}
|
52 |
|
|
53 |
# Fills in the converter state; called by new().
#
#   from       - source charset as passed in
#   to         - target charset; the alias 'unicode' (any case) becomes 'UTF-8'
#   to_is_utf8 - true if the target charset matches utf8 / utf-8 (any case)
#   handle     - the Text::Iconv object that does the actual conversion
#
# Dies if no Text::Iconv handle is obtained. Returns $self.
sub _init {
  my ($self, $from, $to) = @_;

  $self->{from} = $from;
  $self->{to}   = $to;

  # Normalize the alias before the UTF-8 test and before opening the handle.
  $self->{to}         = 'UTF-8' if lc $self->{to} eq 'unicode';
  $self->{to_is_utf8} = $self->{to} =~ m/^utf-?8$/i;
  $self->{handle}     = Text::Iconv->new($self->{from}, $self->{to}) || die;

  return $self;
}
|
63 |
|
|
64 |
# Accessor: true if this converter's target charset is UTF-8, as determined
# when the object was initialized.
sub is_utf8 {
  my ($self) = @_;

  return $self->{to_is_utf8};
}
|
67 |
|
|
32 | 68 |
1; |
33 | 69 |
|
70 |
__END__ |
|
71 |
|
|
72 |
=head1 NAME |
|
73 |
|
|
74 |
SL::Iconv -- Thin layer on top of Text::Iconv that also sets the UTF8 flag on UTF-8 results |
|
75 |
|
|
76 |
=head1 SYNOPSIS |
|
77 |
|
|
78 |
Usage: |
|
79 |
|
|
80 |
use SL::Iconv; |
|
81 |
|
|
82 |
# Conversion without creating objects: |
|
83 |
my $text_utf8 = SL::Iconv::convert("ISO-8859-15", "UTF-8", $text_iso); |
|
84 |
|
|
85 |
# Conversion with an object: |
|
86 |
my $converter = SL::Iconv->new("ISO-8859-15", "UTF-8"); |
|
87 |
my $text_utf8 = $converter->convert($text_iso); |
|
88 |
|
|
89 |
=head1 DESCRIPTION |
|
90 |
|
|
91 |
A thin layer on top of L<Text::Iconv>. Special handling is implemented |
|
92 |
if the target charset is UTF-8: The resulting string has its UTF8 flag |
|
93 |
set via a call to C<Encode::decode("utf-8-strict", ...)>. |
|
94 |
|
|
95 |
=head1 CLASS FUNCTIONS |
|
96 |
|
|
97 |
=over 4 |
|
98 |
|
|
99 |
=item C<new $from_charset, $to_charset> |
|
100 |
|
|
101 |
Create a new object for conversion from C<$from_charset> to |
|
102 |
C<$to_charset>. |
|
103 |
|
|
104 |
=item C<convert $from_charset, $to_charset, $text> |
|
105 |
|
|
106 |
Converts the string C<$text> from charset C<$from_charset> to charset |
|
107 |
C<$to_charset>. See the instance method C<convert> for further |
|
108 |
discussion. |
|
109 |
|
|
110 |
The object used for this conversion is cached. Therefore multiple |
|
111 |
calls to C<convert> do not result in multiple initializations of the |
|
112 |
iconv library. |
|
113 |
|
|
114 |
=back |
|
115 |
|
|
116 |
=head1 INSTANCE FUNCTIONS |
|
117 |
|
|
118 |
=over 4 |
|
119 |
|
|
120 |
=item C<convert $text> |
|
121 |
|
|
122 |
Converts the string C<$text> from one charset to another (see C<new>). |
|
123 |
|
|
124 |
Special handling is implemented if the target charset is UTF-8: The |
|
125 |
resulting string has its UTF8 flag set via a call to |
|
126 |
C<Encode::decode("utf-8-strict", ...)>. It is also safe to call |
|
127 |
C<convert> multiple times for the same string in such cases as the |
|
128 |
conversion is only done if the UTF8 flag hasn't been set yet. |
|
129 |
|
|
130 |
=item C<is_utf8> |
|
131 |
|
|
132 |
Returns true if the handle converts into UTF8. |
|
133 |
|
|
134 |
=back |
|
135 |
|
|
136 |
=head1 MODULE AUTHORS |
|
137 |
|
|
138 |
Moritz Bunkus E<lt>m.bunkus@linet-services.deE<gt> |
|
139 |
|
|
140 |
L<http://linet-services.de> |
Auch abrufbar als: Unified diff
Erweiterung SL::Iconv um convert-Methode, die auch UTF8-Flag setzt & Objekt-Interface