Projekt

Allgemein

Profil

« Zurück | Weiter » 

Revision ad7353df

Von Moritz Bunkus vor mehr als 14 Jahren hinzugefügt

  • ID ad7353df162d32e3b6f9348a5f8c1310079e6110
  • Vorgänger cc042e07
  • Nachfolger f5f077a7

Erweiterung SL::Iconv um convert-Methode, die auch UTF8-Flag setzt & Objekt-Interface

Unterschiede anzeigen:

SL/Iconv.pm
1 1
package SL::Iconv;
2 2

  
3
use Encode;
4
use English qw(-no_match_vars);
3 5
use Text::Iconv;
4 6

  
5 7
use SL::Common;
......
8 10

  
9 11
use strict;
10 12

  
13
# Constructor. Blesses an empty hash into the invoking class and hands every
# remaining argument to _init, then returns the new object.
sub new {
  my ($class, @args) = @_;

  my $self = bless {}, $class;
  $self->_init(@args);

  return $self;
}
21

  
11 22
# Returns the converter stored in %converters for the given
# ($from_charset, $to_charset) pair, creating and storing it on first use.
#
# NOTE(review): this span is taken from a rendered diff and still carries the
# viewer's gutter numbers. Lines with a single gutter number seem to belong to
# one side of the change only: 14-17 look like the removed implementation,
# 25-26 like the added one -- confirm against the repository.
sub get_converter {
12 23
  my ($from_charset, $to_charset) = @_;
13 24

  
14
  # Presumably the pre-change code: cache key "from::to", value is a raw
  # Text::Iconv handle.
  my $index = "${from_charset}::${to_charset}";
15
  if (!$converters{$index}) {
16
    $converters{$index} = Text::Iconv->new($from_charset, $to_charset) || die;
17
  }
25
  # Presumably the post-change code: the key joins both charsets with
  # $SUBSCRIPT_SEPARATOR, and the cached value is an SL::Iconv object.
  my $index             = join $SUBSCRIPT_SEPARATOR, $from_charset, $to_charset;
26
  $converters{$index} ||= SL::Iconv->new($from_charset, $to_charset);
18 27

  
19 28
  return $converters{$index};
20 29
}
21 30

  
22 31
# Dual-purpose entry point.
#
# When the first argument is a reference, all arguments are passed on to
# _convert. Otherwise the arguments are taken as
# ($from_charset, $to_charset, $text), and the result of
# $converter->convert($text) is returned.
#
# NOTE(review): the lines between the $from_charset default and the final
# return are elided in this view ("......"); $converter is set up there.
sub convert {
32
  return _convert(@_) if ref $_[0];
33

  
23 34
  my ($from_charset, $to_charset, $text) = @_;
24 35

  
25 36
  # A false source charset falls back to Common::DEFAULT_CHARSET.
  $from_charset ||= Common::DEFAULT_CHARSET;
......
29 40
  return $converter->convert($text);
30 41
}
31 42

  
43
# Converts $text with this object's handle and returns the result.
#
# If the target charset is UTF-8, text that already has Perl's UTF8 flag set
# is returned untouched. Otherwise the handle's output is decoded with
# "utf-8-strict" so that the returned string has the flag set.
sub _convert {
  my ($self, $text) = @_;

  my $wants_utf8 = $self->{to_is_utf8};

  # Skip the conversion only when the target is UTF-8 and the text is already
  # flagged as such.
  if (!($wants_utf8 && Encode::is_utf8($text))) {
    $text = $self->{handle}->convert($text);
  }

  # The handle's output carries no UTF8 flag; decoding sets it.
  if ($wants_utf8 && !Encode::is_utf8($text)) {
    $text = decode("utf-8-strict", $text);
  }

  return $text;
}
52

  
53
# Initializes the object from ($from_charset, $to_charset).
#
# Stores both charsets, maps a target of 'unicode' (any case) to 'UTF-8',
# records in {to_is_utf8} whether the target is UTF-8, and creates the
# Text::Iconv handle used for the conversions.
#
# Dies with a message naming both charsets if no handle could be created.
# Returns $self.
sub _init {
  my $self = shift;
  $self->{from}       = shift;
  $self->{to}         = shift;
  $self->{to}         = 'UTF-8' if lc $self->{to} eq 'unicode';
  $self->{to_is_utf8} = $self->{to} =~ m/^utf-?8$/i;
  # A bare "die" would only report "Died at ..."; name the charsets so the
  # failing conversion can be identified from the error alone.
  $self->{handle}     = Text::Iconv->new($self->{from}, $self->{to})
    or die "SL::Iconv: cannot create converter from '$self->{from}' to '$self->{to}': $!";

  return $self;
}
63

  
64
# Accessor: returns the object's {to_is_utf8} value, which is true when the
# target charset is UTF-8.
sub is_utf8 {
  my ($self) = @_;

  return $self->{to_is_utf8};
}
67

  
32 68
1;
33 69

  
70
__END__
71

  
72
=head1 NAME
73

  
74
SL::Iconv -- Thin layer on top of Text::Iconv that also sets the UTF8 flag via Encode::decode
75

  
76
=head1 SYNOPSIS
77

  
78
Usage:
79

  
80
  use SL::Iconv;
81

  
82
  # Conversion without creating objects:
83
  my $text_utf8 = SL::Iconv::convert("ISO-8859-15", "UTF-8", $text_iso);
84

  
85
  # Conversion with an object:
86
  my $converter = SL::Iconv->new("ISO-8859-15", "UTF-8");
87
  my $text_utf8 = $converter->convert($text_iso);
88

  
89
=head1 DESCRIPTION
90

  
91
A thin layer on top of L<Text::Iconv>. Special handling is implemented
92
if the target charset is UTF-8: The resulting string has its UTF8 flag
93
set via a call to C<Encode::decode("utf-8-strict", ...)>.
94

  
95
=head1 CLASS FUNCTIONS
96

  
97
=over 4
98

  
99
=item C<new $from_charset, $to_charset>
100

  
101
Create a new object for conversion from C<$from_charset> to
102
C<$to_charset>.
103

  
104
=item C<convert $from_charset, $to_charset, $text>
105

  
106
Converts the string C<$text> from charset C<$from_charset> to charset
107
C<$to_charset>. See the instance method C<convert> for further
108
discussion.
109

  
110
The object used for this conversion is cached. Therefore multiple
111
calls to C<convert> do not result in multiple initializations of the
112
iconv library.
113

  
114
=back
115

  
116
=head1 INSTANCE FUNCTIONS
117

  
118
=over 4
119

  
120
=item C<convert $text>
121

  
122
Converts the string C<$text> from one charset to another (see C<new>).
123

  
124
Special handling is implemented if the target charset is UTF-8: The
125
resulting string has its UTF8 flag set via a call to
126
C<Encode::decode("utf-8-strict", ...)>. It is also safe to call
127
C<convert> multiple times for the same string in such cases as the
128
conversion is only done if the UTF8 flag hasn't been set yet.
129

  
130
=item C<is_utf8>
131

  
132
Returns true if the handle converts into UTF8.
133

  
134
=back
135

  
136
=head1 MODULE AUTHORS
137

  
138
Moritz Bunkus E<lt>m.bunkus@linet-services.deE<gt>
139

  
140
L<http://linet-services.de>

Auch abrufbar als: Unified diff