kivitendo/SL/HTML/Util.pm @ 79b7fc43
792ae733 | Moritz Bunkus | package SL::HTML::Util;
|
||
use strict;
|
||||
use warnings;
|
||||
use HTML::Parser;
|
||||
# State shared by all calls to strip(): the HTML::Parser instance is kept
# under the key "parser" and the text collected during one run under the
# key "text". It stays empty until strip() is called for the first time.
my %stripper;

# Entity names that strip() decodes, mapped to the characters they stand for.
my %entities = (
  lt  => '<',
  gt  => '>',
  amp => '&',
);
|
||||
792ae733 | Moritz Bunkus | |||
# Strips all markup from an HTML fragment and returns the plain text.
#
# May be called as a plain function, strip($html), or as a class method,
# SL::HTML::Util->strip($html).
#
# Parameters:
#   $html - the HTML content to strip; may be undef.
#
# Returns the text chunks reported by the parser, concatenated, with the
# entities listed in %entities decoded. Entities not listed there are
# left untouched. Returns '' if $html is undef.
sub strip {
  my ($class_or_value) = @_;

  # When invoked as a class method the first argument is the package name
  # and the actual content is the second argument.
  my $called_as_method = !ref($class_or_value) && (($class_or_value // '') eq 'SL::HTML::Util');
  my $value            = $called_as_method ? $_[1] : $class_or_value;

  return '' unless defined $value;

  # Remove HTML comments. The /s modifier lets "." match newlines so that
  # comments spanning several lines are removed as well.
  $value =~ s{ <!-- .*? --> }{}gxs;

  # The parser is created on first use and reused by all later calls.
  if (!%stripper) {
    %stripper = ( parser => HTML::Parser->new );

    # Only text events are collected; the handler appends its second
    # argument (the text chunk) to the accumulator.
    $stripper{parser}->handler(text => sub { $stripper{text} .= $_[1]; });
  }

  $stripper{text} = '';
  $stripper{parser}->parse($value);
  $stripper{parser}->eof;

  # Decode the known entities. The entity name is restricted to
  # alphanumeric characters: matching "anything up to the next semicolon"
  # would let a stray "&" (as in "AT&T &lt;") swallow the following entity
  # and leave it undecoded.
  $stripper{text} =~ s{ & ([A-Za-z][A-Za-z0-9]*) ; }{ $entities{$1} // "&$1;" }egx;

  # delete() returns the collected text and clears the accumulator.
  return delete $stripper{text};
}
|
||||
1;
|
||||
__END__
|
||||
=pod
|
||||
=encoding utf8
|
||||
=head1 NAME
|
||||
SL::HTML::Util - Utility functions dealing with HTML
|
||||
=head1 SYNOPSIS
|
||||
my $plain_text = SL::HTML::Util->strip('<h1>Hello World</h1>');
|
||||
=head1 FUNCTIONS
|
||||
=over 4
|
||||
=item C<strip $html_content>
|
||||
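Removes all HTML tags and comments from C<$html_content> and returns the
remaining plain text with the entities C<&lt;>, C<&gt;> and C<&amp;>
decoded. Returns an empty string if C<$html_content> is undefined.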
|
||||
=back
|
||||
=head1 BUGS
|
||||
Nothing here yet.
|
||||
=head1 AUTHOR
|
||||
Moritz Bunkus E<lt>m.bunkus@linet-services.deE<gt>
|
||||
=cut
|