Projekt

Allgemein

Profil

Herunterladen (2,18 KB) Statistiken
| Zweig: | Markierung: | Revision:
792ae733 Moritz Bunkus
package SL::HTML::Util;

use strict;
use warnings;

use HTML::Parser;

my %stripper;
5de9f9f8 Moritz Bunkus
my %entities = (
0c1305a9 Bernd Bleßmann
'lt' => '<',
'gt' => '>',
'amp' => '&',
'nbsp' => ' ', # should be => "\x{00A0}", but this can lead to problems with
# a non-visible character in csv-exports for example
5de9f9f8 Moritz Bunkus
);
792ae733 Moritz Bunkus
sub strip {
my ($class_or_value) = @_;

my $value = !ref($class_or_value) && (($class_or_value // '') eq 'SL::HTML::Util') ? $_[1] : $class_or_value;

06a0f32d Sven Schöling
return '' unless defined $value;

# Remove HTML comments.
$value =~ s{ <!-- .*? --> }{}gx;
ad06ed73 Sven Schöling
792ae733 Moritz Bunkus
if (!%stripper) {
%stripper = ( parser => HTML::Parser->new );

f85be296 Moritz Bunkus
$stripper{parser}->handler(text => sub { $stripper{text} .= ' ' . $_[1]; });
792ae733 Moritz Bunkus
}

$stripper{text} = '';
$stripper{parser}->parse($value);
$stripper{parser}->eof;

e09fbb11 Moritz Bunkus
$stripper{text} =~ s{\&([^;]+);}{ $entities{$1} || "\&$1;" }eg;
f85be296 Moritz Bunkus
$stripper{text} =~ s{^ +| +$}{}g;
$stripper{text} =~ s{ {2,}}{ }g;
5de9f9f8 Moritz Bunkus
792ae733 Moritz Bunkus
return delete $stripper{text};
}

1f8b0477 Moritz Bunkus
sub plain_text_to_html {
my ($class_or_text) = @_;

my $text = !ref($class_or_text) && (($class_or_text // '') eq 'SL::HTML::Util') ? $_[1] : $class_or_text;

return $text if $text =~ m{^<p>.*</p>$};

$text =~ s{\r+}{}g;
$text =~ s{^[[:space:]]+|[[:space:]]+$}{}g;

return '' if $text eq '';

my @paragraphs;

foreach my $paragraph (split m{\n{2,}}, $text) {
no warnings 'once';
$paragraph = $::locale->quote_special_chars('HTML', $paragraph);
$paragraph =~ s{\n}{<br>}g;

push @paragraphs, $paragraph;
}

return '<p>' . join('</p><p>', @paragraphs) . '</p>';
}

792ae733 Moritz Bunkus
1;
__END__

=pod

=encoding utf8

=head1 NAME

SL::HTML::Util - Utility functions dealing with HTML

=head1 SYNOPSIS

my $plain_text = SL::HTML::Util->strip('<h1>Hello World</h1>');

=head1 FUNCTIONS

=over 4

=item C<strip $html_content>

Removes all HTML elements and tags from C<$html_content> and returns
the remaining plain text.

1f8b0477 Moritz Bunkus
=item C<plain_text_to_html $text>

Converts a plain text to HTML: paragraphs will be recognized by empty
lines; remaining newlines will be converted into forced line breaks;
the rest will be HTML escaped.

792ae733 Moritz Bunkus
=back

=head1 BUGS

Nothing here yet.

=head1 AUTHOR

Moritz Bunkus E<lt>m.bunkus@linet-services.deE<gt>

=cut