kivitendo/SL/Helper/Csv.pm @ 2f6ebd89
2f6ebd89 | Sven Schöling | package SL::Helper::Csv;
|
|
use strict;
|
|||
use warnings;
|
|||
use Carp;
|
|||
use IO::File;
|
|||
use Text::CSV;
|
|||
use Params::Validate qw(:all);
|
|||
use Rose::Object::MakeMethods::Generic scalar => [ qw(
|
|||
file encoding sep_char quote_char header header_acc class numberformat
|
|||
dateformat _io _csv _objects _parsed _data
|
|||
) ];
|
|||
# public interface
|
|||
# Constructor. Checks the named arguments with Params::Validate, stores each
# resulting value through the accessor of the same name and then sets up the
# IO::File handle and the Text::CSV parser used by the parsing steps.
#
# Named arguments:
#   file         - mandatory; later handed to IO::File->open
#   sep_char     - field separator for Text::CSV, defaults to ';'
#   quote_char   - quote character for Text::CSV, defaults to '"'
#   header       - optional array ref of column names
#   header_acc   - optional hash ref mapping column names to accessor names
#   encoding, class, numberformat, dateformat - optional
#
# Returns the new object.
sub new {
  my $class = shift;

  my $spec = {
    sep_char     => { default => ';' },
    quote_char   => { default => '"' },
    header       => { type => ARRAYREF, optional => 1 },
    header_acc   => { type => HASHREF,  optional => 1 },
    file         => 1,
    encoding     => 0,
    class        => 0,
    numberformat => 0,
    dateformat   => 0,
  };
  my %params = validate(@_, $spec);

  my $self = bless {}, $class;

  # every validated key has a generated accessor of the same name
  for my $name (keys %params) {
    $self->$name($params{$name});
  }

  $self->_io(IO::File->new);

  my $parser = Text::CSV->new({
    binary     => 1,
    sep_char   => $self->sep_char,
    quote_char => $self->quote_char,
  });
  $self->_csv($parser);

  return $self;
}
|
|||
# Runs the whole import: opens the file, determines the header and reads the
# data rows.
#
# Returns $self once both steps reported a true value. Returns nothing (undef
# in scalar context) as soon as the header step or the data step yields a
# false value; in that case the object is not marked as parsed.
sub parse {
  my ($self, %params) = @_;

  $self->_open_file;

  # the steps run in this order and the first failing one aborts the parse
  for my $step (qw(_check_header _parse_data)) {
    return unless $self->$step;
  }

  $self->_parsed(1);

  return $self;
}
|
|||
# Hands back whatever is stored in _data; after parsing this is the array ref
# of rows that _parse_data collected.
sub get_data {
  my ($self) = @_;

  return $self->_data;
}
|
|||
# Returns the parsed rows as objects of the configured class. The objects are
# built on the first call via _make_objects and reused afterwards.
#
# Croaks with 'no class given' if no class is set and with 'must parse first'
# if the object has not been marked as parsed.
#
# In list context the objects are returned as a list, otherwise as the stored
# array ref.
sub get_objects {
  my ($self, %params) = @_;

  croak 'no class given'   if !$self->class;
  croak 'must parse first' if !$self->_parsed;

  if (!$self->_objects) {
    $self->_make_objects;
  }

  my $objects = $self->_objects;

  return @{ $objects } if wantarray;
  return $objects;
}
|
|||
# private stuff
|
|||
# Opens the configured file for reading on the internal IO::File handle,
# applying the encoding layer built by _encode_layer. If no encoding is set
# yet, the result of _guess_encoding is stored as the encoding first.
#
# Returns the IO::File handle. Dies with "could not open file <file>: <reason>"
# if the open fails; <reason> is the operating system error ($!).
sub _open_file {
  my ($self, %params) = @_;

  $self->encoding($self->_guess_encoding) if !$self->encoding;

  my $io = $self->_io;

  if (!$io->open($self->file, '<' . $self->_encode_layer)) {
    # capture $! right away so that nothing called later can overwrite it
    my $reason = $!;
    die "could not open file " . $self->file . ": $reason";
  }

  return $io;
}
|
|||
# Determines the column names. A header that is already set wins and is
# returned unchanged without touching the file; otherwise the next line is
# read from the handle with the CSV parser and stored as the header.
#
# Returns the header, which is false if no line could be read.
sub _check_header {
  my ($self, %params) = @_;

  my $preset = $self->header;
  return $preset if $preset;

  my $first_row = $self->_csv->getline($self->_io);

  return $self->header($first_row);
}
|
|||
# Reads all remaining lines from the handle as hash refs keyed by the header
# columns and stores them in _data.
#
# The parser only reports end of file after a read attempt has failed, so the
# read that detects EOF yields no row. Such an empty result is never stored;
# only real rows end up in _data.
#
# Returns the stored array ref (always true) if every line before the end of
# the file could be parsed. Returns nothing if at least one line failed to
# parse; the rows that could be read are still stored in _data.
sub _parse_data {
  my ($self, %params) = @_;

  my $csv = $self->_csv;
  my $io  = $self->_io;

  my @data;
  my $failed = 0;

  $csv->column_names(@{ $self->header });

  until ($csv->eof) {
    my $row = $csv->getline_hr($io);

    if ($row) {
      push @data, $row;
    } elsif (!$csv->eof) {
      # no row and not at the end of the file: this line was malformed
      $failed = 1;
    }
  }

  $self->_data(\@data);

  return if $failed;
  return $self->_data;
}
|
|||
# Builds the PerlIO layer string for the configured encoding,
# e.g. ':encoding(utf-8)'.
sub _encode_layer {
  my ($self) = @_;

  my $encoding = $self->encoding;

  return ":encoding($encoding)";
}
|
|||
# Turns every parsed row into an object of the configured class and stores
# the resulting list in _objects.
#
# The class is loaded first. If loading fails and the class does not provide
# a 'new' method either, this croaks with the load error instead of failing
# later with an unrelated message. A class that is already defined in memory
# (without a file of its own) is accepted.
#
# While the objects are built, $::myconfig{numberformat} and
# $::myconfig{dateformat} are temporarily overridden with the configured
# formats, if any.
#
# Each row is passed to the class' constructor as key/value pairs. Columns
# with a false name are left out, and a column listed in header_acc is passed
# under the accessor name given there. Undefined rows are skipped.
#
# Returns the result of storing the array ref of objects in _objects.
sub _make_objects {
  my ($self, %params) = @_;

  my $class     = $self->class;
  my $accessors = $self->header_acc || {};

  # Load by file name inside a block eval: same effect as "require Class",
  # but the class name is never compiled as code.
  (my $class_file = "$class.pm") =~ s{::}{/}g;
  my $loaded     = eval { require $class_file; 1 };
  my $load_error = $@;

  croak "could not load class $class: $load_error"
    unless $loaded || $class->can('new');

  local $::myconfig{numberformat} = $self->numberformat if $self->numberformat;
  local $::myconfig{dateformat}   = $self->dateformat   if $self->dateformat;

  my @objs;

  for my $line (@{ $self->_data }) {
    # a row that could not be parsed has no columns to build an object from
    next unless $line;

    push @objs, $class->new(
      map  { ($accessors->{$_} || $_) => $line->{$_} }
      grep { $_ } keys %$line
    );
  }

  $self->_objects(\@objs);
}
|
|||
# Fallback encoding used when none was given. No detection takes place: the
# answer is always UTF-8 (original author's note: won't fix).
sub _guess_encoding {
  return 'utf-8';
}
|
|||
1;
|
|||
__END__
|
|||
=head1 NAME
|
|||
SL::Helper::Csv - take care of csv file uploads
|
|||
=head1 SYNOPSIS
|
|||
use SL::Helper::Csv;
|
|||
my $csv = SL::Helper::Csv->new(
|
|||
file => \$::form->{upload_file},
|
|||
encoding => 'utf-8', # undef means utf8
|
|||
sep_char => ',', # default ';'
|
|||
quote_char => '\'', # default '"'
|
|||
header => [qw(id text sellprice word)], # see later
|
|||
header_acc => { sellprice => 'sellprice_as_number' },
|
|||
class => 'SL::DB::CsvLine', # if present, map lines to this
|
|||
);
|
|||
my $status = $csv->parse;
|
|||
my $hrefs = $csv->get_data; # array ref of hash refs
|
|||
my @objects = $csv->get_objects;
|
|||
=head1 DESCRIPTION
|
|||
See Synopsis.
|
|||
Text::CSV already offers good functions to get lines out of a csv file, but in
|
|||
most cases you will want those lines to be parsed into hashes or even objects,
|
|||
so this module just skips ahead and gives you objects.
|
|||
Encoding autodetection is not easy, and should not be trusted. Try to avoid it if possible.
|
|||
=head1 METHODS
|
|||
=over 4
|
|||
=item C<new> PARAMS
|
|||
Standard constructor. You can use this to set most of the data.
|
|||
=item C<parse>
|
|||
Do the actual work. Will return true ($self actually) if success, undef if not.
|
|||
=item C<get_objects>
|
|||
Parse the data into objects and return those.
|
|||
=item C<get_data>
|
|||
Returns an arrayref of the raw lines as hashrefs.
|
|||
=item C<file>
|
|||
The file whose contents are to be read. Can be the name of a physical file or a
|
|||
scalar ref for memory data.
|
|||
=item C<encoding>
|
|||
Encoding of the CSV file. Note that this module does not do any encoding guessing.
|
|||
Know what your data is. Defaults to utf-8.
|
|||
=item C<sep_char>
|
|||
=item C<quote_char>
|
|||
Same as in L<Text::CSV>
|
|||
=item C<header> \@FIELDS
|
|||
Can be an array ref of columns; in this case the first line is not used as a
|
|||
header. Empty header fields will be ignored in objects.
|
|||
=item C<header_acc> \%ACCESSORS
|
|||
May be used to map header fields to custom accessors. Example:
|
|||
{ listprice => 'listprice_as_number' }
|
|||
In this case C<listprice_as_number> will be used to read in values from the
|
|||
C<listprice> column.
|
|||
=item C<class>
|
|||
If present, the line will be handed to the new sub of this class,
|
|||
and the return value used instead of the line itself.
|
|||
=back
|
|||
=head1 BUGS
|
|||
=head1 AUTHOR
|
|||
=cut
|