Revision af205393
Von Bernd Blessmann vor fast 11 Jahren hinzugefügt
SL/Helper/Csv.pm | ||
---|---|---|
10 | 10 |
use Text::CSV_XS; |
11 | 11 |
use Rose::Object::MakeMethods::Generic scalar => [ qw( |
12 | 12 |
file encoding sep_char quote_char escape_char header profile |
13 |
numberformat dateformat ignore_unknown_columns strict_profile _io _csv
|
|
14 |
_objects _parsed _data _errors all_cvar_configs case_insensitive_header |
|
13 |
numberformat dateformat ignore_unknown_columns strict_profile is_multiplexed
|
|
14 |
_io _csv _objects _parsed _data _errors all_cvar_configs case_insensitive_header
|
|
15 | 15 |
) ]; |
16 | 16 |
|
17 | 17 |
use SL::Helper::Csv::Dispatcher; |
... | ... | |
26 | 26 |
quote_char => { default => '"' }, |
27 | 27 |
escape_char => { default => '"' }, |
28 | 28 |
header => { type => ARRAYREF, optional => 1 }, |
29 |
profile => { type => HASHREF, optional => 1 },
|
|
29 |
profile => { type => ARRAYREF, optional => 1 },
|
|
30 | 30 |
file => 1, |
31 | 31 |
encoding => 0, |
32 | 32 |
numberformat => 0, |
... | ... | |
56 | 56 |
my ($self, %params) = @_; |
57 | 57 |
|
58 | 58 |
$self->_open_file; |
59 |
return if ! $self->_check_multiplexed; |
|
59 | 60 |
return if ! $self->_check_header; |
60 | 61 |
return if ! $self->dispatcher->parse_profile; |
61 | 62 |
return if ! $self->_parse_data; |
... | ... | |
97 | 98 |
return $self->_io; |
98 | 99 |
} |
99 | 100 |
|
101 |
# check, if data is multiplexed and if all nessesary infos are given |
|
102 |
sub _check_multiplexed { |
|
103 |
my ($self, %params) = @_; |
|
104 |
|
|
105 |
$self->is_multiplexed(0); |
|
106 |
|
|
107 |
# If more than one profile is given, it is multiplexed. |
|
108 |
if ($self->profile) { |
|
109 |
my @profile = @{ $self->profile }; |
|
110 |
if (scalar @profile > 1) { |
|
111 |
my $info_ok = 1; |
|
112 |
# Each profile needs a class and a row_ident |
|
113 |
foreach my $p (@profile) { |
|
114 |
if ( !defined $p->{class} || !defined $p->{row_ident} ) { |
|
115 |
$info_ok = 0; |
|
116 |
last; |
|
117 |
} |
|
118 |
} |
|
119 |
|
|
120 |
# If header is given, there need to be a header for each profile |
|
121 |
if ($info_ok && $self->header) { |
|
122 |
my @header = @{ $self->header }; |
|
123 |
if (scalar @profile != scalar @header) { |
|
124 |
$info_ok = 0; |
|
125 |
} |
|
126 |
} |
|
127 |
$self->is_multiplexed($info_ok); |
|
128 |
return $info_ok; |
|
129 |
} |
|
130 |
} |
|
131 |
|
|
132 |
# ok, if not multiplexed |
|
133 |
return 1; |
|
134 |
} |
|
135 |
|
|
100 | 136 |
sub _check_header { |
101 | 137 |
my ($self, %params) = @_; |
102 |
my $header = $self->header;
|
|
138 |
my $header; |
|
103 | 139 |
|
104 |
if (! $header) { |
|
105 |
$header = $self->_csv->getline($self->_io); |
|
140 |
$header = $self->header; |
|
141 |
if (!$header) { |
|
142 |
my $n_header = ($self->is_multiplexed)? scalar @{ $self->profile } : 1; |
|
143 |
foreach my $p_num (0..$n_header - 1) { |
|
144 |
my $h = $self->_csv->getline($self->_io); |
|
106 | 145 |
|
107 |
$self->_push_error([ |
|
108 |
$self->_csv->error_input, |
|
109 |
$self->_csv->error_diag, |
|
110 |
0, |
|
111 |
]) unless $header; |
|
146 |
$self->_push_error([ |
|
147 |
$self->_csv->error_input, |
|
148 |
$self->_csv->error_diag, |
|
149 |
0, |
|
150 |
]) unless $h; |
|
151 |
|
|
152 |
push @{ $header }, $h; |
|
153 |
} |
|
112 | 154 |
} |
113 | 155 |
|
114 | 156 |
# Special case: utf8 BOM. |
115 | 157 |
# certain software (namely MS Office and notepad.exe insist on prefixing |
116 | 158 |
# data with a discouraged but valid byte order mark |
117 | 159 |
# if not removed, the first header field will not be recognized |
118 |
if ($header && $header->[0] && $self->encoding =~ /utf-?8/i) { |
|
119 |
$header->[0] =~ s/^\x{FEFF}//; |
|
160 |
if ($header) { |
|
161 |
foreach my $h (@{ $header }) { |
|
162 |
if ($h && $h->[0] && $self->encoding =~ /utf-?8/i) { |
|
163 |
$h->[0] =~ s/^\x{FEFF}//; |
|
164 |
} |
|
165 |
} |
|
120 | 166 |
} |
121 | 167 |
|
122 |
return unless $header; |
|
168 |
# check, if all header fields are parsed well |
|
169 |
my $all_ok = 1; |
|
170 |
if ($header) { |
|
171 |
foreach my $h (@{ $header }) { |
|
172 |
if (!$h) { |
|
173 |
$all_ok = 0; |
|
174 |
last; |
|
175 |
} |
|
176 |
} |
|
177 |
} else { |
|
178 |
$all_ok = 0; |
|
179 |
} |
|
180 |
return unless $all_ok; |
|
123 | 181 |
|
124 | 182 |
# Special case: human stupidity |
125 | 183 |
# people insist that case sensitivity doesn't exist and try to enter all |
... | ... | |
145 | 203 |
my ($self, %params) = @_; |
146 | 204 |
my (@data, @errors); |
147 | 205 |
|
148 |
$self->_csv->column_names(@{ $self->header }); |
|
149 |
|
|
150 | 206 |
while (1) { |
151 | 207 |
my $row = $self->_csv->getline($self->_io); |
152 | 208 |
if ($row) { |
209 |
my $header = $self->_header_by_row($row); |
|
210 |
$self->_csv->column_names(@{ $header }); |
|
153 | 211 |
my %hr; |
154 |
@hr{@{ $self->header }} = @$row;
|
|
212 |
@hr{@{ $header }} = @$row; |
|
155 | 213 |
push @data, \%hr; |
156 | 214 |
} else { |
157 | 215 |
last if $self->_csv->eof; |
... | ... | |
178 | 236 |
return ! @errors; |
179 | 237 |
} |
180 | 238 |
|
239 |
sub _header_by_row { |
|
240 |
my ($self, $row) = @_; |
|
241 |
|
|
242 |
my @header = @{ $self->header }; |
|
243 |
if ($self->is_multiplexed) { |
|
244 |
my $i = 0; |
|
245 |
foreach my $profile (@{ $self->profile }) { |
|
246 |
if (@{ $row }[0] eq $profile->{row_ident}) { |
|
247 |
return $header[$i]; |
|
248 |
} |
|
249 |
$i++; |
|
250 |
} |
|
251 |
} else { |
|
252 |
return $header[0]; |
|
253 |
} |
|
254 |
} |
|
255 |
|
|
181 | 256 |
sub _encode_layer { |
182 | 257 |
':encoding(' . $_[0]->encoding . ')'; |
183 | 258 |
} |
... | ... | |
243 | 318 |
sep_char => ',', # default ';' |
244 | 319 |
quote_char => '\'', # default '"' |
245 | 320 |
escape_char => '"', # default '"' |
246 |
header => [qw(id text sellprice word)], # see later |
|
247 |
profile => { profile => { sellprice => 'sellprice_as_number'}, class => SL::DB::Part }, |
|
321 |
header => [ [qw(id text sellprice word)] ], # see later |
|
322 |
profile => [ { profile => { sellprice => 'sellprice_as_number'}, |
|
323 |
class => 'SL::DB::Part' } ], |
|
248 | 324 |
); |
249 | 325 |
|
250 | 326 |
my $status = $csv->parse; |
... | ... | |
285 | 361 |
happen here. You will receive a plain mapping of the data into the class tree, |
286 | 362 |
nothing more. |
287 | 363 |
|
364 |
=item Multiplex data |
|
365 |
|
|
366 |
This module can handle multiplexed data of different class types. In that case |
|
367 |
multiple profiles with classes and row identifiers must be given. Multiple |
|
368 |
headers may also be given or read from csv data. Data must contain the row |
|
369 |
identifier in the first column and it's field name must be 'datatype'. |
|
370 |
|
|
288 | 371 |
=back |
289 | 372 |
|
290 | 373 |
=head1 METHODS |
... | ... | |
338 | 421 |
|
339 | 422 |
Same as in L<Text::CSV> |
340 | 423 |
|
341 |
=item C<header> \@FIELDS |
|
424 |
=item C<header> \@HEADERS |
|
425 |
|
|
426 |
If given, it contains an ARRAYREF for each different class type (i.e. one |
|
427 |
ARRAYREF if the data is only of one class type). These ARRAYREFS are the header |
|
428 |
fields which are an array of columns. In this case the first lines are not used |
|
429 |
as a header. Empty header fields will be ignored in objects. |
|
430 |
|
|
431 |
If not given, headers are taken from the first n lines of data, where n is the |
|
432 |
number of different class types. |
|
342 | 433 |
|
343 |
Can be an array of columns, in this case the first line is not used as a |
|
344 |
header. Empty header fields will be ignored in objects. |
|
434 |
Examples: |
|
345 | 435 |
|
346 |
=item C<profile> {profile => \%ACCESSORS, class => class} |
|
436 |
classic data of one type: |
|
437 |
[ [ 'name', 'street', 'zipcode', 'city' ] ] |
|
347 | 438 |
|
348 |
This is a HASHREF to hash which may contain the keys C<profile> and C<class>. |
|
439 |
multiplexed data with two different types |
|
440 |
[ [ 'ordernumber', 'customer', 'transdate' ], [ 'partnumber', 'qty', 'sellprice' ] ] |
|
441 |
|
|
442 |
=item C<profile> [{profile => \%ACCESSORS, class => class, row_ident => ri},] |
|
443 |
|
|
444 |
This is an ARRAYREF to HASHREFs which may contain the keys C<profile>, C<class> |
|
445 |
and C<row_ident>. |
|
349 | 446 |
|
350 | 447 |
The C<profile> is a HASHREF which may be used to map header fields to custom |
351 | 448 |
accessors. Example: |
352 | 449 |
|
353 |
{profile => { listprice => listprice_as_number }}
|
|
450 |
[ {profile => { listprice => listprice_as_number }} ]
|
|
354 | 451 |
|
355 | 452 |
In this case C<listprice_as_number> will be used to read in values from the |
356 | 453 |
C<listprice> column. |
... | ... | |
358 | 455 |
In case of a One-To-One relationsship these can also be set over |
359 | 456 |
relationsships by sparating the steps with a dot (C<.>). This will work: |
360 | 457 |
|
361 |
{profile => { customer => 'customer.name' }}
|
|
458 |
[ {profile => { customer => 'customer.name' }} ]
|
|
362 | 459 |
|
363 | 460 |
And will result in something like this: |
364 | 461 |
|
... | ... | |
374 | 471 |
If C<class> is present, the line will be handed to the new sub of this class, |
375 | 472 |
and the return value used instead of the line itself. |
376 | 473 |
|
474 |
C<row_ident> is a string to recognize the right profile and class for each data |
|
475 |
line in multiplexed data. |
|
476 |
|
|
477 |
In case of multiplexed data, C<class> and C<row_ident> must be given. |
|
478 |
Example: |
|
479 |
[ { |
|
480 |
class => 'SL::DB::Order', |
|
481 |
row_ident => 'O' |
|
482 |
}, |
|
483 |
{ |
|
484 |
class => 'SL::DB::OrderItem', |
|
485 |
row_ident => 'I', |
|
486 |
profile => {sellprice => sellprice_as_number} |
|
487 |
} ] |
|
488 |
|
|
377 | 489 |
=item C<ignore_unknown_columns> |
378 | 490 |
|
379 | 491 |
If set, the import will ignore unkown header columns. Useful for lazy imports, |
... | ... | |
433 | 545 |
|
434 | 546 |
$csv = SL::Helper::Csv->new( |
435 | 547 |
file => ... |
436 |
profile => { |
|
548 |
profile => [ {
|
|
437 | 549 |
profile => [ |
438 | 550 |
makemodel => { |
439 | 551 |
make_1 => make, |
... | ... | |
445 | 557 |
}, |
446 | 558 |
], |
447 | 559 |
class => SL::DB::Part, |
448 |
} |
|
560 |
} ]
|
|
449 | 561 |
); |
450 | 562 |
|
451 | 563 |
=head1 AUTHOR |
Auch abrufbar als: Unified diff
SL::Helper::Csv kann mit Multiplex-Daten (mehreren Profilen) umgehen.