Projekt

Allgemein

Profil

« Zurück | Weiter » 

Revision af205393

Von Bernd Blessmann vor fast 11 Jahren hinzugefügt

  • ID af2053931fb484b246ba42f93df4889c4257ab42
  • Vorgänger c8473408
  • Nachfolger 4b84cde9

SL::Helper::Csv kann mit Multiplex-Daten (mehreren Profilen) umgehen.

Unterschiede anzeigen:

SL/Helper/Csv.pm
10 10
use Text::CSV_XS;
11 11
use Rose::Object::MakeMethods::Generic scalar => [ qw(
12 12
  file encoding sep_char quote_char escape_char header profile
13
  numberformat dateformat ignore_unknown_columns strict_profile _io _csv
14
  _objects _parsed _data _errors all_cvar_configs case_insensitive_header
13
  numberformat dateformat ignore_unknown_columns strict_profile is_multiplexed
14
  _io _csv _objects _parsed _data _errors all_cvar_configs case_insensitive_header
15 15
) ];
16 16

  
17 17
use SL::Helper::Csv::Dispatcher;
......
26 26
    quote_char             => { default => '"' },
27 27
    escape_char            => { default => '"' },
28 28
    header                 => { type    => ARRAYREF, optional => 1 },
29
    profile                => { type    => HASHREF,  optional => 1 },
29
    profile                => { type    => ARRAYREF, optional => 1 },
30 30
    file                   => 1,
31 31
    encoding               => 0,
32 32
    numberformat           => 0,
......
56 56
  my ($self, %params) = @_;
57 57

  
58 58
  $self->_open_file;
59
  return if ! $self->_check_multiplexed;
59 60
  return if ! $self->_check_header;
60 61
  return if ! $self->dispatcher->parse_profile;
61 62
  return if ! $self->_parse_data;
......
97 98
  return $self->_io;
98 99
}
99 100

  
101
# check, if data is multiplexed and if all nessesary infos are given
102
sub _check_multiplexed {
103
  my ($self, %params) = @_;
104

  
105
  $self->is_multiplexed(0);
106

  
107
  # If more than one profile is given, it is multiplexed.
108
  if ($self->profile) {
109
    my @profile = @{ $self->profile };
110
    if (scalar @profile > 1) {
111
      my $info_ok = 1;
112
      # Each profile needs a class and a row_ident
113
      foreach my $p (@profile) {
114
        if ( !defined $p->{class} || !defined $p->{row_ident} ) {
115
          $info_ok = 0;
116
          last;
117
        }
118
      }
119

  
120
      # If header is given, there need to be a header for each profile
121
      if ($info_ok && $self->header) {
122
        my @header = @{ $self->header };
123
        if (scalar @profile != scalar @header) {
124
          $info_ok = 0;
125
        }
126
      }
127
      $self->is_multiplexed($info_ok);
128
      return $info_ok;
129
    }
130
  }
131

  
132
  # ok, if not multiplexed
133
  return 1;
134
}
135

  
100 136
sub _check_header {
101 137
  my ($self, %params) = @_;
102
  my $header = $self->header;
138
  my $header;
103 139

  
104
  if (! $header) {
105
    $header = $self->_csv->getline($self->_io);
140
  $header = $self->header;
141
  if (!$header) {
142
    my $n_header = ($self->is_multiplexed)? scalar @{ $self->profile } : 1;
143
    foreach my $p_num (0..$n_header - 1) {
144
      my $h = $self->_csv->getline($self->_io);
106 145

  
107
    $self->_push_error([
108
      $self->_csv->error_input,
109
      $self->_csv->error_diag,
110
      0,
111
    ]) unless $header;
146
      $self->_push_error([
147
        $self->_csv->error_input,
148
        $self->_csv->error_diag,
149
        0,
150
      ]) unless $h;
151

  
152
      push @{ $header }, $h;
153
    }
112 154
  }
113 155

  
114 156
  # Special case: utf8 BOM.
115 157
  # certain software (namely MS Office and notepad.exe insist on prefixing
116 158
  # data with a discouraged but valid byte order mark
117 159
  # if not removed, the first header field will not be recognized
118
  if ($header && $header->[0] && $self->encoding =~ /utf-?8/i) {
119
    $header->[0] =~ s/^\x{FEFF}//;
160
  if ($header) {
161
    foreach my $h (@{ $header }) {
162
      if ($h && $h->[0] && $self->encoding =~ /utf-?8/i) {
163
        $h->[0] =~ s/^\x{FEFF}//;
164
      }
165
    }
120 166
  }
121 167

  
122
  return unless $header;
168
  # check, if all header fields are parsed well
169
  my $all_ok = 1;
170
  if ($header) {
171
    foreach my $h (@{ $header }) {
172
      if (!$h) {
173
        $all_ok = 0;
174
        last;
175
      }
176
    }
177
  } else {
178
    $all_ok = 0;
179
  }
180
  return unless $all_ok;
123 181

  
124 182
  # Special case: human stupidity
125 183
  # people insist that case sensitivity doesn't exist and try to enter all
......
145 203
  my ($self, %params) = @_;
146 204
  my (@data, @errors);
147 205

  
148
  $self->_csv->column_names(@{ $self->header });
149

  
150 206
  while (1) {
151 207
    my $row = $self->_csv->getline($self->_io);
152 208
    if ($row) {
209
      my $header = $self->_header_by_row($row);
210
      $self->_csv->column_names(@{ $header });
153 211
      my %hr;
154
      @hr{@{ $self->header }} = @$row;
212
      @hr{@{ $header }} = @$row;
155 213
      push @data, \%hr;
156 214
    } else {
157 215
      last if $self->_csv->eof;
......
178 236
  return ! @errors;
179 237
}
180 238

  
239
sub _header_by_row {
240
  my ($self, $row) = @_;
241

  
242
  my @header = @{ $self->header };
243
  if ($self->is_multiplexed) {
244
    my $i = 0;
245
    foreach my $profile (@{ $self->profile }) {
246
      if (@{ $row }[0] eq $profile->{row_ident}) {
247
        return $header[$i];
248
      }
249
      $i++;
250
    }
251
  } else {
252
    return $header[0];
253
  }
254
}
255

  
181 256
sub _encode_layer {
182 257
  ':encoding(' . $_[0]->encoding . ')';
183 258
}
......
243 318
    sep_char    => ',',     # default ';'
244 319
    quote_char  => '\'',    # default '"'
245 320
    escape_char => '"',     # default '"'
246
    header      => [qw(id text sellprice word)], # see later
247
    profile     => { profile => { sellprice => 'sellprice_as_number'}, class => SL::DB::Part },
321
    header      => [ [qw(id text sellprice word)] ], # see later
322
    profile     => [ { profile => { sellprice => 'sellprice_as_number'},
323
                       class   => 'SL::DB::Part' } ],
248 324
  );
249 325

  
250 326
  my $status  = $csv->parse;
......
285 361
happen here. You will receive a plain mapping of the data into the class tree,
286 362
nothing more.
287 363

  
364
=item Multiplex data
365

  
366
This module can handle multiplexed data of different class types. In that case
367
multiple profiles with classes and row identifiers must be given. Multiple
368
headers may also be given or read from csv data. Data must contain the row
369
identifier in the first column and it's field name must be 'datatype'.
370

  
288 371
=back
289 372

  
290 373
=head1 METHODS
......
338 421

  
339 422
Same as in L<Text::CSV>
340 423

  
341
=item C<header> \@FIELDS
424
=item C<header> \@HEADERS
425

  
426
If given, it contains an ARRAYREF for each different class type (i.e. one
427
ARRAYREF if the data is only of one class type). These ARRAYREFS are the header
428
fields which are an array of columns. In this case the first lines are not used
429
as a header. Empty header fields will be ignored in objects.
430

  
431
If not given, headers are taken from the first n lines of data, where n is the
432
number of different class types.
342 433

  
343
Can be an array of columns, in this case the first line is not used as a
344
header. Empty header fields will be ignored in objects.
434
Examples:
345 435

  
346
=item C<profile> {profile => \%ACCESSORS, class => class}
436
  classic data of one type:
437
  [ [ 'name', 'street', 'zipcode', 'city' ] ]
347 438

  
348
This is a HASHREF to hash which may contain the keys C<profile> and C<class>.
439
  multiplexed data with two different types
440
  [ [ 'ordernumber', 'customer', 'transdate' ], [ 'partnumber', 'qty', 'sellprice' ] ]
441

  
442
=item C<profile> [{profile => \%ACCESSORS, class => class, row_ident => ri},]
443

  
444
This is an ARRAYREF to HASHREFs which may contain the keys C<profile>, C<class>
445
and C<row_ident>.
349 446

  
350 447
The C<profile> is a HASHREF which may be used to map header fields to custom
351 448
accessors. Example:
352 449

  
353
  {profile => { listprice => listprice_as_number }}
450
  [ {profile => { listprice => listprice_as_number }} ]
354 451

  
355 452
In this case C<listprice_as_number> will be used to read in values from the
356 453
C<listprice> column.
......
358 455
In case of a One-To-One relationsship these can also be set over
359 456
relationsships by sparating the steps with a dot (C<.>). This will work:
360 457

  
361
  {profile => { customer => 'customer.name' }}
458
  [ {profile => { customer => 'customer.name' }} ]
362 459

  
363 460
And will result in something like this:
364 461

  
......
374 471
If C<class> is present, the line will be handed to the new sub of this class,
375 472
and the return value used instead of the line itself.
376 473

  
474
C<row_ident> is a string to recognize the right profile and class for each data
475
line in multiplexed data.
476

  
477
In case of multiplexed data, C<class> and C<row_ident> must be given.
478
Example:
479
  [ {
480
      class     => 'SL::DB::Order',
481
      row_ident => 'O'
482
    },
483
    {
484
      class     => 'SL::DB::OrderItem',
485
      row_ident => 'I',
486
      profile   => {sellprice => sellprice_as_number}
487
    } ]
488

  
377 489
=item C<ignore_unknown_columns>
378 490

  
379 491
If set, the import will ignore unkown header columns. Useful for lazy imports,
......
433 545

  
434 546
 $csv = SL::Helper::Csv->new(
435 547
   file    => ...
436
   profile => {
548
   profile => [ {
437 549
     profile => [
438 550
       makemodel => {
439 551
         make_1  => make,
......
445 557
       },
446 558
     ],
447 559
     class   => SL::DB::Part,
448
   }
560
   } ]
449 561
 );
450 562

  
451 563
=head1 AUTHOR

Auch abrufbar als: Unified diff