Projekt

Allgemein

Profil

« Zurück | Weiter » 

Revision dc3f6120

Von Sven Schöling vor mehr als 12 Jahren hinzugefügt

  • ID dc3f6120f9bbacaa028e554d7fa71e481d4497b4
  • Vorgänger c3f94f18
  • Nachfolger 4ab897fd

Parsing von multipart/form-data beschleunigt.

Die entsprechende Routine hatte einen bösen Fall von Shlemiel the Painter's
algorithm [1]. Dadurch wurden Datei-Uploads mit mehr als 20k Zeilen extrem
langsam. Binärdaten wie PDFs oder Bilder hat das nicht gestört, aber bei
CSV-Importen hat eine Datei mit 80k Zeilen dann auch mal 2–5 Minuten
gebraucht, nur um den Request zu parsen.

Jetzt werden beim Parsen nur noch die Indizes ermittelt und die Daten hinterher
direkt per substr aus dem Request gezogen. Außerdem endlich einen Testfall dafür eingebaut.

[1] http://en.wikipedia.org/wiki/Schlemiel_the_Painter%27s_algorithm

Unterschiede anzeigen:

SL/Request.pm
53 53
sub _parse_multipart_formdata {
54 54
  my ($target, $temp_target, $input) = @_;
55 55
  my ($name, $filename, $headers_done, $content_type, $boundary_found, $need_cr, $previous, $p_attachment, $encoding, $transfer_encoding);
56
  my $data_start = 0;
57

  
58
  # teach substr and length to use good ol' bytes, not 'em fancy characters
59
  use bytes;
56 60

  
57 61
  # We SHOULD honor encodings and transfer-encodings here, but as hard as I
58 62
  # looked I couldn't find a reasonably recent webbrowser that makes use of
......
63 67
  $ENV{'CONTENT_TYPE'} =~ /multipart\/form-data\s*;\s*boundary\s*=\s*(.+)$/;
64 68
  my $boundary = '--' . $1;
65 69

  
70
  my $index = 0;
71
  my $line_length;
66 72
  foreach my $line (split m/\n/, $input) {
67
    last if (($line eq "${boundary}--") || ($line eq "${boundary}--\r"));
73
    $line_length = length $line;
74

  
75
    if ($line =~ /^\Q$boundary\E(--)?\r?$/) {
76
      my $last_boundary = $1;
77
      my $data       =  substr $input, $data_start, $index - $data_start;
78
      $data =~ s/\r?\n$//;
68 79

  
69
    if (($line eq $boundary) || ($line eq "$boundary\r")) {
70
      ${ $previous } =~ s|\r?\n$|| if $previous;
71
      ${ $previous } =  Encode::decode($encoding, $$previous) if $previous && !$filename && !$transfer_encoding eq 'binary';
80
      if ($previous && !$filename && $transfer_encoding && $transfer_encoding ne 'binary') {
81
        ${ $previous } = Encode::decode($encoding, $data);
82
      } else {
83
        ${ $previous } = $data;
84
      }
72 85

  
73 86
      undef $previous;
74 87
      undef $filename;
......
79 92
      $need_cr        = 0;
80 93
      $encoding       = $::lx_office_conf{system}->{dbcharset} || Common::DEFAULT_CHARSET;
81 94
      $transfer_encoding = undef;
82

  
95
      last if $last_boundary;
83 96
      next;
84 97
    }
85 98

  
......
90 103

  
91 104
      if (!$line) {
92 105
        $headers_done = 1;
106
        $data_start = $index + $line_length + 1;
93 107
        next;
94 108
      }
95 109

  
......
159 173

  
160 174
    next unless $previous;
161 175

  
162
    ${ $previous } .= "${line}\n";
176
  } continue {
177
    $index += $line_length + 1;
163 178
  }
164 179

  
165
  ${ $previous } =~ s|\r?\n$|| if $previous;
166

  
167 180
  $::lxdebug->leave_sub(2);
168 181
}
169 182

  
t/request/post_multipart.t
1
use strict;
2
use utf8;
3

  
4
use lib 't';
5
use lib 'modules/fallback';
6
BEGIN {
7
  unshift @INC, 'modules/override';
8
}
9

  
10
use Support::TestSetup;
11
use Test::More tests => 2;
12
use Data::Dumper;
13
require Test::Deep;
14
use Encode;
15

  
16
use SL::Request;
17

  
18
Support::TestSetup::login();
19

  
20
open my $fh, '<', 't/request/post_multipart_1' or die "can't load test";
21
my $data = do { $/ = undef; <$fh> };
22

  
23
my $t = {};
24
my $tt = {};
25

  
26
local $ENV{CONTENT_TYPE} = 'multipart/form-data; boundary=---------------------------23281168279961';
27
SL::Request::_parse_multipart_formdata($t, $tt, $data);
28

  
29

  
30
my $blob = Encode::encode('utf-8', qq|\x{feff}Stunde;Montag;Dienstag;Mittwoch;Donnerstag;Freitag
31
1;Mathe;Deutsch;Englisch;Mathe;Kunst
32
2;Sport;Französisch;Geschichte;Sport;Geschichte
33
3;Sport;"Religion ev;kath";Kunst;;Kunst|);
34

  
35
my $t_cmp = {
36
          'profile' => {
37
                       'name' => undef,
38
                       'type' => undef
39
                     },
40
          'quote_char' => undef,
41
          'file' => $blob,
42
          'custom_sep_char' => undef,
43
          'sep_char' => undef,
44
          'settings' => {
45
                        'article_number_policy' => undef,
46
                        'sellprice_places' => undef,
47
                        'charset' => undef,
48
                        'apply_buchungsgruppe' => undef,
49
                        'full_preview' => undef,
50
                        'parts_type' => undef,
51
                        'default_unit' => undef,
52
                        'default_buchungsgruppe' => undef,
53
                        'duplicates' => undef,
54
                        'numberformat' => undef,
55
                        'sellprice_adjustment_type' => undef,
56
                        'shoparticle_if_missing' => undef,
57
                        'sellprice_adjustment' => undef
58
                      },
59
          'custom_escape_char' => undef,
60
          'action_test' => undef,
61
          'custom_quote_char' => undef,
62
          'escape_char' => undef,
63
          'action' => undef
64
        };
65
$t_cmp->{ATTACHMENTS}{file}{data} =  \$t_cmp->{'file'};
66

  
67

  
68
is_deeply $t, $t_cmp;
69

  
70
is_deeply $tt,
71
        {
72
          'profile' => {
73
                       'name' => '',
74
                       'type' =>'parts',
75
                     },
76
          'file' => undef,
77
          'quote_char' => 'quote',
78
          'custom_sep_char' => '',
79
          'sep_char' => 'semicolon',
80
          'settings' => {
81
                        'article_number_policy' => 'update_prices',
82
                        'sellprice_places' => 2,
83
                        'charset' => 'UTF-8',
84
                        'apply_buchungsgruppe' => 'all',
85
                        'full_preview' => '0',
86
                        'parts_type' => 'part',
87
                        'default_unit' => 'g',
88
                        'default_buchungsgruppe' => '815',
89
                        'duplicates' => 'no_check',
90
                        'numberformat' => '1.000,00',
91
                        'sellprice_adjustment_type' => 'percent',
92
                        'shoparticle_if_missing' => '0',
93
                        'sellprice_adjustment' =>'0'
94
                      },
95
          'custom_escape_char' => '',
96
          'action_test' => 'Test und Vorschau',
97
          'ATTACHMENTS' => {
98
                           'file' => {
99
                                     'filename' => 'from_wikipedia.csv'
100
                                   }
101
                         },
102
          'custom_quote_char' => '',
103
          'escape_char' => 'quote',
104
          'action' => 'CsvImport/dispatch',
105
          'FILENAME' => 'from_wikipedia.csv'
106
        };
107

  
t/request/post_multipart_1
1
-----------------------------23281168279961
2
Content-Disposition: form-data; name="action"
3

  
4
CsvImport/dispatch
5
-----------------------------23281168279961
6
Content-Disposition: form-data; name="profile.type"
7

  
8
parts
9
-----------------------------23281168279961
10
Content-Disposition: form-data; name="profile.name"
11

  
12

  
13
-----------------------------23281168279961
14
Content-Disposition: form-data; name="settings.numberformat"
15

  
16
1.000,00
17
-----------------------------23281168279961
18
Content-Disposition: form-data; name="settings.charset"
19

  
20
UTF-8
21
-----------------------------23281168279961
22
Content-Disposition: form-data; name="sep_char"
23

  
24
semicolon
25
-----------------------------23281168279961
26
Content-Disposition: form-data; name="custom_sep_char"
27

  
28

  
29
-----------------------------23281168279961
30
Content-Disposition: form-data; name="quote_char"
31

  
32
quote
33
-----------------------------23281168279961
34
Content-Disposition: form-data; name="custom_quote_char"
35

  
36

  
37
-----------------------------23281168279961
38
Content-Disposition: form-data; name="escape_char"
39

  
40
quote
41
-----------------------------23281168279961
42
Content-Disposition: form-data; name="custom_escape_char"
43

  
44

  
45
-----------------------------23281168279961
46
Content-Disposition: form-data; name="settings.duplicates"
47

  
48
no_check
49
-----------------------------23281168279961
50
Content-Disposition: form-data; name="settings.article_number_policy"
51

  
52
update_prices
53
-----------------------------23281168279961
54
Content-Disposition: form-data; name="settings.sellprice_places"
55

  
56
2
57
-----------------------------23281168279961
58
Content-Disposition: form-data; name="settings.sellprice_adjustment"
59

  
60
0
61
-----------------------------23281168279961
62
Content-Disposition: form-data; name="settings.sellprice_adjustment_type"
63

  
64
percent
65
-----------------------------23281168279961
66
Content-Disposition: form-data; name="settings.shoparticle_if_missing"
67

  
68
0
69
-----------------------------23281168279961
70
Content-Disposition: form-data; name="settings.parts_type"
71

  
72
part
73
-----------------------------23281168279961
74
Content-Disposition: form-data; name="settings.default_buchungsgruppe"
75

  
76
815
77
-----------------------------23281168279961
78
Content-Disposition: form-data; name="settings.apply_buchungsgruppe"
79

  
80
all
81
-----------------------------23281168279961
82
Content-Disposition: form-data; name="settings.default_unit"
83

  
84
g
85
-----------------------------23281168279961
86
Content-Disposition: form-data; name="settings.full_preview"
87

  
88
0
89
-----------------------------23281168279961
90
Content-Disposition: form-data; name="file"; filename="from_wikipedia.csv"
91
Content-Type: text/comma-separated-values
92

  
93
Stunde;Montag;Dienstag;Mittwoch;Donnerstag;Freitag
94
1;Mathe;Deutsch;Englisch;Mathe;Kunst
95
2;Sport;Französisch;Geschichte;Sport;Geschichte
96
3;Sport;"Religion ev;kath";Kunst;;Kunst
97
-----------------------------23281168279961
98
Content-Disposition: form-data; name="action_test"
99

  
100
Test und Vorschau
101
-----------------------------23281168279961--

Auch abrufbar als: Unified diff