Revision dc3f6120
Von Sven Schöling vor mehr als 12 Jahren hinzugefügt
SL/Request.pm | ||
---|---|---|
53 | 53 |
sub _parse_multipart_formdata { |
54 | 54 |
my ($target, $temp_target, $input) = @_; |
55 | 55 |
my ($name, $filename, $headers_done, $content_type, $boundary_found, $need_cr, $previous, $p_attachment, $encoding, $transfer_encoding); |
56 |
my $data_start = 0; |
|
57 |
|
|
58 |
# teach substr and length to use good ol' bytes, not 'em fancy characters |
|
59 |
use bytes; |
|
56 | 60 |
|
57 | 61 |
# We SHOULD honor encodings and transfer-encodings here, but as hard as I |
58 | 62 |
# looked I couldn't find a reasonably recent webbrowser that makes use of |
... | ... | |
63 | 67 |
$ENV{'CONTENT_TYPE'} =~ /multipart\/form-data\s*;\s*boundary\s*=\s*(.+)$/; |
64 | 68 |
my $boundary = '--' . $1; |
65 | 69 |
|
70 |
my $index = 0; |
|
71 |
my $line_length; |
|
66 | 72 |
foreach my $line (split m/\n/, $input) { |
67 |
last if (($line eq "${boundary}--") || ($line eq "${boundary}--\r")); |
|
73 |
$line_length = length $line; |
|
74 |
|
|
75 |
if ($line =~ /^\Q$boundary\E(--)?\r?$/) { |
|
76 |
my $last_boundary = $1; |
|
77 |
my $data = substr $input, $data_start, $index - $data_start; |
|
78 |
$data =~ s/\r?\n$//; |
|
68 | 79 |
|
69 |
if (($line eq $boundary) || ($line eq "$boundary\r")) { |
|
70 |
${ $previous } =~ s|\r?\n$|| if $previous; |
|
71 |
${ $previous } = Encode::decode($encoding, $$previous) if $previous && !$filename && !$transfer_encoding eq 'binary'; |
|
80 |
if ($previous && !$filename && $transfer_encoding && $transfer_encoding ne 'binary') { |
|
81 |
${ $previous } = Encode::decode($encoding, $data); |
|
82 |
} else { |
|
83 |
${ $previous } = $data; |
|
84 |
} |
|
72 | 85 |
|
73 | 86 |
undef $previous; |
74 | 87 |
undef $filename; |
... | ... | |
79 | 92 |
$need_cr = 0; |
80 | 93 |
$encoding = $::lx_office_conf{system}->{dbcharset} || Common::DEFAULT_CHARSET; |
81 | 94 |
$transfer_encoding = undef; |
82 |
|
|
95 |
last if $last_boundary; |
|
83 | 96 |
next; |
84 | 97 |
} |
85 | 98 |
|
... | ... | |
90 | 103 |
|
91 | 104 |
if (!$line) { |
92 | 105 |
$headers_done = 1; |
106 |
$data_start = $index + $line_length + 1; |
|
93 | 107 |
next; |
94 | 108 |
} |
95 | 109 |
|
... | ... | |
159 | 173 |
|
160 | 174 |
next unless $previous; |
161 | 175 |
|
162 |
${ $previous } .= "${line}\n"; |
|
176 |
} continue { |
|
177 |
$index += $line_length + 1; |
|
163 | 178 |
} |
164 | 179 |
|
165 |
${ $previous } =~ s|\r?\n$|| if $previous; |
|
166 |
|
|
167 | 180 |
$::lxdebug->leave_sub(2); |
168 | 181 |
} |
169 | 182 |
|
Auch abrufbar als: Unified diff
Parsing von multipart/formdata beschleuningt.
Die entsprechende Routine hatte einen bösen Fall von Shlemiel the Painter's
algorithm [1]. Dadurch wurden Fileuploads mit mehr als 20k Zeilen extrem
langsam. Binärdaten wie pdfs oder Bilder hat das nicht gestört, aber bei CSV
Imports hat eine 80k Zeilen Datei dann auch mal 2-5min gebraucht, nur um den
Request zu parsen.
Jetzt werden nur die Indizes geparst und hinterher direkt aus dem Request der
substr gezogen. Ausserdem endlich einen Testfall dafür eingebaut.
[1] http://en.wikipedia.org/wiki/Schlemiel_the_Painter%27s_algorithm