Revision 6056e1d7
Von Sven Schöling vor fast 13 Jahren hinzugefügt
SL/Request.pm | ||
---|---|---|
49 | 49 |
$::lxdebug->leave_sub(2); |
50 | 50 |
} |
51 | 51 |
|
52 |
sub parse_multipart_formdata { |
|
53 |
my ($target, $input) = @_; |
|
54 |
my ($name, $filename, $headers_done, $content_type, $boundary_found, $need_cr, $previous); |
|
55 |
my $uploads = {}; |
|
52 |
sub _parse_multipart_formdata { |
|
53 |
my ($target, $temp_target, $input) = @_; |
|
54 |
my ($name, $filename, $headers_done, $content_type, $boundary_found, $need_cr, $previous, $encoding, $transfer_encoding); |
|
55 |
|
|
56 |
# We SHOULD honor encodings and transfer-encodings here, but as hard as I |
|
57 |
# looked I couldn't find a reasonably recent webbrowser that makes use of |
|
58 |
# these. Transfer encoding just eats up bandwidth... |
|
56 | 59 |
|
60 |
# so all I'm going to do is add a fail safe that if anyone ever encounters |
|
61 |
# this, it's going to croak so that debugging is easier |
|
62 |
$ENV{'CONTENT_TYPE'} =~ /multipart\/form-data\s*;\s*boundary\s*=\s*(.+)$/; |
|
57 | 63 |
my $boundary = '--' . $1; |
58 | 64 |
|
59 | 65 |
foreach my $line (split m/\n/, $input) { |
... | ... | |
61 | 67 |
|
62 | 68 |
if (($line eq $boundary) || ($line eq "$boundary\r")) { |
63 | 69 |
${ $previous } =~ s|\r?\n$|| if $previous; |
70 |
${ $previous } = Encode::decode($encoding, $$previous) if $previous && !$filename && !$transfer_encoding eq 'binary'; |
|
64 | 71 |
|
65 | 72 |
undef $previous; |
66 | 73 |
undef $filename; |
... | ... | |
69 | 76 |
$content_type = "text/plain"; |
70 | 77 |
$boundary_found = 1; |
71 | 78 |
$need_cr = 0; |
79 |
$encoding = $::lx_office_conf{system}->{dbcharset} || Common::DEFAULT_CHARSET; |
|
80 |
$transfer_encoding = undef; |
|
72 | 81 |
|
73 | 82 |
next; |
74 | 83 |
} |
... | ... | |
94 | 103 |
substr $line, $-[0], $+[0] - $-[0], ""; |
95 | 104 |
} |
96 | 105 |
|
97 |
$previous = _store_value($uploads, $name, '') if ($name);
|
|
98 |
$target->{FILENAME} = $filename if ($filename); |
|
106 |
$previous = _store_value($filename ? $target : $temp_target, $name, '') if ($name);
|
|
107 |
$temp_target->{FILENAME} = $filename if ($filename);
|
|
99 | 108 |
|
100 | 109 |
next; |
101 | 110 |
} |
102 | 111 |
|
103 |
if ($line =~ m|^content-type\s*:\s*(.*?)$|i) {
|
|
112 |
if ($line =~ m|^content-type\s*:\s*(.*?)[;\$]|i) {
|
|
104 | 113 |
$content_type = $1; |
114 |
|
|
115 |
if ($content_type =~ /^text/ && $line =~ m|;\s*charset\s*:\s*("?)(.*?)\1$|i) { |
|
116 |
$encoding = $2; |
|
117 |
} |
|
118 |
|
|
119 |
next; |
|
120 |
} |
|
121 |
|
|
122 |
if ($line =~ m|^content-transfer-encoding\s*=\s*(.*?)$|i) { |
|
123 |
$transfer_encoding = lc($1); |
|
124 |
if ($transfer_encoding && $transfer_encoding !~ /^[78]bit|binary$/) { |
|
125 |
die 'Transfer encodings beyond 7bit/8bit and binary are not implemented.'; |
|
126 |
} |
|
127 |
|
|
128 |
next; |
|
105 | 129 |
} |
106 | 130 |
|
107 | 131 |
next; |
... | ... | |
115 | 139 |
${ $previous } =~ s|\r?\n$|| if $previous; |
116 | 140 |
|
117 | 141 |
$::lxdebug->leave_sub(2); |
118 |
|
|
119 |
} |
|
120 |
|
|
121 |
sub _request_to_hash { |
|
122 |
$::lxdebug->enter_sub(2); |
|
123 |
|
|
124 |
my ($target, $input) = @_; |
|
125 |
my $uploads; |
|
126 |
|
|
127 |
if (!$ENV{'CONTENT_TYPE'} |
|
128 |
|| ($ENV{'CONTENT_TYPE'} !~ /multipart\/form-data\s*;\s*boundary\s*=\s*(.+)$/)) { |
|
129 |
|
|
130 |
$uploads = { }; |
|
131 |
_input_to_hash($target, $input); |
|
132 |
|
|
133 |
} else { |
|
134 |
$uploads = _parse_multipart_formdata($target, $input); |
|
135 |
} |
|
136 |
|
|
137 |
$main::lxdebug->leave_sub(2); |
|
138 |
return $uploads; |
|
139 | 142 |
} |
140 | 143 |
|
141 | 144 |
sub _recode_recursively { |
142 |
$main::lxdebug->enter_sub();
|
|
143 |
my ($iconv, $param) = @_;
|
|
145 |
$::lxdebug->enter_sub;
|
|
146 |
my ($iconv, $from, $to) = @_;
|
|
144 | 147 |
|
145 |
if (any { ref $param eq $_ } qw(Form HASH)) {
|
|
146 |
foreach my $key (keys %{ $param }) {
|
|
147 |
if (!ref $param->{$key}) {
|
|
148 |
# Workaround for a bug: converting $param->{$key} directly
|
|
148 |
if (any { ref $from eq $_ } qw(Form HASH)) {
|
|
149 |
for my $key (keys %{ $from }) {
|
|
150 |
if (!ref $from->{$key}) {
|
|
151 |
# Workaround for a bug: converting $from->{$key} directly
|
|
149 | 152 |
# leads to 'undef'. I don't know why. Converting a copy works, |
150 | 153 |
# though. |
151 |
$param->{$key} = $iconv->convert("" . $param->{$key});
|
|
154 |
$to->{$key} = $iconv->convert("" . $from->{$key});
|
|
152 | 155 |
} else { |
153 |
_recode_recursively($iconv, $param->{$key}); |
|
156 |
$to->{$key} = {} if 'HASH' eq ref $from->{$key}; |
|
157 |
$to->{$key} = [] if 'ARRAY' eq ref $from->{$key}; |
|
158 |
_recode_recursively($iconv, $from->{$key}, $to->{$key}); |
|
154 | 159 |
} |
155 | 160 |
} |
156 | 161 |
|
157 |
} elsif (ref $param eq 'ARRAY') {
|
|
158 |
foreach my $idx (0 .. scalar(@{ $param }) - 1) {
|
|
159 |
if (!ref $param->[$idx]) {
|
|
160 |
# Workaround for a bug: converting $param->[$idx] directly
|
|
162 |
} elsif (ref $from eq 'ARRAY') {
|
|
163 |
foreach my $idx (0 .. scalar(@{ $from }) - 1) {
|
|
164 |
if (!ref $from->[$idx]) {
|
|
165 |
# Workaround for a bug: converting $from->[$idx] directly
|
|
161 | 166 |
# leads to 'undef'. I don't know why. Converting a copy works, |
162 | 167 |
# though. |
163 |
$param->[$idx] = $iconv->convert("" . $param->[$idx]);
|
|
168 |
$from->[$idx] = $iconv->convert("" . $from->[$idx]);
|
|
164 | 169 |
} else { |
165 |
_recode_recursively($iconv, $param->[$idx]); |
|
170 |
$to->[$idx] = {} if 'HASH' eq ref $from->[$idx]; |
|
171 |
$to->[$idx] = [] if 'ARRAY' eq ref $from->[$idx]; |
|
172 |
_recode_recursively($iconv, $from->[$idx], $to->[$idx]); |
|
166 | 173 |
} |
167 | 174 |
} |
168 | 175 |
} |
... | ... | |
173 | 180 |
$::lxdebug->enter_sub; |
174 | 181 |
|
175 | 182 |
my ($target) = @_; |
183 |
my $db_charset = $::lx_office_conf{system}->{dbcharset} || Common::DEFAULT_CHARSET; |
|
184 |
|
|
185 |
# yes i know, copying all those values around isn't terribly efficient, but |
|
186 |
# the old version of dumping everything into form and then launching a |
|
187 |
# tactical recode nuke at the data is still worse. |
|
176 | 188 |
|
177 |
_input_to_hash($target, $ENV{QUERY_STRING}) if $ENV{QUERY_STRING}; |
|
178 |
_input_to_hash($target, $ARGV[0]) if @ARGV && $ARGV[0]; |
|
189 |
# this way the data can at least be recoded on the fly as soon as we get to |
|
190 |
# know the source encoding and only in the cases where encoding may be hidden |
|
191 |
# among the payload we take the hit of copying the request around |
|
192 |
my $temp_target = { }; |
|
193 |
|
|
194 |
# since both of these can potentially bring their encoding in INPUT_ENCODING |
|
195 |
# they get dumped into temp_target |
|
196 |
_input_to_hash($temp_target, $ENV{QUERY_STRING}) if $ENV{QUERY_STRING}; |
|
197 |
_input_to_hash($temp_target, $ARGV[0]) if @ARGV && $ARGV[0]; |
|
179 | 198 |
|
180 |
my $uploads; |
|
181 | 199 |
if ($ENV{CONTENT_LENGTH}) { |
182 | 200 |
my $content; |
183 | 201 |
read STDIN, $content, $ENV{CONTENT_LENGTH}; |
184 |
$uploads = _request_to_hash($target, $content); |
|
202 |
open my $fh, '>:raw', '/tmp/blubb.bin' or die; |
|
203 |
print $fh $content; |
|
204 |
close $fh; |
|
205 |
if ($ENV{'CONTENT_TYPE'} && $ENV{'CONTENT_TYPE'} =~ /multipart\/form-data/) { |
|
206 |
# multipart formdata can bring it's own encoding, so give it both |
|
207 |
# and let ti decide on it's own |
|
208 |
_parse_multipart_formdata($target, $temp_target, $content); |
|
209 |
} else { |
|
210 |
# normal encoding must be recoded |
|
211 |
_input_to_hash($temp_target, $content); |
|
212 |
} |
|
185 | 213 |
} |
186 | 214 |
|
187 | 215 |
if ($target->{RESTORE_FORM_FROM_SESSION_ID}) { |
188 | 216 |
my %temp_form; |
189 | 217 |
$::auth->restore_form_from_session(delete $target->{RESTORE_FORM_FROM_SESSION_ID}, form => \%temp_form); |
190 |
_input_to_hash($target, join '&', map { uri_encode($_) . '=' . uri_encode($temp_form{$_}) } keys %temp_form);
|
|
218 |
_store_value($target, $_, $temp_form{$_}) for keys %temp_form;
|
|
191 | 219 |
} |
192 | 220 |
|
193 |
my $db_charset = $::lx_office_conf{system}->{dbcharset} || Common::DEFAULT_CHARSET; |
|
194 |
my $encoding = delete $target->{INPUT_ENCODING} || $db_charset; |
|
221 |
my $encoding = delete $temp_target->{INPUT_ENCODING} || $db_charset; |
|
195 | 222 |
|
196 |
_recode_recursively(SL::Iconv->new($encoding, $db_charset), $target);
|
|
223 |
_recode_recursively(SL::Iconv->new($encoding, $db_charset), $temp_target => $target) if keys %$target;
|
|
197 | 224 |
|
198 |
map { $target->{$_} = $uploads->{$_} } keys %{ $uploads } if $uploads;
|
|
225 |
map { $target->{$_} = $temp_target->{$_} } keys %{ $temp_target };
|
|
199 | 226 |
|
200 | 227 |
$::lxdebug->leave_sub; |
201 | 228 |
|
Auch abrufbar als: Unified diff
Recoding von Daten konzeptuell getrennt.
Es gibt 4 Pfade um Daten in einen Request zu kriegen:
Der letzte Teil teilt sich noch einmal in
Alle Daten in LxOffice können über INPUT_ENCODING das encoding des Formulars
überschreiben, das ist nötig weil Javascript da sein eigenes Ding dreht.
Das führt dazu, dass alle http Quellen:
Uploads, die mit dem encoding binary geschickt werden, dürfen dagegen garnicht
recoded werden. Deshalb wurden bisher alle multipart/form-data Daten davon
ausgenommen, was aber zu Fehlern führt, wenn ein Formular gemischte Werte über
multipart/form-data sendet. Am einfachsten zu demonstrieren im CsvImport, wenn
man 'ä' als sep_char angibt.
Dieser Patch ändert das in zwei Container, in die einsortiert wird:
Dadurch muss das recoding nicht mehr in-place gemacht werden.
Alles in multipart/form-data wird jetzt decodiert, ausser Dateiuploads
(erkennbar am filename Attribut) und explizit binary geflaggtes
content-transfer-encoding.
Bei kollidierendem INPUT_ENCODING und "content-type; charset" wird erst das
content-type charset dekodiert, und dann ein recode aus internem coding in das
angefragte INPUT_ENCODING gemacht.