Revision 08649f7d
Von Johannes Grassler vor mehr als 1 Jahr hinzugefügt
SL/ZUGFeRD.pm | ||
---|---|---|
10 | 10 |
use XML::LibXML; |
11 | 11 |
|
12 | 12 |
use SL::Locale::String qw(t8); |
13 |
use SL::XMLInvoice; |
|
13 | 14 |
|
14 | 15 |
use parent qw(Exporter); |
15 | 16 |
our @EXPORT_PROFILES = qw(PROFILE_FACTURX_EXTENDED PROFILE_XRECHNUNG); |
... | ... | |
20 | 21 |
use constant PROFILE_XRECHNUNG => 1; |
21 | 22 |
|
22 | 23 |
use constant RES_OK => 0; |
23 |
use constant RES_ERR_FILE_OPEN => 1; |
|
24 |
use constant RES_ERR_NO_XMP_METADATA => 2; |
|
25 |
use constant RES_ERR_NO_XML_INVOICE => 3; |
|
26 |
use constant RES_ERR_NOT_ZUGFERD => 4; |
|
27 |
use constant RES_ERR_UNSUPPORTED_ZUGFERD_VERSION => 5; |
|
24 |
use constant RES_ERR_FILE_OPEN => -1; |
|
25 |
use constant RES_ERR_NO_ATTACHMENT => -2; |
|
28 | 26 |
|
29 | 27 |
our @customer_settings = ( |
30 | 28 |
[ 0, t8('Do not create Factur-X/ZUGFeRD invoices') ], |
... | ... | |
47 | 45 |
|
48 | 46 |
sub _extract_zugferd_invoice_xml { |
49 | 47 |
my $doc = shift; |
50 |
my $names_dict = $doc->getValue($doc->getRootDict->{Names}) or return {}; |
|
51 |
my $files_tree = $names_dict->{EmbeddedFiles} or return {}; |
|
48 |
my %res_fail; |
|
49 |
|
|
50 |
$res_fail{'result'} = RES_ERR_NO_ATTACHMENT(); |
|
51 |
$res_fail{'message'} = "PDF does not have a Names dictionary."; |
|
52 |
my $names_dict = $doc->getValue($doc->getRootDict->{Names}) or return \%res_fail; |
|
53 |
|
|
54 |
$res_fail{'message'} = "PDF does not have a EmbeddedFiles tree."; |
|
55 |
my $files_tree = $names_dict->{EmbeddedFiles} or return \%res_fail; |
|
56 |
|
|
52 | 57 |
my @agenda = $files_tree; |
53 |
my $ret = {}; |
|
58 |
|
|
59 |
my $parser; # SL::XMLInvoice object used as return value |
|
60 |
my @res; # Temporary storage for error messages encountered during |
|
61 |
# attempts to process attachments. |
|
54 | 62 |
|
55 | 63 |
# Hardly ever more than single leaf, but... |
56 | 64 |
|
... | ... | |
74 | 82 |
my $obj_node = $doc->dereference($any_num); |
75 | 83 |
my $content = $doc->decodeOne($obj_node->{value}, 0) // ''; |
76 | 84 |
|
77 |
#print "1\n"; |
|
78 |
|
|
79 |
next if $content !~ m{<rsm:CrossIndustryInvoice}; |
|
80 |
#print "2\n"; |
|
81 |
|
|
82 |
my $dom = eval { XML::LibXML->load_xml(string => $content) }; |
|
83 |
return $content if $dom && ($dom->documentElement->nodeName eq 'rsm:CrossIndustryInvoice'); |
|
85 |
$parser = $parser = SL::XMLInvoice->new($content); |
|
86 |
|
|
87 |
# Caveat: this will only ever catch the first attachment looking like |
|
88 |
# an XML invoice. |
|
89 |
if ( $parser->{status} == SL::XMLInvoice::RES_OK ){ |
|
90 |
return $parser; |
|
91 |
} else { |
|
92 |
push @res, t8("Could not parse PDF embedded attachment #1: #2", |
|
93 |
$k, |
|
94 |
$parser->{result}); |
|
95 |
} |
|
84 | 96 |
} |
85 | 97 |
} |
86 | 98 |
} |
87 | 99 |
|
88 |
return undef; |
|
100 |
# There's going to be at least one attachment that failed to parse as XML by |
|
101 |
# this point - if there were no attachments at all, we would have bailed out |
|
102 |
# a lot earlier. |
|
103 |
|
|
104 |
%res_fail = ( result => RES_ERR_FILE_OPEN(), |
|
105 |
message => join("; ", @res), |
|
106 |
); |
|
107 |
|
|
108 |
return \%res_fail; |
|
89 | 109 |
} |
90 | 110 |
|
91 | 111 |
sub _get_xmp_metadata { |
... | ... | |
95 | 115 |
if ($node && $node->{StreamData} && defined($node->{StreamData}->{value})) { |
96 | 116 |
return $node->{StreamData}->{value}; |
97 | 117 |
} |
98 |
|
|
99 | 118 |
return undef; |
100 | 119 |
} |
101 | 120 |
|
102 | 121 |
sub extract_from_pdf { |
103 | 122 |
my ($self, $file_name) = @_; |
123 |
my @warnings; |
|
104 | 124 |
|
105 | 125 |
my $pdf_doc = CAM::PDF->new($file_name); |
106 | 126 |
|
107 | 127 |
if (!$pdf_doc) { |
108 |
return { |
|
128 |
return \{
|
|
109 | 129 |
result => RES_ERR_FILE_OPEN(), |
110 | 130 |
message => $::locale->text('The file \'#1\' could not be opened for reading.', $file_name), |
111 | 131 |
}; |
112 | 132 |
} |
113 | 133 |
|
114 | 134 |
my $xmp = _get_xmp_metadata($pdf_doc); |
135 |
|
|
115 | 136 |
if (!defined $xmp) { |
116 |
return { |
|
117 |
result => RES_ERR_NO_XMP_METADATA(), |
|
118 |
message => $::locale->text('The file \'#1\' does not contain the required XMP meta data.', $file_name), |
|
119 |
}; |
|
120 |
} |
|
137 |
push @warnings, $::locale->text('The file \'#1\' does not contain the required XMP meta data.', $file_name); |
|
138 |
} else { |
|
139 |
my $dom = eval { XML::LibXML->load_xml(string => $xmp) }; |
|
121 | 140 |
|
122 |
my $bad = { |
|
123 |
result => RES_ERR_NO_XMP_METADATA(), |
|
124 |
message => $::locale->text('Parsing the XMP metadata failed.'), |
|
125 |
}; |
|
141 |
push @warnings, $::locale->text('Parsing the XMP metadata failed.'), if !$dom; |
|
126 | 142 |
|
127 |
my $dom = eval { XML::LibXML->load_xml(string => $xmp) }; |
|
143 |
my $xpc = XML::LibXML::XPathContext->new($dom); |
|
144 |
$xpc->registerNs('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'); |
|
128 | 145 |
|
129 |
return $bad if !$dom;
|
|
146 |
my $zugferd_version;
|
|
130 | 147 |
|
131 |
my $xpc = XML::LibXML::XPathContext->new($dom); |
|
132 |
$xpc->registerNs('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'); |
|
148 |
my $test = $xpc->findnodes('/x:xmpmeta/rdf:RDF/rdf:Description'); |
|
133 | 149 |
|
134 |
my $zugferd_version; |
|
150 |
foreach my $node ($xpc->findnodes('/x:xmpmeta/rdf:RDF/rdf:Description')) { |
|
151 |
my $ns = first { ref($_) eq 'XML::LibXML::Namespace' } $node->attributes; |
|
152 |
next unless $ns; |
|
135 | 153 |
|
136 |
foreach my $node ($xpc->findnodes('/x:xmpmeta/rdf:RDF/rdf:Description')) { |
|
137 |
my $ns = first { ref($_) eq 'XML::LibXML::Namespace' } $node->attributes; |
|
138 |
next unless $ns; |
|
154 |
if ($ns->getData =~ m{urn:zugferd:pdfa:CrossIndustryDocument:invoice:2p0}) { |
|
155 |
$zugferd_version = 'zugferd:2p0'; |
|
156 |
last; |
|
157 |
} |
|
139 | 158 |
|
140 |
if ($ns->getData =~ m{urn:zugferd:pdfa:CrossIndustryDocument:invoice:2p0}) {
|
|
141 |
$zugferd_version = 'zugferd:2p0';
|
|
142 |
last; |
|
143 |
} |
|
159 |
if ($ns->getData =~ m{urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0}) {
|
|
160 |
$zugferd_version = 'factur-x:1p0';
|
|
161 |
last;
|
|
162 |
}
|
|
144 | 163 |
|
145 |
if ($ns->getData =~ m{urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0}) { |
|
146 |
$zugferd_version = 'factur-x:1p0'; |
|
147 |
last; |
|
164 |
if ($ns->getData =~ m{zugferd|factur-x}i) { |
|
165 |
$zugferd_version = 'unsupported'; |
|
166 |
last; |
|
167 |
} |
|
148 | 168 |
} |
149 | 169 |
|
150 |
if ($ns->getData =~ m{zugferd|factur-x}i) { |
|
151 |
$zugferd_version = 'unsupported'; |
|
152 |
last; |
|
170 |
if (!$zugferd_version) { |
|
171 |
push @warnings, $::locale->text('The XMP metadata does not declare the Factur-X/ZUGFeRD data.'), |
|
153 | 172 |
} |
154 |
} |
|
155 | 173 |
|
156 |
if (!$zugferd_version) { |
|
157 |
return { |
|
158 |
result => RES_ERR_NOT_ZUGFERD(), |
|
159 |
message => $::locale->text('The XMP metadata does not declare the Factur-X/ZUGFeRD data.'), |
|
160 |
}; |
|
161 |
} |
|
162 |
|
|
163 |
if ($zugferd_version eq 'unsupported') { |
|
164 |
return { |
|
165 |
result => RES_ERR_UNSUPPORTED_ZUGFERD_VERSION(), |
|
166 |
message => $::locale->text('The Factur-X/ZUGFeRD version used is not supported.'), |
|
167 |
}; |
|
174 |
if ($zugferd_version eq 'unsupported') { |
|
175 |
push @warnings, $::locale->text('The Factur-X/ZUGFeRD version used is not supported.'), |
|
176 |
} |
|
168 | 177 |
} |
169 | 178 |
|
170 | 179 |
my $invoice_xml = _extract_zugferd_invoice_xml($pdf_doc); |
171 | 180 |
|
172 |
if (!defined $invoice_xml) { |
|
173 |
return { |
|
174 |
result => RES_ERR_NO_XML_INVOICE(), |
|
175 |
message => $::locale->text('The Factur-X/ZUGFeRD XML invoice was not found.'), |
|
176 |
}; |
|
177 |
} |
|
181 |
my %res; |
|
178 | 182 |
|
179 |
return { |
|
180 |
result => RES_OK(), |
|
183 |
%res = ( |
|
184 |
result => $invoice_xml->{result}, |
|
185 |
message => $invoice_xml->{message}, |
|
181 | 186 |
metadata_xmp => $xmp, |
182 | 187 |
invoice_xml => $invoice_xml, |
183 |
}; |
|
188 |
warnings => \@warnings, |
|
189 |
); |
|
190 |
|
|
191 |
return \%res; |
|
192 |
} |
|
193 |
|
|
194 |
sub extract_from_xml { |
|
195 |
my ($self, $data) = @_; |
|
196 |
|
|
197 |
my %res; |
|
198 |
|
|
199 |
my $invoice_xml = SL::XMLInvoice->new($data); |
|
200 |
|
|
201 |
%res = ( |
|
202 |
result => $invoice_xml->{result}, |
|
203 |
message => $invoice_xml->{message}, |
|
204 |
metadata_xmp => undef, |
|
205 |
invoice_xml => $invoice_xml, |
|
206 |
warnings => (), |
|
207 |
); |
|
208 |
|
|
209 |
return \%res; |
|
184 | 210 |
} |
185 | 211 |
|
186 | 212 |
1; |
... | ... | |
200 | 226 |
my $pdf = '/path/to/my.pdf'; |
201 | 227 |
my $info = SL::ZUGFeRD->extract_from_pdf($pdf); |
202 | 228 |
|
229 |
my $xml = '<?xml version="1.0" encoding="UTF-8"?> ...'; |
|
230 |
my $info = SL::ZUGFeRD->extract_from_xml($xml); |
|
231 |
|
|
203 | 232 |
if ($info->{result} != SL::ZUGFeRD::RES_OK()) { |
204 | 233 |
# An error occurred; log message from parser: |
205 | 234 |
$::lxdebug->message(LXDebug::DEBUG1(), "Could not extract ZUGFeRD data from $pdf: " . $info->{message}); |
206 | 235 |
return; |
207 | 236 |
} |
208 | 237 |
|
209 |
# Parse & handle invoice XML: |
|
210 |
my $dom = XML::LibXML->load_xml(string => $info->{invoice_xml}); |
|
238 |
# Access invoice XML data: |
|
239 |
my $inv = ${$info}{'invoice_xml'}; |
|
240 |
my %metadata = %{$inv->metadata}; |
|
241 |
my @items = @{$inv->items}; |
|
242 |
my $dom = $inv->dom; |
|
211 | 243 |
|
212 | 244 |
|
213 | 245 |
=head1 FUNCTIONS |
214 | 246 |
|
215 |
=over 4 |
|
216 |
|
|
217 |
=item C<extract_from_pdf> C<$file_name> |
|
247 |
=head2 extract_from_pdf E<lt>file_nameE<gt> |
|
218 | 248 |
|
219 |
Opens an existing PDF in the file system and tries to extract |
|
220 |
Factur-X/ZUGFeRD invoice data from it. First it'll parse the XMP |
|
249 |
Opens an existing PDF file in the file system and tries to extract
|
|
250 |
Factur-X/XRechnung/ZUGFeRD invoice data from it. First it'll parse the XMP
|
|
221 | 251 |
metadata and look for the Factur-X/ZUGFeRD declaration inside. If the |
222 |
declaration isn't found or the declared version isn't 2p0, an error is
|
|
223 |
returned.
|
|
252 |
declaration isn't found or the declared version isn't 2p0, a warning is
|
|
253 |
recorded in the returned data structure's C<warnings> key.
|
|
224 | 254 |
|
225 |
Otherwise it'll continue to look through all embedded files in the |
|
226 |
PDF. The first embedded XML file with a root node of |
|
227 |
C<rsm:CrossCountryInvoice> will be returnd. |
|
255 |
Regardless of metadata presence, it will continue to iterate over all files |
|
256 |
embedded in the PDF and attempt to parse them with SL::XMLInvoice. If it |
|
257 |
succeeds, the first SL::XMLInvoice object that indicates successful parsing is |
|
258 |
returned. |
|
228 | 259 |
|
229 | 260 |
Always returns a hash ref containing the key C<result>, a number that |
230 | 261 |
can be one of the following constants: |
231 | 262 |
|
232 | 263 |
=over 4 |
233 | 264 |
|
234 |
=item C<RES_OK> (0): parsing was OK; the returned hash will also |
|
235 |
contain the keys C<xmp_metadata> and C<invoice_xml> which will contain |
|
236 |
the XML text of the metadata & the Factur-X/ZUGFeRD invoice. |
|
265 |
=item C<RES_OK> (0): parsing was OK. |
|
266 |
|
|
267 |
=item C<RES_ERR_…> (all values != 0): parsing failed. Values > 0 indicate a failure |
|
268 |
in C<SL::XMLInvoice>, Values < 0 indicate a failure in C<SL::ZUGFeRD>. |
|
269 |
|
|
270 |
=back |
|
271 |
|
|
272 |
Other than that, the hash ref contains the following keys: |
|
273 |
|
|
274 |
=over 4 |
|
275 |
|
|
276 |
=item C<message> - An error message detailing the problem upon nonzero C<result>, undef otherwise. |
|
277 |
|
|
278 |
=item C<metadata_xmp> - The XMP metadata extracted from the Factur-X/ZUGFeRD invoice (if present) |
|
279 |
|
|
280 |
=item C<invoice_xml> - An SL::XMLInvoice object holding the data extracted from the parsed XML invoice. |
|
281 |
|
|
282 |
=item C<warnings> - Warnings encountered upon extracting/parsing XML files (if any) |
|
283 |
|
|
284 |
=back |
|
285 |
|
|
286 |
=head2 extract_from_xml E<lt>stringE<gt> |
|
287 |
|
|
288 |
Takes a string containing an XML document with Factur-X/XRechnung/ZUGFeRD |
|
289 |
invoice data and attempts to parse it using C<SL::XMLInvoice>. |
|
290 |
|
|
291 |
If parsing is successful, an SL::XMLInvoice object containing the document's |
|
292 |
parsed data is returned. |
|
293 |
|
|
294 |
This method always returns a hash ref containing the key C<result>, a number that |
|
295 |
can be one of the following constants: |
|
296 |
|
|
297 |
=over 4 |
|
298 |
|
|
299 |
=item C<RES_OK> (0): parsing was OK. |
|
237 | 300 |
|
238 |
=item C<RES_ERR_…> (all values E<gt> 0): parsing failed; the hash will |
|
239 |
also contain a key C<message> which contains a human-readable |
|
240 |
information about what exactly failed. |
|
301 |
=item C<RES_ERR_…> (all values != 0): parsing failed. Values > 0 indicate a failure |
|
302 |
in C<SL::XMLInvoice>, Values < 0 indicate a failure in C<SL::ZUGFeRD>. |
|
241 | 303 |
|
242 | 304 |
=back |
243 | 305 |
|
306 |
Other than that, the hash ref contains the following keys: |
|
307 |
|
|
308 |
=over 4 |
|
309 |
|
|
310 |
=item C<message> - An error message detailing the problem upon nonzero C<result>, undef otherwise. |
|
311 |
|
|
312 |
=item C<metadata_xmp> - Always undef and only present to let downstream code expecting its presence fail gracefully. |
|
313 |
|
|
314 |
=item C<invoice_xml> - An SL::XMLInvoice object holding the data extracted from the parsed XML invoice. |
|
315 |
|
|
316 |
=item C<warnings> - Warnings encountered upon extracting/parsing XML data (if any) |
|
317 |
|
|
244 | 318 |
=back |
245 | 319 |
|
246 | 320 |
=head1 BUGS |
247 | 321 |
|
248 | 322 |
Nothing here yet. |
249 | 323 |
|
250 |
=head1 AUTHOR |
|
324 |
=head1 AUTHORS
|
|
251 | 325 |
|
252 |
Moritz Bunkus E<lt>m.bunkus@linet-services.deE<gt> |
|
326 |
=over 4 |
|
327 |
|
|
328 |
=item Moritz Bunkus E<lt>m.bunkus@linet-services.deE<gt> |
|
329 |
|
|
330 |
=item Johannes Graßler E<lt>info@computer-grassler.deE<gt> |
|
331 |
|
|
332 |
=back |
|
253 | 333 |
|
254 | 334 |
=cut |
Auch abrufbar als: Unified diff
ZUGFeRD-Import auf SL::XMLInvoice umgestellt
Wichtigste Aenderung dieses Commits ist die Umstellung des
ZUGFeRD-Imports in der Finanzbuchhaltung auf das neu
hinzugefuegte Modul SL::XMLInvoice, das auch die Verarbeitung
von Rechnungen im XRechnung-Format erlaubt. Darueber hinaus
gibt es einige weitere Aenderungen:
unterstuetzt.
Warnungen, saemtliche XML-Anhaenge an PDF-Dateien werden
automatisch erkannt und verarbeitet (egal ob es sich um
ZUGFeRD/Faktur-X- oder XRechnung-Daten handelt).
Rechnungen im XRechnung-Format wichtig).
Formular eingetragen, nicht mehr nur Rechnungsnummer und
Datum.
existieren. Eine eigene Belegvorlage wird fuer jede
importierte Rechnung automatisch angelegt.
aus den Verbindlichkeitskonten ausgewaehlt.
Enthaelt die Rechnung keines, wird es auf das Rechnungsdatum
gesetzt.