|
1 |
package SL::ZUGFeRD;
|
|
2 |
|
|
3 |
use strict;
|
|
4 |
use warnings;
|
|
5 |
use utf8;
|
|
6 |
|
|
7 |
use CAM::PDF;
|
|
8 |
use Data::Dumper;
|
|
9 |
use List::Util qw(first);
|
|
10 |
use XML::LibXML;
|
|
11 |
|
|
12 |
use constant RES_OK => 0;
|
|
13 |
use constant RES_ERR_FILE_OPEN => 1;
|
|
14 |
use constant RES_ERR_NO_XMP_METADATA => 2;
|
|
15 |
use constant RES_ERR_NO_XML_INVOICE => 3;
|
|
16 |
use constant RES_ERR_NOT_ZUGFERD => 4;
|
|
17 |
use constant RES_ERR_UNSUPPORTED_ZUGFERD_VERSION => 5;
|
|
18 |
|
|
19 |
sub _extract_zugferd_invoice_xml {
|
|
20 |
my $doc = shift;
|
|
21 |
my $names_dict = $doc->getValue($doc->getRootDict->{Names}) or return {};
|
|
22 |
my $files_tree = $names_dict->{EmbeddedFiles} or return {};
|
|
23 |
my @agenda = $files_tree;
|
|
24 |
my $ret = {};
|
|
25 |
|
|
26 |
# Hardly ever more than single leaf, but...
|
|
27 |
|
|
28 |
while (@agenda) {
|
|
29 |
my $item = $doc->getValue(shift @agenda);
|
|
30 |
|
|
31 |
if ($item->{Kids}) {
|
|
32 |
my $kids = $doc->getValue($item->{Kids});
|
|
33 |
push @agenda, @$kids
|
|
34 |
|
|
35 |
} else {
|
|
36 |
my $nodes = $doc->getValue($item->{Names});
|
|
37 |
my @names = map { $doc->getValue($_)} @$nodes;
|
|
38 |
|
|
39 |
while (@names) {
|
|
40 |
my ($k, $v) = splice @names, 0, 2;
|
|
41 |
my $ef_node = $v->{EF};
|
|
42 |
my $ef_dict = $doc->getValue($ef_node);
|
|
43 |
my $fnode = (values %$ef_dict)[0];
|
|
44 |
my $any_num = $fnode->{value};
|
|
45 |
my $obj_node = $doc->dereference($any_num);
|
|
46 |
my $content = $doc->decodeOne($obj_node->{value}, 0) // '';
|
|
47 |
|
|
48 |
print "1\n";
|
|
49 |
|
|
50 |
next if $content !~ m{<rsm:CrossIndustryInvoice};
|
|
51 |
print "2\n";
|
|
52 |
|
|
53 |
my $dom = eval { XML::LibXML->load_xml(string => $content) };
|
|
54 |
return $content if $dom && ($dom->documentElement->nodeName eq 'rsm:CrossIndustryInvoice');
|
|
55 |
}
|
|
56 |
}
|
|
57 |
}
|
|
58 |
|
|
59 |
return undef;
|
|
60 |
}
|
|
61 |
|
|
62 |
sub _get_xmp_metadata {
|
|
63 |
my ($doc) = @_;
|
|
64 |
|
|
65 |
my $node = $doc->getValue($doc->getRootDict->{Metadata});
|
|
66 |
if ($node && $node->{StreamData} && defined($node->{StreamData}->{value})) {
|
|
67 |
return $node->{StreamData}->{value};
|
|
68 |
}
|
|
69 |
|
|
70 |
return undef;
|
|
71 |
}
|
|
72 |
|
|
73 |
sub extract_from_pdf {
|
|
74 |
my ($self, $file_name) = @_;
|
|
75 |
|
|
76 |
my $pdf_doc = CAM::PDF->new($file_name);
|
|
77 |
|
|
78 |
if (!$pdf_doc) {
|
|
79 |
return {
|
|
80 |
result => RES_ERR_FILE_OPEN(),
|
|
81 |
message => $::locale->text('The file \'#1\' could not be opened for reading.', $file_name),
|
|
82 |
};
|
|
83 |
}
|
|
84 |
|
|
85 |
my $xmp = _get_xmp_metadata($pdf_doc);
|
|
86 |
if (!defined $xmp) {
|
|
87 |
return {
|
|
88 |
result => RES_ERR_NO_XMP_METADATA(),
|
|
89 |
message => $::locale->text('The file \'#1\' does not contain the required XMP meta data.', $file_name),
|
|
90 |
};
|
|
91 |
}
|
|
92 |
|
|
93 |
my $bad = {
|
|
94 |
result => RES_ERR_NO_XMP_METADATA(),
|
|
95 |
message => $::locale->text('Parsing the XMP metadata failed.'),
|
|
96 |
};
|
|
97 |
|
|
98 |
my $dom = eval { XML::LibXML->load_xml(string => $xmp) };
|
|
99 |
|
|
100 |
return $bad if !$dom;
|
|
101 |
|
|
102 |
my $xpc = XML::LibXML::XPathContext->new($dom);
|
|
103 |
$xpc->registerNs('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#');
|
|
104 |
|
|
105 |
my $zugferd_version;
|
|
106 |
|
|
107 |
foreach my $node ($xpc->findnodes('/x:xmpmeta/rdf:RDF/rdf:Description')) {
|
|
108 |
my $ns = first { ref($_) eq 'XML::LibXML::Namespace' } $node->attributes;
|
|
109 |
next unless $ns;
|
|
110 |
|
|
111 |
if ($ns->getData =~ m{urn:zugferd:pdfa:CrossIndustryDocument:invoice:2p0}) {
|
|
112 |
$zugferd_version = '2p0';
|
|
113 |
last;
|
|
114 |
}
|
|
115 |
|
|
116 |
if ($ns->getData =~ m{zugferd}i) {
|
|
117 |
$zugferd_version = 'unsupported';
|
|
118 |
last;
|
|
119 |
}
|
|
120 |
}
|
|
121 |
|
|
122 |
if (!$zugferd_version) {
|
|
123 |
return {
|
|
124 |
result => RES_ERR_NOT_ZUGFERD(),
|
|
125 |
message => $::locale->text('The XMP metadata does not declare the ZUGFeRD data.'),
|
|
126 |
};
|
|
127 |
}
|
|
128 |
|
|
129 |
if ($zugferd_version !~ m{^2p}) {
|
|
130 |
return {
|
|
131 |
result => RES_ERR_UNSUPPORTED_ZUGFERD_VERSION(),
|
|
132 |
message => $::locale->text('The ZUGFeRD version used is not supported.'),
|
|
133 |
};
|
|
134 |
}
|
|
135 |
|
|
136 |
my $invoice_xml = _extract_zugferd_invoice_xml($pdf_doc);
|
|
137 |
|
|
138 |
if (!defined $invoice_xml) {
|
|
139 |
return {
|
|
140 |
result => RES_ERR_NO_XML_INVOICE(),
|
|
141 |
message => $::locale->text('The ZUGFeRD XML invoice was not found.'),
|
|
142 |
};
|
|
143 |
}
|
|
144 |
|
|
145 |
return {
|
|
146 |
result => RES_OK(),
|
|
147 |
metadata_xmp => $xmp,
|
|
148 |
invoice_xml => $invoice_xml,
|
|
149 |
};
|
|
150 |
}
|
|
151 |
|
|
152 |
1;
|
|
153 |
|
|
154 |
__END__
|
|
155 |
|
|
156 |
=pod
|
|
157 |
|
|
158 |
=encoding utf8
|
|
159 |
|
|
160 |
=head1 NAME
|
|
161 |
|
|
162 |
SL::ZUGFeRD - Helper functions for dealing with PDFs containing ZUGFeRD invoice data
|
|
163 |
|
|
164 |
=head1 SYNOPSIS
|
|
165 |
|
|
166 |
my $pdf = '/path/to/my.pdf';
|
|
167 |
my $info = SL::ZUGFeRD->extract_from_pdf($pdf);
|
|
168 |
|
|
169 |
if ($info->{result} != SL::ZUGFeRD::RES_OK()) {
|
|
170 |
# An error occurred; log message from parser:
|
|
171 |
$::lxdebug->message(LXDebug::DEBUG1(), "Could not extract ZUGFeRD data from $pdf: " . $info->{message});
|
|
172 |
return;
|
|
173 |
}
|
|
174 |
|
|
175 |
# Parse & handle invoice XML:
|
|
176 |
my $dom = XML::LibXML->load_xml(string => $info->{invoice_xml});
|
|
177 |
|
|
178 |
|
|
179 |
=head1 FUNCTIONS
|
|
180 |
|
|
181 |
=over 4
|
|
182 |
|
|
183 |
=item C<extract_from_pdf> C<$file_name>
|
|
184 |
|
|
185 |
Opens an existing PDF in the file system and tries to extract ZUGFeRD
|
|
186 |
invoice data from it. First it'll parse the XMP metadata and look for
|
|
187 |
the ZUGFeRD declaration inside. If the declaration isn't found or the
|
|
188 |
declared version isn't 2p0, an error is returned.
|
|
189 |
|
|
190 |
Otherwise it'll continue to look through all embedded files in the
|
|
191 |
PDF. The first embedded XML file with a root node of
|
|
192 |
C<rsm:CrossCountryInvoice> will be returnd.
|
|
193 |
|
|
194 |
Always returns a hash ref containing the key C<result>, a number that
|
|
195 |
can be one of the following constants:
|
|
196 |
|
|
197 |
=over 4
|
|
198 |
|
|
199 |
=item C<RES_OK> (0): parsing was OK; the returned hash will also
|
|
200 |
contain the keys C<xmp_metadata> and C<invoice_xml> which will contain
|
|
201 |
the XML text of the metadata & the ZUGFeRD invoice.
|
|
202 |
|
|
203 |
=item C<RES_ERR_…> (all values E<gt> 0): parsing failed; the hash will
|
|
204 |
also contain a key C<message> which contains a human-readable
|
|
205 |
information about what exactly failed.
|
|
206 |
|
|
207 |
=back
|
|
208 |
|
|
209 |
=back
|
|
210 |
|
|
211 |
=head1 BUGS
|
|
212 |
|
|
213 |
Nothing here yet.
|
|
214 |
|
|
215 |
=head1 AUTHOR
|
|
216 |
|
|
217 |
Moritz Bunkus E<lt>m.bunkus@linet-services.deE<gt>
|
|
218 |
|
|
219 |
=cut
|
SL::ZUGFeRD: Funktionen zum Extrahieren von ZUGFeRD-XML-Daten aus PDFs