Revision 81a5ba24
Von Sven Schöling vor etwa 1 Jahr hinzugefügt
SL/ZUGFeRD.pm | ||
---|---|---|
4 | 4 |
use warnings; |
5 | 5 |
use utf8; |
6 | 6 |
|
7 |
use CAM::PDF;
|
|
7 |
use PDF::API2;
|
|
8 | 8 |
use Data::Dumper; |
9 | 9 |
use List::Util qw(first); |
10 | 10 |
use XML::LibXML; |
... | ... | |
47 | 47 |
my $doc = shift; |
48 | 48 |
my %res_fail; |
49 | 49 |
|
50 |
$res_fail{'result'} = RES_ERR_NO_ATTACHMENT(); |
|
51 |
$res_fail{'message'} = "PDF does not have a Names dictionary."; |
|
52 |
my $names_dict = $doc->getValue($doc->getRootDict->{Names}) or return \%res_fail; |
|
53 |
|
|
54 |
$res_fail{'message'} = "PDF does not have a EmbeddedFiles tree."; |
|
55 |
my $files_tree = $names_dict->{EmbeddedFiles} or return \%res_fail; |
|
50 |
# unfortunately PDF::API2 has no public-facing API to access the actual PDF name dictionaries |
|
51 |
# so we need to use the internal data, just like with CAM::PDF before |
|
52 |
# |
|
53 |
# PDF::API2 will internally read $doc->{pdf}{Root}{Names} for us, but after that every entry |
|
54 |
# in the tree may be an indirect object (Objind) until it is realised. |
|
55 |
# |
|
56 |
# The actual embedded files will be located at $doc->{pdf}{Root}{Names}{EmbeddedFiles} |
|
57 |
# |
|
58 |
|
|
59 |
my $node = $doc->{pdf}; |
|
60 |
for (qw(Root Names EmbeddedFiles)) { |
|
61 |
$node = $node->{$_}; |
|
62 |
if (!ref $node) { |
|
63 |
return { |
|
64 |
result => RES_ERR_NO_ATTACHMENT(), |
|
65 |
message => "unexpected unbless node while trying to access $_ node", |
|
66 |
} |
|
67 |
} |
|
68 |
if ('PDF::API2::Basic::PDF::Objind' eq ref $node) { |
|
69 |
$node->realise; |
|
70 |
} |
|
71 |
# after realising it should be a Dict |
|
72 |
if ('PDF::API2::Basic::PDF::Dict' ne ref $node) { |
|
73 |
return { |
|
74 |
result => RES_ERR_NO_ATTACHMENT(), |
|
75 |
message => "unexpected node type [@{[ref($node)]}] after realising $_ node", |
|
76 |
} |
|
77 |
} |
|
78 |
} |
|
56 | 79 |
|
57 |
my @agenda = $files_tree; |
|
80 |
# now we have an array of possible attachments |
|
81 |
my @agenda = $node; |
|
58 | 82 |
|
59 | 83 |
my $parser; # SL::XMLInvoice object used as return value |
60 | 84 |
my @res; # Temporary storage for error messages encountered during |
... | ... | |
63 | 87 |
# Hardly ever more than a single leaf, but... |
64 | 88 |
|
65 | 89 |
while (@agenda) { |
66 |
my $item = $doc->getValue(shift @agenda);
|
|
90 |
my $item = shift @agenda;
|
|
67 | 91 |
|
68 | 92 |
if ($item->{Kids}) { |
69 |
my $kids = $doc->getValue($item->{Kids});
|
|
70 |
push @agenda, @$kids
|
|
93 |
my @kids = $item->{Kids}->realise->elements;
|
|
94 |
push @agenda, @kids;
|
|
71 | 95 |
|
72 | 96 |
} else { |
73 |
my $nodes = $doc->getValue($item->{Names}); |
|
74 |
my @names = map { $doc->getValue($_)} @$nodes; |
|
97 |
my @names = $item->{Names}->realise->elements; |
|
75 | 98 |
|
99 |
TRY_NEXT: |
|
76 | 100 |
while (@names) { |
77 | 101 |
my ($k, $v) = splice @names, 0, 2; |
78 |
my $ef_node = $v->{EF}; |
|
79 |
my $ef_dict = $doc->getValue($ef_node); |
|
80 |
my $fnode = (values %$ef_dict)[0]; |
|
81 |
my $any_num = $fnode->{value}; |
|
82 |
my $obj_node = $doc->dereference($any_num); |
|
83 |
my $content = $doc->decodeOne($obj_node->{value}, 0) // ''; |
|
102 |
my $fnode = $v->realise->{EF}->realise->{F}->realise; |
|
103 |
|
|
104 |
$fnode->read_stream(1); |
|
105 |
|
|
106 |
my $content = $fnode->{' stream'}; |
|
84 | 107 |
|
85 |
$parser = $parser = SL::XMLInvoice->new($content);
|
|
108 |
$parser = SL::XMLInvoice->new($content); |
|
86 | 109 |
|
87 | 110 |
# Caveat: this will only ever catch the first attachment looking like |
88 | 111 |
# an XML invoice. |
... | ... | |
114 | 137 |
sub _get_xmp_metadata { |
115 | 138 |
my ($doc) = @_; |
116 | 139 |
|
117 |
my $node = $doc->getValue($doc->getRootDict->{Metadata}); |
|
118 |
if ($node && $node->{StreamData} && defined($node->{StreamData}->{value})) { |
|
119 |
return $node->{StreamData}->{value}; |
|
120 |
} |
|
121 |
return undef; |
|
140 |
$doc->xmpMetadata; |
|
122 | 141 |
} |
123 | 142 |
|
124 | 143 |
sub extract_from_pdf { |
125 | 144 |
my ($self, $file_name) = @_; |
126 | 145 |
my @warnings; |
127 | 146 |
|
128 |
my $pdf_doc = CAM::PDF->new($file_name);
|
|
147 |
my $pdf_doc = PDF::API2->openScalar($file_name);
|
|
129 | 148 |
|
130 | 149 |
if (!$pdf_doc) { |
131 | 150 |
return { |
... | ... | |
200 | 219 |
my %res; |
201 | 220 |
|
202 | 221 |
my $invoice_xml = SL::XMLInvoice->new($data); |
203 |
|
|
222 |
|
|
204 | 223 |
%res = ( |
205 | 224 |
result => $invoice_xml->{result}, |
206 | 225 |
message => $invoice_xml->{message}, |
Auch abrufbar als: Unified diff
ZUGFeRD: CAM::PDF durch PDF::API2 ersetzt
CAM::PDF wird seit 2014 nicht mehr gepflegt und kann PDF 1.7+ nicht
richtig öffnen. PDF::API2 wird aktiv gepflegt, hat aber nicht ganz so
schöne Zugriffsmethoden.
Die Version hier ist mit einer Rechnung von kivitendo getestet (PDF/A-1,
PDF 1.5) und einer externen (PDF/A-3, PDF 1.7).