Projekt

Allgemein

Profil

« Zurück | Weiter » 

Revision 81a5ba24

Von Sven Schöling vor mehr als 1 Jahr hinzugefügt

  • ID 81a5ba2415caab794235792ce71ed0663c949fab
  • Vorgänger 1b8c96ac
  • Nachfolger 350c829a

ZUGFeRD: CAM::PDF durch PDF::API2 ersetzt

CAM::PDF wird seit 2014 nicht mehr gepflegt und kann PDF 1.7+ nicht
richtig öffnen. PDF::API2 ist aktiv maintained, hat aber nicht ganz so
schöne Zugriffsmethoden.

Die Version hier ist mit einer Rechnung von kivitendo getestet (PDF/A-1,
PDF 1.5) und einer externen (PDF/A-3, PDF 1.7).

Unterschiede anzeigen:

SL/ZUGFeRD.pm
use warnings;
use utf8;
use CAM::PDF;
use PDF::API2;
use Data::Dumper;
use List::Util qw(first);
use XML::LibXML;
......
my $doc = shift;
my %res_fail;
$res_fail{'result'} = RES_ERR_NO_ATTACHMENT();
$res_fail{'message'} = "PDF does not have a Names dictionary.";
my $names_dict = $doc->getValue($doc->getRootDict->{Names}) or return \%res_fail;
$res_fail{'message'} = "PDF does not have a EmbeddedFiles tree.";
my $files_tree = $names_dict->{EmbeddedFiles} or return \%res_fail;
# Unfortunately PDF::API2 has no public-facing API to access the actual PDF name dictionaries,
# so we need to use the internal data, just like with CAM::PDF before.
#
# PDF::API2 will internally read $doc->{pdf}{Root}{Names} for us, but after that every entry
# in the tree may be an indirect object (Objind) before realising it.
#
# The actual embedded files will be located at $doc->{pdf}{Root}{Names}{EmbeddedFiles}
#
my $node = $doc->{pdf};
for (qw(Root Names EmbeddedFiles)) {
$node = $node->{$_};
if (!ref $node) {
return {
result => RES_ERR_NO_ATTACHMENT(),
message => "unexpected unbless node while trying to access $_ node",
}
}
if ('PDF::API2::Basic::PDF::Objind' eq ref $node) {
$node->realise;
}
# after realising it should be a Dict
if ('PDF::API2::Basic::PDF::Dict' ne ref $node) {
return {
result => RES_ERR_NO_ATTACHMENT(),
message => "unexpected node type [@{[ref($node)]}] after realising $_ node",
}
}
}
my @agenda = $files_tree;
# now we have an array of possible attachments
my @agenda = $node;
my $parser; # SL::XMLInvoice object used as return value
my @res; # Temporary storage for error messages encountered during
......
# Hardly ever more than a single leaf, but...
while (@agenda) {
my $item = $doc->getValue(shift @agenda);
my $item = shift @agenda;
if ($item->{Kids}) {
my $kids = $doc->getValue($item->{Kids});
push @agenda, @$kids
my @kids = $item->{Kids}->realise->elements;
push @agenda, @kids;
} else {
my $nodes = $doc->getValue($item->{Names});
my @names = map { $doc->getValue($_)} @$nodes;
my @names = $item->{Names}->realise->elements;
TRY_NEXT:
while (@names) {
my ($k, $v) = splice @names, 0, 2;
my $ef_node = $v->{EF};
my $ef_dict = $doc->getValue($ef_node);
my $fnode = (values %$ef_dict)[0];
my $any_num = $fnode->{value};
my $obj_node = $doc->dereference($any_num);
my $content = $doc->decodeOne($obj_node->{value}, 0) // '';
my $fnode = $v->realise->{EF}->realise->{F}->realise;
$fnode->read_stream(1);
my $content = $fnode->{' stream'};
$parser = $parser = SL::XMLInvoice->new($content);
$parser = SL::XMLInvoice->new($content);
# Caveat: this will only ever catch the first attachment looking like
# an XML invoice.
......
# Return the XMP metadata of the given PDF document object.
#
# NOTE(review): this block is shown as rendered by a diff view without +/-
# markers, so it appears to contain both sides of the change: the lookup via
# getValue/getRootDict and the call to $doc->xmpMetadata. Presumably only one
# of the two belongs to the final sub -- confirm against the unified diff.
sub _get_xmp_metadata {
my ($doc) = @_;
# Resolve the Metadata entry of the document's root dictionary.
my $node = $doc->getValue($doc->getRootDict->{Metadata});
# Only hand back the stream value if every level down to it is present.
if ($node && $node->{StreamData} && defined($node->{StreamData}->{value})) {
return $node->{StreamData}->{value};
}
# No usable metadata stream was found.
return undef;
# As written here, this statement is unreachable because of the unconditional
# return directly above it.
$doc->xmpMetadata;
}
sub extract_from_pdf {
my ($self, $file_name) = @_;
my @warnings;
my $pdf_doc = CAM::PDF->new($file_name);
my $pdf_doc = PDF::API2->openScalar($file_name);
if (!$pdf_doc) {
return {
......
my %res;
my $invoice_xml = SL::XMLInvoice->new($data);
%res = (
result => $invoice_xml->{result},
message => $invoice_xml->{message},

Auch abrufbar als: Unified diff