Projekt

Allgemein

Profil

« Zurück | Weiter » 

Revision 81a5ba24

Von Sven Schöling vor etwa 1 Jahr hinzugefügt

  • ID 81a5ba2415caab794235792ce71ed0663c949fab
  • Vorgänger 1b8c96ac
  • Nachfolger 350c829a

ZUGFeRD: CAM::PDF durch PDF::API2 ersetzt

CAM::PDF wird seit 2014 nicht mehr gepflegt und kann PDF 1.7+ nicht
richtig öffnen. PDF::API2 wird aktiv gepflegt, hat aber nicht ganz so
schöne Zugriffsmethoden.

Die Version hier ist mit einer Rechnung von kivitendo getestet (PDF/A-1,
PDF 1.5) und einer externen (PDF/A-3, PDF 1.7).

Unterschiede anzeigen:

SL/ZUGFeRD.pm
4 4
use warnings;
5 5
use utf8;
6 6

  
7
use CAM::PDF;
7
use PDF::API2;
8 8
use Data::Dumper;
9 9
use List::Util qw(first);
10 10
use XML::LibXML;
......
47 47
  my $doc        = shift;
48 48
  my %res_fail;
49 49

  
50
  $res_fail{'result'}  = RES_ERR_NO_ATTACHMENT();
51
  $res_fail{'message'} = "PDF does not have a Names dictionary.";
52
  my $names_dict = $doc->getValue($doc->getRootDict->{Names}) or return \%res_fail;
53

  
54
  $res_fail{'message'} = "PDF does not have a EmbeddedFiles tree.";
55
  my $files_tree = $names_dict->{EmbeddedFiles}               or return \%res_fail;
50
  # unfortunately PDF::API2 has no public-facing API to access the actual PDF name dictionaries
51
  # so we need to use the internal data, just like with CAM::PDF before
52
  #
53
  # PDF::API2 will internally read $doc->{pdf}{Root}{Names} for us, but after that every entry
54
  # in the tree may still be an indirect object (Objind) until it has been realised.
55
  #
56
  # The actual embedded files will be located at $doc->{pdf}{Root}{Names}{EmbeddedFiles}
57
  #
58

  
59
  my $node = $doc->{pdf};
60
  for (qw(Root Names EmbeddedFiles)) {
61
    $node = $node->{$_};
62
    if (!ref $node) {
63
      return {
64
        result  => RES_ERR_NO_ATTACHMENT(),
65
        message => "unexpected unbless node while trying to access $_ node",
66
      }
67
    }
68
    if ('PDF::API2::Basic::PDF::Objind' eq ref $node) {
69
      $node->realise;
70
    }
71
    # after realising it should be a Dict
72
    if ('PDF::API2::Basic::PDF::Dict' ne ref $node) {
73
      return {
74
        result  => RES_ERR_NO_ATTACHMENT(),
75
        message => "unexpected node type [@{[ref($node)]}] after realising $_ node",
76
      }
77
    }
78
  }
56 79

  
57
  my @agenda     = $files_tree;
80
  # now $node is the EmbeddedFiles dictionary, the root of the tree holding the possible attachments
81
  my @agenda     = $node;
58 82

  
59 83
  my $parser;  # SL::XMLInvoice object used as return value
60 84
  my @res;     # Temporary storage for error messages encountered during
......
63 87
  # Hardly ever more than a single leaf, but...
64 88

  
65 89
  while (@agenda) {
66
    my $item = $doc->getValue(shift @agenda);
90
    my $item = shift @agenda;
67 91

  
68 92
    if ($item->{Kids}) {
69
      my $kids = $doc->getValue($item->{Kids});
70
      push @agenda, @$kids
93
      my @kids = $item->{Kids}->realise->elements;
94
      push @agenda, @kids;
71 95

  
72 96
    } else {
73
      my $nodes = $doc->getValue($item->{Names});
74
      my @names = map { $doc->getValue($_)} @$nodes;
97
      my @names = $item->{Names}->realise->elements;
75 98

  
99
      TRY_NEXT:
76 100
      while (@names) {
77 101
        my ($k, $v)  = splice @names, 0, 2;
78
        my $ef_node  = $v->{EF};
79
        my $ef_dict  = $doc->getValue($ef_node);
80
        my $fnode    = (values %$ef_dict)[0];
81
        my $any_num  = $fnode->{value};
82
        my $obj_node = $doc->dereference($any_num);
83
        my $content  = $doc->decodeOne($obj_node->{value}, 0) // '';
102
        my $fnode    = $v->realise->{EF}->realise->{F}->realise;
103

  
104
        $fnode->read_stream(1);
105

  
106
        my $content  = $fnode->{' stream'};
84 107

  
85
        $parser = $parser = SL::XMLInvoice->new($content);
108
        $parser = SL::XMLInvoice->new($content);
86 109

  
87 110
        # Caveat: this will only ever catch the first attachment looking like
88 111
        #         an XML invoice.
......
114 137
sub _get_xmp_metadata {
115 138
  my ($doc) = @_;
116 139

  
117
  my $node = $doc->getValue($doc->getRootDict->{Metadata});
118
  if ($node && $node->{StreamData} && defined($node->{StreamData}->{value})) {
119
    return $node->{StreamData}->{value};
120
  }
121
  return undef;
140
  $doc->xmpMetadata;
122 141
}
123 142

  
124 143
sub extract_from_pdf {
125 144
  my ($self, $file_name) = @_;
126 145
  my @warnings;
127 146

  
128
  my $pdf_doc = CAM::PDF->new($file_name);
147
  my $pdf_doc = PDF::API2->openScalar($file_name);
129 148

  
130 149
  if (!$pdf_doc) {
131 150
    return {
......
200 219
  my %res;
201 220

  
202 221
  my $invoice_xml = SL::XMLInvoice->new($data);
203
  
222

  
204 223
  %res = (
205 224
    result       => $invoice_xml->{result},
206 225
    message      => $invoice_xml->{message},

Auch abrufbar als: Unified diff