/ - Diff - projekt kivitendo - wissen

« Zurück | Weiter »

Revision a827a37d

Von Moritz Bunkus vor fast 5 Jahren hinzugefügt

ID a827a37dbc9fda429719d199613a930200820ee6
Vorgänger dc78c225
Nachfolger 80cda5e6

SL::ZUGFeRD: Funktionen zum Extrahieren von ZUGFeRD-XML-Daten aus PDFs

       { name => "parent",                              url => "http://search.cpan.org/~corion/",    debian => 'libparent-perl' },
       { name => "Algorithm::CheckDigits",              url => "http://search.cpan.org/~mamawe/",    debian => 'libalgorithm-checkdigits-perl' },
       { name => "Archive::Zip",    version => '1.16',  url => "http://search.cpan.org/~phred/",     debian => 'libarchive-zip-perl' },
       { name => "CAM::PDF",                            url => "https://metacpan.org/pod/CAM::PDF",  debian => 'libcap-pdf-perl' },
       { name => "CGI",             version => '3.43',  url => "http://search.cpan.org/~leejo/",     debian => 'libcgi-pm-perl' }, # 4.09 is not core anymore (perl 5.20)
       { name => "Clone",                               url => "http://search.cpan.org/~rdf/",       debian => 'libclone-perl' },
       { name => "Config::Std",                         url => "http://search.cpan.org/~dconway/",   debian => 'libconfig-std-perl' },
-...
       { name => "Text::Iconv",     version => '1.2',   url => "http://search.cpan.org/~mpiotr/",    debian => 'libtext-iconv-perl' },
       { name => "Text::Unidecode",                     url => "http://search.cpan.org/~sburke/",    debian => 'libtext-unidecode-perl' },
       { name => "URI",             version => '1.35',  url => "http://search.cpan.org/~gaas/",      debian => 'liburi-perl' },
       { name => "XML::LibXML",                         url => "https://metacpan.org/pod/XML::LibXML", debian => 'libxml-libxml-perl' },
       { name => "XML::Writer",     version => '0.602', url => "http://search.cpan.org/~josephw/",   debian => 'libxml-writer-perl' },
       { name => "YAML",            version => '0.62',  url => "http://search.cpan.org/~ingy/",      debian => 'libyaml-perl' },
     );

     package SL::ZUGFeRD;
     use strict;
     use warnings;
     use utf8;
     use CAM::PDF;
     use Data::Dumper;
     use List::Util qw(first);
     use XML::LibXML;
     use constant RES_OK                              => 0;
     use constant RES_ERR_FILE_OPEN                   => 1;
     use constant RES_ERR_NO_XMP_METADATA             => 2;
     use constant RES_ERR_NO_XML_INVOICE              => 3;
     use constant RES_ERR_NOT_ZUGFERD                 => 4;
     use constant RES_ERR_UNSUPPORTED_ZUGFERD_VERSION => 5;
     sub _extract_zugferd_invoice_xml {
       my $doc        = shift;
       my $names_dict = $doc->getValue($doc->getRootDict->{Names}) or return {};
       my $files_tree = $names_dict->{EmbeddedFiles}               or return {};
       my @agenda     = $files_tree;
       my $ret        = {};
       # Hardly ever more than single leaf, but...
       while (@agenda) {
         my $item = $doc->getValue(shift @agenda);
         if ($item->{Kids}) {
           my $kids = $doc->getValue($item->{Kids});
           push @agenda, @$kids
         } else {
           my $nodes = $doc->getValue($item->{Names});
           my @names = map { $doc->getValue($_)} @$nodes;
           while (@names) {
             my ($k, $v)  = splice @names, 0, 2;
             my $ef_node  = $v->{EF};
             my $ef_dict  = $doc->getValue($ef_node);
             my $fnode    = (values %$ef_dict)[0];
             my $any_num  = $fnode->{value};
             my $obj_node = $doc->dereference($any_num);
             my $content  = $doc->decodeOne($obj_node->{value}, 0) // '';
             print "1\n";
             next if $content !~ m{<rsm:CrossIndustryInvoice};
             print "2\n";
             my $dom = eval { XML::LibXML->load_xml(string => $content) };
             return $content if $dom && ($dom->documentElement->nodeName eq 'rsm:CrossIndustryInvoice');
+          }
+        }
+      }
       return undef;
+    }
     sub _get_xmp_metadata {
       my ($doc) = @_;
       my $node = $doc->getValue($doc->getRootDict->{Metadata});
       if ($node && $node->{StreamData} && defined($node->{StreamData}->{value})) {
         return $node->{StreamData}->{value};
+      }
       return undef;
+    }
     sub extract_from_pdf {
       my ($self, $file_name) = @_;
       my $pdf_doc = CAM::PDF->new($file_name);
       if (!$pdf_doc) {
         return {
           result  => RES_ERR_FILE_OPEN(),
           message => $::locale->text('The file \'#1\' could not be opened for reading.', $file_name),
         };
+      }
       my $xmp = _get_xmp_metadata($pdf_doc);
       if (!defined $xmp) {
         return {
           result  => RES_ERR_NO_XMP_METADATA(),
           message => $::locale->text('The file \'#1\' does not contain the required XMP meta data.', $file_name),
         };
+      }
       my $bad = {
         result  => RES_ERR_NO_XMP_METADATA(),
         message => $::locale->text('Parsing the XMP metadata failed.'),
       };
       my $dom = eval { XML::LibXML->load_xml(string => $xmp) };
       return $bad if !$dom;
       my $xpc = XML::LibXML::XPathContext->new($dom);
       $xpc->registerNs('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#');
       my $zugferd_version;
       foreach my $node ($xpc->findnodes('/x:xmpmeta/rdf:RDF/rdf:Description')) {
         my $ns = first { ref($_) eq 'XML::LibXML::Namespace' } $node->attributes;
         next unless $ns;
         if ($ns->getData =~ m{urn:zugferd:pdfa:CrossIndustryDocument:invoice:2p0}) {
           $zugferd_version = '2p0';
           last;
+        }
         if ($ns->getData =~ m{zugferd}i) {
           $zugferd_version = 'unsupported';
           last;
+        }
+      }
       if (!$zugferd_version) {
         return {
           result  => RES_ERR_NOT_ZUGFERD(),
           message => $::locale->text('The XMP metadata does not declare the ZUGFeRD data.'),
         };
+      }
       if ($zugferd_version !~ m{^2p}) {
         return {
           result  => RES_ERR_UNSUPPORTED_ZUGFERD_VERSION(),
           message => $::locale->text('The ZUGFeRD version used is not supported.'),
         };
+      }
       my $invoice_xml = _extract_zugferd_invoice_xml($pdf_doc);
       if (!defined $invoice_xml) {
         return {
           result  => RES_ERR_NO_XML_INVOICE(),
           message => $::locale->text('The ZUGFeRD XML invoice was not found.'),
         };
+      }
       return {
         result       => RES_OK(),
         metadata_xmp => $xmp,
         invoice_xml  => $invoice_xml,
       };
+    }
 ;
     __END__
     =pod
     =encoding utf8
     =head1 NAME
     SL::ZUGFeRD - Helper functions for dealing with PDFs containing ZUGFeRD invoice data
     =head1 SYNOPSIS
         my $pdf  = '/path/to/my.pdf';
         my $info = SL::ZUGFeRD->extract_from_pdf($pdf);
         if ($info->{result} != SL::ZUGFeRD::RES_OK()) {
           # An error occurred; log message from parser:
           $::lxdebug->message(LXDebug::DEBUG1(), "Could not extract ZUGFeRD data from $pdf: " . $info->{message});
           return;
+        }
         # Parse & handle invoice XML:
         my $dom = XML::LibXML->load_xml(string => $info->{invoice_xml});
     =head1 FUNCTIONS
     =over 4
     =item C<extract_from_pdf> C<$file_name>
     Opens an existing PDF in the file system and tries to extract ZUGFeRD
     invoice data from it. First it'll parse the XMP metadata and look for
     the ZUGFeRD declaration inside. If the declaration isn't found or the
     declared version isn't 2p0, an error is returned.
     Otherwise it'll continue to look through all embedded files in the
     PDF. The first embedded XML file with a root node of
     C<rsm:CrossCountryInvoice> will be returnd.
     Always returns a hash ref containing the key C<result>, a number that
     can be one of the following constants:
     =over 4
     =item C<RES_OK> (0): parsing was OK; the returned hash will also
     contain the keys C<xmp_metadata> and C<invoice_xml> which will contain
     the XML text of the metadata & the ZUGFeRD invoice.
     =item C<RES_ERR_…> (all values E<gt> 0): parsing failed; the hash will
     also contain a key C<message> which contains a human-readable
     information about what exactly failed.
     =back
     =back
     =head1 BUGS
     Nothing here yet.
     =head1 AUTHOR
     Moritz Bunkus E<lt>m.bunkus@linet-services.deE<gt>
     =cut

Auch abrufbar als: Unified diff

Projekt

Allgemein

Profil

projekt kivitendo

Revision a827a37d

Von Moritz Bunkus vor fast 5 Jahren hinzugefügt