Projekt

Allgemein

Profil

Herunterladen (9,8 KB) Statistiken
| Zweig: | Markierung: | Revision:
a827a37d Moritz Bunkus
package SL::ZUGFeRD;

use strict;
use warnings;
use utf8;

81a5ba24 Sven Schöling
use PDF::API2;
a827a37d Moritz Bunkus
use Data::Dumper;
use List::Util qw(first);
use XML::LibXML;

ba40069b Moritz Bunkus
use SL::Locale::String qw(t8);
b525a340 Johannes Grassler
use SL::XMLInvoice;
ba40069b Moritz Bunkus
use parent qw(Exporter);
e2f0105f Moritz Bunkus
our @EXPORT_PROFILES = qw(PROFILE_FACTURX_EXTENDED PROFILE_XRECHNUNG);
ba40069b Moritz Bunkus
our @EXPORT_OK = (@EXPORT_PROFILES);
our %EXPORT_TAGS = (PROFILES => \@EXPORT_PROFILES);

use constant PROFILE_FACTURX_EXTENDED => 0;
e2f0105f Moritz Bunkus
use constant PROFILE_XRECHNUNG => 1;
ba40069b Moritz Bunkus
1522aeb7 Johannes Grassler
use constant RES_OK => 0;
use constant RES_ERR_FILE_OPEN => -1;
use constant RES_ERR_NO_ATTACHMENT => -2;
a827a37d Moritz Bunkus
ba40069b Moritz Bunkus
our @customer_settings = (
27cf8a41 Moritz Bunkus
[ 0, t8('Do not create Factur-X/ZUGFeRD invoices') ],
[ PROFILE_FACTURX_EXTENDED() * 2 + 1, t8('Create with profile \'Factur-X 1.01.06/ZUGFeRD 2.2 extended\'') ],
[ PROFILE_FACTURX_EXTENDED() * 2 + 2, t8('Create with profile \'Factur-X 1.01.06/ZUGFeRD 2.2 extended\' (test mode)') ],
[ PROFILE_XRECHNUNG() * 2 + 1, t8('Create with profile \'XRechnung 2.0.0\'') ],
[ PROFILE_XRECHNUNG() * 2 + 2, t8('Create with profile \'XRechnung 2.0.0\' (test mode)') ],
ba40069b Moritz Bunkus
);

sub convert_customer_setting {
my ($class, $customer_setting) = @_;

return () if ($customer_setting <= 0) || ($customer_setting >= scalar(@customer_settings));

return (
profile => int(($customer_setting - 1) / 2),
test_mode => ($customer_setting - 1) % 2,
);
}

a827a37d Moritz Bunkus
sub _extract_zugferd_invoice_xml {
my $doc = shift;
b525a340 Johannes Grassler
my %res_fail;

81a5ba24 Sven Schöling
# unfortunately PDF::API2 has no public facing api to access the actual pdf name dictionaries
# so we need to use the internal data, just like with PDF::CAM before
#
# PDF::API2 will internally read $doc->{pdf}{Root}{Names} for us, but after that every entry
# in the tree may be an indirect object (Objind) before realising it.
#
# The actual embedded files will be located at $doc->{pdf}{Root}{Names}{EmbeddedFiles}
#

my $node = $doc->{pdf};
for (qw(Root Names EmbeddedFiles)) {
$node = $node->{$_};
if (!ref $node) {
return {
result => RES_ERR_NO_ATTACHMENT(),
message => "unexpected unbless node while trying to access $_ node",
}
}
if ('PDF::API2::Basic::PDF::Objind' eq ref $node) {
$node->realise;
}
# after realising it should be a Dict
if ('PDF::API2::Basic::PDF::Dict' ne ref $node) {
return {
result => RES_ERR_NO_ATTACHMENT(),
message => "unexpected node type [@{[ref($node)]}] after realising $_ node",
}
}
}
b525a340 Johannes Grassler
81a5ba24 Sven Schöling
# now we have an array of possible attachments
my @agenda = $node;
b525a340 Johannes Grassler
my $parser; # SL::XMLInvoice object used as return value
my @res; # Temporary storage for error messages encountered during
# attempts to process attachments.
a827a37d Moritz Bunkus
# Hardly ever more than single leaf, but...

while (@agenda) {
81a5ba24 Sven Schöling
my $item = shift @agenda;
a827a37d Moritz Bunkus
if ($item->{Kids}) {
81a5ba24 Sven Schöling
my @kids = $item->{Kids}->realise->elements;
push @agenda, @kids;
a827a37d Moritz Bunkus
} else {
81a5ba24 Sven Schöling
my @names = $item->{Names}->realise->elements;
a827a37d Moritz Bunkus
81a5ba24 Sven Schöling
TRY_NEXT:
a827a37d Moritz Bunkus
while (@names) {
my ($k, $v) = splice @names, 0, 2;
81a5ba24 Sven Schöling
my $fnode = $v->realise->{EF}->realise->{F}->realise;

$fnode->read_stream(1);

my $content = $fnode->{' stream'};
a827a37d Moritz Bunkus
81a5ba24 Sven Schöling
$parser = SL::XMLInvoice->new($content);
b525a340 Johannes Grassler
# Caveat: this will only ever catch the first attachment looking like
# an XML invoice.
if ( $parser->{status} == SL::XMLInvoice::RES_OK ){
return $parser;
} else {
1522aeb7 Johannes Grassler
push @res, t8(
"Could not parse PDF embedded attachment #1: #2",
$k,
$parser->{result}
);
b525a340 Johannes Grassler
}
a827a37d Moritz Bunkus
}
}
}

b525a340 Johannes Grassler
# There's going to be at least one attachment that failed to parse as XML by
# this point - if there were no attachments at all, we would have bailed out
# a lot earlier.

1522aeb7 Johannes Grassler
%res_fail = (
1b8c96ac Johannes Grassler
result => RES_ERR_FILE_OPEN,
1522aeb7 Johannes Grassler
message => join("; ", @res),
b525a340 Johannes Grassler
);

return \%res_fail;
a827a37d Moritz Bunkus
}

sub _get_xmp_metadata {
my ($doc) = @_;

81a5ba24 Sven Schöling
$doc->xmpMetadata;
a827a37d Moritz Bunkus
}

sub extract_from_pdf {
my ($self, $file_name) = @_;
b525a340 Johannes Grassler
my @warnings;
a827a37d Moritz Bunkus
81a5ba24 Sven Schöling
my $pdf_doc = PDF::API2->openScalar($file_name);
a827a37d Moritz Bunkus
if (!$pdf_doc) {
1b8c96ac Johannes Grassler
return {
result => RES_ERR_FILE_OPEN,
a827a37d Moritz Bunkus
message => $::locale->text('The file \'#1\' could not be opened for reading.', $file_name),
};
}

my $xmp = _get_xmp_metadata($pdf_doc);
b525a340 Johannes Grassler
a827a37d Moritz Bunkus
if (!defined $xmp) {
b525a340 Johannes Grassler
push @warnings, $::locale->text('The file \'#1\' does not contain the required XMP meta data.', $file_name);
} else {
my $dom = eval { XML::LibXML->load_xml(string => $xmp) };
a827a37d Moritz Bunkus
b525a340 Johannes Grassler
push @warnings, $::locale->text('Parsing the XMP metadata failed.'), if !$dom;
a827a37d Moritz Bunkus
b525a340 Johannes Grassler
my $xpc = XML::LibXML::XPathContext->new($dom);
$xpc->registerNs('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#');
a827a37d Moritz Bunkus
b525a340 Johannes Grassler
my $zugferd_version;
a827a37d Moritz Bunkus
b525a340 Johannes Grassler
my $test = $xpc->findnodes('/x:xmpmeta/rdf:RDF/rdf:Description');
a827a37d Moritz Bunkus
b525a340 Johannes Grassler
foreach my $node ($xpc->findnodes('/x:xmpmeta/rdf:RDF/rdf:Description')) {
my $ns = first { ref($_) eq 'XML::LibXML::Namespace' } $node->attributes;
next unless $ns;
a827a37d Moritz Bunkus
b525a340 Johannes Grassler
if ($ns->getData =~ m{urn:zugferd:pdfa:CrossIndustryDocument:invoice:2p0}) {
$zugferd_version = 'zugferd:2p0';
last;
}
a827a37d Moritz Bunkus
b525a340 Johannes Grassler
if ($ns->getData =~ m{urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0}) {
$zugferd_version = 'factur-x:1p0';
last;
}
ba40069b Moritz Bunkus
b525a340 Johannes Grassler
if ($ns->getData =~ m{zugferd|factur-x}i) {
$zugferd_version = 'unsupported';
last;
}
a827a37d Moritz Bunkus
}

b525a340 Johannes Grassler
if (!$zugferd_version) {
push @warnings, $::locale->text('The XMP metadata does not declare the Factur-X/ZUGFeRD data.'),
a827a37d Moritz Bunkus
}

b525a340 Johannes Grassler
if ($zugferd_version eq 'unsupported') {
push @warnings, $::locale->text('The Factur-X/ZUGFeRD version used is not supported.'),
}
a827a37d Moritz Bunkus
}

my $invoice_xml = _extract_zugferd_invoice_xml($pdf_doc);

b525a340 Johannes Grassler
my %res;
a827a37d Moritz Bunkus
b525a340 Johannes Grassler
%res = (
result => $invoice_xml->{result},
message => $invoice_xml->{message},
a827a37d Moritz Bunkus
metadata_xmp => $xmp,
invoice_xml => $invoice_xml,
b525a340 Johannes Grassler
warnings => \@warnings,
);

return \%res;
}

sub extract_from_xml {
my ($self, $data) = @_;

my %res;

my $invoice_xml = SL::XMLInvoice->new($data);
81a5ba24 Sven Schöling
b525a340 Johannes Grassler
%res = (
result => $invoice_xml->{result},
message => $invoice_xml->{message},
metadata_xmp => undef,
invoice_xml => $invoice_xml,
warnings => (),
);

return \%res;
a827a37d Moritz Bunkus
}

1;

__END__

=pod

=encoding utf8

=head1 NAME

ba40069b Moritz Bunkus
SL::ZUGFeRD - Helper functions for dealing with PDFs containing Factur-X/ZUGFeRD invoice data
a827a37d Moritz Bunkus
=head1 SYNOPSIS

my $pdf = '/path/to/my.pdf';
my $info = SL::ZUGFeRD->extract_from_pdf($pdf);

b525a340 Johannes Grassler
my $xml = '<?xml version="1.0" encoding="UTF-8"?> ...';
my $info = SL::ZUGFeRD->extract_from_xml($xml);

a827a37d Moritz Bunkus
if ($info->{result} != SL::ZUGFeRD::RES_OK()) {
# An error occurred; log message from parser:
$::lxdebug->message(LXDebug::DEBUG1(), "Could not extract ZUGFeRD data from $pdf: " . $info->{message});
return;
}

b525a340 Johannes Grassler
# Access invoice XML data:
my $inv = ${$info}{'invoice_xml};
my %metadata = %{$inv->metadata};
my @items = @{$inv->items};
my $dom = $inv->dom;
a827a37d Moritz Bunkus

=head1 FUNCTIONS

b525a340 Johannes Grassler
=head2 extract_from_pdf E<lt>file_nameE<gt>
a827a37d Moritz Bunkus
b525a340 Johannes Grassler
Opens an existing PDF file in the file system and tries to extract
Factur-X/XRechnung/ZUGFeRD invoice data from it. First it'll parse the XMP
ba40069b Moritz Bunkus
metadata and look for the Factur-X/ZUGFeRD declaration inside. If the
b525a340 Johannes Grassler
declaration isn't found or the declared version isn't 2p0, an warning is
recorded in the returned data structure's C<warnings> key.
a827a37d Moritz Bunkus
b525a340 Johannes Grassler
Regardless of metadata presence, it will continue to iterate over all files
embedded in the PDF and attempt to parse them with SL::XMLInvoice. If it
succeeds, the first SL::XMLInvoice object that indicates successful parsing is
returned.
a827a37d Moritz Bunkus
Always returns a hash ref containing the key C<result>, a number that
can be one of the following constants:

=over 4

b525a340 Johannes Grassler
=item C<RES_OK> (0): parsing was OK.

ba845b8d Tamino Steinert
=item C<RES_ERR_…> (all values != 0): parsing failed. Values > 0 indicate a failure
b525a340 Johannes Grassler
in C<SL::XMLInvoice>, Values < 0 indicate a failure in C<SL::ZUGFeRD>.

=back

Other than that, the hash ref contains the following keys:

=over 4

=item C<message> - An error message detailing the problem upon nonzero C<result>, undef otherwise.

=item C<metadata_xmp> - The XMP metadata extracted from the Factur-X/ZUGFeRD invoice (if present)

=item C<invoice_xml> - An SL::XMLInvoice object holding the data extracted from the parsed XML invoice.

=item C<warnings> - Warnings encountered upon extracting/parsing XML files (if any)

=back

=head2 extract_from_xml E<lt>stringE<gt>

Takes a string containing an XML document with Factur-X/XRechnung/ZUGFeRD
invoice data and attempts to parse it using C<SL::XMLInvoice>.

If parsing is successful, an SL::XMLInvoice object containing the document's
parsed data is returned.

This method always returns a hash ref containing the key C<result>, a number that
can be one of the following constants:

=over 4

=item C<RES_OK> (0): parsing was OK.
a827a37d Moritz Bunkus
ba845b8d Tamino Steinert
=item C<RES_ERR_…> (all values != 0): parsing failed. Values > 0 indicate a failure
b525a340 Johannes Grassler
in C<SL::XMLInvoice>, Values < 0 indicate a failure in C<SL::ZUGFeRD>.
a827a37d Moritz Bunkus
=back

b525a340 Johannes Grassler
Other than that, the hash ref contains the following keys:

=over 4

=item C<message> - An error message detailing the problem upon nonzero C<result>, undef otherwise.

=item C<metadata_xmp> - Always undef and only present to let downstream code expecting its presence fail gracefully.

=item C<invoice_xml> - An SL::XMLInvoice object holding the data extracted from the parsed XML invoice.

=item C<warnings> - Warnings encountered upon extracting/parsing XML data (if any)

a827a37d Moritz Bunkus
=back

=head1 BUGS

Nothing here yet.

b525a340 Johannes Grassler
=head1 AUTHORS
a827a37d Moritz Bunkus
b525a340 Johannes Grassler
=over 4

=item Moritz Bunkus E<lt>m.bunkus@linet-services.deE<gt>

=item Johannes Graßler E<lt>info@computer-grassler.deE<gt>

=back
a827a37d Moritz Bunkus
=cut