/SL/BackgroundJob/CreateOrUpdateFileFullTexts.pm - Annotieren - projekt kivitendo - wissen

| Zweig: | Markierung: | Revision:

kivitendo/SL/BackgroundJob/CreateOrUpdateFileFullTexts.pm @ 4a663bf8

-e721
+package SL::BackgroundJob::CreateOrUpdateFileFullTexts;
 use strict;
 use parent qw(SL::BackgroundJob::Base);
 use Encode qw(decode);
 use English qw( -no_match_vars );
 use File::Slurp qw(read_file);
 use List::MoreUtils qw(uniq);
 use IPC::Run qw();
 use Unicode::Normalize qw();
 use SL::DB::File;
 use SL::DB::FileFullText;
 use SL::HTML::Util;
 # Dispatch table: maps a MIME type to the sub that extracts its text.
 # run() skips every file whose MIME type has no entry here.
 # Each extractor is called with a file name and returns a single string.
 my %extractor_by_mime_type = (
   'application/pdf' => \&_pdf_to_strings,
   'text/html'       => \&_html_to_strings,
   'text/plain'      => \&_text_to_strings,
 );
 # Create the job entry with its default schedule.
 #
 # Called on the class or object passed in $_[0]; delegates to
 # create_standard_job (not defined in this file, presumably inherited from
 # SL::BackgroundJob::Base) with the cron-style spec '20 3 * * *'.
 #
 # Returns: whatever create_standard_job returns.
 sub create_job {
   $_[0]->create_standard_job('20 3 * * *'); # every day at 3:20 am
 }
-e721
+            Bernd Bleßmann
+#
 # If run() does not throw an error, the job is recorded as 'success' in
 # background_job_histories; otherwise it is recorded as 'failure'.
 #
 # The return value of run() is stored as the result in
 # background_job_histories.
+#
 # Extract and store the full text of every file that needs it.
 #
 # Walks all SL::DB::File rows and skips
 #   - files whose stored full text is at least as new as the file itself
 #     (mtime, falling back to itime, on both sides), and
 #   - files whose MIME type has no entry in %extractor_by_mime_type.
 # For the remaining files the text is extracted and written to the existing
 # full text row, or to a newly created SL::DB::FileFullText row.
 #
 # A file whose content cannot be fetched, or whose extractor dies, is logged
 # as a warning and skipped, so one bad file does not keep the remaining files
 # from being processed. Errors while saving are not caught.
 #
 # Parameters:
 #   $self   - the job object/class
 #   $db_obj - second argument passed by the caller; not used here
 # Returns: the string 'ok'.
 sub run {
   my ($self, $db_obj) = @_;

   my $all_dbfiles = SL::DB::Manager::File->get_all;

   foreach my $dbfile (@$all_dbfiles) {
     my $full_text = $dbfile->full_text;

     # Nothing to do if the stored text is not older than the file.
     next if $full_text && (($dbfile->mtime || $dbfile->itime) <= ($full_text->mtime || $full_text->itime));

     my $mime_type = $dbfile->mime_type;
     my $extractor = defined $mime_type ? $extractor_by_mime_type{$mime_type} : undef;
     next if !defined $extractor;

     # NOTE(review): SL::File is used here but not loaded by the use lines
     # visible in this file -- confirm it is loaded elsewhere.
     my $file_name;
     if (!eval { $file_name = SL::File->get(dbfile => $dbfile)->get_file(); 1; }) {
       $::lxdebug->message(LXDebug::WARN(), "CreateOrUpdateFileFullTexts::run: get_file failed: " . $EVAL_ERROR);
       next;
     }

     # The extractors may die (unreadable file, missing external program).
     # Treat that like a failed get_file: warn and go on with the next file.
     my $text;
     if (!eval { $text = $extractor->($file_name); 1; }) {
       $::lxdebug->message(LXDebug::WARN(), "CreateOrUpdateFileFullTexts::run: text extraction failed for '$file_name': " . $EVAL_ERROR);
       next;
     }

     if ($full_text) {
       $full_text->update_attributes(full_text => $text);
     } else {
       SL::DB::FileFullText->new(file => $dbfile, full_text => $text)->save;
     }
   }

   return 'ok';
 }
 # Collapse decoded text into the word list stored for full text search.
 #
 # Every run of Unicode whitespace (\p{WSpace}, which includes CR and LF)
 # becomes a single blank, the text is normalized to Unicode form C, and
 # duplicate words are removed.
 #
 # Parameter: $txt - character string (already decoded)
 # Returns: the distinct words joined by single blanks
 sub _normalize_text {
   my ($txt) = @_;

   $txt =~ s{\p{WSpace}+}{ }g;
   $txt = Unicode::Normalize::normalize('C', $txt);

   return join ' ', uniq(split(' ', $txt));
 }

 # Extract the words of a PDF file by running the external pdftotext program.
 #
 # Runs "pdftotext -enc UTF-8 <file> -" and captures its standard output.
 # If the program exits with a non-zero status, a warning containing the exit
 # code and the captured standard error is logged and '' is returned.
 #
 # Parameter: $file_name - name of the PDF file
 # Returns: the normalized word list, or '' if pdftotext failed
 sub _pdf_to_strings {
   my ($file_name) = @_;

   my @cmd = ('pdftotext', '-enc', 'UTF-8', $file_name, '-');

   my ($txt, $err);
   IPC::Run::run \@cmd, \undef, \$txt, \$err;

   if ($CHILD_ERROR) {
     $::lxdebug->message(LXDebug::WARN(), "CreateOrUpdateFileFullTexts::_pdf_to_strings failed for '$file_name': " . ($CHILD_ERROR >> 8) . ": " . (defined $err ? $err : ''));
     return '';
   }

   return _normalize_text(Encode::decode('utf-8-strict', $txt));
 }

 # Extract the words of an HTML file.
 #
 # The file content is decoded as UTF-8 and passed through
 # SL::HTML::Util::strip before it is normalized.
 #
 # Parameter: $file_name - name of the HTML file
 # Returns: the normalized word list
 sub _html_to_strings {
   my ($file_name) = @_;

   my $txt = Encode::decode('utf-8-strict', scalar read_file($file_name));

   return _normalize_text(SL::HTML::Util::strip($txt));
 }

 # Extract the words of a plain text file.
 #
 # The file content is decoded as UTF-8 and normalized.
 #
 # Parameter: $file_name - name of the text file
 # Returns: the normalized word list
 sub _text_to_strings {
   my ($file_name) = @_;

   return _normalize_text(Encode::decode('utf-8-strict', scalar read_file($file_name)));
 }
 1; # a module file has to end with a true value
-            af8f0a39
+            Bernd Bleßmann
 __END__
 =pod
 =encoding utf8
 =head1 NAME
 SL::BackgroundJob::CreateOrUpdateFileFullTexts - Extract text strings/words from
 files in the DMS for full text search.
 =head1 SYNOPSIS
 Search all documents in the files table and try to extract strings from them
 and store the strings in the database.
 Duplicate strings/words in one text are removed.
 Strings are updated if the change or creation time of the document is newer than
 the old entry.
 =head1 AUTHOR
 Bernd Bleßmann E<lt>bernd@kivitendo-premium.deE<gt>
 =cut

Projekt

Allgemein

Profil

projekt kivitendo