#!@@perlbinary@@ -w
use strict;

# This is set to where Swish-e's "make install" installed the helper modules.
use lib ('@@perlmoduledir@@');

use SWISH::Filter;

=pod

This is an example of how to use the SWISH::Filter module to filter
documents using Swish-e's C<FileFilter> feature.  This will filter any
number of document types, depending on what filter modules are installed.

This program should typically only be used for the -S fs indexing method.
For -S http the F<swishspider> program calls SWISH::Filter directly.  And
-S prog programs written in Perl can also make use of SWISH::Filter directly.

In general, you will not want to filter with this program if you have a lot
of files to filter.  Running a perl program for many documents will be slow
(due to the compilation of the perl program).  If you have many documents to
convert with the -S fs method of indexing then consider using -S prog with
F<DirTree.pl> and use the SWISH::Filter module (see F<filters/README>).

Swish-e configuration:

    FileFilter .pdf /path/to/swish_filter.pl
    FileFilter .doc /path/to/swish_filter.pl
    FileFilter .mp3 /path/to/swish_filter.pl

    IndexContents HTML2 .pdf .mp3
    IndexContents TXT2  .doc

Then when indexing those types of documents this program will attempt to
filter (convert) them into a text format.

Usage:

    swish_filter.pl <work_path> <real_path>

The first argument is the path of the file to read and the second is the
real path (name) of the document, which is used to look up the content type.

See SWISH-CONFIG documentation on Filtering for more information.

=cut

# The two positional arguments: the file to read, and the document's real name.
my ( $work_path, $real_path ) = @ARGV;

# Fail early with a clear message instead of passing undef to the filter
# (and emitting "uninitialized value" warnings under -w further down).
unless ( defined $work_path && defined $real_path ) {
    die "Usage: $0 <work_path> <real_path>\n";
}

my $filter = SWISH::Filter->new;

my $doc = $filter->convert(
    document     => $work_path,
    name         => $real_path,
    content_type => \$real_path,   # use the real path to lookup the content type
);

# Nothing to output: empty doc, zero size, or no filters installed.
exit unless $doc;

# Was the document converted by a filter?
my $was_filtered = $doc->was_filtered;

# Progress/diagnostic message goes to STDERR so it never mixes with the
# document content written to STDOUT below.
print STDERR $was_filtered
    ? " - Filtered: $real_path\n"
    : " - Not filtered: $real_path ($work_path)\n";

# A filtered document is written out in full (fetch_doc returns a scalar
# reference).  When no filter converted the document, only its real path is
# written.
# NOTE(review): presumably this lets unconvertible (possibly binary) files
# still be indexed by name without emitting raw content -- confirm intended.
print $was_filtered ? ${ $doc->fetch_doc } : $real_path;