#!/usr/bin/env perl #!/usr/bin/perl -w use strict; use warnings; use FindBin qw'$Bin $RealBin'; use Template; #======================================================================= # Sat Jan 8 06:56:06 PST 2005 - moseley # # Script to build the swish-e web site and pod docs. Based on # script written by Peter Karman. Peter made it look too simple, so # I rewrote it to make it much more complex. # # The script exits true (zero) if ANY files are processed. # # Maintains a cache of table of contents from the pod files so that # only changes need to be updated (docs/index.html knows when to get generated) # # This roughly follows the examples in the Badger book, chapter 11. # # Questions/TODO # Should utime the output files to match timestamp of input? # Might be useful is reading from cvs or where someone's machine # might be off in time # # Currenly, the download dirs are defined in lib/config/site. # Means can't easily check to see if those pages need to be updated. # Since they don't change often, just do a -all run once a day (to catch # the swish-daily updates). # # -src sucks as a name. Is that the location of the source html files? # No, it's the top-level directory because it expects lib and src # to be subdirectories. # #======================================================================== $ENV{TZ} = 'UTC'; # Show all dates in UTC # List of pod files relative to INCLUDE_PATH # swish.css is needed for stand-alone html docs. use vars '$exit_value'; $exit_value = 1; # no files processed yet. my @pod_files = qw( pod/README.pod pod/INSTALL.pod pod/CHANGES.pod pod/SWISH-CONFIG.pod pod/SWISH-RUN.pod pod/SWISH-SEARCH.pod pod/SWISH-FAQ.pod pod/SWISH-BUGS.pod pod/SWISH-3.0.pod pod/SWISH-LIBRARY.pod perl/API.pm example/swish.cgi.in example/search.cgi.in prog-bin/spider.pl.in filters/SWISH/Filter.pm.in pod_toc/index.html swish.css ); my @src_dirs = File::Spec->splitdir( $RealBin ); pop @src_dirs; my $src_default = File::Spec->catfile( @src_dirs ); # my $dest_default = File::Spec->catfile( $src_default, 'public_html' ); # my $poddest_default = File::Spec->catfile( $dest_default, 'docs' ); my @argv = @ARGV; # save for later. my %default_paths = ( # relative to --root dest => 'public_html', indexes => 'indexes', swishsrc => 'swish_release_build/latest_swish_build/source', develsrc => 'swish_daily_build/latest_swish_build/source', download => 'distribution', daily => 'swish-daily', archive => 'archive', ); my %default_config = ( ignore => [ '.svn', '\b(CVS|RCS)\b', '^#', '\.gz$', '\.swp$'], copy => [ '\.(ico|pdf|gif|png|jpe?g|htaccess)$' ], include => ['lib' ], src => $src_default, websitesrc => 'src', # default website source directory relative to -src # added to INCLUDE_PATH web_site => 1, dev_site => 1, lists_site => 1, svn_site => 1, ); $SIG{__DIE__} = sub { print STDERR "@_"; print STDERR < = If set, will try and set all the following using defaults below the directory specified. See README. -all = don't check timestamps on destination files Will generate all files, not just ones with dates newer than the output files. -verbose = verbose -debug = eh, debuging output -dryrun = Don't write output file. If not using the default locations, these can be set specifically: -dest= = destination directory for website -indexes= = Path to directory where indexes are stored -swishsrc= = toplevel directory of swish-e source package -develsrc= = Directory of development. -download= = Path to directory where downloads are located -daily= = Path to directory where daily builds are located -archive= = Path to the hypermail archive -- Normally not needed -- -src= = top-level directory of website source Default: $default_config{src} \$src/src and \$src/lib are INCLUDE_PATH -poddest = where pods are stored (must set -swishsrc) Setting poddest will disable website generation -podonly = says do not write website files, only pod files (set automatically when -poddest != website dir -websitesrc = default dir where website src content is located Default: $default_config{websitesrc} Should never be used. -- options you don't need -- -copy = add regex to files that are just copies -include = add directories to the include path relative to -src unless absolute. -ignore = add regex of files to skip. -check = used by swish-e configure -- Apache config file generation - (Defaults are set in the template and may not match below) -apache = Only generate Apache configuration file. The file is sent to stdout. domain = Override the domain name [swish-e.org] module_dir = Location of Apache modules [modules] user = Apache user [apache] group = Apache group [apache] logs = Location of log files [logs] pid_file = Location of apache pid file [run/httpd.pid] ipaddr = IP address for Listen statement [*] port = Port to listen on [80] trac_doc_root = Path do DocumentRoot for trac [/opt/trac/htdocs] trac_env = Trac environment [/opt/trac] trac_password = Trac password file [/opt/svn/swish/conf/dav_svn.passwd] svn_repo = Path to Subversion Repository [/opt/svn/swish] web_site = Boolean to include website [true] dev_site = Boolean to include trac site [true] lists_site = Boolean to include mailman site [true] svn_site = Boolean to include svn repo [true] Script exits false (1) if no files are actually processed. Exits false even if -all -dryrun is used. Exits true is at least one file was processed. EOF exit 2; }; # This sucks because you cannot override arrays with Getopt::Long my @options = qw( src=s dest=s root=s swishsrc=s poddest=s verbose|v debug all|a ignore=s@ copy=s@ include=s podonly dryrun|n websitesrc=s abslinks develsrc=s indexes=s download=s daily=s check apache domain=s module_dir=s user=s group=s logs=s pid_file=s ipaddr=s port=i trac_doc_root=s trac_env=s trac_password=s svn_repo=s web_site! dev_site! lists_site! svn_site! ); my $config = LoadConfig->new( \%default_config, \@options ); if ( $config->{check} ) { print "a-ok\n"; exit(0); } # Now make the errors a bit more brief $SIG{__DIE__} = sub { print STDERR "\n$0:\n @_\n For Help:\n $0 --help\n"; exit 1; }; $SIG{__DIE__} = ''; # Set defaults if ( $config->{root} ) { my $root = File::Spec->rel2abs( $config->{root} ); for my $option ( keys %default_paths ) { next if $config->{$option}; my $dir = File::Spec->catfile( $root, $default_paths{$option} ); if ( -d $dir ) { $config->{$option} = $dir; } else { warn "Could not set option '--$option=$dir': $!\n"; } } } # The docs to go to the docs sub-directory by default $config->{poddest} = File::Spec->catfile( $config->{dest}, 'docs' ) if $config->{dest} && !$config->{poddest}; # Generate apache config if ( $config->{apache} ) { my $tt = Template->new( INCLUDE_PATH => File::Spec->catfile( $config->{src}, 'etc' ), ); my $stash = { %$config, script => $0, arguments => join( ' ', @argv ), }; my $template = $config->{template} || 'httpd.conf.tt'; $tt->process( $template, $stash ) || die $tt->error; exit 0; } # Build everything here my $generator = DocBuilder->new( $config ); # Generate the website if ( $config->dest && !$config->podonly ) { $generator->website; } elsif ( $config->verbose ) { if ( !$config->dest ) { warn "--dest not defined, not generating website\n"; } else { warn "--podonly set, not generating website\n"; } } # Generate the pods if ( $config->swishsrc ) { $generator->pods( \@pod_files, 'swishsrc' ); } elsif ( $config->verbose ) { warn "--swishsrc not defined, not generating release docs\n"; } # Generate the development docs if ( $config->dest && $config->develsrc && !$config->podonly ) { $generator->pods( \@pod_files, 'develsrc' ); } elsif ( $config->verbose ) { if ( !$config->dest ) { warn "--dest not defined, not generating development docs\n"; } elsif ( !$config->develsrc ) { warn "--develsrc not defined, not generating development docs\n"; } else { warn "--podonly defined, not generating development docs\n"; } } exit $exit_value; #============================================================================== # Package for containing config and template. # # This sets up and checks various paths and creates the template object # It sets include paths for where source files are found # It sets the OUTPUT_PATH to either the website or to where poddest is set # If poddest is set then only pods are written package DocBuilder; use strict; use warnings; use File::Spec; use File::Path; use File::Basename; use File::Copy; use Template; use Template::Constants qw( :debug ); use Storable; # cache sub new { my ( $class, $config ) = @_; $config->{verbose} = 1 if $config->debug; # Thise have defaults so this should not fail die "Must specify top-level web source directory with -src\n" unless $config->src; # Validate src directory my $topsrc_abs = File::Spec->rel2abs( $config->src ); die "src directory [$topsrc_abs] is not a directory\n" unless -d $topsrc_abs; warn "Source directory set to [$topsrc_abs]\n" if $config->verbose; # Setup for local plugins my $plugin_dir = File::Spec->catfile( $topsrc_abs, 'Plugin' ); unshift @INC, $plugin_dir; # Build include paths - reverse so -include=foo will be pre-pended to path my @includes = map { File::Spec->file_name_is_absolute( $_ ) ? $_ : File::Spec->rel2abs( $_, $topsrc_abs ) } reverse $config->websitesrc, @{ $config->include }; # Validate dest directory die "Must specify either -dest (or -root) or -poddest\n" unless $config->dest || $config->poddest; my $dest; if ( $config->dest ) { $dest = File::Spec->rel2abs( $config->dest ); mkdir $dest unless -e $dest; die "destination directory [$dest] does not exist\n" unless -e $dest; die "destination directory [$dest] is not a directory\n" unless -d $dest; die "destination directory [$dest] is not writable\n" unless -w $dest; die "Source and destination cannot be the same\n" if $dest eq $topsrc_abs; warn "Destination directory set to [$dest]\n" if $config->verbose; } # Set pod output location. Only used if -swishsrc specified my $poddest = File::Spec->rel2abs( $config->poddest ); # If the (website) dest directory is not set then assume we are # writing pods only. # Note: Used to check if the poddest was a sub-directory # of the dest directory, but that was when dest was automatically # set relative to $RealBin. unless ( $config->dest ) { $config->{podonly} = 1; # Don't write the web site $config->{abslinks} = 1; # Flag to generate absolute links on pod files $dest = $poddest; } # Now deal with -swishsrc for locating pod files if ( $config->swishsrc ) { my $swish = File::Spec->rel2abs( $config->swishsrc ); die "Swish source directory [$swish] is not a directory\n" unless -d $swish; die "Failed to find pod dir [$swish/pod]\n" unless -d "$swish/pod"; warn "Swish-e source directory set to [$swish]\n" if $config->verbose; warn "PODs will be written to [$poddest]\n" if $config->verbose; $config->{swishsrc} = $swish; push @includes, $swish; # so process() can find the pods (when it's done that way) } if ( $config->develsrc ) { my $develsrc = File::Spec->rel2abs( $config->develsrc ); die "Failed to find devel pod dir [$develsrc/pod]\n" unless -d "$develsrc/pod"; warn "Swish-e devel source directory set to [$develsrc]\n" if $config->verbose; $config->{develsrc} = $develsrc; } # Test the list indexes directory if ( $config->indexes ) { my $index = File::Spec->rel2abs( $config->indexes ); die "indexes setting of [$index] is not a directory\n" unless -d $index; $config->{indexes} = $index; } # Validate include directories. for ( @includes ) { die "include directory [$_] is not a directory\n" unless -d $_; warn "Adding template include (lib) directory: [$_]\n" if $config->verbose; } my $template = Template->new({ INCLUDE_PATH => \@includes, OUTPUT_PATH => $dest, PLUGIN_BASE => ['My'], # Fix for ignoring case in TT verson 2.15 when loading plugins PLUGINS => { POD => 'My::POD', }, PRE_PROCESS => 'config/main', WRAPPER => 'page/wrapper.tt', #PRE_CHOMP => 1, #POST_CHOMP => 1, RECURSION => 1, # DEBUG => $config->debug ? DEBUG_PROVIDER : 0, } ) || die Template->error; my $self = bless { topdir => $topsrc_abs, src_dir => File::Spec->rel2abs( $config->websitesrc, $topsrc_abs ), dest_dir => $dest, config => $config, template => $template, include_path => \@includes, # for updating the included path later plugin_dir => $plugin_dir, }, $class; $self->build_accessors; return $self; } sub build_accessors { my ( $self ) = @_; no strict 'refs'; for my $key ( keys %$self ) { *{$key} = sub { shift->{$key} }; } } #========================================================================= # pods() # This takes a list of input files relative to $self->swishsrc # and writes all files to one directory: $self->poddest or $self->dest/docs # # Currently, read the file into memory, convert to html and pass that to TT. # TT returns the doc and we write it to the destination ourself. # # The plan is to add $config->swishscr to INCLUDE_PATH and use a plugin # to parse the pod and generate a table of contents and navigation. # # Start off using Peter's existing work. Much easier that way... #------------------------------------------------------------------------ # sub pods { my ( $self, $pod_files, $key ) = @_; # Add source directory to INCLUDE_PATH my $src_dir = $config->$key; my $include_path = $self->include_path; unshift @$include_path, $src_dir; # Fetch toc cache from disk my $cache_file = File::Spec->catfile( $self->topdir, 'toc_cache.storable' ); my $toc_cache; eval { $toc_cache = retrieve( $cache_file ) }; warn "Cache file [$cache_file] not found\n" if $@; $toc_cache = {} unless ref $toc_cache eq 'HASH'; # Create sub-cache for this source, if doesn't exist # When processing a pod file the POD plugin will update its own cache entry # in the hash. Cache by source directory my $this_toc_cache = $toc_cache->{$src_dir} ||= {}; # set where the docs should be written my $out_prefix = $key eq 'swishsrc' ? 'docs' : 'devel/devel_docs'; my $version = $self->get_swish_version( $src_dir ); my $update_index = 0; for my $in_file ( @$pod_files ) { my $out_file = basename( $in_file ); # Change pods to .html my $doc_type = ''; if ( $out_file =~ /\.(pod|pm|pl|cgi)/ ) { $doc_type = 'pod'; ($out_file) = map { s/\.in$//; s/\.(pod|pm|pl)$//; lc($_).'.html' } ($out_file); } # Force processing of every pod if there's not cache entry for it. # And force index.html if any pods were processed # Note that '$in_file' ends up as template.name and is key used by POD plugin my $all = $config->all || 0; $config->{all} = 1 if $out_file eq 'index.html' && $update_index; $config->{all} = 1 if $doc_type eq 'pod' && !$this_toc_cache->{$in_file}; # Set destination directory my $abslinks = $self->config->abslinks; $out_file = "$out_prefix/$out_file" unless $abslinks; # Now process it as a template my $vars = { this => { type => $doc_type, page_id => basename( $out_file ), podfile => 1, abslinks => $abslinks, swish_version => $version, out_file => $out_file, }, mode => 0644, toc_cache => $this_toc_cache, # processing a pod updates this hash pod_files => $pod_files, # this is used to generate the toc }; my $processed = $self->process_file( $in_file, $out_file, $vars ); $update_index++ if $doc_type eq 'pod' && $processed; # need to update the index # if any pod files processed # might be better for POD plugin to set flag $config->{all} = $all; # reset (guess I need a set method) } # Write out cache store $toc_cache, $cache_file; shift @$include_path; # remove source dir. # Check all the L<> links My::Pod::View::HTML::validate_links($config) if $update_index; # and $config->verbose; } #======================================================================= sub get_swish_version { my ( $self, $dir ) = @_; my $version; # look for swish-config my $ver; for ( "$dir/swish-config", "$dir/../install/bin/swish-config", # daily build location ) { if ( -x ) { $ver = `$_ --version`; chomp $ver; warn "Found version [$ver] from program $_\n" if $self->config->verbose; return $ver if $ver; } } # Now parse from configure my $configure = "$dir/configure"; die "$configure not found\n" unless -f $configure; my %version; open FH, "<$configure" or die "Failed to open $configure: $!"; while ( ) { next unless /(MAJOR|MINOR|MICRO)_VERSION=(\d+)/; $version{$1} = $2; } die "Failed to find version in $configure file\n" unless 3 == keys %version; $ver = "$version{MAJOR}.$version{MINOR}.$version{MICRO}"; warn "Found version [$ver] from file $configure\n" if $self->config->verbose; return $ver; } #======================================================================= sub logfile { my ( $self, $type, $name, $length ) = @_; return unless $self->config->dryrun || $self->config->verbose || $self->config->debug; my $msg = $self->config->dryrun ? 'Dry Run: ' : ''; warn sprintf("${msg}[%-7s] %-40s %10d Bytes\n", $type, $name, $length ); } #========================================================================= # generate_website() # Recurses $topsrc/src generating output to $dest # output files match perms on input file # # $sub_dir is the directory relative to $self->src_dir #-------------------------------------------------------------------------- sub website { my ( $self, $sub_dir ) = @_; $sub_dir ||=''; #make File::Spec happy my ( @files, @dirs ); # Create full path to source directory my $curdir = File::Spec->rel2abs( $sub_dir, $self->src_dir ); warn "Entering directory [$curdir]\n" if $self->config->debug; opendir( DIR, $curdir ) || die "Failed to open dir [$curdir]\n"; # Grab all files and directories while ( my $file = readdir(DIR) ) { next if $file eq '..' || $file eq '.'; next if $config->test_ignore( $file ); # ignore this file? # Set full path to file my $path = File::Spec->catfile( $curdir, $file ); # Set path relative to $self->src_dir my $rel_path = $sub_dir ? File::Spec->catfile( $sub_dir, $file ) : $file; if ( -d $path ) { push @dirs, $rel_path; } else { # $$$ check for -f here? push @files, $rel_path; } } close DIR; $self->process_file( $_, $_ ) for @files; $self->website( $_ ) for @dirs; # Recurse } #========================================================================= # process an input file # pass in: # $in_file - input file relative to INCLUDE_PATH # $out_file - output file relative to OUTPUT_PATH # $options - hash ref of options to add to $vars # # returns: # true if processed the file (didn't skip for some reason) # # Checks that input file is newer than output file #------------------------------------------------------------------------- sub process_file { my ( $self, $in_file, $out_file, $options ) = @_; $options ||= {}; # Locate the input path in the INCLUDE_PATH my $inpath = $self->find_in_include( $in_file ); die "Failed to find input path for [$in_file]. Aborting.\n" unless $inpath; my $outpath = File::Spec->rel2abs( $out_file, $self->dest_dir ); return unless $self->is_newer( $inpath, $outpath ); unless ( -d dirname( $outpath ) ){ mkpath( dirname( $outpath )) unless $self->config->dryrun; } warn "$inpath->$outpath\n" if $self->config->debug; # Check if only need to copy if ( $self->config->test_copy( $inpath ) ) { $self->logfile('Copy', $in_file, (stat $inpath)[7]); return if $self->config->dryrun; $main::exit_value = 0; copy( $inpath, $outpath ); chmod( ( stat $inpath)[2] , $outpath ); return; } # Set the path to the top from this current file my $updir = dirname( $out_file ) eq '.' ? '.' : File::Spec->catdir( map { '..' } File::Spec->splitdir( dirname($out_file))); # So we can have relative paths to the toplevel dir my %vars = ( rooturl => $updir, # This is the prefix to get to the top-level self => $self, # so cgi script know INCLUDE_PATH and dest_dir ); my $vars = { %vars, %$options }; $vars->{this}{file} = $out_file; my $capture = ''; my $output = $self->config->dryrun ? \$capture : $out_file; $self->template->process( $in_file, $vars, $output ) || warn $self->template->error . "\n"; $self->logfile('Process', $in_file, length $capture || (stat $outpath)[7]); return 1 if $self->config->dryrun; $main::exit_value = 0; my $mode = $options->{mode} || ( stat $inpath)[2]; chmod( $mode , $outpath ); return 1; # return true if processed a file } #======================================================================= # Find a relative file in INCLUDE_PATH and return it. #----------------------------------------------------------------------- sub find_in_include { my ( $self, $file ) = @_; for my $dir ( @{$self->include_path} ) { my $path = File::Spec->rel2abs( $file, $dir ); warn "Looking for [$path]\n" if $self->config->debug; return $path if -f $path; } return; } #---------------------------------------------- # Compare to file dates sub is_newer { my ( $self, $in_file, $out_file ) = @_; # Check for newer file if ( !$self->config->all && !newer( $in_file, $out_file ) ) { warn "Skipping $in_file is not newer than $out_file\n" if $self->config->debug; return; } return 1; } sub newer { my ( $source, $dest ) = @_; my $source_date = ( stat $source )[9] || 0; my $dest_date = ( stat $dest)[9] || 0; return $source_date > $dest_date; } package LoadConfig; use warnings; use strict; use Getopt::Long; use File::Basename; sub new { my ( $class, $config, $options ) = @_; GetOptions( $config, @$options ) || die "Error parsing options\n"; my $self = bless $config, $class; # Create accessors # no strict 'refs'; for (@$options ) { next unless /(^[a-zA-Z_]+)/; my $method = $1; *{$method} = sub { return shift->{$method} || undef }; } return $self; } # Return true if matches sub test_array { my ( $self, $file, $type ) = @_; $file = basename( $file ); my $array = $self->$type; return 1 unless $array; die "type is not an array" unless ref $array eq 'ARRAY'; for ( @$array ) { if ( $file =~ /$_/ ) { warn "File [$file] matched $type [$_]\n" if $self->debug; return 1; } } return 0; } sub test_ignore { shift->test_array( shift, 'ignore' ) }; sub test_copy { shift->test_array( shift, 'copy' ) };