r10319 - /man-cgi/extractor/manpage-extractor.pl
Author: jfs
Date: Tue Nov 5 00:26:01 2013
New Revision: 10319
URL: http://svn.debian.org/wsvn/?sc=1&rev=10319
Log:
- Use getopt long (more versatile) instead of the short version.
- Make sure we only recurse through directories, reject anything else provided
- Improve debug messages for some situations
- Do not remove directories created for packages that do not have manpages. That way
the script skeeps them faster when (re)processing the archive
- Setup a proper template for the temporary files. Also, make sure that temporary files are created in the WORKDIR
Modified:
man-cgi/extractor/manpage-extractor.pl
Modified: man-cgi/extractor/manpage-extractor.pl
URL: http://svn.debian.org/wsvn/man-cgi/extractor/manpage-extractor.pl?rev=10319&op=diff
==============================================================================
--- man-cgi/extractor/manpage-extractor.pl (original)
+++ man-cgi/extractor/manpage-extractor.pl Tue Nov 5 00:26:01 2013
@@ -24,9 +24,10 @@
use strict;
use File::Basename;
-use Getopt::Std;
+use Getopt::Long;
use File::Temp qw/tempfile/;
use File::Path;
+
# Options
# -d - debug
# -f - force extraction
@@ -34,19 +35,33 @@
# -o directory - Output directory (defaults to './manpages-files')
# -w directory - Work directory (defaults to './work')
# -a architecture - Only analyse binary packages of this arch
-getopts('rdo:w:a:');
-use vars qw/$opt_r $opt_f $opt_d $opt_o $opt_w $opt_a $opt_r/;
-
-if ( $#ARGV == -1 ) {
-# TODO: fix for -r
- die "usage: $0 [-dfs] [-o dir] [-w dir] [-a arch] <package pool directory>\n";
-}
-
-# TODO: Use Getopt some more?
+
+
my $pwd = `pwd`;
chomp $pwd;
-my $OUTPUTDIR = $opt_o || $pwd."/manpages-files";
-my $WORKDIR = $opt_w || $pwd."/work";
+# Default values for options
+my $OUTPUTDIR = $pwd."/manpages-files";
+my $WORKDIR = $pwd."/work";
+my $debug = '';
+my $force = '';
+my $readinput = '';
+my $ARCHITECTURE = "i386";
+GetOptions ("output=s" => \$OUTPUTDIR, # string
+ "workdir=s" => \$WORKDIR, # string
+ "architecture=s" => \$ARCHITECTURE, # string
+ "readinput" => \$readinput, # flag
+ "force" => \$force, # flag
+ "debug" => \$debug) # flag
+ or die("Error in command line arguments\n");
+
+if ( $#ARGV == -1 ) {
+ die "usage: $0 [-dfr] [-o dir] [-w dir] [-a arch] <package pool directory>\n";
+}
+
+
+# TODO: Use Getopt some more?
+
+
# Are this relative to our path?
if ($OUTPUTDIR !~ /^\// ) {
$OUTPUTDIR=$pwd."/".$OUTPUTDIR;
@@ -54,8 +69,8 @@
if ($WORKDIR !~ /^\// ) {
$WORKDIR=$pwd."/".$WORKDIR;
}
-print STDERR "DEBUG Setting workdir to $WORKDIR\n" if $opt_d;
-print STDERR "DEBUG Setting outputdir $OUTPUTDIR\n" if $opt_d;
+print STDERR "DEBUG: Setting workdir to $WORKDIR\n" if $debug;
+print STDERR "DEBUG: Setting outputdir $OUTPUTDIR\n" if $debug;
# Create directories
if (! -e $OUTPUTDIR) {
@@ -64,29 +79,33 @@
if (! -e $WORKDIR) {
mkdir $WORKDIR || die ("Could not create $WORKDIR: $!");
}
-my $EXTENSION = "deb"; # Default, do binaries
-
-if ( defined($opt_r) ) {
+my $EXTENSION = "deb"; # Default, do binary packages
+
+if ( $readinput ) {
# Download packages and then extract
my $mirror = shift;
while ( my $package = <STDIN> ) {
chomp($package);
# Obtaint a list of all packages
- print "Looking for package $package\n" if $opt_d;
- open (PACK, '|', "find $mirror -name \"${package}_*${EXTENSION}\" -a -type f" );
+ print "DEBUG: Looking for package $package\n" if $debug;
+ open (PACK, "find $mirror -name \"${package}_*${EXTENSION}\" -a -type f |" );
while ( my $file = <PACK> ) {
chomp $file;
extract_package($file);
}
close PACK;
- print "Finished extraction.\n";
+ print "INFO: Finished extraction.\n";
}
} else {
# Recursive call
foreach my $dir (@ARGV) {
- print "Starting extraction of manpages in '$dir'\n";
- scan_directory($dir);
- print "Finished extraction.\n";
+ if ( -d $dir ) {
+ print "INFO: Starting extraction of manpages in '$dir'\n";
+ scan_directory($dir);
+ print "INFO: Finished extraction.\n";
+ } else {
+ print "ERROR: Will not extract manpages from '$dir', it is not a directory\n";
+ }
}
}
@@ -94,7 +113,7 @@
sub scan_directory {
my ($dir) =@_;
- print "DEBUG: Scanning dir $dir\n" if $opt_d;
+ print "DEBUG: Scanning dir $dir\n" if $debug;
my $DIRFH;
opendir $DIRFH, $dir || warn ("Cannot open directory $dir: $!");
@@ -115,8 +134,11 @@
# Extract a package to the working directory by making symlinks
# to its files
my ($file) = @_;
- print "DEBUG: Checking file $file\n" if $opt_d;
- return if $file !~ /\.$EXTENSION$/;
+ print "DEBUG: Checking file $file\n" if $debug;
+ if ( $file !~ /\.$EXTENSION$/ ) {
+ print "DEBUG: Omitting file (not a package)\n" if $debug;
+ return 0;
+ }
# The file is a deb file, extract the name of the source files
my @sources;
my $basedir = dirname($file);
@@ -132,35 +154,46 @@
chomp ( $version = `dpkg-deb -f "$file" Version` );
chomp ( $arch = `dpkg-deb -f "$file" Architecture` );
}
- if ( defined($opt_a) and $arch ne $opt_a ) {
- print "Skipping package $packagename (architecture '$arch')\n";
+
+ # Note, this means that we will only analyse one binary package
+ # of all the different architectures available
+ if ( $arch ne $ARCHITECTURE ) {
+ print "INFO: Skipping package file (architecture '$arch', we want '$ARCHITECTURE')\n" if $debug;
return 0;
}
if ( $EXTENSION eq "dsc" and $debfile =~ /^.*?_(.*?)\.$EXTENSION$/ ) {
$version = $1;
}
+
+ # Note, it might not be optimal to keep (forever) old versions of manpages
+ # maybe its best to add the distribution (sid, testing...) to the OUTPUTDIR
+ # and keep only one extracted package per release
if ( $version ne "undefined" ) {
$mandir = "${OUTPUTDIR}/${pooldir}/${packagename}_${version}";
} else {
$mandir = "${OUTPUTDIR}/${pooldir}/${packagename}";
}
- if ( -e $mandir && ! $opt_f) {
- # Note, this means that we will only analyse one binary package
- # of all the different architectures available
- print "Skipping package $packagename (version '$version' already extracted)\n";
- return 0;
+ if ( -e $mandir ){
+ if ( ! $force ) {
+ print "INFO: Skipping package $packagename (version '$version' already extracted)\n" if $debug;
+ return 0;
+ } else {
+ print "INFO: Forcing overwritting of package $packagename (version '$version' already extracted)\n" if $debug;
+ }
}
mkpath "$mandir" || die ("Could not create $mandir: $!");
- print "Extracting manpages of $packagename version '$version' in $mandir\n";
+ print "INFO: Extracting manpages of $packagename version '$version' in $mandir\n";
# You can either do a search in the binary files:
if ( $EXTENSION eq "deb" ) {
if ( extract_manpages($WORKDIR, $file, $mandir) ) {
- # Remove the directory, there were no manpages there
print "WARNING: No manpages found.\n";
- if ( -e "$mandir" ) {
- rmdir $mandir || die ("Could not remove $mandir: $!");
- }
+ # Optionally, remove the directory, there were no manpages there
+ # if ( -e "$mandir" ) {
+ # rmdir $mandir || die ("Could not remove $mandir: $!");
+ # }
+ # Its best to keept it to prevent the script (when its rerun) to go through the same
+ # packages twice
}
}
# Now we are done, cleanup
@@ -175,7 +208,7 @@
# Looks for manpages in the sources
my $result = 1;
# Temporary file for dpkg
- my $tempfileh = new File::Temp ( Template => "TEMPLATE.XXXXXX", DIR => File::Spec->tmpdir, SUFFIX => ".suf" ) or die "Cannot create temporary file: $!" ;
+ my $tempfileh = new File::Temp ( Template => "DPKG-DEB.XXXXXX", DIR => $WORKDIR, SUFFIX => ".tmp" ) or die "Cannot create temporary file: $!" ;
my $tempfile = $tempfileh->filename;
@@ -185,18 +218,18 @@
system "$command";
if ( $? != 0 ) {
if ($? == -1) {
- print STDERR "failed to execute: $!\n";
+ print STDERR "ERROR: failed to execute: $!\n";
} elsif ($? & 127) {
- printf STDERR "child died with signal %d, %s coredump\n",
+ printf STDERR "ERROR: child died with signal %d, %s coredump\n",
($? & 127), ($? & 128) ? 'with' : 'without';
} else {
- printf STDERR "child exited with value %d\n", $? >> 8;
+ printf STDERR "ERROR: child exited with value %d\n", $? >> 8;
}
die "Error running '$command'";
}
$command="tar -C $wdir -xf $tempfile usr/share/man ./usr/share/man usr/X11R6/man ./usr/X11R6/man 2>/dev/null";
system "$command";
- printf STDERR "tar exited with value %d\n", $? >> 8 if $? != 0 && $? != ( 2 << 8 );
+ printf STDERR "ERROR: tar exited with value %d\n", $? >> 8 if $? != 0 && $? != ( 2 << 8 );
# Note we skip exit value '2' which happens when tar does not find any file according to specification
# If we have a directory then move all the files in it
Reply to: