[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

r10319 - /man-cgi/extractor/manpage-extractor.pl



Author: jfs
Date: Tue Nov  5 00:26:01 2013
New Revision: 10319

URL: http://svn.debian.org/wsvn/?sc=1&rev=10319
Log:
- Use getopt long (more versatile) instead of the short version.
- Make sure we only recurse through directories, reject anything else provided
- Improve debug messages for some situations 
- Do not remove directories created for packages that do not have manpages. That way 
  the script skeeps them faster when (re)processing the archive
- Setup a proper template for the temporary files. Also, make sure that temporary files are created in the WORKDIR


Modified:
    man-cgi/extractor/manpage-extractor.pl

Modified: man-cgi/extractor/manpage-extractor.pl
URL: http://svn.debian.org/wsvn/man-cgi/extractor/manpage-extractor.pl?rev=10319&op=diff
==============================================================================
--- man-cgi/extractor/manpage-extractor.pl	(original)
+++ man-cgi/extractor/manpage-extractor.pl	Tue Nov  5 00:26:01 2013
@@ -24,9 +24,10 @@
 
 use strict;
 use File::Basename;
-use Getopt::Std;
+use Getopt::Long;
 use File::Temp qw/tempfile/;
 use File::Path;
+
 # Options
 # -d - debug
 # -f - force extraction
@@ -34,19 +35,33 @@
 # -o directory - Output directory (defaults to './manpages-files')
 # -w directory - Work directory (defaults to './work')
 # -a architecture - Only analyse binary packages of this arch
-getopts('rdo:w:a:');
-use vars qw/$opt_r $opt_f $opt_d $opt_o $opt_w $opt_a $opt_r/;
-
-if ( $#ARGV == -1  ) {
-# TODO: fix for -r
-    die "usage: $0 [-dfs] [-o dir] [-w dir] [-a arch] <package pool directory>\n";
-}
-
-# TODO: Use Getopt some more?
+
+
 my $pwd = `pwd`;
 chomp $pwd;
-my $OUTPUTDIR = $opt_o || $pwd."/manpages-files";
-my $WORKDIR = $opt_w || $pwd."/work";
+# Default values for options
+my $OUTPUTDIR = $pwd."/manpages-files";
+my $WORKDIR = $pwd."/work";
+my $debug = '';
+my $force = '';
+my $readinput = '';
+my $ARCHITECTURE = "i386";
+GetOptions ("output=s" => \$OUTPUTDIR,    # string
+		"workdir=s"   => \$WORKDIR,      # string
+		"architecture=s"   => \$ARCHITECTURE,      # string
+		"readinput"  => \$readinput,   # flag
+		"force"  => \$force,   # flag
+		"debug"  => \$debug)   # flag
+         or die("Error in command line arguments\n");
+
+if ( $#ARGV == -1  ) {
+    die "usage: $0 [-dfr] [-o dir] [-w dir] [-a arch] <package pool directory>\n";
+}
+
+
+# TODO: Use Getopt some more?
+
+
 # Are this relative to our path?
 if ($OUTPUTDIR !~ /^\// ) {
 	$OUTPUTDIR=$pwd."/".$OUTPUTDIR;
@@ -54,8 +69,8 @@
 if ($WORKDIR !~ /^\// ) {
 	$WORKDIR=$pwd."/".$WORKDIR;
 }
-print STDERR "DEBUG Setting workdir to $WORKDIR\n" if $opt_d;
-print STDERR "DEBUG Setting outputdir $OUTPUTDIR\n" if $opt_d;
+print STDERR "DEBUG: Setting workdir to $WORKDIR\n" if $debug;
+print STDERR "DEBUG: Setting outputdir $OUTPUTDIR\n" if $debug;
 
 # Create directories
 if (! -e $OUTPUTDIR) {
@@ -64,29 +79,33 @@
 if (! -e $WORKDIR) {
 	mkdir $WORKDIR || die ("Could not create $WORKDIR: $!");
 }
-my $EXTENSION = "deb"; # Default, do binaries
-
-if ( defined($opt_r) ) {
+my $EXTENSION = "deb"; # Default, do binary packages
+
+if ( $readinput ) {
 # Download packages and then extract
 	my $mirror = shift;
 	while ( my $package = <STDIN> ) {
 		chomp($package);
 		# Obtaint a list of all packages
-		print "Looking for package $package\n" if $opt_d;
-		open (PACK, '|', "find $mirror -name \"${package}_*${EXTENSION}\" -a -type f" );
+		print "DEBUG: Looking for package $package\n" if $debug;
+		open (PACK, "find $mirror -name \"${package}_*${EXTENSION}\" -a -type f |" );
 		while ( my $file = <PACK> ) {
 			chomp $file;
 			extract_package($file);
 		}
 		close PACK;
-		print "Finished extraction.\n";
+		print "INFO: Finished extraction.\n";
 	}
 } else { 
 # Recursive call
 	foreach my $dir (@ARGV) {
-		print "Starting extraction of manpages in '$dir'\n";
-		scan_directory($dir);
-		print "Finished extraction.\n";
+		if ( -d $dir ) {
+			print "INFO: Starting extraction of manpages in '$dir'\n";
+			scan_directory($dir);
+			print "INFO: Finished extraction.\n";
+		} else {
+			print "ERROR: Will not extract manpages from '$dir', it is not a directory\n";
+		}
 	}
 }
 
@@ -94,7 +113,7 @@
 
 sub scan_directory  {
 	my ($dir) =@_;
-	print "DEBUG: Scanning dir $dir\n" if $opt_d;
+	print "DEBUG: Scanning dir $dir\n" if $debug;
         my $DIRFH;
 
         opendir $DIRFH, $dir || warn ("Cannot open directory $dir: $!");
@@ -115,8 +134,11 @@
 # Extract a package to the working directory by making symlinks
 # to its files
 	my ($file) = @_;
-	print "DEBUG: Checking file $file\n" if $opt_d;
-	return if $file !~ /\.$EXTENSION$/;
+	print "DEBUG: Checking file $file\n" if $debug;
+	if ( $file !~ /\.$EXTENSION$/ ) {
+		print "DEBUG: Omitting file (not a package)\n" if $debug;
+		return 0;
+	}
 	# The file is a deb file, extract the name of the source files
 	my @sources;
 	my $basedir = dirname($file);
@@ -132,35 +154,46 @@
             chomp ( $version = `dpkg-deb -f "$file" Version` );
             chomp ( $arch = `dpkg-deb -f "$file" Architecture` );
 	} 
-	if ( defined($opt_a) and $arch ne $opt_a ) {
-		print "Skipping package $packagename (architecture '$arch')\n";
+
+	# Note, this means that we will only analyse one binary package
+	# of all the different architectures available
+	if ( $arch ne $ARCHITECTURE ) {
+		print "INFO: Skipping package file (architecture '$arch', we want '$ARCHITECTURE')\n" if $debug;
 		return 0;
 	}
 	if ( $EXTENSION eq "dsc" and $debfile =~ /^.*?_(.*?)\.$EXTENSION$/ ) {
 			$version = $1;
 	}
+
+	# Note, it might not be optimal to keep (forever) old versions of manpages
+	# maybe its best to add the distribution (sid, testing...) to the OUTPUTDIR
+	# and keep only one extracted package per release
 	if ( $version ne "undefined" ) {
 		$mandir = "${OUTPUTDIR}/${pooldir}/${packagename}_${version}";
 	} else {
 		$mandir = "${OUTPUTDIR}/${pooldir}/${packagename}";
 	}
-	if ( -e  $mandir && ! $opt_f) {
-	# Note, this means that we will only analyse one binary package
-	# of all the different architectures available
-		print "Skipping package $packagename (version '$version' already extracted)\n";
-		return 0;
+	if ( -e  $mandir ){
+		if ( ! $force ) {
+			print "INFO: Skipping package $packagename (version '$version' already extracted)\n" if $debug;
+			return 0;
+		} else {
+			print "INFO: Forcing overwritting of package $packagename (version '$version' already extracted)\n" if $debug;
+		}
 	}
 	mkpath "$mandir" || die ("Could not create $mandir: $!");
 
-	print "Extracting manpages of $packagename version '$version' in $mandir\n";
+	print "INFO: Extracting manpages of $packagename version '$version' in $mandir\n";
 	# You can either do a search in the binary files:
 	if ( $EXTENSION eq "deb" ) {
             if ( extract_manpages($WORKDIR, $file, $mandir) ) {
-            # Remove the directory, there were no manpages there
 		    print "WARNING: No manpages found.\n";
-		    if (  -e "$mandir" ) {
-			    rmdir $mandir || die ("Could not remove $mandir: $!");
-		    }
+       		# Optionally, remove the directory, there were no manpages there
+		#    if (  -e "$mandir" ) {
+		#	    rmdir $mandir || die ("Could not remove $mandir: $!");
+		#    }
+		# Its best to keept it to prevent the script (when its rerun) to go through the same
+		# packages twice
 	    }
 	}	
 	# Now we are done, cleanup
@@ -175,7 +208,7 @@
 	# Looks for manpages in the sources
 	my $result = 1;
 	# Temporary file for dpkg
-	my $tempfileh = new File::Temp ( Template => "TEMPLATE.XXXXXX", DIR => File::Spec->tmpdir, SUFFIX => ".suf" ) or  die "Cannot create temporary file: $!" ;
+	my $tempfileh = new File::Temp ( Template => "DPKG-DEB.XXXXXX", DIR => $WORKDIR, SUFFIX => ".tmp" ) or  die "Cannot create temporary file: $!" ;
 
 	my $tempfile = $tempfileh->filename;
 
@@ -185,18 +218,18 @@
 	system "$command";
 	if ( $? != 0 ) {
 		if ($? == -1) {
-			print STDERR "failed to execute: $!\n";
+			print STDERR "ERROR: failed to execute: $!\n";
 		} elsif ($? & 127) {
-			printf STDERR "child died with signal %d, %s coredump\n",
+			printf STDERR "ERROR: child died with signal %d, %s coredump\n",
 			       ($? & 127),  ($? & 128) ? 'with' : 'without';
 		} else {
-			printf STDERR "child exited with value %d\n", $? >> 8;
+			printf STDERR "ERROR: child exited with value %d\n", $? >> 8;
 		}
 		die "Error running '$command'";
 	}
 	$command="tar -C $wdir -xf $tempfile usr/share/man ./usr/share/man usr/X11R6/man ./usr/X11R6/man 2>/dev/null";
 	system "$command";
-	printf STDERR "tar exited with value %d\n", $? >> 8 if $? != 0 && $? != ( 2 << 8 );
+	printf STDERR "ERROR: tar exited with value %d\n", $? >> 8 if $? != 0 && $? != ( 2 << 8 );
 # Note we skip exit value '2' which happens when tar does not find any file according to specification
 
 # If we have a directory then move all the files in it


Reply to: