[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

r10417 - /man-cgi/extractor/manpage-extractor.pl



Author: jfs
Date: Sun May 25 13:30:16 2014
New Revision: 10417

URL: http://svn.debian.org/wsvn/?sc=1&rev=10417
Log:

- Add one option to retrieve the packages to extract from standard input (with the precise location
  in the pool)

- Generalise how information (INFO) is logged and add the option to provide a timestamp, so that
  logs carry a timestamp for the start and finish of each run

- Do not move the whole contents of workdir, since this moves the *temporary* file itself too!
  (consequently, it does not get unlinked and removed from the system, leaving a lot of cruft)

- The --force option now removes the previous directory to ensure all the contents get replaced 
  without errors

- Use File::Path's remove_tree and make_path and manage errors properly in both situations; errors
  from mkpath (now obsolete) were not being handled properly


Modified:
    man-cgi/extractor/manpage-extractor.pl

Modified: man-cgi/extractor/manpage-extractor.pl
URL: http://svn.debian.org/wsvn/man-cgi/extractor/manpage-extractor.pl?rev=10417&op=diff
==============================================================================
--- man-cgi/extractor/manpage-extractor.pl	(original)
+++ man-cgi/extractor/manpage-extractor.pl	Sun May 25 13:30:16 2014
@@ -26,12 +26,13 @@
 use File::Basename;
 use Getopt::Long;
 use File::Temp qw/tempfile/;
-use File::Path;
+use File::Path qw(make_path remove_tree);
 
 # Options
 # -d - debug
 # -f - force extraction
-# -r - read list of packages from stdin
+# -r - read names of packages from stdin
+# -l - read list of package files from stdin
 # -o directory - Output directory (defaults to './manpages-files')
 # -w directory - Work directory (defaults to './work')
 # -a architecture - Only analyse binary packages of this arch
@@ -45,11 +46,13 @@
 my $debug = '';
 my $force = '';
 my $readinput = '';
+my $listinput = '';
 my $ARCHITECTURE = "i386";
 GetOptions ("output=s" => \$OUTPUTDIR,    # string
 		"workdir=s"   => \$WORKDIR,      # string
 		"architecture=s"   => \$ARCHITECTURE,      # string
 		"readinput"  => \$readinput,   # flag
+		"listinput"  => \$listinput,   # flag
 		"force"  => \$force,   # flag
 		"debug"  => \$debug)   # flag
          or die("Error in command line arguments\n");
@@ -84,6 +87,7 @@
 if ( $readinput ) {
 # Download packages and then extract
 	my $mirror = shift;
+	log_info("Starting extraction of selected packages.", 1);
 	while ( my $package = <STDIN> ) {
 		chomp($package);
 		# Obtaint a list of all packages
@@ -94,15 +98,32 @@
 			extract_package($file);
 		}
 		close PACK;
-		print "INFO: Finished extraction.\n";
+		log_info("Finished extraction.", 1);
+	}
+} elsif ( $listinput ) { 
+# Extract from packages provided
+	my $mirror = shift;
+	log_info("Starting extraction of selected packages.", 1);
+	while ( my $filename = <STDIN> ) {
+		chomp($filename);
+		print "DEBUG: Looking for $filename in $mirror\n" if $debug;
+		$filename = "/".$filename if ( $filename !~ /^\\/ ); # Add a separator if it does not exist
+		my $file = $mirror.$filename;
+		if ( -e "$file" ) {
+			extract_package($file);
+			log_info("Finished extraction.", 1);
+		} else {
+			print "ERROR: Cannot find file $file";
+		}	
 	}
 } else { 
 # Recursive call
 	foreach my $dir (@ARGV) {
 		if ( -d $dir ) {
-			print "INFO: Starting extraction of manpages in '$dir'\n";
+			my $timestamp = localtime(time);
+			log_info("Starting extraction of manpages in '$dir'", 1);
 			scan_directory($dir);
-			print "INFO: Finished extraction.\n";
+			log_info("Finished extraction.", 1);
 		} else {
 			print "ERROR: Will not extract manpages from '$dir', it is not a directory\n";
 		}
@@ -110,6 +131,18 @@
 }
 
 exit 0;
+
+# Print "INFO: <message>" to STDOUT, optionally followed by "- <local time>".
+# $stamp is optional (default: no timestamp). Trailing newlines in $message are dropped.
+# TODO - generalise for ERROR and DEBUG
+sub log_info {
+	my ($message, $stamp) = @_;
+	$message =~ s/\n+\z//;    # several callers still pass a trailing "\n"
+	my $line = "INFO: $message";
+	$line .= " - " . localtime(time) if $stamp;    # "." forces scalar (ctime-style) localtime
+	print "$line\n";
+	return;
+}
 
 sub scan_directory  {
 	my ($dir) =@_;
@@ -158,7 +191,7 @@
 	# Note, this means that we will only analyse one binary package
 	# of all the different architectures available
 	if ( $arch ne $ARCHITECTURE && $arch ne "all" ) {
-		print "INFO: Skipping package file (architecture '$arch', we want '$ARCHITECTURE')\n" if $debug;
+		log_info("Skipping package file (architecture '$arch', we want '$ARCHITECTURE')\n") if $debug;
 		return 0;
 	}
 	if ( $EXTENSION eq "dsc" and $debfile =~ /^.*?_(.*?)\.$EXTENSION$/ ) {
@@ -174,16 +207,50 @@
 		$mandir = "${OUTPUTDIR}/${pooldir}/${packagename}";
 	}
 	if ( -e  $mandir ){
+	# Note: If the directory does not contain any files, it means that either the package
+	# did not contain any files or that there was an error when extracting the manpages in
+	# previous runs
+	# TODO: Maybe it is best to use an alternative mechanism to avoid going through
+	# the same package twice, like keeping an index of extracted packages in different
+	# runs and looking for the package there
 		if ( ! $force ) {
-			print "INFO: Skipping package $packagename (version '$version' already extracted)\n" if $debug;
+			log_info("Skipping package $packagename (version '$version' already extracted)\n") if $debug;
 			return 0;
 		} else {
-			print "INFO: Forcing overwritting of package $packagename (version '$version' already extracted)\n" if $debug;
-		}
-	}
-	mkpath "$mandir" || die ("Could not create $mandir: $!");
-
-	print "INFO: Extracting manpages of $packagename version '$version' in $mandir\n";
+			log_info("Forcing overwritting of package $packagename (version '$version' already extracted)\n") if $debug;
+# Remove mandir, it gets recreated again after its removal
+			my $result = remove_tree($mandir, {verbose => $debug, keep_root => 0, safe => 0, error => \my $err});
+			if (@$err) {
+				for my $diag (@$err) {
+					my ($file, $message) = %$diag;
+					if ($file eq '') {
+						print "ERROR: Error removing $mandir - general error: $message\n";
+					}
+					else {
+						print "ERROR: Error removing $mandir - problem unlinking $file: $message\n";
+					}
+				}
+			}
+		}
+	}
+
+# Create the mandir
+	if ( make_path ("$mandir", {verbose => $debug, mode => 0755, error => \my $err}) == 0 ) {
+# No directory created, check why
+		if (@$err) {
+			for my $diag (@$err) {
+				my ($file, $message) = %$diag;
+				if ($file eq '') {
+					print "ERROR: Error creating $mandir - general error: $message\n";
+				}
+				else {
+					print "ERROR: Error creating $mandir - problem creating $file: $message\n";
+				}
+			}
+		}
+	}
+
+	log_info("Extracting manpages of $packagename version '$version' in $mandir\n");
 	# You can either do a search in the binary files:
 	if ( $EXTENSION eq "deb" ) {
 	    my $result =  extract_manpages($WORKDIR, $file, $mandir) ;
@@ -196,8 +263,10 @@
 		#    if (  -e "$mandir" ) {
 		#	    rmdir $mandir || die ("Could not remove $mandir: $!");
 		#    }
-		# Its best to keept it to prevent the script (when its rerun) to go through the same
-		# packages twice
+		# TODO: If the directory is kept it will  prevent the script (when its rerun) to go through the same
+		# packages twice due to the -e $mandir check above, however, this (needlessly) creates
+		# quite a few directories (one per manpage). It might be better to keep an index file
+		# of reviewed packages and check from there instead of by using empty directories
 	    }
 	}	
 	# Now we are done, cleanup
@@ -255,9 +324,9 @@
 		# If we have a directory then move all the files in it
 		# otherwise, we will return with an error 
 		if ( -e "$wdir/usr/" ) {
-			system "mv $wdir/* $dstdir" ;
+			system "mv $wdir/usr/ $dstdir" ;
 			if ( $? != 0 ) {
-				printf STDERR "Error moving directory $wdir to $dstdir: $?";
+				printf STDERR "Error moving directory $wdir/usr/ to $dstdir: $?";
 				$result = 1;
 			}  else {
 				# IF we got there everything worked fine and we have manpages in the archive
@@ -271,8 +340,8 @@
 	}
 
 # Clean up temporary files before returning
-	unlink $tempfile;
 	close $tempfileh; 
+	unlink $tempfile or warn "Could not unlink $tempfile: $!";
 # And return with our result
 	return $result;
 }


Reply to: