r10417 - /man-cgi/extractor/manpage-extractor.pl
Author: jfs
Date: Sun May 25 13:30:16 2014
New Revision: 10417
URL: http://svn.debian.org/wsvn/?sc=1&rev=10417
Log:
- Add one option to retrieve the packages to extract from standard input (with the precise location
in the pool)
- Generalise how information (INFO) is logged and add an option to include a timestamp, so that
logs record when each run starts and finishes
- Do not move the whole contents of workdir, since this moves the *temporary* file itself too!
(consequently, it does not get unlinked and removed from the system, leaving a lot of cruft)
- The --force option now removes the previous directory to ensure all the contents get replaced
without errors
- Use File::Path's remove_tree and make_path and handle errors properly in both cases; errors
from mkpath (now obsolete) were not being handled properly
Modified:
man-cgi/extractor/manpage-extractor.pl
Modified: man-cgi/extractor/manpage-extractor.pl
URL: http://svn.debian.org/wsvn/man-cgi/extractor/manpage-extractor.pl?rev=10417&op=diff
==============================================================================
--- man-cgi/extractor/manpage-extractor.pl (original)
+++ man-cgi/extractor/manpage-extractor.pl Sun May 25 13:30:16 2014
@@ -26,12 +26,13 @@
use File::Basename;
use Getopt::Long;
use File::Temp qw/tempfile/;
-use File::Path;
+use File::Path qw(make_path remove_tree);
# Options
# -d - debug
# -f - force extraction
-# -r - read list of packages from stdin
+# -r - read names of packages from stdin
+# -l - read list of package files from stdin
# -o directory - Output directory (defaults to './manpages-files')
# -w directory - Work directory (defaults to './work')
# -a architecture - Only analyse binary packages of this arch
@@ -45,11 +46,13 @@
my $debug = '';
my $force = '';
my $readinput = '';
+my $listinput = '';
my $ARCHITECTURE = "i386";
GetOptions ("output=s" => \$OUTPUTDIR, # string
"workdir=s" => \$WORKDIR, # string
"architecture=s" => \$ARCHITECTURE, # string
"readinput" => \$readinput, # flag
+ "listinput" => \$listinput, # flag
"force" => \$force, # flag
"debug" => \$debug) # flag
or die("Error in command line arguments\n");
@@ -84,6 +87,7 @@
if ( $readinput ) {
# Download packages and then extract
my $mirror = shift;
+ log_info("Starting extraction of selected packages.", 1);
while ( my $package = <STDIN> ) {
chomp($package);
# Obtain a list of all packages
@@ -94,15 +98,32 @@
extract_package($file);
}
close PACK;
- print "INFO: Finished extraction.\n";
+ log_info("Finished extraction.", 1);
+ }
+} elsif ( $listinput ) {
+# Extract from packages provided
+ my $mirror = shift;
+ log_info("Starting extraction of selected packages.", 1);
+ while ( my $filename = <STDIN> ) {
+        # Strip the line terminator; blank lines carry no path, so skip them
+        # instead of trying to extract the mirror directory itself
+        chomp($filename);
+        next if $filename eq '';
+        print "DEBUG: Looking for $filename in $mirror\n" if $debug;
+        # The path is appended directly to the mirror location, so make sure
+        # it starts with a '/' separator (the check must test for a forward
+        # slash, not a backslash)
+        $filename = "/".$filename if ( $filename !~ m{^/} );
+        my $file = $mirror.$filename;
+        if ( -e $file ) {
+            extract_package($file);
+            log_info("Finished extraction.", 1);
+        } else {
+            # Terminate the message so consecutive errors do not run together
+            print "ERROR: Cannot find file $file\n";
+        }
}
} else {
# Recursive call
foreach my $dir (@ARGV) {
if ( -d $dir ) {
- print "INFO: Starting extraction of manpages in '$dir'\n";
+ my $timestamp = localtime(time);
+ log_info("Starting extraction of manpages in '$dir'", 1);
scan_directory($dir);
- print "INFO: Finished extraction.\n";
+ log_info("Finished extraction.", 1);
} else {
print "ERROR: Will not extract manpages from '$dir', it is not a directory\n";
}
@@ -110,6 +131,18 @@
}
exit 0;
+
+# Print an informational message to standard output, prefixed with "INFO: ".
+#   $message - text to log; trailing newlines are stripped, so callers that
+#              pass a message ending in "\n" do not produce a stray blank line
+#   $stamp   - optional flag; when true, the current local time is appended
+#              to the message as " - <timestamp>"
+# Returns nothing.
+# TODO - generalise for ERROR and DEBUG
+sub log_info {
+    my ($message, $stamp) = @_;
+    $message = '' if ! defined($message);
+    $message =~ s/\n+\z//;
+    my $line = "INFO: $message";
+    # Only compute the timestamp when it is actually going to be printed
+    $line .= " - " . scalar(localtime(time)) if $stamp;
+    print "$line\n";
+    return;
+}
sub scan_directory {
my ($dir) =@_;
@@ -158,7 +191,7 @@
# Note, this means that we will only analyse one binary package
# of all the different architectures available
if ( $arch ne $ARCHITECTURE && $arch ne "all" ) {
- print "INFO: Skipping package file (architecture '$arch', we want '$ARCHITECTURE')\n" if $debug;
+ log_info("Skipping package file (architecture '$arch', we want '$ARCHITECTURE')\n") if $debug;
return 0;
}
if ( $EXTENSION eq "dsc" and $debfile =~ /^.*?_(.*?)\.$EXTENSION$/ ) {
@@ -174,16 +207,50 @@
$mandir = "${OUTPUTDIR}/${pooldir}/${packagename}";
}
if ( -e $mandir ){
+ # Note: If the directory does not contain any files, it means that either the package
+ # did not contain any manpages or that there was an error when extracting the manpages in
+ # previous runs
+ # TODO: Maybe it's best to use an alternative mechanism to avoid going through
+ # the same package twice, like keeping an index of extracted packages in different
+ # runs and looking for the package there
if ( ! $force ) {
- print "INFO: Skipping package $packagename (version '$version' already extracted)\n" if $debug;
+ log_info("Skipping package $packagename (version '$version' already extracted)\n") if $debug;
return 0;
} else {
- print "INFO: Forcing overwritting of package $packagename (version '$version' already extracted)\n" if $debug;
- }
- }
- mkpath "$mandir" || die ("Could not create $mandir: $!");
-
- print "INFO: Extracting manpages of $packagename version '$version' in $mandir\n";
+ log_info("Forcing overwritting of package $packagename (version '$version' already extracted)\n") if $debug;
+# Remove mandir, it gets recreated again after its removal
+ my $result = remove_tree($mandir, {verbose => $debug, keep_root => 0, safe => 0, error => \my $err});
+ if (@$err) {
+ for my $diag (@$err) {
+ my ($file, $message) = %$diag;
+ if ($file eq '') {
+ print "ERROR: Error removing $mandir - general error: $message\n";
+ }
+ else {
+ print "ERROR: Error removing $mandir - problem unlinking $file: $message\n";
+ }
+ }
+ }
+ }
+ }
+
+# Create the mandir
+ if ( make_path ("$mandir", {verbose => $debug, mode => 0755, error => \my $err}) == 0 ) {
+# No directory created, check why
+ if (@$err) {
+ for my $diag (@$err) {
+ my ($file, $message) = %$diag;
+ if ($file eq '') {
+ print "ERROR: Error creating $mandir - general error: $message\n";
+ }
+ else {
+ print "ERROR: Error creating $mandir - problem creating $file: $message\n";
+ }
+ }
+ }
+ }
+
+ log_info("Extracting manpages of $packagename version '$version' in $mandir\n");
# You can either do a search in the binary files:
if ( $EXTENSION eq "deb" ) {
my $result = extract_manpages($WORKDIR, $file, $mandir) ;
@@ -196,8 +263,10 @@
# if ( -e "$mandir" ) {
# rmdir $mandir || die ("Could not remove $mandir: $!");
# }
- # Its best to keept it to prevent the script (when its rerun) to go through the same
- # packages twice
+ # TODO: If the directory is kept, it will prevent the script (when it's rerun) from going through
+ # the same packages twice due to the -e $mandir check above; however, this (needlessly) creates
+ # quite a few directories (one per manpage). It might be better to keep an index file
+ # of reviewed packages and check from there instead of using empty directories
}
}
# Now we are done, cleanup
@@ -255,9 +324,9 @@
# If we have a directory then move all the files in it
# otherwise, we will return with an error
if ( -e "$wdir/usr/" ) {
- system "mv $wdir/* $dstdir" ;
+ system "mv $wdir/usr/ $dstdir" ;
if ( $? != 0 ) {
- printf STDERR "Error moving directory $wdir to $dstdir: $?";
+ printf STDERR "Error moving directory $wdir/usr/ to $dstdir: $?";
$result = 1;
} else {
# IF we got there everything worked fine and we have manpages in the archive
@@ -271,8 +340,8 @@
}
# Clean up temporary files before returning
- unlink $tempfile;
close $tempfileh;
+ unlink $tempfile or warn "Could not unlink $tempfile: $!";
# And return with our result
return $result;
}
Reply to: