[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

[Patch] Replaced-unpack-srcpkg-l1-with-an-extended-coll-index



Hi

Attached is a patch I believe just that, but as you may have guessed I
would like a bit of peer review before I push it.  As a side-effect of
this patch, I have made the indexing support multiple tarballs.
  My greatest concern are the changes to collection/index; the rest of
them I feel are more or less straight forward.

I have not been able to fix a multi tarball source package into our test
suite as I believe it still lacks this support as well.  As test case I
used [1] and checked the index + file-info[2] file in the lab.

Thanks for considering,
~Niels

[1]
http://people.debian.org/~nthykier/lintian-tests/libcgi-application-basic-plugin-bundle-perl_0.5-1.dsc

[2] If the index file contains errors, the file-info will have a lot of
"file-not-found" errors.

>From 2f2b0015ee5671fa45eb9d5e6b3c8fb3490f5b1a Mon Sep 17 00:00:00 2001
From: Niels Thykier <niels@thykier.net>
Date: Tue, 28 Jun 2011 11:55:52 +0200
Subject: [PATCH] Replaced unpack-srcpkg-l1 with an extended coll/index

Moved the indexing code from unpack-srcpkg-l1 to coll/index with
a few changes to handle multiple orig-tarballs.

Note: Symlink of source pkg parts has been moved to Lab::Package for
when it creates the entry.  Creating a separate coll for that seemed
like overkill.
---
 collection/index        |  214 ++++++++++++++++++++++++++++++++++++++++-------
 collection/index.desc   |    7 +-
 lib/Lab/Package.pm      |   85 ++++++-------------
 unpack/unpack-srcpkg-l1 |  163 -----------------------------------
 4 files changed, 212 insertions(+), 257 deletions(-)
 delete mode 100755 unpack/unpack-srcpkg-l1

diff --git a/collection/index b/collection/index
index eedc12a..00f8263 100755
--- a/collection/index
+++ b/collection/index
@@ -29,6 +29,8 @@ use vars qw($verbose);
 
 # import perl libraries
 use lib "$ENV{'LINTIAN_ROOT'}/lib";
+use Cwd();
+use File::Spec;
 use Util;
 use Lintian::Command qw(spawn reap);
 
@@ -36,37 +38,189 @@ use Lintian::Command qw(spawn reap);
 my $pkg = shift;
 my $type = shift;
 
-my (@jobs, $job);
+unlink 'index' or fail "Could not unlink index: $!" if -e 'index' && -s 'index';
+unlink 'index-errors' or fail "Could not unlink index-errors: $!" if -e 'index-errors' && -s 'index-errors';
 
-foreach my $file qw(index index-errors index-owner-id) {
-    unlink $file or fail "$file: $!" if -f $file;
+if ($type ne 'source') {
+    index_deb();
+} else {
+    index_src();
 }
 
-$job = { fail => 'error',
-         out  => 'index',
-         err  => 'index-errors' };
-push @jobs, $job;
-# (replaces dpkg-deb -c)
-# create index file for package
-spawn($job,
-      ['dpkg-deb', '--fsys-tarfile', 'deb' ],
-      '|', ['tar', 'tfv', '-'],
-      '|', ['sed', '-e', 's/^h/-/'],
-      '|', ['sort', '-k', '6'], '&');
-
-$job = { fail => 'error',
-         out  => 'index-owner-id',
-         err  => '/dev/null' };
-push @jobs, $job;
-# (replaces dpkg-deb -c)
-# create index file for package with owner IDs instead of names
-spawn($job,
-      ['dpkg-deb', '--fsys-tarfile', 'deb' ],
-      '|', ['tar', '--numeric-owner', '-tvf', '-'],
-      '|', ['sed', '-e', 's/^h/-/'],
-      '|', ['sort', '-k', '6'], '&');
-
-reap(@jobs);
-undef @jobs;
-
 exit 0;
+
+# returns all (orig) tarballs.
+sub gather_tarballs {
+    my $file = Cwd::realpath('dsc');
+    my $dir;
+    my $data;
+    my $version;
+    my @tarballs;
+    my $base;
+    my $baserev;
+    fail "Cannot resolve \"dsc\" link for $pkg or it does not point to a file.\n" unless $file and -e $file;
+    (undef, $dir, undef) = File::Spec->splitpath($file);
+    $data = get_dsc_info($file) or fail "Could not parse dsc file for $pkg.\n";
+    #  Version handling is based on Dpkg::Version::parseversion.
+    $version = $data->{'version'};
+    if ($version =~ /:/) {
+        $version =~ s/^(?:\d+):(.+)/$1/ or fail("bad version number '$version'");
+    }
+    $baserev = $data->{'source'} . '_' . $version;
+    $version =~ s/(.+)-(.*)$/$1/;
+    $base = $data->{'source'} . '_' . $version;
+    for my $fs (split(/\n/,$data->{'files'})) {
+        $fs =~ s/^\s*//;
+        next if $fs eq '';
+        my @t = split(/\s+/o,$fs);
+        next if ($t[2] =~ m,/,);
+        # Look for $pkg_$version.orig(-$comp)?.tar.$ext (non-native)
+        #       or $pkg_$version.tar.$ext (native)
+        #  - This deliberately does not look for the debian packaging
+        #    even when this would be a tarball.
+        if ($t[2] =~ /^(?:\Q$base\E\.orig(?:-(.*))?|\Q$baserev\E)\.tar\.(?:gz|bz2|lzma|xz)$/) {
+            push @tarballs, [$t[2], $1//''];
+        }
+    }
+    fail('could not find the source tarball') unless @tarballs;
+    return @tarballs;
+}
+
+# Creates an index for the source package
+sub index_src {
+    my @tarballs = gather_tarballs();
+    my @result;
+    foreach my $tardata (@tarballs) {
+	my ($tarball, $compname) = @$tardata;
+	my @index;
+        # Collect a list of the files in the source package.  tar currently doesn't
+        # automatically recognize LZMA / XZ, so we need to add the option where it's
+        # needed.  Change hard link status (h) to regular files and remove a leading
+        # ./ prefix on filenames while we're reading the tar output.  We intentionally
+        # don't parallelize this job because we need to use the output below.
+        my @tar_options = ('-tvf');
+        my $last = '';
+        my $collect;
+        if ($tarball =~ /\.(lzma|xz)\z/) {
+            unshift(@tar_options, "--$1");
+        }
+        $collect = sub {
+            my @lines = map { split "\n" } @_;
+            if ($last ne '') {
+                $lines[0] = $last . $lines[0];
+            }
+            if ($_[-1] !~ /\n\z/) {
+                $last = pop @lines;
+            } else {
+                $last = '';
+            }
+            for my $line (@lines) {
+                $line =~ s/^h/-/;
+                if ($line and $line !~ m,^(?:\S+\s+){5}\./$,) {
+                    push(@index, $line . "\n");
+                }
+            }
+        }; # End $collect = sub;
+        spawn({ fail => 'never', out => $collect, err_append => 'index-errors' },
+              ['tar', @tar_options, $tarball]);
+        if ($last) {
+            fail("tar output (for $tarball from $pkg) does not end in a newline");
+        }
+	# We now need to see if all files in the tarball have a common prefix.  If so,
+	# we're going to strip that prefix off each file name.  We also remove lines
+	# that consist solely of the prefix.
+	my $prefix;
+	for my $line (@index) {
+	    my ($filename) = ($line =~ /^(?:\S+\s+){5}(.*)/);
+	    $filename =~ s,^\./+,,o;
+	    my ($dirname) = ($filename =~ m,^([^/]+),);
+	    if (defined($dirname) and $dirname eq $filename and not $line =~ m/^d/o) {
+		$prefix = '';
+	    } elsif (defined $dirname) {
+		if (not defined $prefix) {
+		    $prefix = $dirname;
+		} elsif ($dirname ne $prefix) {
+		    $prefix = '';
+		}
+	    } else {
+		$prefix = '';
+	    }
+	}
+	# If there is a common prefix and it is $compname, then we use that
+	# becaues that is where they will be extracted by unpacked.
+	if ($prefix ne $compname) {
+	    # If there is a common prefix and it is not $compname
+	    # then strip the prefix and add $compname (if any)
+	    if ($prefix) {
+		@index = map {
+		    if (m,^((?:\S+\s+){5})(?:\./)?\Q$prefix\E(?:/+(.*+)?|\Z),){
+			my ($data, $file) = ($1, $2);
+			if ($file && $file !~ m,^(?:/++)?\Z,o){
+			    $file = "$compname/$file" if $compname;
+			    "$data$file\n";
+			} else {
+			    ();
+			}
+		    } else {
+			();
+		    }
+		} @index;
+		my $filename = 'source-prefix';
+		$filename .= "-$compname" if $compname;
+		open(PREFIX, '>', $filename)
+		    or fail("cannot create $filename for $pkg: $!");
+                print PREFIX "$prefix\n";
+		close PREFIX;
+	    } elsif ($compname) {
+		# Prefix with the compname (because that is where they will be
+		# unpacked to.
+		@index = map { s,^((?:\S++\s++){5})(?:\./)?\Q$prefix\E(?:/+)?,$1$compname/, } @index;
+	    }
+	}
+	push @result, @index;
+    }
+    # Now that we have the file names we want, write them out sorted to the index
+    # file.
+    spawn({ fail => 'error', out_append => "index" },
+	  sub { print @result }, '|', ['sort', '-k', '6']);
+    return 1;
+}
+
+# Creates an index for binary packages
+sub index_deb {
+    my (@jobs, $job);
+
+    foreach my $file qw(index index-errors index-owner-id) {
+        unlink $file or fail "$file: $!" if -f $file;
+    }
+
+    $job = { fail => 'error',
+             out  => 'index',
+             err  => 'index-errors' };
+    push @jobs, $job;
+    # (replaces dpkg-deb -c)
+    # create index file for package
+    spawn($job,
+          ['dpkg-deb', '--fsys-tarfile', 'deb' ],
+          '|', ['tar', 'tfv', '-'],
+          '|', ['sed', '-e', 's/^h/-/'],
+          '|', ['sort', '-k', '6'], '&');
+
+    $job = { fail => 'error',
+             out  => 'index-owner-id',
+             err  => '/dev/null' };
+    push @jobs, $job;
+    # (replaces dpkg-deb -c)
+    # create index file for package with owner IDs instead of names
+    spawn($job,
+          ['dpkg-deb', '--fsys-tarfile', 'deb' ],
+          '|', ['tar', '--numeric-owner', '-tvf', '-'],
+          '|', ['sed', '-e', 's/^h/-/'],
+          '|', ['sort', '-k', '6'], '&');
+
+    reap(@jobs);
+    undef @jobs;
+
+    return 1;
+}
+
diff --git a/collection/index.desc b/collection/index.desc
index cb68914..7baecb3 100644
--- a/collection/index.desc
+++ b/collection/index.desc
@@ -1,6 +1,5 @@
 Collector-Script: index
-Info: This script create an index file of the contents in the
- binary package.
-Type: binary, udeb
-Version: 1
+Info: This script create an index file of the contents of a package.
+Type: source, binary, udeb
+Version: 2
 
diff --git a/lib/Lab/Package.pm b/lib/Lab/Package.pm
index e957a32..b7c881a 100644
--- a/lib/Lab/Package.pm
+++ b/lib/Lab/Package.pm
@@ -46,6 +46,9 @@ Hallo world
 use base qw(Class::Accessor);
 
 use strict;
+use warnings;
+
+use File::Spec;
 
 use Util;
 use Lintian::Output qw(:messages); # debug_msg and warning
@@ -154,14 +157,13 @@ sub entry_exists(){
     my $pkg_type = $self->{pkg_type};
     my $base_dir = $self->{base_dir};
 
-    # If we have a positive unpack level, something exists
-    return 1 if ($self->{_unpack_level} > 0);
-
     # Check if the relevant symlink exists.
     if ($pkg_type eq 'changes'){
 	return 1 if -l "$base_dir/changes";
     } elsif ($pkg_type eq 'binary' or $pkg_type eq 'udeb') {
 	return 1 if -l "$base_dir/deb";
+    } elsif ($pkg_type eq 'source'){
+	return 1 if -l "$base_dir/dsc";
     }
 
     # No unpack level and no symlink => the entry does not
@@ -188,8 +190,6 @@ sub create_entry(){
     my $madedir = 0;
     # It already exists.
     return 1 if ($self->entry_exists());
-    # We still use the "legacy" unpack for some things.
-    return $self->_unpack() unless ($pkg_type ne 'source');
 
     unless (-d $base_dir) {
 	mkdir($base_dir, 0777) or return 0;
@@ -199,6 +199,8 @@ sub create_entry(){
 	$link = "$base_dir/changes";
     } elsif ($pkg_type eq 'binary' or $pkg_type eq 'udeb') {
 	$link = "$base_dir/deb";
+    } elsif ($pkg_type eq 'source'){
+	$link = "$base_dir/dsc";
     } else {
 	fail "create_entry cannot handle $pkg_type";
     }
@@ -207,56 +209,25 @@ sub create_entry(){
 	rmdir($base_dir) if($madedir);
 	return 0;
     }
-    # Set the legacy "_unpack_level"
-    $self->{_unpack_level} = 1;
-    return 1;
-}
-
-
-=pod
-
-=item $lpkg->_unpack()
-
-DEPRECATED
-
-Runs the unpack script for the type of package.  This is
-deprecated but remains until all the unpack scripts have
-been replaced by coll scripts.
-
-=cut
-
-sub _unpack {
-    my ($self) = @_;
-    my $level = $self->{_unpack_level};
-    my $base_dir = $self->{base_dir};
-    my $pkg_type = $self->{pkg_type};
-    my $pkg_path = $self->{pkg_path};
-
-    debug_msg(1, sprintf("Current unpack level is %d",$level));
-
-    # Have we already run the unpack script?
-    return 1 if $level;
-
-    $self->remove_status_file();
-
-    if ( -d $base_dir ) {
-        # We were lied to, there's something already there - clean it up first
-        $self->delete_lab_entry() or return 0;
-    }
-
-    # create new directory
-    debug_msg(1, "Unpacking package ...");
-    if ($pkg_type eq 'source') {
-	Lintian::Command::Simple::run("$ENV{LINTIAN_ROOT}/unpack/unpack-srcpkg-l1", $base_dir, $pkg_path) == 0
-	    or return 0;
-    } else {
-	fail("_unpack does not know how to handle $pkg_type");
+    if ($pkg_type eq 'source'){
+	# If it is a source package, pull in all the related files
+	#  - else unpacked will fail or we would need a separate
+	#    collection for the symlinking.
+	my $data = get_dsc_info($pkg_path);
+	my (undef, $dir, undef) = File::Spec->splitpath($pkg_path);
+	for my $fs (split(m/\n/o,$data->{'files'})) {
+	    $fs =~ s/^\s*//o;
+	    next if $fs eq '';
+	    my @t = split(/\s+/o,$fs);
+	    next if ($t[2] =~ m,/,o);
+	    symlink("$dir/$t[2]", "$base_dir/$t[2]")
+		or fail("cannot symlink file $t[2]: $!");
+	}
     }
-
-    $self->{_unpack_level} = 1;
     return 1;
 }
 
+
 sub update_status_file{
     my ($self, $lint_version) = @_;
     my @stat;
@@ -264,7 +235,7 @@ sub update_status_file{
     my $fd;
     my $stf = "$self->{base_dir}/.lintian-status";
     # We are not unpacked => no place to put the status file.
-    return 0 if($self->{_unpack_level} < 1);
+    return 0 if $self->entry_exists();
     $pkg_path = $self->{pkg_path};
     unless( @stat = stat($pkg_path)){
 	warning("cannot stat file $pkg_path: $!",
@@ -308,10 +279,10 @@ sub remove_status_file{
 
 ## INTERNAL METHODS ##
 
-# Determines / Guesses the current unpack level - used by the constructor.
+# Checks if the existing (if any) entry is compatible,
+# if not, it will be removed.
 sub _check {
     my ($self) = @_;
-    my $act_unpack_level = 0;
     my $basedir = $self->{base_dir};
     if( -d $basedir ) {
 	my $remove_basedir = 0;
@@ -319,10 +290,6 @@ sub _check {
 	my $data;
 	my $pkg_version = $self->{pkg_version};
 
-	# there's a base dir, so we assume that at least
-	# one level of unpacking has been done
-	$act_unpack_level = 1;
-
 	# lintian status file exists?
 	unless (-f "$basedir/.lintian-status") {
 	    v_msg("No lintian status file found (removing old directory in lab)");
@@ -372,10 +339,8 @@ sub _check {
 	    my $lab = $self->{lab};
 	    v_msg("Removing $pkg_name");
 	    $self->delete_lab_entry() or die("Could not remove $pkg_name from lab.");
-	    $act_unpack_level = 0;
 	}
     }
-    $self->{_unpack_level} = $act_unpack_level;
     return 1;
 }
 
diff --git a/unpack/unpack-srcpkg-l1 b/unpack/unpack-srcpkg-l1
deleted file mode 100755
index 27ac7c8..0000000
--- a/unpack/unpack-srcpkg-l1
+++ /dev/null
@@ -1,163 +0,0 @@
-#!/usr/bin/perl
-# unpack-srcpkg-l1 -- lintian unpack script (source packages level 1)
-#
-# syntax: unpack-srcpkg-l1 <base-dir> <dsc-file>
-#
-# Note, that <dsc-file> must be specified with absolute path.
-
-# Copyright (C) 1998 Christian Schwarz
-# Copyright (C) 2009 Raphael Geissert
-# Copyright (C) 2009 Russ Allbery
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, you can find it on the World Wide
-# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
-# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
-# MA 02110-1301, USA.
-
-use strict;
-use warnings;
-use vars qw($verbose);
-
-($#ARGV == 1) or die 'syntax: unpack-srcpkg-l1 <base-dir> <dsc-file>';
-my $base_dir = shift;
-my $file = shift;
-
-# import perl libraries
-use lib "$ENV{'LINTIAN_ROOT'}/lib";
-use Util;
-
-use File::Spec;
-use Lintian::Command qw(spawn reap);
-
-# stat $file
-(my @stat = stat $file) or fail("$file: cannot stat: $!");
-
-# get package control information
-my $data = get_dsc_info($file);
-
-# create directory in lab
-print "N: Creating directory $base_dir ...\n" if $verbose;
-mkdir($base_dir, 0777) or fail("mkdir $base_dir: $!");
-
-# Install symbolic links to source package files.  Version handling is based
-# on Dpkg::Version::parseversion.
-my (undef, $dir, $name) = File::Spec->splitpath($file);
-my $version = $data->{'version'};
-if ($version =~ /:/) {
-    $version =~ s/^(?:\d+):(.+)/$1/ or fail("bad version number '$version'");
-}
-my $baserev = $data->{'source'} . '_' . $version;
-$version =~ s/(.+)-(.*)$/$1/;
-my $base = $data->{'source'} . '_' . $version;
-symlink($file,"$base_dir/dsc") or fail("cannot symlink dsc file: $!");
-my $tarball;
-for my $fs (split(/\n/,$data->{'files'})) {
-    $fs =~ s/^\s*//;
-    next if $fs eq '';
-    my @t = split(/\s+/o,$fs);
-    next if ($t[2] =~ m,/,);
-    if ($t[2] =~ /^(\Q$base\E\.orig|\Q$baserev\E)\.tar\.(gz|bz2|lzma|xz)$/) {
-        $tarball = $t[2];
-    }
-    symlink("$dir/$t[2]", "$base_dir/$t[2]")
-        or fail("cannot symlink file $t[2]: $!");
-}
-if (!$tarball) {
-    fail('could not find the source tarball');
-}
-
-# Collect a list of the files in the source package.  tar currently doesn't
-# automatically recognize LZMA / XZ, so we need to add the option where it's
-# needed.  Change hard link status (h) to regular files and remove a leading
-# ./ prefix on filenames while we're reading the tar output.  We intentionally
-# don't parallelize this job because we need to use the output below.
-my @tar_options = ('-tvf');
-if ($tarball =~ /\.(lzma|xz)\z/) {
-    unshift(@tar_options, "--$1");
-}
-my @index;
-my $last = '';
-my $collect = sub {
-    my @lines = map { split "\n" } @_;
-    if ($last ne '') {
-        $lines[0] = $last . $lines[0];
-    }
-    if ($_[-1] !~ /\n\z/) {
-        $last = pop @lines;
-    } else {
-        $last = '';
-    }
-    for my $line (@lines) {
-        $line =~ s/^h/-/;
-        if ($line and $line !~ m,^(?:\S+\s+){5}\./$,) {
-            push(@index, $line . "\n");
-        }
-    }
-};
-spawn({ fail => 'never', out => $collect, err => "$base_dir/index-errors" },
-      ['tar', @tar_options, "$base_dir/$tarball"]);
-if ($last) {
-    fail('tar output does not end in a newline');
-}
-
-# We now need to see if all files in the tarball have a common prefix.  If so,
-# we're going to strip that prefix off each file name.  We also remove lines
-# that consist solely of the prefix.
-my $prefix;
-for my $line (@index) {
-    my ($filename) = ($line =~ /^(?:\S+\s+){5}(.*)/);
-    $filename =~ s,^\./+,,;
-    my ($dirname) = ($filename =~ m,^([^/]+),);
-    if (defined($dirname) and $dirname eq $filename and not $line =~ /^d/) {
-        $prefix = '';
-    } elsif (defined $dirname) {
-        if (not defined $prefix) {
-            $prefix = $dirname;
-        } elsif ($dirname ne $prefix) {
-            $prefix = '';
-        }
-    } else {
-        $prefix = '';
-    }
-}
-if ($prefix) {
-    @index = map {
-        s,^((?:\S+\s+){5})(?:\./+)?\Q$prefix\E(?:/+|\Z),$1,;
-        if (/^(?:\S+\s+){5}\S+/) {
-            $_;
-        } else {
-            ();
-        }
-    } @index;
-    open(PREFIX, '>', "$base_dir/source-prefix")
-        or fail("cannot create $base_dir/source-prefix: $!");
-    print PREFIX "$prefix\n";
-    close PREFIX;
-}
-
-# Now that we have the file names we want, write them out sorted to the index
-# file.
-my $job = { fail => 'error', out => "$base_dir/index" };
-spawn($job, sub { print @index }, '|', ['sort', '-k', '6'], '&');
-
-# Wait for all jobs to finish.
-reap($job);
-
-exit 0;
-
-# Local Variables:
-# indent-tabs-mode: nil
-# cperl-indent-level: 4
-# End:
-# vim: syntax=perl sw=4 sts=4 ts=4 et shiftround
-- 
1.7.5.4


Reply to: