[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#950589: lintian: collection/src-orig-index mishandles tarballs with whitespace in owner field



Niko Tyni wrote:

> I see #895175 was a similar issue and resulted in a rather complicated
> regexp in Lintian::Collect::Package for parsing tar output (commit
> a75f3edcb099bd4b8794e2ecb7fd19e129e77f03). I expect something like that
> should work here as well. Sorry about the lack of a patch.

No worries. This is actually a bit more complicated that I thought.
Unfinished patch follows (note the FIXME):

--- a/collection/src-orig-index
+++ b/collection/src-orig-index
@@ -38,7 +38,7 @@ use lib "$ENV{'LINTIAN_ROOT'}/lib";
 
 use Lintian::Collect::Dispatcher qw(create_info);
 use Lintian::Processable::Source;
-use Lintian::Util qw(internal_error sort_file_index gzip);
+use Lintian::Util qw(internal_error sort_file_index gzip $TARTVF_REGEX);
 
 use constant EMPTY => q{};
 use constant SPACE => q{ };
@@ -183,7 +183,7 @@ sub index_orig {
         # prefix.
         my $prefix;
         for my $line (@index) {
-            my ($filename) = ($line =~ /^(?:\S+\s+){5}(.*)/);
+            my $filename = ($line =~ $TARTVF_REGEX)[5];
             $filename =~ s,^\./+,,o;
             my ($dirname) = ($filename =~ m,^([^/]+),);
             if (    defined $dirname
@@ -212,11 +212,14 @@ sub index_orig {
             # then strip the prefix and add $compname (if any)
             if ($prefix) {
                 @index = map {
-                    if (m,^((?:\S+\s+){5})(?:\./)?\Q$prefix\E(?:/+(.*+)?|\Z),){
-                        my ($data, $file) = ($1, $2);
+                    my @line = $_ =~ $TARTVF_REGEX;
+                    if (@line) {
+                        my $file = $line[5];
+                        $file =~ s/^(?:\.\/)?\Q$prefix\E\/+//;
                         if ($file && $file !~ m,^(?:/++)?\Z,o){
                             $file = "$compname/$file" if $compname;
-                            "$data$file\n";
+                            $line[5] = $file;
+                            join(' ', @line) . "\n";
                         } else {
                             ();
                         }
@@ -228,6 +231,7 @@ sub index_orig {
                 # Prefix with the compname (because that is where they will be
                 # unpacked to.
                 @index = map {
+                    # FIXME: Use $TARTVF_REGEX
                     s{^((?:\S++\s++){5})(?:\./)?\Q$prefix\E(?:/+)?}
                     {$1$compname/}r
                 } @index;
diff --git a/lib/Lintian/Info/Package.pm b/lib/Lintian/Info/Package.pm
index 91011d8c7..ebf717875 100644
--- a/lib/Lintian/Info/Package.pm
+++ b/lib/Lintian/Info/Package.pm
@@ -30,7 +30,7 @@ use Scalar::Util qw(blessed);
 use Lintian::Path;
 use Lintian::Path::FSInfo;
 use Lintian::Util
-  qw(internal_error open_gz perm2oct normalize_pkg_path dequote_name);
+  qw(internal_error open_gz perm2oct normalize_pkg_path dequote_name $TARTVF_REGEX);
 
 use Moo::Role;
 use namespace::clean;
@@ -286,14 +286,7 @@ sub _fetch_index_data {
         my (%file, $perm, $operm, $ownership, $name, $raw_type, $size);
         my ($date, $time);
 
-        # Parse line from output of "tar -tvf" allowing for spaces within the
-        # ownership field whilst still allowing spaces in filenames. (#895175)
-        #
-        # Note this cannot ever be 100% reliable as the filename might contain
-        # "fake" dates.
-        ($perm,$ownership,$size,$date,$time,$name)
-          = $line
-          =~ /^(.{10}) (.*?) (\d+) ([-\d]{10}) (?:([:\d]{5,8}(?:.\d+)?)[ ]+)?(.*)$/;
+        ($perm,$ownership,$size,$date,$time,$name) = $line =~ $TARTVF_REGEX;
         croak "cannot parse tar output from $index: \"$line\""
           unless defined $perm;
         $ownership =~ s/\s+$//;
diff --git a/lib/Lintian/Util.pm b/lib/Lintian/Util.pm
index 2487f5af7..84cab8135 100644
--- a/lib/Lintian/Util.pm
+++ b/lib/Lintian/Util.pm
@@ -85,6 +85,7 @@ BEGIN {
           $PKGNAME_REGEX
           $PKGREPACK_REGEX
           $PKGVERSION_REGEX
+          $TARTVF_REGEX
     ));
 }
 
@@ -165,6 +166,19 @@ our $PKGVERSION_REGEX = qr/
                  (?: - [0-9A-Za-z.+:~]+ )*   # Optional debian revision (+ upstreams versions with hyphens)
                           /xoa;
 

§


Regards,

-- 
      ,''`.
     : :'  :     Chris Lamb
     `. `'`      lamby@debian.org 🍥 chris-lamb.co.uk
       `-


Reply to: