Bug#950589: lintian: collection/src-orig-index mishandles tarballs with whitespace in owner field
Niko Tyni wrote:
> I see #895175 was a similar issue and resulted in a rather complicated
> regexp in Lintian::Collect::Package for parsing tar output (commit
> a75f3edcb099bd4b8794e2ecb7fd19e129e77f03). I expect something like that
> should work here as well. Sorry about the lack of a patch.
No worries. This is actually a bit more complicated that I thought.
Unfinished patch follows (note the FIXME):
--- a/collection/src-orig-index
+++ b/collection/src-orig-index
@@ -38,7 +38,7 @@ use lib "$ENV{'LINTIAN_ROOT'}/lib";
use Lintian::Collect::Dispatcher qw(create_info);
use Lintian::Processable::Source;
-use Lintian::Util qw(internal_error sort_file_index gzip);
+use Lintian::Util qw(internal_error sort_file_index gzip $TARTVF_REGEX);
use constant EMPTY => q{};
use constant SPACE => q{ };
@@ -183,7 +183,7 @@ sub index_orig {
# prefix.
my $prefix;
for my $line (@index) {
- my ($filename) = ($line =~ /^(?:\S+\s+){5}(.*)/);
+ my $filename = ($line =~ $TARTVF_REGEX)[5];
$filename =~ s,^\./+,,o;
my ($dirname) = ($filename =~ m,^([^/]+),);
if ( defined $dirname
@@ -212,11 +212,14 @@ sub index_orig {
# then strip the prefix and add $compname (if any)
if ($prefix) {
@index = map {
- if (m,^((?:\S+\s+){5})(?:\./)?\Q$prefix\E(?:/+(.*+)?|\Z),){
- my ($data, $file) = ($1, $2);
+ my @line = $_ =~ $TARTVF_REGEX;
+ if (@line) {
+ my $file = $line[5];
+ $file =~ s/^(?:\.\/)?\Q$prefix\E\/+//;
if ($file && $file !~ m,^(?:/++)?\Z,o){
$file = "$compname/$file" if $compname;
- "$data$file\n";
+ $line[5] = $file;
+ join(' ', @line) . "\n";
} else {
();
}
@@ -228,6 +231,7 @@ sub index_orig {
# Prefix with the compname (because that is where they will be
# unpacked to.
@index = map {
+ # FIXME: Use $TARTVF_REGEX
s{^((?:\S++\s++){5})(?:\./)?\Q$prefix\E(?:/+)?}
{$1$compname/}r
} @index;
diff --git a/lib/Lintian/Info/Package.pm b/lib/Lintian/Info/Package.pm
index 91011d8c7..ebf717875 100644
--- a/lib/Lintian/Info/Package.pm
+++ b/lib/Lintian/Info/Package.pm
@@ -30,7 +30,7 @@ use Scalar::Util qw(blessed);
use Lintian::Path;
use Lintian::Path::FSInfo;
use Lintian::Util
- qw(internal_error open_gz perm2oct normalize_pkg_path dequote_name);
+ qw(internal_error open_gz perm2oct normalize_pkg_path dequote_name $TARTVF_REGEX);
use Moo::Role;
use namespace::clean;
@@ -286,14 +286,7 @@ sub _fetch_index_data {
my (%file, $perm, $operm, $ownership, $name, $raw_type, $size);
my ($date, $time);
- # Parse line from output of "tar -tvf" allowing for spaces within the
- # ownership field whilst still allowing spaces in filenames. (#895175)
- #
- # Note this cannot ever be 100% reliable as the filename might contain
- # "fake" dates.
- ($perm,$ownership,$size,$date,$time,$name)
- = $line
- =~ /^(.{10}) (.*?) (\d+) ([-\d]{10}) (?:([:\d]{5,8}(?:.\d+)?)[ ]+)?(.*)$/;
+ ($perm,$ownership,$size,$date,$time,$name) = $line =~ $TARTVF_REGEX;
croak "cannot parse tar output from $index: \"$line\""
unless defined $perm;
$ownership =~ s/\s+$//;
diff --git a/lib/Lintian/Util.pm b/lib/Lintian/Util.pm
index 2487f5af7..84cab8135 100644
--- a/lib/Lintian/Util.pm
+++ b/lib/Lintian/Util.pm
@@ -85,6 +85,7 @@ BEGIN {
$PKGNAME_REGEX
$PKGREPACK_REGEX
$PKGVERSION_REGEX
+ $TARTVF_REGEX
));
}
@@ -165,6 +166,19 @@ our $PKGVERSION_REGEX = qr/
(?: - [0-9A-Za-z.+:~]+ )* # Optional debian revision (+ upstreams versions with hyphens)
/xoa;
§
Regards,
--
,''`.
: :' : Chris Lamb
`. `'` lamby@debian.org 🍥 chris-lamb.co.uk
`-
Reply to: