Hi, I've noticed recently that linda has its own check for doc-base files (which unfortunately doesn't work yet because of bug#296859), and I realised that I can now better verify the results given by lintian with my patch applied. It's worth noting that the linda authors did the doc-base check in a very different and more reliable way than I did: they unpack the *.deb to a temporary directory and then check for the existence of the files referenced in doc-base. To test my patch I've run both linda and lintian on a set of 200 packages containing doc-base files. Of course, while testing I found some minor bugs in the patch; here's the patch changelog: + `*' and `?' glob characters no longer match slashes (previously, for example, `/directory/*.html' matched `/directory/subdirectory/test.html') + explicitly ignore filename patterns containing character-class wildcards (but I don't think they're used in any doc-base file) + handle the case where the file referenced by a doc-base file exists, but is a directory + do not split the contents of the `Files:' field; the doc-base documentation does not say whether the field can contain a list of files, so I assume that it can't The corrected patch is attached. It gives the same results as linda's check (on the package set I've tested), and I think it is really worth including in lintian. Best Regards, robert
diff -Nur lintian-1.23.8.old/checks/menus lintian-1.23.8/checks/menus --- lintian-1.23.8.old/checks/menus 2004-06-20 14:09:00.000000000 +0200 +++ lintian-1.23.8/checks/menus 2005-02-26 21:52:05.000000000 +0100 @@ -26,6 +26,8 @@ use Util; my $pkg; +my @all_files = (); +my %all_links = (); sub run { @@ -74,6 +76,7 @@ $file =~ s,^(\./),,; my $temp_file = $file; # save this for the link checks to follow $file =~ s/ link to .*//; + &add_file_link_info($file); # must be called before stripping " ->.*" part $file =~ s/ -> .*//; my $operm = perm2oct($perm); @@ -111,6 +114,7 @@ } } } +close IN; # prerm scripts should not call update-menus if ($prerm{'calls-updatemenus'}) { @@ -162,6 +166,8 @@ } # check the contents of the doc-base file(s) +# my $dbdir="/usr/share/doc-base"; + my $dbdir="doc-base"; opendir DOCBASEDIR, "doc-base" or fail("cannot read doc-base directory."); while (my $dbfile = readdir DOCBASEDIR) { next if -x "doc-base/$dbfile"; # don't try to parse executables, plus we already warned about it @@ -169,7 +175,42 @@ fail("cannot open doc-base file $dbfile for reading."); while (<IN>) { if (/usr\/doc/) { - tag "doc-base-file-references-usr-doc", "$dbfile"; + tag "doc-base-file-references-usr-doc", "$dbdir/$dbfile:$."; + } + # Check if files refrenced by doc-base are included in the package. + # The "Index" field should refer to only one file, wildcards are not allowed + # The "Files" field is a whitespace-separated list of files and can contain wildcards + if (/^(Index|Files)\s*:\s*(.*)\s*$/i) { + my $is_index = (lc($1) eq "index"); + my $ref_file = $2; + + my @ref_files = (); + # Some packages have space-separated list of filenames in the Files field. + # Although I think such a list should be allowed, according to + # the (poor) doc-base's documentation the `Files:' field can contain only one + # filename pattern. In a case you want to support the list, just uncomment + # the double-hashed lines below. 
+ ## if ($is_index) { + push @ref_files, $ref_file; + ##} else { + ## @ref_files = split(/\s+/, $ref_file); + ##} + + foreach my $file (@ref_files) { + my $re = quotemeta( &delink( $file ) ); + if (not $is_index) { + next if ($re =~ /\[/); # filename probably contains `[...]' globbing + # which we don't support + # handle other shell wildcards + $re =~ s/\\\*/[^\/]*/g; + $re =~ s/\\\?/[^\/]/g; + $re .= '/?'; # allow filenames to be directories + } + + if (not grep (/^${re}$/, @all_files)) { + tag "doc-base-file-refers-to-unknown-file", "$dbdir/$dbfile:$.", "$file"; + } + } } } close IN; @@ -207,6 +248,92 @@ # ----------------------------------- +# Add file and link to %all_files and %all_links +# Note that both files and links had to include leading `/' +sub add_file_link_info { + my $file = shift; + my $link = undef; + + $file = "/" . $file if (not $file =~ m/^\//); # make file absolute + $file =~ s/\/+/\//g; # remove duplicated `/' + ($file, $link) = split(/ -> /, $file); + + push @all_files, $file; + + if (defined $link) { + if (not $link =~ m,^/,) { # not absolute link + $link = "/" . $link; # make sure link starts with '/' + $link =~ s,/+\./+,/,g; # remove all /./ parts + my $dcount = 1; + while ($link =~ s,^/+\.\./+,/,) { #\ count & remove + $dcount++; #/ any leading /../ parts + } + my $f = $file; + while ($dcount--) { #\ remove last $dcount + $f =~ s,/[^/]*$,,; #/ path components from $file + } + $link = $f . 
$link; # now we should have absolute link + } + $all_links{$file} = $link unless ($link eq $file); # ignore self-referencing symlinks + ### print STDERR "Link: $file --> $link\n"; + } +} + + +# Dereference all symlinks in file, uses %all_links +sub delink { + my $file = shift; + + $file =~ s/\/+/\//g; # remove duplicated '/' + return $file unless scalar(%all_links); # package doesn't contain any symlink + + ### print STDERR "Delink Input: $file\n"; + + my $p1 = ""; + my $p2 = $file; + my %used_links = (); + + # + # In the loop below we split $file into two parts on each next '/' + # until there's no remaining slashes. + # We try substituting the first part with corresponding symlink + # and if it succeedes, we start the procedure from beginning. + # + # Example: + # Let $all_links{"/a/b"} == "/d", and $file == "/a/b/c" + # Then 0) $p1 == "", $p2 == "/a/b/c" + # 1) $p1 == "/a", $p2 == "/b/c" + # 2) $p1 == "/a/b", $p2 == "/c" ; substitute "/a/b" for "/d" + # 3) $p1 == "", $p2 == "/d/c" + # 4) $p1 == "/d", $p2 == "/c" + # 5) $p1 == "/d/c", $p2 == "" + # + # Note that the algorithm supposes, that + # i) $all_links{$X} != $X for each $X + # ii) both keys and values of %all_links start with '/' + # + + while (($p2 =~ s/^\/[^\/]*//g) > 0) { + $p1 .= $&; + ### print STDERR " (p1,p2): ($p1 , $p2)\n"; + if (defined $all_links{$p1}) { + return '!!! SYMLINK LOOP !!!' if (defined $used_links{$p1}); # symlink loop + $p2 = $all_links{$p1} . $p2; + $p1 = ""; + $used_links{$p1} = 1; + } + } + + + # After the loop $p2 should be empty and $p1 should contain dereferenced file. + # In some rare cases when $file contains no slashes, $p1 will be empty + # and $p2 will contain out result (which will be equal to $file) + ### print STDERR "Delink Output: " . ( $p1 ne "" ? $p1 : $p2 ). "\n\n"; + return $p1 ne "" ? 
$p1 : $p2; +} + + + # translate permission strings like `-rwxrwxrwx' into an octal number sub perm2oct { my ($t) = @_; diff -Nur lintian-1.23.8.old/checks/menus.desc lintian-1.23.8/checks/menus.desc --- lintian-1.23.8.old/checks/menus.desc 2004-06-20 14:09:00.000000000 +0200 +++ lintian-1.23.8/checks/menus.desc 2005-02-26 10:35:58.000000000 +0100 @@ -149,3 +149,8 @@ Type: warning Info: Files in <tt>/usr/share/doc-base</tt> should only contain links to files in the <tt>/usr/share/doc</tt> directory. + +Tag: doc-base-file-refers-to-unknown-file +Type: warning +Info: File referenced by the <tt>doc-base</tt> file is not included in the package. +
Attachment:
signature.asc
Description: Digital signature