Hi, I've noticed recently that linda has its own check for doc-base files (which unfortunately doesn't work yet because of bug#296859), and I realised that I can now better verify the results given by lintian with my patch applied. It's worth noticing that the linda authors did the doc-base check in a very different and more reliable way than me, namely they unpack the *.deb to a temporary directory and then check the existence of the files referenced in doc-base. To test my patch I've run both linda and lintian on a set of 200 packages containing doc-base files. Of course, while testing I found some minor bugs in the patch; here's the patch changelog: + `*' and `?' glob characters no longer match slashes (previously, for example, `/directory/*.html' matched `/directory/subdirectory/test.html') + explicitly ignore filename patterns containing character class wildcards (but I don't think they're used in any doc-base file) + handle the case where the file referenced by the doc-base file exists, but is a directory + do not split the contents of the `Files:' field; the doc-base documentation does not say whether the field can contain a list of files, so I assume that it can't The corrected patch is attached. It gives the same results as linda's check (on the package set I've tested), and I think it is really worth including in lintian. Best Regards, robert
diff -Nur lintian-1.23.8.old/checks/menus lintian-1.23.8/checks/menus
--- lintian-1.23.8.old/checks/menus 2004-06-20 14:09:00.000000000 +0200
+++ lintian-1.23.8/checks/menus 2005-02-26 21:52:05.000000000 +0100
@@ -26,6 +26,8 @@
use Util;
my $pkg;
+my @all_files = ();
+my %all_links = ();
sub run {
@@ -74,6 +76,7 @@
$file =~ s,^(\./),,;
my $temp_file = $file; # save this for the link checks to follow
$file =~ s/ link to .*//;
+ &add_file_link_info($file); # must be called before stripping " ->.*" part
$file =~ s/ -> .*//;
my $operm = perm2oct($perm);
@@ -111,6 +114,7 @@
}
}
}
+close IN;
# prerm scripts should not call update-menus
if ($prerm{'calls-updatemenus'}) {
@@ -162,6 +166,8 @@
}
# check the contents of the doc-base file(s)
+# my $dbdir="/usr/share/doc-base";
+ my $dbdir="doc-base";
opendir DOCBASEDIR, "doc-base" or fail("cannot read doc-base directory.");
while (my $dbfile = readdir DOCBASEDIR) {
next if -x "doc-base/$dbfile"; # don't try to parse executables, plus we already warned about it
@@ -169,7 +175,42 @@
fail("cannot open doc-base file $dbfile for reading.");
while (<IN>) {
if (/usr\/doc/) {
- tag "doc-base-file-references-usr-doc", "$dbfile";
+ tag "doc-base-file-references-usr-doc", "$dbdir/$dbfile:$.";
+ }
+ # Check if files referenced by doc-base are included in the package.
+ # The "Index" field should refer to only one file, wildcards are not allowed
+ # The "Files" field is treated as a single filename pattern and can contain wildcards
+ if (/^(Index|Files)\s*:\s*(.*)\s*$/i) {
+ my $is_index = (lc($1) eq "index");
+ my $ref_file = $2;
+
+ my @ref_files = ();
+ # Some packages have space-separated list of filenames in the Files field.
+ # Although I think such a list should be allowed, according to
+ # the (poor) doc-base's documentation the `Files:' field can contain only one
+ # filename pattern. In a case you want to support the list, just uncomment
+ # the double-hashed lines below.
+ ## if ($is_index) {
+ push @ref_files, $ref_file;
+ ##} else {
+ ## @ref_files = split(/\s+/, $ref_file);
+ ##}
+
+ foreach my $file (@ref_files) {
+ my $re = quotemeta( &delink( $file ) );
+ if (not $is_index) {
+ next if ($re =~ /\[/); # filename probably contains `[...]' globbing
+ # which we don't support
+ # handle other shell wildcards
+ $re =~ s/\\\*/[^\/]*/g;
+ $re =~ s/\\\?/[^\/]/g;
+ $re .= '/?'; # allow filenames to be directories
+ }
+
+ if (not grep (/^${re}$/, @all_files)) {
+ tag "doc-base-file-refers-to-unknown-file", "$dbdir/$dbfile:$.", "$file";
+ }
+ }
}
}
close IN;
@@ -207,6 +248,92 @@
# -----------------------------------
+# Record one index entry in @all_files and, for "path -> target" entries, the
+# resolved absolute target in %all_links (keys and values both start with `/').
+sub add_file_link_info {
+    my $file = shift;    # index entry: "path" or "path -> target"
+    my $link = undef;
+
+    $file = "/" . $file if (not $file =~ m/^\//); # make file absolute
+    $file =~ s/\/+/\//g;                          # remove duplicated `/'
+    ($file, $link) = split(/ -> /, $file);
+
+    push @all_files, $file;
+
+    if (defined $link) {
+        if (not $link =~ m,^/,) {          # relative link: resolve against $file's directory
+            $link = "/" . $link;           # make sure link starts with '/'
+            1 while $link =~ s,/+\./+,/,;  # remove all /./ parts; repeated so "/././" collapses fully
+            my $dcount = 1;                # 1 == strip the symlink's own name from $file
+            while ($link =~ s,^/+\.\./+,/,) {    #\ count & remove
+                $dcount++;                       #/ any leading /../ parts
+            }
+            my $f = $file;
+            while ($dcount--) {                  #\ remove last $dcount
+                $f =~ s,/[^/]*$,,;               #/ path components from $file
+            }
+            $link = $f . $link;            # now we should have absolute link
+        }
+        # Self-referencing symlinks are skipped: delink() supposes $all_links{$X} ne $X.
+        $all_links{$file} = $link unless ($link eq $file);
+    }
+}
+
+
+# Dereference all symlinks in $file, using %all_links (filled by add_file_link_info).
+# Returns the resolved path, or the marker string '!!! SYMLINK LOOP !!!' when the
+# same symlink would have to be followed twice while resolving $file.
+sub delink {
+    my $file = shift;
+
+    $file =~ s/\/+/\//g;                     # remove duplicated '/'
+    return $file unless scalar(%all_links);  # package doesn't contain any symlink
+
+    ### print STDERR "Delink Input: $file\n";
+
+    my $p1 = "";          # prefix of the path examined so far
+    my $p2 = $file;       # rest of the path, still to be examined
+    my %used_links = ();  # symlinks already followed, for loop detection
+
+    # In the loop below we split $file into two parts on each next '/'
+    # until there are no remaining slashes.
+    # We try substituting the first part with the corresponding symlink
+    # and if it succeeds, we start the procedure from the beginning.
+    #
+    # Example:
+    # Let $all_links{"/a/b"} == "/d", and $file == "/a/b/c"
+    # Then 0) $p1 == "", $p2 == "/a/b/c"
+    #      1) $p1 == "/a", $p2 == "/b/c"
+    #      2) $p1 == "/a/b", $p2 == "/c" ; substitute "/d" for "/a/b"
+    #      3) $p1 == "", $p2 == "/d/c"
+    #      4) $p1 == "/d", $p2 == "/c"
+    #      5) $p1 == "/d/c", $p2 == ""
+    #
+    # Note that the algorithm supposes that
+    #   i) $all_links{$X} ne $X for each $X
+    #  ii) both keys and values of %all_links start with '/'
+    while ($p2 =~ s{^(/[^/]*)}{}) {   # peel the next "/component" off $p2 ...
+        $p1 .= $1;                    # ... and append it to the prefix
+        ### print STDERR " (p1,p2): ($p1 , $p2)\n";
+        if (defined $all_links{$p1}) {
+            return '!!! SYMLINK LOOP !!!' if (defined $used_links{$p1}); # symlink loop
+            # Record the link BEFORE resetting $p1; doing it afterwards would only
+            # ever mark the empty string and the loop check above could never fire.
+            $used_links{$p1} = 1;
+            $p2 = $all_links{$p1} . $p2;
+            $p1 = "";
+        }
+    }
+
+    # After the loop $p2 should be empty and $p1 should contain dereferenced file.
+    # In some rare cases when $file contains no slashes, $p1 will be empty
+    # and $p2 will contain our result (which will be equal to $file)
+    ### print STDERR "Delink Output: " . ( $p1 ne "" ? $p1 : $p2 ). "\n\n";
+    return $p1 ne "" ? $p1 : $p2;
+}
+
+
+
# translate permission strings like `-rwxrwxrwx' into an octal number
sub perm2oct {
my ($t) = @_;
diff -Nur lintian-1.23.8.old/checks/menus.desc lintian-1.23.8/checks/menus.desc
--- lintian-1.23.8.old/checks/menus.desc 2004-06-20 14:09:00.000000000 +0200
+++ lintian-1.23.8/checks/menus.desc 2005-02-26 10:35:58.000000000 +0100
@@ -149,3 +149,8 @@
Type: warning
Info: Files in <tt>/usr/share/doc-base</tt> should only contain links to
files in the <tt>/usr/share/doc</tt> directory.
+
+Tag: doc-base-file-refers-to-unknown-file
+Type: warning
+Info: File referenced by the <tt>doc-base</tt> file is not included in the package.
+
Attachment:
signature.asc
Description: Digital signature