[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#196122: Patch for Bug#196122: lintian: please verify doc-base files



tags 196122 patch
thanks

Hi, 

I think lintian should validate Index: and Files: fields of doc-base
files too.
We have so many packages with those fields pointing to nonexistent files[1],
that I decided to write a small check for lintian (and it was very small
until I ran it on the comerr-dev package and realised that symlinks
need to be handled ;) )


The patch is attached. I've tested it, and haven't noticed any problems
or false positives.

Example output:
   lintian -C menus glade-doc-2_2.6.4-2_all.deb
   E: glade-doc-2: doc-base-file-refers-to-unknown-file doc-base/glade-2-turbo-start:8 /usr/share/doc/glade-common-2/help/glade-2/C/glade-turbo-start.xml
   
   lintian -C menus grub-doc_0.95+cvs20040624-10_all.deb
   E: grub-doc: doc-base-file-refers-to-unknown-file doc-base/grub:14 /usr/share/info/grub.info-*.gz
   
   lintian -C menus subversion_1.0.9-2_i386.deb
   E: subversion: doc-base-file-refers-to-unknown-file doc-base/subversion-book:8 /usr/share/doc/subversion/book/book.html
   E: subversion: doc-base-file-refers-to-unknown-file doc-base/subversion-book:9 /usr/share/doc/subversion/book/book.html


Best Regards,

robert

[1] Please see http://localhost/dwww/menu/errors.html if you have dwww
installed.
diff -Nur /home/tmp/lintian_1.23.3_all.deb/usr/share/lintian/checks/menus checks/menus
--- /home/tmp/lintian_1.23.3_all.deb/usr/share/lintian/checks/menus	2004-06-18 22:15:25.000000000 +0200
+++ checks/menus	2004-10-26 00:49:45.000000000 +0200
@@ -26,6 +26,8 @@
 use Util;
 
 my $pkg;
+my @all_files = ();
+my %all_links = ();
 
 sub run {
 
@@ -74,6 +76,7 @@
     $file =~ s,^(\./),,;
     my $temp_file = $file; # save this for the link checks to follow
     $file =~ s/ link to .*//;
+    &add_file_link_info($file); # must be called before stripping " ->.*" part 
     $file =~ s/ -> .*//;
 
     my $operm = perm2oct($perm);
@@ -111,6 +114,7 @@
 	}
     }
 }
+close IN;
 
 # prerm scripts should not call update-menus
 if ($prerm{'calls-updatemenus'}) {
@@ -162,6 +166,8 @@
     }
 
     # check the contents of the doc-base file(s)
+#    my $dbdir="/usr/share/doc-base";
+    my $dbdir="doc-base";
     opendir DOCBASEDIR, "doc-base" or fail("cannot read doc-base directory.");
     while (my $dbfile = readdir DOCBASEDIR) {
         next if -x "doc-base/$dbfile"; # don't try to parse executables, plus we already warned about it
@@ -169,7 +175,34 @@
             fail("cannot open doc-base file $dbfile for reading.");
         while (<IN>) {
             if (/usr\/doc/) {
-                tag "doc-base-file-references-usr-doc", "$dbfile";
+                tag "doc-base-file-references-usr-doc", "$dbdir/$dbfile:$.";
+            }
+            # Check that the files referenced by doc-base are included in the package.
+            # "Index" names exactly one file (no wildcards); "Files" is a whitespace-
+            # separated list that may use wildcards.  (.*?) keeps trailing blanks out of $2.
+            if (/^(Index|Files)\s*:\s*(.*?)\s*$/i) {
+                my $is_index = (lc($1) eq "index");
+                my $ref_file = $2;
+
+                my @ref_files = ();
+                if ($is_index) {
+                    push @ref_files, $ref_file;
+                } else {
+                    @ref_files = split(/\s+/, $ref_file);
+                }
+
+                foreach my $file (@ref_files) {
+                    my $re = quotemeta( &delink( $file ) );
+                    if (not $is_index) {
+                        # handle shell wildcards
+                        $re =~ s/\\\*/.*/g;
+                        $re =~ s/\\\?/./g;
+                    }
+
+                    if (not grep (/^${re}$/, @all_files)) {
+                        tag "doc-base-file-refers-to-unknown-file", "$dbdir/$dbfile:$.", "$file";
+                    }
+                }
             }
         }
         close IN;
@@ -207,6 +240,90 @@
 
 # -----------------------------------
 
+# Record one index entry in @all_files and, for symlinks, in %all_links.
+# $file is "path" or "path -> target"; stored paths and targets start with `/'.
+sub add_file_link_info {
+    my $file = shift;
+    my $link = undef;
+
+    $file = "/" . $file if (not $file =~ m/^\//); # make file absolute
+    $file =~ s/\/+/\//g;                          # remove duplicated `/'
+    ($file, $link) = split(/ -> /, $file);
+
+    push @all_files,  $file;
+
+    if (defined $link) {
+        if (not $link =~ m,^/,) {                 # not absolute link
+            $link = "/" . $link;                  # make sure link starts with '/'
+            1 while $link =~ s,/+\./+,/,g;        # repeat: one pass misses "/././"
+            my $dcount = 1;                       # 1 == drop the link's own name
+            while ($link =~ s,^/+\.\./+,/,) {     #\ count & remove
+               $dcount++;                         #/ any leading /../ parts
+            }
+            my $f = $file;
+            while ($dcount--) {                   #\ remove last $dcount
+                $f =~ s,/[^/]*$,,;                #/ path components from $file
+            }
+            $link = $f . $link;                   # now we should have absolute link
+        }
+        $all_links{$file} = $link unless ($link eq $file); # ignore self-referencing symlinks
+        ### print STDERR "Link: $file --> $link\n";
+    }
+}
+    
+
+# Dereference all symlinks in file using %all_links; returns the resolved path
+sub delink {
+    my $file = shift;
+
+    $file =~ s/\/+/\//g;                    # remove duplicated '/'
+    return $file unless scalar(%all_links); # package doesn't contain any symlink
+
+    ### print STDERR "Delink Input: $file\n";
+
+    my $p1 = "";
+    my $p2 = $file;
+    my %used_links = ();
+
+    #
+    # In the loop below we move one leading path component at a time
+    # from $p2 to $p1 until $p2 no longer starts with '/'.
+    # Whenever $p1 is a known symlink we replace it with its target
+    # and, having done so, restart the procedure from the beginning.
+    #
+    # Example:
+    #    Let $all_links{"/a/b"} == "/d", and $file == "/a/b/c"
+    #    Then 0) $p1 == "",     $p2 == "/a/b/c"
+    #         1) $p1 == "/a",   $p2 == "/b/c"
+    #         2) $p1 == "/a/b", $p2 == "/c"      ; replace "/a/b" with "/d"
+    #         3) $p1 == "",     $p2 == "/d/c"
+    #         4) $p1 == "/d",   $p2 == "/c"
+    #         5) $p1 == "/d/c", $p2 == ""
+    #
+    # Note that the algorithm supposes that
+    #    i) $all_links{$X} != $X for each $X
+    #   ii) both keys and values of %all_links start with '/'
+    #
+
+    while (($p2 =~ s/^\/[^\/]*//g) > 0) {
+        $p1 .= $&;
+        ### print STDERR " (p1,p2): ($p1 ,  $p2)\n";
+        if (defined $all_links{$p1}) {
+            return '!!! SYMLINK LOOP !!!' if (defined $used_links{$p1}); # symlink loop
+            $used_links{$p1} = 1;           # must be recorded BEFORE $p1 is reset
+            $p2 = $all_links{$p1} . $p2;
+            $p1 = "";
+        }
+    }
+
+    # a link followed twice yields the sentinel string returned above; otherwise
+    # after the loop $p2 should be empty and $p1 should contain dereferenced file
+    ### print STDERR "Delink Output: $p1\n\n";
+    return $p1;
+}
+
+
+
 # translate permission strings like `-rwxrwxrwx' into an octal number
 sub perm2oct {
     my ($t) = @_;
diff -Nur /home/tmp/lintian_1.23.3_all.deb/usr/share/lintian/checks/menus.desc checks/menus.desc
--- /home/tmp/lintian_1.23.3_all.deb/usr/share/lintian/checks/menus.desc	2004-06-18 22:15:25.000000000 +0200
+++ checks/menus.desc	2004-10-25 23:34:44.000000000 +0200
@@ -149,3 +149,8 @@
 Type: warning
 Info: Files in <tt>/usr/share/doc-base</tt> should only contain links to
  files in the <tt>/usr/share/doc</tt> directory.
+
+Tag: doc-base-file-refers-to-unknown-file
+Type: error
+Info: File referenced by the <tt>doc-base</tt> file is not included in the package.
+

Reply to: