[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#757551: lintian: check if DEP-5 debian/copyright covers all files in the unpacked sources



* Johannes Schauer <j.schauer@email.de>, 2014-08-09, 09:43:
It seems that lintian does not yet notice if a DEP-5 debian/copyright file fails to specify copyright information for some files in the unpacked sources. It would be nice if lintian could warn when debian/copyright fails to provide information about some files.

Here's a patch... sort of.

I wrote it in 2012, so it likely no longer applies cleanly. I probably tested it only lightly, if at all. The tag descriptions are missing. There are no tests.

I don't intend to work on improving the patch, but maybe somebody else will find it useful.

--
Jakub Wilk
diff --git a/checks/source-copyright b/checks/source-copyright
--- a/checks/source-copyright
+++ b/checks/source-copyright
@@ -23,6 +23,9 @@
 use strict;
 use warnings;
 
+use Cwd qw(getcwd);
+use File::Find qw();
+
 use Lintian::Relation::Version qw(versions_compare);
 use Lintian::Tags qw(tag);
 use Lintian::Util qw(read_dpkg_control slurp_entire_file);
@@ -162,6 +165,9 @@
         $required_standalone_licenses{$license} = 1;
     }
     my $commas_in_files = 0;
+    my %file_coverage = map { $_ => 0 } get_all_files($info);
+    my %file_para_coverage = ();
+    my $comma_separated_files = 0;
     my $i = 0;
     for my $para (@dep5) {
         $i++;
@@ -179,7 +185,32 @@
         }
         elsif (defined $files) {
             # Files paragraph
-            $commas_in_files = $i if not $commas_in_files and $files =~ /,/;
+            my @wildcards = split(' ', $files);
+            for my $wildcard (@wildcards) {
+                my ($regex, $wildcard_error) = wildcard_to_regex($wildcard);
+                if (defined $wildcard_error) {
+                    tag 'invalid-escape-sequence-in-dep5-copyright', substr($wildcard_error, 0, 2) . " (paragraph at line $lines[$i])";
+                } elsif ($comma_separated_files) {
+                    # Give up.
+                } else {
+                    my $used = 0;
+                    for my $srcfile (keys %file_coverage) {
+                        if ($srcfile =~ $regex) {
+                            $used = 1;
+                            $file_coverage{$srcfile} = $i;
+                            $file_para_coverage{$i} = 1;
+                        }
+                    }
+                    if (not $used) {
+                        if ($wildcard =~ /,/ and not grep(/,/, keys %file_coverage)) {
+                            $comma_separated_files = 1;
+                        } else {
+                            tag 'wildcard-matches-nothing-in-dep5-copyright', "$wildcard (paragraph at line $lines[$i])";
+
+                        }
+                    }
+                }
+            }
             if (defined $license) {
                 for (split_licenses($license)) {
                     $required_standalone_licenses{$_} = $i;
@@ -196,9 +227,20 @@
             tag 'unknown-paragraph-in-dep5-copyright', 'paragraph at line', $lines[$i];
         }
     }
-    if ($commas_in_files) {
+    if ($comma_separated_files) {
         tag 'comma-separated-files-in-dep5-copyright', 'paragraph at line', $lines[$i]
             unless grep(/,/, $info->sorted_index);
+    } else {
+        foreach my $srcfile (sort keys %file_coverage) {
+            my $i = $file_coverage{$srcfile};
+            if (not $i) {
+                tag 'file-without-copyright-information', $srcfile;
+            }
+            delete $file_para_coverage{$i};
+        }
+        foreach my $i (sort keys %file_para_coverage) {
+            tag 'unused-file-paragraph-in-dep5-copyright', "paragraph at line $lines[$i]";
+        }
     }
     while ((my $license, $i) = each %required_standalone_licenses) {
         if (not defined $standalone_licenses{$license}) {
@@ -222,6 +264,55 @@
     return map "\L$_", (split /\s++(?:and|or)\s++/);
 }
 
+sub wildcard_to_regex {
+    my ($regex) = @_;
+    $regex =~ s,^\./+,,;
+    $regex =~ s,//+,/,g;
+    my $error;
+    eval {
+        $regex =~ s{
+            (\*) |
+            (\?) |
+            ([^*?\\]+) |
+            (\\[\\*?]) |
+            (.+)
+        }{
+            if (defined $1) {
+                '.*';
+            } elsif (defined $2) {
+                '.'
+            } elsif (defined $3) {
+                quotemeta($3);
+            } elsif (defined $4) {
+                $4;
+            } else {
+                $error = $5;
+                die;
+            }
+        }egx;
+    };
+    if ($@) {
+        return (undef, $error);
+    } else {
+        return (qr/^(?:$regex)$/, undef);
+    }
+}
+
+sub get_all_files {
+    my ($info) = @_;
+    my @all_files = @{$info->sorted_index};
+    my $debfiles_root = $info->debfiles;
+    File::Find::find({
+        wanted => sub {
+            return unless -f $_;
+            my $dir = $File::Find::dir;
+            $dir =~ s,^\Q$debfiles_root\E(?:(?=/)|$),debian,;
+            push @all_files, "$dir/$_";
+        },
+    }, $debfiles_root);
+    return @all_files;
+}
+
 1;
 
 # Local Variables:
diff --git a/checks/source-copyright.desc b/checks/source-copyright.desc
--- a/checks/source-copyright.desc
+++ b/checks/source-copyright.desc
@@ -132,3 +132,27 @@
 Ref: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
 Info: The machine-readable copyright file contains a paragraph that is neither
  a standalone license paragraph nor a files paragraph.
+
+Tag: invalid-escape-sequence-in-dep5-copyright
+Severity: normal
+Certainty: possible
+Ref: http://dep.debian.net/deps/dep5/
+Info: XXX
+
+Tag: wildcard-matches-nothing-in-dep5-copyright
+Severity: minor
+Certainty: possible
+Ref: http://dep.debian.net/deps/dep5/
+Info: XXX
+
+Tag: file-without-copyright-information
+Severity: normal
+Certainty: possible
+Ref: http://dep.debian.net/deps/dep5/
+Info: XXX
+
+Tag: unused-file-paragraph-in-dep5-copyright
+Severity: minor
+Certainty: possible
+Ref: http://dep.debian.net/deps/dep5/
+Info: XXX

Reply to: