Bug#757551: lintian: check if DEP-5 debian/copyright covers all files in the unpacked sources
* Johannes Schauer <j.schauer@email.de>, 2014-08-09, 09:43:
it seems that lintian does not yet notice when a DEP-5 debian/copyright
file fails to specify copyright information for some files in the
unpacked sources. It would be nice if lintian could warn when
debian/copyright does not provide information about some files.
Here's a patch... sort of.
I wrote it in 2012, so it likely no longer applies cleanly. I probably
tested it only lightly, if at all. The tag descriptions are missing.
There are no tests.
I don't intend to work on improving the patch, but maybe somebody else
will find it useful.
--
Jakub Wilk
diff --git a/checks/source-copyright b/checks/source-copyright
--- a/checks/source-copyright
+++ b/checks/source-copyright
@@ -23,6 +23,9 @@
use strict;
use warnings;
+use Cwd qw(getcwd);
+use File::Find qw();
+
use Lintian::Relation::Version qw(versions_compare);
use Lintian::Tags qw(tag);
use Lintian::Util qw(read_dpkg_control slurp_entire_file);
@@ -162,6 +165,9 @@
$required_standalone_licenses{$license} = 1;
}
my $commas_in_files = 0;
+ my %file_coverage = map { $_ => 0 } get_all_files($info);
+ my %file_para_coverage = ();
+ my $comma_separated_files = 0;
my $i = 0;
for my $para (@dep5) {
$i++;
@@ -179,7 +185,32 @@
}
elsif (defined $files) {
# Files paragraph
- $commas_in_files = $i if not $commas_in_files and $files =~ /,/;
+ my @wildcards = split(' ', $files);
+ for my $wildcard (@wildcards) {
+ my ($regex, $wildcard_error) = wildcard_to_regex($wildcard);
+ if (defined $wildcard_error) {
+ tag 'invalid-escape-sequence-in-dep5-copyright', substr($wildcard_error, 0, 2) . " (paragraph at line $lines[$i])";
+ } elsif ($comma_separated_files) {
+ # Give up.
+ } else {
+ my $used = 0;
+ for my $srcfile (keys %file_coverage) {
+ if ($srcfile =~ $regex) {
+ $used = 1;
+ $file_coverage{$srcfile} = $i;
+ $file_para_coverage{$i} = 1;
+ }
+ }
+ if (not $used) {
+ if ($wildcard =~ /,/ and not grep(/,/, keys %file_coverage)) {
+ $comma_separated_files = 1;
+ } else {
+ tag 'wildcard-matches-nothing-in-dep5-copyright', "$wildcard (paragraph at line $lines[$i])";
+
+ }
+ }
+ }
+ }
if (defined $license) {
for (split_licenses($license)) {
$required_standalone_licenses{$_} = $i;
@@ -196,9 +227,20 @@
tag 'unknown-paragraph-in-dep5-copyright', 'paragraph at line', $lines[$i];
}
}
- if ($commas_in_files) {
+ if ($comma_separated_files) {
tag 'comma-separated-files-in-dep5-copyright', 'paragraph at line', $lines[$i]
unless grep(/,/, $info->sorted_index);
+ } else {
+ foreach my $srcfile (sort keys %file_coverage) {
+ my $i = $file_coverage{$srcfile};
+ if (not $i) {
+ tag 'file-without-copyright-information', $srcfile;
+ }
+ delete $file_para_coverage{$i};
+ }
+ foreach my $i (sort keys %file_para_coverage) {
+ tag 'unused-file-paragraph-in-dep5-copyright', "paragraph at line $lines[$i]";
+ }
}
while ((my $license, $i) = each %required_standalone_licenses) {
if (not defined $standalone_licenses{$license}) {
@@ -222,6 +264,55 @@
return map "\L$_", (split /\s++(?:and|or)\s++/);
}
+# Translate a DEP-5 "Files" wildcard into an anchored regex.
+#
+# "*" matches any run of characters (slashes included), "?" matches one
+# character and "\\", "\*", "\?" are the only valid escape sequences.
+# Returns ($regex, undef) on success, or (undef, $rest) where $rest is
+# the unparsable remainder starting at the invalid backslash.
+sub wildcard_to_regex {
+    my ($wildcard) = @_;
+    # Normalise the path: drop a leading "./" and collapse slash runs.
+    $wildcard =~ s,^\./+,,;
+    $wildcard =~ s,//+,/,g;
+    my $error;
+    # The last alternative only matches at a backslash that starts no
+    # valid escape; it swallows the rest, so the substitution ends there
+    # and no eval/die (which would clobber $@) is needed to bail out.
+    (my $regex = $wildcard) =~ s{
+        (\*) |
+        (\?) |
+        ([^*?\\]+) |
+        (\\[\\*?]) |
+        (.+)
+    }{
+        defined $1 ? '.*'
+      : defined $2 ? '.'
+      : defined $3 ? quotemeta($3)
+      : defined $4 ? $4
+      :              do { $error = $5; '' };
+    }egsx;
+    return (undef, $error) if defined $error;
+    # \A...\z plus /s: a trailing or embedded newline in a file name
+    # must not let the pattern match (or fail) by accident.
+    return (qr/\A(?:$regex)\z/s, undef);
+}
+
+# Package index (list or array ref from sorted_index, which the rest of
+# this check consumes as a list) plus all regular debfiles as "debian/...".
+sub get_all_files {
+    my ($info) = @_;
+    my @all_files = map { ref($_) eq 'ARRAY' ? @{$_} : $_ } $info->sorted_index;
+    my $debfiles_root = $info->debfiles;
+    my $wanted = sub {
+        return unless -f $_;
+        (my $dir = $File::Find::dir) =~ s,^\Q$debfiles_root\E(?:(?=/)|$),debian,;
+        push @all_files, "$dir/$_";
+    };
+    File::Find::find({ wanted => $wanted }, $debfiles_root);
+    return @all_files;
+}
+
1;
# Local Variables:
diff --git a/checks/source-copyright.desc b/checks/source-copyright.desc
--- a/checks/source-copyright.desc
+++ b/checks/source-copyright.desc
@@ -132,3 +132,27 @@
Ref: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Info: The machine-readable copyright file contains a paragraph that is neither
a standalone license paragraph nor a files paragraph.
+
+Tag: invalid-escape-sequence-in-dep5-copyright
+Severity: normal
+Certainty: possible
+Ref: http://dep.debian.net/deps/dep5/
+Info: XXX
+
+Tag: wildcard-matches-nothing-in-dep5-copyright
+Severity: minor
+Certainty: possible
+Ref: http://dep.debian.net/deps/dep5/
+Info: XXX
+
+Tag: file-without-copyright-information
+Severity: normal
+Certainty: possible
+Ref: http://dep.debian.net/deps/dep5/
+Info: XXX
+
+Tag: unused-file-paragraph-in-dep5-copyright
+Severity: minor
+Certainty: possible
+Ref: http://dep.debian.net/deps/dep5/
+Info: XXX
Reply to: