[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#633779: lintian: validate DEP-5 debian/copyright files



* Niels Thykier <niels@thykier.net>, 2011-08-13, 13:20:
Feel free to add a new separate check if that results in more
maintainable code.  :)

Okay, updated patch attached.

--
Jakub Wilk
diff --git a/checks/source-copyright-file b/checks/source-copyright-file
new file mode 100644
--- /dev/null
+++ b/checks/source-copyright-file
@@ -0,0 +1,231 @@
+# source-copyright-file -- lintian check script -*- perl -*-
+
+# Copyright (C) 2011 Jakub Wilk
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, you can find it on the World Wide
+# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
+# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+# MA 02110-1301, USA.
+#
+package Lintian::source_copyright_file;
+
+use strict;
+use warnings;
+
+use Lintian::Relation::Version qw(versions_compare);
+use Lintian::Tags qw(tag);
+use Util;
+
+my $dep5_last_normative_change = '0+svn~166';   # older revisions are tagged out-of-date
+my $dep5_last_overhaul = '0+svn~148';           # revisions >= this get parsed as control data
+my %dep5_renamed_fields = (                     # obsolete header field => current field name
+    'format-specification' => 'format',
+    'maintainer' => 'upstream-contact',
+    'upstream-maintainer' => 'upstream-contact',
+    'contact' => 'upstream-contact',
+    'name' => 'upstream-name',
+);
+
+# Check entry point: locate debian/copyright (or debian/<pkg>.copyright),
+# recognise a DEP-5 "Format:" URI in it and, for recent format revisions,
+# validate the paragraphs.  Takes ($pkg, $type, $info); reports via tag().
+sub run {
+my ($pkg, $type, $info) = @_;
+my $pkgs = $info->binaries;
+my $copyright_filename = $info->debfiles('copyright');
+
+if (-l $copyright_filename) {
+    tag 'debian-copyright-is-symlink';
+    return;
+}
+
+if (not -f $copyright_filename) {
+    tag 'no-debian-copyright';
+    $copyright_filename = undef;
+    if (keys(%$pkgs) == 1) {
+	# If debian/copyright doesn't exist, and only a single binary
+	# package is built, there's a good chance that the copyright file is
+	# available as debian/<pkgname>.copyright.
+	$copyright_filename = $info->debfiles((keys(%$pkgs))[0] . '.copyright');
+	if (not -f $copyright_filename or -l $copyright_filename) {
+	    $copyright_filename = undef;
+	}
+    }
+}
+
+return unless defined $copyright_filename;
+
+local $_ = slurp_entire_file($copyright_filename);    # spare the caller's $_
+study $_;
+
+my @dep5;
+
+# Cheap heuristic: a Format/Format-Specification field whose (possibly
+# folded) value mentions DEP-5 or the machine-readable copyright format.
+if (m{
+    (^ | \n)
+    (?i: format(:|[-\s]spec) )
+    (?: . | \n\s+ )*
+    (?: /dep[5s]?\b | \bDEP-?5\b | [Mm]achine-readable\s(?:license|copyright) | /copyright-format/ | CopyrightFormat | VERSIONED_FORMAT_URL )
+}x) {
+    # Before trying to parse the copyright as Debian control file, try to
+    # determine the format URI.
+    my $first_para = $_;
+    $first_para =~ s,^#.*,,mg;
+    $first_para =~ s,[ \t]+$,,mg;
+    $first_para =~ s,^\n+,,g;
+    $first_para =~ s,\n\n.*,\n,s;
+    $first_para =~ s,\n?[ \t]+, ,g;
+    # List-context capture: $uri is undef on no match, never a stale $1.
+    my ($uri) = $first_para =~ m,^Format(?:-Specification)?:\s*(.*),mi;
+    $uri =~ s/^([^#\s]+)#\S*/$1/ if defined $uri; # strip fragment identifier
+    if (defined $uri) {
+	my $original_uri = $uri;
+	my $version;
+	if ($uri =~ m,\b(?:rev=REVISION|VERSIONED_FORMAT_URL)\b,) {
+	    tag 'boilerplate-copyright-format-uri';
+	}
+	elsif ($uri =~ s,http://wiki\.debian\.org/Proposals/CopyrightFormat\b,,) {
+	    $version = '0~wiki';
+	    $uri =~ m,^\?action=recall&rev=(\d+)$, and $version = "$version~$1";
+	}
+	elsif ($uri =~ m,^http://dep\.debian\.net/deps/dep5/?$,) {
+	    $version = '0+svn';
+	}
+	elsif ($uri =~ s,^http://svn\.debian\.org/wsvn/dep/web/deps/dep5\.mdwn\b,,) {
+	    $version = '0+svn';
+	    $uri =~ m,^\?(?:\S+&)?rev=(\d+)(?:&\S+)?$, and $version = "$version~$1";
+	}
+	elsif ($uri =~ s,^http://anonscm\.debian\.org/viewvc/dep/web/deps/dep5\.mdwn\b,,) {
+	    $version = '0+svn';
+	    $uri =~ m,^\?(?:\S+&)?revision=(\d+)(?:&\S+)?$, and $version = "$version~$1";
+	}
+	elsif ($uri =~ m,^http://www\.debian\.org/doc/(?:packaging-manuals/)?copyright-format/(\d+\.\d+)$,) {
+	    $version = $1;
+	}
+	else {
+	    tag 'unknown-copyright-format-uri', $original_uri;
+	}
+	if (defined $version) {
+	    if ($version =~ m,wiki,) {
+		tag 'wiki-copyright-format-uri', $original_uri;
+	    }
+	    elsif ($version =~ m,svn$,) {
+		tag 'unversioned-copyright-format-uri', $original_uri;
+	    }
+	    elsif (versions_compare $version, '<<', $dep5_last_normative_change) {
+		tag 'out-of-date-copyright-format-uri', $original_uri;
+	    }
+	    if (versions_compare $version, '>=', $dep5_last_overhaul) {
+		# We are reasonably certain that we're dealing with an up-to-date
+		# DEP-5 format. Let's try to do more strict checks.
+		eval {
+		    @dep5 = read_dpkg_control($copyright_filename);
+		};
+		if ($@) {
+		    $@ =~ s/^internal error: //;
+		    tag 'syntax-error-in-dep5-copyright', $@;
+		}
+	    }
+	}
+    }
+    else {
+	tag 'unknown-copyright-format-uri';
+    }
+}
+
+if (@dep5) {
+    my $first_para = shift @dep5;
+    my %standalone_licenses;             # license name => paragraph defining it
+    my %required_standalone_licenses;    # license name => paragraph needing it
+    for my $field (sort keys %{$first_para}) {
+	my $renamed_to = $dep5_renamed_fields{$field};
+	if (defined $renamed_to) {
+	    tag 'obsolete-field-in-dep5-copyright', $field, $renamed_to;
+	}
+    }
+    for my $license (split_licenses($first_para->{'license'})) {
+	$required_standalone_licenses{$license} = 1;
+    }
+    my $commas_in_files = 0;
+    my $i = 1;    # paragraph counter; the header paragraph is number 1
+    for my $para (@dep5) {
+	$i++;
+	my $license = $para->{'license'};
+	my $files = $para->{'files'};
+	if (defined $license and not defined $files) {
+	    # Standalone license paragraph
+	    if (not $license =~ m/\n/) {
+		tag 'missing-license-text-in-dep5-copyright', $license;
+	    }
+	    else {
+		($license, undef) = split /\n/, $license, 2;
+		for (split_licenses($license)) {
+		    $standalone_licenses{$_} = $i;
+		}
+	    }
+	}
+	elsif (defined $files) {
+	    # Files paragraph
+	    $commas_in_files = 1 if $files =~ /,/;
+	    if (defined $license) {
+		for (split_licenses($license)) {
+		    $required_standalone_licenses{$_} = $i;
+		}
+	    }
+	    else {
+		tag 'missing-field-in-dep5-copyright', 'paragraph', $i, 'license';
+	    }
+	    if (not defined $para->{'copyright'}) {
+		tag 'missing-field-in-dep5-copyright', 'paragraph', $i, 'copyright';
+	    }
+	}
+	else {
+	    tag 'unknown-paragraph-in-dep5-copyright', 'paragraph', $i;
+	}
+    }
+    if ($commas_in_files) {
+	tag 'comma-separated-files-in-dep5-copyright'
+	    unless grep(/,/, keys %{$info->file_info});
+    }
+    # Report the paragraph recorded for each license rather than the final $i.
+    for my $license (sort keys %required_standalone_licenses) {
+	next if defined $standalone_licenses{$license};
+	my $para_no = $required_standalone_licenses{$license};
+	tag 'missing-license-paragraph-in-dep5-copyright', 'paragraph', $para_no, $license;
+    }
+    for my $license (sort keys %standalone_licenses) {
+	next if defined $required_standalone_licenses{$license};
+	my $para_no = $standalone_licenses{$license};
+	tag 'unused-license-paragraph-in-dep5-copyright', 'paragraph', $para_no, $license;
+    }
+}
+}
+
+# Split a one-line license expression ("A and B or C") into lower-cased
+# short names; returns () for undef or multi-line (text-bearing) values.
+sub split_licenses {
+    my ($license) = @_;    # named lexical: "my $_" no longer compiles on 5.24+
+    return () if not defined $license or $license =~ /\n/;
+    $license =~ s/[(),]//g;    # /g: strip every paren/comma, not only the first
+    return map { lc } split /\s++(?:and|or)\s++/, $license;
+}
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: t
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sw=4 ts=8 noet shiftround
diff --git a/checks/source-copyright-file.desc b/checks/source-copyright-file.desc
new file mode 100644
--- /dev/null
+++ b/checks/source-copyright-file.desc
@@ -0,0 +1,132 @@
+Check-Script: source-copyright-file
+Author: Jakub Wilk <jwilk@debian.org>
+Abbrev: scpy
+Type: source
+Needs-Info: debfiles, source-control-file, file-info
+Info: This script checks if a source package conforms to policy
+ with regard to copyright files.
+ .
+ Each source package should have a debian/copyright file.
+
+Tag: debian-copyright-is-symlink
+Severity: normal
+Certainty: certain
+Info: The file <tt>debian/copyright</tt> is a symlink instead of a regular
+ file. This makes package checking and manipulation more difficult.
+ .
+ This problem may have prevented lintian from performing other checks.
+
+Tag: no-debian-copyright
+Severity: minor
+Certainty: certain
+Ref: policy 12.5
+Info: Every package must include the file <tt>/usr/share/doc/<i>pkg</i>/copyright</tt>.
+ A copy of this file should be in <tt>debian/copyright</tt> in the source package.
+
+Tag: unknown-copyright-format-uri
+Severity: pedantic
+Certainty: wild-guess
+Info: The copyright file appears to be intended as machine-readable, but
+ lintian cannot recognize its format URI.  It could be a typo for a common
+ URI or a syntax error in the first paragraph.  Please file a bug against
+ Lintian if you believe that the copyright file is syntactically valid and
+ the URI is correct.
+
+Tag: boilerplate-copyright-format-uri
+Severity: normal
+Certainty: possible
+Info: Format URI of the machine-readable copyright file contains
+ <tt>VERSIONED_FORMAT_URL</tt> or <tt>REVISION</tt> string. Please replace it
+ with an actual URI or an actual revision number respectively.
+
+Tag: wiki-copyright-format-uri
+Severity: pedantic
+Certainty: possible
+Ref: http://dep.debian.net/deps/dep5/
+Info: Format URI of the machine-readable copyright file refers to Debian Wiki.
+ .
+ Debian Wiki is not used for the format development anymore.  Please use
+ <tt>http://anonscm.debian.org/viewvc/dep/web/deps/dep5.mdwn?revision=<i>revision</i></tt>
+ as the format URI instead.
+
+Tag: unversioned-copyright-format-uri
+Severity: pedantic
+Certainty: possible
+Ref: http://dep.debian.net/deps/dep5/
+Info: Format URI of the machine-readable copyright file is not versioned.
+ .
+ Please use
+ <tt>http://anonscm.debian.org/viewvc/dep/web/deps/dep5.mdwn?revision=<i>revision</i></tt>
+ as the format URI instead.
+
+Tag: out-of-date-copyright-format-uri
+Severity: pedantic
+Certainty: possible
+Ref: http://dep.debian.net/deps/dep5/
+Info: A newer version of the machine-readable copyright file specification
+ than the one referenced by the copyright file is available.
+ .
+ This problem may have prevented lintian from performing other checks.
+
+Tag: syntax-error-in-dep5-copyright
+Severity: normal
+Certainty: possible
+Ref: http://dep.debian.net/deps/dep5/
+Info: The machine-readable copyright file didn't pass Debian control file
+ syntax check.
+
+Tag: obsolete-field-in-dep5-copyright
+Severity: normal
+Certainty: possible
+Ref: http://dep.debian.net/deps/dep5/
+Info: The machine-readable copyright file uses a field that used to be defined
+ by the specification but has since been renamed.
+ .
+ Please use Format instead of Format-Specification.
+ .
+ Please use Upstream-Contact instead of Contact, Maintainer or Upstream-Maintainer.
+ .
+ Please use Upstream-Name instead of Name.
+
+Tag: comma-separated-files-in-dep5-copyright
+Severity: normal
+Certainty: possible
+Ref: http://dep.debian.net/deps/dep5/
+Info: A list of files in the machine-readable copyright format appears to be
+ separated by commas. The file list should be whitespace separated instead.
+
+Tag: missing-field-in-dep5-copyright
+Severity: normal
+Certainty: possible
+Ref: http://dep.debian.net/deps/dep5/
+Info: The paragraph in the machine readable copyright file is missing a field
+ that is required by the specification.
+
+Tag: missing-license-paragraph-in-dep5-copyright
+Severity: normal
+Certainty: possible
+Ref: http://dep.debian.net/deps/dep5/
+Info: The files paragraph in the machine readable copyright file references a
+ license, for which no standalone license paragraph exists.
+
+Tag: missing-license-text-in-dep5-copyright
+Severity: normal
+Certainty: possible
+Ref: http://dep.debian.net/deps/dep5/
+Info: The standalone license header contains only a short license name, but
+ not the license text.
+
+Tag: unused-license-paragraph-in-dep5-copyright
+Severity: minor
+Certainty: possible
+Ref: http://dep.debian.net/deps/dep5/
+Info: The license paragraph in the machine-readable copyright file is not
+ referenced by any files paragraph.  It could be a typo in the license name or
+ the license paragraph is simply not needed and can be removed.
+
+Tag: unknown-paragraph-in-dep5-copyright
+Severity: normal
+Certainty: possible
+Ref: http://dep.debian.net/deps/dep5/
+Info: The machine-readable copyright file contains a paragraph that is neither
+ a standalone license paragraph nor a files paragraph.
diff --git a/profiles/debian/main.profile b/profiles/debian/main.profile
--- a/profiles/debian/main.profile
+++ b/profiles/debian/main.profile
@@ -6,6 +6,6 @@
  debhelper, debian-readme, debian-source-dir, description, duplicate-files,
  fields, filename-length, files, huge-usr-share, infofiles, init.d, java,
  lintian, manpages, md5sums, menu-format, menus, nmu, ocaml, patch-systems,
- po-debconf, rules, scripts, shared-libs, standards-version, symlinks,
- version-substvars, watch-file
+ po-debconf, rules, scripts, shared-libs, source-copyright-file,
+ standards-version, symlinks, version-substvars, watch-file
 

Reply to: