--- Begin Message ---
- To: Debian Bug Tracking System <submit@bugs.debian.org>
- Subject: lintian: Check for rfc2822 debian/copyright (http://wiki.debian.org/Proposals/CopyrightFormat)
- From: Mathieu Parent <math.parent@gmail.com>
- Date: Thu, 01 May 2008 21:45:34 +0200
- Message-id: <20080501194534.30387.66675.reportbug@servthieu.sathieu.net>
Package: lintian
Version: 1.23.46
Severity: wishlist
Tags: patch
The files add checks for the proposal:
http://wiki.debian.org/Proposals/CopyrightFormat
I hope this implementation will help to clarify things.
To enable, copy:
- copyright-specification to /usr/share/lintian/checks/
- copyright-specification.desc to /usr/share/lintian/checks/
- DebianCopyrightParser.pm to /usr/share/lintian/lib
and check with -I
-- System Information:
Debian Release: lenny/sid
APT prefers testing
APT policy: (990, 'testing'), (500, 'unstable'), (1, 'experimental')
Architecture: i386 (i686)
Kernel: Linux 2.6.18-6-xen-686 (SMP w/2 CPU cores)
Locale: LANG=fr_FR.UTF-8, LC_CTYPE=fr_FR.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash
Versions of packages lintian depends on:
ii binutils 2.18.1~cvs20080103-4 The GNU assembler, linker and bina
ii diffstat 1.45-2 produces graph of changes introduc
ii dpkg-dev 1.14.18 package building tools for Debian
ii file 4.23-2 Determines file type using "magic"
ii gettext 0.17-2 GNU Internationalization utilities
ii intltool-debian 0.35.0+20060710.1 Help i18n of RFC822 compliant conf
ii libparse-debianchan 1.1.1-2 parse Debian changelogs and output
ii liburi-perl 1.35.dfsg.1-1 Manipulates and accesses URI strin
ii man-db 2.5.1-3 on-line manual pager
ii perl [libdigest-md5 5.8.8-12 Larry Wall's Practical Extraction
lintian recommends no packages.
-- no debconf information
# control-file -- lintian check script -*- perl -*-
#
# Copyright (C) 2004 Marc Brockschmidt
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.
package Lintian::copyright_specification;
use strict;
use lib "$ENV{'LINTIAN_ROOT'}/checks/";
use common_data;
use Dep;
use DebianCopyrightParser;
use Tags;
use Data::Dumper;
sub run {
my $pkg = shift;
my $type = shift;
#parse the copyright file
my @data = DebianCopyrightParser::read_dpkg_copyright("unpacked/debian/copyright");
my $follow_spec = 0;
#check if it has a format-specification header
foreach my $section (@data) {
#next section if this is not an error
next if $section->{'format-specification'};
$follow_spec = 1;
last;
}
#don't check other stuff if it doesn't have the format-specification header
if ($follow_spec == 0) {
tag "debian-copyright-no-specification", '';
} else {
#errors found by the parser
foreach my $section (@data) {
#next section if this is not an error
next if not $section->{error};
tag "debian-copyright-".$section->{error}, $section->{info};
}
# Check that every file in the tree has a license
my $command = 'cd unpacked && find . -type f -a -not \( -false';
foreach my $section (@data) {
next if not $section->{files};
#patterns are comma separated
#TODO: manage quoted strings with comma in it
$command .= ' -o -path ./'.join(' -o -path ./', split m/,\s/, $section->{files});
}
$command .= ' \) ; cd .. ';
my $files_without_copyright = `$command`;
if ($files_without_copyright) {
foreach my $file (split '\n', $files_without_copyright) {
tag 'debian-copyright-file-without-copyright', $file;
}
}
# Check that every pattern match something
foreach my $section (@data) {
#next section if this is not a files section
next if not $section->{files};
#files are comma separated
#TODO: manage quoted strings (with comma in it)
my @patterns = split m/,\s/, $section->{files} ;
foreach my $pattern (@patterns) {
if (not `ls -l && cd unpacked && find . -type f -a -path $pattern ; cd ..`) {
tag 'debian-copyright-section-without-match', $pattern ;
}
}
}
}
}
1;
# vim: syntax=perl sw=4 ts=4 noet shiftround
Check-Script: copyright-specification
Author: Mathieu Parent <math.parent@gmail.com>
Abbrev: csp
Type: source
Unpack-Level: 2
Tag: debian-copyright-no-specification
Type: info
Info: The package contains a copyright file that that does not follow the
proposed copyright format. This is not required by the policy.
.
More information on how to follow this proposed format at
http://wiki.debian.org/Proposals/CopyrightFormat
Tag: debian-copyright-unknown-field
Type: warning
Info: The package contains a copyright file that as an unknown field.
Tag: debian-copyright-duplicate-field
Type: warning
Info: The package contains a copyright file whose one section has a duplicated
field.
Tag: debian-copyright-extra-line
Type: warning
Info: The package contains a copyright file that has non-rfc2822 lines.
Tag: debian-copyright-file-without-copyright
Type: warning
Info: The package contains a copyright file that does match the specified file.
Tag: debian-copyright-section-without-match
Type: warning
Info: The package contains a copyright file which has a section which does
match any file.
# Hey emacs! This is a -*- Perl -*- script!
# DebianCopyrightParser -- debian copyright parser
# Copyright (C) 1998 Christian Schwarz
# Copyright (C) 2008 Mathieu Parent <math.parent@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.
package DebianCopyrightParser;
use strict;
use Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw(parse_dpkg_copyright
read_dpkg_copyright
get_file_copyright
fail);
use FileHandle;
# general function to read dpkg copyright files
# arguments:
# $filehandle
# output:
# list of hashes
# (a hash contains one sections,
# keys in hash are lower case letters of control fields
# errors are sections also)
sub parse_dpkg_copyright {
my ($COPYRIGHT) = @_;
my @errors;
my $cur_error = 0;
my @data;
my $cur_section = 0;
my $open_section = 0;
my $last_tag;
my $line_number = 0;
while (<$COPYRIGHT>) {
chomp;
$line_number++;
# empty line? -> force new section
if (m/^$/) {
if ($open_section) { # end of current section
$cur_section++;
$open_section = 0;
}
}
# pgp sig? -> skip until end of signature
elsif (m/^-----BEGIN PGP SIGNATURE/) {
while (<$COPYRIGHT>) {
$line_number++;
last if m/^-----END PGP SIGNATURE/o;
}
}
# other pgp control? -> skip until the next blank line
elsif (m/^-----BEGIN PGP/) {
while (<$COPYRIGHT>) {
$line_number++;
last if /^\s*$/o;
}
}
# new field?
elsif (m/^(\S+):\s*(.*)$/o) {
my ($tag,$value) = (lc $1,$2);
#format-specification, files and notice always start a section
if($tag =~ /format-specification|files|notice/i) {
$cur_section++ if $open_section;
#license only start a section if the current section already have a license field
} elsif ($tag =~ /license/i and $data[$cur_section]->{'license'}) {
$cur_section++ if $open_section;
#other known fields
} elsif($tag =~ /debianized-by|debianized-date|original-source-location|upstream-author|copyright|license/i) {
#do nothing
#unknown fields
} else {
$errors[$cur_error]->{error} = 'unknown-field';
$errors[$cur_error]->{info} = "$tag";
$cur_error++;
}
#fields already parsed for current section
if($data[$cur_section]->{$tag}) {
$errors[$cur_error]->{error} = 'duplicate-field';
$errors[$cur_error]->{info} = "$tag in section $cur_section line $line_number";
$cur_error++;
}
$open_section = 1;
$data[$cur_section]->{$tag} = $value;
$last_tag = $tag;
}
# continued field?
elsif (m/^ (.*)$/o and $open_section) {
$data[$cur_section]->{$last_tag} .= "\n".$1;
}
#everything else
else {
$errors[$cur_error]->{error} = 'extra-line';
$errors[$cur_error]->{info} = "line $line_number";
$cur_error++;
}
}
return (@errors,@data);
}
sub read_dpkg_copyright {
my ($file) = @_;
if (not _ensure_file_is_sane($file)) {
return undef;
}
my $COPYRIGHT = FileHandle->new;
open($COPYRIGHT, '<', $file)
or fail("cannot open copyright file $file for reading: $!");
my @data = parse_dpkg_copyright($COPYRIGHT);
close($COPYRIGHT)
or fail("pipe for copyright file $file exited with status: $?");
return @data;
}
# not used
sub _file_matches {
my ($file,$pattern) = @_;
#TODO: expand patterns
#find . -path "$PATTERN"
#$output_string = `find . -path "$pattern"`;
return $pattern eq $file;
}
# not used
sub _get_complete_license {
my ($license,@data) = @_;
foreach my $section (@data) {
#next section if this is not a license section
next if $section->{files} or not $section->{license};
#check that this is the matching license section
return $section->{license} if $section->{license} =~ /^\s*$license/
}
return $license;
}
# not used
sub get_file_copyright {
my ($file,@data) = @_;
my $ret= {};
foreach my $section (@data) {
next if not $section->{files};
#files are comma separated
#TODO: manage quoted strings (with comma in it)
my @patterns = split m/,\s/, $section->{files} ;
foreach my $pattern (@patterns) {
#TODO: what if it matches several patterns?
# the current implementation takes the first match
if (_file_matches($file,$pattern)) {
$ret->{copyright} = $section->{copyright};
#one term: check for license section
$ret->{license} = $section->{license};
$ret->{'matching-pattern'} = $pattern;
if($ret->{license} =~ /\s*(\S+)\s*/) {
$ret->{license} = _get_complete_license($1, @data);
}
return $ret;
}
}
}
$ret->{license} = "unknown";
$ret->{copyright} = "unknown";
return $ret;
}
sub _ensure_file_is_sane {
my ($file) = @_;
# if file exists and is not 0 bytes
if (-f $file and -s $file) {
return 1;
}
return 0;
}
# ------------------------
sub fail {
my $str = "internal error";
if (@_) {
$str .= ": ".join( "\n", @_)."\n";
} elsif ($!) {
$str .= ": $!\n";
} else {
$str .= ".\n";
}
$! = 2; # set return code outside eval()
die $str;
}
1;
--- End Message ---