[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#478930: lintian: Check for rfc2822 debian/copyright (http://wiki.debian.org/Proposals/CopyrightFormat)



Package: lintian
Version: 1.23.46
Severity: wishlist
Tags: patch

The files add checks for the proposal:
http://wiki.debian.org/Proposals/CopyrightFormat

I hope this implementation will help to clarify things.

To enable, copy:
- copyright-specification to /usr/share/lintian/checks/
- copyright-specification.desc to /usr/share/lintian/checks/
- DebianCopyrightParser.pm to /usr/share/lintian/lib

and check with -I

-- System Information:
Debian Release: lenny/sid
  APT prefers testing
  APT policy: (990, 'testing'), (500, 'unstable'), (1, 'experimental')
Architecture: i386 (i686)

Kernel: Linux 2.6.18-6-xen-686 (SMP w/2 CPU cores)
Locale: LANG=fr_FR.UTF-8, LC_CTYPE=fr_FR.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash

Versions of packages lintian depends on:
ii  binutils            2.18.1~cvs20080103-4 The GNU assembler, linker and bina
ii  diffstat            1.45-2               produces graph of changes introduc
ii  dpkg-dev            1.14.18              package building tools for Debian
ii  file                4.23-2               Determines file type using "magic"
ii  gettext             0.17-2               GNU Internationalization utilities
ii  intltool-debian     0.35.0+20060710.1    Help i18n of RFC822 compliant conf
ii  libparse-debianchan 1.1.1-2              parse Debian changelogs and output
ii  liburi-perl         1.35.dfsg.1-1        Manipulates and accesses URI strin
ii  man-db              2.5.1-3              on-line manual pager
ii  perl [libdigest-md5 5.8.8-12             Larry Wall's Practical Extraction 

lintian recommends no packages.

-- no debconf information
# control-file -- lintian check script -*- perl -*-
#
# Copyright (C) 2004 Marc Brockschmidt
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.

package Lintian::copyright_specification;
use strict;
use lib "$ENV{'LINTIAN_ROOT'}/checks/";
use common_data;
use Dep;
use DebianCopyrightParser;
use Tags;
use Data::Dumper;

sub run {

	my $pkg = shift;
	my $type = shift;

	#parse the copyright file
	my @data = DebianCopyrightParser::read_dpkg_copyright("unpacked/debian/copyright");
	my $follow_spec = 0;
	#check if it has a format-specification header
	foreach my $section (@data) {
		#next section if this is not an error
		next if $section->{'format-specification'};
		$follow_spec = 1;
		last;
	}

	#don't check other stuff if it doesn't have the format-specification header
	if ($follow_spec == 0) {
		tag "debian-copyright-no-specification", '';
	} else {
		#errors found by the parser
		foreach my $section (@data) {
			#next section if this is not an error
			next if not $section->{error};
			tag "debian-copyright-".$section->{error}, $section->{info};
		}

		# Check that every file in the tree has a license
		my $command = 'cd unpacked && find . -type f -a -not \( -false';
		foreach my $section (@data) {
			next if not $section->{files};
			#patterns are comma separated
			#TODO: manage quoted strings with comma in it
			$command .= ' -o -path ./'.join(' -o -path ./', split m/,\s/, $section->{files});
	
		}
		$command .= ' \) ; cd .. ';
		my $files_without_copyright = `$command`;
		if ($files_without_copyright) {
			foreach my $file (split '\n', $files_without_copyright) {
				tag 'debian-copyright-file-without-copyright', $file;
			}
		}

		# Check that every pattern match something
		foreach my $section (@data) {
			#next section if this is not a files section
			next if not $section->{files};
			#files are comma separated
			#TODO: manage quoted strings (with comma in it)
			my @patterns = split m/,\s/, $section->{files} ;
			foreach my $pattern (@patterns) {
				if (not `ls -l && cd unpacked && find . -type f -a -path $pattern ; cd ..`) {
					tag 'debian-copyright-section-without-match', $pattern ;
				}
			}
		}
	}

}

1;

# vim: syntax=perl sw=4 ts=4 noet shiftround
Check-Script: copyright-specification
Author: Mathieu Parent <math.parent@gmail.com>
Abbrev: csp
Type: source
Unpack-Level: 2

Tag: debian-copyright-no-specification
Type: info
Info: The package contains a copyright file that  that does not follow the
 proposed copyright format. This is not required by the policy.
 .
 More information on how to follow this proposed format at
 http://wiki.debian.org/Proposals/CopyrightFormat

Tag: debian-copyright-unknown-field
Type: warning
Info: The package contains a copyright file that as an unknown field.

Tag: debian-copyright-duplicate-field
Type: warning
Info: The package contains a copyright file whose one section has a duplicated
 field.

Tag: debian-copyright-extra-line
Type: warning
Info: The package contains a copyright file that has non-rfc2822 lines.

Tag: debian-copyright-file-without-copyright
Type: warning
Info: The package contains a copyright file that does match the specified file.

Tag: debian-copyright-section-without-match
Type: warning
Info: The package contains a copyright file which has a section which does
 match any file.

# Hey emacs! This is a -*- Perl -*- script!
# DebianCopyrightParser -- debian copyright parser

# Copyright (C) 1998 Christian Schwarz
# Copyright (C) 2008 Mathieu Parent <math.parent@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.

package DebianCopyrightParser;
use strict;

use Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw(parse_dpkg_copyright
	read_dpkg_copyright
	get_file_copyright
	fail);

use FileHandle;

# general function to read dpkg copyright files
# arguments:
#    $filehandle 
# output:
#    list of hashes
#    (a hash contains one sections,
#    keys in hash are lower case letters of control fields
#    errors are sections also)
sub parse_dpkg_copyright {
    my ($COPYRIGHT) = @_;

    my @errors;
    my $cur_error = 0;
    my @data;
    my $cur_section = 0;
    my $open_section = 0;
    my $last_tag;
    my $line_number = 0;

    while (<$COPYRIGHT>) {
	chomp;
	$line_number++;

	# empty line? -> force new section
	if (m/^$/) {
	    if ($open_section) { # end of current section
		$cur_section++;
		$open_section = 0;
	    }
	}
	# pgp sig? -> skip until end of signature
	elsif (m/^-----BEGIN PGP SIGNATURE/) {
	    while (<$COPYRIGHT>) {
		$line_number++;
		last if m/^-----END PGP SIGNATURE/o;
	    }
	}
	# other pgp control? -> skip until the next blank line
	elsif (m/^-----BEGIN PGP/) {
	    while (<$COPYRIGHT>) {
		$line_number++;
		last if /^\s*$/o;
	    }
	}
	# new field?
	elsif (m/^(\S+):\s*(.*)$/o) {
	    my ($tag,$value) = (lc $1,$2);
	    #format-specification, files and notice always start a section
	    if($tag =~ /format-specification|files|notice/i) {
		$cur_section++ if $open_section;
	    #license only start a section if the current section already have a license field
	    } elsif ($tag =~ /license/i and $data[$cur_section]->{'license'}) {
		$cur_section++ if $open_section;
	    #other known fields
	    } elsif($tag =~ /debianized-by|debianized-date|original-source-location|upstream-author|copyright|license/i) {
		#do nothing
	    #unknown fields
	    } else {
		$errors[$cur_error]->{error} = 'unknown-field';
	        $errors[$cur_error]->{info} = "$tag";
		$cur_error++;
	    }
	    #fields already parsed for current section
	    if($data[$cur_section]->{$tag}) {
		$errors[$cur_error]->{error} = 'duplicate-field';
	        $errors[$cur_error]->{info} = "$tag in section $cur_section line $line_number";
		$cur_error++;
	    }
	    $open_section = 1;
	    $data[$cur_section]->{$tag} = $value;
	    $last_tag = $tag;
	}
	# continued field?
	elsif (m/^ (.*)$/o and $open_section) {
	    $data[$cur_section]->{$last_tag} .= "\n".$1;
	}
	#everything else	
	else {
	    $errors[$cur_error]->{error} = 'extra-line';
	    $errors[$cur_error]->{info} = "line $line_number";
	    $cur_error++;
	}
    }
    return (@errors,@data);
}

sub read_dpkg_copyright {
    my ($file) = @_;

    if (not _ensure_file_is_sane($file)) {
	return undef;
    }

    my $COPYRIGHT = FileHandle->new;
    open($COPYRIGHT, '<', $file)
	or fail("cannot open copyright file $file for reading: $!");
    my @data = parse_dpkg_copyright($COPYRIGHT);
    close($COPYRIGHT)
	or fail("pipe for copyright file $file exited with status: $?");
    return @data;
}

# not used
sub _file_matches {
    my ($file,$pattern) = @_;
    #TODO: expand patterns
    #find . -path "$PATTERN"
    #$output_string = `find . -path "$pattern"`;
    return $pattern eq $file;
}

# not used
sub _get_complete_license {
    my ($license,@data) = @_;
    foreach my $section (@data) {
	#next section if this is not a license section
	next if $section->{files} or not $section->{license};
	#check that this is the matching license section
	return $section->{license} if $section->{license} =~ /^\s*$license/
    }
    return $license;
}

# not used
sub get_file_copyright {
    my ($file,@data) = @_;
    my $ret= {};
    foreach my $section (@data) {
	next if not $section->{files};
	#files are comma separated
	#TODO: manage quoted strings (with comma in it)
	my @patterns = split m/,\s/, $section->{files} ;
	foreach my $pattern (@patterns) {
	    #TODO: what if it matches several patterns?
	    # the current implementation takes the first match
	    if (_file_matches($file,$pattern)) {
		$ret->{copyright} = $section->{copyright};
		#one term: check for license section
		$ret->{license} = $section->{license};
		$ret->{'matching-pattern'} = $pattern;
		if($ret->{license} =~ /\s*(\S+)\s*/) {
		    $ret->{license} = _get_complete_license($1, @data);
		}
		return $ret;
	    }
	}
    }
    $ret->{license} = "unknown";
    $ret->{copyright} = "unknown";
    return $ret;
}

sub _ensure_file_is_sane {
    my ($file) = @_;

    # if file exists and is not 0 bytes
    if (-f $file and -s $file) {
	return 1;
    }
    return 0;
}

# ------------------------

sub fail {
    my $str = "internal error";
    if (@_) {
	$str .= ": ".join( "\n", @_)."\n";
    } elsif ($!) {
	$str .= ": $!\n";
    } else {
	$str .= ".\n";
    }
    $! = 2; # set return code outside eval()
    die $str;

}

1;

Reply to: