[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#478930: marked as done ([checks/copyright-file] check for new copyright format)



Your message dated Sun, 28 Aug 2011 19:00:38 +0200
with message-id <4E5A7436.3020102@thykier.net>
and subject line Re: Bug#633779: lintian: validate DEP-5 debian/copyright files
has caused the Debian Bug report #633779,
regarding [checks/copyright-file] check for new copyright format
to be marked as done.

This means that you claim that the problem has been dealt with.
If this is not the case it is now your responsibility to reopen the
Bug report if necessary, and/or fix the problem forthwith.

(NB: If you are a system administrator and have no idea what this
message is talking about, this may indicate a serious mail system
misconfiguration somewhere. Please contact owner@bugs.debian.org
immediately.)


-- 
633779: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=633779
Debian Bug Tracking System
Contact owner@bugs.debian.org with problems
--- Begin Message ---
Package: lintian
Version: 1.23.46
Severity: wishlist
Tags: patch

The files add checks for the proposal:
http://wiki.debian.org/Proposals/CopyrightFormat

I hope this implementation will help to clarify things.

To enable, copy:
- copyright-specification to /usr/share/lintian/checks/
- copyright-specification.desc to /usr/share/lintian/checks/
- DebianCopyrightParser.pm to /usr/share/lintian/lib

and check with -I

-- System Information:
Debian Release: lenny/sid
  APT prefers testing
  APT policy: (990, 'testing'), (500, 'unstable'), (1, 'experimental')
Architecture: i386 (i686)

Kernel: Linux 2.6.18-6-xen-686 (SMP w/2 CPU cores)
Locale: LANG=fr_FR.UTF-8, LC_CTYPE=fr_FR.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash

Versions of packages lintian depends on:
ii  binutils            2.18.1~cvs20080103-4 The GNU assembler, linker and bina
ii  diffstat            1.45-2               produces graph of changes introduc
ii  dpkg-dev            1.14.18              package building tools for Debian
ii  file                4.23-2               Determines file type using "magic"
ii  gettext             0.17-2               GNU Internationalization utilities
ii  intltool-debian     0.35.0+20060710.1    Help i18n of RFC822 compliant conf
ii  libparse-debianchan 1.1.1-2              parse Debian changelogs and output
ii  liburi-perl         1.35.dfsg.1-1        Manipulates and accesses URI strin
ii  man-db              2.5.1-3              on-line manual pager
ii  perl [libdigest-md5 5.8.8-12             Larry Wall's Practical Extraction 

lintian recommends no packages.

-- no debconf information
# control-file -- lintian check script -*- perl -*-
#
# Copyright (C) 2004 Marc Brockschmidt
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.

package Lintian::copyright_specification;
use strict;
use lib "$ENV{'LINTIAN_ROOT'}/checks/";
use common_data;
use Dep;
use DebianCopyrightParser;
use Tags;
use Data::Dumper;

sub run {

	my $pkg = shift;
	my $type = shift;

	#parse the copyright file
	my @data = DebianCopyrightParser::read_dpkg_copyright("unpacked/debian/copyright");
	my $follow_spec = 0;
	#check if it has a format-specification header
	foreach my $section (@data) {
		#next section if this is not an error
		next if $section->{'format-specification'};
		$follow_spec = 1;
		last;
	}

	#don't check other stuff if it doesn't have the format-specification header
	if ($follow_spec == 0) {
		tag "debian-copyright-no-specification", '';
	} else {
		#errors found by the parser
		foreach my $section (@data) {
			#next section if this is not an error
			next if not $section->{error};
			tag "debian-copyright-".$section->{error}, $section->{info};
		}

		# Check that every file in the tree has a license
		my $command = 'cd unpacked && find . -type f -a -not \( -false';
		foreach my $section (@data) {
			next if not $section->{files};
			#patterns are comma separated
			#TODO: manage quoted strings with comma in it
			$command .= ' -o -path ./'.join(' -o -path ./', split m/,\s/, $section->{files});
	
		}
		$command .= ' \) ; cd .. ';
		my $files_without_copyright = `$command`;
		if ($files_without_copyright) {
			foreach my $file (split '\n', $files_without_copyright) {
				tag 'debian-copyright-file-without-copyright', $file;
			}
		}

		# Check that every pattern match something
		foreach my $section (@data) {
			#next section if this is not a files section
			next if not $section->{files};
			#files are comma separated
			#TODO: manage quoted strings (with comma in it)
			my @patterns = split m/,\s/, $section->{files} ;
			foreach my $pattern (@patterns) {
				if (not `ls -l && cd unpacked && find . -type f -a -path $pattern ; cd ..`) {
					tag 'debian-copyright-section-without-match', $pattern ;
				}
			}
		}
	}

}

1;

# vim: syntax=perl sw=4 ts=4 noet shiftround
Check-Script: copyright-specification
Author: Mathieu Parent <math.parent@gmail.com>
Abbrev: csp
Type: source
Unpack-Level: 2

Tag: debian-copyright-no-specification
Type: info
Info: The package contains a copyright file that  that does not follow the
 proposed copyright format. This is not required by the policy.
 .
 More information on how to follow this proposed format at
 http://wiki.debian.org/Proposals/CopyrightFormat

Tag: debian-copyright-unknown-field
Type: warning
Info: The package contains a copyright file that as an unknown field.

Tag: debian-copyright-duplicate-field
Type: warning
Info: The package contains a copyright file whose one section has a duplicated
 field.

Tag: debian-copyright-extra-line
Type: warning
Info: The package contains a copyright file that has non-rfc2822 lines.

Tag: debian-copyright-file-without-copyright
Type: warning
Info: The package contains a copyright file that does match the specified file.

Tag: debian-copyright-section-without-match
Type: warning
Info: The package contains a copyright file which has a section which does
 match any file.

# Hey emacs! This is a -*- Perl -*- script!
# DebianCopyrightParser -- debian copyright parser

# Copyright (C) 1998 Christian Schwarz
# Copyright (C) 2008 Mathieu Parent <math.parent@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.

package DebianCopyrightParser;
use strict;

use Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw(parse_dpkg_copyright
	read_dpkg_copyright
	get_file_copyright
	fail);

use FileHandle;

# general function to read dpkg copyright files
# arguments:
#    $filehandle 
# output:
#    list of hashes
#    (a hash contains one sections,
#    keys in hash are lower case letters of control fields
#    errors are sections also)
sub parse_dpkg_copyright {
    my ($COPYRIGHT) = @_;

    my @errors;
    my $cur_error = 0;
    my @data;
    my $cur_section = 0;
    my $open_section = 0;
    my $last_tag;
    my $line_number = 0;

    while (<$COPYRIGHT>) {
	chomp;
	$line_number++;

	# empty line? -> force new section
	if (m/^$/) {
	    if ($open_section) { # end of current section
		$cur_section++;
		$open_section = 0;
	    }
	}
	# pgp sig? -> skip until end of signature
	elsif (m/^-----BEGIN PGP SIGNATURE/) {
	    while (<$COPYRIGHT>) {
		$line_number++;
		last if m/^-----END PGP SIGNATURE/o;
	    }
	}
	# other pgp control? -> skip until the next blank line
	elsif (m/^-----BEGIN PGP/) {
	    while (<$COPYRIGHT>) {
		$line_number++;
		last if /^\s*$/o;
	    }
	}
	# new field?
	elsif (m/^(\S+):\s*(.*)$/o) {
	    my ($tag,$value) = (lc $1,$2);
	    #format-specification, files and notice always start a section
	    if($tag =~ /format-specification|files|notice/i) {
		$cur_section++ if $open_section;
	    #license only start a section if the current section already have a license field
	    } elsif ($tag =~ /license/i and $data[$cur_section]->{'license'}) {
		$cur_section++ if $open_section;
	    #other known fields
	    } elsif($tag =~ /debianized-by|debianized-date|original-source-location|upstream-author|copyright|license/i) {
		#do nothing
	    #unknown fields
	    } else {
		$errors[$cur_error]->{error} = 'unknown-field';
	        $errors[$cur_error]->{info} = "$tag";
		$cur_error++;
	    }
	    #fields already parsed for current section
	    if($data[$cur_section]->{$tag}) {
		$errors[$cur_error]->{error} = 'duplicate-field';
	        $errors[$cur_error]->{info} = "$tag in section $cur_section line $line_number";
		$cur_error++;
	    }
	    $open_section = 1;
	    $data[$cur_section]->{$tag} = $value;
	    $last_tag = $tag;
	}
	# continued field?
	elsif (m/^ (.*)$/o and $open_section) {
	    $data[$cur_section]->{$last_tag} .= "\n".$1;
	}
	#everything else	
	else {
	    $errors[$cur_error]->{error} = 'extra-line';
	    $errors[$cur_error]->{info} = "line $line_number";
	    $cur_error++;
	}
    }
    return (@errors,@data);
}

sub read_dpkg_copyright {
    my ($file) = @_;

    if (not _ensure_file_is_sane($file)) {
	return undef;
    }

    my $COPYRIGHT = FileHandle->new;
    open($COPYRIGHT, '<', $file)
	or fail("cannot open copyright file $file for reading: $!");
    my @data = parse_dpkg_copyright($COPYRIGHT);
    close($COPYRIGHT)
	or fail("pipe for copyright file $file exited with status: $?");
    return @data;
}

# not used
sub _file_matches {
    my ($file,$pattern) = @_;
    #TODO: expand patterns
    #find . -path "$PATTERN"
    #$output_string = `find . -path "$pattern"`;
    return $pattern eq $file;
}

# not used
sub _get_complete_license {
    my ($license,@data) = @_;
    foreach my $section (@data) {
	#next section if this is not a license section
	next if $section->{files} or not $section->{license};
	#check that this is the matching license section
	return $section->{license} if $section->{license} =~ /^\s*$license/
    }
    return $license;
}

# not used
sub get_file_copyright {
    my ($file,@data) = @_;
    my $ret= {};
    foreach my $section (@data) {
	next if not $section->{files};
	#files are comma separated
	#TODO: manage quoted strings (with comma in it)
	my @patterns = split m/,\s/, $section->{files} ;
	foreach my $pattern (@patterns) {
	    #TODO: what if it matches several patterns?
	    # the current implementation takes the first match
	    if (_file_matches($file,$pattern)) {
		$ret->{copyright} = $section->{copyright};
		#one term: check for license section
		$ret->{license} = $section->{license};
		$ret->{'matching-pattern'} = $pattern;
		if($ret->{license} =~ /\s*(\S+)\s*/) {
		    $ret->{license} = _get_complete_license($1, @data);
		}
		return $ret;
	    }
	}
    }
    $ret->{license} = "unknown";
    $ret->{copyright} = "unknown";
    return $ret;
}

sub _ensure_file_is_sane {
    my ($file) = @_;

    # if file exists and is not 0 bytes
    if (-f $file and -s $file) {
	return 1;
    }
    return 0;
}

# ------------------------

sub fail {
    my $str = "internal error";
    if (@_) {
	$str .= ": ".join( "\n", @_)."\n";
    } elsif ($!) {
	$str .= ": $!\n";
    } else {
	$str .= ".\n";
    }
    $! = 2; # set return code outside eval()
    die $str;

}

1;

--- End Message ---
--- Begin Message ---
On 2011-08-28 12:51, Jakub Wilk wrote:
> The attached patch fixes a minor bug. Without it, a format like
> http://anonscm.debian.org/viewvc/dep/web/deps/dep5.mdwn?view=markup&pathrev=174
> 
> would trigger unversioned-copyright-format-uri.
> 

Hi,

Accepted as commit 81033a9c3da1cce306ddd5d7ad93eb16183c75a.

Since this has not been part of an official release I am closing it now.  :)

~Niels



--- End Message ---

Reply to: