Bug#720348: [lintian] New patch set
Package: lintian
Version: 2.5.17
Here new patch set for abstracting
>From c319699320630b4ce67740cecd466da7726a08c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bastien=20ROUCARI=C3=88S?= <roucaries.bastien@gmail.com>
Date: Mon, 26 Aug 2013 16:26:56 +0200
Subject: [PATCH 1/3] Add sliding windows abstraction
---
lib/Lintian/SlidingWindow.pm | 153 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 153 insertions(+)
create mode 100644 lib/Lintian/SlidingWindow.pm
diff --git a/lib/Lintian/SlidingWindow.pm b/lib/Lintian/SlidingWindow.pm
new file mode 100644
index 0000000..185ef41
--- /dev/null
+++ b/lib/Lintian/SlidingWindow.pm
@@ -0,0 +1,153 @@
+# -*- perl -*-
+# Lintian::Data -- interface to query lists of keywords
+
+# Copyright (C) 2008 Russ Allbery
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::SlidingWindow;
+
+use strict;
+use warnings;
+use autodie;
+
+use Carp qw(croak);
+
+use Lintian::Util qw(strip);
+
+sub new {
+ my ($class, $mode, $file, $blocksub) = @_;
+ my $self = {};
+
+ my $block = '';
+
+ open(my $handle, $mode, $file);
+ binmode($handle);
+
+ $self->{'_handle'} = $handle;
+ $self->{'_queue'} = ['', ''];
+ $self->{'_block'} = '';
+ $self->{'_blocksize'} = 4096;
+ $self->{'_blocksub'} = defined($blocksub) ? $blocksub : undef;
+ $self->{'_blocknumber'} = -1;
+
+ bless($self, $class);
+ return $self;
+}
+
+sub readwindow {
+ my ($self) = @_;
+ my $window;
+ unless(read($self->{'_handle'}, $window, $self->{'_blocksize'})) {
+ return;
+ }
+
+ if(defined($self->{'_blocksub'})) {
+ local $_ = $window;
+ $self->{'_blocksub'}->();
+ $window = $_;
+ }
+
+ $self->{'_blocknumber'}++;
+
+ my $block;
+ shift @{$self->{'_queue'}};
+ push (@{$self->{'_queue'}}, $window);
+ $block = join '', @{$self->{'_queue'}};
+ return $block;
+}
+
+sub blocknumber {
+ my ($self) = @_;
+ if($self->{'_blocknumber'} == -1) {
+ return undef;
+ }
+ return $self->{'_blocknumber'};
+}
+
+=head1 NAME
+
+Lintian::Sliding - Lintian interface to sliding window match
+
+=head1 SYNOPSIS
+
+ my $sfd = Lintian::SlidingWindow->new('<','someevilfile.c', sub locallc { $_ = lc($_); });
+ my $window;
+ while ($window = $sfd->readwindow()) {
+ if (index($window, 'evil') > -1) {
+ if($window =~
+ m/software \s++ shall \s++
+ be \s++ used \s++ for \s++ good \s*+ ,?+ \s*+
+ not \s++ evil/xsim) {
+ tag 'license-problem-json-evil';
+ }
+ }
+ }
+
+=head1 DESCRIPTION
+
+Lintian::SlidingWindow provides a way of matching some pattern,
+including multi line pattern, without needing to fully load the
+file in memory.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item new(mode,file,[blocksub])
+
+Create a new sliding window for file file using mode mode. Optionnaly run blocksub against
+each block. Note that blocksub should apply transform byte by byte and does not depend of context.
+
+=back
+
+=head1 INSTANCE METHODS
+
+=over 4
+
+=item readwindow
+
+Return a new block of sliding window
+
+=item blocknumber
+
+return the number of block read by the instance. Return undef if no block read.
+
+=back
+
+=head1 DIAGNOSTICS
+
+=over 4
+
+=item no data type specified
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Bastien ROUCARIÈS for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
--
1.8.4.rc3
>From 6a414c51da9f3f5c1aedf1c709991473bf248668 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bastien=20ROUCARI=C3=88S?= <roucaries.bastien@gmail.com>
Date: Sun, 6 Oct 2013 15:06:43 +0200
Subject: [PATCH 2/3] Extract licensecheck to it own function
It decrease the indent level a little bit.
---
checks/cruft.pm | 303 +++++++++++++++++++++++++++++---------------------------
1 file changed, 155 insertions(+), 148 deletions(-)
diff --git a/checks/cruft.pm b/checks/cruft.pm
index 9f2f73a..544693d 100644
--- a/checks/cruft.pm
+++ b/checks/cruft.pm
@@ -451,95 +451,104 @@ sub find_cruft {
# test license problem is source file (only text file)
next ENTRY unless -T $path;
+ license_check($info, $name, $path);
+ }
+ return;
+}
- open(my $F, '<', $path);
- binmode($F);
-
- my @queue = ('', '');
- my %licenseproblemhash = ();
- my $blocknumber = 0;
-
- # we try to read this file in block and use a sliding window
- # for efficiency. We store two blocks in @queue and the whole
- # string to match in $block. Please emit license tags only once
- # per file
- BLOCK:
- while (read($F, my $window, BLOCKSIZE)) {
- my $block;
- shift @queue;
- push(@queue, lc($window));
- $block = join '', @queue;
-
- if (index($block, '\\') > -1) {
- # Remove formatting commonly added by pod2man
- $block =~ s{ \\ & }{}gxsm;
- $block =~ s{ \\s (?:0|-1) }{}gxsm;
- $block =~ s{ \\ \* \( [LR] \" }{\"}gxsm;
- }
+# do basic license check against well known offender
+# note that it does not replace licensecheck(1)
+# and is only used for autoreject by ftp-master
+sub license_check {
+ my ($info, $name, $path) = @_;
+ open(my $F, '<', $path);
+ binmode($F);
+
+ my @queue = ('', '');
+ my %licenseproblemhash = ();
+ my $blocknumber = 0;
+
+ # we try to read this file in block and use a sliding window
+ # for efficiency. We store two blocks in @queue and the whole
+ # string to match in $block. Please emit license tags only once
+ # per file
+ BLOCK:
+ while (read($F, my $window, BLOCKSIZE)) {
+ my $block;
+ shift @queue;
+ push(@queue, lc($window));
+ $block = join '', @queue;
+
+ if (index($block, '\\') > -1) {
+ # Remove formatting commonly added by pod2man
+ $block =~ s{ \\ & }{}gxsm;
+ $block =~ s{ \\s (?:0|-1) }{}gxsm;
+ $block =~ s{ \\ \* \( [LR] \" }{\"}gxsm;
+ }
- if ( index($block, 'intellectual') > -1
- && index($block, 'property') > -1
- && index($block, 'all') > -1) {
+ if ( index($block, 'intellectual') > -1
+ && index($block, 'property') > -1
+ && index($block, 'all') > -1) {
- # nvdia opencv infamous license
- # non-distributable
- if (!exists $licenseproblemhash{'nvidia-intellectual'}) {
- my $cleanedblock = _clean_block($block);
- if (
- $cleanedblock =~ m/retain \s+ all \s+ intellectual \s+
+ # nvdia opencv infamous license
+ # non-distributable
+ if (!exists $licenseproblemhash{'nvidia-intellectual'}) {
+ my $cleanedblock = _clean_block($block);
+ if (
+ $cleanedblock =~ m/retain \s+ all \s+ intellectual \s+
property \s+ and \s+ proprietary \s+ rights \s+ in \s+
and \s+ to \s+ this \s+ software \s+ and \s+
related \s+ documentation/xism
- ) {
- tag 'license-problem-nvidia-intellectual', $name;
- $licenseproblemhash{'nvidia-intellectual'} = 1;
- }
+ ) {
+ tag 'license-problem-nvidia-intellectual', $name;
+ $licenseproblemhash{'nvidia-intellectual'} = 1;
}
}
+ }
- # some license issues do not apply to non-free
- # because these file are distribuable
- if ($info->is_non_free) {
- next BLOCK;
- }
+ # some license issues do not apply to non-free
+ # because these file are distribuable
+ if ($info->is_non_free) {
+ next BLOCK;
+ }
- if (
- index($block, 'evil') > -1
- && $block =~ m/software \s++ shall \s++
+ if (
+ index($block, 'evil') > -1
+ && $block =~ m/software \s++ shall \s++
be \s++ used \s++ for \s++ good \s*+ ,?+ \s*+
not \s++ evil/xsm
- ) {
- # json evil license
+ ) {
+ # json evil license
- if (!exists $licenseproblemhash{'json-evil'}) {
- tag 'license-problem-json-evil', $name;
- $licenseproblemhash{'json-evil'} = 1;
- }
+ if (!exists $licenseproblemhash{'json-evil'}) {
+ tag 'license-problem-json-evil', $name;
+ $licenseproblemhash{'json-evil'} = 1;
}
+ }
- # check GFDL block - The ".{0,1024}"-part in the regex
- # will contain the "no invariants etc." part if
- # it is a good use of the license. We include it
- # here to ensure that we do not emit a false positive
- # if the "redeeming" part is in the next block.
- #
- # See cruft-gfdl-fp-sliding-win for the test case
- if ( index($block, 'license') > -1
- && index($block, 'documentation') > -1
- && index($block, 'gnu') > -1
- && index($block, 'copy') > -1) {
-
- my $cleanedblock = $block;
-
- # gnu word is often highlighted
- # do a minimal replace in order to do the hard work
- # only in case of positively matched GFDL
- $cleanedblock =~ s{
+ # check GFDL block - The ".{0,1024}"-part in the regex
+ # will contain the "no invariants etc." part if
+ # it is a good use of the license. We include it
+ # here to ensure that we do not emit a false positive
+ # if the "redeeming" part is in the next block.
+ #
+ # See cruft-gfdl-fp-sliding-win for the test case
+ if ( index($block, 'license') > -1
+ && index($block, 'documentation') > -1
+ && index($block, 'gnu') > -1
+ && index($block, 'copy') > -1) {
+
+ my $cleanedblock = $block;
+
+ # gnu word is often highlighted
+ # do a minimal replace in order to do the hard work
+ # only in case of positively matched GFDL
+ $cleanedblock =~ s{
(?:<span\s*[^>]>)?\s*gnu\s*</span\s*[^>]*?> | # html span
(?:@[[:alpha:]]*?\{)?\s*gnu\s*\} # Tex info cmd
}{ gnu }gxms;
- # classical gfdl matching pattern
- my $normalgfdlpattern = qr/
+ # classical gfdl matching pattern
+ my $normalgfdlpattern = qr/
(?'contextbefore'(?:
(?:(?!a \s+ copy \s+ of \s+ the \s+ license \s+ is).){1024}|
(?:\s+ copy \s+ of \s+ the \s+ license \s+ is.{0,1024}?)))
@@ -548,8 +557,8 @@ sub find_cruft {
a \s+ copy \s+ of \s+ the \s+ license \s+ is
/xsmo;
- # for first block we get context from the beginning
- my $firstblockgfdlpattern = qr/
+ # for first block we get context from the beginning
+ my $firstblockgfdlpattern = qr/
(?'rawcontextbefore'(?:
(?:(?!a \s+ copy \s+ of \s+ the \s+ license \s+ is).){1024}|
\A(?:(?!a \s+ copy \s+ of \s+ the \s+ license \s+ is).){0,1024}|
@@ -561,137 +570,135 @@ sub find_cruft {
a \s+ copy \s+ of \s+ the \s+ license \s+ is
/xsmo;
- my $gfdlpattern
- = $blocknumber
- ? $normalgfdlpattern
- : $firstblockgfdlpattern;
+ my $gfdlpattern
+ = $blocknumber
+ ? $normalgfdlpattern
+ : $firstblockgfdlpattern;
- if ($cleanedblock =~ $gfdlpattern) {
- if (!exists $licenseproblemhash{'gfdl-invariants'}) {
- my $rawgfdlsections = $+{rawgfdlsections} || '';
- my $rawcontextbefore = $+{rawcontextbefore} || '';
+ if ($cleanedblock =~ $gfdlpattern) {
+ if (!exists $licenseproblemhash{'gfdl-invariants'}) {
+ my $rawgfdlsections = $+{rawgfdlsections} || '';
+ my $rawcontextbefore = $+{rawcontextbefore} || '';
- # replace some common comment-marker/markup with space
- my $gfdlsections = _clean_block($rawgfdlsections);
- my $contextbefore = _clean_block($rawcontextbefore);
+ # replace some common comment-marker/markup with space
+ my $gfdlsections = _clean_block($rawgfdlsections);
+ my $contextbefore = _clean_block($rawcontextbefore);
- # remove classical and without meaning part of
- # matched string
- $gfdlsections =~ s{
+ # remove classical and without meaning part of
+ # matched string
+ $gfdlsections =~ s{
\A version \s \d+(?:\.\d+)? \s
(?:or \s any \s later \s version \s)?
published \s by \s the \s Free \s Software \s Foundation
\s?[,\.;]?\s?}{}xismo;
- $contextbefore =~ s{
+ $contextbefore =~ s{
\s? (:?[,\.;]? \s?)?
permission \s is \s granted \s to \s copy \s?[,\.;]?\s?
distribute \s?[,\.;]?\s? and\s?/?\s?or \s modify \s
this \s document \s under \s the \s terms \s of \s the\Z}
{}xismo;
- # GFDL license, assume it is bad unless it
- # explicitly states it has no "bad sections".
- if (
- $gfdlsections =~ m/
+ # GFDL license, assume it is bad unless it
+ # explicitly states it has no "bad sections".
+ if (
+ $gfdlsections =~ m/
no \s? Invariant \s+ Sections? \s? [,\.;]?
\s? (?:with\s)? (?:the\s)? no \s
Front(?:\s?\\?-)?\s?Cover (?:\s Texts?)? \s? [,\.;]? \s? (?:and\s)?
(?:with\s)? (?:the\s)? no
\s Back(?:\s?\\?-)?\s?Cover/xiso
- ) {
- # no invariant
- } elsif (
- $gfdlsections =~ m/
+ ) {
+ # no invariant
+ } elsif (
+ $gfdlsections =~ m/
no \s Invariant \s Sections? \s? [,\.;]?
\s? (?:no\s)? Front(?:\s?[\\]?-)? \s or
\s (?:no\s)? Back(?:\s?[\\]?-)?\s?Cover \s Texts?/xiso
- ) {
- # no invariant variant (dict-foldoc)
- } elsif (
- $gfdlsections =~ m/
+ ) {
+ # no invariant variant (dict-foldoc)
+ } elsif (
+ $gfdlsections =~ m/
\A There \s are \s no \s invariants? \s sections? \Z
/xiso
- ) {
- # no invariant libnss-pgsql version
- } elsif (
- $gfdlsections =~ m/
+ ) {
+ # no invariant libnss-pgsql version
+ } elsif (
+ $gfdlsections =~ m/
\A without \s any \s Invariant \s Sections? \Z
/xiso
- ) {
- # no invariant parsewiki version
- } elsif (
- $gfdlsections=~ m/
+ ) {
+ # no invariant parsewiki version
+ } elsif (
+ $gfdlsections=~ m/
\A with \s no \s invariants? \s sections? \Z
/xiso
- ) {
- # no invariant lilypond version
- } elsif (
- $gfdlsections =~ m/\A
+ ) {
+ # no invariant lilypond version
+ } elsif (
+ $gfdlsections =~ m/\A
with \s the \s Invariant \s Sections \s being \s
LIST (?:\s THEIR \s TITLES)? \s? [,\.;]? \s?
with \s the \s Front(?:\s?[\\]?-)\s?Cover \s Texts \s being \s
LIST (?:\s THEIR \s TITLES)? \s? [,\.;]? \s?
(?:and\s)? with \s the \s Back(?:\s?[\\]?-)\s?Cover \s Texts \s being \s
LIST (?:\s THEIR \s TITLES)? \Z/xiso
- ) {
- # verbatim text of license is ok
- } elsif ($gfdlsections eq '') {
- # empty text is ambiguous
- tag 'license-problem-gfdl-invariants-empty',$name;
- $licenseproblemhash{'gfdl-invariants'} = 1;
- } elsif (
- $gfdlsections =~ m/
+ ) {
+ # verbatim text of license is ok
+ } elsif ($gfdlsections eq '') {
+ # empty text is ambiguous
+ tag 'license-problem-gfdl-invariants-empty',$name;
+ $licenseproblemhash{'gfdl-invariants'} = 1;
+ } elsif (
+ $gfdlsections =~ m/
with \s \&FDLInvariantSections; \s? [,\.;]? \s?
with \s+\&FDLFrontCoverText; \s? [,\.;]? \s?
and \s with \s \&FDLBackCoverText;/xiso
- ) {
- # fix #708957 about FDL entities in template
- unless (
- $name =~ m{
+ ) {
+ # fix #708957 about FDL entities in template
+ unless (
+ $name =~ m{
/customization/[^/]+/entities/[^/]+\.docbook \Z
}xsm
- ) {
- tag 'license-problem-gfdl-invariants',$name;
- $licenseproblemhash{'gfdl-invariants'} = 1;
- }
- } elsif (
- $gfdlsections =~ m{
+ ) {
+ tag 'license-problem-gfdl-invariants',$name;
+ $licenseproblemhash{'gfdl-invariants'} = 1;
+ }
+ } elsif (
+ $gfdlsections =~ m{
\A with \s the \s? <_: \s? link-\d+ \s? /> \s?
being \s list \s their \s titles \s?[,\.;]?\s?
with \s the \s? <_: \s* link-\d+ \s? /> \s?
being \s list \s?[,\.;]?\s?
(?:and\s)? with \s the \s? <_:\s? link-\d+ \s? /> \s?
being \s list \Z}xiso
- ) {
- # fix a false positive in .po file
- unless ($name =~ m,\.po$,) {
- tag 'license-problem-gfdl-invariants', $name;
- $licenseproblemhash{'gfdl-invariants'} = 1;
- }
- } else {
- if (
- $contextbefore =~ m/
+ ) {
+ # fix a false positive in .po file
+ unless ($name =~ m,\.po$,) {
+ tag 'license-problem-gfdl-invariants', $name;
+ $licenseproblemhash{'gfdl-invariants'} = 1;
+ }
+ } else {
+ if (
+ $contextbefore =~ m/
Following \s is \s an \s example
(:?\s of \s the \s license \s notice \s to \s use
(?:\s after \s the \s copyright \s (?:line(?:\(s\)|s)?)?
(?:\s using \s all \s the \s features? \s of \s the \s GFDL)?
)?
)? \s? [,:]? \Z/xiso
- ) {
- # it is an example
- } else {
- tag 'license-problem-gfdl-invariants', $name;
- $licenseproblemhash{'gfdl-invariants'} = 1;
- }
+ ) {
+ # it is an example
+ } else {
+ tag 'license-problem-gfdl-invariants', $name;
+ $licenseproblemhash{'gfdl-invariants'} = 1;
}
}
}
}
- $blocknumber++;
}
- close($F);
+ $blocknumber++;
}
- return;
+ close($F);
}
sub _clean_block {
--
1.8.4.rc3
>From 82b7962a3ee2f5984bdf3d27cd6a447e781a0bc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bastien=20ROUCARI=C3=88S?= <roucaries.bastien@gmail.com>
Date: Sun, 6 Oct 2013 15:18:40 +0200
Subject: [PATCH 3/3] Port cruft.pm to sliding window
Port cruft.pm to sliding window
---
checks/cruft.pm | 17 ++++-------------
1 file changed, 4 insertions(+), 13 deletions(-)
diff --git a/checks/cruft.pm b/checks/cruft.pm
index 544693d..9cebb4a 100644
--- a/checks/cruft.pm
+++ b/checks/cruft.pm
@@ -41,6 +41,7 @@ use Lintian::Data;
use Lintian::Relation ();
use Lintian::Tags qw(tag);
use Lintian::Util qw(fail is_ancestor_of normalize_pkg_path strip);
+use Lintian::SlidingWindow;
# All the packages that may provide config.{sub,guess} during the build, used
# to suppress warnings about outdated autotools helper files. I'm not
@@ -461,24 +462,16 @@ sub find_cruft {
# and is only used for autoreject by ftp-master
sub license_check {
my ($info, $name, $path) = @_;
- open(my $F, '<', $path);
- binmode($F);
- my @queue = ('', '');
+ my $sfd = Lintian::SlidingWindow->new('<', $path, sub { $_=lc($_); });
my %licenseproblemhash = ();
- my $blocknumber = 0;
# we try to read this file in block and use a sliding window
# for efficiency. We store two blocks in @queue and the whole
# string to match in $block. Please emit license tags only once
# per file
BLOCK:
- while (read($F, my $window, BLOCKSIZE)) {
- my $block;
- shift @queue;
- push(@queue, lc($window));
- $block = join '', @queue;
-
+ while (my $block = $sfd->readwindow()) {
if (index($block, '\\') > -1) {
# Remove formatting commonly added by pod2man
$block =~ s{ \\ & }{}gxsm;
@@ -571,7 +564,7 @@ sub license_check {
/xsmo;
my $gfdlpattern
- = $blocknumber
+ = $sfd->blocknumber()
? $normalgfdlpattern
: $firstblockgfdlpattern;
@@ -696,9 +689,7 @@ sub license_check {
}
}
}
- $blocknumber++;
}
- close($F);
}
sub _clean_block {
--
1.8.4.rc3
Reply to: