[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#720348: [lintian] New patch set



Package: lintian
Version: 2.5.17

Here new patch set for abstracting
>From c319699320630b4ce67740cecd466da7726a08c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bastien=20ROUCARI=C3=88S?= <roucaries.bastien@gmail.com>
Date: Mon, 26 Aug 2013 16:26:56 +0200
Subject: [PATCH 1/3] Add sliding windows abstraction

---
 lib/Lintian/SlidingWindow.pm | 153 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 153 insertions(+)
 create mode 100644 lib/Lintian/SlidingWindow.pm

diff --git a/lib/Lintian/SlidingWindow.pm b/lib/Lintian/SlidingWindow.pm
new file mode 100644
index 0000000..185ef41
--- /dev/null
+++ b/lib/Lintian/SlidingWindow.pm
@@ -0,0 +1,153 @@
+# -*- perl -*-
+# Lintian::Data -- interface to query lists of keywords
+
+# Copyright (C) 2008 Russ Allbery
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::SlidingWindow;
+
+use strict;
+use warnings;
+use autodie;
+
+use Carp qw(croak);
+
+use Lintian::Util qw(strip);
+
+sub new {
+    my ($class, $mode, $file, $blocksub) = @_;
+    my $self = {};
+
+    my $block = '';
+
+    open(my $handle, $mode, $file);
+    binmode($handle);
+
+    $self->{'_handle'} = $handle;
+    $self->{'_queue'} = ['', ''];
+    $self->{'_block'} = '';
+    $self->{'_blocksize'} = 4096;
+    $self->{'_blocksub'} = defined($blocksub) ? $blocksub : undef;
+    $self->{'_blocknumber'} = -1;
+
+    bless($self, $class);
+    return $self;
+}
+
+sub readwindow {
+    my ($self) = @_;
+    my $window;
+    unless(read($self->{'_handle'}, $window, $self->{'_blocksize'})) {
+        return;
+    }
+
+    if(defined($self->{'_blocksub'})) {
+	local $_ = $window;
+	$self->{'_blocksub'}->();
+	$window = $_;
+    }
+
+    $self->{'_blocknumber'}++;
+
+    my $block;
+    shift @{$self->{'_queue'}};
+    push (@{$self->{'_queue'}}, $window);
+    $block =  join '', @{$self->{'_queue'}};
+    return $block;
+}
+
+sub blocknumber {
+    my ($self) = @_;
+    if($self->{'_blocknumber'} == -1) {
+	return undef;
+    }
+    return $self->{'_blocknumber'};
+}
+
+=head1 NAME
+
+Lintian::Sliding - Lintian interface to sliding window match
+
+=head1 SYNOPSIS
+
+    my $sfd = Lintian::SlidingWindow->new('<','someevilfile.c', sub locallc { $_ = lc($_); });
+    my $window;
+    while ($window = $sfd->readwindow()) {
+       if (index($window, 'evil') > -1) {
+           if($window =~
+                 m/software \s++ shall \s++
+                   be \s++ used \s++ for \s++ good \s*+ ,?+ \s*+
+                   not \s++ evil/xsim) {
+              tag 'license-problem-json-evil';
+           }
+       }
+    }
+
+=head1 DESCRIPTION
+
+Lintian::SlidingWindow provides a way of matching some pattern,
+including multi line pattern, without needing to fully load the
+file in memory.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item new(mode,file,[blocksub])
+
+Create a new sliding window for file file using mode mode. Optionnaly run blocksub against
+each block. Note that blocksub should apply transform byte by byte and does not depend of context.
+
+=back
+
+=head1 INSTANCE METHODS
+
+=over 4
+
+=item readwindow
+
+Return a new block of sliding window
+
+=item blocknumber
+
+return the number of block read by the instance. Return undef if no block read.
+
+=back
+
+=head1 DIAGNOSTICS
+
+=over 4
+
+=item no data type specified
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Bastien ROUCARIÈS for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
-- 
1.8.4.rc3

>From 6a414c51da9f3f5c1aedf1c709991473bf248668 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bastien=20ROUCARI=C3=88S?= <roucaries.bastien@gmail.com>
Date: Sun, 6 Oct 2013 15:06:43 +0200
Subject: [PATCH 2/3] Extract licensecheck to it own function

It decrease the indent level a little bit.
---
 checks/cruft.pm | 303 +++++++++++++++++++++++++++++---------------------------
 1 file changed, 155 insertions(+), 148 deletions(-)

diff --git a/checks/cruft.pm b/checks/cruft.pm
index 9f2f73a..544693d 100644
--- a/checks/cruft.pm
+++ b/checks/cruft.pm
@@ -451,95 +451,104 @@ sub find_cruft {
 
         # test license problem is source file (only text file)
         next ENTRY unless -T $path;
+        license_check($info, $name, $path);
+    }
+    return;
+}
 
-        open(my $F, '<', $path);
-        binmode($F);
-
-        my @queue = ('', '');
-        my %licenseproblemhash = ();
-        my $blocknumber = 0;
-
-        # we try to read this file in block and use a sliding window
-        # for efficiency.  We store two blocks in @queue and the whole
-        # string to match in $block. Please emit license tags only once
-        # per file
-      BLOCK:
-        while (read($F, my $window, BLOCKSIZE)) {
-            my $block;
-            shift @queue;
-            push(@queue, lc($window));
-            $block =  join '', @queue;
-
-            if (index($block, '\\') > -1) {
-                # Remove formatting commonly added by pod2man
-                $block =~ s{ \\ & }{}gxsm;
-                $block =~ s{ \\s (?:0|-1) }{}gxsm;
-                $block =~ s{ \\ \* \( [LR] \" }{\"}gxsm;
-            }
+# do basic license check against well known offender
+# note that it does not replace licensecheck(1) 
+# and is only used for autoreject by ftp-master
+sub license_check {
+    my ($info, $name, $path) = @_;
+    open(my $F, '<', $path);
+    binmode($F);
+
+    my @queue = ('', '');
+    my %licenseproblemhash = ();
+    my $blocknumber = 0;
+
+    # we try to read this file in block and use a sliding window
+    # for efficiency.  We store two blocks in @queue and the whole
+    # string to match in $block. Please emit license tags only once
+    # per file
+  BLOCK:
+    while (read($F, my $window, BLOCKSIZE)) {
+        my $block;
+        shift @queue;
+        push(@queue, lc($window));
+        $block =  join '', @queue;
+
+        if (index($block, '\\') > -1) {
+            # Remove formatting commonly added by pod2man
+            $block =~ s{ \\ & }{}gxsm;
+            $block =~ s{ \\s (?:0|-1) }{}gxsm;
+            $block =~ s{ \\ \* \( [LR] \" }{\"}gxsm;
+        }
 
-            if (   index($block, 'intellectual') > -1
-                && index($block, 'property') > -1
-                && index($block, 'all') > -1) {
+        if (   index($block, 'intellectual') > -1
+            && index($block, 'property') > -1
+            && index($block, 'all') > -1) {
 
-                # nvdia opencv infamous license
-                # non-distributable
-                if (!exists $licenseproblemhash{'nvidia-intellectual'}) {
-                    my $cleanedblock = _clean_block($block);
-                    if (
-                        $cleanedblock =~ m/retain \s+ all \s+ intellectual \s+
+            # nvdia opencv infamous license
+            # non-distributable
+            if (!exists $licenseproblemhash{'nvidia-intellectual'}) {
+                my $cleanedblock = _clean_block($block);
+                if (
+                    $cleanedblock =~ m/retain \s+ all \s+ intellectual \s+
                           property \s+ and \s+ proprietary \s+ rights \s+ in \s+
                           and \s+ to \s+ this \s+ software \s+ and \s+
                           related \s+ documentation/xism
-                      ) {
-                        tag 'license-problem-nvidia-intellectual', $name;
-                        $licenseproblemhash{'nvidia-intellectual'} = 1;
-                    }
+                  ) {
+                    tag 'license-problem-nvidia-intellectual', $name;
+                    $licenseproblemhash{'nvidia-intellectual'} = 1;
                 }
             }
+        }
 
-            # some license issues do not apply to non-free
-            # because these file are distribuable
-            if ($info->is_non_free) {
-                next BLOCK;
-            }
+        # some license issues do not apply to non-free
+        # because these file are distribuable
+        if ($info->is_non_free) {
+            next BLOCK;
+        }
 
-            if (
-                index($block, 'evil') > -1
-                && $block =~ m/software \s++ shall \s++
+        if (
+            index($block, 'evil') > -1
+            && $block =~ m/software \s++ shall \s++
                      be \s++ used \s++ for \s++ good \s*+ ,?+ \s*+
                      not \s++ evil/xsm
-              ) {
-                # json evil license
+          ) {
+            # json evil license
 
-                if (!exists $licenseproblemhash{'json-evil'}) {
-                    tag 'license-problem-json-evil', $name;
-                    $licenseproblemhash{'json-evil'} = 1;
-                }
+            if (!exists $licenseproblemhash{'json-evil'}) {
+                tag 'license-problem-json-evil', $name;
+                $licenseproblemhash{'json-evil'} = 1;
             }
+        }
 
-            # check GFDL block - The ".{0,1024}"-part in the regex
-            # will contain the "no invariants etc."  part if
-            # it is a good use of the license.  We include it
-            # here to ensure that we do not emit a false positive
-            # if the "redeeming" part is in the next block.
-            #
-            # See cruft-gfdl-fp-sliding-win for the test case
-            if (   index($block, 'license') > -1
-                && index($block, 'documentation') > -1
-                && index($block, 'gnu') > -1
-                && index($block, 'copy') > -1) {
-
-                my $cleanedblock = $block;
-
-                # gnu word is often highlighted
-                # do a minimal replace in order to do the hard work
-                # only in case of positively matched GFDL
-                $cleanedblock =~ s{
+        # check GFDL block - The ".{0,1024}"-part in the regex
+        # will contain the "no invariants etc."  part if
+        # it is a good use of the license.  We include it
+        # here to ensure that we do not emit a false positive
+        # if the "redeeming" part is in the next block.
+        #
+        # See cruft-gfdl-fp-sliding-win for the test case
+        if (   index($block, 'license') > -1
+            && index($block, 'documentation') > -1
+            && index($block, 'gnu') > -1
+            && index($block, 'copy') > -1) {
+
+            my $cleanedblock = $block;
+
+            # gnu word is often highlighted
+            # do a minimal replace in order to do the hard work
+            # only in case of positively matched GFDL
+            $cleanedblock =~ s{
                  (?:<span\s*[^>]>)?\s*gnu\s*</span\s*[^>]*?>  | # html span
                  (?:@[[:alpha:]]*?\{)?\s*gnu\s*\}               # Tex info cmd
                 }{ gnu }gxms;
-                # classical gfdl matching pattern
-                my $normalgfdlpattern = qr/
+            # classical gfdl matching pattern
+            my $normalgfdlpattern = qr/
                  (?'contextbefore'(?:
                     (?:(?!a \s+ copy \s+ of \s+ the \s+ license \s+ is).){1024}|
                     (?:\s+ copy \s+ of \s+ the \s+ license \s+ is.{0,1024}?)))
@@ -548,8 +557,8 @@ sub find_cruft {
                  a \s+ copy \s+ of \s+ the \s+ license \s+ is
                 /xsmo;
 
-                # for first block we get context from the beginning
-                my $firstblockgfdlpattern = qr/
+            # for first block we get context from the beginning
+            my $firstblockgfdlpattern = qr/
                  (?'rawcontextbefore'(?:
                     (?:(?!a \s+ copy \s+ of \s+ the \s+ license \s+ is).){1024}|
                   \A(?:(?!a \s+ copy \s+ of \s+ the \s+ license \s+ is).){0,1024}|
@@ -561,137 +570,135 @@ sub find_cruft {
                  a \s+ copy \s+ of \s+ the \s+ license \s+ is
                  /xsmo;
 
-                my $gfdlpattern
-                  = $blocknumber
-                  ? $normalgfdlpattern
-                  : $firstblockgfdlpattern;
+            my $gfdlpattern
+              = $blocknumber
+              ? $normalgfdlpattern
+              : $firstblockgfdlpattern;
 
-                if ($cleanedblock =~ $gfdlpattern) {
-                    if (!exists $licenseproblemhash{'gfdl-invariants'}) {
-                        my $rawgfdlsections = $+{rawgfdlsections} || '';
-                        my $rawcontextbefore = $+{rawcontextbefore} || '';
+            if ($cleanedblock =~ $gfdlpattern) {
+                if (!exists $licenseproblemhash{'gfdl-invariants'}) {
+                    my $rawgfdlsections = $+{rawgfdlsections} || '';
+                    my $rawcontextbefore = $+{rawcontextbefore} || '';
 
-                        # replace some common comment-marker/markup with space
-                        my $gfdlsections = _clean_block($rawgfdlsections);
-                        my $contextbefore = _clean_block($rawcontextbefore);
+                    # replace some common comment-marker/markup with space
+                    my $gfdlsections = _clean_block($rawgfdlsections);
+                    my $contextbefore = _clean_block($rawcontextbefore);
 
-                        # remove classical and without meaning part of
-                        # matched string
-                        $gfdlsections =~ s{
+                    # remove classical and without meaning part of
+                    # matched string
+                    $gfdlsections =~ s{
                           \A version \s \d+(?:\.\d+)? \s
                            (?:or \s any \s later \s version \s)?
                            published \s by \s the \s Free \s Software \s Foundation
                            \s?[,\.;]?\s?}{}xismo;
-                        $contextbefore =~ s{
+                    $contextbefore =~ s{
                           \s? (:?[,\.;]? \s?)?
                            permission \s is \s granted \s to \s copy \s?[,\.;]?\s?
                            distribute \s?[,\.;]?\s? and\s?/?\s?or \s modify \s
                            this \s document \s under \s the \s terms \s of \s the\Z}
                         {}xismo;
 
-                        # GFDL license, assume it is bad unless it
-                        # explicitly states it has no "bad sections".
-                        if (
-                            $gfdlsections =~ m/
+                    # GFDL license, assume it is bad unless it
+                    # explicitly states it has no "bad sections".
+                    if (
+                        $gfdlsections =~ m/
                             no \s? Invariant \s+ Sections? \s? [,\.;]?
                                \s? (?:with\s)? (?:the\s)? no \s
                                Front(?:\s?\\?-)?\s?Cover (?:\s Texts?)? \s? [,\.;]? \s? (?:and\s)?
                                (?:with\s)? (?:the\s)? no
                                \s Back(?:\s?\\?-)?\s?Cover/xiso
-                          ) {
-                            # no invariant
-                        } elsif (
-                            $gfdlsections =~ m/
+                      ) {
+                        # no invariant
+                    } elsif (
+                        $gfdlsections =~ m/
                             no \s Invariant \s Sections? \s? [,\.;]?
                                \s? (?:no\s)? Front(?:\s?[\\]?-)? \s or
                                \s (?:no\s)? Back(?:\s?[\\]?-)?\s?Cover \s Texts?/xiso
-                          ) {
-                            # no invariant variant (dict-foldoc)
-                        } elsif (
-                            $gfdlsections =~ m/
+                      ) {
+                        # no invariant variant (dict-foldoc)
+                    } elsif (
+                        $gfdlsections =~ m/
                             \A There \s are \s no \s invariants? \s sections? \Z
                           /xiso
-                          ) {
-                            # no invariant libnss-pgsql version
-                        } elsif (
-                            $gfdlsections =~ m/
+                      ) {
+                        # no invariant libnss-pgsql version
+                    } elsif (
+                        $gfdlsections =~ m/
                             \A without \s any \s Invariant \s Sections? \Z
                           /xiso
-                          ) {
-                            # no invariant parsewiki version
-                        } elsif (
-                            $gfdlsections=~ m/
+                      ) {
+                        # no invariant parsewiki version
+                    } elsif (
+                        $gfdlsections=~ m/
                             \A with \s no \s invariants? \s sections? \Z
                          /xiso
-                          ) {
-                            # no invariant lilypond version
-                        } elsif (
-                            $gfdlsections =~ m/\A
+                      ) {
+                        # no invariant lilypond version
+                    } elsif (
+                        $gfdlsections =~ m/\A
                             with \s the \s Invariant \s Sections \s being \s
                             LIST (?:\s THEIR \s TITLES)? \s? [,\.;]? \s?
                             with \s the \s Front(?:\s?[\\]?-)\s?Cover \s Texts \s being \s
                             LIST (?:\s THEIR \s TITLES)? \s? [,\.;]? \s?
                             (?:and\s)? with \s the \s Back(?:\s?[\\]?-)\s?Cover \s Texts \s being \s
                             LIST (?:\s THEIR \s TITLES)? \Z/xiso
-                          ) {
-                            # verbatim text of license is ok
-                        } elsif ($gfdlsections eq '') {
-                            # empty text is ambiguous
-                            tag 'license-problem-gfdl-invariants-empty',$name;
-                            $licenseproblemhash{'gfdl-invariants'} = 1;
-                        } elsif (
-                            $gfdlsections =~ m/
+                      ) {
+                        # verbatim text of license is ok
+                    } elsif ($gfdlsections eq '') {
+                        # empty text is ambiguous
+                        tag 'license-problem-gfdl-invariants-empty',$name;
+                        $licenseproblemhash{'gfdl-invariants'} = 1;
+                    } elsif (
+                        $gfdlsections =~ m/
                             with \s \&FDLInvariantSections; \s? [,\.;]? \s?
                             with \s+\&FDLFrontCoverText; \s? [,\.;]? \s?
                             and \s with \s \&FDLBackCoverText;/xiso
-                          ) {
-                            # fix #708957 about FDL entities in template
-                            unless (
-                                $name =~ m{
+                      ) {
+                        # fix #708957 about FDL entities in template
+                        unless (
+                            $name =~ m{
                                 /customization/[^/]+/entities/[^/]+\.docbook \Z
                               }xsm
-                              ) {
-                                tag 'license-problem-gfdl-invariants',$name;
-                                $licenseproblemhash{'gfdl-invariants'} = 1;
-                            }
-                        } elsif (
-                            $gfdlsections =~ m{
+                          ) {
+                            tag 'license-problem-gfdl-invariants',$name;
+                            $licenseproblemhash{'gfdl-invariants'} = 1;
+                        }
+                    } elsif (
+                        $gfdlsections =~ m{
                             \A with \s the \s? <_: \s? link-\d+ \s? /> \s?
                             being \s list \s their \s titles \s?[,\.;]?\s?
                             with \s the \s? <_: \s* link-\d+ \s? /> \s?
                             being \s list \s?[,\.;]?\s?
                             (?:and\s)? with \s the \s? <_:\s? link-\d+ \s? /> \s?
                             being \s list \Z}xiso
-                          ) {
-                            # fix a false positive in .po file
-                            unless ($name =~ m,\.po$,) {
-                                tag 'license-problem-gfdl-invariants', $name;
-                                $licenseproblemhash{'gfdl-invariants'} = 1;
-                            }
-                        } else {
-                            if (
-                                $contextbefore =~ m/
+                      ) {
+                        # fix a false positive in .po file
+                        unless ($name =~ m,\.po$,) {
+                            tag 'license-problem-gfdl-invariants', $name;
+                            $licenseproblemhash{'gfdl-invariants'} = 1;
+                        }
+                    } else {
+                        if (
+                            $contextbefore =~ m/
                                   Following \s is \s an \s example
                                   (:?\s of \s the \s license \s notice \s to \s use
                                     (?:\s after \s the \s copyright \s (?:line(?:\(s\)|s)?)?
                                       (?:\s using \s all \s the \s features? \s of \s the \s GFDL)?
                                     )?
                                   )? \s? [,:]? \Z/xiso
-                              ) {
-                                # it is an example
-                            } else {
-                                tag 'license-problem-gfdl-invariants', $name;
-                                $licenseproblemhash{'gfdl-invariants'} = 1;
-                            }
+                          ) {
+                            # it is an example
+                        } else {
+                            tag 'license-problem-gfdl-invariants', $name;
+                            $licenseproblemhash{'gfdl-invariants'} = 1;
                         }
                     }
                 }
             }
-            $blocknumber++;
         }
-        close($F);
+        $blocknumber++;
     }
-    return;
+    close($F);
 }
 
 sub _clean_block {
-- 
1.8.4.rc3

>From 82b7962a3ee2f5984bdf3d27cd6a447e781a0bc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bastien=20ROUCARI=C3=88S?= <roucaries.bastien@gmail.com>
Date: Sun, 6 Oct 2013 15:18:40 +0200
Subject: [PATCH 3/3] Port cruft.pm to sliding window

Port cruft.pm to sliding window
---
 checks/cruft.pm | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/checks/cruft.pm b/checks/cruft.pm
index 544693d..9cebb4a 100644
--- a/checks/cruft.pm
+++ b/checks/cruft.pm
@@ -41,6 +41,7 @@ use Lintian::Data;
 use Lintian::Relation ();
 use Lintian::Tags qw(tag);
 use Lintian::Util qw(fail is_ancestor_of normalize_pkg_path strip);
+use Lintian::SlidingWindow;
 
 # All the packages that may provide config.{sub,guess} during the build, used
 # to suppress warnings about outdated autotools helper files.  I'm not
@@ -461,24 +462,16 @@ sub find_cruft {
 # and is only used for autoreject by ftp-master
 sub license_check {
     my ($info, $name, $path) = @_;
-    open(my $F, '<', $path);
-    binmode($F);
 
-    my @queue = ('', '');
+    my $sfd = Lintian::SlidingWindow->new('<', $path, sub { $_=lc($_); });
     my %licenseproblemhash = ();
-    my $blocknumber = 0;
 
     # we try to read this file in block and use a sliding window
     # for efficiency.  We store two blocks in @queue and the whole
     # string to match in $block. Please emit license tags only once
     # per file
   BLOCK:
-    while (read($F, my $window, BLOCKSIZE)) {
-        my $block;
-        shift @queue;
-        push(@queue, lc($window));
-        $block =  join '', @queue;
-
+    while (my $block = $sfd->readwindow()) {
         if (index($block, '\\') > -1) {
             # Remove formatting commonly added by pod2man
             $block =~ s{ \\ & }{}gxsm;
@@ -571,7 +564,7 @@ sub license_check {
                  /xsmo;
 
             my $gfdlpattern
-              = $blocknumber
+              = $sfd->blocknumber()
               ? $normalgfdlpattern
               : $firstblockgfdlpattern;
 
@@ -696,9 +689,7 @@ sub license_check {
                 }
             }
         }
-        $blocknumber++;
     }
-    close($F);
 }
 
 sub _clean_block {
-- 
1.8.4.rc3


Reply to: