[lintian] 01/02: Rewrite check_spelling{,_picky} subroutines

To: debian-lint-maint@lists.debian.org
Subject: [lintian] 01/02: Rewrite check_spelling{,_picky} subroutines
From: Niels Thykier <nthykier@moszumanska.debian.org>
Date: Wed, 17 Jun 2015 21:37:10 +0000
Message-id: <[🔎] E1Z5L1C-0003af-IO@moszumanska.debian.org>
Reply-to: Niels Thykier <niels@thykier.net>
In-reply-to: <[🔎] 20150617213710.13307.29906@moszumanska.debian.org>
References: <[🔎] 20150617213710.13307.29906@moszumanska.debian.org>
This is an automated email from the git hooks/post-receive script.

nthykier pushed a commit to branch master
in repository lintian.

commit 85bde304050bdda0954911d72077824f5b459620
Author: Niels Thykier <niels@thykier.net>
Date:   Wed Jun 17 22:27:16 2015 +0200

    Rewrite check_spelling{,_picky} subroutines
    
    Signed-off-by: Niels Thykier <niels@thykier.net>
---
 checks/binaries.pm       |  6 ++--
 checks/changelog-file.pm | 15 ++++++----
 checks/copyright-file.pm |  9 ++++--
 checks/debian-readme.pm  |  9 ++++--
 checks/description.pm    | 31 ++++++++++++++------
 checks/manpages.pm       |  8 +++--
 checks/menus.pm          | 30 +++++++++----------
 debian/changelog         |  8 +++++
 lib/Lintian/Check.pm     | 76 +++++++++++++++++++++++++++++++++++-------------
 lib/Test/Lintian.pm      | 20 ++++++++++---
 t/scripts/pod-spelling.t |  2 +-
 11 files changed, 148 insertions(+), 66 deletions(-)

diff --git a/checks/binaries.pm b/checks/binaries.pm
index 34efa34..d6d763c 100644
--- a/checks/binaries.pm
+++ b/checks/binaries.pm
@@ -27,7 +27,7 @@ use autodie;
 use constant NUMPY_STRING => 'module compiled against ABI version %x'
   . ' but this version of numpy is %x';
 
-use Lintian::Check qw(check_spelling);
+use Lintian::Check qw(check_spelling spelling_tag_emitter);
 use Lintian::Data;
 use Lintian::Relation qw(:constants);
 use Lintian::Tags qw(tag);
@@ -331,7 +331,9 @@ sub run {
             'tEH' => 1, # From #782902
             'tEh' => 1, # From #782902, too
         };
-        check_spelling('spelling-error-in-binary',$strings, $file,$exceptions);
+        my $tag_emitter
+          = spelling_tag_emitter('spelling-error-in-binary', $file);
+        check_spelling($strings, $exceptions, $tag_emitter);
 
         # stripped?
         if ($fileinfo =~ m,\bnot stripped\b,o) {
diff --git a/checks/changelog-file.pm b/checks/changelog-file.pm
index 3710320..d898c16 100644
--- a/checks/changelog-file.pm
+++ b/checks/changelog-file.pm
@@ -29,11 +29,16 @@ use List::Util qw(first);
 use List::MoreUtils qw(any);
 use Parse::DebianChangelog;
 
-use Lintian::Check qw(check_spelling);
+use Lintian::Check qw(check_spelling spelling_tag_emitter);
 use Lintian::Relation::Version qw(versions_gt);
 use Lintian::Tags qw(tag);
 use Lintian::Util qw(file_is_encoded_in_non_utf8 strip);
 
+my $SPELLING_ERROR_IN_NEWS
+  = spelling_tag_emitter('spelling-error-in-news-debian');
+my $SPELLING_ERROR_CHANGELOG
+  = spelling_tag_emitter('spelling-error-in-changelog');
+
 sub run {
     my ($pkg, undef, $info, undef, $group) = @_;
     my $found_html = 0;
@@ -132,8 +137,8 @@ sub run {
                 tag 'debian-news-entry-has-strange-distribution',
                   $news->Distribution;
             }
-            check_spelling('spelling-error-in-news-debian',
-                $news->Changes,undef, $group->info->spelling_exceptions);
+            check_spelling($news->Changes, $group->info->spelling_exceptions,
+                $SPELLING_ERROR_IN_NEWS);
             if ($news->Changes =~ /^\s*\*\s/) {
                 tag 'debian-news-entry-uses-asterisk';
             }
@@ -443,8 +448,8 @@ sub run {
         # Strip out all lines that contain the word spelling to avoid false
         # positives on changelog entries for spelling fixes.
         $changes =~ s/^.*spelling.*\n//gm;
-        check_spelling('spelling-error-in-changelog', $changes, undef,
-            $group->info->spelling_exceptions);
+        check_spelling($changes, $group->info->spelling_exceptions,
+            $SPELLING_ERROR_CHANGELOG);
     }
 
     return;
diff --git a/checks/copyright-file.pm b/checks/copyright-file.pm
index aa9108f..c6e35ef 100644
--- a/checks/copyright-file.pm
+++ b/checks/copyright-file.pm
@@ -37,7 +37,7 @@ use constant {
 use Encode qw(decode);
 use List::MoreUtils qw(any);
 
-use Lintian::Check qw(check_spelling);
+use Lintian::Check qw(check_spelling spelling_tag_emitter);
 use Lintian::Data ();
 use Lintian::Tags qw(tag);
 use Lintian::Util qw(slurp_entire_file file_is_encoded_in_non_utf8);
@@ -46,6 +46,9 @@ our $KNOWN_ESSENTIAL = Lintian::Data->new('fields/essential');
 our $KNOWN_COMMON_LICENSES
   =  Lintian::Data->new('copyright-file/common-licenses');
 
+my $SPELLING_ERROR_IN_COPYRIGHT
+  = spelling_tag_emitter('spelling-error-in-copyright');
+
 sub run {
     my ($pkg, undef, $info, $proc, $group) = @_;
     my $found = 0;
@@ -305,8 +308,8 @@ sub run {
         tag 'copyright-without-copyright-notice';
     }
 
-    check_spelling('spelling-error-in-copyright', $_, undef,
-        $group->info->spelling_exceptions);
+    check_spelling($_, $group->info->spelling_exceptions,
+        $SPELLING_ERROR_IN_COPYRIGHT);
 
     # Now, check for linking against libssl if the package is covered
     # by the GPL.  (This check was requested by ftp-master.)  First,
diff --git a/checks/debian-readme.pm b/checks/debian-readme.pm
index f486e8c..3ad8247 100644
--- a/checks/debian-readme.pm
+++ b/checks/debian-readme.pm
@@ -23,9 +23,12 @@ use strict;
 use warnings;
 use autodie;
 
-use Lintian::Check qw(check_spelling);
+use Lintian::Check qw(check_spelling spelling_tag_emitter);
 use Lintian::Tags qw(tag);
 
+my $SPELLING_ERROR_IN_README
+  = spelling_tag_emitter('spelling-error-in-readme-debian');
+
 sub run {
     my ($pkg, undef, $info, undef, $group) = @_;
     my $readme = '';
@@ -51,8 +54,8 @@ sub run {
         tag 'readme-debian-contains-invalid-email-address', $1;
     }
 
-    check_spelling('spelling-error-in-readme-debian',
-        $readme, undef,$group->info->spelling_exceptions);
+    check_spelling($readme,$group->info->spelling_exceptions,
+        $SPELLING_ERROR_IN_README);
 
     return;
 }
diff --git a/checks/description.pm b/checks/description.pm
index ecfe899..cade0d0 100644
--- a/checks/description.pm
+++ b/checks/description.pm
@@ -29,10 +29,21 @@ use constant DH_MAKE_PERL_TEMPLATE => 'this description was'
 
 use Encode qw(decode);
 
-use Lintian::Check qw(check_spelling check_spelling_picky);
+use Lintian::Check
+  qw(check_spelling check_spelling_picky spelling_tag_emitter);
 use Lintian::Tags qw(tag);
 use Lintian::Util qw(strip);
 
+my $SPELLING_ERROR_IN_SYNOPSIS
+  = spelling_tag_emitter('spelling-error-in-description-synopsis');
+my $SPELLING_ERROR_IN_DESCRIPTION
+  = spelling_tag_emitter('spelling-error-in-description');
+
+my $PICKY_SPELLING_ERROR_IN_SYNOPSIS
+  = spelling_tag_emitter('capitalization-error-in-description-synopsis');
+my $PICKY_SPELLING_ERROR_IN_DESCRIPTION
+  = spelling_tag_emitter('capitalization-error-in-description');
+
 sub run {
     my ($pkg, $type, $info, undef, $group) = @_;
     my $tabs = 0;
@@ -224,17 +235,19 @@ sub run {
     }
 
     if ($synopsis) {
-        check_spelling('spelling-error-in-description-synopsis',
-            $synopsis,undef, $group->info->spelling_exceptions);
-        check_spelling_picky('capitalization-error-in-description-synopsis',
-            $synopsis);
+        check_spelling($synopsis, $group->info->spelling_exceptions,
+            $SPELLING_ERROR_IN_SYNOPSIS);
+        check_spelling_picky($synopsis, $PICKY_SPELLING_ERROR_IN_SYNOPSIS);
     }
 
     if ($description) {
-        check_spelling('spelling-error-in-description',
-            $description,undef, $group->info->spelling_exceptions);
-        check_spelling_picky('capitalization-error-in-description',
-            $description);
+        check_spelling(
+            $description,
+            $group->info->spelling_exceptions,
+            $SPELLING_ERROR_IN_DESCRIPTION
+        );
+        check_spelling_picky($description,
+            $PICKY_SPELLING_ERROR_IN_DESCRIPTION);
     }
 
     return;
diff --git a/checks/manpages.pm b/checks/manpages.pm
index a71f274..5685902 100644
--- a/checks/manpages.pm
+++ b/checks/manpages.pm
@@ -29,7 +29,7 @@ use File::Basename;
 use List::MoreUtils qw(any none);
 use Text::ParseWords ();
 
-use Lintian::Check qw(check_spelling);
+use Lintian::Check qw(check_spelling spelling_tag_emitter);
 use Lintian::Tags qw(tag);
 use Lintian::Util qw(clean_env drain_pipe fail open_gz);
 
@@ -315,6 +315,8 @@ sub run {
             waitpid $pid, 0;
             # Now we search through the whole man page for some common errors
             my $lc = 0;
+            my $stag_emitter
+              = spelling_tag_emitter('spelling-error-in-manpage', $file);
             foreach my $line (@manfile) {
                 $lc++;
                 chomp $line;
@@ -337,8 +339,8 @@ sub run {
                     tag 'manpage-has-errors-from-pod2man', "$file:$lc";
                 }
                 # Check for spelling errors if the manpage is English
-                check_spelling('spelling-error-in-manpage', $line, $file,
-                    $ginfo->spelling_exceptions)
+                check_spelling($line, $ginfo->spelling_exceptions,
+                    $stag_emitter)
                   if ($path =~ m,/man/man\d/,);
             }
         }
diff --git a/checks/menus.pm b/checks/menus.pm
index 0304194..1bb4d46 100644
--- a/checks/menus.pm
+++ b/checks/menus.pm
@@ -25,7 +25,8 @@ use strict;
 use warnings;
 use autodie;
 
-use Lintian::Check qw(check_spelling check_spelling_picky $known_shells_regex);
+use Lintian::Check
+  qw(check_spelling check_spelling_picky $known_shells_regex spelling_tag_emitter);
 use Lintian::Data;
 use Lintian::Tags qw(tag);
 use Lintian::Util qw(file_is_encoded_in_non_utf8 strip);
@@ -393,16 +394,15 @@ sub check_doc_base_field {
         # Title field.
     } elsif ($field eq 'title') {
         if (@$vals) {
+            my $stag_emitter
+              = spelling_tag_emitter('spelling-error-in-doc-base-title-field',
+                "${dbfile}:${line}");
             check_spelling(
-                'spelling-error-in-doc-base-title-field',
                 join(' ', @$vals),
-                "$dbfile:$line",$group->info->spelling_exceptions
-            );
-            check_spelling_picky(
-                'spelling-error-in-doc-base-title-field',
-                join(' ', @$vals),
-                "$dbfile:$line"
+                $group->info->spelling_exceptions,
+                $stag_emitter
             );
+            check_spelling_picky(join(' ', @$vals), $stag_emitter);
         }
 
         # Section field.
@@ -466,16 +466,16 @@ sub check_doc_base_field {
 
         # Check spelling.
         if (@$vals) {
-            check_spelling(
-                'spelling-error-in-doc-base-abstract-field',
-                join(' ', @$vals),
-                "$dbfile:$line",$group->info->spelling_exceptions
-            );
-            check_spelling_picky(
+            my $stag_emitter
+              = spelling_tag_emitter(
                 'spelling-error-in-doc-base-abstract-field',
+                "${dbfile}:${line}");
+            check_spelling(
                 join(' ', @$vals),
-                "$dbfile:$line"
+                $group->info->spelling_exceptions,
+                $stag_emitter
             );
+            check_spelling_picky(join(' ', @$vals), $stag_emitter);
         }
     }
 
diff --git a/debian/changelog b/debian/changelog
index 9e602ba..90abde1 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -2,6 +2,8 @@ lintian (2.5.32) UNRELEASED; urgency=medium
 
   XXX: generate tag summary
 
+  * checks/*.pm:
+    + [NT] Rewrite to handle new check_spelling{,_picky}
   * checks/cruft.{desc,pm}:
     + [BR] Detect non free lenna file based on filename and filetype.
       (Closes: #771191).
@@ -125,6 +127,10 @@ lintian (2.5.32) UNRELEASED; urgency=medium
       deprecated in 2.5.28.  It was deprecated in that release
       despite the omission.
 
+  * lib/Lintian/Check.pm:
+    + [NT] Rewrite check_spelling{,_picky} with a new interface,
+      which is better suited for additional purposes than just
+      emitting Lintian tags.
   * lib/Lintian/Collect/Package.pm:
     + [NT] Create "faux" Lintian::Path entries for missing
       intermediate directories.  This avoids a crash and made
@@ -138,6 +144,8 @@ lintian (2.5.32) UNRELEASED; urgency=medium
       to fully connect all path entries in the package.
   * lib/Lintian/Tag/Info.pm:
     + [NT] Expand BTS links using https rather than http.
+  * lib/Test/Lintian.pm:
+    + [NT] Output spelling mistakes as "diag()" messages now.
 
   * mail-templates/source-is-missing:
     + [BR] Apply patch from Holger Levsen improving language.
diff --git a/lib/Lintian/Check.pm b/lib/Lintian/Check.pm
index df31a06..b3bfa2a 100644
--- a/lib/Lintian/Check.pm
+++ b/lib/Lintian/Check.pm
@@ -32,7 +32,7 @@ use Lintian::Tags qw(tag);
 our $KNOWN_BOUNCE_ADDRESSES = Lintian::Data->new('fields/bounce-addresses');
 
 our @EXPORT_OK = qw(check_maintainer check_spelling check_spelling_picky
-  $known_shells_regex
+  $known_shells_regex spelling_tag_emitter
 );
 
 =head1 NAME
@@ -226,18 +226,44 @@ sub check_maintainer {
     return;
 }
 
+=item spelling_tag_emitter(TAGNAME[, FILENAME])
+
+Create and return a subroutine that is useful for emitting lintian
+tags for spelling mistakes.  The returned CODE ref can be passed to
+L</check_spelling(TEXT,[ EXCEPTIONS,] CODEREF)> and will faithfully
+emit TAGNAME once for each unique spelling mistake.
+
+The optional extra parameter FILENAME is used to denote the file name,
+when this is not given from the tagname.
+
+=cut
+
+sub spelling_tag_emitter {
+    my (@orig_args) = @_;
+    return sub {
+        return tag(@orig_args, @_);
+    };
+}
+
 sub _tag {
     my @args = grep { defined($_) } @_;
     return tag(@args);
 }
 
-=item check_spelling(TAG, TEXT, FILENAME, EXCEPTION)
+=item check_spelling(TEXT,[ EXCEPTIONS,] CODEREF)
+
+Performs a spelling check of TEXT.  Call CODEREF once for each unique
+misspelling with the following arguments:
+
+=over 4
+
+=item The misspelled word/phrase
 
-Performs a spelling check of TEXT, reporting TAG if any errors are found.
+=item The correct word/phrase
 
-If FILENAME is given, it will be used as the first argument to TAG.
+=back
 
-If EXCEPTION is given, it will be used as a hash ref of exceptions.
+If EXCEPTIONS is given, it will be used as a hash ref of exceptions.
 Any lowercase word appearing as a key of this hash ref will never be
 considered a spelling mistake (exception being if it is a part of a
 multiword misspelling).
@@ -247,8 +273,14 @@ Returns the number of spelling mistakes found in TEXT.
 =cut
 
 sub check_spelling {
-    my ($tag, $text, $filename, $exceptions) = @_;
+    my ($text, $exceptions, $code_ref) = @_;
     return 0 unless $text;
+    if (not $code_ref and $exceptions and ref($code_ref) eq 'CODE') {
+        $code_ref = $exceptions;
+        $exceptions = {};
+    } else {
+        $exceptions //= {};
+    }
 
     my %seen;
     my $counter = 0;
@@ -259,8 +291,6 @@ sub check_spelling {
     $text =~ s/[()\[\]]//g;
     $text =~ s/(\w-)\s*\n\s*/$1/;
 
-    $exceptions = {} unless (defined($exceptions));
-
     for my $word (split(/\s+/, $text)) {
         $word =~ s/[.,;:?!]+$//;
         next if ($word =~ /^[A-Z]{1,5}\z/);
@@ -277,7 +307,7 @@ sub check_spelling {
                 $correction = ucfirst $correction;
             }
             next if $seen{$lcword}++;
-            _tag($tag, $filename, $word, $correction) if defined $tag;
+            $code_ref->($word, $correction);
         }
     }
 
@@ -293,29 +323,36 @@ sub check_spelling {
             }
             $counter++;
             next if $seen{lc $word}++;
-            _tag($tag, $filename, $word, $correction)
-              if defined $tag;
+            $code_ref->($word, $correction);
         }
     }
 
     return $counter;
 }
 
-=item check_spelling_picky(TAG, TEXT, FILENAME)
+=item check_spelling_picky(TEXT, CODEREF)
+
+Performs a spelling check of TEXT.  Call CODEREF once for each unique
+misspelling with the following arguments:
+
+=over 4
+
+=item The misspelled word/phrase
+
+=item The correct word/phrase
+
+=back
 
-Perform a spelling check of TEXT, reporting TAG if any mistakes are found.
 This method performs some pickier corrections - such as checking for common
 capitalization mistakes - which would are not included in check_spelling as
 they are not appropriate for some files, such as changelogs.
 
-If FILENAME is given, it will be used as the first argument to TAG.
-
 Returns the number of spelling mistakes found in TEXT.
 
 =cut
 
 sub check_spelling_picky {
-    my ($tag, $text, $filename) = @_;
+    my ($text, $code_ref) = @_;
 
     my %seen;
     my $counter = 0;
@@ -326,8 +363,7 @@ sub check_spelling_picky {
     # removed below.
     if ($text =~ m,meta\s+package,) {
         $counter++;
-        _tag($tag, $filename, 'meta package', 'metapackage')
-          if defined $tag;
+        $code_ref->('meta package', 'metapackage');
     }
 
     # Exclude text enclosed in square brackets as it could be a package list
@@ -339,9 +375,7 @@ sub check_spelling_picky {
         if ($corrections_case->known($word)) {
             $counter++;
             next if $seen{$word}++;
-            _tag($tag, $filename, $word, $corrections_case->value($word))
-              if defined $tag;
-            next;
+            $code_ref->($word, $corrections_case->value($word));
         }
     }
 
diff --git a/lib/Test/Lintian.pm b/lib/Test/Lintian.pm
index a5f184d..68cbb6e 100644
--- a/lib/Test/Lintian.pm
+++ b/lib/Test/Lintian.pm
@@ -264,8 +264,13 @@ sub test_check_desc {
         if ($is_translation) {
             $builder->skip('Skip language specific test');
         } else {
-            $builder->is_eq(check_spelling(undef, $cinfo),
-                0, "$cname Info has no spelling errors");
+            my $mistakes = 0;
+            my $handler = sub {
+                my ($incorrect, $correct) = @_;
+                $builder->diag("Spelling ($cname): $incorrect => $correct");
+                $mistakes++;
+            };
+            $builder->is_eq($mistakes, 0,"$cname Info has no spelling errors");
         }
 
         foreach my $tpara (@tagpara) {
@@ -309,8 +314,15 @@ sub test_check_desc {
                 $builder->skip('Skip language specific test');
                 $builder->skip('Skip language specific test');
             } else {
-                $builder->is_eq(check_spelling(undef, $info),
-                    0, "$content_type $cname: $tag has no spelling errors");
+                my $mistakes = 0;
+                my $handler = sub {
+                    my ($incorrect, $correct) = @_;
+                    $builder->diag(
+                        "Spelling ($cname/$tag): $incorrect => $correct");
+                    $mistakes++;
+                };
+                $builder->is_eq($mistakes, 0,
+                    "$content_type $cname: $tag has no spelling errors");
 
                 $builder->ok($info !~ /(?:^| )(?:[Ww]e|I)\b/,
                     'Tag info does not speak of "I", or "we"')
diff --git a/t/scripts/pod-spelling.t b/t/scripts/pod-spelling.t
index 1f060c9..b706204 100755
--- a/t/scripts/pod-spelling.t
+++ b/t/scripts/pod-spelling.t
@@ -120,7 +120,7 @@ multiarch relationA relationB Multi natively unordered arg CVE autodie
 hashrefs namespace subdir SIGPIPE SIG blocknumber blocksub readwindow
 REMOVESLASH STAMPFILE TAGNAME TCODE TESTDATA BLOCKSIZE jN
 POSIX t1c2pfb init runtime txt executability writability
-INHANDLE OUTHANDLES UTC timestamp faux
+INHANDLE OUTHANDLES UTC timestamp faux tagname
 
 __END__
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/lintian/lintian.git
Reply to:
References:
- [lintian] branch master updated (6b4ab50 -> a0a025d)
  - From: Niels Thykier <nthykier@moszumanska.debian.org>
Prev by Date: [lintian] 02/02: L::Check: Improve normalisation in check_spelling
Next by Date: [lintian] 01/01: L::Check: Add missing /g modifier to regex
Previous by thread: [lintian] 02/02: L::Check: Improve normalisation in check_spelling
Next by thread: [lintian] branch master updated (a0a025d -> de4594c)
Index(es):
- Date
- Thread