[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

[lintian] 02/02: L::Check: Improve normalisation in check_spelling



This is an automated email from the git hooks/post-receive script.

nthykier pushed a commit to branch master
in repository lintian.

commit a0a025d7e4f6c21a868acbe0cc81cb7cc305315b
Author: Niels Thykier <niels@thykier.net>
Date:   Wed Jun 17 23:36:16 2015 +0200

    L::Check: Improve normalisation in check_spelling
    
    Signed-off-by: Niels Thykier <niels@thykier.net>
---
 debian/changelog     | 3 +++
 lib/Lintian/Check.pm | 7 +++++++
 2 files changed, 10 insertions(+)

diff --git a/debian/changelog b/debian/changelog
index 90abde1..5ef528b 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -131,6 +131,9 @@ lintian (2.5.32) UNRELEASED; urgency=medium
     + [NT] Rewrite check_spelling{,_picky} with a new interface,
       which is better suited for additional purposes than just
       emitting Lintian tags.
+    + [JW, NT] Normalise spaces a bit better in check_spelling to
+      catch some "multi-word" misspellings, where said words are
+      not on the same line.  (Closes: #763456)
   * lib/Lintian/Collect/Package.pm:
     + [NT] Create "faux" Lintian::Path entries for missing
       intermediate directories.  This avoids a crash and made
diff --git a/lib/Lintian/Check.pm b/lib/Lintian/Check.pm
index b3bfa2a..adca85b 100644
--- a/lib/Lintian/Check.pm
+++ b/lib/Lintian/Check.pm
@@ -28,6 +28,7 @@ use Email::Valid;
 
 use Lintian::Data;
 use Lintian::Tags qw(tag);
+use Lintian::Util qw(strip);
 
 our $KNOWN_BOUNCE_ADDRESSES = Lintian::Data->new('fields/bounce-addresses');
 
@@ -290,6 +291,9 @@ sub check_spelling {
 
     $text =~ s/[()\[\]]//g;
     $text =~ s/(\w-)\s*\n\s*/$1/;
+    $text =~ tr/\r\n \t/ /s;
+    $text =~ s/\s++/ /;
+    strip($text);
 
     for my $word (split(/\s+/, $text)) {
         $word =~ s/[.,;:?!]+$//;
@@ -370,6 +374,9 @@ sub check_spelling_picky {
     # or similar which may legitimately contain lower-cased versions of
     # the words.
     $text =~ s/\[.+?\]//sg;
+    $text =~ tr/\r\n \t/ /s;
+    $text =~ s/\s++/ /;
+    strip($text);
     for my $word (split(/\s+/, $text)) {
         $word =~ s/^\(|[).,?!:;]+$//g;
         if ($corrections_case->known($word)) {

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/lintian/lintian.git


Reply to: