[lintian] 03/05: L::Check: Extract spelling info into simple perl data structures

To: debian-lint-maint@lists.debian.org
Subject: [lintian] 03/05: L::Check: Extract spelling info into simple perl data structures
From: Niels Thykier <nthykier@moszumanska.debian.org>
Date: Fri, 03 Jul 2015 17:05:01 +0000
Message-id: <E1ZB4Ob-0006i6-Q4@moszumanska.debian.org>
Reply-to: Niels Thykier <niels@thykier.net>
In-reply-to: <20150703170501.25679.56439@moszumanska.debian.org>
References: <20150703170501.25679.56439@moszumanska.debian.org>

This is an automated email from the git hooks/post-receive script.

nthykier pushed a commit to branch master
in repository lintian.

commit b824170f8f3569478fad5a1f1cecd647254ac588
Author: Niels Thykier <niels@thykier.net>
Date:   Fri Jul 3 18:34:31 2015 +0200

    L::Check: Extract spelling info into simple perl data structures
    
    At least for regular (single-word) spell-checking, the overhead of
    calling "known" from L::Data adds up to a couple of seconds when
    checking linux-image-4.0.0-2-rt-amd64_4.0.5-1_amd64.deb.
    
    Signed-off-by: Niels Thykier <niels@thykier.net>
---
 debian/changelog     |  5 +++++
 lib/Lintian/Check.pm | 29 +++++++++++++++++++++--------
 2 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index 9e86da6..4d0f0f7 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -38,6 +38,11 @@ lintian (2.5.33) UNRELEASED; urgency=medium
       spawn+reap.  In some cases, this can reduce the runtime
       of this command by ~50%.
 
+  * lib/Lintian/Check.pm:
+    + [NT] Optimise out some calls to Lintian::Data, which in
+      a few cases adds up to a couple of seconds in total
+      runtime.
+
   * reporting/harness:
     + [NT] Add a --[no-]generate-reports option as alias of
       the -r mode.  The option can now be used together with
diff --git a/lib/Lintian/Check.pm b/lib/Lintian/Check.pm
index 08382bf..4b08fae 100644
--- a/lib/Lintian/Check.pm
+++ b/lib/Lintian/Check.pm
@@ -268,6 +268,8 @@ Returns the number of spelling mistakes found in TEXT.
 
 =cut
 
+my (%CORRECTIONS, @CORRECTIONS_MULTIWORD);
+
 sub check_spelling {
     my ($text, $exceptions, $code_ref) = @_;
     return 0 unless $text;
@@ -280,9 +282,20 @@ sub check_spelling {
 
     my %seen;
     my $counter = 0;
-    my $corrections = Lintian::Data->new('spelling/corrections', '\|\|');
-    my $corrections_multiword
-      = Lintian::Data->new('spelling/corrections-multiword', '\|\|');
+
+    if (!%CORRECTIONS) {
+        my $corrections_multiword
+          = Lintian::Data->new('spelling/corrections-multiword', '\|\|');
+        my $corrections = Lintian::Data->new('spelling/corrections', '\|\|');
+        for my $misspelled ($corrections->all) {
+            $CORRECTIONS{$misspelled} = $corrections->value($misspelled);
+        }
+        for my $misspelled_regex ($corrections_multiword->all) {
+            my $correct = $corrections_multiword->value($misspelled_regex);
+            push(@CORRECTIONS_MULTIWORD,
+                [qr/\b($misspelled_regex)\b/, $correct]);
+        }
+    }
 
     $text =~ tr/()[]//d;
     $text =~ s/(\w-)\s*\n\s*/$1/;
@@ -296,10 +309,10 @@ sub check_spelling {
         # Some exceptions are based on case (e.g. "teH").
         next if exists($exceptions->{$word});
         my $lcword = lc $word;
-        if ($corrections->known($lcword)
+        if (exists($CORRECTIONS{$lcword})
             &&!exists($exceptions->{$lcword})) {
             $counter++;
-            my $correction = $corrections->value($lcword);
+            my $correction = $CORRECTIONS{$lcword};
             if ($word =~ /^[A-Z]+$/) {
                 $correction = uc $correction;
             } elsif ($word =~ /^[A-Z]/) {
@@ -311,10 +324,10 @@ sub check_spelling {
     }
 
     # Special case for correcting multi-word strings.
-    for my $oregex ($corrections_multiword->all) {
-        if ($text =~ m,\b($oregex)\b,) {
+    for my $cm (@CORRECTIONS_MULTIWORD) {
+        my ($oregex, $correction) = @{$cm};
+        if ($text =~ $oregex) {
             my $word = $1;
-            my $correction = $corrections_multiword->value($oregex);
             if ($word =~ /^[A-Z]+$/) {
                 $correction = uc $correction;
             } elsif ($word =~ /^[A-Z]/) {

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/lintian/lintian.git

Reply to:

References:
- [lintian] branch master updated (b058fef -> 0fcfdbb)
  - From: Niels Thykier <nthykier@moszumanska.debian.org>

Prev by Date: Re: lintian: Performance log graphed
Next by Date: [lintian] 01/05: L::Check: Remove now unused subroutine
Previous by thread: [lintian] branch master updated (b058fef -> 0fcfdbb)
Next by thread: [lintian] 01/05: L::Check: Remove now unused subroutine
Index(es):
- Date
- Thread