[lintian] 03/05: L::Check: Extract spelling info into simple perl data structures
This is an automated email from the git hooks/post-receive script.
nthykier pushed a commit to branch master
in repository lintian.
commit b824170f8f3569478fad5a1f1cecd647254ac588
Author: Niels Thykier <niels@thykier.net>
Date: Fri Jul 3 18:34:31 2015 +0200
L::Check: Extract spelling info into simple perl data structures
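At least for regular (single-word) spell-checking, the overhead of
calling "known" from L::Data adds up to a couple of seconds on
linux-image-4.0.0-2-rt-amd64_4.0.5-1_amd64.deb. Instead, load the
corrections into plain Perl data structures (a hash and a list of
pre-compiled regexes) on first use and look them up there.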
Signed-off-by: Niels Thykier <niels@thykier.net>
---
debian/changelog | 5 +++++
lib/Lintian/Check.pm | 29 +++++++++++++++++++++--------
2 files changed, 26 insertions(+), 8 deletions(-)
diff --git a/debian/changelog b/debian/changelog
index 9e86da6..4d0f0f7 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -38,6 +38,11 @@ lintian (2.5.33) UNRELEASED; urgency=medium
spawn+reap. In some cases, this can reduce the runtime
of this command by ~50%.
+ * lib/Lintian/Check.pm:
+ + [NT] Optimise out some calls to Lintian::Data, which in
+ a few cases adds up to a couple of seconds in total
+ runtime.
+
* reporting/harness:
+ [NT] Add a --[no-]generate-reports option as alias of
the -r mode. The option can now be used together with
diff --git a/lib/Lintian/Check.pm b/lib/Lintian/Check.pm
index 08382bf..4b08fae 100644
--- a/lib/Lintian/Check.pm
+++ b/lib/Lintian/Check.pm
@@ -268,6 +268,8 @@ Returns the number of spelling mistakes found in TEXT.
=cut
+my (%CORRECTIONS, @CORRECTIONS_MULTIWORD);
+
sub check_spelling {
my ($text, $exceptions, $code_ref) = @_;
return 0 unless $text;
@@ -280,9 +282,20 @@ sub check_spelling {
my %seen;
my $counter = 0;
- my $corrections = Lintian::Data->new('spelling/corrections', '\|\|');
- my $corrections_multiword
- = Lintian::Data->new('spelling/corrections-multiword', '\|\|');
+
+ if (!%CORRECTIONS) {
+ my $corrections_multiword
+ = Lintian::Data->new('spelling/corrections-multiword', '\|\|');
+ my $corrections = Lintian::Data->new('spelling/corrections', '\|\|');
+ for my $misspelled ($corrections->all) {
+ $CORRECTIONS{$misspelled} = $corrections->value($misspelled);
+ }
+ for my $misspelled_regex ($corrections_multiword->all) {
+ my $correct = $corrections_multiword->value($misspelled_regex);
+ push(@CORRECTIONS_MULTIWORD,
+ [qr/\b($misspelled_regex)\b/, $correct]);
+ }
+ }
$text =~ tr/()[]//d;
$text =~ s/(\w-)\s*\n\s*/$1/;
@@ -296,10 +309,10 @@ sub check_spelling {
# Some exceptions are based on case (e.g. "teH").
next if exists($exceptions->{$word});
my $lcword = lc $word;
- if ($corrections->known($lcword)
+ if (exists($CORRECTIONS{$lcword})
&&!exists($exceptions->{$lcword})) {
$counter++;
- my $correction = $corrections->value($lcword);
+ my $correction = $CORRECTIONS{$lcword};
if ($word =~ /^[A-Z]+$/) {
$correction = uc $correction;
} elsif ($word =~ /^[A-Z]/) {
@@ -311,10 +324,10 @@ sub check_spelling {
}
# Special case for correcting multi-word strings.
- for my $oregex ($corrections_multiword->all) {
- if ($text =~ m,\b($oregex)\b,) {
+ for my $cm (@CORRECTIONS_MULTIWORD) {
+ my ($oregex, $correction) = @{$cm};
+ if ($text =~ $oregex) {
my $word = $1;
- my $correction = $corrections_multiword->value($oregex);
if ($word =~ /^[A-Z]+$/) {
$correction = uc $correction;
} elsif ($word =~ /^[A-Z]/) {
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/lintian/lintian.git
Reply to: