[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

[SCM] Debian package checker branch, master, updated. 2.2.13-67-g01f7e42



The following commit has been merged in the master branch:
commit 296b2e4c7d4a95f49493465aae6006596c01858b
Author: Russ Allbery <rra@debian.org>
Date:   Sat Aug 15 22:45:59 2009 -0700

    More duplicate word false positive fixes
    
    Require that duplicate words start and end with a word character so that
    we don't think two punctuation marks in a row are duplicate words.

diff --git a/checks/description b/checks/description
index bc71c26..021fd66 100644
--- a/checks/description
+++ b/checks/description
@@ -125,9 +125,13 @@ foreach (split /\n/, $description) {
     # Check for duplicated words.  We want to catch "this this." but not
     # "ITU-T T.81", so compare non-whitespace sequences rather than word
     # characters but allow punctuation at the end.
+    #
+    # We don't want to think ", ," or "a, a" is a duplicated word, so require
+    # that a word start and end with a word character.
     my $stripped = $_;
     $stripped =~ s,(\"|\')(.*?)(\1),,g;
-    while ($stripped =~ m%(?:\s|^)((\S+)(\s+(\2))+)(?:[\).,?!:;\s]|\z)%i) {
+    while ($stripped =~
+	   m%(?:\s|^)((\w(?:\S*\w)?)(\s+(\2))+)(?:[\).,?!:;\s]|\z)%i) {
 	my $words = $1;
 	$stripped =~ s/\Q$words//;
         tag "description-contains-duplicated-word", "$words";
@@ -135,8 +139,7 @@ foreach (split /\n/, $description) {
 
     my $first_person = $_;
     while ($first_person =~
-	m/(?:^|\s)(I|[Mm]y|[Oo]urs?|mine|myself|me|us|[Ww]e)(?:$|\s)/) {
-
+	   m/(?:^|\s)(I|[Mm]y|[Oo]urs?|mine|myself|me|us|[Ww]e)(?:$|\s)/) {
 	my $word = $1;
 	$first_person =~ s/\Q$word//;
 	tag "using-first-person-in-description", "line $lines: $word";
diff --git a/t/tests/description-general/debian/debian/control.in b/t/tests/description-general/debian/debian/control.in
index 1837a69..cbfac4c 100644
--- a/t/tests/description-general/debian/debian/control.in
+++ b/t/tests/description-general/debian/debian/control.in
@@ -91,6 +91,8 @@ Architecture: {$architecture}
 Depends: $\{shlibs:Depends\}, $\{misc:Depends\}
 Description: test package with duplicated words that aren't
  Lossless JPEG is defined in ITU-T T.81, ISO/IEC IS 10918-1.
+ Contain the strings " link to ", " -> ", or ": ".
+ This is train A, a particularly fast train.
  .
  This is a test package designed to exercise some feature or tag of
  Lintian.  It is part of the Lintian test suite and may do very odd

-- 
Debian package checker


Reply to: