[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#721252: [PATCH 1/3] Clean up gfdl detection



Instead of using a variable for space-like expressions, transform every space-like expression into a plain space.

This is a net win of about 5% in execution time.
---
 checks/cruft.pm |  112 +++++++++++++++++++++++++++----------------------------
 1 file changed, 55 insertions(+), 57 deletions(-)

diff --git a/checks/cruft.pm b/checks/cruft.pm
index ac2a9fc..aeee3ea 100644
--- a/checks/cruft.pm
+++ b/checks/cruft.pm
@@ -505,101 +505,99 @@ sub find_cruft {
             if (
                 index($block, 'license') > -1
                 && $block =~ m/gnu (?:\s+|\s*<\/span>\s*|\s*\}\s+)? free \s+
-                     documentation \s+ license (?'gfdlsections'.{0,1024}?)
+                     documentation \s+ license (?'rawgfdlsections'.{0,1024}?)
                      a \s+ copy \s+ of \s+ the \s+ license \s+ is \s+ included/xsm
               ) {
                 if (!exists $licenseproblemhash{'gfdl-invariants'}) {
-                    my $gfdlsections = $+{gfdlsections};
-                    # local space
-                    my $s = qr{(?:
-                      \s              |  # regular space(s)
+                    my $rawgfdlsections = $+{rawgfdlsections};
+                    my $gfdlsections = $rawgfdlsections;
+
+                    # delete some tag consider as space
+                    # order from more specific to less specific
+                    $gfdlsections =~ s{(?:
+                      ^[-\+!<>]       |  # diff/patch lines
+                      ^\.\\\"         |  # man comments
                       \@c(?:omment)?  |  # Tex info comment
-                      [%\*\"\|\\]     |  # String, C-style comment/javadoc indent, quotes for strings, pipe and antislash in some txt
                       \"\s*,          |  # String array (e.g. "line1",\n"line2")
                       ,\s*\"          |  # String array (e.g. "line1"\n ,"line2"), seen in findutils
-                      \\n             |  # Verbatim \n in string array
-                      \n[-\+!<>]      |  # diff/patch lines
-                      \n\.\\\"        |  # man comments
                       <br\s*/?>       |  # (X)HTML line breaks
-                      </?link.*?>     |  # xml link
-                      </?a.*?>        |  # a link
-                      </?p.*?>        |  # html paragraph
+                      </?link[^>]*?>  |  # xml link
+                      </?a[^>]*?>     |  # a link
+                      </?p[^>]*?>     |  # html paragraph
                       \(\*note.*?::\) |  # info file note
-                    )}xsmo;
+                      \\n             |  # Verbatim \n in string array
+                      \s*[,\.;]\s*\Z  |  # final punctuation
+                      \A\s*[,\.;]\s*  |  # punctuation at the beginning
+                      [%\*\"\|\\]        # String, C-style comment/javadoc indent, quotes for strings, pipe and antislash in some txt
+                    )}{ }gxms;
+
+                    # delete double spacing now
+                    $gfdlsections =~ s{\s+}{ }gsm;
+                    $gfdlsections =~ s{\A\s+}{}gsm;
+                    $gfdlsections =~ s{\s+\Z}{}gsm;
+
+                    $gfdlsections =~ s/
+                            \A \s* version \s+ \d+(?:\.\d+)? \s+
+                            (?:or \s+ any \s+ later \s+ version \s+)?
+                            published \s+ by \s+ the \s+ Free \s+ Software \s+ Foundation \s*
+                            (?: [,\.;] \s*)?
+                            //xism;
+
                     # GFDL license, assume it is bad unless it
                     # explicitly states it has no "bad sections".
                     if (
                         $gfdlsections =~ m/
-                            no $s* Invariant $s+ Sections? $s* ,?
-                               $s+ (?:with$s+)? (?:the$s+)? no $s+ Front(?:\\?-)?$s*Cover $s+ (?:Texts?)? $s* ,? $s+ (?:and$s+)?
-                                   (?:with$s+)? (?:the$s+)? no $s+ Back(?:\\?-)?$s*Cover/xiso
+                            no \s* Invariant \s+ Sections? \s* ,?
+                               \s+ (?:with\s+)? (?:the\s+)? no \s+ Front(?:\s*\\?-)?\s*Cover (?:\s+Texts?)? \s* ,? \s+ (?:and\s+)?
+                                   (?:with\s+)? (?:the\s+)? no \s+ Back(?:\s*\\?-)?\s*Cover/xiso
                       ) {
                         # no invariant
                     } elsif (
                         $gfdlsections =~ m/
-                            no $s+ Invariant $s+ Sections?,?
-                               $s+ (?:no$s+)? Front(?:[\\]?-)? $s+ or
-                               $s+ (?:no$s+)? Back(?:[\\]?-)?$s*Cover $s+ Texts?/xiso
+                            no \s+ Invariant \s+ Sections?,?
+                               \s+ (?:no\s+)? Front(?:\s*[\\]?-)? \s+ or
+                               \s+ (?:no\s+)? Back(?:\s*[\\]?-)?\s*Cover \s+ Texts?/xiso
                       ) {
                         # no invariant variant (dict-foldoc)
                     } elsif (
-                        $gfdlsections =~ m/
-                            \A $s* (?: [\,\.;] $s* )? version $s+ \d+(?:\.\d+)? $s+
-                               (?:or $s+ any $s+ later $s+ version $s+)?
-                               published $s+ by $s+ the $s+ Free $s+ Software $s+ Foundation $s*
-                               (?: [,\.;] $s*)?
-                               There $s+ are $s+ no $s+ invariants? $s+ sections?
-                               (?: [,\.;] $s*)? \Z
+                        $gfdlsections =~ m/\A
+                               There \s+ are \s+ no \s+ invariants? \s+ sections? \Z
                                /xismo
                       ) {
                         # no invariant libnss-pgsql version
                     } elsif (
                         $gfdlsections =~ m/
-                            \A $s* (?: [\,\.;] $s* )? version $s+ \d+(?:\.\d+)? $s+
-                               (?:or $s+ any $s+ later $s+ version $s+)?
-                               published $s+ by $s+ the $s+ Free $s+ Software $s+ Foundation $s*
-                               (?: [,\.;] $s*)?
-                               without $s+ any $s+ Invariant $s+ Sections $s*
-                               (?: [,\.;] $s*)? \Z
+                               without \s+ any \s+ Invariant \s+ Sections \s*
+                               \Z
                                /xismo
                       ) {
                         # no invariant parsewiki version
                     } elsif (
                         $gfdlsections =~ m/
-                            (?: [,\.;] $s*)? version $s+ \d+(?:\.\d+)? $s+
-                            (?:or $s+ any $s+ later $s+ version $s+)?
-                            published $s+ by $s+ the $s+ Free $s+ Software $s+ Foundation $s*
-                            (?: [,\.;] $s*)?
-                            with $s+ no $s+ invariants? $s+ sections?
-                            (?: [,\.;] $s*)? \Z
+                               \A with \s+ no \s+ invariants? \s+ sections? \Z
                                /xismo
                       ) {
                         # no invariant lilypond version
                     } elsif (
                         $gfdlsections =~ m/
-                            with $s+ the $s+ Invariant $s+ Sections $s+ being
-                                 $s+ (?:\@var\{|<var>)? LIST $s+ THEIR $s+TITLES (?:\}|<\/var>)? $s* ,?
-                                 $s+ with $s+ the $s+ Front(?:[\\]?-)$s*Cover $s+ Texts $s+ being
-                                 $s+ (?:\@var\{|<var>)? LIST (?:\}|<\/var>)? $s* ,?
-                                 $s+ and $s+ with $s+ the $s+ Back(?:[\\]?-)$s*Cover $s+ Texts $s+ being
-                                 $s+ (?:\@var\{|<var>)? LIST (?:\}|<\/var>)?/xiso
+                            with \s+ the \s+ Invariant \s+ Sections \s+ being
+                                 \s+ (?:\@var\{|<var>)? LIST \s+ THEIR \s+TITLES (?:\}|<\/var>)? \s* ,?
+                                 \s+ with \s+ the \s+ Front(?:\s*[\\]?-)\s*Cover \s+ Texts \s+ being
+                                 \s+ (?:\@var\{|<var>)? LIST (?:\}|<\/var>)? \s* ,?
+                                 \s+ and \s+ with \s+ the \s+ Back(?:\s*[\\]?-)\s*Cover \s+ Texts \s+ being
+                                 \s+ (?:\@var\{|<var>)? LIST (?:\}|<\/var>)?/xiso
                       ) {
                         # verbatim text of license is ok
                     } elsif (
-                        $gfdlsections =~ m/
-                            \A $s* (?: [,\.;] $s*)? version $s+ \d+(?:\.\d+)? $s+
-                               (?:or $s+ any $s+ later $s+ version $s+)?
-                               published $s+ by $s+ the $s+ Free $s+ Software $s+ Foundation $s*
-                               (?: [,\.;] $s*)? \Z
-                               /xismo
+                        $gfdlsections =~ m/\A \s* \Z/xismo
                       ) {
                         # empty text is ambiguous
                         tag 'license-problem-gfdl-invariants-empty',$name;
                         $licenseproblemhash{'gfdl-invariants'} = 1;
                     } elsif (
                         $gfdlsections =~ m/
-                            with $s+ \&FDLInvariantSections;, $s+ with $s+ \&FDLFrontCoverText;,
-                                 $s+ and $s+ with $s+ \&FDLBackCoverText;/xiso
+                            with \s+ \&FDLInvariantSections;, \s+ with \s+ \&FDLFrontCoverText;,
+                                 \s+ and \s+ with \s+ \&FDLBackCoverText;/xiso
                       ) {
                         # fix #708957 about FDL entities in template
                         unless ($name
@@ -610,10 +608,10 @@ sub find_cruft {
                         }
                     } elsif (
                         # fix a false positive in maintain.texi
-                        $gfdlsections =~ m/\A $s* \. $s*
-                            Following $s+ is $s+ an $s+ example $s+ of $s+ the $s+ license $s+ notice $s+
-                            to $s+ use $s+ after $s+ the $s+ copyright $s+ line\(s\) $s+ using $s+ all $s+ the $s+
-                            features $s+ of $s+ the $s+ GFDL/xismo
+                        $gfdlsections =~ m/\A
+                            Following \s+ is \s+ an \s+ example \s+ of \s+ the \s+ license \s+ notice \s+
+                            to \s+ use \s+ after \s+ the \s+ copyright \s+ line\(s\) \s+ using \s+ all \s+ the \s+
+                            features \s+ of \s+ the \s+ GFDL/xiso
                       ) {
                         # allow only one text
                         unless ($name =~ m/maintain/) {
@@ -621,7 +619,7 @@ sub find_cruft {
                             $licenseproblemhash{'gfdl-invariants'} = 1;
                         }
                     } else {
-                        tag 'license-problem-gfdl-invariants', $name;
+                        tag 'license-problem-gfdl-invariants', $name, "\"$gfdlsections\"";
                         $licenseproblemhash{'gfdl-invariants'} = 1;
                     }
                 }
-- 
1.7.10.4


Reply to: