[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

[SCM] Debian package checker branch, master, updated. 2.5.11-247-gfed3588



The following commit has been merged in the master branch:
commit c8b97181e1b78508c9264a559368b9dca6540f77
Author: Niels Thykier <niels@thykier.net>
Date:   Sat Apr 13 11:00:23 2013 +0200

    c/files: Check for files in non-UTF-8 encoding
    
    Signed-off-by: Niels Thykier <niels@thykier.net>

diff --git a/checks/files b/checks/files
index 5ba18c8..c77e07b 100644
--- a/checks/files
+++ b/checks/files
@@ -26,7 +26,7 @@ use File::Basename;
 
 use Lintian::Data;
 use Lintian::Tags qw(tag);
-use Lintian::Util qw(fail open_gz);
+use Lintian::Util qw(fail is_string_utf8_encoded open_gz);
 
 my $FONT_PACKAGES = Lintian::Data->new ('files/fonts', qr/\s++/);
 my $TRIPLETS = Lintian::Data->new ('files/triplets', qr/\s++/);
@@ -242,7 +242,6 @@ if (!$is_dummy) {
 
 # Read package contents...
 foreach my $file ($info->sorted_index) {
-    next if $file eq '';
     my $index_info = $info->index ($file);
     my $owner = $index_info->owner . '/' . $index_info->group;
     my $operm = $index_info->operm;
@@ -250,6 +249,10 @@ foreach my $file ($info->sorted_index) {
 
     $arch_dep_files = 1 if $file !~ m,^usr/share/,o && $file ne 'usr/';
 
+    if (!is_string_utf8_encoded($file)) {
+        tag 'file-name-is-not-valid-UTF-8', $file;
+    }
+
     if ($index_info->is_hardlink) {
         my $link_target_dir = $link;
         $link_target_dir =~ s,[^/]*$,,;
diff --git a/checks/files.desc b/checks/files.desc
index 0e47660..12d571a 100644
--- a/checks/files.desc
+++ b/checks/files.desc
@@ -1304,3 +1304,13 @@ Tag: dir-or-file-in-build-tree
 Severity: serious
 Certainty: possible
 Info: Your package install file in our build tree.
+
+Tag: file-name-is-not-valid-UTF-8
+Severity: normal
+Certainty: certain
+Ref: #701081
+Info: The file name does not appear to be valid UTF-8.  This may become
+ a requirement in future Policy versions.
+ .
+ Note that Lintian may be unable to display the filename accurately.
+ Unprintable characters may have been replaced.
diff --git a/debian/changelog b/debian/changelog
index 8205368..c72abfa 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -11,6 +11,7 @@ lintian (2.5.12) UNRELEASED; urgency=low
       - dir-or-file-in-build-tree
       - dir-or-file-in-etc-opt
       - dir-or-file-in-home
+      - file-name-is-not-valid-UTF-8
       - font-adobe-copyrighted-fragment-no-credit
       - font-package-not-multi-arch-foreign
       - illegal-runtime-test-name
@@ -94,6 +95,9 @@ lintian (2.5.12) UNRELEASED; urgency=low
       spotting it.  (Closes: #699452)
     + [NT] Add patch from Bastien Roucariès to check for another
       adobe font license issues.  (Closes: #705175)
+    + [NT] Test for use of file names that contain invalid
+      UTF-8 byte sequences.  Thanks to Helmut Grohne for the
+      suggestion.  (Closes: #704446)
   * checks/init.d:
     + [NT] Fix regression where Lintian would not properly match
       init.d passed to update-rc.d.  Thanks to Michael Meskes for
diff --git a/lib/Lintian/Util.pm b/lib/Lintian/Util.pm
index df7c093..9c9fd4f 100644
--- a/lib/Lintian/Util.pm
+++ b/lib/Lintian/Util.pm
@@ -57,6 +57,7 @@ BEGIN {
                  get_file_checksum
                  slurp_entire_file
                  file_is_encoded_in_non_utf8
+                 is_string_utf8_encoded
                  fail
                  strip
                  lstrip
@@ -712,6 +713,29 @@ sub get_file_checksum {
     return $digest->hexdigest;
 }
 
+=item is_string_utf8_encoded(STRING)
+
+Returns a truth value if STRING can be decoded as valid UTF-8.
+
+=cut
+
+sub is_string_utf8_encoded {
+    my ($str) = @_;
+    if ($str =~ m,\e[-!"\$%()*+./],) {
+        # ISO-2022
+        return 0;
+    }
+    eval {
+        Encode::decode('UTF-8', $str, Encode::FB_CROAK);
+    };
+    if ($@) {
+        # fail
+        return 0;
+    }
+    # pass
+    return 1;
+}
+
 =item file_is_encoded_in_non_utf8 (...)
 
 Undocumented
@@ -726,15 +750,7 @@ sub file_is_encoded_in_non_utf8 {
         or fail("failure while checking encoding of $file for $type package $pkg");
     my $line = 0;
     while (<$fd>) {
-        if (m,\e[-!"\$%()*+./],) {
-            # ISO-2022
-            $line = $.;
-            last;
-        }
-        eval {
-            $_ = Encode::decode('UTF-8', $_, Encode::FB_CROAK);
-        };
-        if ($@) {
+        if (!is_string_utf8_encoded($_)) {
             $line = $.;
             last;
         }
diff --git a/t/tests/files-general/debian/debian/rules b/t/tests/files-general/debian/debian/rules
index 1806e1d..e5e7c6e 100755
--- a/t/tests/files-general/debian/debian/rules
+++ b/t/tests/files-general/debian/debian/rules
@@ -13,6 +13,10 @@ override_dh_install:
 	touch $(tmp)/usr/share/foo/'*'
 	touch $(tmp)/usr/share/foo/'ws '
 	touch $(tmp)/usr/share/foo/.nfs-fake-tmpfile
+	# If the following line gets messed up, it can be
+	# restored with something like:
+	#   sed -i 's/@FILE@/bokm\xe5l/'
+	touch $(tmp)/usr/share/foo/bokm�l
 	touch $(tmp)/var/catman/do
 
 override_dh_fixperms:
diff --git a/t/tests/files-general/desc b/t/tests/files-general/desc
index a39c520..51b701a 100644
--- a/t/tests/files-general/desc
+++ b/t/tests/files-general/desc
@@ -14,6 +14,7 @@ Test-For:
  executable-manpage
  file-in-unusual-dir
  file-name-ends-in-whitespace
+ file-name-is-not-valid-UTF-8
  global-data-in-games-directory
  icon-size-and-directory-name-mismatch
  lengthy-symlink
diff --git a/t/tests/files-general/tags b/t/tests/files-general/tags
index df6f91f..ecdf64c 100644
--- a/t/tests/files-general/tags
+++ b/t/tests/files-general/tags
@@ -22,6 +22,7 @@ W: files-general: executable-not-elf-or-script usr/share/man/man5/foo.5.gz
 W: files-general: file-in-unusual-dir new-top-level-dir/file-in-new-top-level-dir
 W: files-general: file-in-unusual-dir var/catman/do
 W: files-general: file-name-ends-in-whitespace usr/share/foo/ws 
+W: files-general: file-name-is-not-valid-UTF-8 usr/share/foo/bokm?l
 W: files-general: icon-size-and-directory-name-mismatch usr/share/apps/lintian/icons/hicolor/22x22/lintian-16x16.png 16x16
 W: files-general: icon-size-and-directory-name-mismatch usr/share/apps/lintian/icons/hicolor/64x64/lintian-64x64.png 22x22
 W: files-general: icon-size-and-directory-name-mismatch usr/share/icons/hicolor/22x22/apps/lintian-16x16.png 16x16
diff --git a/testset/tags.filenames b/testset/tags.filenames
index 0a45f1c..7fe8258 100644
--- a/testset/tags.filenames
+++ b/testset/tags.filenames
@@ -99,6 +99,8 @@ W: filenames: file-in-unusual-dir files/svk-commitsEr9P.tmp
 W: filenames: file-in-unusual-dir files/svn-commit.tmp
 W: filenames: file-in-usr-lib-sgml usr/lib/sgml/package
 W: filenames: file-name-ends-in-whitespace files/'\ 
+W: filenames: file-name-is-not-valid-UTF-8 usr/share/doc/filenames/bokm?l
+W: filenames: file-name-is-not-valid-UTF-8 usr/share/doc/filenames/bokm\?l
 W: filenames: gz-file-not-gzip usr/share/filenames/prototype.js.gz
 W: filenames: macos-ds-store-file-in-package usr/share/doc/filenames/.DS_Store
 W: filenames: macos-resource-fork-file-in-package usr/share/doc/filenames/._NEWS.Debian

-- 
Debian package checker


Reply to: