[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

[SCM] Debian package checker branch, master, updated. 2.3.3-70-gddd3bed



The following commit has been merged in the master branch:
commit 49ee2434a05fa199fc6dbe965105e279b9e5c02f
Author: Raphael Geissert <atomo64@gmail.com>
Date:   Sun Mar 21 20:07:49 2010 -0600

    New checks for incorrect and unknown locale codes
    
    * checks/files{,.desc}:
      + [RG] New checks for incorrect and unknown locale codes in
        /usr/share/locale subdirectories.
    
    * data/files/incorrect-locale-codes:
      + [RG] New list of common incorrect locale names
    * data/files/locale-codes:
      + [RG] Generated from unstable's iso-codes.
    
    * private/refresh-locale-codes:
      + [RG] New script to generate a list of locale codes.

diff --git a/checks/files b/checks/files
index e1f2071..3946c88 100644
--- a/checks/files
+++ b/checks/files
@@ -29,6 +29,8 @@ use Util;
 
 our $FONT_PACKAGES;
 our $TRIPLETS;
+our $LOCALE_CODES;
+our $INCORRECT_LOCALE_CODES;
 
 # A list of known packaged Javascript libraries
 # and the packages providing them
@@ -1037,6 +1039,34 @@ foreach my $file (sort keys %{$info->index}) {
 	if ($file =~ m,usr/share/doc/[^/]+/examples/examples/?$,) {
 	    tag "nested-examples-directory", "$file";
 	}
+	if ($file =~ m,^usr/share/locale/([^/]+)/$,) {
+	    # Without encoding:
+	    my ($lwccode) = split(/[.@]/, $1);
+	    # Without country code:
+	    my ($lcode) = split(/_/, $lwccode);
+
+	    # special exception:
+	    if ($lwccode ne 'l10n') {
+		$INCORRECT_LOCALE_CODES = Lintian::Data->new('files/incorrect-locale-codes', '\s+')
+		    unless defined($INCORRECT_LOCALE_CODES);
+		$LOCALE_CODES = Lintian::Data->new('files/locale-codes', '\s+')
+		    unless defined($LOCALE_CODES);
+
+		if ($INCORRECT_LOCALE_CODES->known($lwccode)) {
+		    tag 'incorrect-locale-code',
+			"$lwccode ->", $INCORRECT_LOCALE_CODES->value($lwccode);
+		} elsif ($INCORRECT_LOCALE_CODES->known($lcode)) {
+		    tag 'incorrect-locale-code',
+			"$lcode ->", $INCORRECT_LOCALE_CODES->value($lcode);
+		} elsif (!$LOCALE_CODES->known($lcode)) {
+		    tag 'unknown-locale-code', $lcode;
+		} elsif ($LOCALE_CODES->known($lcode) && defined($LOCALE_CODES->value($lcode))) {
+		    # If there's a key-value pair in the codes list it
+		    # means the ISO 639-2 code is being used instead of ISO 639-1's
+		    tag 'incorrect-locale-code', "$lcode ->", $LOCALE_CODES->value($lcode);
+		}
+	    }
+	}
     }
     # ---------------- symbolic links
     elsif ($index_info->{type} =~ m/^l/) {
diff --git a/checks/files.desc b/checks/files.desc
index fefa174..72b2a9a 100644
--- a/checks/files.desc
+++ b/checks/files.desc
@@ -1152,3 +1152,30 @@ Certainty: possible
 Info: The given file is literally installed as <tt>*</tt> (star
  symbol).  Normally this indicates a mistake in the installation
  process of the package either when creating symlinks or renaming files.
+
+Tag: incorrect-locale-code
+Severity: normal
+Certainty: possible
+Info: The package appears to ship locales for a language but uses an
+ incorrect locale code as a subdirectory of <tt>/usr/share/locale</tt>.
+ This usually results in users of the intended target language not
+ finding the locale.  The language codes used in the locale directories
+ are those from the ISO 639-1 and ISO 639-2 standards, not those
+ usually used as TLDs (which are from the ISO 3166 standard).
+ .
+ Lintian only knows about a limited set of commonly mistaken locale
+ codes.
+
+Tag: unknown-locale-code
+Severity: normal
+Certainty: certain
+Ref: http://www.loc.gov/standards/iso639-2/php/code_list.php
+Info: The package appears to ship locales for a language but uses an
+ unknown locale code as a subdirectory of <tt>/usr/share/locale</tt>.
+ This usually results in users of the intended target language not
+ finding the locale.  The language codes used in the locale directories
+ are those from the ISO 639-1 and ISO 639-2 standards, not those
+ usually used as TLDs (which are from the ISO 3166 standard).
+ .
+ It is possible that the language code was mistyped or incorrectly
+ guessed from the language's or country's name.
diff --git a/data/files/incorrect-locale-codes b/data/files/incorrect-locale-codes
new file mode 100644
index 0000000..e69a93f
--- /dev/null
+++ b/data/files/incorrect-locale-codes
@@ -0,0 +1,16 @@
+# List of known common incorrect locale codes.
+
+# Albanian is sq, not al:
+al sq
+# Chinese is zh, not cn:
+cn zh
+# Czech is cs, not cz:
+cz cs
+# Danish is da, not dk:
+dk da
+# UK != GB
+en_UK en_GB
+# Greek is el, not gr:
+gr el
+# Indonesian is id, not in:
+in id
diff --git a/data/files/locale-codes b/data/files/locale-codes
new file mode 100644
index 0000000..cccfd01
--- /dev/null
+++ b/data/files/locale-codes
@@ -0,0 +1,677 @@
+# List of locale codes.  This is derived from the ISO 639-1 and ISO
+# 639-2 standards.
+# If a language has 639-1 and 639-2 codes, the -2 code is also included
+# as a key to be mapped to the -1 code.
+#
+# Last updated: 2010-03-22
+
+aa
+aar aa
+ab
+abk ab
+ace
+ach
+ada
+ady
+ae
+af
+afa
+afh
+afr af
+ain
+ak
+aka ak
+akk
+ale
+alg
+alt
+am
+amh am
+an
+ang
+anp
+apa
+ar
+ara ar
+arc
+arg an
+arn
+arp
+art
+arw
+as
+asm as
+ast
+ath
+aus
+av
+ava av
+ave ae
+awa
+ay
+aym ay
+az
+aze az
+ba
+bad
+bai
+bak ba
+bal
+bam bm
+ban
+bas
+bat
+be
+bej
+bel be
+bem
+ben bn
+ber
+bg
+bh
+bho
+bi
+bih bh
+bik
+bin
+bis bi
+bla
+bm
+bn
+bnt
+bo
+bod bo
+bos bs
+br
+bra
+bre br
+bs
+btk
+bua
+bug
+bul bg
+byn
+ca
+cad
+cai
+car
+cat ca
+cau
+ce
+ceb
+cel
+ces cs
+ch
+cha ch
+chb
+che ce
+chg
+chk
+chm
+chn
+cho
+chp
+chr
+chu cu
+chv cv
+chy
+cmc
+co
+cop
+cor kw
+cos co
+cpe
+cpf
+cpp
+cr
+cre cr
+crh
+crp
+cs
+csb
+cu
+cus
+cv
+cy
+cym cy
+da
+dak
+dan da
+dar
+day
+de
+del
+den
+deu de
+dgr
+din
+div dv
+doi
+dra
+dsb
+dua
+dum
+dv
+dyu
+dz
+dzo dz
+ee
+efi
+egy
+eka
+el
+ell el
+elx
+en
+eng en
+enm
+eo
+epo eo
+es
+est et
+et
+eu
+eus eu
+ewe ee
+ewo
+fa
+fan
+fao fo
+fas fa
+fat
+ff
+fi
+fij fj
+fil
+fin fi
+fiu
+fj
+fo
+fon
+fr
+fra fr
+frm
+fro
+frr
+frs
+fry fy
+ful ff
+fur
+fy
+ga
+gaa
+gay
+gba
+gd
+gem
+gez
+gil
+gl
+gla gd
+gle ga
+glg gl
+glv gv
+gmh
+gn
+goh
+gon
+gor
+got
+grb
+grc
+grn gn
+gsw
+gu
+guj gu
+gv
+gwi
+ha
+hai
+hat ht
+hau ha
+haw
+he
+heb he
+her hz
+hi
+hil
+him
+hin hi
+hit
+hmn
+hmo ho
+ho
+hr
+hrv hr
+hsb
+ht
+hu
+hun hu
+hup
+hy
+hye hy
+hz
+ia
+iba
+ibo ig
+id
+ido io
+ie
+ig
+ii
+iii ii
+ijo
+ik
+iku iu
+ile ie
+ilo
+ina ia
+inc
+ind id
+ine
+inh
+io
+ipk ik
+ira
+iro
+is
+isl is
+it
+ita it
+iu
+ja
+jav jv
+jbo
+jpn ja
+jpr
+jrb
+jv
+ka
+kaa
+kab
+kac
+kal kl
+kam
+kan kn
+kar
+kas ks
+kat ka
+kau kr
+kaw
+kaz kk
+kbd
+kg
+kha
+khi
+khm km
+kho
+ki
+kik ki
+kin rw
+kir ky
+kj
+kk
+kl
+km
+kmb
+kn
+ko
+kok
+kom kv
+kon kg
+kor ko
+kos
+kpe
+kr
+krc
+krl
+kro
+kru
+ks
+ku
+kua kj
+kum
+kur ku
+kut
+kv
+kw
+ky
+la
+lad
+lah
+lam
+lao lo
+lat la
+lav lv
+lb
+lez
+lg
+li
+lim li
+lin ln
+lit lt
+ln
+lo
+lol
+loz
+lt
+ltz lb
+lu
+lua
+lub lu
+lug lg
+lui
+lun
+luo
+lus
+lv
+mad
+mag
+mah mh
+mai
+mak
+mal ml
+man
+map
+mar mr
+mas
+mdf
+mdr
+men
+mg
+mga
+mh
+mi
+mic
+min
+mis
+mk
+mkd mk
+mkh
+ml
+mlg mg
+mlt mt
+mn
+mnc
+mni
+mno
+mo
+moh
+mol mo
+mon mn
+mos
+mr
+mri mi
+ms
+msa ms
+mt
+mul
+mun
+mus
+mwl
+mwr
+my
+mya my
+myn
+myv
+na
+nah
+nai
+nap
+nau na
+nav nv
+nb
+nbl nr
+nd
+nde nd
+ndo ng
+nds
+ne
+nep ne
+new
+ng
+nia
+nic
+niu
+nl
+nld nl
+nn
+nno nn
+no
+nob nb
+nog
+non
+nor no
+nqo
+nr
+nso
+nub
+nv
+nwc
+ny
+nya ny
+nym
+nyn
+nyo
+nzi
+oc
+oci oc
+oj
+oji oj
+om
+or
+ori or
+orm om
+os
+osa
+oss os
+ota
+oto
+pa
+paa
+pag
+pal
+pam
+pan pa
+pap
+pau
+peo
+phi
+phn
+pi
+pl
+pli pi
+pol pl
+pon
+por pt
+pra
+pro
+ps
+pt
+pus ps
+qu
+que qu
+raj
+rap
+rar
+rm
+rn
+ro
+roa
+roh rm
+rom
+ron ro
+ru
+run rn
+rup
+rus ru
+rw
+sa
+sad
+sag sg
+sah
+sai
+sal
+sam
+san sa
+sas
+sat
+sc
+scn
+sco
+sd
+se
+sel
+sem
+sg
+sga
+sgn
+shn
+si
+sid
+sin si
+sio
+sit
+sk
+sl
+sla
+slk sk
+slv sl
+sm
+sma
+sme se
+smi
+smj
+smn
+smo sm
+sms
+sn
+sna sn
+snd sd
+snk
+so
+sog
+som so
+son
+sot st
+spa es
+sq
+sqi sq
+sr
+srd sc
+srn
+srp sr
+srr
+ss
+ssa
+ssw ss
+st
+su
+suk
+sun su
+sus
+sux
+sv
+sw
+swa sw
+swe sv
+syc
+syr
+ta
+tah ty
+tai
+tam ta
+tat tt
+te
+tel te
+tem
+ter
+tet
+tg
+tgk tg
+tgl tl
+th
+tha th
+ti
+tig
+tir ti
+tiv
+tk
+tkl
+tl
+tlh
+tli
+tmh
+tn
+to
+tog
+ton to
+tpi
+tr
+ts
+tsi
+tsn tn
+tso ts
+tt
+tuk tk
+tum
+tup
+tur tr
+tut
+tvl
+tw
+twi tw
+ty
+tyv
+udm
+ug
+uga
+uig ug
+uk
+ukr uk
+umb
+und
+ur
+urd ur
+uz
+uzb uz
+vai
+ve
+ven ve
+vi
+vie vi
+vo
+vol vo
+vot
+wa
+wak
+wal
+war
+was
+wen
+wln wa
+wo
+wol wo
+xal
+xh
+xho xh
+yao
+yap
+yi
+yid yi
+yo
+yor yo
+ypk
+za
+zap
+zbl
+zen
+zh
+zha za
+zho zh
+znd
+zu
+zul zu
+zun
+zxx
+zza
diff --git a/debian/changelog b/debian/changelog
index d04e9c1..aeb3426 100755
--- a/debian/changelog
+++ b/debian/changelog
@@ -5,11 +5,13 @@ lintian (2.3.4) UNRELEASED; urgency=low
       - control-file-with-CRLF-EOLs
       - debian-rules-makemaker-prefix-is-deprecated
       - empty-binary-package
+      - incorrect-locale-code
       - missing-debian-source-format
       - possible-new-upstream-release-without-new-version
       - star-file
       - unknown-file-in-debian-source
       - unknown-source-format
+      - unknown-locale-code
 
   * checks/binaries:
     + [RA] Exclude packages from the klibc source package from the
@@ -58,6 +60,8 @@ lintian (2.3.4) UNRELEASED; urgency=low
       Falavigna.  (Closes: #569220)
     + [RG] New check for packages shipping files literally called '*'
       (star symbol).  Thanks, Niels Thykier.  (Closes: #574771)
+    + [RG] New checks for incorrect and unknown locale codes in
+      /usr/share/locale subdirectories.
   * checks/init.d:
     + [RA] Exclude symlinks to upstart-job from init script syntax checks.
       Based on a patch by Jos Boumans.  (Closes: #569492)
@@ -98,6 +102,10 @@ lintian (2.3.4) UNRELEASED; urgency=low
     + [RG] Refreshed against dpkg 1.15.6.
   * data/files/fonts:
     + [RG] Refresh against unstable.
+  * data/files/incorrect-locale-codes:
+    + [RG] New list of common incorrect locale names
+  * data/files/locale-codes:
+    + [RG] Generated from unstable's iso-codes.
   * data/files/triplets:
     + [RG] Refreshed against dpkg 1.15.6.
 
@@ -127,6 +135,9 @@ lintian (2.3.4) UNRELEASED; urgency=low
     + [CW] Fix typo.
     + [RA] Document new debian-source-dir check script.
 
+  * private/refresh-locale-codes:
+    + [RG] New script to generate a list of locale codes.
+
   * reporting/templates/foot.tmpl:
     + [ADB] Replace the "please mail comments to the maintainer" request
       with a pointer to reportbug.  (Closes: #572298)
diff --git a/private/refresh-locale-codes b/private/refresh-locale-codes
new file mode 100755
index 0000000..076a88a
--- /dev/null
+++ b/private/refresh-locale-codes
@@ -0,0 +1,82 @@
+#!/bin/sh
+# refresh-locale-codes -- Refresh the list of locale codes (ISO 639-1/639-2)
+
+# Copyright (C) 2010 Raphael Geissert <atomo64@gmail.com>
+#
+# This file is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this file.  If not, see <http://www.gnu.org/licenses/>.
+
+set -e
+
+# A missing argument is a usage error: report it on stderr and fail.
+if [ -z "$1" ]; then
+    printf "Usage: %s <path-to-data>\n" "$(basename "$0")" >&2
+    cat >&2 <<INFO
+<path-to-data> must be the path to the root of the Lintian data
+directory to update.
+
+This script requires the isoquery package to be installed.
+INFO
+    exit 1
+fi
+
+readonly lintian_data="$(readlink -f "$1")"
+
+[ -d "$lintian_data" ] || {
+    printf "%s is not a directory, aborting\n" "$lintian_data" >&2
+    exit 1
+}
+
+readonly tmpfile="$(mktemp)"
+
+# Remove the temporary file on exit unless it was already moved into place.
+cleanup () {
+    [ ! -f "$tmpfile" ] || unlink "$tmpfile"
+}; trap cleanup EXIT
+
+mkdir -p "$lintian_data/files"
+
+cat > "$tmpfile" <<EOF
+# List of locale codes.  This is derived from the ISO 639-1 and ISO
+# 639-2 standards.
+# If a language has 639-1 and 639-2 codes, the -2 code is also included
+# as a key to be mapped to the -1 code.
+#
+# Last updated: $(date -u +'%Y-%m-%d')
+
+EOF
+
+export LANG=C
+
+# Each isoquery line is expected to hold two three-letter codes, an
+# optional two-letter ISO 639-1 code and the language name.  The optional
+# code is matched as lowercase only, so that a capitalised name starting
+# with a two-letter word ("Ga", "No linguistic content") is not mistaken
+# for a 639-1 code.
+isoquery -i 639 |
+    perl -w -n -e 'next unless m/^\w{3}\s+(\w{3})\s+(?:([a-z]{2})\s+)?/;
+		    ($iso1, $iso2) = ($2, $1);
+		    if (!defined($iso1)) {
+			$iso1 = $iso2;
+			$iso2 = undef;
+		    }
+		    print "\L$iso1\n";
+		    if (defined $iso2) {
+			print "\L$iso2 $iso1\n";
+		    }' |
+    sort -u >> "$tmpfile"
+
+# mktemp creates the file with owner-only permissions; make the data file
+# world-readable before installing it.
+chmod 644 "$tmpfile"
+mv "$tmpfile" "$lintian_data/files/locale-codes"
diff --git a/t/tests/files-locales/debian/debian/install b/t/tests/files-locales/debian/debian/install
new file mode 100644
index 0000000..00baa2f
--- /dev/null
+++ b/t/tests/files-locales/debian/debian/install
@@ -0,0 +1,6 @@
+dummy usr/share/locale/en_US/
+dummy usr/share/locale/en_UK/
+dummy usr/share/locale/ind/
+dummy usr/share/locale/wa/
+dummy usr/share/locale/zz/
+dummy usr/share/locale/cz_CZ/
diff --git a/t/tests/runtests-arch-amd64/debian/dummy b/t/tests/files-locales/debian/dummy
similarity index 100%
copy from t/tests/runtests-arch-amd64/debian/dummy
copy to t/tests/files-locales/debian/dummy
diff --git a/t/tests/files-locales/desc b/t/tests/files-locales/desc
new file mode 100644
index 0000000..8fafb96
--- /dev/null
+++ b/t/tests/files-locales/desc
@@ -0,0 +1,7 @@
+Testname: files-locales
+Sequence: 6000
+Version: 1.0
+Description: Test checks about incorrect locale codes
+Test-For:
+ incorrect-locale-code
+ unknown-locale-code
diff --git a/t/tests/files-locales/tags b/t/tests/files-locales/tags
new file mode 100644
index 0000000..f6ccb37
--- /dev/null
+++ b/t/tests/files-locales/tags
@@ -0,0 +1,4 @@
+W: files-locales: incorrect-locale-code cz -> cs
+W: files-locales: incorrect-locale-code en_UK -> en_GB
+W: files-locales: incorrect-locale-code ind -> id
+W: files-locales: unknown-locale-code zz

-- 
Debian package checker


Reply to: