[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

[SCM] Debian package checker branch, master, updated. 2.5.11-96-ga58477a



The following commit has been merged in the master branch:
commit a58477a3810993f817e9035223bc9d3529893de4
Author: Niels Thykier <niels@thykier.net>
Date:   Fri Jan 25 13:45:12 2013 +0100

    refresh-locale-codes: Rewrite and regen the data file
    
    Rewrite private/refresh-locale-codes to have it filter out "special"
    locale codes like "qaa".
    
    Signed-off-by: Niels Thykier <niels@thykier.net>

diff --git a/data/files/locale-codes b/data/files/locale-codes
index 567af7b..aad0744 100644
--- a/data/files/locale-codes
+++ b/data/files/locale-codes
@@ -3,7 +3,7 @@
 # If a language has 639-1 and 639-2 codes, the -2 code is also included
 # as a key to be mapped to the -1 code.
 #
-# Last updated: 2013-01-06
+# Last updated: 2013-01-25
 
 aa
 aaa
@@ -4290,7 +4290,6 @@ mio
 mip
 miq
 mir
-mis
 mit
 miu
 miv
@@ -4599,7 +4598,6 @@ muh
 mui
 muj
 muk
-mul
 mum
 mun
 muo
@@ -5807,526 +5805,6 @@ pyu
 pyx
 pyy
 pzn
-qaa
-qab
-qac
-qad
-qae
-qaf
-qag
-qah
-qai
-qaj
-qak
-qal
-qam
-qan
-qao
-qap
-qaq
-qar
-qas
-qat
-qau
-qav
-qaw
-qax
-qay
-qaz
-qba
-qbb
-qbc
-qbd
-qbe
-qbf
-qbg
-qbh
-qbi
-qbj
-qbk
-qbl
-qbm
-qbn
-qbo
-qbp
-qbq
-qbr
-qbs
-qbt
-qbu
-qbv
-qbw
-qbx
-qby
-qbz
-qca
-qcb
-qcc
-qcd
-qce
-qcf
-qcg
-qch
-qci
-qcj
-qck
-qcl
-qcm
-qcn
-qco
-qcp
-qcq
-qcr
-qcs
-qct
-qcu
-qcv
-qcw
-qcx
-qcy
-qcz
-qda
-qdb
-qdc
-qdd
-qde
-qdf
-qdg
-qdh
-qdi
-qdj
-qdk
-qdl
-qdm
-qdn
-qdo
-qdp
-qdq
-qdr
-qds
-qdt
-qdu
-qdv
-qdw
-qdx
-qdy
-qdz
-qea
-qeb
-qec
-qed
-qee
-qef
-qeg
-qeh
-qei
-qej
-qek
-qel
-qem
-qen
-qeo
-qep
-qeq
-qer
-qes
-qet
-qeu
-qev
-qew
-qex
-qey
-qez
-qfa
-qfb
-qfc
-qfd
-qfe
-qff
-qfg
-qfh
-qfi
-qfj
-qfk
-qfl
-qfm
-qfn
-qfo
-qfp
-qfq
-qfr
-qfs
-qft
-qfu
-qfv
-qfw
-qfx
-qfy
-qfz
-qga
-qgb
-qgc
-qgd
-qge
-qgf
-qgg
-qgh
-qgi
-qgj
-qgk
-qgl
-qgm
-qgn
-qgo
-qgp
-qgq
-qgr
-qgs
-qgt
-qgu
-qgv
-qgw
-qgx
-qgy
-qgz
-qha
-qhb
-qhc
-qhd
-qhe
-qhf
-qhg
-qhh
-qhi
-qhj
-qhk
-qhl
-qhm
-qhn
-qho
-qhp
-qhq
-qhr
-qhs
-qht
-qhu
-qhv
-qhw
-qhx
-qhy
-qhz
-qia
-qib
-qic
-qid
-qie
-qif
-qig
-qih
-qii
-qij
-qik
-qil
-qim
-qin
-qio
-qip
-qiq
-qir
-qis
-qit
-qiu
-qiv
-qiw
-qix
-qiy
-qiz
-qja
-qjb
-qjc
-qjd
-qje
-qjf
-qjg
-qjh
-qji
-qjj
-qjk
-qjl
-qjm
-qjn
-qjo
-qjp
-qjq
-qjr
-qjs
-qjt
-qju
-qjv
-qjw
-qjx
-qjy
-qjz
-qka
-qkb
-qkc
-qkd
-qke
-qkf
-qkg
-qkh
-qki
-qkj
-qkk
-qkl
-qkm
-qkn
-qko
-qkp
-qkq
-qkr
-qks
-qkt
-qku
-qkv
-qkw
-qkx
-qky
-qkz
-qla
-qlb
-qlc
-qld
-qle
-qlf
-qlg
-qlh
-qli
-qlj
-qlk
-qll
-qlm
-qln
-qlo
-qlp
-qlq
-qlr
-qls
-qlt
-qlu
-qlv
-qlw
-qlx
-qly
-qlz
-qma
-qmb
-qmc
-qmd
-qme
-qmf
-qmg
-qmh
-qmi
-qmj
-qmk
-qml
-qmm
-qmn
-qmo
-qmp
-qmq
-qmr
-qms
-qmt
-qmu
-qmv
-qmw
-qmx
-qmy
-qmz
-qna
-qnb
-qnc
-qnd
-qne
-qnf
-qng
-qnh
-qni
-qnj
-qnk
-qnl
-qnm
-qnn
-qno
-qnp
-qnq
-qnr
-qns
-qnt
-qnu
-qnv
-qnw
-qnx
-qny
-qnz
-qoa
-qob
-qoc
-qod
-qoe
-qof
-qog
-qoh
-qoi
-qoj
-qok
-qol
-qom
-qon
-qoo
-qop
-qoq
-qor
-qos
-qot
-qou
-qov
-qow
-qox
-qoy
-qoz
-qpa
-qpb
-qpc
-qpd
-qpe
-qpf
-qpg
-qph
-qpi
-qpj
-qpk
-qpl
-qpm
-qpn
-qpo
-qpp
-qpq
-qpr
-qps
-qpt
-qpu
-qpv
-qpw
-qpx
-qpy
-qpz
-qqa
-qqb
-qqc
-qqd
-qqe
-qqf
-qqg
-qqh
-qqi
-qqj
-qqk
-qql
-qqm
-qqn
-qqo
-qqp
-qqq
-qqr
-qqs
-qqt
-qqu
-qqv
-qqw
-qqx
-qqy
-qqz
-qra
-qrb
-qrc
-qrd
-qre
-qrf
-qrg
-qrh
-qri
-qrj
-qrk
-qrl
-qrm
-qrn
-qro
-qrp
-qrq
-qrr
-qrs
-qrt
-qru
-qrv
-qrw
-qrx
-qry
-qrz
-qsa
-qsb
-qsc
-qsd
-qse
-qsf
-qsg
-qsh
-qsi
-qsj
-qsk
-qsl
-qsm
-qsn
-qso
-qsp
-qsq
-qsr
-qss
-qst
-qsu
-qsv
-qsw
-qsx
-qsy
-qsz
-qta
-qtb
-qtc
-qtd
-qte
-qtf
-qtg
-qth
-qti
-qtj
-qtk
-qtl
-qtm
-qtn
-qto
-qtp
-qtq
-qtr
-qts
-qtt
-qtu
-qtv
-qtw
-qtx
-qty
-qtz
 qu
 qua
 qub
@@ -7750,7 +7228,6 @@ umr
 ums
 umu
 una
-und
 une
 ung
 unk
diff --git a/private/refresh-locale-codes b/private/refresh-locale-codes
index 741d46d..d59affb 100755
--- a/private/refresh-locale-codes
+++ b/private/refresh-locale-codes
@@ -1,7 +1,9 @@
-#!/bin/sh
+#!/usr/bin/perl
 # refresh-locale-codes -- Refresh the locale (aka ISO 639-1/639-2 codes)
 
-# Copyright (C) 2010 Raphael Geissert <atomo64@gmail.com>
+# Copyright (C) 2013 Niels Thykier <niels@thykier.net>
+# Based on a shell script, which was:
+#   Copyright (C) 2010 Raphael Geissert <atomo64@gmail.com>
 #
 # This file is free software: you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
@@ -16,73 +18,132 @@
 # You should have received a copy of the GNU General Public License
 # along with this file.  If not, see <http://www.gnu.org/licenses/>.
 
-set -e
+use strict;
+use warnings;
 
-if [ -z "$1" ]; then
-    printf "Usage: %s <path-to-data>\n" "$(basename "$0")"
-    cat <<INFO
-<path-to-data> must be the path to the root of the Lintian data
-directory to update.
+use POSIX qw(strftime);
 
-This script requires the isoquery package to be installed.
-INFO
-    exit
-fi
+BEGIN {
+    # Default LINTIAN_ROOT to '.' when it is unset or empty (an empty value
+    # used to be kept, making the "use lib" below search "/lib").
+    $ENV{'LINTIAN_ROOT'} = '.'
+        unless defined $ENV{'LINTIAN_ROOT'} and length $ENV{'LINTIAN_ROOT'};
+};
 
-readonly lintian_data="$(readlink -f "$1")"
+use lib "$ENV{'LINTIAN_ROOT'}/lib";
+use Lintian::Util qw(check_path);
 
-[ -d "$lintian_data" ] || {
-    printf "%s is not a directory, aborting" "$lintian_data" >&2
-    exit 1
-}
+# Sole argument: root of the Lintian data directory to refresh.
+my ($DATADIR) = @ARGV;
+my %CODES;      # code => undef, or => the two-letter code it maps to
+my $outfile;    # path of the new data file until it is renamed into place
+if (not defined $DATADIR or not -d $DATADIR) {
+    die "Usage: $0 <path-to-data-dir>\n";
+}
+check_requirements();
 
-readonly tmpfile="$(mktemp)"
+my $date = strftime ('%Y-%m-%d', gmtime);
 
-cleanup () {
-    [ ! -f "$tmpfile" ] || unlink "$tmpfile"
-}; trap cleanup EXIT
+$ENV{LC_ALL} = 'C';
 
-mkdir -p "$lintian_data/files"
+parse_iso_query();
+parse_iso_xml();
 
-cat > "$tmpfile" <<EOF
+$outfile = "$DATADIR/files/locale-codes.new";
+
+open my $out, '>', $outfile or die "open $outfile: $!";
+
+print {$out} <<EOF ;
 # List of locale codes.  This is derived from the ISO 639-1, ISO
 # 639-2, and ISO 639-3 standards.
 # If a language has 639-1 and 639-2 codes, the -2 code is also included
 # as a key to be mapped to the -1 code.
 #
-# Last updated: $(date -u +'%Y-%m-%d')
+# Last updated: $date
 
 EOF
 
-export LC_ALL=C
-
-{
-    isoquery -i 639
-    sed -nr '/\bid=/{s/^.*id="([^"]+)".*$/xxx \1 x/;p}' \
-        /usr/share/xml/iso-codes/iso_639_3.xml
-} | perl -w -e ' my %codes;
-                 while (<>) {
-                    next unless m/^\w{3}\s+(\w{3})\s+(?:(\w{2})\s+)?/;
-                    ($iso1, $iso2) = ($2, $1);
-                    next if $iso2 eq "zxx";
-                    if (!defined($iso1)) {
-                        $iso1 = $iso2;
-                        $iso2 = undef;
-                    }
-                    $iso1 = lc $iso1;
-                    $codes{$iso1} = undef unless (exists $codes{$iso1});
-                    if (defined $iso2) {
-                        $codes{lc $iso2} = $iso1;
-                    }
-                }
-                while (my ($a, $b) = each %codes) {
-                    print $a.(defined($b)? " $b" : "")."\n";
-                } ' |
-    sort -u >> "$tmpfile"
-
-mv "$tmpfile" "$lintian_data/files/locale-codes"
-
-# Local Variables:
-# indent-tabs-mode: nil
-# End:
-# vim: syntax=sh sw=4 sts=4 sr et
+# Write one line per key, in sorted order: the key alone, or the key
+# followed by the code it maps to when that value is defined.
+for my $key (sort keys %CODES) {
+    my $mapped = $CODES{$key};
+    print {$out} defined $mapped ? "$key $mapped\n" : "$key\n";
+}
+close $out or die "close $outfile: $!";
+
+# Move the finished file over the live one.
+rename $outfile, "$DATADIR/files/locale-codes"
+    or die "rename $outfile -> $DATADIR/files/locale-codes: $!";
+
+exit 0;
+
+# Merge the ids from iso_639_3.xml into %CODES.  Ordinary ids are added
+# (value undef) unless already present; ids of "special" entries
+# (#692548, comment #10) are pruned, as isoquery may have "collected" them.
+sub parse_iso_xml {
+    open my $fd, '<', '/usr/share/xml/iso-codes/iso_639_3.xml'
+        or die "open iso_639_3.xml: $!";
+
+    # Read up to each "/>" rather than line by line.
+    local $/ = '/>';
+
+    while (my $chunk = <$fd>) {
+        # Match the start of the tag as well, to ensure we catch the id
+        # inside the tag: the input separator makes a chunk carry a lot
+        # of leading "stuff" prior to the first entry being closed.
+        my ($id) =
+            $chunk =~ m/\<iso_639_3_entry [^\>]* \bid=[\'\"]([^\'\"]+)[\'\"]/x
+            or next;
+        $id = lc $id;
+
+        # A type attribute of "S" or "" marks the entry as special.
+        if ($chunk =~ m/\<iso_639_3_entry [^\>]* \btype=[\'\"]S?[\'\"]/x) {
+            delete $CODES{$id};
+            next;
+        }
+        $CODES{$id} = undef unless exists $CODES{$id};
+    }
+    close $fd;
+}
+
+# Rewrite an entry in $_ as "xxx <id> x"; no-op for text without an id or
+# with a "special" type (#692548, comment #10).  NOTE(review): no caller here.
+sub translator {
+    return if !m/\bid=/ or m/\btype=[\"\']S?[\"\']/;
+    s/^.*id="([^"]+)".*$/xxx $1 x/;
+}
+
+# Record the codes printed by "isoquery -i 639" in %CODES.  A code that has
+# a two-letter form is keyed by that form and its three-letter form maps
+# to it; any other code is keyed by its three-letter form.  "zxx" is
+# skipped.  Dies if isoquery cannot be run or does not exit cleanly.
+sub parse_iso_query {
+    open my $fd, '-|', 'isoquery', '-i', '639'
+        or die "fork/exec isoquery: $!";
+    while (my $line = <$fd>) {
+        next unless $line =~ m/^\w{3}\s+(\w{3})\s+(?:(\w{2})\s+)?/;
+        my ($iso3, $iso2) = ($1, $2);
+        next if $iso3 eq "zxx";
+        my $key = lc (defined $iso2 ? $iso2 : $iso3);
+        $CODES{$key} = undef unless exists $CODES{$key};
+        $CODES{lc $iso3} = $key if defined $iso2;
+    }
+    # For a pipe, close also reaps the child and fails when it exited
+    # non-zero; without this check partial output would go unnoticed.
+    close $fd
+        or die $! ? "close isoquery: $!" : "isoquery: wait status $?";
+}
+
+# Verify that isoquery (looked up via check_path) and the ISO 639-3 XML
+# file are present; if not, list what is missing on STDERR and exit 1.
+sub check_requirements {
+    my @problems;
+    push @problems, 'isoquery in PATH' if not check_path('isoquery');
+    push @problems, 'The file /usr/share/xml/iso-codes/iso_639_3.xml'
+        if not -f '/usr/share/xml/iso-codes/iso_639_3.xml';
+    return if not @problems;
+
+    print STDERR "Missing requirements:\n";
+    print STDERR "\t", join ("\n\t", @problems), "\n";
+    exit 1;
+}

-- 
Debian package checker


Reply to: