[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#915547: ibus: IBus only knows about major languages (i.e. those with iso639-2 codes)



Source: ibus
Severity: normal
Tags: patch

Dear Team,

IBus parses the iso-codes iso_639.xml file (ISO 639-2) to get the names of the languages that IBus
engines support. That file lists fewer than 500 languages.

The iso_639-3.xml file has codes and names for all languages known at the time of its publication.

Keyman (www.keyman.com) already has support for over 1000 languages, many of which are named only
in ISO 639-3. At the moment IBus groups all of them under "Other".

Other engines such as m17n may support some of these languages too.

I'm attaching a patch to use ISO 639-3 (iso_639-3.xml) instead of ISO 639-2 (iso_639.xml).
I've also opened a pull request for it upstream at https://github.com/ibus/ibus/pull/2061

Regards,
Daniel

-- System Information:
Debian Release: buster/sid
  APT prefers testing
  APT policy: (500, 'testing')
Architecture: amd64 (x86_64)

Kernel: Linux 4.18.0-2-amd64 (SMP w/2 CPU cores)
Locale: LANG=en_GB.UTF-8, LC_CTYPE=en_GB.UTF-8 (charmap=UTF-8), LANGUAGE=en_GB:en (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash
Init: systemd (via /run/systemd/system)
LSM: AppArmor: enabled
--- a/ibus/lang.py
+++ b/ibus/lang.py
@@ -36,7 +36,7 @@
     lang = lang.lower()
     if lang in __languages_dict:
         lang = __languages_dict[lang]
-        lang = gettext.dgettext("iso_639", lang)
+        lang = gettext.dgettext("iso_639-3", lang)
     else:
         lang = _(u"Other")
         lang = gettext.dgettext("ibus", lang)
@@ -46,7 +46,7 @@
     global __languages_dict
     try:
         name = attrs[u"name"]
-        for attr_name in (u"iso_639_2B_code", u"iso_639_2T_code", u"iso_639_1_code"):
+        for attr_name in (u"id", u"part1_code", u"part2_code"):
             if attr_name in attrs:
                 attr_value = attrs[attr_name]
                 __languages_dict[attr_value] = name
@@ -62,12 +62,12 @@
 def __load_lang():
     import os
     import _config
-    iso_639_xml = os.path.join(_config.ISOCODES_PREFIX, "share/xml/iso-codes/iso_639.xml")
+    iso_639_3_xml = os.path.join(_config.ISOCODES_PREFIX, "share/xml/iso-codes/iso_639-3.xml")
     p = xml.parsers.expat.ParserCreate()
     p.StartElementHandler = __start_element
     p.EndElementHandler = __end_element
     p.CharacterDataHandler = __char_data
-    p.ParseFile(file(iso_639_xml))
+    p.ParseFile(file(iso_639_3_xml))
 
 __load_lang()
 
--- a/engine/iso639converter.py
+++ b/engine/iso639converter.py
@@ -43,7 +43,7 @@
 else:
     # io.StringIO does not work with XMLGenerator
     from cStringIO import StringIO
-    # iso_639.xml includes UTF-8
+    # iso_639-3.xml includes UTF-8
     reload(sys)
     sys.setdefaultencoding('utf-8')
 
@@ -63,27 +63,27 @@
 
 class ISO639XML(XMLFilterBase):
     def __init__(self, parser=None):
-        self.__code2to1 = {}
+        self.__code2to3 = {}
         self.__codetoname = {}
         XMLFilterBase.__init__(self, parser)
     def startElement(self, name, attrs):
-        if name != 'iso_639_entry':
+        if name != 'iso_639_3_entry':
             return
         n = attrs.get('name')
-        iso639_1 = attrs.get('iso_639_1_code')
-        iso639_2b = attrs.get('iso_639_2B_code')
-        iso639_2t = attrs.get('iso_639_2T_code')
-        if iso639_1 != None:
-            self.__codetoname[iso639_1] = n
+        iso639_3 = attrs.get('id')
+        iso639_2b = attrs.get('part1_code')
+        iso639_2t = attrs.get('part2_code')
+        if iso639_3 != None:
+            self.__codetoname[iso639_3] = n
             if iso639_2b != None:
-                self.__code2to1[iso639_2b] = iso639_1
+                self.__code2to3[iso639_2b] = iso639_3
                 self.__codetoname[iso639_2b] = n
-            if iso639_2t != None and iso639_2b != iso639_2t:
-                self.__code2to1[iso639_2t] = iso639_1
+            if iso639_2t != None:
+                self.__code2to3[iso639_2t] = iso639_3
                 self.__codetoname[iso639_2t] = n
-    def code2to1(self, iso639_2):
+    def code2to3(self, iso639_2):
         try:
-            return self.__code2to1[iso639_2]
+            return self.__code2to3[iso639_2]
         except KeyError:
             return None
 
@@ -113,9 +113,9 @@
     def characters(self, text):
         if self.__is_language:
             if self.__iso639:
-                iso639_1 = self.__iso639.code2to1(text)
-                if iso639_1 != None:
-                    text = iso639_1
+                iso639_3 = self.__iso639.code2to3(text)
+                if iso639_3 != None:
+                    text = iso639_3
         if self.__downstream:
             self.__downstream.characters(text)
 
@@ -192,6 +192,6 @@
         elif opt in ('-o', '--output'):
             output = arg
 
-    iso639 = parse_iso639('/usr/share/xml/iso-codes/iso_639.xml')
+    iso639 = parse_iso639('/usr/share/xml/iso-codes/iso_639-3.xml')
     xml = ConvertEngineXML(input, iso639)
     xml.write(output)
--- a/src/ibusutil.c
+++ b/src/ibusutil.c
@@ -45,7 +45,7 @@
     GList *p;
     g_assert (node);
 
-    if (G_UNLIKELY (g_strcmp0 (node->name, "iso_639_entries") != 0)) {
+    if (G_UNLIKELY (g_strcmp0 (node->name, "iso_639_3_entries") != 0)) {
         return FALSE;
     }
 
@@ -57,9 +57,9 @@
             const gchar *key;
             gchar *value;
         } entries[] = {
-            { "iso_639_2B_code", NULL },
-            { "iso_639_2T_code", NULL },
-            { "iso_639_1_code", NULL },
+            { "id", NULL },
+            { "part1_code", NULL },
+            { "part2_code", NULL },
         };
 
         if (sub_node->attributes == NULL) {
@@ -99,14 +99,14 @@
     struct stat buf;
 
 #ifdef ENABLE_NLS
-    bindtextdomain ("iso_639", GLIB_LOCALE_DIR);
-    bind_textdomain_codeset ("iso_639", "UTF-8");
+    bindtextdomain ("iso_639-3", GLIB_LOCALE_DIR);
+    bind_textdomain_codeset ("iso_639-3", "UTF-8");
 #endif
 
     __languages_dict = g_hash_table_new_full (g_str_hash,
             g_str_equal, g_free, g_free);
     filename = g_build_filename (ISOCODES_PREFIX,
-                                 "share/xml/iso-codes/iso_639.xml",
+                                 "share/xml/iso-codes/iso_639-3.xml",
                                  NULL);
     if (g_stat (filename, &buf) != 0) {
         g_warning ("Can not get stat of file %s", filename);
@@ -157,7 +157,7 @@
     if (g_strcmp0 (retval, "Other") == 0)
         return dgettext (GETTEXT_PACKAGE, N_("Other"));
     else
-        return dgettext ("iso_639", retval);
+        return dgettext ("iso_639-3", retval);
 #else
     return retval;
 #endif

Reply to: