Bug#915547: ibus: IBus only knows about major languages (i.e. those with iso639-2 codes)
Source: ibus
Severity: normal
Tags: patch
Dear Team,
IBus parses the iso_639.xml file from iso-codes (the ISO 639-2 list) to get the names of the languages
that IBus engines support. That file lists fewer than 500 languages.
The iso_639-3.xml file has codes and names for the languages known at the time of its publication.
Keyman (www.keyman.com) already has support for over 1000 languages, many of which are only named
in ISO 639-3. At the moment IBus groups all of those languages under "Other".
Other engines such as m17n may support some of these languages too.
I'm attaching a patch to use ISO 639-3 instead of ISO 639-2.
I've made a PR for it upstream at https://github.com/ibus/ibus/pull/2061
Regards,
Daniel
-- System Information:
Debian Release: buster/sid
APT prefers testing
APT policy: (500, 'testing')
Architecture: amd64 (x86_64)
Kernel: Linux 4.18.0-2-amd64 (SMP w/2 CPU cores)
Locale: LANG=en_GB.UTF-8, LC_CTYPE=en_GB.UTF-8 (charmap=UTF-8), LANGUAGE=en_GB:en (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash
Init: systemd (via /run/systemd/system)
LSM: AppArmor: enabled
--- a/ibus/lang.py
+++ b/ibus/lang.py
@@ -36,7 +36,7 @@
lang = lang.lower()
if lang in __languages_dict:
lang = __languages_dict[lang]
- lang = gettext.dgettext("iso_639", lang)
+ lang = gettext.dgettext("iso_639-3", lang)
else:
lang = _(u"Other")
lang = gettext.dgettext("ibus", lang)
@@ -46,7 +46,7 @@
global __languages_dict
try:
name = attrs[u"name"]
- for attr_name in (u"iso_639_2B_code", u"iso_639_2T_code", u"iso_639_1_code"):
+ for attr_name in (u"id", u"part1_code", u"part2_code"):
if attr_name in attrs:
attr_value = attrs[attr_name]
__languages_dict[attr_value] = name
@@ -62,12 +62,12 @@
def __load_lang():
import os
import _config
- iso_639_xml = os.path.join(_config.ISOCODES_PREFIX, "share/xml/iso-codes/iso_639.xml")
+ iso_639_3_xml = os.path.join(_config.ISOCODES_PREFIX, "share/xml/iso-codes/iso_639-3.xml")
p = xml.parsers.expat.ParserCreate()
p.StartElementHandler = __start_element
p.EndElementHandler = __end_element
p.CharacterDataHandler = __char_data
- p.ParseFile(file(iso_639_xml))
+ p.ParseFile(file(iso_639_3_xml))
__load_lang()
--- a/engine/iso639converter.py
+++ b/engine/iso639converter.py
@@ -43,7 +43,7 @@
else:
# io.StringIO does not work with XMLGenerator
from cStringIO import StringIO
- # iso_639.xml includes UTF-8
+ # iso_639-3.xml includes UTF-8
reload(sys)
sys.setdefaultencoding('utf-8')
@@ -63,27 +63,27 @@
class ISO639XML(XMLFilterBase):
def __init__(self, parser=None):
- self.__code2to1 = {}
+ self.__code2to3 = {}
self.__codetoname = {}
XMLFilterBase.__init__(self, parser)
def startElement(self, name, attrs):
- if name != 'iso_639_entry':
+ if name != 'iso_639_3_entry':
return
n = attrs.get('name')
- iso639_1 = attrs.get('iso_639_1_code')
- iso639_2b = attrs.get('iso_639_2B_code')
- iso639_2t = attrs.get('iso_639_2T_code')
- if iso639_1 != None:
- self.__codetoname[iso639_1] = n
+ iso639_3 = attrs.get('id')
+ iso639_2b = attrs.get('part1_code')
+ iso639_2t = attrs.get('part2_code')
+ if iso639_3 != None:
+ self.__codetoname[iso639_3] = n
if iso639_2b != None:
- self.__code2to1[iso639_2b] = iso639_1
+ self.__code2to3[iso639_2b] = iso639_3
self.__codetoname[iso639_2b] = n
- if iso639_2t != None and iso639_2b != iso639_2t:
- self.__code2to1[iso639_2t] = iso639_1
+ if iso639_2t != None:
+ self.__code2to3[iso639_2t] = iso639_3
self.__codetoname[iso639_2t] = n
- def code2to1(self, iso639_2):
+ def code2to3(self, iso639_2):
try:
- return self.__code2to1[iso639_2]
+ return self.__code2to3[iso639_2]
except KeyError:
return None
@@ -113,9 +113,9 @@
def characters(self, text):
if self.__is_language:
if self.__iso639:
- iso639_1 = self.__iso639.code2to1(text)
- if iso639_1 != None:
- text = iso639_1
+ iso639_3 = self.__iso639.code2to3(text)
+ if iso639_3 != None:
+ text = iso639_3
if self.__downstream:
self.__downstream.characters(text)
@@ -192,6 +192,6 @@
elif opt in ('-o', '--output'):
output = arg
- iso639 = parse_iso639('/usr/share/xml/iso-codes/iso_639.xml')
+ iso639 = parse_iso639('/usr/share/xml/iso-codes/iso_639-3.xml')
xml = ConvertEngineXML(input, iso639)
xml.write(output)
--- a/src/ibusutil.c
+++ b/src/ibusutil.c
@@ -45,7 +45,7 @@
GList *p;
g_assert (node);
- if (G_UNLIKELY (g_strcmp0 (node->name, "iso_639_entries") != 0)) {
+ if (G_UNLIKELY (g_strcmp0 (node->name, "iso_639_3_entries") != 0)) {
return FALSE;
}
@@ -57,9 +57,9 @@
const gchar *key;
gchar *value;
} entries[] = {
- { "iso_639_2B_code", NULL },
- { "iso_639_2T_code", NULL },
- { "iso_639_1_code", NULL },
+ { "id", NULL },
+ { "part1_code", NULL },
+ { "part2_code", NULL },
};
if (sub_node->attributes == NULL) {
@@ -99,14 +99,14 @@
struct stat buf;
#ifdef ENABLE_NLS
- bindtextdomain ("iso_639", GLIB_LOCALE_DIR);
- bind_textdomain_codeset ("iso_639", "UTF-8");
+ bindtextdomain ("iso_639-3", GLIB_LOCALE_DIR);
+ bind_textdomain_codeset ("iso_639-3", "UTF-8");
#endif
__languages_dict = g_hash_table_new_full (g_str_hash,
g_str_equal, g_free, g_free);
filename = g_build_filename (ISOCODES_PREFIX,
- "share/xml/iso-codes/iso_639.xml",
+ "share/xml/iso-codes/iso_639-3.xml",
NULL);
if (g_stat (filename, &buf) != 0) {
g_warning ("Can not get stat of file %s", filename);
@@ -157,7 +157,7 @@
if (g_strcmp0 (retval, "Other") == 0)
return dgettext (GETTEXT_PACKAGE, N_("Other"));
else
- return dgettext ("iso_639", retval);
+ return dgettext ("iso_639-3", retval);
#else
return retval;
#endif
Reply to: