[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

d-i 매뉴얼 번역 용어 검사하기



마감이 얼마 남지 않아서 제가 d-i 매뉴얼 번역을 업데이트했습니다.

그런데 전부터 느끼던 거지만 매뉴얼의 내용과 d-i의 메시지 번역이 맞지 않
는 경우가 발생하는 게 신경 쓰이더군요.  매번 매뉴얼에 <guimenu>,
<guimenuitem> 태그가 나올 때마다 찾아볼 수도 없는 노릇이구요.  그래서 간
단히 검사하는 스크립트를 만들었습니다.  ko-po-check에 쓰던 PO 파싱 모듈
을 써먹을 기회가 되었습니다.

이 스크립트는 매뉴얼의 msgid에 <guimenu> 및 <guimenuitem> 태그가 나타날
때마다 d-i 메시지를 찾아보고 매뉴얼의 해당 msgstr에 번역이 그대로 되어
있지 않으면 파일:줄번호: 형식으로 에러 메시지를 출력합니다.  컴파일러 형
식과 똑같으니 이맥스를 사용하시면 M-x compile에서 사용하셔도 편리하구요.
첫 번째 인자가 d-i 메시지 PO 파일, 나머지가 매뉴얼 PO 파일입니다.  매뉴얼
디렉토리에서는 다음과 같이 실행합니다.

$ python dicheck.py ../../../packages/po/ko.po *.po


일단 현재 발견한 에러는 전부 고쳐 놓은 상태입니다.

Happy Translating!  :)

-- 
Changwoo Ryu <cwryu@debian.org>
# -*- coding: utf-8 -*-

import poparse
import re

# dic maps a d-i msgid to a (msgstr, msgstr line number) pair; it is filled
# by build_dictionary().  dic_filename is the path of the d-i PO file and is
# used as the file prefix of the first line of each report.
dic, dic_filename = {}, ''

def build_dictionary(filename):
    """Load the d-i message catalog *filename* into the global ``dic``.

    Every entry of the catalog is recorded as
    ``dic[msgid] = (msgstr, msgstr_lineno)`` so that check_file() can look
    up the d-i translation of a UI string together with the line it is
    defined on.
    """
    # Use open() rather than the file() builtin and close the handle
    # explicitly instead of leaving it to the garbage collector.
    fp = open(filename)
    try:
        p = poparse.parse_file(fp)
    finally:
        fp.close()
    for e in p.entries:
        dic[e.msgid] = (e.msgstr, e.msgstr_lineno)

    
def check_file(filename):
    """Check the UI strings quoted in one manual PO file against d-i.

    For every entry of *filename*, each string found inside a <guimenu> or
    <guimenuitem> tag of the msgid is looked up in the global ``dic``.  If
    d-i has a translation for it and the msgstr does not contain that exact
    translation inside one of the two tags, a two-line report in
    ``file:line: message`` form is printed: first the d-i location and
    translation, then the location in the manual.

    UI strings that are not present in ``dic`` are skipped silently.
    """
    fp = open(filename)
    try:
        p = poparse.parse_file(fp)
    finally:
        fp.close()
    for entry in p.entries:
        items = re.findall("<guimenu>([^<]+)</guimenu>", entry.msgid)
        items += re.findall("<guimenuitem>([^<]+)</guimenuitem>", entry.msgid)
        for en in items:
            # Nothing to compare against when d-i does not have this string.
            if en not in dic:
                continue
            (ko, line) = dic[en]
            if ("<guimenuitem>" + ko + "</guimenuitem>" in entry.msgstr
                    or "<guimenu>" + ko + "</guimenu>" in entry.msgstr):
                continue
            print("%s:%d: UI string \"%s\" was translated as \"%s\" in d-i" % (dic_filename, line, en, ko))
            if len(items) != 1:
                # With several UI strings in one message we cannot tell
                # which tag in the translation corresponds to this one.
                print("%s:%d: but not in the manual" % (filename, entry.msgstr_lineno))
                continue
            # Exactly one UI string: show what the manual used instead.
            s = re.compile("<guimenu>([^<]+)</guimenu>").search(entry.msgstr)
            if not s:
                s = re.compile("<guimenuitem>([^<]+)</guimenuitem>").search(entry.msgstr)
            if s:
                print("%s:%d: but \"%s\" in the manual" % (filename, entry.msgstr_lineno, s.group(1)))
            else:
                # No tag at all in the translation; there is no match object
                # here, so only the location can be reported.
                print("%s:%d: but nothing in the manual?" % (filename, entry.msgstr_lineno))

import sys
# Command-line driver: the first argument is the d-i PO file, every further
# argument is a manual PO file to check against it.
cli_args = sys.argv[1:]
if len(cli_args) < 2:
    print("Usage: python ditrcheck.py <d-i po file> <manual po files>...\n")
    sys.exit(1)

dic_filename = cli_args[0]
build_dictionary(dic_filename)

for manual_po in cli_args[1:]:
    check_file(manual_po)
# -*- coding: utf-8 -*-
import string

# Bit values stored in entry.flag; each one occupies its own bit so several
# can be combined with "|".
FUZZY = 1
OBSOLETE = 2
C_FORMAT = 4
NO_C_FORMAT = 8
NO_WRAP = 16

class entry:
    """One message of a PO catalog.

    Holds the original text (msgid, msgid_plural), the translation
    (msgstr), the comments attached to the message, its source references
    and a bit mask of flags (FUZZY, OBSOLETE, C_FORMAT, NO_C_FORMAT,
    NO_WRAP).
    """

    def __init__(self):
        self.msgid = ""
        self.msgid_plural = ""
        self.msgstr = ""
        self.translator_comment = ""
        self.automatic_comment = ""
        self.references = []
        self.flag = 0
        # Line numbers of the msgid / msgstr keywords.  The parser assigns
        # them; they start at 0 so that an entry created by hand can be
        # inspected without an AttributeError.
        self.msgid_lineno = 0
        self.msgstr_lineno = 0

    # attributes handling
    def set_flag(self,flag):
        """Turn on the bit(s) given in *flag*."""
        self.flag = self.flag | flag
    def unset_flag(self,flag):
        """Turn off the bit(s) given in *flag*."""
        self.flag = self.flag & ~flag
    def is_fuzzy(self):
        """Return a non-zero value if the FUZZY bit is set."""
        return (self.flag & FUZZY)
    def is_obsolete(self):
        """Return a non-zero value if the OBSOLETE bit is set."""
        return (self.flag & OBSOLETE)
    def is_untranslated(self):
        """Return true if msgstr is empty."""
        return (self.msgstr == "")
    def is_translated(self):
        """Return true if the entry is neither fuzzy, obsolete nor empty."""
        return (not self.is_fuzzy() and
                not self.is_obsolete() and
                not self.is_untranslated())
    def is_c_format(self):
        """Return a non-zero value if the C_FORMAT bit is set."""
        return (self.flag & C_FORMAT)
    def is_no_c_format(self):
        """Return a non-zero value if the NO_C_FORMAT bit is set."""
        return (self.flag & NO_C_FORMAT)
    def is_no_wrap(self):
        """Return a non-zero value if the NO_WRAP bit is set."""
        return (self.flag & NO_WRAP)
    def __repr__(self):
        return repr(self.msgid) + ":::" + repr(self.msgstr)

class catalog:
    """A parsed PO file: its header fields plus the list of messages."""

    def __init__(self):
        self.entries = []       # every non-header entry, in insertion order
        self.metadata = {}      # header fields, e.g. 'Content-Type'
        self.textdomain = ''
        self.language = 'ko'
    def add_entry(self,entry):
        """Add *entry* to the catalog.

        An entry with an empty msgid is the header: its msgstr is split
        into "Key: value" lines which are stored in self.metadata instead
        of being appended to self.entries.
        """
        if (entry.msgid == ''):         # header entry
            for l in entry.msgstr.split("\n"):
                if len(l) == 0:
                    continue
                # Split on the first ": " only so that a value which itself
                # contains ": " is kept whole.
                k = l.split(": ", 1)
                if len(k) != 2:
                    # Not a "Key: value" line; skip it rather than fail
                    # with an IndexError.
                    continue
                self.metadata[k[0]] = k[1]
        else:
            self.entries.append(entry)
    def settextdomain(self, d):
        """Record the text domain this catalog belongs to."""
        self.textdomain = d
# -*- coding: utf-8 -*-
"""Parser for GNU gettext PO (Portable Object) files.

parse_file() reads a whole file into a po.catalog; parse_entry() reads a
single message.  Malformed input is reported with ParseError.
"""

__all__ = [
    "parse_file",
    "parse_entry",
    "ParseError",
    "FUZZY",
    "OBSOLETE",
    "C_FORMAT",
    "NO_C_FORMAT",
    "NO_WRAP",
]

import po,re,string

class ParseError(Exception):
    """Raised when a PO file cannot be parsed.

    String exceptions can no longer be raised on Python 2.6 and later, so
    this is a real exception class.  The existing ``raise ParseError, lineno``
    and ``except ParseError`` statements work unchanged with it.  When an
    argument is given it is the number of the line being read when the
    problem was detected.
    """

def c2rawstring(str):
    """Decode the escape sequences in the body of a quoted PO string.

    *str* is the text found between the surrounding double quotes.  It is
    wrapped in double quotes again and evaluated as a Python string
    literal, and the resulting value is returned.
    """
    # SECURITY NOTE(review): eval() executes whatever *str* contains.
    # read_string() passes text taken directly from the PO file, and the
    # pattern it uses allows unescaped double quotes inside the captured
    # text, so a crafted line can run arbitrary Python code.  Only parse
    # trusted files until this is replaced by a real escape decoder.
    return eval('"'+str+'"')

# Flag bits set on entries by parse_entry(); one bit per flag so they can be
# combined in a single integer.
FUZZY = 1
OBSOLETE = 2
C_FORMAT = 4
NO_C_FORMAT = 8
NO_WRAP = 16

def parse_file(file):
    reader = file
    lineno = 0
    catalog = po.catalog()
    try:
        (entry,lineno) = parse_entry(reader,lineno)
    except ParseError:
        raise ParseError, lineno
    catalog.add_entry(entry)
    content_type = catalog.metadata['Content-Type']
    charset = re.compile("charset=(.+)$").search(content_type).group(1)
    while 1:
        try:
            (entry,lineno) = parse_entry(reader,lineno)
        except ParseError, l:
            raise ParseError, l
        if not entry:
            return catalog
        try:
            entry.translator_comment = unicode(entry.translator_comment,charset)
            entry.msgstr = unicode(entry.msgstr,charset)
        except:
            raise ParseError, lineno
        catalog.add_entry(entry)

# Parser states used by parse_entry():
#   STATE_FIRST    - nothing of the entry has been read yet
#   STATE_COMMENT  - only translator comments have been read so far
#   STATE_ECOMMENT - a "#:" reference or "#," flag line has been read
#   STATE_MSGID    - inside msgid / msgid_plural
#   STATE_MSGSTR   - inside msgstr
STATE_FIRST,STATE_COMMENT,STATE_ECOMMENT,STATE_MSGID,STATE_MSGSTR = 1,2,3,4,5
emptyline_re = re.compile(r"^\s*$")
translator_comment_re = re.compile(r"^\#( (.*))?$")
# The dot is escaped so that only "#." lines match; unescaped it matched any
# character and therefore also "#:" and "#," lines.
automatic_comment_re = re.compile(r"^\#\. (.*)$")
reference_re = re.compile(r"^\#: (.*)$")
flag_re = re.compile(r"^\#, (.*)$")
# NOTE(review): the trailing \w* can match nothing, so text after the closing
# quote is not rejected -- presumably \s*$ was intended; confirm before
# tightening, since that would make the parser stricter.
string_re = re.compile(r"^\"(.*)\"\w*")

def read_string(fmt):
    """Return the decoded value of the quoted string at the start of *fmt*.

    *fmt* must begin with a double-quoted PO string.  ParseError is raised
    (without a line number, which the caller knows) when *fmt* does not
    start with such a string or when its contents cannot be decoded.
    """
    found = string_re.match(fmt)
    if found is None:
        raise ParseError
    try:
        return c2rawstring(found.group(1))
    except (SyntaxError, ValueError, NameError, TypeError):
        # A malformed escape or a stray quote makes the decoder fail; report
        # it as a parse error like every other problem in the file.
        raise ParseError

import codecs

def parse_entry(file,lineno):
    state = STATE_FIRST
    new_entry = po.entry()
    while 1:
        lineno += 1
        line = file.readline()
        if not line:                    # EOF
            if state == STATE_FIRST or state == STATE_COMMENT:
                return (None,lineno)    # no more messages -- return nothing
            elif state != STATE_MSGSTR:
                raise ParseError, lineno        # unexpected EOF
            else:
                return (new_entry,lineno)
        if emptyline_re.match(line):
            if state == STATE_FIRST or state == STATE_COMMENT:
                continue
            elif state != STATE_MSGSTR:
                raise ParseError, lineno
            else:
                return (new_entry,lineno)
        if line[-1] == '\n':            # remove the trailing newline
            line = line[:-1]
        if line[:3] == '#~ ':
            new_entry.set_flag(OBSOLETE)
            line = line[3:]
        # comments
        if line[0] == '#' and (len(line) == 1 or line[1] != '~'):
            if state == STATE_FIRST:
                state = STATE_COMMENT
            if len(line) == 1:
                new_entry.translator_comment += '\n'
            elif line[1] == ' ':          # automatic comment
                new_entry.translator_comment += line[2:] + '\n'
            elif line[1] == ':':
                state = STATE_ECOMMENT
                new_entry.references += string.split(line[3:],' ')
            elif line[1] == ',':
                state = STATE_ECOMMENT
                for flag in string.split(line[3:], ', '):
                    if flag == 'c-format':
                        new_entry.set_flag(C_FORMAT)
                    elif flag == 'no-c-format':
                        new_entry.set_flag(NO_C_FORMAT)
                    elif flag == 'fuzzy':
                        new_entry.set_flag(FUZZY)
                    elif flag == 'no-wrap':
                        new_entry.set_flag(NO_WRAP)
                pass
            elif line[1] == '~':
                state = STATE_ECOMMENT
                pass
        else:
            if line[:7] == 'msgid "':
                state = STATE_MSGID
                try:
                    new_entry.msgid += read_string(line[6:])
                except ParseError:
                    raise ParseError, lineno
                new_entry.msgid_lineno = lineno
            elif line[:14] == 'msgid_plural "':
                state = STATE_MSGID
                new_entry.msgid_plural += read_string(line[13:])
            elif line[:8] == 'msgstr "':
                state = STATE_MSGSTR
                new_entry.msgstr += read_string(line[7:])
                new_entry.msgstr_lineno = lineno
            elif line[:7] == 'msgstr[':
                state = STATE_MSGSTR
                new_entry.msgstr += read_string(line[10:])
                new_entry.msgstr_lineno = lineno
            elif line[0] == '"':
                if state == STATE_MSGID:
                    new_entry.msgid += read_string(line)
                elif state == STATE_MSGSTR:
                    new_entry.msgstr += read_string(line)
                else:
                    raise ParseError, lineno
            else:
                raise ParseError, lineno
            
    #new_entry.msgid += line
    return (new_entry,lineno)
        
        
def test():
    """Parse a PO file and print the resulting catalog object.

    The file named by the first command-line argument is parsed ("-" means
    standard input).  Without an argument a small built-in sample is parsed
    instead.
    """
    import sys
    if sys.argv[1:]:
        fn = sys.argv[1]
        if fn == '-':
            fp = sys.stdin
        else:
            fp = open(fn)
    else:
        import StringIO
        # Built-in sample; the module defines no test_input to fall back on.
        sample = (
            'msgid ""\n'
            'msgstr ""\n'
            '"Content-Type: text/plain; charset=UTF-8\\n"\n'
            '\n'
            'msgid "Hello"\n'
            'msgstr "Hello (translated)"\n'
        )
        fp = StringIO.StringIO(sample)
    try:
        catalog = parse_file(fp)
    finally:
        # Close what was opened here, but leave standard input alone.
        if fp is not sys.stdin:
            fp.close()
    print(str(catalog))


if __name__ == '__main__':
    test()

Attachment: signature.asc
Description: This is a digitally signed message part


Reply to: