마감이 얼마 남지 않아서 제가 d-i 매뉴얼 번역을 업데이트했습니다. 그런데 전부터 느끼던 거지만 매뉴얼의 내용과 d-i의 메시지 번역이 맞지 않은 경우가 발생하는 게 신경 쓰이더군요. 매번 매뉴얼에 <guimenu>, <guimenuitem> 태그가 나올 때마다 찾아볼 수도 없는 노릇이구요. 그래서 간단히 검사하는 스크립트를 만들었습니다. ko-po-check에 쓰던 PO 파싱 모듈을 써먹을 기회가 되었습니다. 이 스크립트는 매뉴얼의 msgid에 <guimenu> 및 <guimenuitem> 태그가 나타날 때마다 d-i 메시지를 찾아보고 매뉴얼의 해당 msgstr에 번역이 그대로 되어 있지 않으면 파일:줄번호: 형식으로 에러 메시지를 출력합니다. 컴파일러 형식과 똑같으니 이맥스를 사용하시면 M-x compile에서 사용하셔도 편리하구요. 첫 번째 인자가 d-i 메시지 PO 파일, 나머지가 매뉴얼 PO 파일입니다. 매뉴얼 디렉토리에서는, $ python dicheck.py ../../../packages/po/ko.po *.po 일단 현재 발견한 에러는 전부 고쳐 놓은 상태입니다. Happy Translating! :) -- Changwoo Ryu <cwryu@debian.org>
# -*- coding: utf-8 -*-
"""Check that UI strings quoted in the manual match the d-i translation.

For every ``<guimenu>``/``<guimenuitem>`` string found in a manual msgid,
look the string up in the d-i message catalog and verify that the manual's
msgstr quotes exactly that translation.  Mismatches are reported as
``file:line: message`` lines.

The first command-line argument is the d-i PO file; the remaining
arguments are manual PO files.
"""
import re
import sys

import poparse

# d-i catalog: msgid -> (msgstr, line number of that msgstr).
dic = {}
# Path of the d-i PO file, used as the location prefix of the first report line.
dic_filename = ''

# Compiled once; both are applied to every msgid and msgstr.
_GUIMENU_RE = re.compile("<guimenu>([^<]+)</guimenu>")
_GUIMENUITEM_RE = re.compile("<guimenuitem>([^<]+)</guimenuitem>")


def build_dictionary(filename):
    """Load the d-i PO file *filename* into the module-level ``dic``."""
    # ``with`` closes the file even if parsing fails.
    with open(filename) as po_file:
        p = poparse.parse_file(po_file)
    for e in p.entries:
        dic[e.msgid] = (e.msgstr, e.msgstr_lineno)


def check_file(filename):
    """Report UI strings in manual PO file *filename* that differ from d-i.

    Strings that have no entry in ``dic`` are ignored.  Reports are
    printed to standard output; nothing is returned.
    """
    with open(filename) as po_file:
        p = poparse.parse_file(po_file)

    for entry in p.entries:
        items = _GUIMENU_RE.findall(entry.msgid)
        items += _GUIMENUITEM_RE.findall(entry.msgid)
        for en in items:
            # Explicit membership test instead of a try/except KeyError
            # around the whole body, which could mask unrelated KeyErrors.
            if en not in dic:
                continue
            (ko, line) = dic[en]

            if ("<guimenuitem>" + ko + "</guimenuitem>" in entry.msgstr
                    or "<guimenu>" + ko + "</guimenu>" in entry.msgstr):
                continue  # the manual quotes the d-i translation verbatim

            print("%s:%d: UI string \"%s\" was translated as \"%s\" in d-i"
                  % (dic_filename, line, en, ko))

            if len(items) != 1:
                # With several UI strings in one message we cannot tell
                # which manual string corresponds to this one.
                print("%s:%d: but not in the manual"
                      % (filename, entry.msgstr_lineno))
                continue

            s = _GUIMENU_RE.search(entry.msgstr)
            if not s:
                s = _GUIMENUITEM_RE.search(entry.msgstr)
            if s:
                print("%s:%d: but \"%s\" in the manual"
                      % (filename, entry.msgstr_lineno, s.group(1)))
            else:
                # No tag in the msgstr at all.  The original passed
                # s.group(1) here although s is None (and the format has
                # only two placeholders), so this branch always crashed.
                print("%s:%d: but nothing in the manual?"
                      % (filename, entry.msgstr_lineno))


def main(argv):
    """Run the check for the command line *argv* (``sys.argv`` layout)."""
    global dic_filename
    if len(argv) < 3:
        print("Usage: python ditrcheck.py <d-i po file> <manual po files>...\n")
        sys.exit(1)
    build_dictionary(argv[1])
    dic_filename = argv[1]
    for f in argv[2:]:
        check_file(f)


if __name__ == '__main__':
    main(sys.argv)
# -*- coding: utf-8 -*-
"""In-memory model of a gettext PO catalog: message entries plus header."""
import string

# Bit flags combined in ``entry.flag``.
FUZZY, OBSOLETE, C_FORMAT, NO_C_FORMAT, NO_WRAP = 1, 2, 4, 8, 16


class entry(object):
    """A single PO message together with its comments and flags."""

    def __init__(self):
        self.msgid = ""
        self.msgid_plural = ""
        self.msgstr = ""
        self.translator_comment = ""
        self.automatic_comment = ""
        self.references = []
        self.flag = 0
        # Source line numbers; 0 until a parser fills them in, so that
        # reading them on a hand-built entry does not raise AttributeError.
        self.msgid_lineno = 0
        self.msgstr_lineno = 0

    # attributes handling
    def set_flag(self, flag):
        """Turn on the bit(s) in *flag*."""
        self.flag = self.flag | flag

    def unset_flag(self, flag):
        """Turn off the bit(s) in *flag*."""
        self.flag = self.flag & ~flag

    def is_fuzzy(self):
        """Return a non-zero value if the FUZZY flag is set."""
        return (self.flag & FUZZY)

    def is_obsolete(self):
        """Return a non-zero value if the OBSOLETE flag is set."""
        return (self.flag & OBSOLETE)

    def is_untranslated(self):
        """Return True if msgstr is empty."""
        return (self.msgstr == "")

    def is_translated(self):
        """Return True if the entry is neither fuzzy, obsolete nor empty."""
        return (not self.is_fuzzy() and not self.is_obsolete()
                and not self.is_untranslated())

    def is_c_format(self):
        """Return a non-zero value if the C_FORMAT flag is set."""
        return (self.flag & C_FORMAT)

    def is_no_c_format(self):
        """Return a non-zero value if the NO_C_FORMAT flag is set."""
        return (self.flag & NO_C_FORMAT)

    def is_no_wrap(self):
        """Return a non-zero value if the NO_WRAP flag is set."""
        return (self.flag & NO_WRAP)

    def __repr__(self):
        return repr(self.msgid) + ":::" + repr(self.msgstr)


class catalog(object):
    """A list of entries plus the metadata taken from the header entry."""

    def __init__(self):
        self.entries = []
        self.metadata = {}
        self.textdomain = ''
        self.language = 'ko'

    def add_entry(self, entry):
        """Add *entry* to the catalog.

        An entry whose msgid is empty is the PO header: its msgstr is
        split into ``Key: value`` lines that are stored in ``metadata``
        and the entry itself is not appended to ``entries``.
        """
        if entry.msgid == '':  # header entry
            for line in entry.msgstr.split("\n"):
                if not line:
                    continue
                # Split on the first ": " only, so values that contain
                # ": " themselves are kept whole instead of truncated.
                key, sep, value = line.partition(": ")
                if not sep:
                    # Not a "Key: value" line; skip it rather than fail
                    # with IndexError as the two-element indexing did.
                    continue
                self.metadata[key] = value
        else:
            self.entries.append(entry)

    def settextdomain(self, d):
        """Record *d* as the catalog's text domain."""
        self.textdomain = d
# -*- coding: utf-8 -*-
"""Parse GNU gettext compliant PO (Portable Object) file.
"""

__all__ = ["parse_file", "parse_entry", "ParseError",
           "FUZZY", "OBSOLETE", "C_FORMAT", "NO_C_FORMAT", "NO_WRAP"]

import ast
import codecs
import re
import string

import po


class ParseError(Exception):
    """Raised when the input cannot be parsed as a PO file.

    ``lineno`` (also available as ``args[0]``) is the line number at which
    parsing failed, or None when the raiser does not know it.

    This used to be the string ``'ParseError'``; string exceptions can no
    longer be raised on Python 2.6 and later, so it is a real exception
    class now.  ``except ParseError`` in callers keeps working.
    """

    def __init__(self, lineno=None):
        Exception.__init__(self, lineno)
        self.lineno = lineno


def c2rawstring(str):
    """Return the value of the C-style string body *str* (no outer quotes).

    Escape sequences such as ``\\n`` and ``\\"`` are interpreted.
    """
    # PO files are external input.  ast.literal_eval accepts only literals,
    # whereas the eval() used here before would execute any expression
    # smuggled into a string line.
    return ast.literal_eval('"' + str + '"')


FUZZY, OBSOLETE, C_FORMAT, NO_C_FORMAT, NO_WRAP = 1, 2, 4, 8, 16

# Keyword in a "#, ..." line -> flag bit.  Unknown keywords are ignored.
_FLAG_BITS = {
    'c-format': C_FORMAT,
    'no-c-format': NO_C_FORMAT,
    'fuzzy': FUZZY,
    'no-wrap': NO_WRAP,
}

_charset_re = re.compile("charset=(.+)$")


def _to_text(value, charset):
    """Decode *value* with *charset* if it is a byte string.

    Values that are already text are returned unchanged.
    """
    if isinstance(value, bytes):
        return value.decode(charset)
    return value


def parse_file(file):
    """Parse the open PO file *file* and return a ``po.catalog``.

    The first entry is handed to ``catalog.add_entry`` before its charset
    is known; the charset is then read from the ``Content-Type`` metadata
    and used to decode the comments and msgstr of every following entry.
    msgid values are left as read.

    An input without any entry yields an empty catalog.

    Raises ParseError (carrying the line number) on malformed input, on a
    missing ``Content-Type``/charset, or when decoding fails.
    """
    lineno = 0
    catalog = po.catalog()

    # Let parse_entry's ParseError propagate unchanged: it already carries
    # the failing line number, which re-raising with the local ``lineno``
    # (still 0 at this point) used to discard.
    (entry, lineno) = parse_entry(file, lineno)
    if entry is None:
        return catalog  # nothing but blank lines/comments
    catalog.add_entry(entry)

    found = _charset_re.search(catalog.metadata.get('Content-Type', ''))
    if found is None:
        raise ParseError(lineno)
    charset = found.group(1)

    while 1:
        (entry, lineno) = parse_entry(file, lineno)
        if entry is None:
            return catalog
        try:
            entry.translator_comment = _to_text(entry.translator_comment,
                                                charset)
            entry.automatic_comment = _to_text(entry.automatic_comment,
                                               charset)
            entry.msgstr = _to_text(entry.msgstr, charset)
        except (UnicodeError, LookupError):
            # Undecodable bytes, or a charset name Python does not know.
            raise ParseError(lineno)
        catalog.add_entry(entry)


STATE_FIRST, STATE_COMMENT, STATE_ECOMMENT, STATE_MSGID, STATE_MSGSTR = \
    1, 2, 3, 4, 5

emptyline_re = re.compile(r"^\s*$")
translator_comment_re = re.compile(r"^\#( (.*))?$")
automatic_comment_re = re.compile(r"^\#. (.*)$")
reference_re = re.compile(r"^\#: (.*)$")
flag_re = re.compile(r"^\#, (.*)$")
string_re = re.compile(r"^\"(.*)\"\w*")


def read_string(fmt):
    """Return the value of the quoted string at the start of *fmt*.

    Raises ParseError (without a line number) if *fmt* does not start with
    a quoted string or the string body is not a valid literal.
    """
    found = string_re.match(fmt)
    if found is None:
        raise ParseError()
    try:
        return c2rawstring(found.group(1))
    except (ValueError, SyntaxError):
        raise ParseError()


def parse_entry(file, lineno):
    """Read one entry from *file*, whose last line read was *lineno*.

    Returns ``(entry, lineno)`` where *lineno* is the number of the last
    line consumed, or ``(None, lineno)`` when the end of input is reached
    before an entry starts.  An entry ends at a blank line or at EOF.

    Raises ParseError with the offending line number on malformed input.
    """
    state = STATE_FIRST
    new_entry = po.entry()

    while 1:
        lineno += 1
        line = file.readline()

        if not line:
            # EOF
            if state == STATE_FIRST or state == STATE_COMMENT:
                return (None, lineno)   # no more messages
            elif state != STATE_MSGSTR:
                raise ParseError(lineno)    # unexpected EOF
            else:
                return (new_entry, lineno)

        if emptyline_re.match(line):
            if state == STATE_FIRST or state == STATE_COMMENT:
                continue
            elif state != STATE_MSGSTR:
                raise ParseError(lineno)
            else:
                return (new_entry, lineno)

        if line[-1] == '\n':    # remove the trailing newline
            line = line[:-1]

        if line[:3] == '#~ ':
            new_entry.set_flag(OBSOLETE)
            line = line[3:]

        # Slices instead of indexing from here on: after stripping "#~ "
        # the line may be empty, and line[0] would raise IndexError.  An
        # empty remainder now ends up in the final ParseError branch.
        if line[:1] == '#' and line[1:2] != '~':
            # comments
            if state == STATE_FIRST:
                state = STATE_COMMENT
            kind = line[1:2]
            if kind == '':
                new_entry.translator_comment += '\n'
            elif kind == ' ':
                # translator comment
                new_entry.translator_comment += line[2:] + '\n'
            elif kind == '.':
                # automatic (extracted) comment; these used to be dropped
                new_entry.automatic_comment += line[3:] + '\n'
            elif kind == ':':
                state = STATE_ECOMMENT
                new_entry.references += line[3:].split(' ')
            elif kind == ',':
                state = STATE_ECOMMENT
                for flag in line[3:].split(', '):
                    bit = _FLAG_BITS.get(flag)
                    if bit is not None:
                        new_entry.set_flag(bit)
            # any other "#x" line is ignored
        else:
            try:
                if line[:7] == 'msgid "':
                    state = STATE_MSGID
                    new_entry.msgid += read_string(line[6:])
                    new_entry.msgid_lineno = lineno
                elif line[:14] == 'msgid_plural "':
                    # NOTE(review): the state is STATE_MSGID here too, so
                    # continuation lines of a multi-line msgid_plural are
                    # appended to msgid, as in the original code.
                    state = STATE_MSGID
                    new_entry.msgid_plural += read_string(line[13:])
                elif line[:8] == 'msgstr "':
                    state = STATE_MSGSTR
                    new_entry.msgstr += read_string(line[7:])
                    new_entry.msgstr_lineno = lineno
                elif line[:7] == 'msgstr[':
                    # The slice skips 'msgstr[N] ', i.e. it assumes a
                    # one-character index; every plural form is
                    # concatenated into msgstr.
                    state = STATE_MSGSTR
                    new_entry.msgstr += read_string(line[10:])
                    new_entry.msgstr_lineno = lineno
                elif line[:1] == '"':
                    # continuation of the preceding msgid/msgstr
                    if state == STATE_MSGID:
                        new_entry.msgid += read_string(line)
                    elif state == STATE_MSGSTR:
                        new_entry.msgstr += read_string(line)
                    else:
                        raise ParseError(lineno)
                else:
                    raise ParseError(lineno)
            except ParseError:
                # read_string does not know the line number; attach it.
                raise ParseError(lineno)


# Minimal catalog used by test() when no file is given on the command line.
test_input = '''msgid ""
msgstr ""
"Content-Type: text/plain; charset=UTF-8\\n"

#: hello.c:1
msgid "Hello"
msgstr "Bonjour"
'''


def test():
    """Parse the file named on the command line ('-' for stdin) and print it.

    Without an argument the built-in ``test_input`` sample is parsed.
    """
    import sys
    if sys.argv[1:]:
        fn = sys.argv[1]
        if fn == '-':
            fp = sys.stdin
        else:
            fp = open(fn)
    else:
        try:
            from StringIO import StringIO
        except ImportError:
            from io import StringIO
        fp = StringIO(test_input)
    try:
        catalog = parse_file(fp)
    finally:
        if fp is not sys.stdin:
            fp.close()
    print(str(catalog))


if __name__ == '__main__':
    test()
Attachment:
signature.asc
Description: This is a digitally signed message part