Merge GB and Big5 Fonts...

To: debian-chinese-big5@lists.debian.org
Subject: Merge GB and Big5 Fonts...
From: ha shao <hashao@chinese.com>
Date: Sun, 26 Aug 2001 21:37:01 +0800
Message-id: <[🔎] 20010826213701.E1871@hashao.ddts.net>
Mail-followup-to: ha shao <hashao@chinese.com>, debian-chinese-gb@lists.debian.org
Reply-to: hashao@chinese.com

目前沒有一套自由發行的GBK字型，saka 說 foka 做了一個 24x24
的點陣字型，不知道有沒有完成。最新的 pfaedit 可以處理TTF，
而且它可以把任何格式的字型存成一個文本格式的過渡文件，我們
可以通過合併文本格式的文件來合併國標和五大碼字型。然後利用
pfaedit 產生一個新的包括國標和五大所有的字的字型。這樣就
省了自己讀ttf的步驟。而且可以通過pfaedit產生其他格式的
字型。

我寫了這麼一個程序(f+f.py)，讀入幾個文件，然後合併。我用文鼎的明體
加宋體，從14078 和 7682 裡合併出16852個字。離GBK的兩萬一千
多個字還差了三四千。不過應該可以包括大多數常用的漢字了。
不知道如果加入日文字型，是不是能再多幾百個字。

字型我就不放出來了，把程序放上來，有興趣的可以參考。
pfaedit 可以到 http://pfaedit.sourceforge.net 上得到。建議
使用cvs 版。

由於pfaedit 沒有做大字庫優化，如果要加載這種上萬字的字型，
你最好有超過160MB的內存加兩倍的交換。一旦加載，大概佔用
220MB 的內存。

f+f.py 是python程序，自然需要python了。

-- 
hashao|    何人半夜推山去？四面浮雲猜是汝。常時相對兩三峰，走遍溪頭無覓處。
hashao|    西風瞥起雲橫渡，忽見東南天一柱。老僧拍手笑相夸，且喜青山依舊住。
-- 
| This message was re-posted from debian-chinese-gb@lists.debian.org
| and converted from gb2312 to big5 by an automatic gateway.

#!/usr/bin/env python
# "Font plus Font" merges two or more .sfd fonts into one. .sfd is
# pfaedit's internal file format. pfaedit is an all-purpose font editor
# written by George Williams at http://pfaedit.sourceforge.net.

# Version: 20010826
"""
Copyright 2001 hashao <hashao@chinese...com>

Released under GPL version 2. The GPL can be found at:
http://www.fsf.org/copyleft/gpl.html

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE. 
"""

"""
Output a merged font with the name font1-fontN and a file listing
glyph encoding included in the new font.
"""

"""
* Does not handle bitmaps yet.
* Has no special routine to deal with glyphs which reference
     other glyphs in the same font.
* Does not know about CID or compound fonts yet.
"""
# When true, debug() prints its argument; set to 0 to silence it.
DEBUG = 1
import sys
import os
import re
import tempfile
import string

# Name prefix wanted for the scratch file that the parser class creates
# through the tempfile module.
# NOTE(review): newer tempfile implementations may not pick up an
# assignment made after import -- confirm on the Python version in use.
tempfile.template = "f+f"

# Matches one "Option: value" header line.  Group 1 is the option name
# including its trailing colon, group 2 is the value with the whitespace
# after the colon removed.  Written as a raw string so that "\s" reaches
# the regex engine untouched instead of being an invalid string escape.
item_patten = re.compile(r"^([^:]+:)\s*(.*)$") # Option: OK

# Header options that are written first, in exactly this order, when the
# merged font is flushed; any other option seen is written after them.
TOPTITLES = [
		"SplineFontDB:",
		"FontName:",
		"FullName:",
		"FamilyName:",
		"Weight:",
		"Copyright:",
		"Version:",
		"ItalicAngle:",
		"UnderlinePosition:",
		"UnderlineWidth:",
		"Ascent:",
		"Descent:",
		"DisplaySize:",
		"AntiAlias:",
		"XUID:",
		"Encoding:",
		"FSType:",
		"PfmFamily:",
		"TTFWeight:",
		"TTFWidth:",
		"Panose:",
		"LangName:",
]
def debug(arg):
	"""Print arg on standard output when the module-level DEBUG flag is true.

	Returns None.
	"""
	if DEBUG:
		# Call form of print: for a single argument it prints exactly what
		# the Python 2 print statement did, and it is also valid Python 3.
		print(arg)
def warn(arg):
	"""Print the string arg on standard output, prefixed with "Warning: "."""
	# Call form of print: identical output to the Python 2 print statement
	# for a single argument, and also valid Python 3.
	print("Warning: " + arg)
def getint(str):
	"""Convert str to an int when possible.

	Returns the integer value, or the argument unchanged when int()
	rejects it with ValueError (for example "1.0" or "Regular").
	"""
	try:
		return int(str)
	except ValueError:
		# Not an integer literal: hand the original value back untouched.
		return str

# Header options for which parser.savehead() is meant to keep the largest
# of the values found in the merged fonts.
maxgroup = [
	"UnderlineWidth:",
	"Ascent:",
	"Descent:",
	"DisplaySize:",
]
# Header options for which parser.savehead() is meant to keep the smallest
# of the values found in the merged fonts.
mingroup = [
	"UnderlinePosition:",
]

"""Not really OO; a class is used only to limit global variables to a
   narrower namespace."""
class parser:
	"The actual object to read fonts."
	def __init__(self, filename):
		self.file = open(filename, 'r')
		self.grid = []
		self.header = {}
		self.tempfilename = tempfile.mktemp()
		self.tempfile = open(self.tempfilename, 'w+')
		self.uchars = [] # unicode chars in the font.
		self.lchars = [] # local chars in the font.
		self.count = 0 	 # Counter for readed charstrings.
		self.charspace = 0	# encoding space as specified at BeginChars:

	def reset_file(self, filename):
		"reset font name to the next one."
		self.file.close()
		self.file = open(filename, 'r')
		self.count = 0

	def savehead(self, item):
		"Save header into a hashtable. Don't overwrite"
		header = self.header
		opt = item[0]
		val = item[1]
		if not header.has_key(opt):
			if opt == "LangName:":
				header[opt] = [val]
			else:
				header[opt] = val
			return
		if opt == "LangName:":
			header[opt].append(val)
			return
		if val == header[opt]:
			return

		"Replace old value with better ones."
		if val in mingroup:
			header[opt] = min(header[opt], val)
			warn("%s is different" % opt)
		elif val in maxgroup:
			header[opt] = max(header[opt], val)
			warn("%s is different" % opt)
		elif val == "ItalicAngle:":
			warn("ItalicAngles are differnt")
		elif val == "Encoding:":
			warn("The fonts have different encoding. quit.")
			sys.exit()

	def charstring(self):
		"central function. Read charstring to a tempfile."
		"Make sure do not add glyphs already added."
		print "Reading CharStrings"


		inside = 0	# Flag for inside a charstring.
		file = self.file
		line = file.readline()
		
		while line:
			if line == "EndChars":
				return
				
			#print string.strip(line), "::", inside
			if inside:
				if string.strip(line) == "EndChar":
					inside = 0
				self.tempfile.write(line)
				"Readline and return. Don't do next test."
				line = file.readline()
				continue
				
			if string.find(line, "StartChar:") != -1:
				"Print a progress..."
				self.count = self.count + 1
				if not self.count % 37:
					sys.stdout.write( "\rGlyphs: %.5d" % self.count)
					sys.stdout.flush()

				"read one more line and check for glyph encoding."
				templine = line
				line = file.readline()
				encoding = string.split(line)
				if encoding[0] != "Encoding:":
					pass
				elif encoding[2] not in self.uchars:
					"Assume local and unicode encodings are fixed pairs."
					self.uchars.append(encoding[2])
					self.lchars.append(encoding[1])
					self.tempfile.write(templine)
					self.tempfile.write(line)
					inside = 1
				elif encoding[2] == -1:		# I don't know what is -1
					"FIXME: can be confusing in 2 fonts."
					self.lchars.append(encoding[1])
					self.tempfile.write(templine)
					self.tempfile.write(line)
					inside = 1
					

			line = file.readline()
		sys.stdout.write( "\rGlyphs: %.5d\n" % self.count)
		sys.stdout.flush()
			
					
	def readgrid(self):
		"First line is the line after 'Grid'"
		line = readline()
		while line:
			if line != "EndSplineset":
				self.grid.append(line)
				return
			self.grid.append(line)
			line = self.file.readline()

	def readbitmap(self):
		"Somehow read bitmap fonts."
		pass
	
	def parse_beginchars(self, line):
		"Find glyph space from the BeginChars: line"
		charcount = string.split(line)
		self.charspace = max(self.charspace, getint(charcount[1]))
		
	def readall(self):
		"start up of font reading."
		file = self.file
		line = file.readline()
		print "reading font..."
		while line:
			if string.find(line, "BeginChars:") != -1:
				"Start read chars. Don't save BeginChars:"
				self.parse_beginchars(line)
				self.charstring()
				line = file.readline()
				continue

			result = item_patten.match(line)
			if result:
				item = list(result.groups())
				item[1] = getint(item[1])
				self.savehead(item)
			elif line == "Grid":
				if not len(self.grid):
					self.readgrid()

			line = file.readline()	
			
	
	def flushheader(self):
		"Write out header in order of TOPTITLES list."
		print "Flusing headers"
		header = self.header
		keys = header.keys()
		"Write simple important headers first."
		for i in range(len(TOPTITLES)):
			opt = TOPTITLES[i]
			if header.has_key(opt):
				if opt == "LangName:":
					for j in range(len(header[opt])):
						self.output.write(opt + ' ' + header[opt][j] + '\n')
				else:
					self.output.write(opt + ' ' + str(header[opt]) + '\n')

		"Write headers not in TOPTITLES."
		for i in range(len(keys)):
			opt = keys[i]
			if keys[i] in TOPTITLES:
				continue
			self.output.write(opt + ' ' + str(header[opt]) + '\n')

	def flushchars(self):
		"Write charstrings. Mostly copy from the charstring tempfile."
		print "Flushing CharStrings"
		self.uchars.sort()
		uchars = self.uchars
		#self.output.write('BeginChars: %d %d\n' % (uchars[-1], len(uchars)))
		"Use the max of save charspace and the max index of glyphs."
		charspace = max(self.charspace, getint(uchars[-1]))
		self.output.write('BeginChars: %d %d\n' % (charspace, len(uchars)))
		self.tempfile.flush()
		self.tempfile.seek(0,0)
		buffer = self.tempfile.read()
		while buffer:
			self.output.write(buffer)
			buffer = self.tempfile.read()
		self.output.write("EndChars\n")
	def flushgrid(self):
		if not len(self.grid):
			return
		print "Flushing grid"
		self.output.write("Grid\n")
		for i in (len(self.grid)):
			self.output.write(self.grid[i])
			
	def flush(self):
		self.flushheader()
		self.flushgrid()
		self.flushchars()
		self.tempfile.close()
		os.remove(self.tempfilename)
		self.output.write('EndSplineFont\n')
		self.output.close()

def check_sfd(filename):
	"""Check that filename looks like a pfaedit .sfd font.

	Returns the version string that follows "SplineFontDB:" on the first
	line of the file.  Returns None, after printing a message, when the
	file does not exist or its first line is not a "SplineFontDB:" tag
	followed by a version (this covers empty files too).
	"""
	if not os.path.isfile(filename):
		print("File '%s' does not exist" % filename)
		return None
	fd = open(filename, 'r')
	try:
		line = fd.readline()
	finally:
		# Close the file even when reading raises.
		fd.close()
	sfdtag = line.split()
	# Fewer than two fields means an empty line or a tag with no version.
	if len(sfdtag) < 2 or sfdtag[0] != "SplineFontDB:":
		print("File '%s' is not a font file" % filename)
		return None
	return sfdtag[1]

def pusage():
	"""Print the command line usage on standard output and exit.

	Raises SystemExit through sys.exit().
	"""
	# Read the program name from sys.argv directly: the bare name "argv"
	# is only bound when this file runs as a script, so relying on it
	# raised NameError when the function was called any other way.
	print("Usage: %s font1.sfd font2.sfd ...\n" % sys.argv[0])
	print("Output: font1-fontN and font1-fontN.enc\n")
	sys.exit()

if __name__ == '__main__':
	# Script entry point: merge the fonts named on the command line into
	# "<font1>-<fontN>" and list the merged code points, one per line in
	# numeric order, in "<font1>-<fontN>.enc" (both in the current
	# directory).

	# Kept as a module-level name because pusage() may read it.
	argv = sys.argv
	if len(argv) < 3:
		pusage()

	# Refuse to start unless every argument is an .sfd font.
	for i in range(1, len(argv)):
		if not check_sfd(argv[i]):
			pusage()
	obj = parser(argv[1])
	obj.readall()
	for i in range(2, len(argv)):
		# Feed each remaining font in turn; passing argv[2] here re-read
		# the second font once per extra argument and skipped the others.
		obj.reset_file(argv[i])
		obj.readall()
	newname = os.path.basename(argv[1]) + "-" + os.path.basename(argv[-1])
	obj.output = open(newname, 'w')
	obj.flush()
	obj.output.close()
	codenamefd = open(newname+'.enc', 'w')
	try:
		# Build a real list so that it can be sorted on Python 3 as well.
		b = [int(code) for code in obj.uchars]
		b.sort()
		for key in b:
			codenamefd.write(str(key)+'\n')
	finally:
		codenamefd.close()

# vim: set ts=4 sw=4 sts=4:
-- 
| This message was re-posted from debian-chinese-gb@lists.debian.org
| and converted from gb2312 to big5 by an automatic gateway.

Reply to:

Follow-Ups:
- Re: Merge GB and Big5 Fonts...
  - From: "Edward G.J. Lee" <edt1023@ms17.hinet.net>

Prev by Date: Re: 在linuxforum.net下的鏡像不全嗎？
Next by Date: Re: Merge GB and Big5 Fonts...
Previous by thread: Re: 在linuxforum.net下的鏡像不全嗎？
Next by thread: Re: Merge GB and Big5 Fonts...
Index(es):
- Date
- Thread