Merge GB and Big5 Fonts...

To: debian-chinese-gb@lists.debian.org
Subject: Merge GB and Big5 Fonts...
From: ha shao <hashao@chinese.com>
Date: Sun, 26 Aug 2001 21:37:01 +0800
Message-id: <[🔎] 20010826213701.E1871@hashao.ddts.net>
Mail-followup-to: ha shao <hashao@chinese.com>, debian-chinese-gb@lists.debian.org
Reply-to: hashao@chinese.com

目前没有一套自由发行的GBK字型，saka 说 foka 做了一个 24x24
的点阵字型，不知道有没有完成。最新的 pfaedit 可以处理TTF，
而且它可以把任何格式的字型存成一个文本格式的过渡文件，我们
可以通过合并文本格式的文件来合并国标和五大码字型。然后利用
pfaedit 产生一个新的包括国标和五大所有的字的字型。这样就
省了自己读ttf的步骤。而且可以通过pfaedit产生其他格式的
字型。

我写了这么一个程序(f+f.py)，读入几个文件，然后合并。我用文鼎的明体
加宋体，从14078 和 7682 里合并出16852个字。离GBK的两万一千
多个字还差了三四千。不过应该可以包括大多数常用的汉字了。
不知道如果加入日文字型，是不是能再多几百个字。

字型我就不放出来了，把程序放上来，有兴趣的可以参考。
pfaedit 可以到 http://pfaedit.sourceforge.net 上得到。建议
使用cvs 版。

由于pfaedit 没有做大字库优化，如果要加载这种上万字的字型，
你最好有超过160MB的内存加两倍的交换空间。一旦加载，大概占用
220MB 的内存。

f+f.py 是python程序，自然需要python了。

-- 
hashao|    何人半夜推山去？四面浮云猜是汝。常时相对两三峰，走遍溪头无觅处。
hashao|    西风瞥起云横渡，忽见东南天一柱。老僧拍手笑相夸，且喜青山依旧住。

#!/usr/bin/env python
# "Font plus Font" merges two .sfd fonts into one. .sfd is pfaedit's
# internal file format. pfaedit is an all-purpose font editor written
# by George Williams, available at http://pfaedit.sourceforge.net.

# Version: 20010826
"""
Copyright 2001 hashao <hashao@chinese...com>

Release under GPL version 2. GPL can be found at:
http://www.fsf.org/copyleft/gpl.html

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE. 
"""

"""
Output a merged font with the name font1-fontN and a file listing
glyph encoding included in the new font.
"""

"""
* Does not handle bitmaps yet.
* Has no special routine to deal with glyphs which reference
     other glyphs in the same font.
* Do not know CID or compound font yet.
"""
# Non-zero makes debug() print its argument; zero silences it.
DEBUG = 1
import sys
import os
import re
import tempfile
import string

# Overrides the tempfile module's `template` attribute, presumably so that
# scratch-file names start with "f+f".
# NOTE(review): newer Python versions appear to bind mktemp()'s default
# prefix when the module is imported, so this may have no effect -- confirm.
tempfile.template = "f+f"

# Splits a header line "Option: value" into ("Option:", "value").  The
# pattern is a raw string so that "\s" reaches the regex engine untouched
# instead of being an invalid string escape on newer Python versions.
item_patten = re.compile(r"^([^:]+:)\s*(.*)$") # Option: OK
# Header keys that parser.flushheader() writes first, in exactly this order.
# Any other header key is written after these.
TOPTITLES = [
	"SplineFontDB:", "FontName:", "FullName:", "FamilyName:",
	"Weight:", "Copyright:", "Version:",
	"ItalicAngle:", "UnderlinePosition:", "UnderlineWidth:",
	"Ascent:", "Descent:",
	"DisplaySize:", "AntiAlias:",
	"XUID:", "Encoding:", "FSType:", "PfmFamily:",
	"TTFWeight:", "TTFWidth:", "Panose:",
	"LangName:",
]
def debug(arg):
	"""Print `arg` on standard output when the module-level DEBUG flag is true."""
	# A parenthesised single argument prints identically under the Python 2
	# print statement and the Python 3 print function.
	if DEBUG:
		print(arg)
def warn(arg):
	"""Print `arg` on standard output, prefixed with "Warning: ".

	`arg` is formatted with %s, so non-string values are accepted too.
	"""
	# The tuple keeps %-formatting correct even when `arg` is itself a tuple.
	# The parenthesised form works as a Python 2 print statement and as a
	# Python 3 print call.
	print("Warning: %s" % (arg,))
def getint(str):
	"""Return `str` converted with int(), or `str` itself if that fails.

	Only ValueError is absorbed; any other exception raised by int()
	propagates to the caller.
	"""
	try:
		converted = int(str)
	except ValueError:
		converted = str
	return converted

# Header keys consulted by parser.savehead() when two fonts disagree on a
# value: `maxgroup` is paired with max(), `mingroup` with min().
maxgroup = [
	"UnderlineWidth:",
	"Ascent:",
	"Descent:",
	"DisplaySize:",
]
mingroup = [
	"UnderlinePosition:",
]

"""Not really OO, just use class to limite global variables in a
   narrower namespace."""
class parser:
	"""Read one or more .sfd fonts and merge them into a single font.

	Header values are collected in self.header, the grid outline in
	self.grid, and the text of every kept glyph in a scratch file.  Typical
	use: construct with the first font, call readall(), then for each
	further font call reset_file() and readall(); finally assign a writable
	file object to self.output and call flush().
	"""

	def __init__(self, filename):
		"""Open `filename` for reading and create the glyph scratch file."""
		self.file = open(filename, 'r')
		self.output = None	# must be set by the caller before flush()
		self.grid = []
		self.header = {}
		# mkstemp() creates the file itself; mktemp() only returns a name and
		# leaves a window in which another process can create that file.
		handle, self.tempfilename = tempfile.mkstemp(prefix="f+f")
		self.tempfile = os.fdopen(handle, 'w+')
		self.uchars = [] # unicode chars in the font (strings, first-seen order).
		self.lchars = [] # local chars in the font, one per glyph written.
		self.count = 0 	 # Counter for StartChar: lines read from this input.
		self.charspace = 0	# encoding space as specified at BeginChars:

	def _number(self, text):
		"Return `text` as an int, or None when it is not a valid integer."
		try:
			return int(text)
		except (TypeError, ValueError):
			return None

	def _extreme(self, pick, old, new):
		"""Return whichever of `old`/`new` `pick` (min or max) selects.

		The comparison is numeric; if either value is not a number the
		previously stored value `old` is kept.
		"""
		try:
			if pick(float(old), float(new)) == float(old):
				return old
			return new
		except (TypeError, ValueError):
			return old

	def _cleanup(self):
		"Close every open file and delete the glyph scratch file."
		self.file.close()
		self.tempfile.close()
		if os.path.exists(self.tempfilename):
			os.remove(self.tempfilename)
		if self.output is not None:
			self.output.close()

	def reset_file(self, filename):
		"Switch input to the next font; the per-font glyph counter restarts."
		self.file.close()
		self.file = open(filename, 'r')
		self.count = 0

	def savehead(self, item):
		"""Store one header `item` ([option, value]) without overwriting.

		"LangName:" values accumulate in a list.  For any other option the
		first value wins, except that options listed in mingroup/maxgroup
		keep the smaller/larger of the two values.  A differing
		"ItalicAngle:" only produces a warning; a differing "Encoding:"
		aborts the program via sys.exit(1) after removing the scratch file.
		"""
		header = self.header
		opt, val = item[0], item[1]
		if opt == "LangName:":
			header.setdefault(opt, []).append(val)
			return
		if opt not in header:
			header[opt] = val
			return
		if val == header[opt]:
			return

		# The fonts disagree.  The decision depends on the option *name*.
		if opt in mingroup:
			header[opt] = self._extreme(min, header[opt], val)
			warn("%s is different" % opt)
		elif opt in maxgroup:
			header[opt] = self._extreme(max, header[opt], val)
			warn("%s is different" % opt)
		elif opt == "ItalicAngle:":
			warn("ItalicAngles are differnt")
		elif opt == "Encoding:":
			warn("The fonts have different encoding. quit.")
			self._cleanup()
			sys.exit(1)

	def charstring(self):
		"""Copy the glyphs of the current font into the scratch file.

		Reading starts on the line after "BeginChars:" and stops at
		"EndChars" or end of file.  For each "StartChar:" the following
		line is expected to be "Encoding: <local> <unicode>":
		  * a glyph whose unicode value has not been seen is kept;
		  * a glyph with unicode -1 is kept unless its local slot is
		    already taken by a kept glyph;
		  * every other glyph is skipped.
		Local and unicode encodings are assumed to be fixed pairs.
		"""
		print("Reading CharStrings")

		infile = self.file
		out = self.tempfile
		# Sets make the "already have it" tests O(1) per glyph.
		seen_unicode = set(self.uchars)
		seen_local = set(self.lchars)
		copying = 0	# Flag: currently inside a glyph that is being kept.

		line = infile.readline()
		while line:
			stripped = line.strip()
			if copying:
				out.write(line)
				if stripped == "EndChar":
					copying = 0
			elif stripped == "EndChars":
				break
			elif line.find("StartChar:") != -1:
				# Print a progress indicator every 37 glyphs.
				self.count = self.count + 1
				if not self.count % 37:
					sys.stdout.write("\rGlyphs: %.5d" % self.count)
					sys.stdout.flush()

				# Read one more line and check the glyph encoding.
				startline = line
				line = infile.readline()
				fields = line.split()
				keep = 0
				if len(fields) >= 3 and fields[0] == "Encoding:":
					local, code = fields[1], fields[2]
					if code == "-1":
						# Glyph without a unicode value.
						# FIXME: can be confusing in 2 fonts; only the
						# local slot is available to tell such glyphs apart.
						keep = local not in seen_local
					elif code not in seen_unicode:
						seen_unicode.add(code)
						self.uchars.append(code)
						keep = 1
				if keep:
					seen_local.add(local)
					self.lchars.append(local)
					out.write(startline)
					out.write(line)
					copying = 1
			line = infile.readline()

		sys.stdout.write("\rGlyphs: %.5d\n" % self.count)
		sys.stdout.flush()

	def readgrid(self):
		"""Store the grid section; the first line read follows 'Grid'.

		Lines are appended to self.grid up to and including the terminating
		"EndSplineSet" line (matched case-insensitively), or until end of
		file when no terminator is present.
		"""
		line = self.file.readline()
		while line:
			self.grid.append(line)
			if line.strip().lower() == "endsplineset":
				return
			line = self.file.readline()

	def readbitmap(self):
		"Somehow read bitmap fonts."
		pass

	def parse_beginchars(self, line):
		"Raise self.charspace to the glyph space given on a BeginChars: line."
		fields = line.split()
		size = None
		if len(fields) > 1:
			size = self._number(fields[1])
		if size is None:
			warn("cannot read glyph space from '%s'" % line.strip())
			return
		self.charspace = max(self.charspace, size)

	def readall(self):
		"""Read the current font: header lines, grid, then the glyphs.

		Everything after the glyph section is ignored, so text that follows
		"EndChars" is never mistaken for header lines.
		"""
		infile = self.file
		print("reading font...")
		line = infile.readline()
		while line:
			if line.find("BeginChars:") != -1:
				# Start reading chars.  The BeginChars: line is not saved;
				# flushchars() writes a fresh one.
				self.parse_beginchars(line)
				self.charstring()
				return

			found = item_patten.match(line)
			if found:
				item = list(found.groups())
				item[1] = getint(item[1])
				self.savehead(item)
			elif line.strip() == "Grid" and not self.grid:
				# Only the first grid encountered is kept.
				self.readgrid()

			line = infile.readline()

	def flushheader(self):
		"Write the headers: TOPTITLES entries in list order, then the rest."
		print("Flusing headers")
		header = self.header
		out = self.output
		# Write simple important headers first.
		for opt in TOPTITLES:
			if opt not in header:
				continue
			if opt == "LangName:":
				for val in header[opt]:
					out.write(opt + ' ' + str(val) + '\n')
			else:
				out.write(opt + ' ' + str(header[opt]) + '\n')

		# Write headers not in TOPTITLES, sorted so the output is stable.
		for opt in sorted(header):
			if opt in TOPTITLES:
				continue
			out.write(opt + ' ' + str(header[opt]) + '\n')

	def flushchars(self):
		"""Write the BeginChars: line, all kept glyphs and EndChars.

		The first BeginChars: number is the larger of the saved glyph space
		and the highest numeric unicode value kept; the second is the number
		of glyphs written to the scratch file.
		"""
		print("Flushing CharStrings")
		codes = [self._number(code) for code in self.uchars]
		codes = [code for code in codes if code is not None]
		# Numeric maximum; comparing the strings would rank "65" above "1000".
		charspace = max([self.charspace] + codes)
		self.output.write('BeginChars: %d %d\n' % (charspace, len(self.lchars)))
		self.tempfile.flush()
		self.tempfile.seek(0, 0)
		# Copy in chunks so the whole glyph text is never held in memory.
		chunk = self.tempfile.read(65536)
		while chunk:
			self.output.write(chunk)
			chunk = self.tempfile.read(65536)
		self.output.write("EndChars\n")

	def flushgrid(self):
		"Write the saved grid section, if any, preceded by a 'Grid' line."
		if not self.grid:
			return
		print("Flushing grid")
		self.output.write("Grid\n")
		for line in self.grid:
			self.output.write(line)

	def flush(self):
		"""Write the merged font to self.output, then release all files.

		The scratch file is removed and every file is closed even when
		writing fails part-way.
		"""
		try:
			self.flushheader()
			self.flushgrid()
			self.flushchars()
			self.output.write('EndSplineFont\n')
		finally:
			self._cleanup()

def check_sfd(filename):
	"""Return the version string of the .sfd file `filename`, or None.

	The first line must read "SplineFontDB: <version>".  A missing file, an
	empty file, or a first line without both the tag and a version prints a
	message on standard output and yields None.
	"""
	if not os.path.isfile(filename):
		print("File '%s' does not exist" % filename)
		return None
	fd = open(filename, 'r')
	try:
		sfdtag = fd.readline().split()
	finally:
		# Close the file whatever happens while reading the first line.
		fd.close()
	# The length check covers empty files and a tag without a version.
	if len(sfdtag) < 2 or sfdtag[0] != "SplineFontDB:":
		print("File '%s' is not a font file" % filename)
		return None
	return sfdtag[1]

def pusage():
	"""Print the usage message and terminate the program with exit status 1.

	The program name comes from sys.argv, so the function does not depend
	on a global `argv` having been set up by the script section first.
	"""
	print("Usage: %s font1.sfd font2.sfd ...\n" % sys.argv[0])
	print("Output: font1-fontN and font1-fontN.enc\n")
	# Every caller reports bad arguments or input, so signal failure.
	sys.exit(1)

# Script entry point: validate every input font, merge them in command-line
# order, then write the merged font plus a ".enc" listing of its code points.
if __name__ == '__main__':
	argv = sys.argv
	if len(argv) < 3:
		pusage()

	# Refuse to start unless every argument looks like an .sfd file.
	for name in argv[1:]:
		sfd_version = check_sfd(name)
		if not sfd_version:
			pusage()

	obj = parser(argv[1])
	obj.readall()
	# Merge each remaining font in turn, not just the second one.
	for name in argv[2:]:
		obj.reset_file(name)
		obj.readall()

	newname = os.path.basename(argv[1]) + "-" + os.path.basename(argv[-1])
	obj.output = open(newname, 'w')
	obj.flush()
	obj.output.close()

	# One decimal code point per line, in ascending numeric order.
	codenamefd = open(newname+'.enc', 'w')
	try:
		for key in sorted(map(int, obj.uchars)):
			codenamefd.write(str(key)+'\n')
	finally:
		codenamefd.close()

# vim: set ts=4 sw=4 sts=4:

Reply to:

Follow-Ups:
- Re: Merge GB and Big5 Fonts...
  - From: "Edward G.J. Lee" <edt1023@ms17.hinet.net>

Prev by Date: Re: 在linuxforum.net下的镜像不全吗？
Next by Date: Re: Merge GB and Big5 Fonts...
Previous by thread: Re: 在linuxforum.net下的镜像不全吗？
Next by thread: Re: Merge GB and Big5 Fonts...
Index(es):
- Date
- Thread