[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Re: dbootstrap's zh_CN.po encoding



Marcin Owsiany <porridge@pandora.info.bielsko.pl>:

> Looks like something's wrong with the Chinese PO file. Can
> someone more competent look at it? The following is a snippet
> from 'make ../all.utf' in boot-floppies/utilities/bogl
> 
> iconv -f "`grep -a '^"Content-Type:' zh_CN.po | sed -e 's/^.*charset=\\(.*\\)\\\\n.*/\\1/'`" -t utf-8 < zh_CN.po > utf/zh_CN.po
> iconv: illegal input sequence at position 990

What appears to have happened is that some program has reformatted the
file, splitting multibyte characters between lines.

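So you can fix the problem by rejoining the split strings (the Perl
one-liner below deletes every closing quote, newline, opening quote
sequence) with something like: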

cd utilities/dbootstrap/po
mv -i zh_CN.po zh_CN.po-
perl -e '$_=join("",<>);s/"\n"//g;print;' zh_CN.po- > zh_CN.po

That might be a permanent solution, because msgmerge didn't seem to
split strings dangerously when I tried it. However, if the problem
keeps reappearing then you might need to apply something like the
attached UNTESTED patch to utilities/dbootstrap/po/Makefile. But try
fixing just the PO file first.

Edmund
--- Makefile.orig	Wed May 23 23:48:04 2001
+++ Makefile	Mon May 28 13:43:50 2001
@@ -51,9 +51,9 @@
 	if [ $$lang = 'zh_TW' ]; then \
 	  mv $$lang.po $$lang.old.po; \
 	  echo "$$lang:"; \
-	  iconv -f "`grep -a '^"Content-Type:' $$lang.old.po | sed -e 's/^.*charset=\\(.*\\)\\\\n.*/\\1/'`" -t utf-8 <  $$lang.old.po > $$lang.old.utf.po; \
+	  perl -e '$_=join("",<>);s/"\n"//g;print;' $$lang.old.po | iconv -f "`grep -a '^"Content-Type:' $$lang.old.po | sed -e 's/^.*charset=\\(.*\\)\\\\n.*/\\1/'`" -t utf-8 > $$lang.old.utf.po; \
 	  if msgmerge $$msgmergeopt -v $$lang.old.utf.po $(PACKAGE).pot -o $$lang.utf.po; then \
-	    iconv -f utf-8 -t "`grep -a '^"Content-Type:' $$lang.old.po | sed -e 's/^.*charset=\\(.*\\)\\\\n.*/\\1/'`" <  $$lang.utf.po > $$lang.po; \
+	    perl -e '$_=join("",<>);s/"\n"//g;print;' $$lang.utf.po | iconv -f utf-8 -t "`grep -a '^"Content-Type:' $$lang.old.po | sed -e 's/^.*charset=\\(.*\\)\\\\n.*/\\1/'`" > $$lang.po; \
 	    rm -f $$lang.old.po $$lang.old.utf.po $$lang.utf.po; \
 	  else \
 	    echo "msgmerge for $$cat failed!"; \
@@ -90,7 +90,7 @@
 	@msgfmt --statistics -o $@ $<
 
 utf/%.po: %.po
-	iconv -f "`grep -a '^"Content-Type:' $< | sed -e 's/^.*charset=\\(.*\\)\\\\n.*/\\1/'`" -t utf-8 < $< > $@
+	perl -e '$_=join("",<>);s/"\n"//g;print;' $< | iconv -f "`grep -a '^"Content-Type:' $< | sed -e 's/^.*charset=\\(.*\\)\\\\n.*/\\1/'`" -t utf-8 > $@
 
 %.pox: %.po
 	$(MAKE) $(PACKAGE).pot
@@ -154,9 +154,9 @@
 	  if [ $$lang = 'zh_TW' ]; then \
 	    mv $$lang.po $$lang.old.po; \
 	    echo "$$lang:"; \
-	    iconv -f "`grep -a '^"Content-Type:' $$lang.old.po | sed -e 's/^.*charset=\\(.*\\)\\\\n.*/\\1/'`" -t utf-8 <  $$lang.old.po > $$lang.old.utf.po; \
+	    perl -e '$_=join("",<>);s/"\n"//g;print;' $$lang.old.po | iconv -f "`grep -a '^"Content-Type:' $$lang.old.po | sed -e 's/^.*charset=\\(.*\\)\\\\n.*/\\1/'`" -t utf-8 > $$lang.old.utf.po; \
 	    if msgmerge $$msgmergeopt -v $$lang.old.utf.po $(PACKAGE).pot -o $$lang.utf.po; then \
-	      iconv -f utf-8 -t "`grep -a '^"Content-Type:' $$lang.old.po | sed -e 's/^.*charset=\\(.*\\)\\\\n.*/\\1/'`" <  $$lang.utf.po > $$lang.po; \
+	      perl -e '$_=join("",<>);s/"\n"//g;print;' $$lang.utf.po | iconv -f utf-8 -t "`grep -a '^"Content-Type:' $$lang.old.po | sed -e 's/^.*charset=\\(.*\\)\\\\n.*/\\1/'`" > $$lang.po; \
 	      rm -f $$lang.old.po $$lang.old.utf.po $$lang.utf.po; \
 	    else \
 	      echo "msgmerge for $$cat failed!"; \

Reply to: