using divided source to find from where a string came

To: debian-edu@lists.debian.org
Subject: using divided source to find from where a string came
From: victory <victory.deb@gmail.com>
Date: Thu, 25 Aug 2016 02:52:57 +0900
Message-id: <[🔎] 20160825025257.3db43bdcacc18dbcacb565df@gmail.com>
* keep each page from the wiki in a separate file
* translated files and all-in-one files are created on the fly
* keep the id file, which stores the page names in the order in which they are to be included;
  this is needed to build the actual docs and the translated material
* english xml is $(name).en.xml instead of $(name).xml;
  this way lots of "ifndef LINGUA" will not be needed in Makefile.common
* LINGUA is used indirectly 
* resurrects reference lines (this is what I want);
  to prevent megabytes of diff, the line number is fixed to 1 (po4a's behavior)

  strings in the po files will look like:

  #. type: Content of: <article><section><section><title>
  #: AboutDebianEdu.en.dbk:1 AppendixA.en.dbk:1
  msgid "Manual for Debian Edu 9+edu0 Codename Stretch"
  msgstr ""


this patch also changes the generation flow as follows:
old-flow:
  update: wiki -> (en-each-sec.dbk ->) aio.xml -> pot -> po
  build-*, pdf, epub: po -> trans-aio.xml -> pdf, html, epub

new-flow:
  update: wiki -> en-each-sec.dbk -> pot -> po
  ep, ht, pd: aio.xml -> pdf, html, epub
  aio: loops $(name).$(LINGUA).xml
  %.$(LINGUA).dbk: po -> trans-each-sec.dbk
  $(name).$(LINGUA).xml: each-sec.dbk -> aio.xml 

usage:
  "make e" generates epub, "make h" generates html, "make p" generates pdf versions 

additionally, e.g. "TARGET=ja+en make ..." generates the ja and en docs
(currently only one language is generated when LINGUA=[a lang] is given,
 or all languages when no LINGUA is specified)

the patch does not use the existing generation targets, as install: uses them;
no testing has been done for it

-- 
victory
no need to CC me :-)
http://userscripts.org/scripts/show/102724 0.0.1.4
http://userscripts.org/scripts/show/163846 0.0.1
http://userscripts.org/scripts/show/163848 0.0.1
----
Index: ../common/Makefile.common
===================================================================
--- ../common/Makefile.common	(rev. 47)
+++ ../common/Makefile.common	(wc)
@@ -6,24 +6,41 @@
 # Use Make internal functions 'subst' and 'wildcard'; (from right to left):
 # get list of all PO files in dir, first substitute the extension with nothing,
 # then do so for the manual name to get the list of languages.
-LANGUAGES = $(subst $(name).,,$(subst .po,,$(wildcard *.po)))
+LANGUAGES = $(subst .po,,$(wildcard *.po))
 # Program name and option
 DBTOEPUB = dbtoepub
 XP = xsltproc --nonet --novalid --xinclude ../common/html.xsl
 DBLATEX = dblatex -T db2latex -b xetex -p ../common/dblatex.xsl
-SED_JA_REGEX = 's/dbtimestamp/dbtimestamp\ format=\"Y\ 年\ m\ 月\ d\ 日\"/'
+SED_JA_REGEX = 's/dbtimestamp?/dbtimestamp\ format=\"Y\ 年\ m\ 月\ d\ 日\"?/'
 
 # Use Make internal function 'subst': substitute -manual with nothing to get
 # the directory name.
 directory = $(subst -manual,,$(name))
 
+ifdef LINGUA
+TARGET_LANGS = $(LINGUA)
+else
+ifdef TARGET
+TARGET_LANGS = $(sort $(subst +, ,$(TARGET)))
+else
+TARGET_LANGS = $(sort en $(subst .po,,$(wildcard *.po)))
+endif
+endif
+
 all: build
 
 update:
 	../scripts/get_manual_version
 	../scripts/get_manual
-	po4a --no-translations --msgmerge-opt --no-location po4a.cfg
-	msgcat --no-location -o $(name).pot $(name).pot
+	echo '<?xml version="1.0" encoding="UTF-8"?>' > head.en.dbk
+	echo '<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"' >> head.en.dbk
+	echo '  "http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd">' >> head.en.dbk
+	echo '<article lang="en">' >> head.en.dbk
+	echo '<articleinfo>' >> head.en.dbk
+	echo '<title>$(DEBIAN_EDU_DOC_TITLE) <?dbtimestamp?></title>' >> head.en.dbk
+	echo '</articleinfo>' >> head.en.dbk
+	po4a --no-translations --porefs noline po4a.cfg
+	msgcat -o $(name).pot $(name).pot
 
 update-copyright:
 	../scripts/get_copyright $(path1) $(name)
@@ -111,6 +128,54 @@
 	$(DBTOEPUB) $(name).$(LINGUA).xml
 endif
 
+e: aio ep
+h: aio ht
+p: aio pd
+
+ep:
+	# build the EPUB version
+	for l in $(TARGET_LANGS) ; do \
+	  $(DBTOEPUB) $(name).$$l.xml ; \
+	done
+
+ht:
+	# build the HTML version
+	for l in $(TARGET_LANGS) ; do \
+	  $(XP) $(name).$$l.xml && mv index.html $(name).$$l.html ; \
+	  if [ "$$l" != "en" ] ; then \
+	    # restore internal links \
+	    sed -i "s/href=\"index.html/href=\"$(name).$$l.html/g" $(name).$$l.html ; \
+	  fi ; \
+	done
+
+pd:
+	for l in $(TARGET_LANGS) ; do \
+	  $(DBLATEX) $(name).$$l.xml --param=lingua=$$l ; \
+	done
+
+aio:
+	for l in $(TARGET_LANGS) ; do \
+	  LINGUA=$$l make $(name).$$l.xml ; \
+	done
+
+%.$(LINGUA).dbk: $(LINGUA).po
+	if [ "$(LINGUA)" != "en" ] ; then \
+	  po4a $(patsubst %,--translate-only %.$(LINGUA).dbk,head \
+	  $(shell grep . id)) po4a.cfg ;\
+	fi ; \
+	if [ "$(LINGUA)" = "ja" ] ; then \
+	  sed -i $(SED_JA_REGEX) head.ja.dbk ; \
+	fi
+
+$(name).$(LINGUA).xml: $(patsubst %,%.$(LINGUA).dbk,head $(shell grep . id))
+	cp head.$(LINGUA).dbk $(name).$(LINGUA).xml 
+	for i in `grep . id` ; do \
+	  cat $${i}.$(LINGUA).dbk | \
+	  perl -pe "s%</?article>%%g" >> $(name).$(LINGUA).xml ;\
+	done; \
+	echo "</article>" >> $(name).$(LINGUA).xml
+
+
 install: build
 	# en needs to be first
 	for f in en $(LANGUAGES) ; do \
@@ -165,3 +230,5 @@
 	rm -f *.epub
 	rm -f *.pdf
 	rm -f *.po~
+	rm -f *.[^e]*.dbk
+	rm -f po4a.cfg
Index: ../scripts/get_manual
===================================================================
--- ../scripts/get_manual	(rev. 47)
+++ ../scripts/get_manual	(wc)
@@ -32,7 +32,7 @@
 	exit 1
 fi
 TMPFILE=$(mktemp)
-xmlfile=$name.xml
+xmlfile=$name.en.xml
 
 # Make sure all section IDs are unique.  If file--subsection is not
 # unique, use file--section--subsection--subsubsection instead.
@@ -80,7 +80,7 @@
 	NAME=`echo "${i}" |sed "s/\(.*\)\/\(.*\)/\2/" `
 	# The ø -> oe conversion is a workaround for bug #657511.
  	ASCIINAME=$(echo $NAME  | tr "ø" "oe" | iconv -t ASCII//TRANSLIT)
-	TARGET=${NAME}.xml
+	TARGET=`echo "${i}.en.dbk" |sed "s/.*\///" `
 	echo "$TARGET		${url}${i}?action=show&mimetype=text/docbook"
 	# download the docbook version of the manual from the wiki and pipe it through sed to
 	#   - insert the build date
@@ -94,8 +94,6 @@
 	# replace tags:
 	sed "s%code>%computeroutput>%g" |
 	sed "s%/htdocs/rightsidebar/img/%./images/%g" |
-	# remove initial and final tags:
-	perl -pe "s%</?article>%%g" |
 	# remove tags and enclosed content:
 	sed "s#<articleinfo>\(.*\)</articleinfo>##g" |
 	# Comment useless remarks from XML: they just show an ugly drawing in XML
@@ -104,8 +102,8 @@
 	sed "s%<ulink url=\"https://wiki.debian.org/${path1}${i}/%<ulink url=\"https://wiki.debian.org/%g" |
 	# Make wiki self links actually local
 	sed "s%<link linkend=\"%<link linkend=\"${ASCIINAME}--%g" |
-	perl -pe "s%<ulink url=\"https://wiki.debian.org/${path1}/(HowTo/)?(\w+)#\">(.*?)</ulink>%<link linkend='\2'>\3</link>%g" |
-	perl -pe "s%<ulink url=\"https://wiki.debian.org/${path1}/(HowTo/)?(\w+)#(.*?)\">(.*?)</ulink>%<link linkend='\2--\3'>\4</link>%g" |
+	perl -pe "s%<ulink url=\"https://wiki.debian.org/${path1}/(HowTo/)?(\w+)#\">(.*?)</ulink>%<link linkend=\"\2\">\3</link>%g" |
+	perl -pe "s%<ulink url=\"https://wiki.debian.org/${path1}/(HowTo/)?(\w+)#(.*?)\">(.*?)</ulink>%<link linkend=\"\2--\3\">\4</link>%g" |
 	perl -000 -pe "s%<para><ulink url=\"https://wiki.debian.org/CategoryPermalink#\">CategoryPermalink</ulink>\s*</para>%%" |
 	unique_section_ids |
 	# introduce line breaks:
@@ -118,6 +116,8 @@
 	sed "s%FIXME%\nFIXME%g" |
 	sed "s%<itemizedlist>%\n<itemizedlist>%" |
 	sed "s%<listitem>%\n<listitem>%" |
+	sed 's%&quot;%"%g' |
+	sed 's%\]\]><\!\[CDATA\[%%g' |
 	# cut off first lines:
 	sed '1,4d' > $TARGET
 	if [ "$(grep -v FIXMEs $TARGET | grep FIXME | grep -v 'FIXME&gt;' | grep -v 'status ignore')" != "" ] ; then
@@ -133,35 +133,26 @@
 
 # add id= to <section>s and a linebreak at the end
 for i in `cat id` ; do
-	sed -i "0,/<section id=\".*\">/ s/<section id=\".*\">/<section id=\"$i\">/" ${i}.xml 
-	sed -i "$ s#>#>\n#" ${i}.xml
+	sed -i "0,/<section id=\".*\">/ s/<section id=\".*\">/<section id=\"$i\">/" ${i}.en.dbk 
+	sed -i "$ s#>#>\n#" ${i}.en.dbk
 done
 
 # paste it together
 rm -f $xmlfile
 for i in `cat id` ; do
-	cat ${i}.xml >> $xmlfile
-	rm ${i}.xml
-done
-rm id
+# get images and modify ${i}.dbk
+	echo "calling ../scripts/get_images ${i}.en.dbk $path1"
+	../scripts/get_images ${i}.en.dbk $path1
 
-# get images and modify $xmlfile
-echo "calling ../scripts/get_images $xmlfile $path1"
-../scripts/get_images $xmlfile $path1
-
 # turn links into internal references if appropriate
 # this needs to run after ./get_images
 #
 #  -0\777  read multiple lines
-perl -0\777 -pi -e "s/<ulink url=\"$path2(.*)\/(.*)\">(.*)\n<\/ulink>/<link linkend=\"\2\">\3<\/link>/g" $xmlfile
+	perl -0\777 -pi -e "s/<ulink url=\"$path2(.*)\/(.*)\">(.*)\n<\/ulink>/<link linkend=\"\2\">\3<\/link>/g" ${i}.en.dbk
+done
 
-# make it a docbook article again
-sed -i "1,/</ s#<#<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE article PUBLIC \"-//OASIS//DTD DocBook XML V4.4//EN\" \"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd\"><article lang=\"en\"><articleinfo><title>$DEBIAN_EDU_DOC_TITLE <?dbtimestamp?></title></articleinfo>\n<#" $xmlfile
-sed -i "$ s#>#>\n</article>#" $xmlfile
+LINGUA=en make $xmlfile
 
-# remove the first empty lines
-sed -i "1,2d" $xmlfile
-
 # clean it further
 TMPFILE2=$(mktemp)
 xmllint $xmlfile > $TMPFILE2
@@ -174,3 +165,16 @@
 	echo "====================" >> $TMPFILE
 fi
 mv $TMPFILE fixme-status.txt
+
+# create po4a.cfg
+echo [po_directory] . > po4a.cfg
+echo [po4a_alias:edu] docbook opt:\" \\ >> po4a.cfg
+echo "-o nodefault='<inlinemediaobject> <imagedata>' \\" >> po4a.cfg
+echo "-o untranslated='<listitem> <inlinemediaobject> <imagedata>' \\" >> po4a.cfg
+echo -M UTF-8 -k 5\" >> po4a.cfg
+echo [type: edu] head.en.dbk \$lang:head.\$lang.dbk >> po4a.cfg
+
+for i in `cat id` ; do
+echo [type: edu] ${i}.en.dbk \$lang:${i}.\$lang.dbk >> po4a.cfg
+done
+
Reply to:
Follow-Ups:
- Re: using divided source to find from where a string came
  - From: Holger Levsen <holger@layer-acht.org>
Prev by Date: Bug#834719: debian-edu: the provided debian-edu-tasks.desc file is unusable to install the selected profile(s)
Next by Date: Any advocacy material maybe?
Previous by thread: Re: Upgrading to Wheezy issues
Next by thread: Re: using divided source to find from where a string came
Index(es):
- Date
- Thread