[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

[PATCH] Make use of a separate script to generate debian.{bib,tex} files



---
 config-ullmann.yaml           |   4 +
 scripts/cron_ftpnew_blends.sh |   1 +
 udd/bibref_gatherer.py        | 112 +-------------------
 udd/generate_bibtex.py        | 230 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 237 insertions(+), 110 deletions(-)
 create mode 100644 udd/generate_bibtex.py

diff --git a/config-ullmann.yaml b/config-ullmann.yaml
index 901550e..06c8a19 100644
--- a/config-ullmann.yaml
+++ b/config-ullmann.yaml
@@ -45,6 +45,7 @@ general:
     i18n-apps: module udd.i18n_apps_gatherer
     hints: module udd.hints_gatherer
     deferred: module udd.deferred_gatherer
+    generate-bibtex: module udd.generate_bibtex
   timestamp-dir: /srv/udd.debian.org/timestamps
   lock-dir: /srv/udd.debian.org/locks
   archs:
@@ -471,3 +472,6 @@ vcswatch:
 
 reproducible:
   type: reproducible
+
+generate-bibtex:
+  type: generate-bibtex
diff --git a/scripts/cron_ftpnew_blends.sh b/scripts/cron_ftpnew_blends.sh
index fc0d087..c38c076 100755
--- a/scripts/cron_ftpnew_blends.sh
+++ b/scripts/cron_ftpnew_blends.sh
@@ -11,3 +11,4 @@ $UAR ftpnew
 $UAR blends-prospective
 # $UAR blends-metadata
 $UAR blends-all
+$UAR generate-bibtex
diff --git a/udd/bibref_gatherer.py b/udd/bibref_gatherer.py
index 654d7e7..41f9618 100644
--- a/udd/bibref_gatherer.py
+++ b/udd/bibref_gatherer.py
@@ -6,8 +6,7 @@ This script imports bibliographic references from upstream-metadata.debian.net.
 
 from gatherer import gatherer
 from sys import stderr, exit
-from os import listdir, unlink, rename, access, X_OK
-from os.path import isfile
+from os import listdir
 from fnmatch import fnmatch
 import yaml
 from psycopg2 import IntegrityError, InternalError
@@ -23,43 +22,9 @@ debug=0
 def get_gatherer(connection, config, source):
   return bibref_gatherer(connection, config, source)
 
-def rm_f(file):
-  try:
-    unlink(file)
-  except OSError:
-    pass
-
-def cleanup_tex_logs(basetexfile):
-  rm_f(basetexfile+'.aux')
-  rm_f(basetexfile+'.bbl')
-  rm_f(basetexfile+'.blg')
-  rm_f(basetexfile+'.log')
-
 # seek for authors separated by ',' rather than by ' and '
 seek_broken_authors_re = re.compile('^[^\s^,]+\s+[^\s^,]+\s*,\s*[^\s^,]+\s+[^\s^,]')
 
-def open_tex_process(texexe, basetexfile):
-  if texexe == 'pdflatex':
-    ptex = Popen(['pdflatex', '-interaction=batchmode', basetexfile], shell=False, stdout=PIPE)
-  elif texexe == 'bibtex':
-    ptex = Popen(['bibtex', basetexfile], shell=False, stdout=PIPE)
-  else:
-    return(False, 'Wrong exe: '+texexe)
-  errstring=""
-  if ptex.wait():
-    if texexe == 'pdflatex':
-      for logrow in ptex.communicate()[0].splitlines():
-        if logrow.startswith('!'):
-          errstring += logrow
-      return(False, errstring)
-    else:
-      for logrow in ptex.communicate()[0].splitlines():
-        if logrow.startswith('This is BibTeX'):
-          continue
-        errstring += logrow + '\n'
-      return(True, errstring)
-  return(True, errstring)
-
 other_known_keys = ('Archive',
                     'Bug-Database',
                     'Cite-As',
@@ -297,10 +262,6 @@ class bibref_gatherer(gatherer):
     handler.setFormatter(formatter)
     self.log.addHandler(handler)
 
-
-    self.bibtexfile = 'debian.bib'
-    self.bibtex_example_tex = 'debian.tex'
-
   def run(self):
     my_config = self.my_config
     #start harassing the DB, preparing the final inserts and making place
@@ -364,76 +325,7 @@ class bibref_gatherer(gatherer):
     # commit before check to make sure the table is not locked in case LaTeX run will fail for whatever reason
     self.connection.commit()
 
-    # if there is a working LaTeX installation try to build a BibTeX database and test it by creating a debian.pdf file
-    if isfile('/usr/bin/pdflatex') and access('/usr/bin/pdflatex', X_OK) and \
-       isfile('/usr/bin/bibtex')   and access('/usr/bin/bibtex', X_OK) and \
-       ( isfile('/usr/share/texlive/texmf-dist/fonts/source/jknappen/ec/ecrm.mf') or \
-         isfile('/usr/share/texmf-texlive/fonts/source/jknappen/ec/ecrm.mf') ) :
-      # create BibTeX file
-      bf = open(self.bibtexfile, 'w')
-      cur.execute("SELECT * FROM bibtex()")
-      for row in cur.fetchall():
-	print >>bf, row[0]
-      bf.close()
-
-      # create LaTeX file to test BibTeX functionality
-      bf = open(self.bibtex_example_tex, 'w')
-      print >>bf, """\documentclass[10]{article}
-\usepackage[T1]{fontenc}
-\usepackage[utf8]{inputenc}
-\usepackage[left=2mm,top=2mm,right=2mm,bottom=2mm,nohead,nofoot]{geometry}
-\usepackage{longtable}
-\usepackage[super]{natbib}
-\setlongtables
-\\begin{document}
-\small
-\\begin{longtable}{llp{70mm}l}
-\\bf package & \\bf source & \\bf description & BibTeX key \\\\ \hline"""
-
-      cur.execute("SELECT * FROM bibtex_example_data() AS (package text, source text, bibkey text, description text)")
-      for row in cur.fetchall():
-	print >>bf, row[0], '&', row[1], '&', row[3] , '&', row[2]+'\cite{'+row[2]+'} \\\\'
-
-      print >>bf, """\end{longtable}
-
-% \\bibliographystyle{plain}
-% Try a bit harder by also including URL+DOI
-\\bibliographystyle{plainnat}
-\\bibliography{debian}
-
-\end{document}
-"""
-      bf.close()
-
-      # try to build debian.pdf file to test aboc LaTeX file
-      basetexfile = self.bibtex_example_tex.replace('.tex','')
-      cleanup_tex_logs(basetexfile)
-      try:
-        rename(basetexfile+'.pdf', basetexfile+'.pdf~')
-      except OSError:
-        pass
-
-      (retcode,errstring) = open_tex_process('pdflatex', basetexfile)
-      if not retcode:
-        self.log.error("Problem in 1. PdfLaTeX run of %s.tex: `%s` --> please inspect %s.log" % (basetexfile, errstring, basetexfile))
-        exit(1)
-      (retcode,errstring) = open_tex_process('bibtex', basetexfile)
-      if errstring != "":
-        if not retcode:
-          self.log.error("Problem in BibTeX run of %s.bib: `%s`" % (basetexfile, errstring))
-          exit(1)
-        self.log.error("Ignore the following problems in BibTeX run of %s.bib: `%s`" % (basetexfile, errstring))
-      (retcode,errstring) = open_tex_process('pdflatex', basetexfile)
-      if not retcode:
-        self.log.error("Problem in 2. PdfLaTeX run of %s.tex: `%s` --> please inspect %s.log" % (basetexfile, errstring, basetexfile))
-        exit(1)
-      (retcode,errstring) = open_tex_process('pdflatex', basetexfile)
-      if not retcode:
-        self.log.error("Problem in 3. PdfLaTeX run of %s.tex: `%s` --> please inspect %s.log" % (basetexfile, errstring, basetexfile))
-        exit(1)
-
-      cleanup_tex_logs(basetexfile)
-
+    
 if __name__ == '__main__':
   main()
 
diff --git a/udd/generate_bibtex.py b/udd/generate_bibtex.py
new file mode 100644
index 0000000..6ddc03a
--- /dev/null
+++ b/udd/generate_bibtex.py
@@ -0,0 +1,230 @@
+from gatherer import  gatherer
+from os import unlink, rename, access, X_OK
+from os.path import isfile
+from subprocess import Popen, PIPE
+import logging
+import logging.handlers
+
+debug = 0
+
+def get_gatherer(connection, config, source):
+  return generate_bibtex(connection, config, source)
+
+def rm_f(file):
+  try:
+    unlink(file)
+  except OSError:
+    pass
+
+
+def cleanup_tex_logs(basetexfile):
+  rm_f(basetexfile+'.aux')
+  rm_f(basetexfile+'.bbl')
+  rm_f(basetexfile+'.blg')
+  rm_f(basetexfile+'.log')
+
+
+def open_tex_process(texexe, basetexfile):
+  if texexe == 'pdflatex':
+    ptex = Popen(['pdflatex', '-interaction=batchmode', basetexfile], shell=False, stdout=PIPE)
+  elif texexe == 'bibtex':
+    ptex = Popen(['bibtex', basetexfile], shell=False, stdout=PIPE)
+  else:
+    return(False, 'Wrong exe: '+texexe)
+  errstring=""
+  if ptex.wait():
+    if texexe == 'pdflatex':
+      for logrow in ptex.communicate()[0].splitlines():
+        if logrow.startswith('!'):
+          errstring += logrow
+      return(False, errstring)
+    else:
+      for logrow in ptex.communicate()[0].splitlines():
+        if logrow.startswith('This is BibTeX'):
+          continue
+        errstring += logrow + '\n'
+      return(True, errstring)
+  return(True, errstring)
+
+
+class generate_bibtex(gatherer):
+  """
+  Generate the debian.bib and debian.tex files
+  """
+
+  def __init__(self, connection, config, source):
+    gatherer.__init__(self, connection, config, source)
+
+    self.log = logging.getLogger(self.__class__.__name__)
+    if debug==1:
+        self.log.setLevel(logging.DEBUG)
+    else:
+        self.log.setLevel(logging.INFO)
+    handler = logging.handlers.RotatingFileHandler(filename=self.__class__.__name__+'.log',mode='w')
+    formatter = logging.Formatter("%(asctime)s - %(levelname)s - (%(lineno)d): %(message)s")
+    handler.setFormatter(formatter)
+    self.log.addHandler(handler)
+
+    self.bibtexfile = 'debian.bib'
+    self.bibtex_example_tex = 'debian.tex'
+    self.all_ref = 0	# to include all references from bibref table set it to 1
+
+  def run(self):
+    cur = self.cursor()
+    
+    # if there is a working LaTeX installation try to build a BibTeX database and test it by creating a debian.pdf file
+    if isfile('/usr/bin/pdflatex') and access('/usr/bin/pdflatex', X_OK) and \
+       isfile('/usr/bin/bibtex')   and access('/usr/bin/bibtex', X_OK) and \
+       ( isfile('/usr/share/texlive/texmf-dist/fonts/source/jknappen/ec/ecrm.mf') or \
+         isfile('/usr/share/texmf-texlive/fonts/source/jknappen/ec/ecrm.mf') ) :
+      
+      # create BibTeX file
+      bf = open(self.bibtexfile, 'w')
+
+      if self.all_ref == 1:
+        query = "SELECT * FROM bibtex()"
+      else:
+      	query = """ SELECT DISTINCT 
+                        CASE WHEN bibjournal.value IS NULL AND bibin.value IS NOT NULL AND bibpublisher.value IS NOT NULL THEN '@Book{' || bibkey.value
+                            ELSE CASE WHEN bibauthor.value IS NULL OR bibjournal.value IS NULL THEN '@Misc{'|| bibkey.value ||
+                                 CASE WHEN bibauthor.value IS NULL THEN E',\n  Key     = "' || bibkey.value || '"' ELSE '' END -- without author we need a sorting key
+                            ELSE '@Article{' || bibkey.value END END  ||
+                        CASE WHEN bibauthor.value  IS NOT NULL THEN E',\n  Author  = {' || bibauthor.value  || '}' ELSE '' END ||
+                        CASE WHEN bibtitle.value   IS NOT NULL THEN E',\n  Title   = "{' || 
+                          replace(replace(replace(bibtitle.value,
+                                          '_', E'\\_'),            --
+                                          '%', E'\\%'),            --
+                                          E'\xe2\x80\x89', E'\\,') -- TeX syntax for '_' and UTF-8 "thin space"
+                                          -- see http://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=128&utf8=string-literal
+                                  || '}"'
+                        ELSE '' END ||
+                        CASE WHEN bibbooktitle.value IS NOT NULL THEN E',\n  Booktitle = "{' || bibbooktitle.value || '}"' ELSE '' END ||
+                        CASE WHEN bibyear.value    IS NOT NULL THEN E',\n  Year    = {' || bibyear.value    || '}' ELSE '' END ||
+                        CASE WHEN bibmonth.value   IS NOT NULL THEN E',\n  Month   = {' || bibmonth.value   || '}' ELSE '' END ||
+                        CASE WHEN bibjournal.value IS NOT NULL THEN E',\n  Journal = {' || replace(bibjournal.value, '&', E'\\&') || '}' ELSE '' END ||
+                        CASE WHEN bibaddress.value IS NOT NULL THEN E',\n  Address = {' || bibaddress.value || '}' ELSE '' END ||
+                        CASE WHEN bibpublisher.value IS NOT NULL THEN E',\n  Publisher = {' || bibpublisher.value || '}' ELSE '' END ||
+                        CASE WHEN bibvolume.value  IS NOT NULL THEN E',\n  Volume  = {' || bibvolume.value  || '}' ELSE '' END ||
+                        CASE WHEN bibnumber.value  IS NOT NULL THEN E',\n  Number  = {' || bibnumber.value  || '}' ELSE '' END ||
+                        CASE WHEN bibpages.value   IS NOT NULL THEN E',\n  Pages   = {' || regexp_replace(bibpages.value, E'(\\d)-([\\d])', E'\\1--\\2')   || '}' ELSE '' END ||
+                        CASE WHEN biburl.value     IS NOT NULL THEN E',\n  URL     = {' ||
+                          replace(replace(replace(replace(biburl.value,
+                                          '_', E'\\_'),           --
+                                          '%', E'\\%'),           --
+                                          '&', E'\\&'),           --
+                                          '~', E'\\~{}')          --
+                                  || '}'
+                        ELSE '' END ||
+                        CASE WHEN bibdoi.value     IS NOT NULL THEN E',\n  DOI     = {' ||
+                          replace(replace(bibdoi.value,
+                                          '_', E'\\_'),           --
+                                          '&', E'\\&')            --
+                                  || '}'
+                        ELSE '' END ||
+                        CASE WHEN bibpmid.value    IS NOT NULL THEN E',\n  PMID    = {' || bibpmid.value    || '}' ELSE '' END ||
+                        CASE WHEN bibeprint.value  IS NOT NULL THEN E',\n  EPrint  = {' ||
+                          replace(replace(replace(replace(bibeprint.value,
+                                         '_', E'\\_'),           --
+                                         '%', E'\\%'),           --
+                                         '&', E'\\&'),           --
+                                         '~', E'\\~{}')          --
+                                  || '}'
+                        ELSE '' END ||
+                        CASE WHEN bibin.value      IS NOT NULL THEN E',\n  In      = {' || bibin.value      || '}' ELSE '' END ||
+                        CASE WHEN bibissn.value    IS NOT NULL THEN E',\n  ISSN    = {' || bibissn.value    || '}' ELSE '' END ||
+                        E',\n}\n'
+                        AS bibentry
+                        --         p.source         AS source,
+                        --         p.rank           AS rank,
+                FROM (SELECT DISTINCT source, package, rank FROM bibref) p
+                INNER JOIN sources s ON s.source = p.source
+                LEFT OUTER JOIN bibref bibkey     ON p.source = bibkey.source     AND bibkey.rank     = p.rank AND bibkey.package     = p.package AND bibkey.key     = 'bibtex'
+                LEFT OUTER JOIN bibref bibyear    ON p.source = bibyear.source    AND bibyear.rank    = p.rank AND bibyear.package    = p.package AND bibyear.key    = 'year'  
+                LEFT OUTER JOIN bibref bibmonth   ON p.source = bibmonth.source   AND bibmonth.rank   = p.rank AND bibmonth.package   = p.package AND bibmonth.key   = 'month'  
+                LEFT OUTER JOIN bibref bibtitle   ON p.source = bibtitle.source   AND bibtitle.rank   = p.rank AND bibtitle.package   = p.package AND bibtitle.key   = 'title'  
+                LEFT OUTER JOIN bibref bibbooktitle ON p.source = bibbooktitle.source AND bibbooktitle.rank = p.rank AND bibbooktitle.package = p.package AND bibbooktitle.key = 'booktitle'  
+                LEFT OUTER JOIN bibref bibauthor  ON p.source = bibauthor.source  AND bibauthor.rank  = p.rank AND bibauthor.package  = p.package AND bibauthor.key  = 'author'
+                LEFT OUTER JOIN bibref bibjournal ON p.source = bibjournal.source AND bibjournal.rank = p.rank AND bibjournal.package = p.package AND bibjournal.key = 'journal'
+                LEFT OUTER JOIN bibref bibaddress ON p.source = bibaddress.source AND bibaddress.rank = p.rank AND bibaddress.package = p.package AND bibaddress.key = 'address'
+                LEFT OUTER JOIN bibref bibpublisher ON p.source = bibpublisher.source AND bibpublisher.rank = p.rank AND bibpublisher.package = p.package AND bibpublisher.key = 'publisher'
+                LEFT OUTER JOIN bibref bibvolume  ON p.source = bibvolume.source  AND bibvolume.rank  = p.rank AND bibvolume.package  = p.package AND bibvolume.key  = 'volume'
+                LEFT OUTER JOIN bibref bibdoi     ON p.source = bibdoi.source     AND bibdoi.rank     = p.rank AND bibdoi.package     = p.package AND bibdoi.key     = 'doi'
+                LEFT OUTER JOIN bibref bibpmid    ON p.source = bibpmid.source    AND bibpmid.rank    = p.rank AND bibpmid.package    = p.package AND bibpmid.key    = 'pmid'LEFT OUTER JOIN bibref biburl     ON p.source = biburl.source     AND biburl.rank     = p.rank AND biburl.package     = p.package AND biburl.key     = 'url'
+                LEFT OUTER JOIN bibref bibnumber  ON p.source = bibnumber.source  AND bibnumber.rank  = p.rank AND bibnumber.package  = p.package AND bibnumber.key  = 'number'
+                LEFT OUTER JOIN bibref bibpages   ON p.source = bibpages.source   AND bibpages.rank   = p.rank AND bibpages.package   = p.package AND bibpages.key   = 'pages'
+                LEFT OUTER JOIN bibref bibeprint  ON p.source = bibeprint.source  AND bibeprint.rank  = p.rank AND bibeprint.package  = p.package AND bibeprint.key  = 'eprint'
+                LEFT OUTER JOIN bibref bibin      ON p.source = bibin.source      AND bibin.rank      = p.rank AND bibin.package      = p.package AND bibin.key      = 'in'
+                LEFT OUTER JOIN bibref bibissn    ON p.source = bibissn.source    AND bibissn.rank    = p.rank AND bibissn.package    = p.package AND bibissn.key    = 'issn'
+                ORDER BY bibentry -- p.source
+                ;"""
+      
+      cur.execute(query)  
+      for row in cur.fetchall():
+          print >>bf, row[0]
+
+      bf.close()
+
+      # create LaTeX file to test BibTeX functionality
+      bf = open(self.bibtex_example_tex, 'w')
+      print >>bf, """\documentclass[10]{article}
+\usepackage[T1]{fontenc}
+\usepackage[utf8]{inputenc}
+\usepackage[left=2mm,top=2mm,right=2mm,bottom=2mm,nohead,nofoot]{geometry}
+\usepackage{longtable}
+\usepackage[super]{natbib}
+\setlongtables
+\\begin{document}
+\small
+\\begin{longtable}{llp{70mm}l}
+\\bf package & \\bf source & \\bf description & BibTeX key \\\\ \hline"""
+
+      cur.execute("SELECT * FROM bibtex_example_data() AS (package text, source text, bibkey text, description text)")
+      for row in cur.fetchall():
+  	print >>bf, row[0], '&', row[1], '&', row[3] , '&', row[2]+'\cite{'+row[2]+'} \\\\'
+	
+      print >>bf, """\end{longtable}
+
+% \\bibliographystyle{plain}
+% Try a bit harder by also including URL+DOI
+\\bibliographystyle{plainnat}
+\\bibliography{debian}
+
+\end{document}
+"""
+      bf.close()
+
+      # try to build debian.pdf file to test above LaTeX file
+      basetexfile = self.bibtex_example_tex.replace('.tex','')
+      cleanup_tex_logs(basetexfile)
+      try:
+        rename(basetexfile+'.pdf', basetexfile+'.pdf~')
+      except OSError:
+        pass
+
+      (retcode,errstring) = open_tex_process('pdflatex', basetexfile)
+      if not retcode:
+        self.log.error("Problem in 1. PdfLaTeX run of %s.tex: `%s` --> please inspect %s.log" % (basetexfile, errstring, basetexfile))
+        exit(1)
+      
+      (retcode,errstring) = open_tex_process('bibtex', basetexfile)
+      if errstring != "":
+        if not retcode:
+          self.log.error("Problem in BibTeX run of %s.bib: `%s`" % (basetexfile, errstring))
+          exit(1)
+        self.log.error("Ignore the following problems in BibTeX run of %s.bib: `%s`" % (basetexfile, errstring))
+      
+      (retcode,errstring) = open_tex_process('pdflatex', basetexfile)
+      if not retcode:
+        self.log.error("Problem in 2. PdfLaTeX run of %s.tex: `%s` --> please inspect %s.log" % (basetexfile, errstring, basetexfile))
+        exit(1)
+      
+      (retcode,errstring) = open_tex_process('pdflatex', basetexfile)
+      if not retcode:
+        self.log.error("Problem in 3. PdfLaTeX run of %s.tex: `%s` --> please inspect %s.log" % (basetexfile, errstring, basetexfile))
+        exit(1)
+
+      cleanup_tex_logs(basetexfile)
+
+if __name__ == '__main__':
+  main()
+
-- 
1.9.1


Reply to: