---
config-ullmann.yaml | 4 +
scripts/cron_ftpnew_blends.sh | 1 +
udd/bibref_gatherer.py | 112 +-------------------
udd/generate_bibtex.py | 230 ++++++++++++++++++++++++++++++++++++++++++
4 files changed, 237 insertions(+), 110 deletions(-)
create mode 100644 udd/generate_bibtex.py
diff --git a/config-ullmann.yaml b/config-ullmann.yaml
index 901550e..06c8a19 100644
--- a/config-ullmann.yaml
+++ b/config-ullmann.yaml
@@ -45,6 +45,7 @@ general:
i18n-apps: module udd.i18n_apps_gatherer
hints: module udd.hints_gatherer
deferred: module udd.deferred_gatherer
+ generate-bibtex: module udd.generate_bibtex
timestamp-dir: /srv/udd.debian.org/timestamps
lock-dir: /srv/udd.debian.org/locks
archs:
@@ -471,3 +472,6 @@ vcswatch:
reproducible:
type: reproducible
+
+generate-bibtex:
+ type: generate-bibtex
diff --git a/scripts/cron_ftpnew_blends.sh b/scripts/cron_ftpnew_blends.sh
index fc0d087..c38c076 100755
--- a/scripts/cron_ftpnew_blends.sh
+++ b/scripts/cron_ftpnew_blends.sh
@@ -11,3 +11,4 @@ $UAR ftpnew
$UAR blends-prospective
# $UAR blends-metadata
$UAR blends-all
+$UAR generate-bibtex
diff --git a/udd/bibref_gatherer.py b/udd/bibref_gatherer.py
index 654d7e7..41f9618 100644
--- a/udd/bibref_gatherer.py
+++ b/udd/bibref_gatherer.py
@@ -6,8 +6,7 @@ This script imports bibliographic references from upstream-metadata.debian.net.
from gatherer import gatherer
from sys import stderr, exit
-from os import listdir, unlink, rename, access, X_OK
-from os.path import isfile
+from os import listdir
from fnmatch import fnmatch
import yaml
from psycopg2 import IntegrityError, InternalError
@@ -23,43 +22,9 @@ debug=0
def get_gatherer(connection, config, source):
return bibref_gatherer(connection, config, source)
-def rm_f(file):
- try:
- unlink(file)
- except OSError:
- pass
-
-def cleanup_tex_logs(basetexfile):
- rm_f(basetexfile+'.aux')
- rm_f(basetexfile+'.bbl')
- rm_f(basetexfile+'.blg')
- rm_f(basetexfile+'.log')
-
# seek for authors separated by ',' rather than by ' and '
seek_broken_authors_re = re.compile('^[^\s^,]+\s+[^\s^,]+\s*,\s*[^\s^,]+\s+[^\s^,]')
-def open_tex_process(texexe, basetexfile):
- if texexe == 'pdflatex':
- ptex = Popen(['pdflatex', '-interaction=batchmode', basetexfile], shell=False, stdout=PIPE)
- elif texexe == 'bibtex':
- ptex = Popen(['bibtex', basetexfile], shell=False, stdout=PIPE)
- else:
- return(False, 'Wrong exe: '+texexe)
- errstring=""
- if ptex.wait():
- if texexe == 'pdflatex':
- for logrow in ptex.communicate()[0].splitlines():
- if logrow.startswith('!'):
- errstring += logrow
- return(False, errstring)
- else:
- for logrow in ptex.communicate()[0].splitlines():
- if logrow.startswith('This is BibTeX'):
- continue
- errstring += logrow + '\n'
- return(True, errstring)
- return(True, errstring)
-
other_known_keys = ('Archive',
'Bug-Database',
'Cite-As',
@@ -297,10 +262,6 @@ class bibref_gatherer(gatherer):
handler.setFormatter(formatter)
self.log.addHandler(handler)
-
- self.bibtexfile = 'debian.bib'
- self.bibtex_example_tex = 'debian.tex'
-
def run(self):
my_config = self.my_config
#start harassing the DB, preparing the final inserts and making place
@@ -364,76 +325,7 @@ class bibref_gatherer(gatherer):
# commit before check to make sure the table is not locked in case LaTeX run will fail for whatever reason
self.connection.commit()
- # if there is a working LaTeX installation try to build a BibTeX database and test it by creating a debian.pdf file
- if isfile('/usr/bin/pdflatex') and access('/usr/bin/pdflatex', X_OK) and \
- isfile('/usr/bin/bibtex') and access('/usr/bin/bibtex', X_OK) and \
- ( isfile('/usr/share/texlive/texmf-dist/fonts/source/jknappen/ec/ecrm.mf') or \
- isfile('/usr/share/texmf-texlive/fonts/source/jknappen/ec/ecrm.mf') ) :
- # create BibTeX file
- bf = open(self.bibtexfile, 'w')
- cur.execute("SELECT * FROM bibtex()")
- for row in cur.fetchall():
- print >>bf, row[0]
- bf.close()
-
- # create LaTeX file to test BibTeX functionality
- bf = open(self.bibtex_example_tex, 'w')
- print >>bf, """\documentclass[10]{article}
-\usepackage[T1]{fontenc}
-\usepackage[utf8]{inputenc}
-\usepackage[left=2mm,top=2mm,right=2mm,bottom=2mm,nohead,nofoot]{geometry}
-\usepackage{longtable}
-\usepackage[super]{natbib}
-\setlongtables
-\\begin{document}
-\small
-\\begin{longtable}{llp{70mm}l}
-\\bf package & \\bf source & \\bf description & BibTeX key \\\\ \hline"""
-
- cur.execute("SELECT * FROM bibtex_example_data() AS (package text, source text, bibkey text, description text)")
- for row in cur.fetchall():
- print >>bf, row[0], '&', row[1], '&', row[3] , '&', row[2]+'\cite{'+row[2]+'} \\\\'
-
- print >>bf, """\end{longtable}
-
-% \\bibliographystyle{plain}
-% Try a bit harder by also including URL+DOI
-\\bibliographystyle{plainnat}
-\\bibliography{debian}
-
-\end{document}
-"""
- bf.close()
-
- # try to build debian.pdf file to test aboc LaTeX file
- basetexfile = self.bibtex_example_tex.replace('.tex','')
- cleanup_tex_logs(basetexfile)
- try:
- rename(basetexfile+'.pdf', basetexfile+'.pdf~')
- except OSError:
- pass
-
- (retcode,errstring) = open_tex_process('pdflatex', basetexfile)
- if not retcode:
- self.log.error("Problem in 1. PdfLaTeX run of %s.tex: `%s` --> please inspect %s.log" % (basetexfile, errstring, basetexfile))
- exit(1)
- (retcode,errstring) = open_tex_process('bibtex', basetexfile)
- if errstring != "":
- if not retcode:
- self.log.error("Problem in BibTeX run of %s.bib: `%s`" % (basetexfile, errstring))
- exit(1)
- self.log.error("Ignore the following problems in BibTeX run of %s.bib: `%s`" % (basetexfile, errstring))
- (retcode,errstring) = open_tex_process('pdflatex', basetexfile)
- if not retcode:
- self.log.error("Problem in 2. PdfLaTeX run of %s.tex: `%s` --> please inspect %s.log" % (basetexfile, errstring, basetexfile))
- exit(1)
- (retcode,errstring) = open_tex_process('pdflatex', basetexfile)
- if not retcode:
- self.log.error("Problem in 3. PdfLaTeX run of %s.tex: `%s` --> please inspect %s.log" % (basetexfile, errstring, basetexfile))
- exit(1)
-
- cleanup_tex_logs(basetexfile)
-
+
if __name__ == '__main__':
main()
diff --git a/udd/generate_bibtex.py b/udd/generate_bibtex.py
new file mode 100644
index 0000000..6ddc03a
--- /dev/null
+++ b/udd/generate_bibtex.py
@@ -0,0 +1,230 @@
+from gatherer import gatherer
+from os import unlink, rename, access, X_OK
+from os.path import isfile
+from subprocess import Popen, PIPE
+import logging
+import logging.handlers
+
+debug = 0
+
+def get_gatherer(connection, config, source):
+ return generate_bibtex(connection, config, source)
+
+def rm_f(file):
+ try:
+ unlink(file)
+ except OSError:
+ pass
+
+
+def cleanup_tex_logs(basetexfile):
+ rm_f(basetexfile+'.aux')
+ rm_f(basetexfile+'.bbl')
+ rm_f(basetexfile+'.blg')
+ rm_f(basetexfile+'.log')
+
+
+def open_tex_process(texexe, basetexfile):
+ if texexe == 'pdflatex':
+ ptex = Popen(['pdflatex', '-interaction=batchmode', basetexfile], shell=False, stdout=PIPE)
+ elif texexe == 'bibtex':
+ ptex = Popen(['bibtex', basetexfile], shell=False, stdout=PIPE)
+ else:
+ return(False, 'Wrong exe: '+texexe)
+ errstring=""
+ if ptex.wait():
+ if texexe == 'pdflatex':
+ for logrow in ptex.communicate()[0].splitlines():
+ if logrow.startswith('!'):
+ errstring += logrow
+ return(False, errstring)
+ else:
+ for logrow in ptex.communicate()[0].splitlines():
+ if logrow.startswith('This is BibTeX'):
+ continue
+ errstring += logrow + '\n'
+ return(True, errstring)
+ return(True, errstring)
+
+
+class generate_bibtex(gatherer):
+ """
+ Generate a debian.bib and debian.tex files
+ """
+
+ def __init__(self, connection, config, source):
+ gatherer.__init__(self, connection, config, source)
+
+ self.log = logging.getLogger(self.__class__.__name__)
+ if debug==1:
+ self.log.setLevel(logging.DEBUG)
+ else:
+ self.log.setLevel(logging.INFO)
+ handler = logging.handlers.RotatingFileHandler(filename=self.__class__.__name__+'.log',mode='w')
+ formatter = logging.Formatter("%(asctime)s - %(levelname)s - (%(lineno)d): %(message)s")
+ handler.setFormatter(formatter)
+ self.log.addHandler(handler)
+
+ self.bibtexfile = 'debian.bib'
+ self.bibtex_example_tex = 'debian.tex'
+ self.all_ref = 0 # to include all references from bibref table set it to 1
+
+ def run(self):
+ cur = self.cursor()
+
+ # if there is a working LaTeX installation try to build a BibTeX database and test it by creating a debian.pdf file
+ if isfile('/usr/bin/pdflatex') and access('/usr/bin/pdflatex', X_OK) and \
+ isfile('/usr/bin/bibtex') and access('/usr/bin/bibtex', X_OK) and \
+ ( isfile('/usr/share/texlive/texmf-dist/fonts/source/jknappen/ec/ecrm.mf') or \
+ isfile('/usr/share/texmf-texlive/fonts/source/jknappen/ec/ecrm.mf') ) :
+
+ # create BibTeX file
+ bf = open(self.bibtexfile, 'w')
+
+ if self.all_ref == 1:
+ query = "SELECT * FROM bibtex()"
+ else:
+ query = """ SELECT DISTINCT
+ CASE WHEN bibjournal.value IS NULL AND bibin.value IS NOT NULL AND bibpublisher.value IS NOT NULL THEN '@Book{' || bibkey.value
+ ELSE CASE WHEN bibauthor.value IS NULL OR bibjournal.value IS NULL THEN '@Misc{'|| bibkey.value ||
+ CASE WHEN bibauthor.value IS NULL THEN E',\n Key = "' || bibkey.value || '"' ELSE '' END -- without author we need a sorting key
+ ELSE '@Article{' || bibkey.value END END ||
+ CASE WHEN bibauthor.value IS NOT NULL THEN E',\n Author = {' || bibauthor.value || '}' ELSE '' END ||
+ CASE WHEN bibtitle.value IS NOT NULL THEN E',\n Title = "{' ||
+ replace(replace(replace(bibtitle.value,
+ '_', E'\\_'), --
+ '%', E'\\%'), --
+ E'\xe2\x80\x89', E'\\,') -- TeX syntax for '_' and UTF-8 "thin space"
+ -- see http://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=128&utf8=string-literal
+ || '}"'
+ ELSE '' END ||
+ CASE WHEN bibbooktitle.value IS NOT NULL THEN E',\n Booktitle = "{' || bibbooktitle.value || '}"' ELSE '' END ||
+ CASE WHEN bibyear.value IS NOT NULL THEN E',\n Year = {' || bibyear.value || '}' ELSE '' END ||
+ CASE WHEN bibmonth.value IS NOT NULL THEN E',\n Month = {' || bibmonth.value || '}' ELSE '' END ||
+ CASE WHEN bibjournal.value IS NOT NULL THEN E',\n Journal = {' || replace(bibjournal.value, '&', E'\\&') || '}' ELSE '' END ||
+ CASE WHEN bibaddress.value IS NOT NULL THEN E',\n Address = {' || bibaddress.value || '}' ELSE '' END ||
+ CASE WHEN bibpublisher.value IS NOT NULL THEN E',\n Publisher = {' || bibpublisher.value || '}' ELSE '' END ||
+ CASE WHEN bibvolume.value IS NOT NULL THEN E',\n Volume = {' || bibvolume.value || '}' ELSE '' END ||
+ CASE WHEN bibnumber.value IS NOT NULL THEN E',\n Number = {' || bibnumber.value || '}' ELSE '' END ||
+ CASE WHEN bibpages.value IS NOT NULL THEN E',\n Pages = {' || regexp_replace(bibpages.value, E'(\\d)-([\\d])', E'\\1--\\2') || '}' ELSE '' END ||
+ CASE WHEN biburl.value IS NOT NULL THEN E',\n URL = {' ||
+ replace(replace(replace(replace(biburl.value,
+ '_', E'\\_'), --
+ '%', E'\\%'), --
+ '&', E'\\&'), --
+ '~', E'\\~{}') --
+ || '}'
+ ELSE '' END ||
+ CASE WHEN bibdoi.value IS NOT NULL THEN E',\n DOI = {' ||
+ replace(replace(bibdoi.value,
+ '_', E'\\_'), --
+ '&', E'\\&') --
+ || '}'
+ ELSE '' END ||
+ CASE WHEN bibpmid.value IS NOT NULL THEN E',\n PMID = {' || bibpmid.value || '}' ELSE '' END ||
+ CASE WHEN bibeprint.value IS NOT NULL THEN E',\n EPrint = {' ||
+ replace(replace(replace(replace(bibeprint.value,
+ '_', E'\\_'), --
+ '%', E'\\%'), --
+ '&', E'\\&'), --
+ '~', E'\\~{}') --
+ || '}'
+ ELSE '' END ||
+ CASE WHEN bibin.value IS NOT NULL THEN E',\n In = {' || bibin.value || '}' ELSE '' END ||
+ CASE WHEN bibissn.value IS NOT NULL THEN E',\n ISSN = {' || bibissn.value || '}' ELSE '' END ||
+ E',\n}\n'
+ AS bibentry
+ -- p.source AS source,
+ -- p.rank AS rank,
+ FROM (SELECT DISTINCT source, package, rank FROM bibref) p
+ INNER JOIN sources s ON s.source = p.source
+ LEFT OUTER JOIN bibref bibkey ON p.source = bibkey.source AND bibkey.rank = p.rank AND bibkey.package = p.package AND bibkey.key = 'bibtex'
+ LEFT OUTER JOIN bibref bibyear ON p.source = bibyear.source AND bibyear.rank = p.rank AND bibyear.package = p.package AND bibyear.key = 'year'
+ LEFT OUTER JOIN bibref bibmonth ON p.source = bibmonth.source AND bibmonth.rank = p.rank AND bibmonth.package = p.package AND bibmonth.key = 'month'
+ LEFT OUTER JOIN bibref bibtitle ON p.source = bibtitle.source AND bibtitle.rank = p.rank AND bibtitle.package = p.package AND bibtitle.key = 'title'
+ LEFT OUTER JOIN bibref bibbooktitle ON p.source = bibbooktitle.source AND bibbooktitle.rank = p.rank AND bibbooktitle.package = p.package AND bibbooktitle.key = 'booktitle'
+ LEFT OUTER JOIN bibref bibauthor ON p.source = bibauthor.source AND bibauthor.rank = p.rank AND bibauthor.package = p.package AND bibauthor.key = 'author'
+ LEFT OUTER JOIN bibref bibjournal ON p.source = bibjournal.source AND bibjournal.rank = p.rank AND bibjournal.package = p.package AND bibjournal.key = 'journal'
+ LEFT OUTER JOIN bibref bibaddress ON p.source = bibaddress.source AND bibaddress.rank = p.rank AND bibaddress.package = p.package AND bibaddress.key = 'address'
+ LEFT OUTER JOIN bibref bibpublisher ON p.source = bibpublisher.source AND bibpublisher.rank = p.rank AND bibpublisher.package = p.package AND bibpublisher.key = 'publisher'
+ LEFT OUTER JOIN bibref bibvolume ON p.source = bibvolume.source AND bibvolume.rank = p.rank AND bibvolume.package = p.package AND bibvolume.key = 'volume'
+ LEFT OUTER JOIN bibref bibdoi ON p.source = bibdoi.source AND bibdoi.rank = p.rank AND bibdoi.package = p.package AND bibdoi.key = 'doi'
+ LEFT OUTER JOIN bibref bibpmid ON p.source = bibpmid.source AND bibpmid.rank = p.rank AND bibpmid.package = p.package AND bibpmid.key = 'pmid'LEFT OUTER JOIN bibref biburl ON p.source = biburl.source AND biburl.rank = p.rank AND biburl.package = p.package AND biburl.key = 'url'
+ LEFT OUTER JOIN bibref bibnumber ON p.source = bibnumber.source AND bibnumber.rank = p.rank AND bibnumber.package = p.package AND bibnumber.key = 'number'
+ LEFT OUTER JOIN bibref bibpages ON p.source = bibpages.source AND bibpages.rank = p.rank AND bibpages.package = p.package AND bibpages.key = 'pages'
+ LEFT OUTER JOIN bibref bibeprint ON p.source = bibeprint.source AND bibeprint.rank = p.rank AND bibeprint.package = p.package AND bibeprint.key = 'eprint'
+ LEFT OUTER JOIN bibref bibin ON p.source = bibin.source AND bibin.rank = p.rank AND bibin.package = p.package AND bibin.key = 'in'
+ LEFT OUTER JOIN bibref bibissn ON p.source = bibissn.source AND bibissn.rank = p.rank AND bibissn.package = p.package AND bibissn.key = 'issn'
+ ORDER BY bibentry -- p.source
+ ;"""
+
+ cur.execute(query)
+ for row in cur.fetchall():
+ print >>bf, row[0]
+
+ bf.close()
+
+ # create LaTeX file to test BibTeX functionality
+ bf = open(self.bibtex_example_tex, 'w')
+ print >>bf, """\documentclass[10]{article}
+\usepackage[T1]{fontenc}
+\usepackage[utf8]{inputenc}
+\usepackage[left=2mm,top=2mm,right=2mm,bottom=2mm,nohead,nofoot]{geometry}
+\usepackage{longtable}
+\usepackage[super]{natbib}
+\setlongtables
+\\begin{document}
+\small
+\\begin{longtable}{llp{70mm}l}
+\\bf package & \\bf source & \\bf description & BibTeX key \\\\ \hline"""
+
+ cur.execute("SELECT * FROM bibtex_example_data() AS (package text, source text, bibkey text, description text)")
+ for row in cur.fetchall():
+ print >>bf, row[0], '&', row[1], '&', row[3] , '&', row[2]+'\cite{'+row[2]+'} \\\\'
+
+ print >>bf, """\end{longtable}
+
+% \\bibliographystyle{plain}
+% Try a bit harder by also including URL+DOI
+\\bibliographystyle{plainnat}
+\\bibliography{debian}
+
+\end{document}
+"""
+ bf.close()
+
+ # try to build debian.pdf file to test aboc LaTeX file
+ basetexfile = self.bibtex_example_tex.replace('.tex','')
+ cleanup_tex_logs(basetexfile)
+ try:
+ rename(basetexfile+'.pdf', basetexfile+'.pdf~')
+ except OSError:
+ pass
+
+ (retcode,errstring) = open_tex_process('pdflatex', basetexfile)
+ if not retcode:
+ self.log.error("Problem in 1. PdfLaTeX run of %s.tex: `%s` --> please inspect %s.log" % (basetexfile, errstring, basetexfile))
+ exit(1)
+
+ (retcode,errstring) = open_tex_process('bibtex', basetexfile)
+ if errstring != "":
+ if not retcode:
+ self.log.error("Problem in BibTeX run of %s.bib: `%s`" % (basetexfile, errstring))
+ exit(1)
+ self.log.error("Ignore the following problems in BibTeX run of %s.bib: `%s`" % (basetexfile, errstring))
+
+ (retcode,errstring) = open_tex_process('pdflatex', basetexfile)
+ if not retcode:
+ self.log.error("Problem in 2. PdfLaTeX run of %s.tex: `%s` --> please inspect %s.log" % (basetexfile, errstring, basetexfile))
+ exit(1)
+
+ (retcode,errstring) = open_tex_process('pdflatex', basetexfile)
+ if not retcode:
+ self.log.error("Problem in 3. PdfLaTeX run of %s.tex: `%s` --> please inspect %s.log" % (basetexfile, errstring, basetexfile))
+ exit(1)
+
+ cleanup_tex_logs(basetexfile)
+
+if __name__ == '__main__':
+ main()
+
--
1.9.1