Bug#1009196: texlive-binaries: Reproducible content of .fmt files
Package: texlive-binaries
Version: 2021.20210626.59705-1
Severity: wishlist
Tags: patch
User: reproducible-builds@lists.alioth.debian.org
Usertags: randomness
X-Debbugs-Cc: reproducible-bugs@lists.alioth.debian.org
Hello maintainers of texlive-binaries,
While working on the “reproducible builds” effort [1], I have noticed that the
live image for Cinnamon in bookworm is no longer reproducible [2].
The attached patch makes the function 'exception_strings' always emit the
hyphenation exceptions in the same (sorted) order, so that the generated .fmt
files no longer vary from build to build.
I've written the solution in C; perhaps someone more versed in Lua could
rewrite it more elegantly.
(The Lua manual says of the 'next' function: 'The order in which the indices
are enumerated is not specified' [3].)
With the attached patch applied, I'm able (with the help of FORCE_SOURCE_DATE=1
and SOURCE_DATE_EPOCH) to reproducibly rebuild the .fmt files, as created by
'fmtutil --sys --all'.
Small test case to reproduce:
export FORCE_SOURCE_DATE=1
export SOURCE_DATE_EPOCH=$(date +%s)
for i in `seq 1 10`; do luahbtex -ini -jobname=luahbtex -progname=luahbtex \
luatex.ini > /dev/null; md5sum luahbtex.*; done
With kind regards,
Roland Clobus
[1]: https://wiki.debian.org/ReproducibleBuilds
[2]: https://jenkins.debian.net/view/live/job/reproducible_debian_live_build_cinnamon_bookworm/
[3]: http://www.lua.org/manual/5.4/manual.html#pdf-next
-- System Information:
Debian Release: bookworm/sid
APT prefers testing
APT policy: (990, 'testing'), (500, 'testing-debug'), (50, 'unstable')
Architecture: amd64 (x86_64)
Foreign Architectures: i386
Kernel: Linux 5.16.0-5-amd64 (SMP w/8 CPU threads; PREEMPT)
Locale: LANG=en_GB.UTF-8, LC_CTYPE=en_GB.UTF-8 (charmap=UTF-8), LANGUAGE=en_GB:en
Shell: /bin/sh linked to /bin/dash
Init: systemd (via /run/systemd/system)
LSM: AppArmor: enabled
Versions of packages texlive-binaries depends on:
ii dpkg 1.21.7
ii install-info 6.8-4+b1
ii libc6 2.33-7
ii libcairo2 1.16.0-5
ii libfontconfig1 2.13.1-4.4
ii libfreetype6 2.11.1+dfsg-1
ii libgcc-s1 12-20220319-1
ii libgraphite2-3 1.3.14-1
ii libharfbuzz0b 2.7.4-1
ii libicu67 67.1-7
ii libkpathsea6 2021.20210626.59705-1
ii libmpfr6 4.1.0-3
ii libpaper1 1.1.28+b1
ii libpixman-1-0 0.40.0-1
ii libpng16-16 1.6.37-3
ii libptexenc1 2021.20210626.59705-1
ii libstdc++6 12-20220319-1
ii libsynctex2 2021.20210626.59705-1
ii libteckit0 2.5.11+ds1-1
ii libtexlua53 2021.20210626.59705-1
ii libtexluajit2 2021.20210626.59705-1
ii libx11-6 2:1.7.2-2+b1
ii libxaw7 2:1.0.14-1
ii libxi6 2:1.8-1
ii libxmu6 2:1.1.3-3
ii libxpm4 1:3.5.12-1
ii libxt6 1:1.2.1-1
ii libzzip-0-13 0.13.72+dfsg.1-1.1
ii perl 5.34.0-3
ii t1utils 1.41-4
ii tex-common 6.17
ii zlib1g 1:1.2.11.dfsg-4
Versions of packages texlive-binaries recommends:
ii dvisvgm 2.13.3-1
ii texlive-base 2021.20220204-1
texlive-binaries suggests no packages.
-- no debconf information
diff --git a/texk/web2c/luatexdir/lang/texlang.c b/texk/web2c/luatexdir/lang/texlang.c
index ba7614ff..ccc0ec90 100644
--- a/texk/web2c/luatexdir/lang/texlang.c
+++ b/texk/web2c/luatexdir/lang/texlang.c
@@ -498,10 +498,20 @@ static char *hyphenation_exception(int exceptions, char *w)
return ret;
}
+char *unsorted_buffer = NULL;
+size_t *indexes = NULL;
+
+static int sort_func(const void *a, const void *b) {
+ size_t ia = *(size_t*)a;
+ size_t ib = *(size_t*)b;
+ return strcmp(&unsorted_buffer[ia], &unsorted_buffer[ib]);
+}
+
char *exception_strings(struct tex_language *lang)
{
const char *value;
size_t size = 0, current = 0;
+ size_t num_bytes = 0;
size_t l = 0;
char *ret = NULL;
if (lang->exceptions == 0)
@@ -509,19 +519,42 @@ char *exception_strings(struct tex_language *lang)
lua_checkstack(Luas, 2);
lua_rawgeti(Luas, LUA_REGISTRYINDEX, lang->exceptions);
if (lua_istable(Luas, -1)) {
- /*tex Iterate and join. */
+ /*tex Determine required memory. */
lua_pushnil(Luas);
while (lua_next(Luas, -2) != 0) {
value = lua_tolstring(Luas, -1, &l);
- if (current + 2 + l > size) {
- ret = xrealloc(ret, (unsigned) ((size + size / 5) + current + l + 1024));
- size = (size + size / 5) + current + l + 1024;
- }
- *(ret + current) = ' ';
- strcpy(ret + current + 1, value);
+ num_bytes += l + 1;
+ size++;
+ lua_pop(Luas, 1);
+ }
+ unsorted_buffer = xmalloc(num_bytes);
+ indexes = xmalloc(sizeof(size_t)*size);
+
+ /*tex Fetch values. */
+ current = 0;
+ size = 0;
+ lua_pushnil(Luas);
+ while (lua_next(Luas, -2) != 0) {
+ value = lua_tolstring(Luas, -1, &l);
+ strcpy(unsorted_buffer + current, value);
+ indexes[size++] = current;
current += l + 1;
lua_pop(Luas, 1);
}
+ /*tex Sort and join. */
+ qsort(indexes, size, sizeof(size_t), sort_func);
+ ret = xmalloc(num_bytes);
+ current = 0;
+ for (l = 0; l < size; l++) {
+ strcpy(ret + current, &unsorted_buffer[indexes[l]]);
+ current += strlen(&unsorted_buffer[indexes[l]]);
+ ret[current] = ' ';
+ current += 1;
+ }
+ ret[current - 1] = '\0';
+
+ free(unsorted_buffer);
+ free(indexes);
}
return ret;
}
Reply to: