Bug#1009196: texlive-binaries: Reproducible content of .fmt files
Hi Luigi, hi all luatex devs,
here at Debian we got a bug report about the reproducibility of luatex
format dumps. It contains a patch to make the hyphenation exception list
sorted. (I attach the patch.)
Could you please check whether this is still relevant for the
latest release of luatex?
Thanks
Norbert
On Fri, 08 Apr 2022, Roland Clobus wrote:
> Hello maintainers of texlive-binaries,
>
> While working on the “reproducible builds” effort [1], I have noticed that the
> live image for Cinnamon in bookworm is no longer reproducible [2].
>
> The attached patch ensures that the output of the function 'exception_strings'
> always uses the same order of the hyphenation exceptions.
> I've written the solution in C, perhaps someone more versed in lua could
> rewrite it more elegantly.
> (The lua manual says for the 'next' function: 'The order in which the indices
> are enumerated is not specified' [3])
>
> With the attached patch applied, I'm able (with the help of FORCE_SOURCE_DATE=1
> and SOURCE_DATE_EPOCH) to reproducibly rebuild the .fmt files, as created by
> 'fmtutil --sys --all'.
>
> Small test case to reproduce:
> export FORCE_SOURCE_DATE=1
> export SOURCE_DATE_EPOCH=$(date +%s)
> for i in `seq 1 10`; do luahbtex -ini -jobname=luahbtex -progname=luabhtex
> luatex.ini > /dev/null; md5sum luahbtex.*; done
>
> With kind regards,
> Roland Clobus
>
> [1]: https://wiki.debian.org/ReproducibleBuilds
> [2]:
> https://jenkins.debian.net/view/live/job/reproducible_debian_live_build_cinnamon_bookworm/
> [3]: http://www.lua.org/manual/5.4/manual.html#pdf-next
>
--
PREINING Norbert https://www.preining.info
Mercari Inc. + IFMGA Guide + TU Wien + TeX Live
GPG: 0x860CDC13 fp: F7D8 A928 26E3 16A1 9FA0 ACF0 6CAC A448 860C DC13
diff --git a/texk/web2c/luatexdir/lang/texlang.c b/texk/web2c/luatexdir/lang/texlang.c
index ba7614ff..ccc0ec90 100644
--- a/texk/web2c/luatexdir/lang/texlang.c
+++ b/texk/web2c/luatexdir/lang/texlang.c
@@ -498,10 +498,20 @@ static char *hyphenation_exception(int exceptions, char *w)
return ret;
}
+char *unsorted_buffer = NULL;
+size_t *indexes = NULL;
+
+static int sort_func(const void *a, const void *b) {
+ size_t ia = *(size_t*)a;
+ size_t ib = *(size_t*)b;
+ return strcmp(&unsorted_buffer[ia], &unsorted_buffer[ib]);
+}
+
char *exception_strings(struct tex_language *lang)
{
const char *value;
size_t size = 0, current = 0;
+ size_t num_bytes = 0;
size_t l = 0;
char *ret = NULL;
if (lang->exceptions == 0)
@@ -509,19 +519,42 @@ char *exception_strings(struct tex_language *lang)
lua_checkstack(Luas, 2);
lua_rawgeti(Luas, LUA_REGISTRYINDEX, lang->exceptions);
if (lua_istable(Luas, -1)) {
- /*tex Iterate and join. */
+ /*tex Determine required memory. */
lua_pushnil(Luas);
while (lua_next(Luas, -2) != 0) {
value = lua_tolstring(Luas, -1, &l);
- if (current + 2 + l > size) {
- ret = xrealloc(ret, (unsigned) ((size + size / 5) + current + l + 1024));
- size = (size + size / 5) + current + l + 1024;
- }
- *(ret + current) = ' ';
- strcpy(ret + current + 1, value);
+ num_bytes += l + 1;
+ size++;
+ lua_pop(Luas, 1);
+ }
+ unsorted_buffer = xmalloc(num_bytes);
+ indexes = xmalloc(sizeof(size_t)*size);
+
+ /*tex Fetch values. */
+ current = 0;
+ size = 0;
+ lua_pushnil(Luas);
+ while (lua_next(Luas, -2) != 0) {
+ value = lua_tolstring(Luas, -1, &l);
+ strcpy(unsorted_buffer + current, value);
+ indexes[size++] = current;
current += l + 1;
lua_pop(Luas, 1);
}
+ /*tex Sort and join. */
+ qsort(indexes, size, sizeof(size_t), sort_func);
+ ret = xmalloc(num_bytes);
+ current = 0;
+ for (l = 0; l < size; l++) {
+ strcpy(ret + current, &unsorted_buffer[indexes[l]]);
+ current += strlen(&unsorted_buffer[indexes[l]]);
+ ret[current] = ' ';
+ current += 1;
+ }
+ ret[current - 1] = '\0';
+
+ free(unsorted_buffer);
+ free(indexes);
}
return ret;
}
Reply to: