[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#801161: anna: handle multiple udeb versions in Packages file



On Wed, Oct 07, 2015 at 02:51:33AM +0200, Cyril Brulebois wrote:
> Steven Chamberlain <steven@pyro.eu.org> (2015-10-07):
> > If we had multiple udeb sources (Bug#345419), it could mean that
> > anna reads a Packages file having multiple versions for some udebs.
> > It's very easy to make net-retriever generate such a file, combining
> > multiple suites, e.g. stable + stable-proposed-updates;  or maybe
> > stable + stable-backports.
> 
> TBH, I'm not sure whether this should be dealt with in anna or in
> net-retriever. The latter is responsible for the addition in the first
> place, so could be considered as the one responsible for avoiding
> duplicate entries. Another way to look at it would be: net-retriever
> knows this limitation in anna, and is fine with trusting anna to do the
> "only pick the last occurrence" selection, provided n-r cats everything
> in the right order.
> 
> ISTR some shell dance in net-retriever from my old patchset; having some
> C code in anna instead doesn't seem crazier.
> 
> 
> Taking a step back, one could argue that the logic could be slightly
> different, like picking the highest version. Even if it's a corner case,
> we could imagine having bits merged in a point release that's higher
> than what's in backports. More interestingly, that would mean anna
> doesn't care about the order in which the files were cat'd together.

If it's helpful, here's most of the patch I wrote for Ubuntu's
net-retriever a while back that implements basically what you suggest
here.  The remaining piece is that you'd need to actually call
deduplicate at an appropriate point.

It might be more robust to have this in anna though, so that it would
work for other retrievers that might somehow end up in a similar
situation.

Beware of doing it in shell; an earlier version of my patch did that and
it was prohibitively slow (~10 minutes) for large Packages files, which
we ended up with as a result of lots of kernel ABIs.

  * Deduplicate Packages files before passing them to anna.

diff -Nru net-retriever-1.39/Makefile net-retriever-1.39ubuntu1/Makefile
--- net-retriever-1.39/Makefile	1970-01-01 01:00:00.000000000 +0100
+++ net-retriever-1.39ubuntu1/Makefile	2013-07-16 12:24:57.000000000 +0100
@@ -0,0 +1,7 @@
+CFLAGS := -Os -fomit-frame-pointer -g -Wall
+LDLIBS := -ldebian-installer
+
+all: deduplicate
+
+clean:
+	rm -f deduplicate
diff -Nru net-retriever-1.39/debian/control net-retriever-1.39ubuntu1/debian/control
--- net-retriever-1.39/debian/control	2014-03-03 10:26:08.000000000 +0000
+++ net-retriever-1.39ubuntu1/debian/control	2014-11-17 16:30:47.000000000 +0000
@@ -3,15 +3,15 @@
 Priority: optional
 Maintainer: Debian Install System Team <debian-boot@lists.debian.org>
 Uploaders: Christian Perrier <bubulle@debian.org>, Cyril Brulebois <kibi@debian.org>
-Build-Depends: debhelper (>= 9)
+Build-Depends: debhelper (>= 9), libdebian-installer4-dev
 Build-Depends-Indep: dpkg-dev (>= 1.7.0)
 Vcs-Browser: http://anonscm.debian.org/gitweb/?p=d-i/net-retriever.git
 Vcs-Git: git://anonscm.debian.org/d-i/net-retriever.git
 
 Package: net-retriever
 Package-Type: udeb
-Architecture: all
-Depends: ${misc:Depends}, choose-mirror, configured-network, di-utils (>= 1.58), gpgv-udeb, debian-archive-keyring-udeb
+Architecture: any
+Depends: ${shlibs:Depends}, ${misc:Depends}, choose-mirror, configured-network, di-utils (>= 1.58), gpgv-udeb, ubuntu-keyring-udeb
 Provides: retriever
 Description: Fetch modules from the Internet
  This is a retriever that uses wget to fetch files over http or ftp.
diff -Nru net-retriever-1.39/debian/net-retriever.install net-retriever-1.39ubuntu1/debian/net-retriever.install
--- net-retriever-1.39/debian/net-retriever.install	2012-03-12 09:17:51.000000000 +0000
+++ net-retriever-1.39ubuntu1/debian/net-retriever.install	2013-07-16 12:42:20.000000000 +0100
@@ -1 +1,2 @@
 net-retriever usr/lib/debian-installer/retriever
+deduplicate usr/lib/net-retriever
diff -Nru net-retriever-1.39/debian/rules net-retriever-1.39ubuntu1/debian/rules
--- net-retriever-1.39/debian/rules	2012-03-12 09:17:51.000000000 +0000
+++ net-retriever-1.39ubuntu1/debian/rules	2012-12-06 18:02:14.000000000 +0000
@@ -1,3 +1,12 @@
 #! /usr/bin/make -f
+
+DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)
+DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)
+
 %:
 	dh $@
+
+ifneq ($(DEB_BUILD_GNU_TYPE),$(DEB_HOST_GNU_TYPE))
+override_dh_auto_build:
+	dh_auto_build -- CC=$(DEB_HOST_GNU_TYPE)-gcc
+endif
diff -Nru net-retriever-1.39/deduplicate.c net-retriever-1.39ubuntu1/deduplicate.c
--- net-retriever-1.39/deduplicate.c	1970-01-01 01:00:00.000000000 +0100
+++ net-retriever-1.39ubuntu1/deduplicate.c	2013-07-16 12:41:13.000000000 +0100
@@ -0,0 +1,216 @@
+/* Remove duplicates in Packages file, pending libd-i doing it for us. */
+
+#define _GNU_SOURCE
+
+#include <sys/types.h>
+#include <ctype.h>
+#include <regex.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <debian-installer.h>
+
+di_hash_table *versions, *entries;
+regex_t package_re, version_re;
+
+static void package_version_free (void *key)
+{
+	di_package_version *ver = key;
+
+	di_free (ver->upstream);
+	di_free (ver->debian_revision);
+	di_free (ver);
+}
+
+static void xregcomp (regex_t *preg, const char *regex, int cflags)
+{
+	int err;
+
+	err = regcomp (preg, regex, cflags);
+	if (err) {
+		char *errbuf;
+		size_t errbuf_size;
+
+		errbuf_size = regerror (err, preg, NULL, 0);
+		errbuf = di_malloc (errbuf_size);
+		regerror (err, preg, errbuf, errbuf_size);
+		fprintf (stderr, "Failed to compile /%s/: %s", regex, errbuf);
+		di_free (errbuf);
+		exit (1);
+	}
+}
+
+static bool match_regex_capture_one (regex_t *preg, di_rstring *entry,
+				     di_rstring *out)
+{
+	regmatch_t matches[2];
+
+	if (regexec (preg, entry->string, 2, matches, 0) != 0 ||
+	    matches[1].rm_so == -1 || matches[1].rm_eo == -1)
+		return false;
+	out->size = matches[1].rm_eo - matches[1].rm_so;
+	out->string = di_stradup (entry->string + matches[1].rm_so, out->size);
+	return true;
+}
+
+static di_rstring *rstring_copy (di_rstring *orig)
+{
+	di_rstring *copy;
+
+	copy = di_new (di_rstring, 1);
+	copy->string = strdup (orig->string);
+	copy->size = orig->size;
+	return copy;
+}
+
+static void deduplicate_one (di_rstring *entry)
+{
+	di_rstring package, newver_str;
+	di_package dummynewver;
+	di_package_version *newver, *oldver;
+
+	package.string = NULL;
+	newver_str.string = NULL;
+
+	if (!match_regex_capture_one (&package_re, entry, &package))
+		goto out;
+	if (!*package.string ||
+	    memchr (package.string, '/', package.size))
+		goto out;
+
+	if (!match_regex_capture_one (&version_re, entry, &newver_str))
+		goto out;
+
+	/* libdebian-installer has a crazy interface that won't let me parse
+	 * raw strings, so I need this dance.
+	 */
+	dummynewver.version = newver_str.string;
+	newver = di_package_version_parse (&dummynewver);
+	if (!newver) {
+		fprintf (stderr, "failed to parse version %s\n",
+			 newver_str.string);
+		exit (2);
+	}
+	oldver = di_hash_table_lookup (versions, &package);
+	if (oldver && di_package_version_compare (oldver, newver) >= 0) {
+		package_version_free (newver);
+		goto out;
+	}
+	di_hash_table_insert (versions, rstring_copy (&package), newver);
+	di_hash_table_insert (entries, rstring_copy (&package),
+			      rstring_copy (entry));
+
+out:
+	di_free (package.string);
+	di_free (newver_str.string);
+}
+
+struct entry {
+	di_rstring key, value;
+};
+
+struct all_entries {
+	struct entry *entries;
+	di_ksize_t size, pos;
+};
+
+static void entry_append (void *key, void *value, void *user_data)
+{
+	struct all_entries *all_entries = user_data;
+	all_entries->entries[all_entries->pos].key = *(di_rstring *) key;
+	all_entries->entries[all_entries->pos].value = *(di_rstring *) value;
+	++all_entries->pos;
+}
+
+static int entry_compare (const void *va, const void *vb)
+{
+	const struct entry *a = va, *b = vb;
+	return strcmp (a->key.string, b->key.string);
+}
+
+static void output (void)
+{
+	struct all_entries all_entries;
+	di_ksize_t i;
+
+	all_entries.size = di_hash_table_size (entries);
+	all_entries.entries = di_new (struct entry, all_entries.size);
+	all_entries.pos = 0;
+	di_hash_table_foreach (entries, entry_append, &all_entries);
+	qsort (all_entries.entries, all_entries.size, sizeof (struct entry),
+	       entry_compare);
+
+	for (i = 0; i < all_entries.size; ++i) {
+		di_rstring *value = &all_entries.entries[i].value;
+
+		fputs (value->string, stdout);
+		if (!value->size || value->string[value->size - 1] != '\n')
+			fputc ('\n', stdout);
+		fputc ('\n', stdout);
+	}
+
+	di_free (all_entries.entries);
+}
+
+int main (int argc, char **argv)
+{
+	int ret;
+	di_rstring line;
+	ssize_t line_size;
+	size_t line_alloc;
+	di_rstring entry;
+	size_t entry_alloc;
+
+	versions = di_hash_table_new_full (di_rstring_hash, di_rstring_equal,
+					   free, package_version_free);
+	entries = di_hash_table_new_full (di_rstring_hash, di_rstring_equal,
+					  free, free);
+	xregcomp (&package_re, "^Package:[[:space:]]+(.*)",
+		  REG_EXTENDED | REG_ICASE | REG_NEWLINE);
+	xregcomp (&version_re, "^Version:[[:space:]]+(.*)",
+		  REG_EXTENDED | REG_ICASE | REG_NEWLINE);
+	line_alloc = 0;
+	line.string = NULL;
+	entry_alloc = 4096;
+	entry.string = di_malloc (entry_alloc);
+	entry.size = 0;
+
+	while ((line_size = getline (&line.string, &line_alloc, stdin)) >= 0) {
+		line.size = (di_ksize_t) line_size;
+		if (*line.string && *line.string != '\n') {
+			di_ksize_t new_size;
+
+			new_size = entry.size + line.size + 1;
+			if (new_size > entry_alloc) {
+				while (new_size > entry_alloc)
+					entry_alloc *= 2;
+				entry.string = di_realloc (entry.string,
+							   entry_alloc);
+			}
+			memcpy (entry.string + entry.size, line.string,
+				line.size + 1);
+			entry.size += line.size;
+		} else {
+			deduplicate_one (&entry);
+			*entry.string = '\0';
+			entry.size = 0;
+		}
+	}
+	if (ferror (stdin)) {
+		perror ("getline");
+		ret = 1;
+		goto out;
+	}
+	if (entry.size)
+		deduplicate_one (&entry);
+	output ();
+	ret = 0;
+
+out:
+	di_hash_table_destroy (versions);
+	di_hash_table_destroy (entries);
+	regfree (&package_re);
+	regfree (&version_re);
+	return ret;
+}
diff -Nru net-retriever-1.39/net-retriever net-retriever-1.39ubuntu1/net-retriever
--- net-retriever-1.39/net-retriever	2014-03-03 10:26:08.000000000 +0000
+++ net-retriever-1.39ubuntu1/net-retriever	2014-11-17 16:30:25.000000000 +0000
@@ -86,6 +86,12 @@
 	exit 1
 }
 
+# Nasty hack to remove duplicates in Packages file.
+deduplicate () {
+	/usr/lib/net-retriever/deduplicate <"$1" >"$1.new"
+	mv "$1.new" "$1"
+}
+
 cmd="$1"
 shift
 

-- 
Colin Watson                                       [cjwatson@debian.org]


Reply to: