[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#944968: popularity-contest: Program accesses internal dpkg database



Hi!

On Mon, 2019-11-18 at 06:51:00 +0000, Niels Thykier wrote:
> On Sun, 17 Nov 2019 22:59:58 +0100 Bill Allombert wrote:
> > On Sun, Nov 17, 2019 at 10:44:02PM +0100, Guillem Jover wrote:
> > > Source: popularity-contest
> > > Source-Version: 1.69
> > > Severity: important
> > > User: debian-dpkg@lists.debian.org
> > > Usertags: dpkg-db-access-blocker

> > > This package contains the «popularity-contest» program, which directly
> > > accesses the dpkg internal database, instead of using one of the public
> > > interfaces provided by dpkg.
> > > 
> > > The program should stop reading the files list files, and switched to
> > > use something like:
> > > 
> > >   «dpkg-query \
> > >     --showformat 'Package: ${Package}\nFiles:\n${db-fsys:Files}\n' \
> > >     --show»
> > > 
> > > to get them.

> > the last time this comes up the performance of using dpkg-query was poor. 
> > Was it improved ? What is the first release to support this syntax ?

Just to clarify, the command above, does not need packages specified,
it will dump contents for the entire database.

> I tested Guillem's command on my own system and got:
> 
>  ~11.5s (cold cache/first time)
>  ~0.3 (warm cache/2nd+3rd time)
> 
> For reference, "dpkg -l | wc -l" says I have roughly 1500 packages
> installed.
> 
> I do not know what the original numbers were, so I cannot put the
> following numbers into that context.

I got a similar concern from the dh-make-perl bug, and looked into
this. I've got the attached patch locally, and will keep looking
further for optimizations. The database change should probably make
things better, as I'm considering moving to a single file for all
fsys entries.

From the man page, the first dpkg-query to support that virtual field
is 1.19.3.

Thanks,
Guillem
diff --git i/lib/dpkg/pkg-format.c w/lib/dpkg/pkg-format.c
index 1985df068..d54ddf865 100644
--- i/lib/dpkg/pkg-format.c
+++ w/lib/dpkg/pkg-format.c
@@ -308,6 +308,10 @@ virt_fsys_last_modified(struct varbuf *vb,
 	varbuf_printf(vb, "%ld", st.st_mtime);
 }
 
+/*
+ * This function requires the caller to have loaded the package fsys metadata,
+ * otherwise it will do nothing.
+ */
 static void
 virt_fsys_files(struct varbuf *vb,
                 const struct pkginfo *pkg, const struct pkgbin *pkgbin,
@@ -315,15 +319,6 @@ virt_fsys_files(struct varbuf *vb,
 {
 	struct fsys_namenode_list *node;
 
-	/* XXX: This cast is so wrong on so many levels, but the alternatives
-	 * are apparently worse. We might need to end up removing the const
-	 * from the arguments.
-	 *
-	 * Ideally loading the entire fsys db would be cheaper, and stored
-	 * in a single file, so we could do it unconditionally, before any
-	 * formatting. */
-	ensure_packagefiles_available((struct pkginfo *)pkg);
-
 	if (!pkg->files_list_valid)
 		return;
 
@@ -388,6 +383,36 @@ static const struct fieldinfo virtinfos[] = {
 	{ NULL },
 };
 
+bool
+pkg_format_needs_fsys_db(const struct pkg_format_node *head)
+{
+	const struct pkg_format_node *node;
+
+	for (node = head; node; node = node->next) {
+		if (node->type != PKG_FORMAT_FIELD)
+			continue;
+		if (strcmp(node->data, "db-fsys:Files") == 0)
+			return true;
+	}
+
+	return false;
+}
+
+static void
+pkg_format_strfmt(struct varbuf *vb,
+                  const struct pkg_format_node *node, const char *str)
+{
+	if (node->width == 0) {
+		varbuf_add_str(vb, str);
+	} else {
+		char fmt[16];
+
+		snprintf(fmt, sizeof(fmt), "%%%s%zus",
+		         ((node->pad) ? "-" : ""), node->width);
+		varbuf_printf(vb, fmt, str);
+	}
+}
+
 void
 pkg_format_show(const struct pkg_format_node *head,
                 struct pkginfo *pkg, struct pkgbin *pkgbin)
@@ -397,18 +422,11 @@ pkg_format_show(const struct pkg_format_node *head,
 
 	for (node = head; node; node = node->next) {
 		bool ok;
-		char fmt[16];
 
 		ok = false;
 
-		if (node->width > 0)
-			snprintf(fmt, 16, "%%%s%zus",
-			         ((node->pad) ? "-" : ""), node->width);
-		else
-			strcpy(fmt, "%s");
-
 		if (node->type == PKG_FORMAT_STRING) {
-			varbuf_printf(&fb, fmt, node->data);
+			pkg_format_strfmt(&fb, node, node->data);
 			ok = true;
 		} else if (node->type == PKG_FORMAT_FIELD) {
 			const struct fieldinfo *fip;
@@ -421,7 +439,7 @@ pkg_format_show(const struct pkg_format_node *head,
 				fip->wcall(&wb, pkg, pkgbin, 0, fip);
 
 				varbuf_end_str(&wb);
-				varbuf_printf(&fb, fmt, wb.buf);
+				pkg_format_strfmt(&fb, node, wb.buf);
 				varbuf_reset(&wb);
 				ok = true;
 			} else {
@@ -429,7 +447,7 @@ pkg_format_show(const struct pkg_format_node *head,
 
 				afp = find_arbfield_info(pkgbin->arbs, node->data);
 				if (afp) {
-					varbuf_printf(&fb, fmt, afp->value);
+					pkg_format_strfmt(&fb, node, afp->value);
 					ok = true;
 				}
 			}
diff --git i/lib/dpkg/pkg-format.h w/lib/dpkg/pkg-format.h
index 3e013003d..14e6a7605 100644
--- i/lib/dpkg/pkg-format.h
+++ w/lib/dpkg/pkg-format.h
@@ -35,6 +35,9 @@ DPKG_BEGIN_DECLS
 
 struct pkg_format_node;
 
+bool
+pkg_format_needs_fsys_db(const struct pkg_format_node *head);
+
 struct pkg_format_node *pkg_format_parse(const char *fmt,
                                          struct dpkg_error *err);
 void pkg_format_free(struct pkg_format_node *head);
diff --git i/src/querycmd.c w/src/querycmd.c
index 472d87f79..b2ffb0147 100644
--- i/src/querycmd.c
+++ w/src/querycmd.c
@@ -540,6 +540,12 @@ list_files(const char *const *argv)
   return failures;
 }
 
+static void
+pkg_array_load_fsys(struct pkg_array *array, struct pkginfo *pkg, void *pkg_data)
+{
+  ensure_packagefiles_available(pkg);
+}
+
 static void
 pkg_array_show_item(struct pkg_array *array, struct pkginfo *pkg, void *pkg_data)
 {
@@ -555,6 +561,7 @@ showpackages(const char *const *argv)
   struct pkg_array array;
   struct pkginfo *pkg;
   struct pkg_format_node *fmt;
+  bool fmt_needs_fsys_db;
   int i;
   int rc = 0;
 
@@ -566,6 +573,8 @@ showpackages(const char *const *argv)
     return rc;
   }
 
+  fmt_needs_fsys_db = pkg_format_needs_fsys_db(fmt);
+
   if (!opt_loadavail)
     modstatdb_open(msdbrw_readonly);
   else
@@ -575,6 +584,8 @@ showpackages(const char *const *argv)
   pkg_array_sort(&array, pkg_sorter_by_nonambig_name_arch);
 
   if (!*argv) {
+    if (fmt_needs_fsys_db)
+      ensure_allinstfiles_available();
     for (i = 0; i < array.n_pkgs; i++) {
       pkg = array.pkgs[i];
       if (pkg->status == PKG_STAT_NOTINSTALLED)
@@ -582,6 +593,8 @@ showpackages(const char *const *argv)
       pkg_format_show(fmt, pkg, &pkg->installed);
     }
   } else {
+    if (fmt_needs_fsys_db)
+      pkg_array_foreach(&array, pkg_array_load_fsys, NULL);
     rc = pkg_array_match_patterns(&array, pkg_array_show_item, fmt, argv);
   }
 

Reply to: