[PATCH] Use FIEMAP to sort .list files before scanning
This is a revision of an earlier patch, based on comments from Guillem Jover:
http://lists.debian.org/debian-dpkg/2009/10/msg00003.html
When running dpkg from a cold cache on a system where /var/lib/dpkg/info/ lies
on a harddisk, a lot of time is spent waiting for seeks between (typically)
thousands of files. This patch changes the behavior of
ensure_allinstfiles_available, so that it accesses the packages in the order of
their .list files' physical locations on the harddisk, greatly reducing
drive head movements.
The performance improvement is around 330% on my system: reinstalling a simple
package takes 8 seconds instead of 27 seconds. I dropped the caches before
each run, and did 10 runs with consistent results. The performance is
identical to the previous patch using FIBMAP.
---
configure.ac | 2 +-
src/filesdb.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
src/main.h | 1 +
3 files changed, 75 insertions(+), 5 deletions(-)
diff --git a/configure.ac b/configure.ac
index 2272eb0..54cdb6f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -79,7 +79,7 @@ fi
# Checks for header files.
AC_HEADER_STDC
AC_CHECK_HEADERS([stddef.h error.h locale.h libintl.h kvm.h \
- sys/cdefs.h sys/syscall.h])
+ sys/cdefs.h sys/syscall.h linux/fiemap.h])
DPKG_CHECK_DEFINE(TIOCNOTTY, [sys/ioctl.h])
# Checks for typedefs, structures, and compiler characteristics.
diff --git a/src/filesdb.c b/src/filesdb.c
index 0cc0791..a2eda65 100644
--- a/src/filesdb.c
+++ b/src/filesdb.c
@@ -26,6 +26,12 @@
#include <sys/types.h>
#include <sys/stat.h>
+#if HAVE_LINUX_FIEMAP_H
+#include <linux/fiemap.h>
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#endif
+
#include <assert.h>
#include <errno.h>
#include <string.h>
@@ -40,6 +46,7 @@
#include <dpkg/dpkg-db.h>
#include <dpkg/path.h>
#include <dpkg/buffer.h>
+#include <dpkg/pkg-array.h>
#include <dpkg/progress.h>
#include "filesdb.h"
@@ -60,6 +67,7 @@ ensure_package_clientdata(struct pkginfo *pkg)
pkg->clientdata->fileslistvalid = 0;
pkg->clientdata->files = NULL;
pkg->clientdata->trigprocdeferred = NULL;
+ pkg->clientdata->listfile_phys_offset = 0;
}
void note_must_reread_files_inpackage(struct pkginfo *pkg) {
@@ -254,10 +262,29 @@ ensure_packagefiles_available(struct pkginfo *pkg)
pkg->clientdata->fileslistvalid= 1;
}
+#if HAVE_LINUX_FIEMAP_H
+int
+pkg_sorter_by_listfile_phys_offset(const void *a, const void *b) {
+ struct pkginfo *const *lhs = a;
+ struct pkginfo *const *rhs = b;
+
+ /* We can't simply subtract, because the difference may be greater than INT_MAX.
+ */
+ if((*lhs)->clientdata->listfile_phys_offset < (*rhs)->clientdata->listfile_phys_offset)
+ return -1;
+ else
+ return 1;
+}
+#endif
+
void ensure_allinstfiles_available(void) {
- struct pkgiterator *it;
struct pkginfo *pkg;
struct progress progress;
+ struct pkg_array array;
+ int i;
+#if HAVE_LINUX_FIEMAP_H
+ int blocksize;
+#endif
if (allpackagesdone) return;
if (saidread<2) {
@@ -267,14 +294,56 @@ void ensure_allinstfiles_available(void) {
progress_init(&progress, _("(Reading database ... "), max);
}
- it= iterpkgstart();
- while ((pkg = iterpkgnext(it)) != NULL) {
+ pkg_array_init_from_db(&array);
+
+ /* Sort packages by the physical location of their list files, so that
+ * scanning them later will minimize disk drive head movements.
+ */
+#if HAVE_LINUX_FIEMAP_H
+ for (i = 0; i < array.n_pkgs; i++) {
+ pkg = array.pkgs[i];
+ if(!pkg->clientdata->listfile_phys_offset
+ && pkg->status != stat_notinstalled) {
+ struct {
+ struct fiemap fiemap;
+ struct fiemap_extent extent;
+ } fm;
+ const char *listfile;
+ int fd;
+
+ pkg->clientdata->listfile_phys_offset = -1;
+
+ listfile= pkgadminfile(pkg, LISTFILE);
+
+ if(-1 == (fd= open(listfile, O_RDONLY)))
+ continue;
+
+ if(!blocksize && -1 == ioctl(fd, FIGETBSZ, &blocksize))
+ break;
+
+ memset(&fm, 0, sizeof(fm));
+ fm.fiemap.fm_start = 0;
+ fm.fiemap.fm_length = blocksize;
+ fm.fiemap.fm_flags = 0;
+ fm.fiemap.fm_extent_count = 1;
+
+ if(-1 != ioctl(fd, FS_IOC_FIEMAP, (unsigned long) &fm))
+ pkg->clientdata->listfile_phys_offset = fm.fiemap.fm_extents[0].fe_physical;
+
+ close(fd);
+ }
+ }
+ pkg_array_sort(&array, pkg_sorter_by_listfile_phys_offset);
+#endif
+
+ for (i = 0; i < array.n_pkgs; i++) {
+ pkg = array.pkgs[i];
ensure_packagefiles_available(pkg);
if (saidread == 1)
progress_step(&progress);
}
- iterpkgend(it);
+
allpackagesdone= 1;
if (saidread==1) {
diff --git a/src/main.h b/src/main.h
index cbcc2df..eefabff 100644
--- a/src/main.h
+++ b/src/main.h
@@ -46,6 +46,7 @@ struct perpackagestate {
/* Non-NULL iff in trigproc.c:deferred. */
struct pkg_list *trigprocdeferred;
+ off_t listfile_phys_offset;
};
enum action {
--
1.6.4.3
Reply to: