[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

[PATCH] Use FIEMAP to sort .list files before scanning



This is a revision of an earlier patch, based on comments from Guillem Jover:

  http://lists.debian.org/debian-dpkg/2009/10/msg00003.html

When running dpkg from a cold cache on a system where /var/lib/dpkg/info/ lies
on a harddisk, a lot of time is spent waiting for seeks between (typically)
thousands of files.  This patch changes the behavior of
ensure_allinstfiles_available, so that it accesses the packages in the order of
their .list files' physical locations on the harddisk, greatly reducing
drive head movements.

The performance improvement is around 330% on my system: reinstalling a simple
package takes 8 seconds instead of 27 seconds.  I dropped the caches before
each run, and did 10 runs with consistent results.  The performance is
identical to the previous patch using FIBMAP.
---
 configure.ac  |    2 +-
 src/filesdb.c |   77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 src/main.h    |    1 +
 3 files changed, 75 insertions(+), 5 deletions(-)

diff --git a/configure.ac b/configure.ac
index 2272eb0..54cdb6f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -79,7 +79,7 @@ fi
 # Checks for header files.
 AC_HEADER_STDC
 AC_CHECK_HEADERS([stddef.h error.h locale.h libintl.h kvm.h \
-                  sys/cdefs.h sys/syscall.h])
+                  sys/cdefs.h sys/syscall.h linux/fiemap.h])
 DPKG_CHECK_DEFINE(TIOCNOTTY, [sys/ioctl.h])
 
 # Checks for typedefs, structures, and compiler characteristics.
diff --git a/src/filesdb.c b/src/filesdb.c
index 0cc0791..a2eda65 100644
--- a/src/filesdb.c
+++ b/src/filesdb.c
@@ -26,6 +26,12 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 
+#if HAVE_LINUX_FIEMAP_H
+#include <linux/fiemap.h>
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#endif
+
 #include <assert.h>
 #include <errno.h>
 #include <string.h>
@@ -40,6 +46,7 @@
 #include <dpkg/dpkg-db.h>
 #include <dpkg/path.h>
 #include <dpkg/buffer.h>
+#include <dpkg/pkg-array.h>
 #include <dpkg/progress.h>
 
 #include "filesdb.h"
@@ -60,6 +67,7 @@ ensure_package_clientdata(struct pkginfo *pkg)
   pkg->clientdata->fileslistvalid = 0;
   pkg->clientdata->files = NULL;
   pkg->clientdata->trigprocdeferred = NULL;
+  pkg->clientdata->listfile_phys_offset = 0;
 }
 
 void note_must_reread_files_inpackage(struct pkginfo *pkg) {
@@ -254,10 +262,29 @@ ensure_packagefiles_available(struct pkginfo *pkg)
   pkg->clientdata->fileslistvalid= 1;
 }
 
+#if HAVE_LINUX_FIEMAP_H
+int
+pkg_sorter_by_listfile_phys_offset(const void *a, const void *b) {
+  struct pkginfo *const *lhs = a;
+  struct pkginfo *const *rhs = b;
+
+  /* We can't simply subtract, because the difference may be greater than INT_MAX.
+   */
+  if((*lhs)->clientdata->listfile_phys_offset < (*rhs)->clientdata->listfile_phys_offset)
+    return -1;
+  else
+    return 1;
+}
+#endif
+
 void ensure_allinstfiles_available(void) {
-  struct pkgiterator *it;
   struct pkginfo *pkg;
   struct progress progress;
+  struct pkg_array array;
+  int i;
+#if HAVE_LINUX_FIEMAP_H
+  int blocksize;
+#endif
 
   if (allpackagesdone) return;
   if (saidread<2) {
@@ -267,14 +294,56 @@ void ensure_allinstfiles_available(void) {
     progress_init(&progress, _("(Reading database ... "), max);
   }
 
-  it= iterpkgstart();
-  while ((pkg = iterpkgnext(it)) != NULL) {
+  pkg_array_init_from_db(&array);
+
+  /* Sort packages by the physical location of their list files, so that
+   * scanning them later will minimize disk drive head movements.
+   */
+#if HAVE_LINUX_FIEMAP_H
+  for (i = 0; i < array.n_pkgs; i++) {
+    pkg = array.pkgs[i];
+    if(!pkg->clientdata->listfile_phys_offset
+       && pkg->status != stat_notinstalled) {
+      struct {
+        struct fiemap fiemap;
+        struct fiemap_extent extent;
+      } fm;
+      const char *listfile;
+      int fd;
+
+      pkg->clientdata->listfile_phys_offset = -1;
+
+      listfile= pkgadminfile(pkg, LISTFILE);
+
+      if(-1 == (fd= open(listfile, O_RDONLY)))
+        continue;
+
+      if(!blocksize && -1 == ioctl(fd, FIGETBSZ, &blocksize))
+        break;
+
+      memset(&fm, 0, sizeof(fm));
+      fm.fiemap.fm_start = 0;
+      fm.fiemap.fm_length = blocksize;
+      fm.fiemap.fm_flags = 0;
+      fm.fiemap.fm_extent_count = 1;
+
+      if(-1 != ioctl(fd, FS_IOC_FIEMAP, (unsigned long) &fm))
+        pkg->clientdata->listfile_phys_offset = fm.fiemap.fm_extents[0].fe_physical;
+
+      close(fd);
+    }
+  }
+  pkg_array_sort(&array, pkg_sorter_by_listfile_phys_offset);
+#endif
+
+  for (i = 0; i < array.n_pkgs; i++) {
+    pkg = array.pkgs[i];
     ensure_packagefiles_available(pkg);
 
     if (saidread == 1)
       progress_step(&progress);
   }
-  iterpkgend(it);
+
   allpackagesdone= 1;
 
   if (saidread==1) {
diff --git a/src/main.h b/src/main.h
index cbcc2df..eefabff 100644
--- a/src/main.h
+++ b/src/main.h
@@ -46,6 +46,7 @@ struct perpackagestate {
 
   /* Non-NULL iff in trigproc.c:deferred. */
   struct pkg_list *trigprocdeferred;
+  off_t listfile_phys_offset;
 };
 
 enum action {
-- 
1.6.4.3


Reply to: