[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Re: [PATCH] Use FIBMAP to sort .list files before scanning



Hi Morten!

On Sat, 2009-09-05 at 01:34:50 +0200, Morten Hustveit wrote:
> When running dpkg from a cold cache on a system where /var/lib/dpkg/info/ lies
> on a harddisk, a lot of time is spent waiting for seeks between (typically)
> thousands of files.  This patch ensures that the package iterator used by
> ensure_allinstfiles_available returns the packages sorted by their .list file's
> physical location on the harddisk, greatly reducing drive head movements.
> 
> The performance improvement is around 330% on my system: reinstalling a simple
> package takes 8 seconds instead of 27 seconds.  I dropped the caches before
> each run, and did 10 runs with consistent results.

Ah, nice speed up! Although mid term I'd prefer a portable solution
that benefits all architectures in a similar way, and not just Linux.

On kFreeBSD there's FIOGETLBA, but it seems to only be implemented for
the cd9660 file system, so not of much use. Also AFAIU from taking a
look at Linux, FIBMAP is being phased out by FIEMAP?

For now if the changes would be more localized, basically the following
comments addressed, I'd be ok applying it until we get something better.

I've done some reorganizations in master to make some of those remarks
clearer and some changes easier (like moving LISTFILE to src/, and
moving pkg-array to libdpkg).

> diff --git a/configure.ac b/configure.ac
> index bb5456c..eadf9a9 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -73,7 +73,7 @@ fi
>  # Checks for header files.
>  AC_HEADER_STDC
>  AC_CHECK_HEADERS([stddef.h error.h locale.h libintl.h kvm.h \
> -                  sys/cdefs.h sys/syscall.h])
> +                  sys/cdefs.h sys/syscall.h linux/fs.h])
>  DPKG_CHECK_DEFINE(TIOCNOTTY, [sys/ioctl.h])
>  

Check for FIBMAP in linux/fs.h using DPKG_CHECK_DEFINE, the HAVE_
macro is going to be more descriptive.

>  # Checks for typedefs, structures, and compiler characteristics.
> diff --git a/lib/dpkg/dpkg-db.h b/lib/dpkg/dpkg-db.h
> index a9debc7..e761d3e 100644
> --- a/lib/dpkg/dpkg-db.h
> +++ b/lib/dpkg/dpkg-db.h
> @@ -185,6 +185,8 @@ struct pkginfo { /* pig */
>    /* ->pend == this, non-NULL for us when Triggers-Pending. */
>    struct trigaw *othertrigaw_head;
>    struct trigpend *trigpend_head;
> +  struct pkginfo *iter_next;
> +  int phys_offset;
>  };
>  
>  /*** from lock.c ***/

This is an abstraction violation. The list files are part of the files
db, and you are adding this to the metadata db. These changes should
be localized in the src/main.h and src/filesdb.[ch] files. The structure
to hold the offset data should be clientdata (struct perpackagestate),
and probably with a less generic name.

> @@ -327,7 +329,11 @@ int informative(struct pkginfo *pkg, struct pkginfoperfile *info);
>  int countpackages(void);
>  void resetpackages(void);
>  
> -struct pkgiterator *iterpkgstart(void);
> +enum itertype {
> +  iter_any = 1,
> +  iter_list = 2
> +};
> +struct pkgiterator *iterpkgstart(int type);

I don't think it makes sense to add this argument to the function when
the only user is the one handling the .list files.

>  struct pkginfo *iterpkgnext(struct pkgiterator*);
>  void iterpkgend(struct pkgiterator*);
>  

> diff --git a/lib/dpkg/database.c b/lib/dpkg/database.c
> index cc9dd85..322b549 100644
> --- a/lib/dpkg/database.c
> +++ b/lib/dpkg/database.c
> @@ -20,6 +20,12 @@
>   */
>  #include <config.h>
>  #include <compat.h>
> +#if HAVE_LINUX_FS_H
> +#  include <unistd.h>
> +#  include <fcntl.h>
> +#  include <sys/ioctl.h>
> +#  include <linux/fs.h>
> +#endif
>  
>  #include <dpkg/i18n.h>
>  
> @@ -40,6 +46,10 @@
>  static struct pkginfo *bins[BINS];
>  static int npackages;
>  
> +static struct pkginfo *first_package;
> +static struct pkginfo *last_package;
> +static int sort_order = -1;
> +
>  #define FNV_offset_basis 2166136261ul
>  #define FNV_mixing_prime 16777619ul
>  
> @@ -81,6 +91,8 @@ void blankpackage(struct pkginfo *pigp) {
>    pigp->trigpend_head = NULL;
>    blankpackageperfile(&pigp->installed);
>    blankpackageperfile(&pigp->available);
> +  pigp->iter_next= NULL;
> +  pigp->phys_offset= 0;
>  }
>  
>  void blankpackageperfile(struct pkginfoperfile *pifp) {
> @@ -141,6 +153,13 @@ struct pkginfo *findpackage(const char *inname) {
>    newpkg->next= NULL;
>    *pointerp= newpkg;
>    npackages++;
> +  if(!first_package) {
> +    first_package= newpkg;
> +    last_package= newpkg;
> +  } else {
> +    last_package->iter_next= newpkg;
> +    last_package= newpkg;
> +  }
>  
>    free(name);
>    return newpkg;
> @@ -152,24 +171,74 @@ int countpackages(void) {
>  
>  struct pkgiterator {
>    struct pkginfo *pigp;
> -  int nbinn;
>  };
>  
> -struct pkgiterator *iterpkgstart(void) {
> +#if HAVE_LINUX_FS_H
> +static int qsort_compare_phys_offset(const void* vlhs, const void* vrhs) {
> +  struct pkginfo *const *lhs = vlhs;
> +  struct pkginfo *const *rhs = vrhs;
> +  return (*lhs)->phys_offset - (*rhs)->phys_offset;
> +}
> +
> +static void pkgsort(int type) {
> +  struct pkginfo **pigps;
> +  struct pkginfo *pigp;
> +  int i;
> +  if(type != iter_list)
> +    return;
> +  pigps= malloc(sizeof(*pigps) * npackages);
> +  pigp= first_package;
> +  for (i=0; i<npackages; i++) {
> +    static int have_fibmap = 1;
> +    if(type == iter_list && have_fibmap
> +       && pigp->status != stat_notinstalled) {
> +      const char *listfile;
> +      int fd, lba = 0;
> +
> +      listfile= pkgadminfile(pigp, LISTFILE);
> +      if(-1 != (fd= open(listfile, O_RDONLY))) {
> +        if(-1 != ioctl(fd, FIBMAP, &lba)) {
> +          pigp->phys_offset= lba;
> +        }
> +        else
> +          have_fibmap= 0;
> +        close(fd);
> +      }
> +    }
> +    pigps[i]= pigp;
> +    pigp= pigp->iter_next;
> +  }
> +  qsort(pigps, npackages, sizeof(*pigps), qsort_compare_phys_offset);
> +  first_package= pigp= pigps[0];
> +  for (i=1; i<npackages; i++) {
> +    pigp->iter_next= pigps[i];
> +    pigp= pigp->iter_next;
> +  }
> +  last_package= pigp;
> +  pigp->iter_next= 0;
> +  sort_order= type;
> +  free(pigps);
> +}
> +#endif

I'd rather see this handled entirely in ensure_allinstfiles_available.
In case it got called several times, we'd have to do the sort each
time, as the .list files might have changed. Or an alternative would
be to update the offset after each .list file write.

So just use the pkg_array support, sort it with pkg_array_sort and
then traverse it, which is more or less what you are doing here
manually.

> +struct pkgiterator *iterpkgstart(int type) {
>    struct pkgiterator *i;
> +#if HAVE_LINUX_FS_H
> +  if(type != iter_any && type != sort_order)

You are already checking that the type is not iter_any pkgsort. Also
it wouldn't matter if on iter_any it would get the iterator in
iter_list order.

> +    pkgsort(type);
> +#endif
>    i= m_malloc(sizeof(struct pkgiterator));
> -  i->pigp= NULL;
> -  i->nbinn= 0;
> +  i->pigp= first_package;
>    return i;
>  }
>  
>  struct pkginfo *iterpkgnext(struct pkgiterator *i) {
>    struct pkginfo *r;
> -  while (!i->pigp) {
> -    if (i->nbinn >= BINS) return NULL;
> -    i->pigp= bins[i->nbinn++];
> -  }
> -  r= i->pigp; i->pigp= r->next; return r;
> +  if(!i->pigp)
> +    return NULL;
> +  r= i->pigp;
> +  i->pigp= i->pigp->iter_next;
> +  return r;
>  }
>  
>  void iterpkgend(struct pkgiterator *i) {

thanks,
guillem


Reply to: