[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Re: md5sums files



(I'm not subscribed to this list, so go ahead and Cc me)

On Thu, Mar 4, 2010 at 02:05, Peter Samuelson <peter@p12n.org> wrote:
> [Wouter Verhelst]
> > I must say I was somewhat surprised by these numbers. Out of 2483
> > packages installed on my laptop, 2340 install md5sums.
> The surprising part, perhaps, is that dpkg itself didn't just generate
> the other 143 md5sums files at installation time.

The easy (and usually correct) reason for things like that is "dpkg's
source is scary".

> I suggested this a long time ago and of course was met with "so where's
> your patch?"  Of course I was not willing to do the work.

See? Anyway, my patch is attached. It makes dpkg create a "foo.hashes"
when unpacking foo, whose contents looks like:

MD5:32b5e22f8e336b2f34e0dd87652e6dfc  usr/share/doc/mawk/changelog.gz
MD5:87a34f1f55ac3f7fec2c7fc82565e8eb  usr/share/doc/mawk/changelog.Debian.gz
...

Verification is a matter of something like:

$ cat /var/lib/dpkg/info/*.hashes | sed -n 's/^MD5://p' | (cd /;
md5sum -c) | grep -v ': OK$'

There's an option (--hash) that you can set to "none" to avoid
spending time calculating md5s if you so choose. Adding support for
sha1/sha256/whatever should be straightforward; afaik dpkg only has
code for md5 already built in though (though just invoking
/usr/bin/sha1sum etc would be an option of course).

Of course another option is just to pull the md5sums directly from the deb:

$ ar p /var/cache/apt/archives/ifupdown_0.6.9_i386.deb data.tar.gz |
    tar --to-command='printf "%s%s\n" "$(md5sum - | sed s/-$//)"
"${TAR_FILENAME#./}"'  -xzf - |
    diff - /var/lib/dpkg/info/ifupdown.md5sums
1,3d0
< 346208729633adf45e2fa3f2bd3b19c6  etc/init.d/ifupdown
< c6fffaae03271f1641920105ce68796b  etc/init.d/ifupdown-clean
< fab851ca87c5deb9d6f665e610184648  etc/default/ifupdown
4a2
> a0f11cf1809a468c49b72e0aa0a8e26b  sbin/ifup

(md5sums doesn't normally list conffiles, but does list hardlinks; the
above command does the opposite)

> But
> fundamentally, shipping a md5sums file is really just a tradeoff in
> download size vs. installation speed, not unlike gzip vs. bzip2.

Advantages of doing in when unpacking:
 - choice of checksum is the admin's decision
 - we can quickly roll out support for sha1/sha256/crc/... checksums
by just changing one package
 - admin has hashes of exactly what was unpacked, no matter the source
 - no concerns about bugs in dh_md5sums or similar resulting in bad checksums

Advantages of doing it when uploading:
 - provides some sort of double check of what's being uploaded
 - saves CPU time on users' machines

For me, I'd rather have dpkg generate the hashes.

Cheers,
aj

--
Anthony Towns <aj@erisian.com.au>
diff -urb dpkg-1.15.5.6/debian/changelog dpkg-1.15.5.6-aj/debian/changelog
--- dpkg-1.15.5.6/debian/changelog	2010-01-09 04:02:03.000000000 +1000
+++ dpkg-1.15.5.6-aj/debian/changelog	2010-03-07 04:13:03.171356041 +1000
@@ -1,3 +1,11 @@
+dpkg (1.15.5.6+aj) unstable; urgency=low
+
+  * Non-maintainer upload.
+  * Automatically create /var/lib/dpkg/info/pkg.hashes containing MD5 hashes
+    for unpacked files.
+
+ -- Anthony Towns <aj@lazuline>  Sun, 07 Mar 2010 04:12:32 +1000
+
 dpkg (1.15.5.6) unstable; urgency=low
 
   * dpkg-source: with format "3.0 (quilt)" ensure quilt's .pc directory is
diff -urb dpkg-1.15.5.6/configure.ac dpkg-1.15.5.6-aj/configure.ac
--- dpkg-1.15.5.6/configure.ac	2010-01-08 18:00:34.000000000 +1000
+++ dpkg-1.15.5.6-aj/configure.ac	2010-03-07 04:38:32.547372468 +1000
@@ -51,6 +51,16 @@
 esac])
 AC_SUBST(admindir)
 
+# Allow alternative default hash function
+hashtype="md5"
+AC_ARG_WITH(hashtype,
+	AS_HELP_STRING([--with-hashtype=HASH],
+	               [hash function to use for .hashes files]),
+[case "$with_hashtype" in
+      "md5"|"none") hashtype="$with_hashtype" ;;
+      *) AC_MSG_ERROR([invalid hashtype specified]) ;;
+esac])
+AC_SUBST(hashtype)
 
 # Checks for programs.
 AC_PROG_CC
diff -urb dpkg-1.15.5.6/src/Makefile.am dpkg-1.15.5.6-aj/src/Makefile.am
--- dpkg-1.15.5.6/src/Makefile.am	2010-01-09 03:23:06.000000000 +1000
+++ dpkg-1.15.5.6-aj/src/Makefile.am	2010-03-07 04:28:18.771356095 +1000
@@ -6,6 +6,7 @@
 AM_CPPFLAGS = \
 	-DLOCALEDIR=\"$(localedir)\" \
 	-DADMINDIR=\"$(admindir)\" \
+	-DHASHTYPE=\"$(hashtype)\" \
 	-idirafter $(top_srcdir)/lib/compat \
 	-I$(top_builddir) \
 	-I$(top_srcdir)/lib
diff -urb dpkg-1.15.5.6/src/main.c dpkg-1.15.5.6-aj/src/main.c
--- dpkg-1.15.5.6/src/main.c	2010-01-09 03:23:06.000000000 +1000
+++ dpkg-1.15.5.6-aj/src/main.c	2010-03-07 04:29:59.271360858 +1000
@@ -187,6 +187,7 @@
 const char *admindir= ADMINDIR;
 const char *instdir= "";
 struct pkg_list *ignoredependss = NULL;
+const char *hashtype= HASHTYPE;
 
 static const struct forceinfo {
   const char *name;
@@ -516,6 +517,7 @@
   { "admindir",          0,   1, NULL,          &admindir, NULL,          0 },
   { "instdir",           0,   1, NULL,          &instdir,  NULL,          0 },
   { "ignore-depends",    0,   1, NULL,          NULL,      ignoredepends, 0 },
+  { "hash",              0,   1, NULL,          &hashtype, NULL,          0 },
   { "force",             0,   2, NULL,          NULL,      setforce,      1 },
   { "refuse",            0,   2, NULL,          NULL,      setforce,      0 },
   { "no-force",          0,   2, NULL,          NULL,      setforce,      0 },
diff -urb dpkg-1.15.5.6/src/archives.c dpkg-1.15.5.6-aj/src/archives.c
--- dpkg-1.15.5.6/src/archives.c	2010-01-09 03:23:06.000000000 +1000
+++ dpkg-1.15.5.6-aj/src/archives.c	2010-03-07 04:51:53.743360015 +1000
@@ -629,7 +629,7 @@
     /* We create the file with mode 0 to make sure nobody can do anything with
      * it until we apply the proper mode, which might be a statoverride.
      */
-    fd= open(fnamenewvb.buf, (O_CREAT|O_EXCL|O_WRONLY), 0);
+    fd= open(fnamenewvb.buf, (O_CREAT|O_EXCL|O_RDWR), 0);
     if (fd < 0)
       ohshite(_("unable to create `%.255s' (while processing `%.255s')"), fnamenewvb.buf, ti->Name);
     push_cleanup(cu_closefd, ehflag_bombout, NULL, 0, 1, &fd);
@@ -642,6 +642,13 @@
     }
     r= ti->Size % TARBLKSZ;
     if (r > 0) r= safe_read(tc->backendpipe,databuf,TARBLKSZ - r);
+
+    if (tc->hashfn)
+    {
+      lseek(fd, 0, SEEK_SET);
+      tc->hashfn(tc, fd, fnamevb.buf);
+    }
+
     if (nifd->namenode->statoverride) 
       debug(dbg_eachfile, "tarobject ... stat override, uid=%d, gid=%d, mode=%04o",
 			  nifd->namenode->statoverride->uid,
diff -urb dpkg-1.15.5.6/src/archives.h dpkg-1.15.5.6-aj/src/archives.h
--- dpkg-1.15.5.6/src/archives.h	2010-01-08 18:00:34.000000000 +1000
+++ dpkg-1.15.5.6-aj/src/archives.h	2010-03-07 03:41:10.871370703 +1000
@@ -27,6 +27,8 @@
   int backendpipe;
   struct pkginfo *pkg;
   struct fileinlist **newfilesp;
+  void (*hashfn)(struct tarcontext *tc, int fd, char *fname);
+  FILE *hashfile;
 };
 
 struct pkg_deconf_list {
diff -urb dpkg-1.15.5.6/src/main.h dpkg-1.15.5.6-aj/src/main.h
--- dpkg-1.15.5.6/src/main.h	2010-01-09 03:23:06.000000000 +1000
+++ dpkg-1.15.5.6-aj/src/main.h	2010-03-07 04:27:39.835358649 +1000
@@ -127,6 +127,7 @@
 extern const char *admindir;
 extern const char *instdir;
 extern struct pkg_list *ignoredependss;
+extern const char *hashtype;
 extern const char architecture[];
 
 struct invoke_hook {
diff -urb dpkg-1.15.5.6/src/processarc.c dpkg-1.15.5.6-aj/src/processarc.c
--- dpkg-1.15.5.6/src/processarc.c	2010-01-09 03:23:06.000000000 +1000
+++ dpkg-1.15.5.6-aj/src/processarc.c	2010-03-07 04:42:17.299356104 +1000
@@ -72,6 +72,28 @@
   return pfilename;
 }
 
+static void 
+nullhashfn(struct tarcontext *tc, int fd, char *fname)
+{
+  if (fname[0] == '/') 
+    fname += 1;
+  if (fname[0] == '\0')
+    return;
+  fprintf(tc->hashfile, "-  %s\n", fname);
+}
+
+static void 
+md5hashfn(struct tarcontext *tc, int fd, char *fname)
+{
+  char md5hash[MD5HASHLEN+1];
+  if (fname[0] == '/') 
+    fname += 1;
+  if (fname[0] == '\0')
+    return;
+  fd_md5(fd, md5hash, -1, _("md5hash"));
+  fprintf(tc->hashfile, "MD5:%s  %s\n", md5hash, fname);
+}
+
 void process_archive(const char *filename) {
   static const struct TarFunctions tf = {
     .Read = tarfileread,
@@ -608,6 +630,18 @@
   tc.pkg= pkg;
   tc.backendpipe= p1[0];
 
+  strcpy(cidirrest, "hashes");
+  tc.hashfile= fopen(cidir, "w");
+  if (tc.hashfile) {
+    push_cleanup(cu_closefile, ehflag_bombout, NULL, 0, 1, (void *)tc.hashfile);
+
+    if (strcmp(hashtype, "md5") == 0) {
+      tc.hashfn= md5hashfn;
+    } else {
+      tc.hashfn= nullhashfn;
+    }
+  }
+
   r= TarExtractor((void*)&tc, &tf);
   if (r) {
     if (errno) {
@@ -616,11 +650,17 @@
       ohshit(_("corrupted filesystem tarfile - corrupted package archive"));
     }
   }
+
   fd_null_copy(p1[0], -1, _("dpkg-deb: zap possible trailing zeros"));
   close(p1[0]);
   p1[0] = -1;
   subproc_wait_check(c1, BACKEND " --fsys-tarfile", PROCPIPE);
 
+  if (tc.hashfile) {
+    pop_cleanup(ehflag_normaltidy); /* tc.hashfile= fopen() */
+    if (fclose(tc.hashfile)) ohshite(_("error closing %.250s"),cidir);
+  }
+
   if (oldversionstatus == stat_halfinstalled || oldversionstatus == stat_unpacked) {
     /* Packages that were in `installed' and `postinstfailed' have been reduced
      * to `unpacked' by now, by the running of the prerm script.

Reply to: