[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#128818: [patch] packages.gz diff support for apt



Hi,

attached is the first version of a patch that will enable diff files
for the index files (Packages.gz, Sources.gz). It's basically an
implementation of the ideas of
http://lists.debian.org/debian-devel/2002/04/msg00502.html and
http://azure.humbug.org.au/~aj/blog/2003/12/02#2003-12-02-pdiffs

Patches for the package file are generated like this:
"diff Packages-23-08-2004 Packages-24-08-2004 | gzip -c >      \
 Packages_diff_`md5sum Packages-23-08-2004|awk '{print $1}'`.gz"

The code will download patches until it finds an empty patch; it then
assumes that the index is now up-to-date and stops. If it does not find
a patch it will automatically fall back to Packages.bz2 and then to
Packages.gz. The code is diffed against the arch repository at:
http://people.debian.org/~mdz/arch/apt@packages.debian.org
(apt@packages.debian.org/apt--main--0) 

It's up to the people generating the diffs how many they want to
provide. An amount like 10-20 sounds reasonable to me; that means 10-20
days in unstable.

I would love to get feedback from the apt upstream people (like Jason,
Gustavo, Matt). I wonder if this should be implemented differently
(like with a "patch" method). 

thanks,
 Michael

-- 
The first rule of holes is: when you find yourself in one, stop digging. - PJ
Linux is not The Answer. Yes is the answer. Linux is The Question. - Neo
--- orig/apt-pkg/acquire-item.cc
+++ mod/apt-pkg/acquire-item.cc
@@ -22,14 +22,15 @@
 #include <apt-pkg/error.h>
 #include <apt-pkg/strutl.h>
 #include <apt-pkg/fileutl.h>
+#include <apt-pkg/md5.h>
 
 #include <apti18n.h>
     
 #include <sys/stat.h>
-#include <unistd.h>
 #include <errno.h>
 #include <string>
 #include <stdio.h>
+#include <unistd.h>
 									/*}}}*/
 
 using std::string;
@@ -135,21 +136,29 @@
    instantiated to fetch the revision file */   
 pkgAcqIndex::pkgAcqIndex(pkgAcquire *Owner,
 			 string URI,string URIDesc,string ShortDesc) :
-                      Item(Owner), RealURI(URI)
+   Item(Owner), RealURI(URI), Description(URIDesc)
 {
    Decompression = false;
    Erase = false;
-   
-   DestFile = _config->FindDir("Dir::State::lists") + "partial/";
-   DestFile += URItoFileName(URI);
 
-   // Create the item
-   Desc.URI = URI + ".bz2"; 
+   Debug = _config->FindB("Debug::pkgAcquire::Diffs",false);
+
    Desc.Description = URIDesc;
    Desc.Owner = this;
    Desc.ShortDesc = ShortDesc;
+
+   // check if we have a (possible older) package file, if not dl it 
+   string CurrentPackagesFile = _config->FindDir("Dir::State::lists");
+   CurrentPackagesFile += URItoFileName(RealURI);
+   if(!FileExists(CurrentPackagesFile) || !_config->FindB("Acquire::Diffs",true)) {
+      Desc.URI = URI + ".bz2";
+      DestFile = _config->FindDir("Dir::State::lists") + "partial/";
+      DestFile += URItoFileName(URI);
       
-   QueueURI(Desc);
+      QueueURI(Desc);
+   } else {
+      QueueDiff(URI);
+   }
 }
 									/*}}}*/
 // AcqIndex::Custom600Headers - Insert custom request headers		/*{{{*/
@@ -170,9 +179,29 @@
 
 void pkgAcqIndex::Failed(string Message,pkgAcquire::MethodConfig *Cnf)
 {
+   if(Debug)
+      std::cout << "\npkgAcqIndex::Failed()" << std::endl;
+
+   // if we are in diff download mode and anything goes wrong, back out!
+   if(Desc.URI.find("_diff_") != string::npos) {
+      if(Debug)
+	 std::cout << "diff not found, retrying complete File next" 
+		   << std::endl;
+
+      Desc.URI = RealURI + ".bz2";
+      DestFile = _config->FindDir("Dir::State::lists") + "partial/";
+      DestFile += URItoFileName(RealURI);
+
+      QueueURI(Desc);
+      return;
+   }
+   
    // no .bz2 found, retry with .gz
    if(Desc.URI.substr(Desc.URI.size()-3,Desc.URI.size()-1) == "bz2") {
+      if(Debug)
+	 std::cout << "bz2 not found, trying .gz" << std::endl;
       Desc.URI = Desc.URI.substr(0,Desc.URI.size()-3) + "gz"; 
+      Desc.Description = Description;
       QueueURI(Desc);
       return;
    }
@@ -181,6 +210,58 @@
    Item::Failed(Message,Cnf);
 }
 
+// queue the next diff, calc md5sum for it
+void pkgAcqIndex::QueueDiff(string URI)
+{
+   string CurrentPackagesFile = _config->FindDir("Dir::State::lists");
+   CurrentPackagesFile += URItoFileName(RealURI);
+
+   FileFd fd(CurrentPackagesFile, FileFd::ReadOnly);
+   MD5Summation MD5;
+   MD5.AddFD(fd.Fd(), fd.Size());
+   Desc.URI = URI + "_diff_" + string(MD5.Result()) + ".gz"; 
+   Desc.Description = Description + string("-diff");
+
+   DestFile = _config->FindDir("Dir::State::lists") + "partial/";
+   DestFile += URItoFileName(URI) + string("_diff_");
+
+   if(Debug)
+      std::cout << "\ntrying: " << Desc.URI << std::endl;
+
+   QueueURI(Desc);
+}
+
+// do the actual patching
+void pkgAcqIndex::Patch(string PatchFile)
+{
+   char *error = NULL;
+
+   string FinalFile = _config->FindDir("Dir::State::lists");
+   FinalFile += URItoFileName(RealURI);
+
+   int Process = ExecFork();
+   if (Process == 0)
+   {
+      chdir(_config->FindDir("Dir::State::lists").c_str());
+      string cmd = "patch -s " + FinalFile + " < " + PatchFile;
+      _exit(system(cmd.c_str()));
+   }
+   ExecWait(Process, error, true);
+   if(error != NULL)
+      _error->Error("Patch failed: %s ", error);
+}
+
+// check if we are at the last diff
+bool pkgAcqIndex::DiffEmpty(string PatchFile)
+{
+   FileFd fd(PatchFile, FileFd::ReadOnly);
+   return (fd.Size() == 0);
+}
+
+void pkgAcqIndex::Start(string Message,unsigned long Size)
+{
+   FileSize = Size;
+}
 
 // AcqIndex::Done - Finished a fetch					/*{{{*/
 // ---------------------------------------------------------------------
@@ -194,6 +275,49 @@
 {
    Item::Done(Message,Size,MD5,Cfg);
 
+   // we are working on diffs
+   if(Desc.URI.find("_diff_") != string::npos) {
+      if(Debug)
+	 std::cout << "\nfound diff file, about to unpack and apply patch" 
+		   << std::endl;
+
+      // unpack first
+      string compExt = Desc.URI.substr(Desc.URI.size()-3,Desc.URI.size()-1);
+      char *decompProg = NULL;
+      if(compExt == "bz2") 
+	 decompProg = "bzip2";
+      else if(compExt == ".gz") 
+	 decompProg = "gzip";
+      
+      if(decompProg != NULL) {
+	 if(Debug)
+	    std::cout << "Unpacking the patch file" << std::endl;
+	 Decompression = true;
+	 Desc.URI = string(decompProg) + ":" + DestFile;
+	 DestFile += ".decomp";
+	 QueueURI(Desc);
+	 Mode = decompProg;
+	 return;
+      }
+      
+      if(!DiffEmpty(DestFile)) { // need ... more ... patches 
+	 if(Debug)
+	    std::cout << "Applying patch file" << std::endl;
+	 // do the patching
+	 Patch(DestFile);
+	 // qeue next diff
+	 QueueDiff(RealURI);
+	 return;
+      }
+
+      /* We restore the original name to DestFile so that the clean operation
+         will work OK */
+      DestFile = _config->FindDir("Dir::State::lists") + "partial/";
+      DestFile += URItoFileName(RealURI);
+
+      return;
+   }
+
    if (Decompression == true)
    {
       // Done, move it into position


--- orig/apt-pkg/acquire-item.h
+++ mod/apt-pkg/acquire-item.h
@@ -84,12 +84,19 @@
    
    bool Decompression;
    bool Erase;
+   bool Debug;
    pkgAcquire::ItemDesc Desc;
    string RealURI;
-   
+   string Description;
+
+   void QueueDiff(string URI);
+   void Patch(string PatchFile);
+   bool DiffEmpty(string PatchFile);
+
    public:
    
    // Specialized action members
+   virtual void Start(string Message,unsigned long Size);
    virtual void Failed(string Message,pkgAcquire::MethodConfig *Cnf);
    virtual void Done(string Message,unsigned long Size,string Md5Hash,
 		     pkgAcquire::MethodConfig *Cnf);



Reply to: