Bug#128818: [patch] packages.gz diff support for apt
Hi,
attached is the first version of a patch that will enable diff files
for the index files (Packages.gz, Sources.gz). It's basically an
implementation of the ideas in
http://lists.debian.org/debian-devel/2002/04/msg00502.html and
http://azure.humbug.org.au/~aj/blog/2003/12/02#2003-12-02-pdiffs
Patches for the Packages file are generated like this (each patch is named
after the md5sum of the older file it applies to):
"diff Packages-23-08-2004 Packages-24-08-2004 | gzip -c > \
Packages_diff_`md5sum Packages-23-08-2004|awk '{print $1}'`.gz"
The code will keep downloading patches until it finds an empty one; it then
assumes that the index is up to date and stops. If it does not find a
patch, it automatically falls back to Packages.bz2 and then to
Packages.gz. The code is diffed against the arch repository at:
http://people.debian.org/~mdz/arch/apt@packages.debian.org
(apt@packages.debian.org/apt--main--0)
It's up to the people generating the diffs how many they want to
provide. A number like 10-20 sounds reasonable to me; that corresponds to
10-20 days in unstable.
I would love to get feedback from the apt upstream people (like Jason,
Gustavo, Matt). I wonder if this should be implemented differently
(like with a "patch" method).
thanks,
Michael
--
The first rule of holes is: when you find yourself in one, stop digging. - PJ
Linux is not The Answer. Yes is the answer. Linux is The Question. - Neo
--- orig/apt-pkg/acquire-item.cc
+++ mod/apt-pkg/acquire-item.cc
@@ -22,14 +22,15 @@
#include <apt-pkg/error.h>
#include <apt-pkg/strutl.h>
#include <apt-pkg/fileutl.h>
+#include <apt-pkg/md5.h>
#include <apti18n.h>
#include <sys/stat.h>
-#include <unistd.h>
#include <errno.h>
#include <string>
#include <stdio.h>
+#include <unistd.h>
/*}}}*/
using std::string;
@@ -135,21 +136,29 @@
instantiated to fetch the revision file */
pkgAcqIndex::pkgAcqIndex(pkgAcquire *Owner,
string URI,string URIDesc,string ShortDesc) :
- Item(Owner), RealURI(URI)
+ Item(Owner), RealURI(URI), Description(URIDesc)
{
Decompression = false;
Erase = false;
-
- DestFile = _config->FindDir("Dir::State::lists") + "partial/";
- DestFile += URItoFileName(URI);
- // Create the item
- Desc.URI = URI + ".bz2";
+ Debug = _config->FindB("Debug::pkgAcquire::Diffs",false);
+
Desc.Description = URIDesc;
Desc.Owner = this;
Desc.ShortDesc = ShortDesc;
+
+ // check if we have a (possible older) package file, if not dl it
+ string CurrentPackagesFile = _config->FindDir("Dir::State::lists");
+ CurrentPackagesFile += URItoFileName(RealURI);
+ if(!FileExists(CurrentPackagesFile) || !_config->FindB("Acquire::Diffs",true)) {
+ Desc.URI = URI + ".bz2";
+ DestFile = _config->FindDir("Dir::State::lists") + "partial/";
+ DestFile += URItoFileName(URI);
- QueueURI(Desc);
+ QueueURI(Desc);
+ } else {
+ QueueDiff(URI);
+ }
}
/*}}}*/
// AcqIndex::Custom600Headers - Insert custom request headers /*{{{*/
@@ -170,9 +179,29 @@
void pkgAcqIndex::Failed(string Message,pkgAcquire::MethodConfig *Cnf)
{
+ if(Debug)
+ std::cout << "\npkgAcqIndex::Failed()" << std::endl;
+
+ // if we are in diff download mode and anything goes wrong, back out!
+ if(Desc.URI.find("_diff_") != string::npos) {
+ if(Debug)
+ std::cout << "diff not found, retrying complete File next"
+ << std::endl;
+
+ Desc.URI = RealURI + ".bz2";
+ DestFile = _config->FindDir("Dir::State::lists") + "partial/";
+ DestFile += URItoFileName(RealURI);
+
+ QueueURI(Desc);
+ return;
+ }
+
// no .bz2 found, retry with .gz
if(Desc.URI.substr(Desc.URI.size()-3,Desc.URI.size()-1) == "bz2") {
+ if(Debug)
+ std::cout << "bz2 not found, trying .gz" << std::endl;
Desc.URI = Desc.URI.substr(0,Desc.URI.size()-3) + "gz";
+ Desc.Description = Description;
QueueURI(Desc);
return;
}
@@ -181,6 +210,58 @@
Item::Failed(Message,Cnf);
}
+// queue the next diff, calc md5sum for it
+void pkgAcqIndex::QueueDiff(string URI)
+{
+ string CurrentPackagesFile = _config->FindDir("Dir::State::lists");
+ CurrentPackagesFile += URItoFileName(RealURI);
+
+ FileFd fd(CurrentPackagesFile, FileFd::ReadOnly);
+ MD5Summation MD5;
+ MD5.AddFD(fd.Fd(), fd.Size());
+ Desc.URI = URI + "_diff_" + string(MD5.Result()) + ".gz";
+ Desc.Description = Description + string("-diff");
+
+ DestFile = _config->FindDir("Dir::State::lists") + "partial/";
+ DestFile += URItoFileName(URI) + string("_diff_");
+
+ if(Debug)
+ std::cout << "\ntrying: " << Desc.URI << std::endl;
+
+ QueueURI(Desc);
+}
+
+// do the actual patching
+void pkgAcqIndex::Patch(string PatchFile)
+{
+ char *error = NULL;
+
+ string FinalFile = _config->FindDir("Dir::State::lists");
+ FinalFile += URItoFileName(RealURI);
+
+ int Process = ExecFork();
+ if (Process == 0)
+ {
+ chdir(_config->FindDir("Dir::State::lists").c_str());
+ string cmd = "patch -s " + FinalFile + " < " + PatchFile;
+ _exit(system(cmd.c_str()));
+ }
+ ExecWait(Process, error, true);
+ if(error != NULL)
+ _error->Error("Patch failed: %s ", error);
+}
+
+// check if we are at the last diff
+bool pkgAcqIndex::DiffEmpty(string PatchFile)
+{
+ FileFd fd(PatchFile, FileFd::ReadOnly);
+ return (fd.Size() == 0);
+}
+
+void pkgAcqIndex::Start(string Message,unsigned long Size)
+{
+ FileSize = Size;
+}
// AcqIndex::Done - Finished a fetch /*{{{*/
// ---------------------------------------------------------------------
@@ -194,6 +275,49 @@
{
Item::Done(Message,Size,MD5,Cfg);
+ // we are working on diffs
+ if(Desc.URI.find("_diff_") != string::npos) {
+ if(Debug)
+ std::cout << "\nfound diff file, about to unpack and apply patch"
+ << std::endl;
+
+ // unpack first
+ string compExt = Desc.URI.substr(Desc.URI.size()-3,Desc.URI.size()-1);
+ char *decompProg = NULL;
+ if(compExt == "bz2")
+ decompProg = "bzip2";
+ else if(compExt == ".gz")
+ decompProg = "gzip";
+
+ if(decompProg != NULL) {
+ if(Debug)
+ std::cout << "Unpacking the patch file" << std::endl;
+ Decompression = true;
+ Desc.URI = string(decompProg) + ":" + DestFile;
+ DestFile += ".decomp";
+ QueueURI(Desc);
+ Mode = decompProg;
+ return;
+ }
+
+ if(!DiffEmpty(DestFile)) { // need ... more ... patches
+ if(Debug)
+ std::cout << "Applying patch file" << std::endl;
+ // do the patching
+ Patch(DestFile);
+ // qeue next diff
+ QueueDiff(RealURI);
+ return;
+ }
+
+ /* We restore the original name to DestFile so that the clean operation
+ will work OK */
+ DestFile = _config->FindDir("Dir::State::lists") + "partial/";
+ DestFile += URItoFileName(RealURI);
+
+ return;
+ }
+
if (Decompression == true)
{
// Done, move it into position
--- orig/apt-pkg/acquire-item.h
+++ mod/apt-pkg/acquire-item.h
@@ -84,12 +84,19 @@
bool Decompression;
bool Erase;
+ bool Debug;
pkgAcquire::ItemDesc Desc;
string RealURI;
-
+ string Description;
+
+ void QueueDiff(string URI);
+ void Patch(string PatchFile);
+ bool DiffEmpty(string PatchFile);
+
public:
// Specialized action members
+ virtual void Start(string Message,unsigned long Size);
virtual void Failed(string Message,pkgAcquire::MethodConfig *Cnf);
virtual void Done(string Message,unsigned long Size,string Md5Hash,
pkgAcquire::MethodConfig *Cnf);
Reply to: