partial mirroring
I wrote a small Python script to create a partial Debian mirror. In
contrast to other mirror scripts that I know of, it is capable of
mirroring a given list of packages from multiple dists. To be able to
do this, the script parses the Packages files and keeps track of which
information goes with which dist (apt-ftparchive or dpkg-scanpackages
wouldn't work for multiple dists). After fetching the required packages
(if they don't exist already), the Packages files for all dists are
generated from the information gathered previously. I use this in
combination with a Perl script I wrote some time ago which accepts a
list of packages and resolves all dependencies with AptPkg::Cache. The
code could probably be cleaned up and documented, but otherwise it seems
to work.
I'd be happy about feedback,
Roland
--
Roland Bauerschmidt
#!/usr/bin/perl -w
# vim: nowrap ts=8
# apt-mkpkglist -- create consistent package list with resolved dependencies
# Copyright 2001 Roland Bauerschmidt <rb@debian.org>, GPL
# with lots of help from Brendan O'Dea over mail and IRC...
use strict;
use AptPkg::Config '$_config';
use AptPkg::System '$_system';
use AptPkg::Cache;
# Script name without its directory part; used to prefix error messages.
(my $self = $0) =~ s#.*/##;

# Bring up the APT configuration and system objects before opening the cache.
$_config->init();
$_system = $_config->system or die 'system';
$_config->{quiet} = 2;

# Strip the recognised options from the command line. What parse_cmdline
# returns are the remaining positional arguments; they are stored back into
# the global @ARGV instead of a lexical that would shadow it.
@ARGV = $_config->parse_cmdline([
    [ 'v', 'verbose',    'verbose'    ],
    [ 'R', 'recommends', 'recommends' ],
    [ 'S', 'suggests',   'suggests'   ],
], @ARGV);

my $input  = shift(@ARGV) or die "$self: no input file given\n";
my $output = shift(@ARGV) or die "$self: no output file given\n";

# Fail loudly instead of silently reading nothing / writing nowhere.
open(IN, '<', $input)
    or die "$self: cannot open $input: $!\n";
open(OUT, '>', $output)
    or die "$self: cannot open $output for writing: $!\n";

# File-level state shared with push_package():
#   @packages - real packages selected so far (this becomes the output)
#   @virtual  - virtual package names already satisfied by a selected package
#   $cache    - the APT package cache used for all lookups
my @packages;
my @virtual;
my $cache = AptPkg::Cache->new(0);

# One package name per input line; "#" starts a comment.  Read line by line
# into a lexical so the recursion in push_package() cannot disturb it.
while (defined(my $line = <IN>)) {
    chomp($line);
    $line =~ s/\s*\#.*//;    # drop comments
    $line =~ s/^\s+//;       # drop surrounding whitespace so that
    $line =~ s/\s+$//;       # " foo " and "foo\t" still name "foo"
    next if $line eq "";
    push_package($line);
}
close(IN);

foreach my $name (@packages) {
    print OUT $name."\n";
}

# Buffered write errors (e.g. a full disk) only surface on close.
close(OUT) or die "$self: error writing $output: $!\n";
# push_package(NAME)
#
# Add the package NAME to @packages together with everything it depends on
# (recursively).  Depends and PreDepends are always followed; Recommends and
# Suggests only when the corresponding option is set.  If NAME is a virtual
# package that is not yet satisfied, the first package providing it is
# selected instead.  Names unknown to the cache are skipped.
sub push_package {
    my $pkg = shift;

    dprintf("processing package %s\n", $pkg);
    if (in_array($pkg, @packages)) {
        dprintf(" already satisfied\n");
        return;
    }

    # Look the entry up once.  NOTE(review): this assumes the cache yields
    # undef for a name it does not know -- confirm against AptPkg::Cache.
    my $entry = $cache->{$pkg};
    if (!defined($entry)) {
        dprintf(" not found in package cache\n");
        return;
    }

    my @depends;

    # for real package we get all the dependencies for later recursion.
    if (defined($entry->{VersionList})) {
        # Only the first entry of the version list is considered.
        my $ver = $entry->{VersionList}[0];

        # add all the packages this package depends on into an array, since
        # those dependencies will also have to be satisfied. This is
        # recursive.
        if ($ver && $ver->{DependsList}) {
            foreach my $dep (@{$ver->{DependsList}}) {
                my $type = $dep->{DepType};
                if ($type eq "Depends" || $type eq "PreDepends" ||
                    ($type eq "Recommends" && $_config->get_bool('recommends')) ||
                    ($type eq "Suggests" && $_config->get_bool('suggests'))) {
                    dprintf(" %s %s\n", lc($type), $dep->{TargetPkg}{Name});
                    push(@depends, $dep->{TargetPkg}{Name});
                }
            }
        }

        # push all the packages this package provides into an array, so we
        # don't need to add any other packages later if dependencies on
        # virtual packages are already satisfied through this package.
        #
        # The list of names a package provides hangs off the *version*; the
        # package-level ProvidesList describes the providers OF this package
        # (that is how the virtual-package branch below uses it), so reading
        # it here would only ever record the package's own name.
        if ($ver && $ver->{ProvidesList}) {
            foreach my $prv (@{$ver->{ProvidesList}}) {
                push(@virtual, $prv->{Name});
            }
        }

        # Record the package before recursing so that dependency cycles
        # terminate at the in_array() check above.
        push(@packages, $pkg);

    # for virtual packages check if dependency is already satisfied,
    # otherwise add a package to satisfy the dependency
    } elsif (!in_array($pkg, @virtual)
             && $entry->{ProvidesList} && @{$entry->{ProvidesList}}) {
        # TODO: choose the provider with the highest priority (this needs
        # VerFile); until then simply take the first one.
        my $provider = $entry->{ProvidesList}[0]{OwnerPkg}{Name};
        dprintf(" virtual package provided by %s\n", $provider);
        push(@depends, $provider);
    }

    foreach my $dep (@depends) {
        push_package($dep) if (!in_array($dep, @packages));
    }
}
# in_array(KEY, LIST)
#
# Return 1 if KEY is string-equal to any element of LIST, 0 otherwise.
sub in_array
{
    my $needle = shift;
    for my $candidate (@_) {
        return 1 if $candidate eq $needle;
    }
    return 0;
}
# dprintf(FORMAT, ARGS...)
#
# printf-style diagnostic output, emitted only when the "verbose"
# configuration option is set.
sub dprintf
{
    $_config->get_bool("verbose") and printf(@_);
}
#!/usr/bin/env python
# File naming the packages to mirror, one per line.  The main program
# reads each line as either "package" or "package/dist".
list = '/mirrors/debian/spool/packages'

# Root of the local mirror; downloaded files, the spool directory and the
# generated Packages files all live below it.
base = '/mirrors/debian'

# Host the mirror is fetched from.
host = 'ftp.de.debian.org'

# For every dist, the locations to mirror from.  Each location is a tuple
# (host, archive root, dist, section).
sources = {
    'sid': [
        (host, 'debian', 'sid', 'main'),
        (host, 'debian', 'sid', 'contrib'),
        (host, 'debian', 'sid', 'non-free'),
        (host, 'debian-non-US', 'sid', 'non-US/main'),
        (host, 'debian-non-US', 'sid', 'non-US/contrib'),
        (host, 'debian-non-US', 'sid', 'non-US/non-free'),
    ],
}
### DO NOT EDIT BEYOND THIS ###
import os, re, string, md5
class __Package__:
    """One package stanza taken from a Packages file.

    Attributes:
        buffer:   the stanza's lines (without line terminators), in order.
        location: the source location the stanza was read for; elsewhere in
                  this file it is a (host, root, dist, section) tuple.
        package:  value of the "Package:" field, or None if absent.
        filename: value of the "Filename:" field, or None if absent.
        md5sum:   value of the "MD5sum:" field (32 hex digits), or None if
                  absent or malformed.
    """

    PackageMatch = re.compile('^Package: (.+)$')
    FilenameMatch = re.compile('^Filename: (.+)$')
    md5sumMatch = re.compile('^MD5sum: ([0-9a-fA-F]{32})$')

    def __init__(self, buffer, location):
        self.buffer = buffer
        self.location = location

        # Always define the parsed fields so that a stanza lacking one of
        # them yields None instead of an AttributeError on later access.
        self.package = None
        self.filename = None
        self.md5sum = None

        fields = (
            ('package', self.PackageMatch),
            ('filename', self.FilenameMatch),
            ('md5sum', self.md5sumMatch),
        )
        for line in buffer:
            for attribute, pattern in fields:
                found = pattern.match(line)
                if found:
                    # If a field occurs more than once the last one wins.
                    setattr(self, attribute, found.group(1))
                    break

    def __repr__(self):
        """Return the stanza as text, one field per line, newline-terminated."""
        return '\n'.join(self.buffer) + '\n'
class packagesFile:
    """Index of package stanzas gathered from one or more Packages files.

    Attributes:
        packages:  package name -> {dist: package object}
        bysection: section -> list of package objects
        bydist:    dist -> list of package objects
    """

    PackageMatch = re.compile('^Package: (.*)$')

    def __init__(self):
        # Per-instance containers; as class attributes they would be shared
        # by every packagesFile object.
        self.packages = {}
        self.bysection = {}
        self.bydist = {}

    def _register(self, name, pkg, dist, sect):
        """Record pkg under its name, dist and section."""
        self.packages.setdefault(name, {})[dist] = pkg
        self.bysection.setdefault(sect, []).append(pkg)
        self.bydist.setdefault(dist, []).append(pkg)

    def add(self, location, file, dist):
        """Parse the Packages file `file` and index every stanza in it.

        location is stored on each created package; location[3] is used as
        the section.  dist is the key the stanzas are filed under.  A stanza
        starts at a "Package:" line and runs up to the next one; blank lines
        are dropped.  Raises whatever open() raises if the file is unreadable.
        """
        # The dist is known from now on even if the file holds no stanza.
        self.bydist.setdefault(dist, [])

        current = None
        buffer = []
        with open(file) as fp:
            for line in fp:
                # Strip only the terminator; slicing off the last character
                # would damage an unterminated final line.
                line = line.rstrip('\n')
                found = self.PackageMatch.match(line)
                if found:
                    if current is not None:
                        self._register(current, __Package__(buffer, location),
                                       dist, location[3])
                    buffer = [line]
                    current = found.group(1)
                elif current is not None and line != '':
                    buffer.append(line)

        # Flush the final stanza: no "Package:" line follows it, so the loop
        # above never registers it.
        if current is not None:
            self._register(current, __Package__(buffer, location),
                           dist, location[3])

    def addpkg(self, pkg):
        """Index an already constructed package object.

        The dist and section are taken from pkg.location[2] and
        pkg.location[3]; the name from pkg.package.
        """
        self._register(pkg.package, pkg, pkg.location[2], pkg.location[3])

    def __getitem__(self, index):
        """Return the {dist: package} mapping for a name, or None if unknown."""
        return self.packages.get(index)

    def dump(self, dist, sect):
        """Print every package that belongs to both dist and sect.

        Raises KeyError if either dist or sect is unknown.
        """
        # Membership is by identity (package objects define no equality), so
        # a set of ids gives the same answer as scanning the list each time.
        in_section = set([id(pkg) for pkg in self.bysection[sect]])
        for pkg in self.bydist[dist]:
            if id(pkg) in in_section:
                print(pkg)
def createDirectoryTree(dir):
    """Create the directory `dir` together with any missing parents.

    Does nothing if `dir` is empty or already exists.  Relative paths are
    created relative to the current working directory.  Raises OSError if
    the directory cannot be created.
    """
    if not dir or os.path.isdir(dir):
        return
    try:
        os.makedirs(dir)
    except OSError:
        # Tolerate the directory having appeared in the meantime; anything
        # else (permissions, a file in the way, ...) is a real error.
        if not os.path.isdir(dir):
            raise
def download(host, path, dest):
    """Fetch `path` from the rsync server `host` into the local file `dest`.

    The directory for dest is created first.  rsync is run synchronously as
    /usr/bin/rsync with --quiet on the source "host::path".

    Returns the value of os.spawnv (0 when rsync succeeded).  A non-zero
    status is reported on stdout but not raised, so one failed transfer does
    not abort the whole run.
    """
    src = '%s::%s' % (host, path)
    createDirectoryTree(os.path.dirname(dest))
    print(' %s -> %s' % (src, dest))
    status = os.spawnv(os.P_WAIT, '/usr/bin/rsync',
                       ['rsync', '--quiet', src, dest])
    if status != 0:
        print(' WARNING: rsync exited with status %s for %s' % (status, src))
    return status
def md5sum(filename):
    """Return the MD5 digest of the file `filename` as a hex string.

    The file is read in binary mode and in fixed-size chunks, so package
    files are hashed byte-for-byte without being loaded into memory whole.
    Raises whatever open() raises if the file cannot be read.
    """
    # hashlib supersedes the deprecated `md5` module; it is imported here so
    # this function does not depend on the file-level import list.
    import hashlib

    digest = hashlib.md5()
    with open(filename, 'rb') as fp:
        while 1:
            chunk = fp.read(65536)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()
def downloadPackage(pkg):
    """Make sure the file for `pkg` exists below `base` and matches its MD5.

    pkg must provide `filename`, `md5sum` and `location` (host at index 0,
    archive root at index 1).  The file is downloaded when it is missing or
    its checksum differs; a file that is still missing or wrong afterwards
    is reported with a warning.
    """
    filename = getattr(pkg, 'filename', None)
    if not filename:
        # A stanza without a Filename field cannot be located or fetched.
        print(' WARNING: no Filename field, skipping')
        return

    dest = os.path.join(base, filename)

    # An intact local copy needs no transfer and no second hashing pass.
    if os.path.exists(dest) and pkg.md5sum == md5sum(dest):
        print(' file exists')
        return

    print(' downloading %s' % filename)
    download(pkg.location[0], os.path.join(pkg.location[1], filename), dest)

    # The transfer may have failed outright, so check for the file before
    # hashing it.
    if not os.path.exists(dest) or pkg.md5sum != md5sum(dest):
        print(' WARNING: %s is broken' % filename)
if __name__ == '__main__':
    # Overall flow:
    #   1. fetch and parse the Packages file of every configured location,
    #   2. download every package named in the package list,
    #   3. write a Packages file per dist/section holding just the stanzas
    #      of the mirrored packages.
    Packages = packagesFile()

    # dist -> section -> stanzas to write into that Packages file
    dists = {}

    print('Updating package information...')
    for dist in sources.keys():
        for location in sources[dist]:
            host, root, ddist, sect = location
            path = os.path.join('dists', dist, sect, 'binary-i386/Packages')
            # The downloaded index is kept in the spool directory under a
            # flattened name.
            localPath = os.path.join(base, 'spool', path.replace('/', '_'))
            remotePath = os.path.join(root, path)
            download(host, remotePath, localPath)
            Packages.add(location, localPath, dist)
            # Packages file array for all dists + sections
            dists.setdefault(dist, {}).setdefault(sect, [])

    with open(list) as fp:
        for line in fp:
            # Each line is "package" or "package/dist".
            line = line.strip()
            s_dist = None
            if line.find('/') > 0:
                packageName, s_dist = line.split('/', 1)
            else:
                packageName = line
            package = Packages[packageName]
            if package is None:
                continue
            print('Processing %s...' % packageName)
            if s_dist is None:
                # No dist requested: mirror the package from every dist
                # that carries it.
                wanted = package.keys()
            elif s_dist in package:
                wanted = [s_dist]
            else:
                print(' WARNING: %s is not available in %s'
                      % (packageName, s_dist))
                continue
            for dist in wanted:
                pkg = package[dist]
                downloadPackage(pkg)
                dists[dist][pkg.location[3]].append(pkg)

    for dist in dists.keys():
        for sect in dists[dist].keys():
            print('Generating Packages file for %s:%s...' % (dist, sect))
            target = os.path.join(base, 'dists', dist, sect,
                                  'binary-i386/Packages')
            # The dists/ tree need not exist yet on a fresh mirror.
            createDirectoryTree(os.path.dirname(target))
            with open(target, 'w') as fp:
                for pkg in dists[dist][sect]:
                    fp.write(str(pkg) + '\n')
Reply to: