[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

partial mirroring



I wrote a small Python script to create a partial Debian mirror. In
contrast to other mirror scripts that I know of, it is capable of
mirroring a given list of packages from multiple dists. To be able to
do this, the script parses the Packages files and keeps track of which
information goes with which dist (apt-ftparchive or dpkg-scanpackages
wouldn't work for multiple dists). After fetching the required packages
(if they don't exist already), the Packages files for all dists are
generated from the information gathered previously. I use this in
combination with a Perl script I wrote some time ago which accepts a
list of packages and resolves all dependencies with AptPkg::Cache. The
code could probably be cleaned up and documented, but otherwise it seems
to work.

I'd be happy about feedback,

Roland

-- 
Roland Bauerschmidt
#!/usr/bin/perl -w
# vim: nowrap ts=8
# apt-mkpkglist -- create consistent package list with resolved dependencies
# Copyright 2001 Roland Bauerschmidt <rb@debian.org>, GPL
# with lots of help from Brendan O'Dea over mail and IRC...

use strict;
use AptPkg::Config '$_config';
use AptPkg::System '$_system';
use AptPkg::Cache;

# Program name without its directory part, used as a prefix in diagnostics.
(my $self = $0) =~ s#.*/##;
$_config->init();
$_system = $_config->system or die 'system';
$_config->{quiet} = 2;

# Hand the option table and the command line to parse_cmdline(). The list it
# returns is kept in this lexical @ARGV and is treated below as the
# positional arguments (input file, output file). The three options are
# read back later through $_config->get_bool().
my @ARGV = $_config->parse_cmdline([
		[ 'v', 'verbose', 'verbose' ],
		[ 'R', 'recommends', 'recommends' ],
		[ 'S', 'suggests', 'suggests' ],
	], @ARGV);

my $input = shift(@ARGV) or die "$self: no input file given\n";
my $output = shift(@ARGV) or die "$self: no output file given\n";

# Three-argument open() so that file names are never interpreted as open
# modes, and every failure is reported instead of silently producing an
# empty package list. "-" keeps meaning standard input/output, as it did
# with the two-argument form used previously.
my ($in_fh, $out_fh);
if($input eq '-') {
	$in_fh = \*STDIN;
} else {
	open($in_fh, '<', $input) or die "$self: cannot open $input: $!\n";
}
if($output eq '-') {
	$out_fh = \*STDOUT;
} else {
	open($out_fh, '>', $output) or die "$self: cannot open $output: $!\n";
}

# Shared state used by push_package(): the resolved package names (in the
# order they were added), the virtual package names already provided by
# one of them, and the APT package cache.
my @packages;
my @virtual;
my $cache = AptPkg::Cache->new(0);

# Read the wanted packages line by line. Everything from a '#' to the end of
# the line is a comment; surrounding whitespace and empty lines are ignored.
while(defined(my $line = <$in_fh>))
{
	chomp($line);
	$line =~ s/ *\#.*//;
	$line =~ s/^\s+//;
	$line =~ s/\s+$//;
	next if($line eq "");
	push_package($line);
}
close($in_fh);

# Write one resolved package name per line.
foreach my $name (@packages) {
	print $out_fh $name."\n";
}
# Buffered write errors only surface when the handle is closed.
close($out_fh) or die "$self: error writing $output: $!\n";

# push_package(NAME)
#
# Add package NAME to @packages together with, recursively, every package it
# depends on. Depends and PreDepends are always followed; Recommends and
# Suggests only when the corresponding configuration option is set.
#
# NAME may also be a virtual package: unless one of the packages selected so
# far already provides it (tracked in @virtual), the first package listed as
# a provider is selected instead.
#
# Names that are neither a real nor a provided package are ignored.
# Returns nothing useful.
sub push_package {
	my $pkg = shift;
	dprintf("processing package %s\n", $pkg);
	if(in_array($pkg, @packages)) {
		dprintf("  already satisfied\n");
		return;
	}
	my @depends;

	# Look the package up once; all further accesses go through $entry.
	my $entry = $cache->{$pkg};

	# for real package we get all the dependencies for later recursion.
	if(defined($entry->{VersionList})) {
		# Only the first entry of the version list is considered.
		my $version = $entry->{VersionList}[0];

		# add all the packages this package depends on into an array, since
		# those dependencies will also have to be satisfied. This is
		# recursive.
		if($version->{DependsList}) {
			foreach my $dep (@{$version->{DependsList}}) {
				my $type = $dep->{DepType};
				my $wanted = $type eq "Depends" || $type eq "PreDepends" ||
					($type eq "Recommends" && $_config->get_bool('recommends')) ||
					($type eq "Suggests" && $_config->get_bool('suggests'));
				next unless($wanted);
				dprintf("  %s %s\n", lc($type), $dep->{TargetPkg}{Name});
				push(@depends, $dep->{TargetPkg}{Name});
			}
		}
		# push all the packages this package provides into an array, so we
		# don't need to add any other packages later if dependencies on
		# virtual packages are already satisfied through this package.
		# The names a package provides hang off its *version*; the
		# package-level ProvidesList is the reverse list (who provides this
		# name) and would only ever yield $pkg itself here.
		if($version->{ProvidesList}) {
			foreach my $provided (@{$version->{ProvidesList}}) {
				push(@virtual, $provided->{Name});
			}
		}
		# Record the package before recursing so that dependency cycles
		# terminate at the in_array() check above.
		push(@packages, $pkg);
	# for virtual packages check if dependency is already satisfied,
	# otherwise add a package to satisfy the dependency
	} elsif(!in_array($pkg, @virtual) && defined($entry->{ProvidesList})) {
		# since the code for selecting package with highest priority
		# is not there yet, just take the first one
		my $provider = $entry->{ProvidesList}[0];
		if(defined($provider)) {
			dprintf("  virtual package provided by %s\n",
				$provider->{OwnerPkg}{Name});
			push(@depends, $provider->{OwnerPkg}{Name});
		}
		# TODO: choose the provider with the highest priority. The original
		# author noted that this cannot be read from OwnerPkg directly and
		# has to be done through VerFile.
	}

	foreach my $name (@depends) {
		push_package($name) if(!in_array($name, @packages));
	}
}

# in_array(KEY, LIST)
#
# Return 1 if KEY is string-equal to at least one element of LIST,
# otherwise 0. The search stops at the first match.
sub in_array
{
	my ($needle, @haystack) = @_;
	for my $candidate (@haystack) {
		next unless($candidate eq $needle);
		return 1;
	}
	return 0;
}

# dprintf(FORMAT, ARGS...)
#
# printf() wrapper for progress messages: the arguments are passed to
# printf() unchanged, but only when the "verbose" configuration option
# is set; otherwise nothing is printed.
sub dprintf
{
	$_config->get_bool("verbose") and printf(@_);
}
#!/usr/bin/env python

# File naming the packages to mirror. The main section reads it line by
# line; each line is either "package" or "package/dist".
list = '/mirrors/debian/spool/packages'
# Local root of the mirror: package files, the spool/ copies of the
# downloaded Packages files and the generated dists/ tree are all placed
# below this directory.
base = '/mirrors/debian'

# Host name used in every source location below.
host = 'ftp.de.debian.org'
# Maps a dist name to the list of locations to read Packages files from.
# Each location is a tuple (host, root directory on that host, dist, section).
sources = {
	'sid': [
		(host, 'debian', 'sid', 'main'),
		(host, 'debian', 'sid', 'contrib'),
		(host, 'debian', 'sid', 'non-free'),
		(host, 'debian-non-US', 'sid', 'non-US/main'),
		(host, 'debian-non-US', 'sid', 'non-US/contrib'),
		(host, 'debian-non-US', 'sid', 'non-US/non-free'),
		]
	}

### DO NOT EDIT BEYOND THIS ###

import os, re, string, md5

class __Package__:
	"""One stanza of a Packages file.

	buffer is the list of the stanza's lines (without line terminators) and
	location is the (host, root, dist, section) tuple the stanza was read
	from. The Package, Filename and MD5sum fields are extracted into the
	attributes package, filename and md5sum; a field that does not occur in
	the stanza leaves its attribute at None.

	repr() of an instance is the stanza text, one line per buffer entry,
	each terminated by a newline.
	"""
	PackageMatch = re.compile('^Package: (.+)$')
	FilenameMatch = re.compile('^Filename: (.+)$')
	md5sumMatch = re.compile('^MD5sum: ([0-9a-fA-F]{32})$')
	# Class-level default only; every instance replaces it in __init__.
	buffer = []

	def __init__(self, buffer, location):
		self.buffer = buffer
		self.location = location
		# Always define the parsed fields so that callers can test them
		# even when the stanza is incomplete.
		self.package = None
		self.filename = None
		self.md5sum = None
		# (pattern, attribute) pairs, tried in this order for every line.
		fields = (
			(self.PackageMatch, 'package'),
			(self.FilenameMatch, 'filename'),
			(self.md5sumMatch, 'md5sum'),
		)
		for line in buffer:
			for pattern, attribute in fields:
				found = pattern.match(line)
				if found:
					setattr(self, attribute, found.group(1))
					break

	def __repr__(self):
		return '\n'.join(self.buffer) + '\n'

class packagesFile:
	"""Index of package stanzas read from one or more Packages files.

	Three views of the same __Package__ objects are kept:

	packages   package name -> {dist -> package}
	bysection  section (location[3]) -> list of packages
	bydist     dist -> list of packages

	Indexing an instance with a package name returns that package's
	{dist -> package} dictionary, or None if the name is unknown.
	"""
	PackageMatch = re.compile('^Package: (.*)$')

	def __init__(self):
		# Per-instance containers, so separate instances never share state.
		self.packages = {}
		self.bysection = {}
		self.bydist = {}

	def _record(self, name, pkg, dist):
		"""Enter pkg into all three indexes under name and dist."""
		self.packages.setdefault(name, {})[dist] = pkg
		self.bysection.setdefault(pkg.location[3], []).append(pkg)
		self.bydist.setdefault(dist, []).append(pkg)

	def add(self, location, file, dist):
		"""Read the Packages file at path `file` and index its stanzas.

		location is the (host, root, dist, section) tuple stored on every
		package created; dist is the key the packages are filed under. A
		stanza starts at a "Package: " line and runs up to the next such
		line or the end of the file; empty lines are dropped.
		"""
		# A dist is listed in bydist even if the file holds no stanza.
		if dist not in self.bydist:
			self.bydist[dist] = []

		current = None
		buffer = []
		fp = open(file)
		try:
			for line in fp:
				# Strip only the line terminator; the last line of a file
				# may come without one.
				line = line.rstrip('\n')
				found = self.PackageMatch.match(line)
				if found:
					if current is not None:
						self._record(current, __Package__(buffer, location), dist)
					buffer = [line]
					current = found.group(1)
				elif current is not None and line != '':
					buffer.append(line)
		finally:
			fp.close()

		# The final stanza is not followed by another "Package: " line, so
		# it has to be recorded here.
		if current is not None:
			self._record(current, __Package__(buffer, location), dist)

	def addpkg(self, pkg):
		"""Index an existing package object.

		The dist is taken from pkg.location[2] and the section from
		pkg.location[3].
		"""
		self._record(pkg.package, pkg, pkg.location[2])

	def __getitem__(self, index):
		return self.packages.get(index)

	def dump(self, dist, sect):
		"""Print every package that is filed under both dist and sect."""
		for pkg in self.bydist[dist]:
			if pkg in self.bysection[sect]:
				print(pkg)

def createDirectoryTree(dir):
	"""Create directory dir together with any missing parent directories.

	Nothing happens if dir already exists or is the empty string. Relative
	paths are created relative to the current working directory. OSError
	is raised if the directory cannot be created.
	"""
	if dir == '' or os.path.isdir(dir):
		return
	try:
		os.makedirs(dir)
	except OSError:
		# Another process may have created it in the meantime; only a
		# directory that is still missing is a real error.
		if not os.path.isdir(dir):
			raise

def download(host, path, dest):
	"""Copy path from host to the local file dest by running rsync.

	The directory that is to hold dest is created first. The source is
	passed to /usr/bin/rsync as "host::path" and the transfer is logged on
	standard output. The call waits for rsync to finish; its exit status
	is not examined.
	"""
	createDirectoryTree(os.path.dirname(dest))
	src = host + '::' + path
	print('		%s -> %s' % (src, dest))
	rsyncArgs = ['rsync', '--quiet', src, dest]
	os.spawnv(os.P_WAIT, '/usr/bin/rsync', rsyncArgs)

def md5sum(filename):
	"""Return the MD5 digest of the file filename as a hexadecimal string.

	The file is opened in binary mode and read in chunks, so large package
	files are never held in memory as a whole. Errors from opening or
	reading the file are propagated to the caller.
	"""
	# Local import: hashlib supersedes the deprecated md5 module.
	import hashlib

	digest = hashlib.md5()
	fp = open(filename, 'rb')
	try:
		while 1:
			chunk = fp.read(65536)
			if not chunk:
				break
			digest.update(chunk)
	finally:
		fp.close()
	return digest.hexdigest()

def downloadPackage(pkg):
	"""Make sure the file of package pkg is present below base.

	The local copy lives at base joined with pkg.filename. If it exists and
	its MD5 digest equals pkg.md5sum, nothing is fetched. Otherwise the file
	is downloaded from pkg.location (host and root directory) and checked
	afterwards; a file that is still missing or does not match the digest
	is reported with a warning on standard output.
	"""
	dest = os.path.join(base, pkg.filename)

	# An intact local copy is hashed exactly once.
	if os.path.exists(dest) and pkg.md5sum == md5sum(dest):
		print('	file exists')
		return

	print('	downloading %s' % pkg.filename)
	download(pkg.location[0], os.path.join(pkg.location[1], pkg.filename), dest)

	# The download may have failed without leaving a file behind; test for
	# existence before hashing so that this yields the warning, not a crash.
	if not os.path.exists(dest) or pkg.md5sum != md5sum(dest):
		print('	WARNING: %s is broken' % pkg.filename)

if __name__ == '__main__':
	# Step 1: fetch the Packages file of every configured location into the
	# spool directory and index its contents.
	Packages = packagesFile()
	# dists[dist][section] collects the packages that go into the Packages
	# file generated for that dist and section.
	dists = {}

	print('Updating package information...')
	for dist in sources.keys():
		for location in sources[dist]:
			mirror, root, ddist, sect = location
			path = os.path.join('dists', dist, sect, 'binary-i386/Packages')
			# The spool copy is stored flat, with '/' replaced by '_'.
			localPath = os.path.join(base, 'spool', path.replace('/', '_'))
			remotePath = os.path.join(root, path)
			download(mirror, remotePath, localPath)
			Packages.add(location, localPath, dist)

			# Packages file array for all dists + sections
			if dist not in dists:
				dists[dist] = {}
			if sect not in dists[dist]:
				dists[dist][sect] = []

	# Step 2: download every package named in the list file. A line is
	# either "package" (all dists that have it) or "package/dist".
	fp = open(list)
	try:
		for line in fp.readlines():
			line = line.strip()
			if line == '': continue

			s_dist = None
			if '/' in line:
				packageName, s_dist = line.split('/', 1)
			else:
				packageName = line

			package = Packages[packageName]
			if package is None: continue

			print('Processing %s...' % packageName)
			if s_dist is None:
				wanted = package.keys()
			elif s_dist in package:
				wanted = [s_dist]
			else:
				# The package exists, but not in the requested dist.
				print('	WARNING: %s is not available in %s' % (packageName, s_dist))
				continue

			for dist in wanted:
				pkg = package[dist]
				downloadPackage(pkg)
				# A package listed more than once is written only once.
				if pkg not in dists[dist][pkg.location[3]]:
					dists[dist][pkg.location[3]].append(pkg)
	finally:
		fp.close()

	# Step 3: write one Packages file per dist and section, containing the
	# stanzas of the packages selected above.
	for dist in dists.keys():
		for sect in dists[dist].keys():
			print('Generating Packages file for %s:%s...' % (dist,sect))
			target = os.path.join(base, 'dists', dist, sect, 'binary-i386/Packages')
			# The directory does not exist yet on a fresh mirror.
			createDirectoryTree(os.path.dirname(target))
			fp = open(target, 'w')
			try:
				for pkg in dists[dist][sect]:
					fp.write(str(pkg)+'\n')
			finally:
				fp.close()



Reply to: