[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

2 patches for dpkg



Hello,

you will find the proposed patches attached together with the extensive 
descriptions (feel free to put them into the changelog if you decide to 
apply). The patches should apply on top of 1.14.19 and should be independent 
from each other. I would like to see them both in dpkg targeted for lenny, 
because:

0001 patch would help adoption of symbol files for large library packages 
(e.g. kde4libs. symbols is ~15 MB, gzipped 180 KB)

0002 reduces build time of big source packages significantly (i.e. less of 
developer time wasted while preparing packages) and I think it might fix 
build failures like [1] (wild guess (memory usage?), I'm not sure about the 
real cause).

P.S. I tested the patches, they seem to work OK.

1. 
http://buildd.debian.org/fetch.cgi?pkg=amarok;ver=1.4.9.1-2;arch=arm;stamp=1210553352
From 1b46f2697fccb7297b2a67566e7d6192ba9520f0 Mon Sep 17 00:00:00 2001
From: Modestas Vainius <modestas@vainius.eu>
Date: Thu, 15 May 2008 21:31:00 +0300
Subject: [PATCH] Add support for gzip compressed symbols files.

This patch adds support for loading and storing of gzip compressed
symbol files (symbols_gz). The rationale behind this is that symbol
files of C++ libraries tend to grow very large very quickly and so they
take up much space on user systems. Given that symbol files, stored in
the dpkg admin dir, are only interesting for the dpkg itself (namely for
dpkg-shlibdeps) but all together may waste quite a lot of valuable disk
space, it should be perfectly acceptable to compress them to save
space, with the downside of slightly increased processing time for
dpkg-shlibdeps. However, large gzipped symbol files will probably be
faster to process than uncompressed ones on systems with slow I/O.

A gzipped symbols file might be 90 or more times smaller than the
uncompressed one, which matters a lot if the file size is reduced e.g. from
~14 MB to < 200 KB (kdelibs5 case).

Therefore, this patch implements optional compression of the symbols
files. Support was added to Dpkg::Shlibs::SymbolFile, -z option was
added to dpkg-gensymbols and some small corrections were made to
dpkg-shlibdeps.

A usual .gz suffix cannot be used due to special handling of the dot
(.) in the filenames of the info files. Therefore '_gz' suffix is used
instead.

Signed-off-by: Modestas Vainius <modestas@vainius.eu>
---
 scripts/Dpkg/Shlibs/SymbolFile.pm |   62 ++++++++++++++++++++++++++++++++----
 scripts/dpkg-gensymbols.pl        |   13 ++++++--
 scripts/dpkg-shlibdeps.pl         |   15 +++++----
 3 files changed, 73 insertions(+), 17 deletions(-)

diff --git a/scripts/Dpkg/Shlibs/SymbolFile.pm b/scripts/Dpkg/Shlibs/SymbolFile.pm
index 7e068c6..19772a8 100644
--- a/scripts/Dpkg/Shlibs/SymbolFile.pm
+++ b/scripts/Dpkg/Shlibs/SymbolFile.pm
@@ -21,6 +21,12 @@ use Dpkg::ErrorHandling qw(syserr warning error);
 use Dpkg::Version qw(vercmp);
 use Dpkg::Fields qw(capit);
 
+use IO::Compress::Gzip qw($GzipError);
+use IO::Uncompress::Gunzip qw($GunzipError);
+
+use base qw(Exporter);
+our @EXPORT_OK = qw(exists_symfile open_symfile);
+
 my %blacklist = (
     '__bss_end__' => 1,		# arm
     '__bss_end' => 1,		# arm
@@ -98,6 +104,38 @@ sub clear_except {
     }
 }
 
+sub open_symfile {
+    my ($fh, $file) = @_;
+    my $error = undef;
+    my $type;
+
+    if (-r $file) {
+	$type = "gzip" if ($file =~ /\_gz$/);
+    } elsif (-r "${file}_gz") {
+	$file = "${file}_gz";
+	$type = "gzip";
+    } else {
+	return sprintf(_g("cannot open %s"), $file);
+    }
+
+    if (!defined($type)) {
+	$error = sprintf(_g("cannot open %s"), $file) unless open($_[0], "<", $file);
+    } elsif ($type eq "gzip") {
+	$error = sprintf(_g("cannot open %s") . ": %s", $file, $GunzipError)
+	    unless($_[0] = new IO::Uncompress::Gunzip($file)) ;
+    } else {
+	$error = sprintf(_g("cannot open %s"), $file);
+    }
+
+    $_[2] = $error if ($#_ == 2);
+    return (defined $error) ? undef : $file;
+}
+
+sub exists_symfile {
+    my $file = shift;
+    return (-e $file || -e "${file}_gz");
+}
+
 # Parameter seen is only used for recursive calls
 sub load {
     my ($self, $file, $seen, $current_object) = @_;
@@ -110,8 +148,9 @@ sub load {
     }
     $seen->{$file} = 1;
 
-    open(my $sym_file, "<", $file)
-	|| syserr(_g("cannot open %s"), $file);
+    my $open_error;
+    $file = open_symfile(my $sym_file, $file, $open_error)
+	|| syserr($open_error);
     my $object = $current_object;
     while (defined($_ = <$sym_file>)) {
 	chomp($_);
@@ -171,17 +210,26 @@ sub load {
 }
 
 sub save {
-    my ($self, $file, $with_deprecated) = @_;
-    $file = $self->{file} unless defined($file);
+    my ($self, $file, $with_deprecated, $compress) = @_;
+    $compress = "" unless defined $compress;
+    unless(defined $file) {
+	$file = $self->{file};
+	$file .= "_gz" if ($file !~ m/\_gz$/ && $compress eq "gzip");
+    }
     my $fh;
-    if ($file eq "-") {
-	$fh = \*STDOUT;
+    if ($compress eq "gzip") {
+	$fh = new IO::Compress::Gzip($file) ||
+	    syserr(_g("cannot write %s") . ": %s", $file, $GzipError);
     } else {
+	if ($file eq "-") {
+	    $fh = \*STDOUT;
+	} else {
 	open($fh, ">", $file)
 	    || syserr(_g("cannot write %s"), $file);
+	}
     }
     $self->dump($fh, $with_deprecated);
-    close($fh) if ($file ne "-");
+    close($fh) if ($compress || $file ne "-");
 }
 
 sub dump {
diff --git a/scripts/dpkg-gensymbols.pl b/scripts/dpkg-gensymbols.pl
index df50dad..af18bdf 100755
--- a/scripts/dpkg-gensymbols.pl
+++ b/scripts/dpkg-gensymbols.pl
@@ -25,6 +25,7 @@ my $input;
 my $output;
 my $debug = 0;
 my $host_arch = get_host_arch();
+my $compress = undef;
 
 sub version {
     printf _g("Debian %s version %s.\n"), $progname, $version;
@@ -59,6 +60,8 @@ Options:
                            file instead of the default file.
   -O<file>                 write to <file>, not .../DEBIAN/symbols.
   -O                       write to stdout, not .../DEBIAN/symbols.
+  -z                       compress output with gzip and write to
+                           .../DEBIAN/symbols_gz unless changed with -O.
   -d                       display debug information during work.
   -h, --help               show this help message.
       --version            show the version.
@@ -94,6 +97,8 @@ while (@ARGV) {
 	$input = $1;
     } elsif (m/^-O(.+)$/) {
 	$output = $1;
+    } elsif (m/^-z$/) {
+	$compress = "gzip";
     } elsif (m/^-(h|-help)$/) {
 	&usage; exit(0);
     } elsif (m/^--version$/) {
@@ -178,17 +183,19 @@ $symfile->clear_except(keys %{$od->{objects}});
 # Write out symbols files
 if ($stdout) {
     $output = "standard output";
-    $symfile->save("-");
+    $symfile->save("-", undef, $compress);
 } else {
     unless (defined($output)) {
 	unless($symfile->is_empty()) {
 	    $output = "$packagebuilddir/DEBIAN/symbols";
+	    $output .= "_gz" if ($compress eq "gzip");
 	    mkdir("$packagebuilddir/DEBIAN") if not -e "$packagebuilddir/DEBIAN";
 	}
     }
     if (defined($output)) {
-	print "Storing symbols in $output.\n" if $debug;
-	$symfile->save($output);
+	print "Storing symbols in $output",
+	    (defined($compress) ? " ($compress compressed)" : ""), ".\n" if $debug;
+	$symfile->save($output, undef, $compress);
     } else {
 	print "No symbol information to store.\n" if $debug;
     }
diff --git a/scripts/dpkg-shlibdeps.pl b/scripts/dpkg-shlibdeps.pl
index 82a9a01..92155a0 100755
--- a/scripts/dpkg-shlibdeps.pl
+++ b/scripts/dpkg-shlibdeps.pl
@@ -14,7 +14,7 @@ use Dpkg::Path qw(relative_to_pkg_root guess_pkg_root_dir
 use Dpkg::Version qw(compare_versions);
 use Dpkg::Shlibs qw(find_library);
 use Dpkg::Shlibs::Objdump;
-use Dpkg::Shlibs::SymbolFile;
+use Dpkg::Shlibs::SymbolFile qw(open_symfile exists_symfile);
 use Dpkg::Arch qw(get_host_arch);
 use Dpkg::Fields qw(capit);
 use Dpkg::Deps;
@@ -48,7 +48,7 @@ my $host_arch = get_host_arch();
 
 my (@pkg_shlibs, @pkg_symbols, @pkg_root_dirs);
 if (-d "debian") {
-    push @pkg_symbols, <debian/*/DEBIAN/symbols>;
+    push @pkg_symbols, <debian/*/DEBIAN/symbols*>;
     push @pkg_shlibs, <debian/*/DEBIAN/shlibs>;
     my %uniq = map { guess_pkg_root_dir($_) => 1 } (@pkg_symbols, @pkg_shlibs);
     push @pkg_root_dirs, keys %uniq;
@@ -650,7 +650,7 @@ sub find_symbols_file {
     }
 
     foreach my $file (@files) {
-	if (-e $file and symfile_has_soname($file, $soname)) {
+	if (exists_symfile($file) and symfile_has_soname($file, $soname)) {
 	    return $file;
 	}
     }
@@ -659,16 +659,17 @@ sub find_symbols_file {
 
 sub symfile_has_soname {
     my ($file, $soname) = @_;
-    open(SYM_FILE, "<", $file) ||
-        syserr(_g("cannot open file %s"), $file);
+    my $open_error;
+    open_symfile(my $sym_file, $file, $open_error) ||
+	syserr($open_error);
     my $result = 0;
-    while (<SYM_FILE>) {
+    while (<$sym_file>) {
 	if (/^\Q$soname\E /) {
 	    $result = 1;
 	    last;
 	}
     }
-    close(SYM_FILE);
+    close($sym_file);
     return $result;
 }
 
-- 
1.5.5.1

From c862c9af5aba3c7d40532fd9890a7da19ff0c074 Mon Sep 17 00:00:00 2001
From: Modestas Vainius <modestas@vainius.eu>
Date: Fri, 16 May 2008 13:33:24 +0300
Subject: [PATCH] Optimize dpkg-shlibdeps by caching symbol file and objdump objects

This patch optimizes dpkg-shlibdeps by caching parsed symbols files and
objdump objects. This way neither of the libraries or symbols files are
parsed more than once. This patch significantly improves performance of
dpkg-shlibdeps bringing it near to performance levels of << 1.14.8
dpkg-shlibdeps without losing any of the new functionality at all. Memory
requirements are reduced too.

This patch SHOULD NOT change the end result of dpkg-shlibdeps. If it
does, it is a bug.

Signed-off-by: Modestas Vainius <modestas@vainius.eu>
---
 scripts/Dpkg/Shlibs/Objdump.pm    |   12 +++++++---
 scripts/Dpkg/Shlibs/SymbolFile.pm |   17 +++++++++++++++
 scripts/dpkg-shlibdeps.pl         |   42 ++++++++++++++++++++++++++++++------
 3 files changed, 60 insertions(+), 11 deletions(-)

diff --git a/scripts/Dpkg/Shlibs/Objdump.pm b/scripts/Dpkg/Shlibs/Objdump.pm
index 9bb9802..fb1e276 100644
--- a/scripts/Dpkg/Shlibs/Objdump.pm
+++ b/scripts/Dpkg/Shlibs/Objdump.pm
@@ -30,10 +30,8 @@ sub new {
     return $self;
 }
 
-sub parse {
-    my ($self, $file) = @_;
-    my $obj = Dpkg::Shlibs::Objdump::Object->new($file);
-
+sub add_object {
+    my ($self, $obj) = @_;
     my $id = $obj->get_id;
     if ($id) {
 	$self->{objects}{$id} = $obj;
@@ -41,6 +39,12 @@ sub parse {
     return $id;
 }
 
+sub parse {
+    my ($self, $file) = @_;
+    my $obj = Dpkg::Shlibs::Objdump::Object->new($file);
+
+    return $self->add_object($obj);
+}
 
 sub locate_symbol {
     my ($self, $name) = @_;
diff --git a/scripts/Dpkg/Shlibs/SymbolFile.pm b/scripts/Dpkg/Shlibs/SymbolFile.pm
index 19772a8..f9e6d99 100644
--- a/scripts/Dpkg/Shlibs/SymbolFile.pm
+++ b/scripts/Dpkg/Shlibs/SymbolFile.pm
@@ -209,6 +209,23 @@ sub load {
     delete $seen->{$file};
 }
 
+sub merge_from_symfile {
+    my ($self, $src) = @_;
+    while (($soname, $srcobj) = each(%{$src->{objects}})) {
+	if (exists $self->{objects}{$soname}) {
+	    # Update/override infos only
+	    $self->{objects}{$soname}{deps} = $srcobj->{deps};
+	} else {
+	    # Shallow copy the soname object (because deps can be replaced later)
+	    my %obj;
+	    while (($key, $val) = each(%$srcobj)) {
+		$obj{$key} = $val;
+	    }
+	    $self->{objects}{$soname} = \%obj;
+	}
+    }
+}
+
 sub save {
     my ($self, $file, $with_deprecated, $compress) = @_;
     $compress = "" unless defined $compress;
diff --git a/scripts/dpkg-shlibdeps.pl b/scripts/dpkg-shlibdeps.pl
index 92155a0..4056e60 100755
--- a/scripts/dpkg-shlibdeps.pl
+++ b/scripts/dpkg-shlibdeps.pl
@@ -128,6 +128,10 @@ my %global_soname_notfound;
 my %global_soname_used;
 my %global_soname_needed;
 
+# Symfile and objdump caches
+my %dpkg_symfile_cache;
+my %dpkg_objdump_cache;
+
 my $cur_field;
 foreach my $file (keys %exec) {
     $cur_field = $exec{$file};
@@ -193,12 +197,23 @@ foreach my $file (keys %exec) {
 	    my $dpkg_symfile;
 	    if ($packagetype eq "deb") {
 		# Use fine-grained dependencies only on real deb
-		$dpkg_symfile = find_symbols_file($pkg, $soname, $lib);
-		if (defined $dpkg_symfile) {
-		    # Load symbol information
-		    print "Using symbols file $dpkg_symfile for $soname\n" if $debug;
-		    $symfile->load($dpkg_symfile);
+		if (exists $dpkg_symfile_cache{$pkg}) {
+		    if (defined $dpkg_symfile_cache{$pkg}) {
+			    $dpkg_symfile = $dpkg_symfile_cache{$pkg}{file};
+			    print "Using symbols file $dpkg_symfile (cached) for $soname\n" if $debug;
+		    }
+		} else {
+		    $dpkg_symfile = find_symbols_file($pkg, $soname, $lib);
+		    if (defined $dpkg_symfile) {
+			# Load symbol information
+			print "Using symbols file $dpkg_symfile for $soname\n" if $debug;
+			$dpkg_symfile_cache{$pkg} = new Dpkg::Shlibs::SymbolFile();
+			$dpkg_symfile_cache{$pkg}->load($dpkg_symfile);
+		    } else {
+			$dpkg_symfile_cache{$pkg} = undef;
+		    }
 		}
+		$symfile->merge_from_symfile($dpkg_symfile_cache{$pkg}) if (defined($dpkg_symfile));
 	    }
 	    if (defined($dpkg_symfile) && $symfile->has_object($soname)) {
 		# Initialize dependencies with the smallest minimal version
@@ -214,13 +229,26 @@ foreach my $file (keys %exec) {
 		}
 	    } else {
 		# No symbol file found, fall back to standard shlibs
-		my $id = $dumplibs_wo_symfile->parse($lib);
+		$dpkg_objdump_cache{$pkg} = {} unless (exists $dpkg_objdump_cache{$pkg});
+		my $id;
+		my $libobj;
+		if (exists $dpkg_objdump_cache{$pkg}{$lib}) {
+		    $libobj = $dpkg_objdump_cache{$pkg}{$lib};
+		    # We don't want to process the same lib more than once (redundant)
+		    next if ($dumplibs_wo_symfile->get_object($libobj->get_id()));
+		    $id = $dumplibs_wo_symfile->add_object($dpkg_objdump_cache{$pkg}{$lib});
+		    print "Using objdump (cached) for $soname (file $lib)\n" if $debug;
+		} else {
+		    $id = $dumplibs_wo_symfile->parse($lib);
+		    $libobj = $dumplibs_wo_symfile->get_object($id);
+		    $dpkg_objdump_cache{$pkg}{$lib} = $libobj;
+		    print "Using objdump for $soname (file $lib)\n" if $debug;
+		}
 		if (($id ne $soname) and ($id ne $lib)) {
 		    warning(_g("%s has an unexpected SONAME (%s)"), $lib, $id);
 		    $alt_soname{$id} = $soname;
 		}
 		push @soname_wo_symfile, $soname;
-		my $libobj = $dumplibs_wo_symfile->get_object($id);
 		# Only try to generate a dependency for libraries with a SONAME
 		if ($libobj->is_public_library() and not
 		    add_shlibs_dep($soname, $pkg, $lib)) {
-- 
1.5.5.1

Attachment: signature.asc
Description: This is a digitally signed message part.


Reply to: