[lintian] 01/03: L::Path: Merge type, _path_access and operm into one field
This is an automated email from the git hooks/post-receive script.
nthykier pushed a commit to branch master
in repository lintian.
commit fb074e43233fdca8b09f4d4ff475bf4b762d6e53
Author: Niels Thykier <niels@thykier.net>
Date: Fri Jul 17 19:09:09 2015 +0200
L::Path: Merge type, _path_access and operm into one field
Merge the 3 fields into one single 32bit integer field to reduce some
memory usage. This reduces cached memory usage by another 4.5% - 5%
when processing lintian itself.
Signed-off-by: Niels Thykier <niels@thykier.net>
---
debian/changelog | 3 ++
lib/Lintian/Collect/Package.pm | 66 +++++++++++++++++-----------
lib/Lintian/Path.pm | 97 ++++++++++++++++++++++++------------------
3 files changed, 100 insertions(+), 66 deletions(-)
diff --git a/debian/changelog b/debian/changelog
index add991b..1c95d51 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -23,6 +23,9 @@ lintian (2.5.34) UNRELEASED; urgency=medium
* lib/Lintian/Path.pm:
+ [NT] Rework some implementation details to reduce memory
consumption slightly.
+ + [NT] The undocumented "type" method has been removed from
+ the API. It was an implementation detail that has now
+ changed.
* reporting/templates:
+ [NT] Remove (uses of) the "invisible-anchor" css class as
diff --git a/lib/Lintian/Collect/Package.pm b/lib/Lintian/Collect/Package.pm
index b0e7977..0cbd283 100644
--- a/lib/Lintian/Collect/Package.pm
+++ b/lib/Lintian/Collect/Package.pm
@@ -32,21 +32,6 @@ use Lintian::Path;
use Lintian::Path::FSInfo;
use Lintian::Util qw(fail open_gz perm2oct normalize_pkg_path dequote_name);
-my %INDEX_FAUX_DIR_TEMPLATE = (
- 'name' => '',
- 'type' => 'd',
- 'basename' => '',
- 'owner' => 'root',
- 'group' => 'root',
- 'size' => 0,
- 'operm' => 0755,
- # Pick a "random" (but fixed) date
- # - hint, it's a good read. :)
- 'date' => '1998-01-25',
- 'time' => '22:55:34',
- 'faux' => 1,
-);
-
# A cache for (probably) the 5 most common permission strings seen in
# the wild.
# It may seem obscene, but it has an extreme "hit-ratio" and it is
@@ -59,6 +44,30 @@ my %PERM_CACHE = map { $_ => perm2oct($_) } (
'lrwxrwxrwx', # symlinks
);
+my %FILE_CODE2LPATH_TYPE = (
+ '-' => Lintian::Path::TYPE_FILE | Lintian::Path::OPEN_IS_OK,
+ 'h' => Lintian::Path::TYPE_HARDLINK | Lintian::Path::OPEN_IS_OK,
+ 'd' => Lintian::Path::TYPE_DIR | Lintian::Path::FS_PATH_IS_OK,
+ 'l' => Lintian::Path::TYPE_SYMLINK,
+ 'b' => Lintian::Path::TYPE_BLOCK_DEV,
+ 'c' => Lintian::Path::TYPE_CHAR_DEV,
+ 'p' => Lintian::Path::TYPE_PIPE,
+);
+
+my %INDEX_FAUX_DIR_TEMPLATE = (
+ 'name' => '',
+ '_path_info' => $FILE_CODE2LPATH_TYPE{'d'} | 0755,
+ 'basename' => '',
+ 'owner' => 'root',
+ 'group' => 'root',
+ 'size' => 0,
+ # Pick a "random" (but fixed) date
+ # - hint, it's a good read. :)
+ 'date' => '1998-01-25',
+ 'time' => '22:55:34',
+ 'faux' => 1,
+);
+
=head1 NAME
Lintian::Collect::Package - Lintian base interface to binary and source package data collection
@@ -429,7 +438,7 @@ sub _fetch_index_data {
while (my $line = <$idx>) {
chomp($line);
- my (%file, $perm, $owner, $name);
+ my (%file, $perm, $operm, $owner, $name, $raw_type);
($perm,$owner,$file{size},$file{date},$file{time},$name)
=split(' ', $line, 6);
# This may appear to be obscene, but the call overhead of
@@ -438,11 +447,13 @@ sub _fetch_index_data {
# Of the 115363 paths here, only 306 had an "uncached"
# permission string (chromium-browser/32.0.1700.123-2).
if (exists($PERM_CACHE{$perm})) {
- $file{operm} = $PERM_CACHE{$perm};
+ $operm = $PERM_CACHE{$perm};
} else {
- $file{operm} = perm2oct($perm);
+ $operm = perm2oct($perm);
}
- $file{type} = substr $perm, 0, 1;
+ $raw_type = substr($perm, 0, 1);
+ $file{'_path_info'} = $operm | $FILE_CODE2LPATH_TYPE{$raw_type}
+ // Lintian::Path::TYPE_OTHER;
if ($num_idx) {
# If we have a "numeric owner" index file, read that as well
@@ -462,11 +473,11 @@ sub _fetch_index_data {
if ($name =~ s/ link to (.*)//) {
my $target = dequote_name($1);
- $file{type} = 'h';
+ $file{'_path_info'} = $FILE_CODE2LPATH_TYPE{'h'} | $operm;
$file{link} = $target;
push @{$rhlinks{$target}}, dequote_name($name);
- } elsif ($file{type} eq 'l') {
+ } elsif ($raw_type eq 'l') {
($name, $file{link}) = split ' -> ', $name, 2;
$file{link} = dequote_name($file{link}, 0);
}
@@ -478,7 +489,7 @@ sub _fetch_index_data {
$idxh{$name} = \%file;
# Record children
- $children{$name} ||= [] if $file{type} eq 'd';
+ $children{$name} ||= [] if $raw_type eq 'd';
my ($parent, $base) = ($name =~ m,^(.+/)?([^/]+/?)$,);
$parent = '' unless defined $parent;
$base = '' unless defined $base;
@@ -554,11 +565,16 @@ sub _fetch_index_data {
next unless exists $idxh{$target};
my $le = $idxh{$link};
# We may be "demoting" a "real file" to a "hardlink"
- $le->{type} = 'h';
+ $le->{'_path_info'}
+ = ($le->{'_path_info'} & ~Lintian::Path::TYPE_FILE)
+ | Lintian::Path::TYPE_HARDLINK;
$le->{link} = $target;
}
if ($target ne $e->{name}) {
- $idxh{$target}->{type} = '-';
+ $idxh{$target}{'_path_info'}
+ = ($idxh{$target}{'_path_info'}
+ & ~Lintian::Path::TYPE_HARDLINK)
+ | Lintian::Path::TYPE_FILE;
# hardlinks does not have size, so copy that from the original
# entry.
$idxh{$target}->{size} = $e->{size};
@@ -571,7 +587,7 @@ sub _fetch_index_data {
# Add them in reverse order - entries in a dir are made
# objects before the dir itself.
my $entry = $idxh{$file};
- if ($entry->{'type'} eq 'd') {
+ if ($entry->{'_path_info'} & Lintian::Path::TYPE_DIR) {
my (%child_table, @sorted_children);
for my $cname (sort(@{ $children{$file} })) {
my $child = $idxh{$cname};
diff --git a/lib/Lintian/Path.pm b/lib/Lintian/Path.pm
index 32f0211..225cac7 100644
--- a/lib/Lintian/Path.pm
+++ b/lib/Lintian/Path.pm
@@ -23,9 +23,23 @@ use warnings;
use parent qw(Class::Accessor::Fast);
use constant {
- UNSAFE_PATH => 0,
- FS_PATH_IS_OK => 1,
- OPEN_IS_OK => 3,
+ TYPE_FILE => 0x00_01_00_00,
+ TYPE_HARDLINK => 0x00_02_00_00,
+ TYPE_DIR => 0x00_04_00_00,
+ TYPE_SYMLINK => 0x00_08_00_00,
+ TYPE_BLOCK_DEV => 0x00_10_00_00,
+ TYPE_CHAR_DEV => 0x00_20_00_00,
+ TYPE_PIPE => 0x00_40_00_00,
+ TYPE_OTHER => 0x00_80_00_00,
+ TYPE_MASK => 0x00_ff_00_00,
+
+ UNSAFE_PATH => 0x01_00_00_00,
+ FS_PATH_IS_OK => 0x02_00_00_00,
+ OPEN_IS_OK => 0x06_00_00_00, # Implies FS_PATH_IS_OK
+ ACCESS_INFO => 0x07_00_00_00,
+ # 0o6777 == 0xdff, which covers set[ug]id + sticky bit. Accordingly,
+ # 0xffff should be more than sufficient for the foreseeable future.
+ OPERM_MASK => 0x00_00_ff_ff,
};
use overload (
@@ -88,15 +102,9 @@ Argument is a hash containing the data read from the index file.
sub new {
my ($type, $data) = @_;
my $self = $data;
- my $ftype = $data->{'type'};
+ my $ftype = $data->{'_path_info'};
bless($self, $type);
- # Use $data->{type} directly here. The constructor is called
- # often enough for this to take up a (small) measure amount of
- # runtime.
- if ($ftype eq '-' or $ftype eq 'h') {
- $self->{'_path_access'} = OPEN_IS_OK;
- } elsif ($ftype eq 'd') {
- $self->{'_path_access'} = FS_PATH_IS_OK;
+ if ($ftype & TYPE_DIR) {
for my $child ($self->children) {
$child->_set_parent_dir($self);
}
@@ -165,14 +173,6 @@ NB: This is only well defined for files.
Return the modification date as YYYY-MM-DD.
-=item operm
-
-Returns the file permissions of this object in octal (e.g. 0644).
-
-NB: This is only well defined for file entries that are subject to
-permissions (e.g. files). Particularly, the value is not well defined
-for symlinks.
-
=item parent_dir
Returns the parent directory entry of this entry as a
@@ -205,11 +205,26 @@ happen if a package does not include all intermediate directories.
=cut
Lintian::Path->mk_ro_accessors(
- qw(name owner group link type uid gid
- size date time operm parent_dir dirname basename
+ qw(name owner group link uid gid
+ size date time parent_dir dirname basename
faux
));
+=item operm
+
+Returns the file permissions of this object in octal (e.g. 0644).
+
+NB: This is only well defined for file entries that are subject to
+permissions (e.g. files). Particularly, the value is not well defined
+for symlinks.
+
+=cut
+
+sub operm {
+ my ($self) = @_;
+ return $self->{'_path_info'} & OPERM_MASK;
+}
+
=item children
Returns a list of children (as Lintian::Path objects) of this entry.
@@ -307,11 +322,14 @@ symlinks, even if the symlink points to a file.
=cut
-sub is_symlink { return $_[0]->type eq 'l'; }
-sub is_hardlink { return $_[0]->type eq 'h'; }
-sub is_dir { return $_[0]->type eq 'd'; }
-sub is_file { return $_[0]->type eq '-' || $_[0]->type eq 'h'; }
-sub is_regular_file { return $_[0]->type eq '-'; }
+sub is_symlink { return $_[0]->{'_path_info'} & TYPE_SYMLINK ? 1 : 0; }
+sub is_hardlink { return $_[0]->{'_path_info'} & TYPE_HARDLINK ? 1 : 0; }
+sub is_dir { return $_[0]->{'_path_info'} & TYPE_DIR ? 1 : 0; }
+
+sub is_file {
+ return $_[0]->{'_path_info'} & (TYPE_FILE | TYPE_HARDLINK) ? 1 : 0;
+}
+sub is_regular_file { return $_[0]->{'_path_info'} & TYPE_FILE ? 1 : 0; }
=item link_normalized
@@ -364,7 +382,7 @@ at least one bit denoting executability set (bitmask 0111).
sub _any_bit_in_operm {
my ($self, $bitmask) = @_;
- return ($self->operm & $bitmask) ? 1 : 0;
+ return ($self->{'_path_info'} & $bitmask) ? 1 : 0;
}
sub is_readable { return $_[0]->_any_bit_in_operm(0444); }
@@ -441,9 +459,9 @@ Returns a truth value if the path may be opened.
sub is_open_ok {
my ($self) = @_;
- if (exists($self->{'_path_access'})) {
- return ($self->{'_path_access'} & OPEN_IS_OK) == OPEN_IS_OK ? 1 : 0;
- }
+ my $path_info = $self->{'_path_info'};
+ return 1 if ($path_info & OPEN_IS_OK) == OPEN_IS_OK;
+ return 0 if $path_info & ACCESS_INFO;
eval {
my $path = $self->_collect_path();
$self->_check_open($path);
@@ -468,22 +486,19 @@ sub _fs_info {
sub _check_access {
my ($self, $path) = @_;
- my $safe = FS_PATH_IS_OK;
- if (exists($self->{'_path_access'})) {
- $safe = $self->{'_path_access'};
- } else {
- my $resolvable = $self->resolve_path;
- $safe = UNSAFE_PATH if not $resolvable;
- $self->{'_path_access'} = $safe;
- }
- if (($safe & FS_PATH_IS_OK) != FS_PATH_IS_OK) {
- $self->{'_path_access'} = UNSAFE_PATH if $safe == UNSAFE_PATH;
+ my $path_info = $self->{'_path_info'};
+ return 1 if ($path_info & FS_PATH_IS_OK) == FS_PATH_IS_OK;
+ return 0 if $path_info & ACCESS_INFO;
+ my $resolvable = $self->resolve_path;
+ if (not $resolvable) {
+ $self->{'_path_info'} |= UNSAFE_PATH;
# NB: We are deliberately vague here to avoid suggesting
# whether $path exists. In some cases (e.g. lintian.d.o)
# the output is readily available to wider public.
confess('Attempt to access through broken or unsafe symlink:'. ' '
. $self->name);
}
+ $self->{'_path_info'} |= FS_PATH_IS_OK;
return 1;
}
@@ -493,7 +508,7 @@ sub _check_open {
# Symlinks can point to a "non-file" object inside the
# package root
if ($self->is_file or ($self->is_symlink and -f $path)) {
- $self->{'_path_access'} |= OPEN_IS_OK;
+ $self->{'_path_info'} |= OPEN_IS_OK;
return 1;
}
# Leave "_path_access" here as _check_access marks it either as
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/lintian/lintian.git
Reply to: