[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

[lintian] 01/03: L::Path: Merge type, _path_access and operm into one field



This is an automated email from the git hooks/post-receive script.

nthykier pushed a commit to branch master
in repository lintian.

commit fb074e43233fdca8b09f4d4ff475bf4b762d6e53
Author: Niels Thykier <niels@thykier.net>
Date:   Fri Jul 17 19:09:09 2015 +0200

    L::Path: Merge type, _path_access and operm into one field
    
    Merge the 3 fields into one single 32bit integer field to reduce some
    memory usage.  This reduces cached memory usage by another 4.5% - 5%
    when processing lintian itself.
    
    Signed-off-by: Niels Thykier <niels@thykier.net>
---
 debian/changelog               |  3 ++
 lib/Lintian/Collect/Package.pm | 66 +++++++++++++++++-----------
 lib/Lintian/Path.pm            | 97 ++++++++++++++++++++++++------------------
 3 files changed, 100 insertions(+), 66 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index add991b..1c95d51 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -23,6 +23,9 @@ lintian (2.5.34) UNRELEASED; urgency=medium
   * lib/Lintian/Path.pm:
     + [NT] Rework some implementation details to reduce memory
       consumption slightly.
+    + [NT] The undocumented "type" method has been removed from
+      the API.  It was an implementation detail that has now
+      changed.
 
   * reporting/templates:
     + [NT] Remove (uses of) the "invisible-anchor" css class as
diff --git a/lib/Lintian/Collect/Package.pm b/lib/Lintian/Collect/Package.pm
index b0e7977..0cbd283 100644
--- a/lib/Lintian/Collect/Package.pm
+++ b/lib/Lintian/Collect/Package.pm
@@ -32,21 +32,6 @@ use Lintian::Path;
 use Lintian::Path::FSInfo;
 use Lintian::Util qw(fail open_gz perm2oct normalize_pkg_path dequote_name);
 
-my %INDEX_FAUX_DIR_TEMPLATE = (
-    'name'     => '',
-    'type'     => 'd',
-    'basename' => '',
-    'owner'    => 'root',
-    'group'    => 'root',
-    'size'     => 0,
-    'operm'    => 0755,
-    # Pick a "random" (but fixed) date
-    # - hint, it's a good read.  :)
-    'date'     => '1998-01-25',
-    'time'     => '22:55:34',
-    'faux'     => 1,
-);
-
 # A cache for (probably) the 5 most common permission strings seen in
 # the wild.
 # It may seem obscene, but it has an extreme "hit-ratio" and it is
@@ -59,6 +44,30 @@ my %PERM_CACHE = map { $_ => perm2oct($_) } (
     'lrwxrwxrwx', # symlinks
 );
 
+my %FILE_CODE2LPATH_TYPE = (
+    '-' => Lintian::Path::TYPE_FILE     | Lintian::Path::OPEN_IS_OK,
+    'h' => Lintian::Path::TYPE_HARDLINK | Lintian::Path::OPEN_IS_OK,
+    'd' => Lintian::Path::TYPE_DIR      | Lintian::Path::FS_PATH_IS_OK,
+    'l' => Lintian::Path::TYPE_SYMLINK,
+    'b' => Lintian::Path::TYPE_BLOCK_DEV,
+    'c' => Lintian::Path::TYPE_CHAR_DEV,
+    'p' => Lintian::Path::TYPE_PIPE,
+);
+
+my %INDEX_FAUX_DIR_TEMPLATE = (
+    'name'       => '',
+    '_path_info' => $FILE_CODE2LPATH_TYPE{'d'} | 0755,
+    'basename'   => '',
+    'owner'      => 'root',
+    'group'      => 'root',
+    'size'       => 0,
+    # Pick a "random" (but fixed) date
+    # - hint, it's a good read.  :)
+    'date'       => '1998-01-25',
+    'time'       => '22:55:34',
+    'faux'       => 1,
+);
+
 =head1 NAME
 
 Lintian::Collect::Package - Lintian base interface to binary and source package data collection
@@ -429,7 +438,7 @@ sub _fetch_index_data {
     while (my $line = <$idx>) {
         chomp($line);
 
-        my (%file, $perm, $owner, $name);
+        my (%file, $perm, $operm, $owner, $name, $raw_type);
         ($perm,$owner,$file{size},$file{date},$file{time},$name)
           =split(' ', $line, 6);
         # This may appear to be obscene, but the call overhead of
@@ -438,11 +447,13 @@ sub _fetch_index_data {
         #   Of the 115363 paths here, only 306 had an "uncached"
         # permission string (chromium-browser/32.0.1700.123-2).
         if (exists($PERM_CACHE{$perm})) {
-            $file{operm} = $PERM_CACHE{$perm};
+            $operm = $PERM_CACHE{$perm};
         } else {
-            $file{operm} = perm2oct($perm);
+            $operm = perm2oct($perm);
         }
-        $file{type} = substr $perm, 0, 1;
+        $raw_type = substr($perm, 0, 1);
+        $file{'_path_info'} = $operm | $FILE_CODE2LPATH_TYPE{$raw_type}
+          // Lintian::Path::TYPE_OTHER;
 
         if ($num_idx) {
             # If we have a "numeric owner" index file, read that as well
@@ -462,11 +473,11 @@ sub _fetch_index_data {
 
         if ($name =~ s/ link to (.*)//) {
             my $target = dequote_name($1);
-            $file{type} = 'h';
+            $file{'_path_info'} = $FILE_CODE2LPATH_TYPE{'h'} | $operm;
             $file{link} = $target;
 
             push @{$rhlinks{$target}}, dequote_name($name);
-        } elsif ($file{type} eq 'l') {
+        } elsif ($raw_type eq 'l') {
             ($name, $file{link}) = split ' -> ', $name, 2;
             $file{link} = dequote_name($file{link}, 0);
         }
@@ -478,7 +489,7 @@ sub _fetch_index_data {
         $idxh{$name} = \%file;
 
         # Record children
-        $children{$name} ||= [] if $file{type} eq 'd';
+        $children{$name} ||= [] if $raw_type eq 'd';
         my ($parent, $base) = ($name =~ m,^(.+/)?([^/]+/?)$,);
         $parent = '' unless defined $parent;
         $base = '' unless defined $base;
@@ -554,11 +565,16 @@ sub _fetch_index_data {
                 next unless exists $idxh{$target};
                 my $le = $idxh{$link};
                 # We may be "demoting" a "real file" to a "hardlink"
-                $le->{type} = 'h';
+                $le->{'_path_info'}
+                  = ($le->{'_path_info'} & ~Lintian::Path::TYPE_FILE)
+                  | Lintian::Path::TYPE_HARDLINK;
                 $le->{link} = $target;
             }
             if ($target ne $e->{name}) {
-                $idxh{$target}->{type} = '-';
+                $idxh{$target}{'_path_info'}
+                  = ($idxh{$target}{'_path_info'}
+                      & ~Lintian::Path::TYPE_HARDLINK)
+                  | Lintian::Path::TYPE_FILE;
                 # hardlinks does not have size, so copy that from the original
                 # entry.
                 $idxh{$target}->{size} = $e->{size};
@@ -571,7 +587,7 @@ sub _fetch_index_data {
         # Add them in reverse order - entries in a dir are made
         # objects before the dir itself.
         my $entry = $idxh{$file};
-        if ($entry->{'type'} eq 'd') {
+        if ($entry->{'_path_info'} & Lintian::Path::TYPE_DIR) {
             my (%child_table, @sorted_children);
             for my $cname (sort(@{ $children{$file} })) {
                 my $child = $idxh{$cname};
diff --git a/lib/Lintian/Path.pm b/lib/Lintian/Path.pm
index 32f0211..225cac7 100644
--- a/lib/Lintian/Path.pm
+++ b/lib/Lintian/Path.pm
@@ -23,9 +23,23 @@ use warnings;
 use parent qw(Class::Accessor::Fast);
 
 use constant {
-    UNSAFE_PATH => 0,
-    FS_PATH_IS_OK => 1,
-    OPEN_IS_OK => 3,
+    TYPE_FILE      => 0x00_01_00_00,
+    TYPE_HARDLINK  => 0x00_02_00_00,
+    TYPE_DIR       => 0x00_04_00_00,
+    TYPE_SYMLINK   => 0x00_08_00_00,
+    TYPE_BLOCK_DEV => 0x00_10_00_00,
+    TYPE_CHAR_DEV  => 0x00_20_00_00,
+    TYPE_PIPE      => 0x00_40_00_00,
+    TYPE_OTHER     => 0x00_80_00_00,
+    TYPE_MASK      => 0x00_ff_00_00,
+
+    UNSAFE_PATH    => 0x01_00_00_00,
+    FS_PATH_IS_OK  => 0x02_00_00_00,
+    OPEN_IS_OK     => 0x06_00_00_00, # Implies FS_PATH_IS_OK
+    ACCESS_INFO    => 0x07_00_00_00,
+    # 0o6777 == 0xdff, which covers set[ug]id + sticky bit.  Accordingly,
+    # 0xffff should be more than sufficient for the foreseeable future.
+    OPERM_MASK     => 0x00_00_ff_ff,
 };
 
 use overload (
@@ -88,15 +102,9 @@ Argument is a hash containing the data read from the index file.
 sub new {
     my ($type, $data) = @_;
     my $self = $data;
-    my $ftype = $data->{'type'};
+    my $ftype = $data->{'_path_info'};
     bless($self, $type);
-    # Use $data->{type} directly here.  The constructor is called
-    # often enough for this to take up a (small) measure amount of
-    # runtime.
-    if ($ftype eq '-' or $ftype eq 'h') {
-        $self->{'_path_access'} = OPEN_IS_OK;
-    } elsif ($ftype eq 'd') {
-        $self->{'_path_access'} = FS_PATH_IS_OK;
+    if ($ftype & TYPE_DIR) {
         for my $child ($self->children) {
             $child->_set_parent_dir($self);
         }
@@ -165,14 +173,6 @@ NB: This is only well defined for files.
 
 Return the modification date as YYYY-MM-DD.
 
-=item operm
-
-Returns the file permissions of this object in octal (e.g. 0644).
-
-NB: This is only well defined for file entries that are subject to
-permissions (e.g. files).  Particularly, the value is not well defined
-for symlinks.
-
 =item parent_dir
 
 Returns the parent directory entry of this entry as a
@@ -205,11 +205,26 @@ happen if a package does not include all intermediate directories.
 =cut
 
 Lintian::Path->mk_ro_accessors(
-    qw(name owner group link type uid gid
-      size date time operm parent_dir dirname basename
+    qw(name owner group link uid gid
+      size date time parent_dir dirname basename
       faux
       ));
 
+=item operm
+
+Returns the file permissions of this object in octal (e.g. 0644).
+
+NB: This is only well defined for file entries that are subject to
+permissions (e.g. files).  Particularly, the value is not well defined
+for symlinks.
+
+=cut
+
+sub operm {
+    my ($self) = @_;
+    return $self->{'_path_info'} & OPERM_MASK;
+}
+
 =item children
 
 Returns a list of children (as Lintian::Path objects) of this entry.
@@ -307,11 +322,14 @@ symlinks, even if the symlink points to a file.
 
 =cut
 
-sub is_symlink { return $_[0]->type eq 'l'; }
-sub is_hardlink { return $_[0]->type eq 'h'; }
-sub is_dir { return $_[0]->type eq 'd'; }
-sub is_file { return $_[0]->type eq '-' || $_[0]->type eq 'h'; }
-sub is_regular_file  { return $_[0]->type eq '-'; }
+sub is_symlink { return $_[0]->{'_path_info'} & TYPE_SYMLINK ? 1 : 0; }
+sub is_hardlink { return $_[0]->{'_path_info'} & TYPE_HARDLINK ? 1 : 0; }
+sub is_dir { return $_[0]->{'_path_info'} & TYPE_DIR ? 1 : 0; }
+
+sub is_file {
+    return $_[0]->{'_path_info'} & (TYPE_FILE | TYPE_HARDLINK) ? 1 : 0;
+}
+sub is_regular_file { return $_[0]->{'_path_info'} & TYPE_FILE ? 1 : 0; }
 
 =item link_normalized
 
@@ -364,7 +382,7 @@ at least one bit denoting executability set (bitmask 0111).
 
 sub _any_bit_in_operm {
     my ($self, $bitmask) = @_;
-    return ($self->operm & $bitmask) ? 1 : 0;
+    return ($self->{'_path_info'} & $bitmask) ? 1 : 0;
 }
 
 sub is_readable   { return $_[0]->_any_bit_in_operm(0444); }
@@ -441,9 +459,9 @@ Returns a truth value if the path may be opened.
 
 sub is_open_ok {
     my ($self) = @_;
-    if (exists($self->{'_path_access'})) {
-        return ($self->{'_path_access'} & OPEN_IS_OK) == OPEN_IS_OK ? 1 : 0;
-    }
+    my $path_info = $self->{'_path_info'};
+    return 1 if ($path_info & OPEN_IS_OK) == OPEN_IS_OK;
+    return 0 if $path_info & ACCESS_INFO;
     eval {
         my $path = $self->_collect_path();
         $self->_check_open($path);
@@ -468,22 +486,19 @@ sub _fs_info {
 
 sub _check_access {
     my ($self, $path) = @_;
-    my $safe = FS_PATH_IS_OK;
-    if (exists($self->{'_path_access'})) {
-        $safe = $self->{'_path_access'};
-    } else {
-        my $resolvable = $self->resolve_path;
-        $safe = UNSAFE_PATH if not $resolvable;
-        $self->{'_path_access'} = $safe;
-    }
-    if (($safe & FS_PATH_IS_OK) != FS_PATH_IS_OK) {
-        $self->{'_path_access'} = UNSAFE_PATH if $safe == UNSAFE_PATH;
+    my $path_info = $self->{'_path_info'};
+    return 1 if ($path_info & FS_PATH_IS_OK) == FS_PATH_IS_OK;
+    return 0 if $path_info & ACCESS_INFO;
+    my $resolvable = $self->resolve_path;
+    if (not $resolvable) {
+        $self->{'_path_info'} |= UNSAFE_PATH;
         # NB: We are deliberately vague here to avoid suggesting
         # whether $path exists.  In some cases (e.g. lintian.d.o)
         # the output is readily available to wider public.
         confess('Attempt to access through broken or unsafe symlink:'. ' '
               . $self->name);
     }
+    $self->{'_path_info'} |= FS_PATH_IS_OK;
     return 1;
 }
 
@@ -493,7 +508,7 @@ sub _check_open {
     # Symlinks can point to a "non-file" object inside the
     # package root
     if ($self->is_file or ($self->is_symlink and -f $path)) {
-        $self->{'_path_access'} |= OPEN_IS_OK;
+        $self->{'_path_info'} |= OPEN_IS_OK;
         return 1;
     }
     # Leave "_path_access" here as _check_access marks it either as

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/lintian/lintian.git


Reply to: