[SCM] Debian package checker branch, master, updated. 2.5.6-100-geef3c3b
The following commit has been merged in the master branch:
commit eef3c3b6614f02fd74578b6442cb5af8e3c2534b
Author: Niels Thykier <niels@thykier.net>
Date: Sun Apr 15 11:26:39 2012 +0200
L::Util: Rename and promote _parse_dpkg_control_iterative
Rename _parse_dpkg_control_iterative to visit_dpkg_paragraph and add
it to EXPORT_OK.
Made a slight alteration to how visit_dpkg_paragraph calls its CODE
argument for consistency with its "pattern".
Signed-off-by: Niels Thykier <niels@thykier.net>
diff --git a/lib/Lintian/Util.pm b/lib/Lintian/Util.pm
index 34021d8..6004342 100644
--- a/lib/Lintian/Util.pm
+++ b/lib/Lintian/Util.pm
@@ -31,6 +31,7 @@ use base 'Exporter';
our (@EXPORT_OK, @EXPORT);
BEGIN {
@EXPORT_OK = qw(
+ visit_dpkg_paragraph
parse_dpkg_control
read_dpkg_control
get_deb_info
@@ -100,9 +101,45 @@ but on their own did not warrant their own module.
Most subs are imported only on request.
+=head2 Debian control parsers
+
+At first glance, this module appears to contain several debian control
+parsers. In practice, there is only one real parser
+(L</visit_dpkg_paragraph>) - the rest are convenience functions around
+it.
+
+If you have very large files (e.g. Packages_amd64), you almost
+certainly want L</visit_dpkg_paragraph>. Otherwise, one of the
+convenience functions is probably what you are looking for.
+
+=over 4
+
+=item Use L</get_deb_info> when
+
+You have a I<.deb> (or I<.udeb>) file and you want the control file
+from it.
+
+=item Use L</get_dsc_info> when
+
+You have a I<.dsc> (or I<.changes>) file. Alternatively, it is also
+useful if you have a control file and only care about the first
+paragraph.
+
+=item Use L</read_dpkg_control> when
+
+You have a debian control file (such as I<debian/control>) and you want
+a number of paragraphs from it.
+
+=item Use L</parse_dpkg_control> when
+
+You would have used L</read_dpkg_control>, except you have an
+open filehandle rather than a file name.
+
+=back
+
=head1 FUNCTIONS
-=over
+=over 4
=item parse_dpkg_control (HANDLE[, DEBCONF_FLAG[, LINES]])
@@ -119,12 +156,75 @@ return, LINES will be populated to the line numbers where a given
paragraph "started" (i.e. the line number of first field in the
paragraph).
+This is a convenience sub around L</visit_dpkg_paragraph> and can
+therefore produce the same errors as it. Please see
+L</visit_dpkg_paragraph> for the finer semantics of how the
+control file is parsed.
+
+NB: parse_dpkg_control does I<not> close the handle for the caller.
+
+=cut
+
+sub parse_dpkg_control { # convenience wrapper: gathers every parsed paragraph into one list
+ my ($handle, $debconf_flag, $lines) = @_; # $lines is optional; when defined it is used as an arrayref
+ my @result; # paragraphs, in the order the callback receives them
+ my $c = sub { # callback handed to visit_dpkg_paragraph
+ my ($para, $line) = @_; # the paragraph and the start line passed along with it
+ push @result, $para;
+ push @$lines, $line if defined $lines; # record start lines only when the caller supplied LINES
+ };
+ visit_dpkg_paragraph ($c, $handle, $debconf_flag); # the handle is not closed here
+ return @result;
+}
+
+
+=item visit_dpkg_paragraph (CODE, HANDLE[, DEBCONF_FLAG])
+
+Reads a debian control file from HANDLE and passes each paragraph to
+CODE. A paragraph is represented via a hashref, which maps (lower
+cased) field names to their values.
+
+If DEBCONF_FLAG is passed and a truth value, the handle is assumed to
+point to a debconf template. These files have slightly different
+syntax and the flag is needed to parse them correctly.
+
If the file is empty (i.e. it contains no paragraphs), the method will
contain an I<empty> list. Lines looking like a GPG-signature are
ignored when parsing the file.
-On syntax errors, parse_dpkg_control will call die with the following
-string:
+visit_dpkg_paragraph will pass paragraphs to CODE as they are
+completed. If CODE can process the paragraphs as they are seen, very
+large control files can be processed without keeping all the
+paragraphs in memory.
+
+As a consequence of how the file is parsed, CODE may be passed a
+number of (valid) paragraphs before parsing is stopped due to a syntax
+error.
+
+NB: visit_dpkg_paragraph does I<not> close the handle for the caller.
+
+CODE is expected to be a callable reference (e.g. a sub) and will be
+invoked as the following:
+
+=over 4
+
+=item CODE->(PARA, STARTLINE)
+
+The first argument, PARA, is a hashref to the most recent paragraph
+parsed. The second argument, STARTLINE, is the line number where the
+paragraph "started" (i.e. the line number of first field in the
+paragraph).
+
+The return value of CODE is ignored.
+
+If the CODE invokes die (or similar) the error is propagated to the
+caller.
+
+=back
+
+
+I<On syntax errors>, visit_dpkg_paragraph will call die with the
+following string:
"syntax error at line %d: %s\n"
@@ -157,21 +257,10 @@ underscores.
=cut
-sub parse_dpkg_control {
- my @result;
- my $c = sub { push @result, @_; };
- _parse_dpkg_control_iterative( $c, @_);
- return @result;
-}
-
-# parses a dpkg-control file like parse_dpkg_control, except
-# at the end of each section (or paragraph) it will pass the
-# section to a piece of code to handle it. This allows reading
-# large dpkg-control based files without having the entire file
-# in memory.
-sub _parse_dpkg_control_iterative {
- my ($code, $CONTROL, $debconf_flag, $lines) = @_;
+sub visit_dpkg_paragraph {
+ my ($code, $CONTROL, $debconf_flag) = @_;
+ my $sline = -1;
my $section = {};
my $open_section = 0;
my $last_tag;
@@ -188,7 +277,7 @@ sub _parse_dpkg_control_iterative {
if ((!$debconf_flag && m/^\s*$/) or ($debconf_flag && $_ eq '')) {
if ($open_section) { # end of current section
# pass the current section to the handler
- $code->($section);
+ $code->($section, $sline);
$section = {};
$open_section = 0;
}
@@ -207,7 +296,7 @@ sub _parse_dpkg_control_iterative {
}
# new empty field?
elsif (m/^([^: \t]+):\s*$/o) {
- push @$lines, $. if defined $lines and not $open_section;
+ $sline = $. if not $open_section;
$open_section = 1;
my ($tag) = (lc $1);
@@ -217,7 +306,7 @@ sub _parse_dpkg_control_iterative {
}
# new field?
elsif (m/^([^: \t]+):\s*(.*)$/o) {
- push @$lines, $. if defined $lines and not $open_section;
+ $sline = $. if not $open_section;
$open_section = 1;
# Policy: Horizontal whitespace (spaces and tabs) may occur
@@ -259,7 +348,7 @@ sub _parse_dpkg_control_iterative {
}
}
# pass the last section (if not already done).
- $code->($section) if $open_section;
+ $code->($section, $sline) if $open_section;
}
=item read_dpkg_control (FILE[, DEBCONF_FLAG[, LINES]])
@@ -272,11 +361,16 @@ returned.
Otherwise, this behaves like:
- open my $fd, '<' FILE or fail ...;
+ open my $fd, '<', FILE or die ...;
my @p = parse_dpkg_control ($fd, DEBCONF_FLAG, LINES);
close $fd;
return @p;
+It goes without saying that this may fail with any of the messages that
+L</parse_dpkg_control> does. It can also emit the following error:
+
+ "cannot open %s: %s"
+
=cut
sub read_dpkg_control {
@@ -286,11 +380,10 @@ sub read_dpkg_control {
return;
}
- open(my $CONTROL, '<', $file)
- or fail("cannot open control file $file for reading: $!");
+ open my $CONTROL, '<', $file or die "cannot open $file: $!";
my @data = parse_dpkg_control($CONTROL, $debconf_flag, $lines);
- close($CONTROL)
- or fail("pipe for control file $file exited with status: $?");
+ close $CONTROL;
+
return @data;
}
@@ -307,6 +400,11 @@ Note: the control file is only expected to have a single paragraph and
thus only the first is returned (in the unlikely case that there are
more than one).
+This function may fail with any of the messages that
+L</parse_dpkg_control> does. It can also emit:
+
+ "cannot fork to unpack %s: %s\n"
+
=cut
sub get_deb_info {
@@ -321,7 +419,7 @@ sub get_deb_info {
spawn($opts,
['ar', 'p', $file, 'control.tar.gz'],
'|', ['tar', '--wildcards', '-xzO', '-f', '-', '*control'])
- or fail("cannot fork to unpack $file: $opts->{exception}\n");
+ or die "cannot fork to unpack $file: $opts->{exception}\n";
my @data = parse_dpkg_control($opts->{pipe_out});
# Consume all data before exiting so that we don't kill child processes
@@ -342,6 +440,9 @@ Note: the control file is only expected to have a single paragraph and
thus only the first is returned (in the unlikely case that there are
more than one).
+This function may fail with any of the messages that
+L</read_dpkg_control> does.
+
=cut
sub get_dsc_info {
diff --git a/reporting/harness b/reporting/harness
index 891f8b4..1ef234e 100755
--- a/reporting/harness
+++ b/reporting/harness
@@ -101,7 +101,7 @@ unshift @INC, "$LINTIAN_ROOT/lib";
require Lintian::Lab;
require Lintian::Lab::Manifest;
require Lintian::Processable::Package;
-require Lintian::Util; # _parse_dpkg_control_iterative
+require Lintian::Util;
+# "require" accepts no import list (writing one is a syntax error), and
+# "use" would run before @INC is extended at run time, so the symbol is
+# imported explicitly once the module has been loaded.
+Lintian::Util->import (qw(visit_dpkg_paragraph));
# turn file buffering off
$| = 1;
@@ -426,14 +426,14 @@ sub local_mirror_manifests {
my $binsub = sub { _parse_pkgs_pg ($active_srcs, $binman, $mirdir, $area, @_) };
my $upkgfd = _open_data_file ($upkgs);
my $udebsub = sub { _parse_pkgs_pg ($active_srcs, $udebman, $mirdir, $area, @_) };
- Lintian::Util::_parse_dpkg_control_iterative ($binsub, $pkgfd);
- Lintian::Util::_parse_dpkg_control_iterative ($udebsub, $upkgfd);
+ visit_dpkg_paragraph ($binsub, $pkgfd);
+ visit_dpkg_paragraph ($udebsub, $upkgfd);
close $pkgfd;
close $upkgfd;
}
$srcfd = _open_data_file ($srcs);
$srcsub = sub { _parse_srcs_pg ($active_srcs, $srcman, $mirdir, $area, @_) };
- Lintian::Util::_parse_dpkg_control_iterative ($srcsub, $srcfd);
+ visit_dpkg_paragraph ($srcsub, $srcfd);
close $srcfd;
}
}
--
Debian package checker
Reply to: