CGI search script.
I don't recall seeing a search script on our website, but I could not
verify that today since it is down for a little while. In any case, I
wrote up a small CGI script that parses the Packages file and looks
for matching strings, it will output the Package listing, with a URL
for the file, if it finds a match. It is fairly primitive, but I
thought it might be useful. Please mail me any suggestions, comments,
or whatevers.
Jim
#!/usr/bin/perl
#
# find_deb.pl -- a simple CGI search engine for Debian packages.
#
# This is a simple CGI search script to find Debian packages via the
# Web. It is meant to be used for simple packages, not really for
# interdependent packages.
#
# Pass the search string via the variable "search_string" in a POST from
# a WWW form. The variable may be a single word or a perl regular
# expression. It will search the Packages file and output a list of
# packages that match.
#
# The script uses a list generated from a 'find -name' run from the root
# of the FTP tree to try and match the filename in the Package field.
# The files in this list should be following format:
# (note the lack of leading './')
#
# pub/debian/binary-i386/admin/acct-5-12.deb
# pub/debian/binary-i386/admin/at-2.8a-2.deb
# pub/debian/binary-i386/admin/cfengine-1.2.14-1.deb
#
# If a grep through the file list finds a match for '/filename*.deb$',
# the package name will be a URL to the found file. You may also match
# the Depends, Recommends, etc. fields, but it is not advised due to the
# interdependencies of Debian packages.
#
# Modify the variables below before you put this script up. Look at
# sub html_head and sub html_tail for the header/trailer HTML code.
#
# Jim Robinson <jimr@simons-rock.edu>
# March 17, 1996
### Variables ###
$SERVER = 'ftp://ftp.debian.org'; # Our FTP server
$M_URL = 'http://www.debian.org/'; # Our homepage URL
$M_EMAIL = 'webmaster\@debian.org'; # Our e-mail address.
$TITLE = 'Debian GNU/Linux Search'; # URL Title
$HEAD = 'Debian GNU/Linux Search'; # Printed Title.
# The following is the path to the Packages file.
$PACKAGES_PATH='/usr/src/ftp/pub/debian/Packages';
# The following is the path to a "find -name" of the main binary tree.
$FILELIST_PATH='/usr/src/ftp/pub/debian/find-name';
# The following are the package keys we try and find files for.
# It would be safer to use this field...
$CONTAINS_FILENAMES = 'package';
# However, if you use the big CONTAINS_FILENAMES field, make sure you warn
# people that interdependencies are fairly complex, and this search engine
# will NOT find them all.
#$CONTAINS_FILENAMES = 'package|depends|recommended|optional';
# Names in a field. Make sure they are in the order you want them
# printed in.
@field_names = ('Package',
'Section',
'Version',
'Maintainer',
'Depends',
'Recommends',
'Suggests',
'Conflics',
'Provides',
'Essential',
'Description');
#### END OF VARIABLES ####
#
# Main
#
# Open filelist database. This is slow -- we should use locate(1) or
# some sort of simple lookup program in C.
if (!open(FILELIST, "$FILELIST_PATH")) {
html_head();
print "Sorry, I couldn't open $FILELIST_PATH: ($!)<br>\n";
html_tail();
exit(-1);
}
@LIST = <FILELIST>;
# Get our regexp string.
if ($ENV{'REQUEST_METHOD'} eq 'GET' or $ENV{'REQUEST_METHOD'} eq 'POST') {
ReadParse();
$string = $in{'search_string'};
} else {
$string = $ARGV[0];
}
# Find records and output...
html_head();
find_records();
html_tail();
#
# Functions
#
sub find_records
{
$count = 0;
if (!open(PACKAGES, "$PACKAGES_PATH")) {
print "Sorry, I couldn't open $PACKAGES_PATH: ($!)<br>\n";
exit(-1);
}
while(<PACKAGES>) {
chomp(); # strip newline
if ($_ ne '') { # It is NOT a new package name...
$package .= $_. "\n";
} else { # It IS a new package name...
if(grep(/$string/, $package)) {
parse_record($package);
}
undef($package);
}
}
}
sub parse_record
{
local($input) = @_;
local($pkg, @lines);
@lines = split(/\n/, $input);
# Split apart the record into its component key/value pairs.
# Descriptions are tricky, since they are multi-line.
foreach $line (@lines) {
if (($field ne 'Description') and ($line =~ m/^(\w+):\s+(.*)$/)) {
# Searched for, and found key/value pairs.
$pkg{$1} = $2;
} elsif ($pkg{'Description'} ne '') {
# We are at the end (with the Description).
$pkg{'Description'} .= "\n$line";
}
}
find_package_url(%pkg); # Search for the file in the list.
}
sub find_package_url
{
local(%pkg) = @_;
local($file, $key, $path_to_file, $path_to_tree, @matchs, @urls);
foreach $key (keys %pkg) {
if ($key =~ m/$CONTAINS_FILENAMES/i) { # Does the key have filenames?
undef(@urls);
@files = split(/,|\|/, $pkg{$key});
foreach $file (@files) {
$file =~ s/\s//g;
# Using a call to locate(1) or a specialized lookup
# program probably be a LOT faster, but anyway...
#
# If we find a match, use the first match and write out
# The URL string, replacing the filename in the key/value
# pair with this string. This needs improvement.
if ((@matchs = grep(/\/$file\-.*\.deb$/, @LIST))) {
chomp($path_to_file = $matchs[0]);
$file = "<a href=\"$SERVER/$path_to_file\">$file</a>";
}
push (@urls, $file);
}
$pkg{$key} = join(', ', @urls);
}
}
print_output(%pkg);
}
sub print_output
{
local(%pkg) = @_;
$pkg{'Maintainer'} =~ s/<(.*)>/\<<a href=\"mailto:$1\">$1<\/a>\>/g;
$pkg{'Description'} =~ s/ \./<p>\n/g;
# We do this ONCE, for the first linefeed.
$pkg{'Description'} =~ s/\n/<br>\n/;
foreach $field (@field_names) {
if ($pkg{$field} ne '') {
print "<b>$field:</b> $pkg{$field}<br>\n";
}
}
print "\n<hr>\n";
}
#
# html_headxs - print out httpd needed info and title information.
#
sub html_head
{
print "Content-type: text/html\n\n";
print "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n";
print "<html> <head>\n";
print "<title>$TITLE</title>\n";
print "</head>\n";
print "\n";
print "<body>\n";
print "<h1>$HEAD</h1>\n";
print "<hr><p>\n";
}
#
# html_tail - print out main site and contact information.
#
sub html_tail
{
print "<hr>\n";
print "<address><A HREF=\"$M_URL\">Debian GNU/Linux</A><<A HREF=\"mailto:$M_EMAIL\">$M_EMAIL</A>></address>\n";
print "</body> </html>\n";
}
#
# ReadParse from cgi-lib.pl - Parse data passed via POST or GET.
#
# S.E.Brenner@bioc.cam.ac.uk
# $Header: /people/seb1005/http/cgi-bin/RCS/cgi-lib.pl,v 1.2 1994/01/10 15:05:40 seb1005 Exp $
#
# Copyright 1993 Steven E. Brenner
# Unpublished work.
# Permission granted to use and modify this library so long as the
# copyright above is maintained, modifications are documented, and
# credit is given
#
sub ReadParse
{
if (@_) {
local (*in) = @_;
}
local ($i, $loc, $key, $val);
# Read in text
if ($ENV{'REQUEST_METHOD'} eq "GET") {
$in = $ENV{'QUERY_STRING'};
} elsif ($ENV{'REQUEST_METHOD'} eq "POST") {
for ($i = 0; $i < $ENV{'CONTENT_LENGTH'}; $i++) {
$in .= getc;
}
}
@in = split(/&/,$in);
foreach $i (0 .. $#in) {
# Convert plus's to spaces
$in[$i] =~ s/\+/ /g;
# Convert %XX from hex numbers to alphanumeric
$in[$i] =~ s/%(..)/pack("c",hex($1))/ge;
# Split into key and value.
$loc = index($in[$i],"=");
$key = substr($in[$i],0,$loc);
$val = substr($in[$i],$loc+1);
# \0 is the multiple separator
$in{$key} .= '\0' if (defined($in{$key}));
$in{$key} .= $val;
}
return 1; # just for fun
}
Reply to: