[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#179296: patch: database speedup



Package: dpkg
Version: 1.10.9
Tags: patch

Here is a patch against dpkg 1.10.9 that gives a speedup of roughly
40-45% (for me), at least in `dpkg-query -s' and `dpkg-query -l'.

The patch makes two somewhat unrelated changes, one of them trivial
(increasing the hash table size and the per-character hashing factor
in database.c) and the other one less so.

Most of the patch deals with parsedb(). It adds a flex lexer
description file (lib/pkg_lex.lex) and a header file (dblexer.h), and
modifies the parsedb() function to use the generated lexer.

The patch also adds flex to the Build-Depends in debian/control and
the necessary rules to lib/Makefile.in. Please take a careful look at
the latter, as I'm not very comfortable with my skills wrt autoconf
and automake.

I admit the modified parsedb() is not nearly tested enough; I didn't
have a test system at hand so I didn't even try to install any
packages with the modified dpkg. Queries seem to work and behave well
under a memory debugger, however. I also expect there to be problems
with malformed entries.

In short, it's not a perfect patch, but I think it's a start. I
believe it also somewhat cleans up the parser, but I'm biased anyway.

	Sami


diff -NurB dpkg-1.10.9-orig/debian/control dpkg-1.10.9/debian/control
--- dpkg-1.10.9-orig/debian/control	2002-07-12 07:39:19.000000000 +0300
+++ dpkg-1.10.9/debian/control	2003-02-01 03:54:11.000000000 +0200
@@ -6,7 +6,7 @@
 Origin: debian
 Bugs: debbugs://bugs.debian.org
 Standards-Version: 3.5.4
-Build-Depends: debiandoc-sgml, sgmltools-lite, libncurses-dev, gettext (>= 0.10.36), zlib1g-dev (>= 1:1.1.3-19.1)
+Build-Depends: debiandoc-sgml, sgmltools-lite, libncurses-dev, gettext (>= 0.10.36), zlib1g-dev (>= 1:1.1.3-19.1), flex
 Uploaders: Wichert Akkerman <wakkerma@debian.org>, Adam Heath <doogie@debian.org>
 
 Package: dpkg
diff -NurB dpkg-1.10.9-orig/lib/database.c dpkg-1.10.9/lib/database.c
--- dpkg-1.10.9-orig/lib/database.c	2002-08-23 07:43:52.000000000 +0300
+++ dpkg-1.10.9/lib/database.c	2003-02-01 01:47:09.000000000 +0200
@@ -26,7 +26,7 @@
 #include <dpkg.h>
 #include <dpkg-db.h>
 
-#define BINS (1 << 7)
+#define BINS (1 << 12)
  /* This must always be a power of two.  If you change it
   * consider changing the per-character hashing factor (currently 5) too.
   */
@@ -36,7 +36,7 @@
 
 static int hash(const char *name) {
   int v= 0;
-  while (*name) { v *= 5; v += tolower(*name); name++; }
+  while (*name) { v *= 113; v += tolower(*name); name++; }
   return v;
 /* These results were achieved with 128 bins, and the list of packages
  * shown at the bottom of this file.
diff -NurB dpkg-1.10.9-orig/lib/dblexer.h dpkg-1.10.9/lib/dblexer.h
--- dpkg-1.10.9-orig/lib/dblexer.h	1970-01-01 02:00:00.000000000 +0200
+++ dpkg-1.10.9/lib/dblexer.h	2003-02-01 02:22:15.000000000 +0200
@@ -0,0 +1,79 @@
+/*
+ * libdpkg - Debian packaging suite library routines
+ * dblexer.h - Internal definitions for the database parser
+ *
+ * Copyright (C) 2003 Sami Liedes <sliedes@cc.hut.fi>
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with dpkg; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#ifndef DPKG_DBLEXER_H
+#define DPKG_DBLEXER_H
+
+/* <stdio.h> supplies FILE, which the lexer_init() prototype below needs. */
+#include <stdio.h>
+/* Scanner state read by parse.c: text and length of the current token, and the current input line number. */
+extern char *yytext;
+extern int lexer_lineno;
+extern int yyleng;
+/* lexer_init() points the scanner at fp and resets it; yylex() returns the next token, either a KW_* index or a PARSER_* code. */
+void lexer_init(FILE *fp);
+int yylex(void);
+/* Tokens for the known fields. parse.c uses these values as indices into fieldinfos[], so the two orders must match. */
+#define KW_PACKAGE 0
+#define KW_ESSENTIAL 1
+#define KW_STATUS 2
+#define KW_PRIORITY 3
+#define KW_SECTION 4
+#define KW_INSTALLED_SIZE 5
+#define KW_ORIGIN 6
+#define KW_MAINTAINER 7
+#define KW_BUGS 8
+#define KW_ARCHITECTURE 9
+#define KW_SOURCE 10
+#define KW_VERSION 11
+#define KW_REVISION 12
+#define KW_CONFIG_VERSION 13
+#define KW_REPLACES 14
+#define KW_PROVIDES 15
+#define KW_DEPENDS 16
+#define KW_PRE_DEPENDS 17
+#define KW_RECOMMENDS 18
+#define KW_SUGGESTS 19
+#define KW_CONFLICTS 20
+#define KW_ENHANCES 21
+#define KW_CONFFILES 22
+#define KW_FILENAME 23
+#define KW_SIZE 24
+#define KW_MD5SUM 25
+#define KW_MSDOS_FILENAME 26
+#define KW_DESCRIPTION 27
+/* Count of the KW_* tokens above; parsedb() asserts PARSER_NUM_KWS+1 == NFIELDS. */
+#define PARSER_NUM_KWS 28
+/* Non-field tokens. All are negative, so parse.c can tell them from a KW_* index with a >= 0 test. */
+#define PARSER_VALUE -1
+#define PARSER_EOF -2
+#define PARSER_ARBITRARYFIELD -3
+#define PARSER_PARA_SEP -4
+
+/* Error tokens the lexer returns for malformed field names or values. */
+#define PARSER_NL_IN_FIELD_NAME -5
+#define PARSER_MSDOS_EOF_IN_FIELD_NAME -6
+#define PARSER_NO_COLON_IN_FIELD_NAME -7
+#define PARSER_MSDOS_EOF_IN_VALUE -8
+#define PARSER_EOF_IN_VALUE -9
+
+#endif /* DPKG_DBLEXER_H */
diff -NurB dpkg-1.10.9-orig/lib/Makefile.in dpkg-1.10.9/lib/Makefile.in
--- dpkg-1.10.9-orig/lib/Makefile.in	2002-05-24 08:16:43.000000000 +0300
+++ dpkg-1.10.9/lib/Makefile.in	2003-02-01 03:00:05.000000000 +0200
@@ -9,10 +9,10 @@
 SOURCES		= compat.c database.c dbmodify.c dump.c ehandle.c fields.c \
 		    lock.c mlib.c myopt.c nfmalloc.c parse.c parsehelp.c \
 		    showcright.c showpkg.c tarfn.c varbuf.c vercmp.c md5.c \
-		    utils.c startup.c
+		    utils.c startup.c pkg_lex.c
 
 OBJECTS		= $(patsubst %.c, %.o, $(SOURCES))
-GENFILES	= $(OBJECTS) libdpkg.a
+GENFILES	= $(OBJECTS) libdpkg.a pkg_lex.c
 
 ALL_CFLAGS	+= -DCOPYINGFILE=\"$(copyingfile)\"
 
@@ -23,6 +23,9 @@
 	$(AR) r $@ $^
 	$(RANLIB) $@
 
+pkg_lex.c: pkg_lex.lex
+	flex -s -B -i -o$@ -8 -Cfre $^
+
 .PHONY: install
 install:: all
 
diff -NurB dpkg-1.10.9-orig/lib/parse.c dpkg-1.10.9/lib/parse.c
--- dpkg-1.10.9-orig/lib/parse.c	2002-08-29 22:28:16.000000000 +0300
+++ dpkg-1.10.9/lib/parse.c	2003-02-01 03:44:38.000000000 +0200
@@ -19,11 +19,17 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+/*
+ * 2003-02-01: Modified to use a flex-generated lexer by
+ *             Sami Liedes <sliedes@cc.hut.fi>
+ */
+
+
 #include <stdio.h>
 #include <string.h>
 #include <ctype.h>
 #include <stdarg.h>
-
+#include <assert.h>
 
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -34,13 +40,15 @@
 #include <dpkg.h>
 #include <dpkg-db.h>
 #include "parsedump.h"
+#include "dblexer.h"
 
 #ifdef HAVE_MMAP
 #include <sys/mman.h>
 #endif
 
 const struct fieldinfo fieldinfos[]= {
-  /* NB: capitalisation of these strings is important. */
+  /* NB: capitalisation of these strings is important.
+   * The order must match that of the definitions in dblexer.h. */
   { "Package",          f_name,            w_name                                     },
   { "Essential",        f_boolean,         w_booleandefno,   PKGIFPOFF(essential)     },
   { "Status",           f_status,          w_status                                   },
@@ -75,7 +83,7 @@
 #define NFIELDS (sizeof(fieldinfos)/sizeof(struct fieldinfo))
 const int nfields= NFIELDS;
 
-static void cu_parsedb(int argc, void **argv) { close(*(int *)argv); }
+static void cu_parsedb(int argc, void **argv) { fclose(*(FILE **)argv); } /* cleanup handler: closes the FILE * stored in the first argv slot; argc is unused */
 
 int parsedb(const char *filename, enum parsedbflags flags,
             struct pkginfo **donep, FILE *warnto, int *warncount) {
@@ -83,163 +91,139 @@
    * If donep is not null only one package's information is expected.
    */
   
-  int fd;
+  FILE *fp;
   struct pkginfo newpig, *pigp;
   struct pkginfoperfile *newpifp, *pifp;
   struct arbitraryfield *arp, **larpp;
-  int lno;
   int pdone;
   int fieldencountered[NFIELDS];
-  const struct fieldinfo *fip;
-  const struct nickname *nick;
-  char *data, *dataptr, *endptr;
-  const char *fieldstart, *valuestart;
   char *value= NULL;
   int fieldlen= 0, valuelen= 0;
-  int *ip, c;
-  struct stat stat;
+  int token, consumed=1, fieldname;
+  char *arbfieldname;
 
   if (warncount) *warncount= 0;
   newpifp= (flags & pdb_recordavailable) ? &newpig.available : &newpig.installed;
-  fd= open(filename, O_RDONLY);
-  if (fd == -1) ohshite(_("failed to open package info file `%.255s' for reading"),filename);
-
-  push_cleanup(cu_parsedb,~0, NULL,0, 1,&fd);
+  fp= fopen(filename, "r");
+  if (fp == 0) ohshite(_("failed to open package info file `%.255s' for reading"),filename);
 
-  if (fstat(fd, &stat) == -1)
-    ohshite(_("can't stat package info file `%.255s'"),filename);
+  lexer_init(fp);
+  push_cleanup(cu_parsedb,~0, NULL,0, 1,fp);
 
-#ifdef HAVE_MMAP
-  if ((dataptr= (char *)mmap(NULL, stat.st_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED)
-    ohshite(_("can't mmap package info file `%.255s'"),filename);
-#else
-  if ((dataptr= malloc(stat.st_size)) == NULL)
-    ohshite(_("failed to malloc for info file `%.255s'"),filename);
-
-  fd_buf_copy(fd, dataptr, stat.st_size, _("copy info file `%.255s'"),filename);
-#endif
-  data= dataptr;
-  endptr= dataptr + stat.st_size;
-
-  lno= 1;
   pdone= 0;
-#define EOF_mmap(dataptr, endptr)	(dataptr >= endptr)
-#define getc_mmap(dataptr)		*dataptr++;
-#define ungetc_mmap(c, dataptr, data)	dataptr--;
+
+  assert(PARSER_NUM_KWS+1 == NFIELDS);
 
   for (;;) { /* loop per package */
     memset(fieldencountered, 0, sizeof(fieldencountered));
     blankpackage(&newpig);
     blankpackageperfile(newpifp);
-/* Skip adjacent new lines */
-    while(!EOF_mmap(dataptr, endptr)) {
-      c= getc_mmap(dataptr); if (c!='\n' && c!=MSDOS_EOF_CHAR ) break;
-      lno++;
-    }
-    if (EOF_mmap(dataptr, endptr)) break;
+    token= yylex();
+    if (token == PARSER_EOF) break;
+    consumed=0;
     for (;;) { /* loop per field */
-      fieldstart= dataptr - 1;
-      while (!EOF_mmap(dataptr, endptr) && !isspace(c) && c!=':' && c!=MSDOS_EOF_CHAR)
-        c= getc_mmap(dataptr);
-      fieldlen= dataptr - fieldstart - 1;
-      while (EOF_mmap(dataptr, endptr) && c != '\n' && isspace(c)) c= getc_mmap(dataptr);
-      if (EOF_mmap(dataptr, endptr))
-        parseerr(NULL,filename,lno, warnto,warncount,&newpig,0,
-                 _("EOF after field name `%.*s'"),fieldlen,fieldstart);
-      if (c == '\n')
-        parseerr(NULL,filename,lno, warnto,warncount,&newpig,0,
-                 _("newline in field name `%.*s'"),fieldlen,fieldstart);
-      if (c == MSDOS_EOF_CHAR)
-        parseerr(NULL,filename,lno, warnto,warncount,&newpig,0,
-                 _("MSDOS EOF (^Z) in field name `%.*s'"),fieldlen,fieldstart);
-      if (c != ':')
-        parseerr(NULL,filename,lno, warnto,warncount,&newpig,0,
-                 _("field name `%.*s' must be followed by colon"),fieldlen,fieldstart);
-/* Skip space after ':' but before value and eol */
-      while(!EOF_mmap(dataptr, endptr)) {
-        c= getc_mmap(dataptr);
-        if (c == '\n' || !isspace(c)) break;
+      if (consumed) {
+	token= yylex();
+	consumed= 0;
       }
-      if (EOF_mmap(dataptr, endptr))
-        parseerr(NULL,filename,lno, warnto,warncount,&newpig,0,
+      /*
+      parseerr(NULL,filename,lexer_lineno, warnto,warncount,&newpig,0,
+	       _("EOF after field name `%.*s'"),fieldlen,fieldstart);
+      */
+      if (token == PARSER_NL_IN_FIELD_NAME)
+        parseerr(NULL,filename,lexer_lineno, warnto,warncount,&newpig,0,
+                 _("newline in field name `%.*s'"),yyleng,yytext);
+      else if (token == PARSER_MSDOS_EOF_IN_FIELD_NAME)
+        parseerr(NULL,filename,lexer_lineno, warnto,warncount,&newpig,0,
+                 _("MSDOS EOF (^Z) in field name `%.*s'"),yyleng,yytext);
+      else if (token == PARSER_NO_COLON_IN_FIELD_NAME)
+        parseerr(NULL,filename,lexer_lineno, warnto,warncount,&newpig,0,
+                 _("field name `%.*s' must be followed by colon"),yyleng,yytext);
+      else if (token == PARSER_EOF_IN_VALUE)
+        parseerr(NULL,filename,lexer_lineno,warnto,warncount,&newpig,0,
                  _("EOF before value of field `%.*s' (missing final newline)"),
-                 fieldlen,fieldstart);
-      if (c == MSDOS_EOF_CHAR)
-        parseerr(NULL,filename,lno, warnto,warncount,&newpig,0,
+                 fieldlen,fieldname>=0?fieldinfos[fieldname].name:arbfieldname);
+      else if (token == PARSER_PARA_SEP)
+	break;
+      
+      assert(token >= 0 || token == PARSER_ARBITRARYFIELD);
+      fieldname= token;
+      fieldlen= yyleng;
+      if (token == PARSER_ARBITRARYFIELD)
+	arbfieldname= strdup(yytext);
+      
+      token= yylex();
+      consumed= 1;
+      
+      if (token == PARSER_EOF)
+        parseerr(NULL,filename,lexer_lineno,warnto,warncount,&newpig,0,
+                 _("EOF before value of field `%.*s' (missing final newline)"),
+                 fieldlen,fieldname>=0?fieldinfos[fieldname].name:arbfieldname);
+      else if (token == PARSER_MSDOS_EOF_IN_VALUE)
+        parseerr(NULL,filename,lexer_lineno,warnto,warncount,&newpig,0,
                  _("MSDOS EOF char in value of field `%.*s' (missing newline?)"),
-                 fieldlen,fieldstart);
-      valuestart= dataptr - 1;
-      for (;;) {
-        if (c == '\n' || c == MSDOS_EOF_CHAR) {
-          lno++;
-	  if (EOF_mmap(dataptr, endptr)) break;
-          c= getc_mmap(dataptr);
-/* Found double eol, or start of new field */
-          if (EOF_mmap(dataptr, endptr) || c == '\n' || !isspace(c)) break;
-          ungetc_mmap(c,dataptr, data);
-          c= '\n';
-        } else if (EOF_mmap(dataptr, endptr)) {
-          parseerr(NULL,filename,lno, warnto,warncount,&newpig,0,
-                   _("EOF during value of field `%.*s' (missing final newline)"),
-                   fieldlen,fieldstart);
-        }
-        c= getc_mmap(dataptr);
-      }
-      valuelen= dataptr - valuestart - 1;
-/* trim ending space on value */
-      while (valuelen && isspace(*(valuestart+valuelen-1)))
- valuelen--;
-      for (nick= nicknames; nick->nick && (strncasecmp(nick->nick,fieldstart, fieldlen) || nick->nick[fieldlen] != 0); nick++);
-      if (nick->nick) {
-	fieldstart= nick->canon;
-	fieldlen= strlen(fieldstart);
-      }
-      for (fip= fieldinfos, ip= fieldencountered;
-           fip->name && strncasecmp(fieldstart,fip->name, fieldlen);
-           fip++, ip++);
-      if (fip->name) {
+                 fieldlen,fieldname>=0?fieldinfos[fieldname].name:arbfieldname);
+      /*
+      parseerr(NULL,filename,lno, warnto,warncount,&newpig,0,
+	       _("EOF during value of field `%.*s' (missing final newline)"),
+	       fieldlen,fieldstart);
+      */
+
+      assert(token==PARSER_VALUE);
+
+      valuelen= yyleng;
+      /* trim ending space on value */
+      while (valuelen && isspace(*(yytext+valuelen-1)))
+	  valuelen--;
+      if (fieldname>=0) {
 	value= realloc(value,valuelen+1);
-	strncpy(value,valuestart,valuelen);
+	strncpy(value,yytext,valuelen);
 	*(value+valuelen)= 0;
-        if (*ip++)
-          parseerr(NULL,filename,lno, warnto,warncount,&newpig,0,
-                   _("duplicate value for `%s' field"), fip->name);
-        fip->rcall(&newpig,newpifp,flags,filename,lno-1,warnto,warncount,value,fip);
+        if (fieldencountered[fieldname]++)
+          parseerr(NULL,filename,lexer_lineno,warnto,warncount,&newpig,0,
+                   _("duplicate value for `%s' field"), fieldinfos[fieldname].name);
+        fieldinfos[fieldname].rcall(&newpig,newpifp,flags,filename,lexer_lineno-1,warnto,warncount,value,&fieldinfos[fieldname]);
       } else {
-        if (fieldlen<2)
-          parseerr(NULL,filename,lno, warnto,warncount,&newpig,0,
-                   _("user-defined field name `%.*s' too short"), fieldlen,fieldstart);
+	assert(fieldname==PARSER_ARBITRARYFIELD);
+	if (fieldlen<2)
+          parseerr(NULL,filename,lexer_lineno,warnto,warncount,&newpig,0,
+                   _("user-defined field name `%.*s' too short"),fieldlen,arbfieldname);
         larpp= &newpifp->arbs;
         while ((arp= *larpp) != NULL) {
-          if (!strncasecmp(arp->name,fieldstart,fieldlen))
-            parseerr(NULL,filename,lno, warnto,warncount,&newpig,0,
-                     _("duplicate value for user-defined field `%.*s'"), fieldlen,fieldstart);
+          if (!strncasecmp(arp->name,arbfieldname,fieldlen))
+            parseerr(NULL,filename,lexer_lineno,warnto,warncount,&newpig,0,
+                     _("duplicate value for user-defined field `%.*s'"), fieldlen,arbfieldname);
           larpp= &arp->next;
         }
         arp= nfmalloc(sizeof(struct arbitraryfield));
-        arp->name= nfstrnsave(fieldstart,fieldlen);
-        arp->value= nfstrnsave(valuestart,valuelen);
+        arp->name= nfstrnsave(arbfieldname,fieldlen);
+        arp->value= nfstrnsave(yytext,valuelen);
         arp->next= NULL;
         *larpp= arp;
       }
-      if (EOF_mmap(dataptr, endptr) || c == '\n' || c == MSDOS_EOF_CHAR) break;
+      token= yylex();
+      consumed= 0;
+      if (token==PARSER_EOF) {
+	consumed= 1;
+	break;
+      }
     } /* loop per field */
     if (pdone && donep)
-      parseerr(NULL,filename,lno, warnto,warncount,&newpig,0,
+      parseerr(NULL,filename,lexer_lineno,warnto,warncount,&newpig,0,
                _("several package info entries found, only one allowed"));
-    parsemustfield(NULL,filename,lno, warnto,warncount,&newpig,0,
+    parsemustfield(NULL,filename,lexer_lineno,warnto,warncount,&newpig,0,
                    &newpig.name, "package name");
     if ((flags & pdb_recordavailable) || newpig.status != stat_notinstalled) {
-      parsemustfield(NULL,filename,lno, warnto,warncount,&newpig,1,
+      parsemustfield(NULL,filename,lexer_lineno,warnto,warncount,&newpig,1,
                      (const char **)&newpifp->description, "description");
-      parsemustfield(NULL,filename,lno, warnto,warncount,&newpig,1,
+      parsemustfield(NULL,filename,lexer_lineno,warnto,warncount,&newpig,1,
                      (const char **)&newpifp->maintainer, "maintainer");
       if (newpig.status != stat_halfinstalled)
-        parsemustfield(NULL,filename,lno, warnto,warncount,&newpig,0,
+        parsemustfield(NULL,filename,lexer_lineno,warnto,warncount,&newpig,0,
                        &newpifp->version.version, "version");
     }
     if (flags & pdb_recordavailable)
-      parsemustfield(NULL,filename,lno, warnto,warncount,&newpig,1,
+      parsemustfield(NULL,filename,lexer_lineno,warnto,warncount,&newpig,1,
                      (const char **)&newpifp->architecture, "architecture");
     else if (newpifp->architecture && *newpifp->architecture)
       newpifp->architecture= NULL;
@@ -253,7 +237,7 @@
     if (!(flags & pdb_recordavailable)) {
       if (newpig.configversion.version) {
         if (newpig.status == stat_installed || newpig.status == stat_notinstalled)
-          parseerr(NULL,filename,lno, warnto,warncount,&newpig,0,
+          parseerr(NULL,filename,lexer_lineno,warnto,warncount,&newpig,0,
                    _("Configured-Version for package with inappropriate Status"));
       } else {
         if (newpig.status == stat_installed) newpig.configversion= newpifp->version;
@@ -267,7 +251,7 @@
     if (!(flags & pdb_recordavailable) &&
         newpig.status == stat_notinstalled &&
         newpifp->conffiles) {
-      parseerr(NULL,filename,lno, warnto,warncount,&newpig,1,
+      parseerr(NULL,filename,lexer_lineno,warnto,warncount,&newpig,1,
                _("Package which in state not-installed has conffiles, forgetting them"));
       newpifp->conffiles= NULL;
     }
@@ -313,17 +297,18 @@
 
     if (donep) *donep= pigp;
     pdone++;
-    if (EOF_mmap(dataptr, endptr)) break;
-    if (c == '\n') lno++;
+    if (token==PARSER_EOF)
+      break;
+    else if (consumed) {
+      token= yylex();
+      consumed=0;
+    }
+    if (token==PARSER_EOF)
+      break;
   }
   pop_cleanup(0);
-#ifdef HAVE_MMAP
-  munmap(data, stat.st_size);
-#else
-  free(data);
-#endif
   free(value);
-  if (close(fd)) ohshite(_("failed to close after read: `%.255s'"),filename);
+  if (fclose(fp)) ohshite(_("failed to close after read: `%.255s'"),filename);
   if (donep && !pdone) ohshit(_("no package information in `%.255s'"),filename);
 
   return pdone;
diff -NurB dpkg-1.10.9-orig/lib/pkg_lex.lex dpkg-1.10.9/lib/pkg_lex.lex
--- dpkg-1.10.9-orig/lib/pkg_lex.lex	1970-01-01 02:00:00.000000000 +0200
+++ dpkg-1.10.9/lib/pkg_lex.lex	2003-02-01 03:43:09.000000000 +0200
@@ -0,0 +1,118 @@
+%{
+/*
+ * libdpkg - Debian packaging suite library routines
+ * pkg_lex.lex - A lexical analyzer for dpkg package database files
+ *
+ * Copyright (C) 2003 Sami Liedes <sliedes@cc.hut.fi>
+ * 
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with dpkg; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include "dblexer.h"
+
+int lexer_lineno; /* current input line number; set to 1 by lexer_init() */
+/* Add the number of '\n' characters in the NUL-terminated string str to lexer_lineno. */
+static void account_newlines(const char *str) {
+  do
+    if (*str=='\n') lexer_lineno++;
+  while (*str++); /* the test reads the current character and then advances, so the loop stops after visiting the NUL */
+}
+/* Prepare the scanner for a new input file: line counter back to 1, input switched to fp, start condition reset. */
+void lexer_init(FILE *fp) {
+  lexer_lineno=1;
+  yyrestart(fp);
+  BEGIN(0); /* start condition 0 is the scanner's initial (field-name) state, as opposed to <value> */
+}
+
+#define yyterminate() return PARSER_EOF
+
+%}
+
+%option noyywrap
+%x value
+
+MULTI_VALUE		([^\n\032]*\n\ )+[^\n\032]+\n
+MULTIVAL_ERR_EOF1	([^\n]*\n\ )+[^\n]*
+MULTIVAL_ERR_EOF2	([^\n]*\n\ )+([^\n]*\n)
+MULTIVAL_ERR_EOF3	([^\n]*\n)
+MULTIVAL_ERR_EOF4	[^\n]+
+
+SINGLE_VALUE		[^\n\032]+\n
+MS_EOF_MULTI_VALUE	([^\n]*\n\ )+[^\n]+\n
+MS_EOF_SINGLE_VALUE	[^\n]+\n
+
+%%
+
+Package:\ *		{ /* known field name: enter the value start condition and return its KW_* token */ BEGIN(value); return KW_PACKAGE; }
+Essential:\ *		{ BEGIN(value); return KW_ESSENTIAL; }
+Status:\ *		{ BEGIN(value); return KW_STATUS; }
+Priority:\ *		|
+Class:\ *		{ BEGIN(value); return KW_PRIORITY; /* Priority and Class share this action */ }
+Section:\ *		{ BEGIN(value); return KW_SECTION; }
+Installed-Size:\ *	{ BEGIN(value); return KW_INSTALLED_SIZE; }
+Origin:\ *		{ BEGIN(value); return KW_ORIGIN; }
+Maintainer:\ *		{ BEGIN(value); return KW_MAINTAINER; }
+Bugs:\ *		{ BEGIN(value); return KW_BUGS; }
+Architecture:\ *	{ BEGIN(value); return KW_ARCHITECTURE; }
+Source:\ *		{ BEGIN(value); return KW_SOURCE; }
+Version:\ *		{ BEGIN(value); return KW_VERSION; }
+Revision:\ *		|
+Package-Revision:\ *	|
+Package_Revision:\ *	{ BEGIN(value); return KW_REVISION; /* Revision, Package-Revision and Package_Revision share this action */ }
+Config-Version:\ *	{ BEGIN(value); return KW_CONFIG_VERSION; }
+Replaces:\ *		{ BEGIN(value); return KW_REPLACES; }
+Provides:\ *		{ BEGIN(value); return KW_PROVIDES; }
+Depends:\ *		{ BEGIN(value); return KW_DEPENDS; }
+Pre-Depends:\ *		{ BEGIN(value); return KW_PRE_DEPENDS; }
+Recommends:\ *		|
+Recommended:\ *		{ BEGIN(value); return KW_RECOMMENDS; /* Recommends and Recommended share this action */ }
+Suggests:\ *		|
+Optional:\ *		{ BEGIN(value); return KW_SUGGESTS; /* Suggests and Optional share this action */ }
+Conflicts:\ *		{ BEGIN(value); return KW_CONFLICTS; }
+Enhances:\ *		{ BEGIN(value); return KW_ENHANCES; }
+Conffiles:\ *		{ BEGIN(value); return KW_CONFFILES; }
+Filename:\ *		{ BEGIN(value); return KW_FILENAME; }
+Size:\ *		{ BEGIN(value); return KW_SIZE; }
+MD5sum:\ *		{ BEGIN(value); return KW_MD5SUM; }
+MSDOS-Filename:\ *	{ BEGIN(value); return KW_MSDOS_FILENAME; }
+Description:\ *		{ BEGIN(value); return KW_DESCRIPTION; }
+
+[^ :\n]+\n		{ return PARSER_NL_IN_FIELD_NAME; }
+[^ :\032]\032		{ /* NOTE(review): this matches exactly one character before ^Z, while the newline rule above uses +; confirm whether + was intended */ return PARSER_MSDOS_EOF_IN_FIELD_NAME; }
+
+<*>\032			yyterminate(); /* MSDOS EOF (^Z) in any start condition; yyterminate() is redefined in the prologue to return PARSER_EOF */
+
+(" "*\n){2,}		{ /* NOTE(review): yyleng also counts any matched spaces, not only newlines, so this may over-count lines; confirm */ lexer_lineno+=yyleng; return PARSER_PARA_SEP; }
+" "*\n			{ lexer_lineno++; /* single line end: count it and keep scanning, no token is returned */ }
+
+[^ \n:]*		{ return PARSER_NO_COLON_IN_FIELD_NAME; }
+
+[^ \n:]*:\ *		{ /* unknown field: cut the match at the colon so yytext and yyleng describe only the field name */ *(strchr(yytext,':'))=0; yyleng=strlen(yytext); BEGIN(value); return PARSER_ARBITRARYFIELD; }
+
+<value>{MULTI_VALUE}	{ /* push the final newline back to the input, count the embedded newlines, then return to the initial state */ yyless(--yyleng); yytext[yyleng]=0; account_newlines(yytext); BEGIN(0); return PARSER_VALUE; }
+<value>{SINGLE_VALUE}	{ /* the pushed-back newline is left for the initial-state newline rules to match and count */ yyless(--yyleng); yytext[yyleng]=0; BEGIN(0); return PARSER_VALUE; }
+<value>{MS_EOF_SINGLE_VALUE} {  return PARSER_MSDOS_EOF_IN_VALUE; }
+<value>{MS_EOF_MULTI_VALUE} { return PARSER_MSDOS_EOF_IN_VALUE; }
+
+<value>{MULTIVAL_ERR_EOF1} |
+<value>{MULTIVAL_ERR_EOF2} |
+<value>{MULTIVAL_ERR_EOF3} |
+<value>{MULTIVAL_ERR_EOF4} { return PARSER_EOF_IN_VALUE; }
+
+(" "*\n)+" "*		|
+" "+			{ return PARSER_NO_COLON_IN_FIELD_NAME; }



Reply to: