[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#319369: unmunch cannot always extract all words



Package: libmyspell-dev
Version: 1:3.1-11

If you are using a MySpell dictionary (.dic) containing a word/affix combination that generates more than 5,000 words, then the unmunch utility can generate only the first 5,000 of them.

The next Italian dictionary will contain a word/affix combination that generates more than 16,500 words.

This bug affects only the unmunch utility, not the MySpell library.

I have also written to the MySpell author.

I attach a patch ... I have gone a bit overboard with it :-)
I have also added two missing casts and used the mychomp() implementation from the MySpell library.

Ciao
Davide

Debian: Etch
Kernel: Linux localhost.localdomain 2.4.27 i686 GNU/Linux
glibc version: 2.3.2.ds1-22
gcc version: 3.3.5 (Debian 1:3.3.5-13)
--- /mnt/mandrake/src/MySpell/unmunch.c	2003-03-26 14:02:49.000000000 +0100
+++ unmunch.c	2005-07-21 17:26:50.000000000 +0200
@@ -22,8 +22,8 @@
 int main(int argc, char** argv)
 {
 
-  int i;
-  int al, wl;
+  long int i;
+  long int al, wl;
 
   FILE * wrdlst;
   FILE * afflst;
@@ -122,14 +122,14 @@
 
 void parse_aff_file(FILE * afflst)
 {  
-    int i, j;
-    int numents=0;
+    long int i, j;
+    long int numents=0;
     char achar='\0';
     short ff=0;
     char ft;
     struct affent * ptr= NULL;
     struct affent * nptr= NULL;
-    char * line = malloc(MAX_LN_LEN);
+    char * line = (char *) malloc(MAX_LN_LEN);
 
     while (fgets(line,MAX_LN_LEN,afflst)) {
        mychomp(line);
@@ -149,7 +149,7 @@
                     case 1: { achar = *piece; break; }
                     case 2: { if (*piece == 'Y') ff = XPRODUCT; break; }
                     case 3: { numents = atoi(piece); 
-                              ptr = malloc(numents * sizeof(struct affent));
+                              ptr = (struct affent *) malloc(numents * sizeof(struct affent));
                               ptr->achar = achar;
                               ptr->xpflg = ff;
 	                      fprintf(stderr,"parsing %c entries %d\n",achar,numents);
@@ -230,14 +230,14 @@
 
 void encodeit(struct affent * ptr, char * cs)
 {
-  int nc;
-  int neg;
-  int grp;
+  long int nc;
+  long int neg;
+  long int grp;
   unsigned char c;
-  int n;
-  int ec;   
-  int nm;
-  int i, j, k;
+  long int n;
+  long int ec;   
+  long int nm;
+  long int i, j, k;
   unsigned char mbr[MAX_WD_LEN];
 
   /* now clear the conditions array */
@@ -316,13 +316,13 @@
 
 
 /* add a prefix to word */
-void pfx_add (const char * word, int len, struct affent* ep, int num)
+void pfx_add (const char * word, long int len, struct affent* ep, long int num)
 {
     struct affent *     aent;
-    int			cond;
-    int	tlen;
+    long int			cond;
+    long int	tlen;
     unsigned char *	cp;		
-    int			i;
+    long int			i;
     char *              pp;
     char	        tword[MAX_WD_LEN];
 
@@ -361,13 +361,13 @@
 
 
 /* add a suffix to a word */
-void suf_add (const char * word, int len, struct affent * ep, int num)
+void suf_add (const char * word, long int len, struct affent * ep, long int num)
 {
     struct affent *     aent;	
-    int	                tlen;	
-    int			cond;	
+    long int	                tlen;	
+    long int			cond;	
     unsigned char *	cp;
-    int			i;
+    long int			i;
     char	        tword[MAX_WD_LEN];
     char *              pp;
 
@@ -407,12 +407,12 @@
 
 
 
-int expand_rootword(const char * ts, int wl, const char * ap, int al)
+int expand_rootword(const char * ts, long int wl, const char * ap, long int al)
 {
-    int i;
-    int j;
-    int nh=0;
-    int nwl;
+    long int i;
+    long int j;
+    long int nh=0;
+    long int nwl;
 
     for (i=0; i < numsfx; i++) {
       if (strchr(ap,(stable[i].aep)->achar)) {
@@ -492,7 +492,7 @@
 void mychomp(char * s)
 {
   int k = strlen(s);
-  if (k > 0) *(s+k-1) = '\0';
+  if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
   if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
 }
 
--- unmunch.h	2005-07-21 16:41:32.000000000 +0200
+++ /tmp/a/unmunch.h	2005-07-21 16:17:30.000000000 +0200
@@ -4,10 +4,10 @@
 #define MAX_WD_LEN    200
 #define MAX_PREFIXES  256
 #define MAX_SUFFIXES  256
-#define MAX_WORDS     5000
- 
+#define MAX_WORDS     500000
+
 #define ROTATE_LEN      5
- 
+
 #define ROTATE(v,q) \
    (v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q))-1));
 
@@ -33,7 +33,7 @@
 struct affixptr
 {
     struct affent * aep;
-    int		    num;
+    int    num;
 };
 
 /* the prefix and suffix table */
@@ -47,7 +47,7 @@
 struct affixptr          stable[MAX_SUFFIXES];
 
 
-int    numwords;	          /* number of words found */
+long int    numwords;	          /* number of words found */
 struct dwords
 {
   char * word;
@@ -63,11 +63,11 @@
 
 void encodeit(struct affent * ptr, char * cs);
 
-int expand_rootword(const char *, int, const char*, int);
+int expand_rootword(const char *, long int, const char*, long int);
 
-void pfx_add (const char * word, int len, struct affent* ep, int num);
+void pfx_add (const char * word, long int len, struct affent* ep, long int num);
 
-void suf_add (const char * word, int len, struct affent * ep, int num);
+void suf_add (const char * word, long int len, struct affent * ep, long int num);
 
 char * mystrsep(char ** stringp, const char delim);
 

Reply to: