Re: New Version of Aspell English Dictionary Now Available (fwd)
On Wed, 8 Jan 2003, David Coe wrote:
> Kevin Atkinson <kevina@gnu.org> writes:
> > Since it is the DEC wordlist caused a problem someone should really update
> > the wenglish package with something else since it is based on the DEC
> > wordlist. Perhapes you should use the wordlist for Aspell or if that is
> > not large enough something from SCOWL.
> I'm looking into that. (I'm the wenglish maintainer.)
FYI: In order to use the Aspell word lists for purposes other than
Aspell, the word lists must first be decompressed. To do so, pipe them through
"word-list-compress d". word-list-compress is a simple utility
distributed with Aspell. If you don't have Aspell installed, you can
simply download Aspell and compile the utility yourself. Since it is so
small, I have attached the utility to this email.
--
http://kevin.atkinson.dhs.org
/*
* Copyright (c) 2000-2001
* Kevin Atkinson
*
* Permission to use, copy, modify, distribute and sell this software
* and its documentation for any purpose is hereby granted without
* fee, provided that the above copyright notice appear in all copies
* and that both that copyright notice and this permission notice
* appear in supporting documentation. Kevin Atkinson makes no
* representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied
* warranty.
*
*/
#include <stdio.h>
#if defined(__CYGWIN__) || defined (_WIN32)
# include <io.h>
# include <fcntl.h>
# define SETBIN(fno) _setmode( _fileno( fno ), _O_BINARY )
#else
# define SETBIN(fno)
#endif
/*
 * Writes the program description, the sorting advice, the copyright
 * line and the command-line synopsis to stderr.
 */
void usage ()
{
  /* Adjacent string literals are concatenated by the compiler, so the
     whole help text goes out in a single call. */
  fputs("Compresses or uncompresses sorted word lists.\n"
        "For best result the locale should be set to C\n"
        "before sorting by setting the environmental\n"
        "variable LANG to \"C\" before sorting.\n"
        "Copyright 2001 by Kevin Atkinson.\n"
        "Usage: word-list-compress c[ompress]|d[ecompress]\n",
        stderr);
}
/* Number of bytes (including the terminating NUL) that get_word() may
   write into the caller's buffer. */
enum { GET_WORD_BUF_SIZE = 256 };

/*
 * Reads the next word from `in` into `w`.
 *
 * A word is a maximal run of bytes with a value greater than 32; every
 * byte with a value of 32 or less (space, newline, control characters)
 * is treated as a delimiter and leading delimiters are skipped.
 *
 * `w` must have room for at least GET_WORD_BUF_SIZE bytes.  A word that
 * does not fit is truncated to GET_WORD_BUF_SIZE - 1 characters; the
 * remainder of that word is read and discarded so that it is not
 * mistaken for a separate word on the next call.
 *
 * Returns 1 when a word was stored in `w` (NUL-terminated), including
 * when the word was ended by end-of-file rather than by a delimiter.
 * Returns 0 when end-of-file is reached before any word character.
 */
static int get_word(FILE * in, char * w)
{
  int c;
  int len = 0;

  /* Skip delimiters in front of the word. */
  do {
    c = getc(in);
  } while (c != EOF && c <= 32);
  if (c == EOF)
    return 0;

  /* Collect the word, never writing past the end of the buffer. */
  do {
    if (len < GET_WORD_BUF_SIZE - 1)
      w[len++] = (char)c;
    c = getc(in);
  } while (c != EOF && c > 32);
  w[len] = '\0';

  /* Put the delimiter back; there is nothing to put back at EOF. */
  if (c != EOF)
    ungetc(c, in);

  /* A word was read even if the input ended right after it. */
  return 1;
}
/* Size of a word buffer, including the terminating NUL. */
enum { WORD_BUF_SIZE = 256 };

/* Prefix lengths up to this value are stored as the single byte
   (prefix + 1), which is at most 32 and therefore cannot be confused
   with a word character (always greater than 32).  Longer prefixes are
   announced by a leading 0 byte. */
enum { MAX_SHORT_PREFIX = 31 };

/* Largest prefix length that can be stored: it is written as
   (prefix + 1) in one byte, so prefix + 1 must not exceed 255. */
enum { MAX_PREFIX_LEN = 254 };

/*
 * Compresses the words read from `in` and writes the result to `out`.
 *
 * Each word is written as the length of the prefix it shares with the
 * previous word, plus one, followed by the remaining characters of the
 * word.  When the prefix is longer than MAX_SHORT_PREFIX a 0 byte is
 * written in front of the length byte.
 *
 * Returns 0 on success and 1 if writing to `out` failed.
 */
static int compress_words(FILE * in, FILE * out)
{
  char buf_a[WORD_BUF_SIZE];
  char buf_b[WORD_BUF_SIZE];
  char * prev = buf_b;
  char * cur = buf_a;
  char * tmp;
  int i;

  *prev = '\0';
  while (get_word(in, cur)) {
    /* Length of the common prefix.  prev[i] == cur[i] with a non-NUL
       prev[i] implies cur[i] is non-NUL too.  The prefix is capped so
       that i + 1 still fits in one byte. */
    i = 0;
    while (i < MAX_PREFIX_LEN && prev[i] != '\0' && prev[i] == cur[i])
      ++i;

    if (i > MAX_SHORT_PREFIX)
      putc('\0', out);
    putc(i + 1, out);
    fputs(cur + i, out);

    /* The word just written becomes the previous word. */
    tmp = prev;
    prev = cur;
    cur = tmp;
  }

  return (fflush(out) == 0 && !ferror(out)) ? 0 : 1;
}

/*
 * Reverses compress_words(): reads the compressed stream from `in` and
 * writes one word per line to `out`.
 *
 * The stream is validated while it is decoded.  A prefix length that is
 * missing, negative or longer than the previous word, or a word that
 * would not fit in the buffer, is reported on stderr and ends decoding.
 *
 * Returns 0 on success and 1 on malformed input or a write failure.
 */
static int decompress_words(FILE * in, FILE * out)
{
  char cur[WORD_BUF_SIZE];
  int prev_len = 0;   /* length of the word currently held in cur */
  int len;
  int c;

  c = getc(in);
  while (c != EOF) {
    len = c;
    if (len == 0)
      len = getc(in);   /* EOF here is negative and is rejected below */
    --len;

    /* The prefix is reused from the previous word still stored in cur,
       so it can never be longer than that word. */
    if (len < 0 || len > prev_len) {
      fputs("word-list-compress: corrupt input\n", stderr);
      return 1;
    }

    /* Append the suffix; the byte that ends it (32 or less, or EOF) is
       the control byte of the next word. */
    while ((c = getc(in)) > 32) {
      if (len >= WORD_BUF_SIZE - 1) {
        fputs("word-list-compress: corrupt input\n", stderr);
        return 1;
      }
      cur[len++] = (char)c;
    }
    cur[len] = '\0';
    prev_len = len;

    fputs(cur, out);
    putc('\n', out);
  }

  return (fflush(out) == 0 && !ferror(out)) ? 0 : 1;
}

/*
 * Entry point.  Expects exactly one argument whose first character
 * selects the mode: 'c' compresses stdin to stdout, 'd' decompresses
 * stdin to stdout.  Anything else prints the usage text and exits with
 * status 1.
 */
int main (int argc, const char *argv[]) {
  if (argc != 2) {
    usage();
    return 1;
  }

  switch (argv[1][0]) {
  case 'c':
    /* The compressed side of the stream is binary data. */
    SETBIN (stdout);
    return compress_words(stdin, stdout);
  case 'd':
    SETBIN (stdin);
    return decompress_words(stdin, stdout);
  default:
    usage();
    return 1;
  }
}
Reply to: