[hunspell] 45/98: Imported Upstream version 1.2.2~b
This is an automated email from the git hooks/post-receive script.
rene pushed a commit to branch master
in repository hunspell.
commit cadb20abfd2aad13ab00cdc39188d8fe84c3ca55
Author: Rene Engelhard <rene@debian.org>
Date: Thu Apr 21 14:45:17 2016 +0200
Imported Upstream version 1.2.2~b
---
BUGS | 2 -
ChangeLog | 40 ++++++
NEWS | 13 ++
configure | 79 +++++------
configure.ac | 7 +-
hunspell.pc.in | 2 +-
po/hu.gmo | Bin 7563 -> 7564 bytes
po/hu.po | 27 ++--
src/hunspell/Makefile.am | 14 +-
src/hunspell/Makefile.in | 69 ++++-----
src/hunspell/affentry.cxx | 10 +-
src/hunspell/affixmgr.cxx | 165 +++++++++++-----------
src/hunspell/affixmgr.hxx | 26 ++--
src/hunspell/csutil.cxx | 91 +++++++-----
src/hunspell/csutil.hxx | 9 +-
src/hunspell/filemgr.cxx | 38 +++++
src/hunspell/filemgr.hxx | 19 +++
src/hunspell/hashmgr.cxx | 91 ++++++------
src/hunspell/hashmgr.hxx | 11 +-
src/hunspell/htypes.hxx | 5 +-
src/hunspell/hunspell.cxx | 77 ++++++----
src/hunspell/hunspell.h | 10 +-
src/hunspell/hunspell.hxx | 14 +-
src/hunspell/hunzip.cxx | 191 +++++++++++++++++++++++++
src/hunspell/hunzip.hxx | 41 ++++++
src/hunspell/suggestmgr.cxx | 13 +-
src/hunspell/suggestmgr.hxx | 2 +-
src/parsers/Makefile.am | 2 +-
src/parsers/Makefile.in | 4 +-
src/tools/Makefile.am | 16 ++-
src/tools/Makefile.in | 45 ++++--
src/tools/affixcompress | 183 ++++++++++++++++++++++++
src/tools/chmorph.cxx | 46 +++++-
src/tools/example.cxx | 59 +++-----
src/tools/hunspell.cxx | 128 +++++++++++------
src/tools/hunzip.cxx | 22 +++
src/tools/hzip.c | 281 +++++++++++++++++++++++++++++++++++++
src/win_api/hunspelldll.c | 10 +-
tests/IJ.good | 2 +
tests/Makefile.am | 1 +
tests/Makefile.in | 1 +
tests/suggestiontest/Makefile.orig | 2 +-
42 files changed, 1419 insertions(+), 449 deletions(-)
diff --git a/BUGS b/BUGS
index b18d135..98558e1 100644
--- a/BUGS
+++ b/BUGS
@@ -1,5 +1,3 @@
-* Problems with UTF-8 flag support on ARM platform (fail reputf and maputf test)
-
* Interactive interface has some visualisation problem with UTF-8 characters
* -U, -u options doesn't support Unicode.
diff --git a/ChangeLog b/ChangeLog
index dc3c224..46d1473 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,43 @@
+2007-11-16 Németh László <nemeth at OOo>:
+ - ZWSP, REP, tabulator problems: need for the final release?
+
+ * Bug 1851246 IDB_Winshell Visual C, hunspell.cxx, csutil.cxx, hunspelldll.c
+
+ * Bug 1856572 Mark de Does C prototype problem
+
+ * hunspell.pc: 1857450 wrong prefix in hunspell.pc.in
+
+ * Bug 1857512, reported by Rene Engelhard, Mark de Does
+
+ * csutil.cxx: Bug 1863239, notrailingcomma patch and optimization of
+ get_currect_cs() by Caolan McNamara.
+
+ * csutil.cxx: patches for ARM platform, signed_chars.dpatch
+ by Rene Engelhard and arm_structure_alignment.dpatch by
+ Steinar H. Gunderson <sesse@debian.org>
+
+ * hunzip.*, hzip.c: new hzip compression format
+
+ * tools/affixcompressor: affix compressor utility (similar to
+ munch, but it generates affix table automatically), works
+ with million-words dictionaries of agglutinative languages.
+
+ * README: fix problems reported by Pham Ngoc Khanh.
+
+ * hunspell.pc: reset numbering scheme: libhunspell-1.2
+
+ * csutil.cxx, suggestmgr: Warning-free in OOo builds
+
+ * hentry??, csutil.cxx: fix protected memory problems with
+ stored pointers on several not x86 platforms.
+
+ * fix iconv support on Solaris platform
+
+ * tests/IJ.good: add missing test file
+
+ * csutil.cxx: fix const char* related errors. Compiling bug
+ with Visual C++ reported by Ryan VanderMeulen and Ingo H. De Boer.
+
2007-11-01 Németh László <nemeth at OOo>:
* hunspell/*: new feature: morphological generation,
also fix experimental morphological analysis and stemming.
diff --git a/NEWS b/NEWS
index befdaa0..a68e581 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,16 @@
+2008-01-16: Hunspell 1.2.2 release:
+ - multiple dictionary (dic file) support to use extra (medical,
+ geographical etc.) dictionaries
+
+ - optional compressed dictionary format "hzip" for aff and dic files
+
+ - support encrypted dictionaries for closed OpenOffice.org extensions or
+ other commercial programs
+
+ - new affix compression tool "affixcompress"
+
+ - bug fixes
+
2007-11-01: Hunspell 1.2.1 release:
- new memory efficient condition checking algorithm for affix rules
diff --git a/configure b/configure
index d8f741d..f2d6276 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.59 for hunspell 1.2.1.
+# Generated by GNU Autoconf 2.59 for hunspell 1.2.2b.
#
# Report bugs to <nemeth@openoffice.org>.
#
@@ -423,8 +423,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='hunspell'
PACKAGE_TARNAME='hunspell'
-PACKAGE_VERSION='1.2.1'
-PACKAGE_STRING='hunspell 1.2.1'
+PACKAGE_VERSION='1.2.2b'
+PACKAGE_STRING='hunspell 1.2.2b'
PACKAGE_BUGREPORT='nemeth@openoffice.org'
ac_unique_file="config.h.in"
@@ -954,7 +954,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures hunspell 1.2.1 to adapt to many kinds of systems.
+\`configure' configures hunspell 1.2.2b to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1021,7 +1021,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of hunspell 1.2.1:";;
+ short | recursive ) echo "Configuration of hunspell 1.2.2b:";;
esac
cat <<\_ACEOF
@@ -1171,7 +1171,7 @@ fi
test -n "$ac_init_help" && exit 0
if $ac_init_version; then
cat <<\_ACEOF
-hunspell configure 1.2.1
+hunspell configure 1.2.2b
generated by GNU Autoconf 2.59
Copyright (C) 2003 Free Software Foundation, Inc.
@@ -1185,7 +1185,7 @@ cat >&5 <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by hunspell $as_me 1.2.1, which was
+It was created by hunspell $as_me 1.2.2b, which was
generated by GNU Autoconf 2.59. Invocation command line was
$ $0 $@
@@ -1629,9 +1629,6 @@ test -n "$target_alias" &&
test "$program_prefix$program_suffix$program_transform_name" = \
NONENONEs,x,x, &&
program_prefix=${target_alias}-
-case "$host" in
-arm*) XFAILED="flagutf8.test maputf.test";;
-esac
am__api_version="1.9"
@@ -1916,7 +1913,7 @@ fi
# Define the identity of the package.
PACKAGE=hunspell
- VERSION=1.2.1
+ VERSION=1.2.2b
cat >>confdefs.h <<_ACEOF
@@ -4197,7 +4194,7 @@ ia64-*-hpux*)
;;
*-*-irix6*)
# Find out which ABI we are using.
- echo '#line 4200 "configure"' > conftest.$ac_ext
+ echo '#line 4197 "configure"' > conftest.$ac_ext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
(eval $ac_compile) 2>&5
ac_status=$?
@@ -5332,7 +5329,7 @@ fi
# Provide some information about the compiler.
-echo "$as_me:5335:" \
+echo "$as_me:5332:" \
"checking for Fortran 77 compiler version" >&5
ac_compiler=`set X $ac_compile; echo $2`
{ (eval echo "$as_me:$LINENO: \"$ac_compiler --version </dev/null >&5\"") >&5
@@ -6395,11 +6392,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:6398: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:6395: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:6402: \$? = $ac_status" >&5
+ echo "$as_me:6399: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -6663,11 +6660,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:6666: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:6663: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:6670: \$? = $ac_status" >&5
+ echo "$as_me:6667: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -6767,11 +6764,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:6770: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:6767: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:6774: \$? = $ac_status" >&5
+ echo "$as_me:6771: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -8236,7 +8233,7 @@ linux*)
libsuff=
case "$host_cpu" in
x86_64*|s390x*|powerpc64*)
- echo '#line 8239 "configure"' > conftest.$ac_ext
+ echo '#line 8236 "configure"' > conftest.$ac_ext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
(eval $ac_compile) 2>&5
ac_status=$?
@@ -9133,7 +9130,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 9136 "configure"
+#line 9133 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -9233,7 +9230,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 9236 "configure"
+#line 9233 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -11576,11 +11573,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:11579: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:11576: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:11583: \$? = $ac_status" >&5
+ echo "$as_me:11580: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -11680,11 +11677,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:11683: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:11680: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:11687: \$? = $ac_status" >&5
+ echo "$as_me:11684: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -12216,7 +12213,7 @@ linux*)
libsuff=
case "$host_cpu" in
x86_64*|s390x*|powerpc64*)
- echo '#line 12219 "configure"' > conftest.$ac_ext
+ echo '#line 12216 "configure"' > conftest.$ac_ext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
(eval $ac_compile) 2>&5
ac_status=$?
@@ -13274,11 +13271,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:13277: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:13274: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:13281: \$? = $ac_status" >&5
+ echo "$as_me:13278: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -13378,11 +13375,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:13381: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:13378: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:13385: \$? = $ac_status" >&5
+ echo "$as_me:13382: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -14827,7 +14824,7 @@ linux*)
libsuff=
case "$host_cpu" in
x86_64*|s390x*|powerpc64*)
- echo '#line 14830 "configure"' > conftest.$ac_ext
+ echo '#line 14827 "configure"' > conftest.$ac_ext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
(eval $ac_compile) 2>&5
ac_status=$?
@@ -15605,11 +15602,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:15608: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:15605: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:15612: \$? = $ac_status" >&5
+ echo "$as_me:15609: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -15873,11 +15870,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:15876: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:15873: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:15880: \$? = $ac_status" >&5
+ echo "$as_me:15877: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -15977,11 +15974,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:15980: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:15977: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:15984: \$? = $ac_status" >&5
+ echo "$as_me:15981: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -17446,7 +17443,7 @@ linux*)
libsuff=
case "$host_cpu" in
x86_64*|s390x*|powerpc64*)
- echo '#line 17449 "configure"' > conftest.$ac_ext
+ echo '#line 17446 "configure"' > conftest.$ac_ext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
(eval $ac_compile) 2>&5
ac_status=$?
@@ -23884,7 +23881,7 @@ _ASBOX
} >&5
cat >&5 <<_CSEOF
-This file was extended by hunspell $as_me 1.2.1, which was
+This file was extended by hunspell $as_me 1.2.2b, which was
generated by GNU Autoconf 2.59. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -23947,7 +23944,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
-hunspell config.status 1.2.1
+hunspell config.status 1.2.2b
configured by $0, generated by GNU Autoconf 2.59,
with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\"
diff --git a/configure.ac b/configure.ac
index 61083a4..19ec498 100644
--- a/configure.ac
+++ b/configure.ac
@@ -4,15 +4,12 @@
m4_pattern_allow
AC_PREREQ(2.59)
-AC_INIT([hunspell],[1.2.1],[nemeth@openoffice.org])
+AC_INIT([hunspell],[1.2.2b],[nemeth@openoffice.org])
AC_CANONICAL_SYSTEM
-case "$host" in
-arm*) XFAILED="flagutf8.test maputf.test";;
-esac
AC_SUBST(XFAILED)
-AM_INIT_AUTOMAKE(hunspell, 1.2.1)
+AM_INIT_AUTOMAKE(hunspell, 1.2.2b)
HUNSPELL_VERSION_MAJOR=`echo $VERSION | cut -d"." -f1`
HUNSPELL_VERSION_MINOR=`echo $VERSION | cut -d"." -f2`
AC_SUBST(HUNSPELL_VERSION_MAJOR)
diff --git a/hunspell.pc.in b/hunspell.pc.in
index 2b85618..b9f51a2 100644
--- a/hunspell.pc.in
+++ b/hunspell.pc.in
@@ -1,4 +1,4 @@
-prefix=/usr
+prefix=@prefix@
exec_prefix=${prefix}
libdir=${prefix}/lib
includedir=${prefix}/include
diff --git a/po/hu.gmo b/po/hu.gmo
index a004fec..10d4fdb 100644
Binary files a/po/hu.gmo and b/po/hu.gmo differ
diff --git a/po/hu.po b/po/hu.po
index 6271e70..037f6a2 100644
--- a/po/hu.po
+++ b/po/hu.po
@@ -306,12 +306,12 @@ msgstr " -w\t\tki
#, fuzzy, c-format
msgid "Example: hunspell -d english file.txt # interactive spelling\n"
msgstr ""
-"Példa: hunspell -d en_US fájl.txt # interaktív helyesírás-ellenőrzés\n"
+"Példák: hunspell -d en_US fájl.txt # interaktív helyesírás-ellenőrzés\n"
#: src/hunspell/hunspell.cxx:1008
#, c-format
msgid " hunspell -l file.txt # print misspelled words\n"
-msgstr " hunspell -l fájl.txt # kiírja a hibás szavakat\n"
+msgstr " hunspell -l fájl.txt # kiírja a hibás szavakat\n"
#: src/hunspell/hunspell.cxx:1009
#, c-format
@@ -329,7 +329,7 @@ msgstr ""
#: src/hunspell/hunspell.cxx:1019
#, fuzzy, c-format
msgid "Copyright (C) 2002-2005 Nemeth Laszlo. License: GNU LGPL.\n"
-msgstr "Copyright (C) 2002-2007 Németh László. Licenc: GNU LGPL.\n"
+msgstr "Copyright (C) 2002-2008 Németh László. Licenc: MPL/GPL/LGPL.\n"
#: src/hunspell/hunspell.cxx:1021
#, c-format
@@ -373,16 +373,21 @@ msgstr "Nem lehet megnyitni a ragoz
msgid "Hunspell has been compiled without Ncurses user interface.\n"
msgstr "A Hunspell Ncurses felhasználói felület nélkül lett fordítva.\n"
+#~ msgid " --check-url\tCheck URLs, e-mail addresses and directory paths\n"
+#~ msgstr " --check-url\tURL-ek, levélcímek és útvonalak ellenőrzése\n"
+
+#~ msgid " hunspell -i utf-8 file.txt # check UTF-8 encoded file\n"
+#~ msgstr ""
+#~ " hunspell -i utf-8 fájl.txt # UTF-8-as állomány ellenőrzése\n"
+
+#~ msgid " -P password\tset password for encrypted dictionaries\n"
+#~ msgstr " -P jelszó\tjelszó megadása a titkosított szótárakhoz\n"
+
+#~ msgid "Bug reports: http://hunspell.sourceforge.net\n"
+#~ msgstr "Hibajelzés: http://hunspell.sourceforge.net\n"
+
#~ msgid " -D\t\tshow detected path of the dictionary\n"
#~ msgstr " -D\t\tkiírja a betöltött szótár útvonalát\n"
#~ msgid " -i enc\tinput encoding\n"
#~ msgstr " -i kód\tbemeneti karakterkódolás\n"
-
-#~ msgid " hunspell -i utf8 file.txt # check UTF-8 encoded file\n"
-#~ msgstr ""
-#~ " hunspell -i utf8 fájl.txt # UTF-8 kódolású állományt "
-#~ "ellenőriz\n"
-
-#~ msgid "Bug reports: http://hunspell.sourceforge.net\n"
-#~ msgstr "Hibabejelentés: http://hunspell.sourceforge.net\n"
diff --git a/src/hunspell/Makefile.am b/src/hunspell/Makefile.am
index cefd66b..f83b0a9 100644
--- a/src/hunspell/Makefile.am
+++ b/src/hunspell/Makefile.am
@@ -1,15 +1,15 @@
-libhunspell_la_LDFLAGS = -version-info 1:1:0
+#libhunspell_la_LDFLAGS = -version-info x:x:x
-lib_LTLIBRARIES = libhunspell.la
-libhunspell_includedir = $(includedir)/hunspell
-libhunspell_la_SOURCES=affentry.cxx affixmgr.cxx csutil.cxx \
+lib_LTLIBRARIES = libhunspell-1.2.la
+libhunspell_1_2_includedir = $(includedir)/hunspell
+libhunspell_1_2_la_SOURCES=affentry.cxx affixmgr.cxx csutil.cxx \
dictmgr.cxx hashmgr.cxx hunspell.cxx utf_info.cxx \
suggestmgr.cxx license.myspell license.hunspell \
- phonet.cxx
+ phonet.cxx filemgr.cxx hunzip.cxx
-libhunspell_include_HEADERS=affentry.hxx htypes.hxx affixmgr.hxx \
+libhunspell_1_2_include_HEADERS=affentry.hxx htypes.hxx affixmgr.hxx \
csutil.hxx hunspell.hxx atypes.hxx dictmgr.hxx hunspell.h \
suggestmgr.hxx baseaffix.hxx hashmgr.hxx langnum.hxx \
- phonet.hxx
+ phonet.hxx filemgr.hxx hunzip.hxx
EXTRA_DIST=hunspell.dsp makefile.mk README
diff --git a/src/hunspell/Makefile.in b/src/hunspell/Makefile.in
index a700fd8..8e6f837 100644
--- a/src/hunspell/Makefile.in
+++ b/src/hunspell/Makefile.in
@@ -14,6 +14,8 @@
@SET_MAKE@
+#libhunspell_la_LDFLAGS = -version-info x:x:x
+
srcdir = @srcdir@
top_srcdir = @top_srcdir@
@@ -39,7 +41,7 @@ build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
subdir = src/hunspell
-DIST_COMMON = README $(libhunspell_include_HEADERS) \
+DIST_COMMON = README $(libhunspell_1_2_include_HEADERS) \
$(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/codeset.m4 \
@@ -60,14 +62,14 @@ am__vpath_adj = case $$p in \
esac;
am__strip_dir = `echo $$p | sed -e 's|^.*/||'`;
am__installdirs = "$(DESTDIR)$(libdir)" \
- "$(DESTDIR)$(libhunspell_includedir)"
+ "$(DESTDIR)$(libhunspell_1_2_includedir)"
libLTLIBRARIES_INSTALL = $(INSTALL)
LTLIBRARIES = $(lib_LTLIBRARIES)
-libhunspell_la_LIBADD =
-am_libhunspell_la_OBJECTS = affentry.lo affixmgr.lo csutil.lo \
+libhunspell_1_2_la_LIBADD =
+am_libhunspell_1_2_la_OBJECTS = affentry.lo affixmgr.lo csutil.lo \
dictmgr.lo hashmgr.lo hunspell.lo utf_info.lo suggestmgr.lo \
- phonet.lo
-libhunspell_la_OBJECTS = $(am_libhunspell_la_OBJECTS)
+ phonet.lo filemgr.lo hunzip.lo
+libhunspell_1_2_la_OBJECTS = $(am_libhunspell_1_2_la_OBJECTS)
DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__depfiles_maybe = depfiles
@@ -79,10 +81,10 @@ LTCXXCOMPILE = $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) \
CXXLD = $(CXX)
CXXLINK = $(LIBTOOL) --tag=CXX --mode=link $(CXXLD) $(AM_CXXFLAGS) \
$(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
-SOURCES = $(libhunspell_la_SOURCES)
-DIST_SOURCES = $(libhunspell_la_SOURCES)
-libhunspell_includeHEADERS_INSTALL = $(INSTALL_HEADER)
-HEADERS = $(libhunspell_include_HEADERS)
+SOURCES = $(libhunspell_1_2_la_SOURCES)
+DIST_SOURCES = $(libhunspell_1_2_la_SOURCES)
+libhunspell_1_2_includeHEADERS_INSTALL = $(INSTALL_HEADER)
+HEADERS = $(libhunspell_1_2_include_HEADERS)
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -215,18 +217,17 @@ target_alias = @target_alias@
target_cpu = @target_cpu@
target_os = @target_os@
target_vendor = @target_vendor@
-libhunspell_la_LDFLAGS = -version-info 1:1:0
-lib_LTLIBRARIES = libhunspell.la
-libhunspell_includedir = $(includedir)/hunspell
-libhunspell_la_SOURCES = affentry.cxx affixmgr.cxx csutil.cxx \
+lib_LTLIBRARIES = libhunspell-1.2.la
+libhunspell_1_2_includedir = $(includedir)/hunspell
+libhunspell_1_2_la_SOURCES = affentry.cxx affixmgr.cxx csutil.cxx \
dictmgr.cxx hashmgr.cxx hunspell.cxx utf_info.cxx \
suggestmgr.cxx license.myspell license.hunspell \
- phonet.cxx
+ phonet.cxx filemgr.cxx hunzip.cxx
-libhunspell_include_HEADERS = affentry.hxx htypes.hxx affixmgr.hxx \
+libhunspell_1_2_include_HEADERS = affentry.hxx htypes.hxx affixmgr.hxx \
csutil.hxx hunspell.hxx atypes.hxx dictmgr.hxx hunspell.h \
suggestmgr.hxx baseaffix.hxx hashmgr.hxx langnum.hxx \
- phonet.hxx
+ phonet.hxx filemgr.hxx hunzip.hxx
EXTRA_DIST = hunspell.dsp makefile.mk README
all: all-am
@@ -289,8 +290,8 @@ clean-libLTLIBRARIES:
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libhunspell.la: $(libhunspell_la_OBJECTS) $(libhunspell_la_DEPENDENCIES)
- $(CXXLINK) -rpath $(libdir) $(libhunspell_la_LDFLAGS) $(libhunspell_la_OBJECTS) $(libhunspell_la_LIBADD) $(LIBS)
+libhunspell-1.2.la: $(libhunspell_1_2_la_OBJECTS) $(libhunspell_1_2_la_DEPENDENCIES)
+ $(CXXLINK) -rpath $(libdir) $(libhunspell_1_2_la_LDFLAGS) $(libhunspell_1_2_la_OBJECTS) $(libhunspell_1_2_la_LIBADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
@@ -302,8 +303,10 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affixmgr.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/csutil.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dictmgr.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/filemgr.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hashmgr.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hunspell.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hunzip.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/phonet.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/suggestmgr.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utf_info.Plo@am__quote@
@@ -338,22 +341,22 @@ clean-libtool:
distclean-libtool:
-rm -f libtool
uninstall-info-am:
-install-libhunspell_includeHEADERS: $(libhunspell_include_HEADERS)
+install-libhunspell_1_2_includeHEADERS: $(libhunspell_1_2_include_HEADERS)
@$(NORMAL_INSTALL)
- test -z "$(libhunspell_includedir)" || $(mkdir_p) "$(DESTDIR)$(libhunspell_includedir)"
- @list='$(libhunspell_include_HEADERS)'; for p in $$list; do \
+ test -z "$(libhunspell_1_2_includedir)" || $(mkdir_p) "$(DESTDIR)$(libhunspell_1_2_includedir)"
+ @list='$(libhunspell_1_2_include_HEADERS)'; for p in $$list; do \
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
f=$(am__strip_dir) \
- echo " $(libhunspell_includeHEADERS_INSTALL) '$$d$$p' '$(DESTDIR)$(libhunspell_includedir)/$$f'"; \
- $(libhunspell_includeHEADERS_INSTALL) "$$d$$p" "$(DESTDIR)$(libhunspell_includedir)/$$f"; \
+ echo " $(libhunspell_1_2_includeHEADERS_INSTALL) '$$d$$p' '$(DESTDIR)$(libhunspell_1_2_includedir)/$$f'"; \
+ $(libhunspell_1_2_includeHEADERS_INSTALL) "$$d$$p" "$(DESTDIR)$(libhunspell_1_2_includedir)/$$f"; \
done
-uninstall-libhunspell_includeHEADERS:
+uninstall-libhunspell_1_2_includeHEADERS:
@$(NORMAL_UNINSTALL)
- @list='$(libhunspell_include_HEADERS)'; for p in $$list; do \
+ @list='$(libhunspell_1_2_include_HEADERS)'; for p in $$list; do \
f=$(am__strip_dir) \
- echo " rm -f '$(DESTDIR)$(libhunspell_includedir)/$$f'"; \
- rm -f "$(DESTDIR)$(libhunspell_includedir)/$$f"; \
+ echo " rm -f '$(DESTDIR)$(libhunspell_1_2_includedir)/$$f'"; \
+ rm -f "$(DESTDIR)$(libhunspell_1_2_includedir)/$$f"; \
done
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
@@ -435,7 +438,7 @@ check-am: all-am
check: check-am
all-am: Makefile $(LTLIBRARIES) $(HEADERS)
installdirs:
- for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(libhunspell_includedir)"; do \
+ for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(libhunspell_1_2_includedir)"; do \
test -z "$$dir" || $(mkdir_p) "$$dir"; \
done
install: install-am
@@ -483,7 +486,7 @@ info: info-am
info-am:
-install-data-am: install-libhunspell_includeHEADERS
+install-data-am: install-libhunspell_1_2_includeHEADERS
install-exec-am: install-libLTLIBRARIES
@@ -512,7 +515,7 @@ ps: ps-am
ps-am:
uninstall-am: uninstall-info-am uninstall-libLTLIBRARIES \
- uninstall-libhunspell_includeHEADERS
+ uninstall-libhunspell_1_2_includeHEADERS
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libLTLIBRARIES clean-libtool ctags distclean \
@@ -520,13 +523,13 @@ uninstall-am: uninstall-info-am uninstall-libLTLIBRARIES \
distclean-tags distdir dvi dvi-am html html-am info info-am \
install install-am install-data install-data-am install-exec \
install-exec-am install-info install-info-am \
- install-libLTLIBRARIES install-libhunspell_includeHEADERS \
+ install-libLTLIBRARIES install-libhunspell_1_2_includeHEADERS \
install-man install-strip installcheck installcheck-am \
installdirs maintainer-clean maintainer-clean-generic \
mostlyclean mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
uninstall-am uninstall-info-am uninstall-libLTLIBRARIES \
- uninstall-libhunspell_includeHEADERS
+ uninstall-libhunspell_1_2_includeHEADERS
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
diff --git a/src/hunspell/affentry.cxx b/src/hunspell/affentry.cxx
index 0ffe557..fd737e0 100644
--- a/src/hunspell/affentry.cxx
+++ b/src/hunspell/affentry.cxx
@@ -470,14 +470,14 @@ inline int SfxEntry::test_condition(const char * st, const char * beg)
if (numconds == 0) return 1;
char * p = c.conds;
st--;
- int c = 1;
+ int i = 1;
while (1) {
switch (*p) {
case '\0': return 1;
case '[': { p = nextchar(p); pos = st; break; }
case '^': { p = nextchar(p); neg = true; break; }
case ']': { if (!neg && !ingroup) return 0;
- c++;
+ i++;
pos = NULL;
neg = false;
ingroup = false;
@@ -515,17 +515,17 @@ inline int SfxEntry::test_condition(const char * st, const char * beg)
}
if (pos && st != pos) {
if (neg) return 0;
- else if (c == numconds) return 1;
+ else if (i == numconds) return 1;
ingroup = true;
}
if (p && *p != '\0') p = nextchar(p);
} else if (pos) {
if (neg) return 0;
- else if (c == numconds) return 1;
+ else if (i == numconds) return 1;
ingroup = true;
}
if (!pos) {
- c++;
+ i++;
st--;
if (st < beg && p && *p != '\0') return 0; // word <= condition
}
diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx
index d3e36be..251d7ae 100644
--- a/src/hunspell/affixmgr.cxx
+++ b/src/hunspell/affixmgr.cxx
@@ -25,10 +25,12 @@ using namespace std;
#endif
#endif
-AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr)
+AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * key)
{
// register hash manager and load affix data from aff file
- pHMgr = ptr;
+ pHMgr = ptr[0];
+ alldic = ptr;
+ maxdic = md;
keystring = NULL;
trystring = NULL;
encoding=NULL;
@@ -107,7 +109,7 @@ AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr)
contclasses[j] = 0;
}
- if (parse_file(affpath)) {
+ if (parse_file(affpath, key)) {
HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n",affpath);
}
@@ -244,14 +246,10 @@ AffixMgr::~AffixMgr()
// read in aff file and build up prefix and suffix entry objects
-int AffixMgr::parse_file(const char * affpath)
+int AffixMgr::parse_file(const char * affpath, const char * key)
{
-
- // io buffers
- char line[MAXLNLEN+1];
-
- // affix type
- char ft;
+ char * line; // io buffers
+ char ft; // affix type
// checking flag duplication
char dupflags[CONTSIZE];
@@ -261,8 +259,7 @@ int AffixMgr::parse_file(const char * affpath)
int firstline = 1;
// open the affix file
- FILE * afflst;
- afflst = fopen(affpath,"r");
+ FileMgr * afflst = new FileMgr(affpath, key);
if (!afflst) {
HUNSPELL_WARNING(stderr, "error: could not open affix description file %s\n",affpath);
return 1;
@@ -271,10 +268,9 @@ int AffixMgr::parse_file(const char * affpath)
// step one is to parse the affix file building up the internal
// affix data structures
-
// read in each line ignoring any that do not
// start with a known line type indicator
- while (fgets(line,MAXLNLEN,afflst)) {
+ while (line = afflst->getline()) {
mychomp(line);
/* remove byte order mark */
@@ -289,7 +285,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the keyboard string */
if (strncmp(line,"KEY",3) == 0) {
if (parse_string(line, &keystring, "KEY")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -297,7 +293,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the try string */
if (strncmp(line,"TRY",3) == 0) {
if (parse_string(line, &trystring, "TRY")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -305,7 +301,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the name of the character set used by the .dict and .aff */
if (strncmp(line,"SET",3) == 0) {
if (parse_string(line, &encoding, "SET")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
if (strcmp(encoding, "UTF-8") == 0) {
@@ -325,7 +321,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by the controlled compound words */
if (strncmp(line,"COMPOUNDFLAG",12) == 0) {
if (parse_flag(line, &compoundflag, "COMPOUNDFLAG")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -334,12 +330,12 @@ int AffixMgr::parse_file(const char * affpath)
if (strncmp(line,"COMPOUNDBEGIN",13) == 0) {
if (complexprefixes) {
if (parse_flag(line, &compoundend, "COMPOUNDBEGIN")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
} else {
if (parse_flag(line, &compoundbegin, "COMPOUNDBEGIN")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -348,7 +344,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by compound words */
if (strncmp(line,"COMPOUNDMIDDLE",14) == 0) {
if (parse_flag(line, &compoundmiddle, "COMPOUNDMIDDLE")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -356,12 +352,12 @@ int AffixMgr::parse_file(const char * affpath)
if (strncmp(line,"COMPOUNDEND",11) == 0) {
if (complexprefixes) {
if (parse_flag(line, &compoundbegin, "COMPOUNDEND")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
} else {
if (parse_flag(line, &compoundend, "COMPOUNDEND")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -370,7 +366,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the data used by compound_check() method */
if (strncmp(line,"COMPOUNDWORDMAX",15) == 0) {
if (parse_num(line, &cpdwordmax, "COMPOUNDWORDMAX")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -378,7 +374,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag sign compounds in dictionary */
if (strncmp(line,"COMPOUNDROOT",12) == 0) {
if (parse_flag(line, &compoundroot, "COMPOUNDROOT")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -386,7 +382,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by compound_check() method */
if (strncmp(line,"COMPOUNDPERMITFLAG",18) == 0) {
if (parse_flag(line, &compoundpermitflag, "COMPOUNDPERMITFLAG")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -394,7 +390,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by compound_check() method */
if (strncmp(line,"COMPOUNDFORBIDFLAG",18) == 0) {
if (parse_flag(line, &compoundforbidflag, "COMPOUNDFORBIDFLAG")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -417,7 +413,7 @@ int AffixMgr::parse_file(const char * affpath)
if (strncmp(line,"NOSUGGEST",9) == 0) {
if (parse_flag(line, &nosuggest, "NOSUGGEST")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -425,7 +421,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by forbidden words */
if (strncmp(line,"FORBIDDENWORD",13) == 0) {
if (parse_flag(line, &forbiddenword, "FORBIDDENWORD")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -433,7 +429,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by forbidden words */
if (strncmp(line,"LEMMA_PRESENT",13) == 0) {
if (parse_flag(line, &lemma_present, "LEMMA_PRESENT")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -441,7 +437,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by circumfixes */
if (strncmp(line,"CIRCUMFIX",9) == 0) {
if (parse_flag(line, &circumfix, "CIRCUMFIX")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -449,7 +445,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by fogemorphemes */
if (strncmp(line,"ONLYINCOMPOUND",14) == 0) {
if (parse_flag(line, &onlyincompound, "ONLYINCOMPOUND")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -457,7 +453,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by `needaffixs' */
if (strncmp(line,"PSEUDOROOT",10) == 0) {
if (parse_flag(line, &needaffix, "PSEUDOROOT")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -465,7 +461,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by `needaffixs' */
if (strncmp(line,"NEEDAFFIX",9) == 0) {
if (parse_flag(line, &needaffix, "NEEDAFFIX")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -473,7 +469,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the minimal length for words in compounds */
if (strncmp(line,"COMPOUNDMIN",11) == 0) {
if (parse_num(line, &cpdmin, "COMPOUNDMIN")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
if (cpdmin < 1) cpdmin = 1;
@@ -482,7 +478,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the max. words and syllables in compounds */
if (strncmp(line,"COMPOUNDSYLLABLE",16) == 0) {
if (parse_cpdsyllable(line)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -490,7 +486,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by compound_check() method */
if (strncmp(line,"SYLLABLENUM",11) == 0) {
if (parse_string(line, &cpdsyllablenum, "SYLLABLENUM")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -503,7 +499,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the extra word characters */
if (strncmp(line,"WORDCHARS",9) == 0) {
if (parse_array(line, &wordchars, &wordchars_utf16, &wordchars_utf16_len, "WORDCHARS", utf8)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -511,7 +507,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the ignored characters (for example, Arabic optional diacretics charachters */
if (strncmp(line,"IGNORE",6) == 0) {
if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, "IGNORE", utf8)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -519,7 +515,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the typical fault correcting table */
if (strncmp(line,"REP",3) == 0) {
if (parse_reptable(line, afflst)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -527,7 +523,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the phonetic translation table */
if (strncmp(line,"PHONE",5) == 0) {
if (parse_phonetable(line, afflst)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -535,7 +531,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the checkcompoundpattern table */
if (strncmp(line,"CHECKCOMPOUNDPATTERN",20) == 0) {
if (parse_checkcpdtable(line, afflst)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -543,7 +539,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the defcompound table */
if (strncmp(line,"COMPOUNDRULE",12) == 0) {
if (parse_defcpdtable(line, afflst)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -551,7 +547,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the related character map table */
if (strncmp(line,"MAP",3) == 0) {
if (parse_maptable(line, afflst)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -559,7 +555,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the word breakpoints table */
if (strncmp(line,"BREAK",5) == 0) {
if (parse_breaktable(line, afflst)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -567,7 +563,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the language for language specific codes */
if (strncmp(line,"LANG",4) == 0) {
if (parse_string(line, &lang, "LANG")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
langnum = get_lang_num(lang);
@@ -575,14 +571,14 @@ int AffixMgr::parse_file(const char * affpath)
if (strncmp(line,"VERSION",7) == 0) {
if (parse_string(line, &version, "VERSION")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
if (strncmp(line,"MAXNGRAMSUGS",12) == 0) {
if (parse_num(line, &maxngramsugs, "MAXNGRAMSUGS")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -598,7 +594,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by forbidden words */
if (strncmp(line,"KEEPCASE",8) == 0) {
if (parse_flag(line, &keepcase, "KEEPCASE")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -606,7 +602,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by the affix generator */
if (strncmp(line,"SUBSTANDARD",11) == 0) {
if (parse_flag(line, &substandard, "SUBSTANDARD")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -625,7 +621,7 @@ int AffixMgr::parse_file(const char * affpath)
dupflags_ini = 0;
}
if (parse_affix(line, ft, afflst, dupflags)) {
- fclose(afflst);
+ delete afflst;
process_pfx_tree_to_list();
process_sfx_tree_to_list();
return 1;
@@ -633,7 +629,7 @@ int AffixMgr::parse_file(const char * affpath)
}
}
- fclose(afflst);
+ delete afflst;
// convert affix trees to sorted list
process_pfx_tree_to_list();
@@ -2731,7 +2727,7 @@ char * AffixMgr::morphgen(char * ts, int wl, const unsigned short * ap,
if (cmp == 0) {
char * newword = sptr->add(ts, wl);
if (newword) {
- hentry * check = pHMgr->lookup(newword);
+ hentry * check = pHMgr->lookup(newword); // XXX extra dic
if (!check || !check->astr ||
!TESTAFF(check->astr, forbiddenword, check->alen)) {
return newword;
@@ -2767,7 +2763,7 @@ char * AffixMgr::morphgen(char * ts, int wl, const unsigned short * ap,
int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, const char * ts,
int wl, const unsigned short * ap, unsigned short al, char * bad, int badl,
- char * phone)
+ char * phon)
{
int nh=0;
// first add root word to list
@@ -2778,8 +2774,8 @@ int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, const char * ts
wlst[nh].orig = NULL;
nh++;
// add special phonetic version
- if (phone && (nh < maxn)) {
- wlst[nh].word = mystrdup(phone);
+ if (phon && (nh < maxn)) {
+ wlst[nh].word = mystrdup(phon);
wlst[nh].allow = (1 == 0);
wlst[nh].orig = mystrdup(ts);
nh++;
@@ -2809,11 +2805,11 @@ int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, const char * ts
wlst[nh].orig = NULL;
nh++;
// add special phonetic version
- if (phone && (nh < maxn)) {
+ if (phon && (nh < maxn)) {
char st[MAXWORDUTF8LEN];
- strcpy(st, phone);
+ strcpy(st, phon);
strcat(st, sptr->getKey());
- reverseword(st + strlen(phone));
+ reverseword(st + strlen(phon));
wlst[nh].word = mystrdup(st);
wlst[nh].allow = (1 == 0);
wlst[nh].orig = mystrdup(newword);
@@ -3097,8 +3093,12 @@ FLAG AffixMgr::get_lemma_present()
// utility method to look up root words in hash table
struct hentry * AffixMgr::lookup(const char * word)
{
- if (! pHMgr) return NULL;
- return pHMgr->lookup(word);
+ // Query alldic[0] .. alldic[*maxdic - 1] in order and return the first
+ // non-NULL entry; the result is NULL when no dictionary contains `word`.
+ // NOTE(review): the old `! pHMgr` guard is gone and alldic/maxdic are
+ // dereferenced unconditionally -- confirm the constructor always sets them.
+ int i;
+ struct hentry * he = NULL;
+ for (i = 0; i < *maxdic && !he; i++) {
+ he = (alldic[i])->lookup(word);
+ }
+ return he;
}
// return the value of suffix
@@ -3203,7 +3203,7 @@ int AffixMgr::parse_cpdsyllable(char * line)
}
/* parse in the typical fault correcting table */
-int AffixMgr::parse_reptable(char * line, FILE * af)
+int AffixMgr::parse_reptable(char * line, FileMgr * af)
{
if (numrep != 0) {
HUNSPELL_WARNING(stderr, "error: duplicate REP tables used\n");
@@ -3243,9 +3243,9 @@ int AffixMgr::parse_reptable(char * line, FILE * af)
}
/* now parse the numrep lines to read in the remainder of the table */
- char * nl = line;
+ char * nl;
for (int j=0; j < numrep; j++) {
- if (!fgets(nl,MAXLNLEN,af)) return 1;
+ if (!(nl = af->getline())) return 1;
mychomp(nl);
tp = nl;
i = 0;
@@ -3283,7 +3283,7 @@ int AffixMgr::parse_reptable(char * line, FILE * af)
}
/* parse in the typical fault correcting table */
-int AffixMgr::parse_phonetable(char * line, FILE * af)
+int AffixMgr::parse_phonetable(char * line, FileMgr * af)
{
if (phone) {
HUNSPELL_WARNING(stderr, "error: duplicate PHONE tables used\n");
@@ -3327,9 +3327,9 @@ int AffixMgr::parse_phonetable(char * line, FILE * af)
}
/* now parse the phone->num lines to read in the remainder of the table */
- char * nl = line;
+ char * nl;
for (int j=0; j < phone->num; j++) {
- if (!fgets(nl,MAXLNLEN,af)) return 1;
+ if (!(nl = af->getline())) return 1;
mychomp(nl);
tp = nl;
i = 0;
@@ -3370,7 +3370,7 @@ int AffixMgr::parse_phonetable(char * line, FILE * af)
}
/* parse in the checkcompoundpattern table */
-int AffixMgr::parse_checkcpdtable(char * line, FILE * af)
+int AffixMgr::parse_checkcpdtable(char * line, FileMgr * af)
{
if (numcheckcpd != 0) {
HUNSPELL_WARNING(stderr, "error: duplicate compound pattern tables used\n");
@@ -3410,9 +3410,9 @@ int AffixMgr::parse_checkcpdtable(char * line, FILE * af)
}
/* now parse the numcheckcpd lines to read in the remainder of the table */
- char * nl = line;
+ char * nl;
for (int j=0; j < numcheckcpd; j++) {
- if (!fgets(nl,MAXLNLEN,af)) return 1;
+ if (!(nl = af->getline())) return 1;
mychomp(nl);
tp = nl;
i = 0;
@@ -3450,7 +3450,7 @@ int AffixMgr::parse_checkcpdtable(char * line, FILE * af)
}
/* parse in the compound rule table */
-int AffixMgr::parse_defcpdtable(char * line, FILE * af)
+int AffixMgr::parse_defcpdtable(char * line, FileMgr * af)
{
if (numdefcpd != 0) {
HUNSPELL_WARNING(stderr, "error: duplicate compound rule tables used\n");
@@ -3490,9 +3490,9 @@ int AffixMgr::parse_defcpdtable(char * line, FILE * af)
}
/* now parse the numdefcpd lines to read in the remainder of the table */
- char * nl = line;
+ char * nl;
for (int j=0; j < numdefcpd; j++) {
- if (!fgets(nl,MAXLNLEN,af)) return 1;
+ if (!(nl = af->getline())) return 1;
mychomp(nl);
tp = nl;
i = 0;
@@ -3533,7 +3533,7 @@ int AffixMgr::parse_defcpdtable(char * line, FILE * af)
/* parse in the character map table */
-int AffixMgr::parse_maptable(char * line, FILE * af)
+int AffixMgr::parse_maptable(char * line, FileMgr * af)
{
if (nummap != 0) {
HUNSPELL_WARNING(stderr, "error: duplicate MAP tables used\n");
@@ -3573,9 +3573,9 @@ int AffixMgr::parse_maptable(char * line, FILE * af)
}
/* now parse the nummap lines to read in the remainder of the table */
- char * nl = line;
+ char * nl;
for (int j=0; j < nummap; j++) {
- if (!fgets(nl,MAXLNLEN,af)) return 1;
+ if (!(nl = af->getline())) return 1;
mychomp(nl);
tp = nl;
i = 0;
@@ -3630,7 +3630,7 @@ int AffixMgr::parse_maptable(char * line, FILE * af)
}
/* parse in the word breakpoint table */
-int AffixMgr::parse_breaktable(char * line, FILE * af)
+int AffixMgr::parse_breaktable(char * line, FileMgr * af)
{
if (numbreak != 0) {
HUNSPELL_WARNING(stderr, "error: duplicate word breakpoint tables used\n");
@@ -3670,9 +3670,9 @@ int AffixMgr::parse_breaktable(char * line, FILE * af)
}
/* now parse the numbreak lines to read in the remainder of the table */
- char * nl = line;
+ char * nl;
for (int j=0; j < numbreak; j++) {
- if (!fgets(nl,MAXLNLEN,af)) return 1;
+ if (!(nl = af->getline())) return 1;
mychomp(nl);
tp = nl;
i = 0;
@@ -3734,7 +3734,7 @@ void AffixMgr::reverse_condition(char * piece) {
}
}
-int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflags)
+int AffixMgr::parse_affix(char * line, const char at, FileMgr * af, char * dupflags)
{
int numents = 0; // number of affentry structures to parse
@@ -3745,7 +3745,7 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflag
struct affentry * nptr= NULL;
char * tp = line;
- char * nl = line;
+ char * nl;
char * piece;
int i = 0;
@@ -3820,7 +3820,7 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflag
// now parse numents affentries for this affix
for (int j=0; j < numents; j++) {
- if (!fgets(nl,MAXLNLEN,af)) return 1;
+ if (!(nl = af->getline())) return 1;
mychomp(nl);
tp = nl;
i = 0;
@@ -3898,6 +3898,7 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflag
if (pHMgr->is_aliasf()) {
int index = atoi(dash + 1);
nptr->contclasslen = (unsigned short) pHMgr->get_aliasf(index, &(nptr->contclass));
+ if (!nptr->contclasslen) HUNSPELL_WARNING(stderr, "error: bad affix flag alias: \"%s\"\n", dash+1);
} else {
nptr->contclasslen = (unsigned short) pHMgr->decode_flags(&(nptr->contclass), dash + 1);
flag_qsort(nptr->contclass, 0, nptr->contclasslen);
diff --git a/src/hunspell/affixmgr.hxx b/src/hunspell/affixmgr.hxx
index 644d2c9..969780d 100644
--- a/src/hunspell/affixmgr.hxx
+++ b/src/hunspell/affixmgr.hxx
@@ -27,6 +27,8 @@ class AffixMgr
AffEntry * pFlag[CONTSIZE];
AffEntry * sFlag[CONTSIZE];
HashMgr * pHMgr;
+ HashMgr ** alldic;
+ int * maxdic;
char * keystring;
char * trystring;
char * encoding;
@@ -96,8 +98,9 @@ class AffixMgr
flag flag_mode;
public:
-
- AffixMgr(const char * affpath, HashMgr * ptr);
+
+ AffixMgr(const char * affpath, HashMgr** ptr, int * md,
+ const char * key = NULL);
~AffixMgr();
struct hentry * affix_check(const char * word, int len,
const unsigned short needflag = (unsigned short) 0,
@@ -150,7 +153,7 @@ public:
short numsyllable, short maxwordnum, short wnum, hentry ** words,
char hu_mov_rule, char ** result, char * partresult);
- struct hentry * lookup(const char * word);
+ struct hentry * lookup(const char * word);
int get_numrep();
struct replentry * get_reptable();
struct phonetable * get_phonetable();
@@ -171,7 +174,6 @@ public:
FLAG get_compoundbegin();
FLAG get_forbiddenword();
FLAG get_nosuggest();
-// FLAG get_circumfix();
FLAG get_needaffix();
FLAG get_onlyincompound();
FLAG get_compoundroot();
@@ -193,17 +195,17 @@ public:
int get_checksharps(void);
private:
- int parse_file(const char * affpath);
+ int parse_file(const char * affpath, const char * key);
int parse_flag(char * line, unsigned short * out, const char * name);
int parse_num(char * line, int * out, const char * name);
int parse_cpdsyllable(char * line);
- int parse_reptable(char * line, FILE * af);
- int parse_phonetable(char * line, FILE * af);
- int parse_maptable(char * line, FILE * af);
- int parse_breaktable(char * line, FILE * af);
- int parse_checkcpdtable(char * line, FILE * af);
- int parse_defcpdtable(char * line, FILE * af);
- int parse_affix(char * line, const char at, FILE * af, char * dupflags);
+ int parse_reptable(char * line, FileMgr * af);
+ int parse_phonetable(char * line, FileMgr * af);
+ int parse_maptable(char * line, FileMgr * af);
+ int parse_breaktable(char * line, FileMgr * af);
+ int parse_checkcpdtable(char * line, FileMgr * af);
+ int parse_defcpdtable(char * line, FileMgr * af);
+ int parse_affix(char * line, const char at, FileMgr * af, char * dupflags);
void reverse_condition(char *);
int condlen(char *);
diff --git a/src/hunspell/csutil.cxx b/src/hunspell/csutil.cxx
index 6914957..784ffa9 100644
--- a/src/hunspell/csutil.cxx
+++ b/src/hunspell/csutil.cxx
@@ -55,8 +55,8 @@ static int utf_tbl_count = 0; // utf_tbl can be used by multiple Hunspell instan
/* only UTF-16 (BMP) implementation */
char * u16_u8(char * dest, int size, const w_char * src, int srclen) {
- char * u8 = dest;
- char * u8_max = u8 + size;
+ signed char * u8 = (signed char *)dest;
+ signed char * u8_max = (signed char *)(u8 + size);
const w_char * u2 = src;
const w_char * u2_max = src + srclen;
while ((u2 < u2_max) && (u8 < u8_max)) {
@@ -103,7 +103,7 @@ char * u16_u8(char * dest, int size, const w_char * src, int srclen) {
/* only UTF-16 (BMP) implementation */
int u8_u16(w_char * dest, int size, const char * src) {
- const char * u8 = src;
+ const signed char * u8 = (const signed char *)src;
w_char * u2 = dest;
w_char * u2_max = u2 + size;
@@ -125,7 +125,7 @@ int u8_u16(w_char * dest, int size, const char * src) {
case 0x90:
case 0xa0:
case 0xb0: {
- HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Unexpected continuation bytes in %ld. character position\n%s\n", static_cast<long>(u8 - src), src);
+ HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Unexpected continuation bytes in %ld. character position\n%s\n", static_cast<long>(u8 - (signed char *)src), src);
u2->h = 0xff;
u2->l = 0xfd;
break;
@@ -137,7 +137,7 @@ int u8_u16(w_char * dest, int size, const char * src) {
u2->l = (*u8 << 6) + (*(u8+1) & 0x3f);
u8++;
} else {
- HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - src), src);
+ HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - (signed char *)src), src);
u2->h = 0xff;
u2->l = 0xfd;
}
@@ -151,12 +151,12 @@ int u8_u16(w_char * dest, int size, const char * src) {
u2->l = (*u8 << 6) + (*(u8+1) & 0x3f);
u8++;
} else {
- HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - src), src);
+ HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - (signed char *)src), src);
u2->h = 0xff;
u2->l = 0xfd;
}
} else {
- HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - src), src);
+ HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - (signed char *)src), src);
u2->h = 0xff;
u2->l = 0xfd;
}
@@ -415,10 +415,10 @@ char * tr(char * text, char oldc, char newc) {
// otherwise return -1
int morphcmp(const char * s, const char * t)
{
- int se;
- int te;
- char * sl;
- char * tl;
+ int se = 0;
+ int te = 0;
+ const char * sl;
+ const char * tl;
const char * olds;
const char * oldt;
if (!s || !t) return 1;
@@ -515,7 +515,7 @@ int fieldlen(const char * r)
char * copy_field(char * dest, const char * morph, const char * var)
{
if (!morph) return NULL;
- char * beg = strstr(morph, var);
+ const char * beg = strstr(morph, var);
if (beg) {
char * d = dest;
for (beg += MORPH_TAG_LEN; *beg != ' ' && *beg != '\t' &&
@@ -681,6 +681,20 @@ void mkallcap_utf(w_char * u, int nc, int langnum) {
if (*p != '\0') *d= csconv[((unsigned char)*p)].cupper;
}
+ // Write the pointer value `source` into the byte buffer at `dest`.
+ // The value is copied with memcpy, so `dest` does not have to be aligned
+ // for a pointer; it must have room for sizeof(char *) bytes.
+ void store_pointer(char * dest, char * source)
+ {
+     memcpy(dest, &source, sizeof source);
+ }
+
+ // Read back a pointer value previously written with store_pointer().
+ // The sizeof(char *) bytes at `s` are copied out with memcpy, so `s`
+ // does not have to be aligned for a pointer.
+ char * get_stored_pointer(char * s)
+ {
+     char * stored = NULL;
+     memcpy(&stored, s, sizeof stored);
+     return stored;
+ }
+
// these are simple character mappings for the
// encodings supported
// supplying isupper, tolower, and toupper
@@ -941,7 +955,7 @@ struct cs_info iso1_tbl[] = {
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
@@ -1201,7 +1215,7 @@ struct cs_info iso2_tbl[] = {
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
@@ -1461,7 +1475,7 @@ struct cs_info iso3_tbl[] = {
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info iso4_tbl[] = {
@@ -1720,7 +1734,7 @@ struct cs_info iso4_tbl[] = {
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info iso5_tbl[] = {
@@ -1979,7 +1993,7 @@ struct cs_info iso5_tbl[] = {
{ 0x00, 0xfc, 0xac },
{ 0x00, 0xfd, 0xfd },
{ 0x00, 0xfe, 0xae },
-{ 0x00, 0xff, 0xaf },
+{ 0x00, 0xff, 0xaf }
};
struct cs_info iso6_tbl[] = {
@@ -2238,7 +2252,7 @@ struct cs_info iso6_tbl[] = {
{ 0x00, 0xfc, 0xfc },
{ 0x00, 0xfd, 0xfd },
{ 0x00, 0xfe, 0xfe },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info iso7_tbl[] = {
@@ -2497,7 +2511,7 @@ struct cs_info iso7_tbl[] = {
{ 0x00, 0xfc, 0xbc },
{ 0x00, 0xfd, 0xbe },
{ 0x00, 0xfe, 0xbf },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info iso8_tbl[] = {
@@ -2756,7 +2770,7 @@ struct cs_info iso8_tbl[] = {
{ 0x00, 0xfc, 0xfc },
{ 0x00, 0xfd, 0xfd },
{ 0x00, 0xfe, 0xfe },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info iso9_tbl[] = {
@@ -3015,7 +3029,7 @@ struct cs_info iso9_tbl[] = {
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0x49 },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info iso10_tbl[] = {
@@ -3274,7 +3288,7 @@ struct cs_info iso10_tbl[] = {
{ 0x00, 0xfc, 0xfc },
{ 0x00, 0xfd, 0xfd },
{ 0x00, 0xfe, 0xfe },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info koi8r_tbl[] = {
@@ -3533,7 +3547,7 @@ struct cs_info koi8r_tbl[] = {
{ 0x01, 0xdc, 0xfc },
{ 0x01, 0xdd, 0xfd },
{ 0x01, 0xde, 0xfe },
-{ 0x01, 0xdf, 0xff },
+{ 0x01, 0xdf, 0xff }
};
struct cs_info koi8u_tbl[] = {
@@ -3792,7 +3806,7 @@ struct cs_info koi8u_tbl[] = {
{ 0x01, 0xdc, 0xfc },
{ 0x01, 0xdd, 0xfd },
{ 0x01, 0xde, 0xfe },
-{ 0x01, 0xdf, 0xff },
+{ 0x01, 0xdf, 0xff }
};
struct cs_info cp1251_tbl[] = {
@@ -4051,7 +4065,7 @@ struct cs_info cp1251_tbl[] = {
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xdf },
+{ 0x00, 0xff, 0xdf }
};
struct cs_info iso13_tbl[] = {
@@ -4310,7 +4324,7 @@ struct cs_info iso13_tbl[] = {
{ 0x00, 0xFC, 0xDC },
{ 0x00, 0xFD, 0xDD },
{ 0x00, 0xFE, 0xDE },
-{ 0x00, 0xFF, 0xFF },
+{ 0x00, 0xFF, 0xFF }
};
@@ -4570,7 +4584,7 @@ struct cs_info iso14_tbl[] = {
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info iso15_tbl[] = {
@@ -4829,7 +4843,7 @@ struct cs_info iso15_tbl[] = {
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xbe },
+{ 0x00, 0xff, 0xbe }
};
struct cs_info iscii_devanagari_tbl[] = {
@@ -5088,7 +5102,7 @@ struct cs_info iscii_devanagari_tbl[] = {
{ 0x00, 0xfc, 0xfc },
{ 0x00, 0xfd, 0xfd },
{ 0x00, 0xfe, 0xfe },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct enc_entry encds[] = {
@@ -5108,7 +5122,7 @@ struct enc_entry encds[] = {
{"ISO8859-13", iso13_tbl},
{"ISO8859-14", iso14_tbl},
{"ISO8859-15", iso15_tbl},
-{"ISCII-DEVANAGARI", iscii_devanagari_tbl},
+{"ISCII-DEVANAGARI", iscii_devanagari_tbl}
};
struct cs_info * get_current_cs(const char * es) {
@@ -5117,6 +5131,7 @@ struct cs_info * get_current_cs(const char * es) {
for (int i = 0; i < n; i++) {
if (strcmp(es,encds[i].enc_name) == 0) {
ccs = encds[i].cs_table;
+ break;
}
}
return ccs;
@@ -5362,14 +5377,14 @@ int get_captype(char * word, int nl, cs_info * csconv) {
int ncap = 0;
int nneutral = 0;
int firstcap = 0;
-
- for (char * q = word; *q != '\0'; q++) {
- if (csconv[*((unsigned char *)q)].ccase) ncap++;
- if (csconv[*((unsigned char *)q)].cupper == csconv[*((unsigned char *)q)].clower) nneutral++;
- }
- if (ncap) {
- firstcap = csconv[*((unsigned char *) word)].ccase;
- }
+ if (csconv == NULL) return NOCAP;
+ for (char * q = word; *q != '\0'; q++) {
+ if (csconv[*((unsigned char *)q)].ccase) ncap++;
+ if (csconv[*((unsigned char *)q)].cupper == csconv[*((unsigned char *)q)].clower) nneutral++;
+ }
+ if (ncap) {
+ firstcap = csconv[*((unsigned char *) word)].ccase;
+ }
// now finally set the captype
if (ncap == 0) {
diff --git a/src/hunspell/csutil.hxx b/src/hunspell/csutil.hxx
index df7979b..442cac2 100644
--- a/src/hunspell/csutil.hxx
+++ b/src/hunspell/csutil.hxx
@@ -30,13 +30,12 @@
#define MSEP_REC '\n'
#define MSEP_ALT '\v'
-
// default flags
#define DEFAULTFLAGS 65510
#define FORBIDDENWORD 65510
#define ONLYUPCASEFLAG 65511
-typedef struct {
+// One 16-bit character stored as two separate bytes: `l` (low) and `h` (high).
+// NOTE(review): __attribute__ ((packed)) is a GCC/Clang extension -- confirm
+// that every compiler this header is built with (e.g. for win_api) accepts it.
+typedef struct __attribute__ ((packed)) {
unsigned char l;
unsigned char h;
} w_char;
@@ -200,4 +199,10 @@ int morphcmp(const char * s, const char * t);
int get_sfxcount(const char * morph);
+// conversion function for protected memory
+void store_pointer(char * dest, char * source);
+
+// conversion function for protected memory
+char * get_stored_pointer(char * s);
+
#endif
diff --git a/src/hunspell/filemgr.cxx b/src/hunspell/filemgr.cxx
new file mode 100644
index 0000000..165fc77
--- /dev/null
+++ b/src/hunspell/filemgr.cxx
@@ -0,0 +1,38 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "filemgr.hxx"
+
+// Print a diagnostic to stderr and return -1.
+// `err` is handed to fprintf() as the format string with `par` as its only
+// argument, so it must be a trusted format (never externally supplied text).
+int FileMgr::fail(const char * err, const char * par) {
+ fprintf(stderr, err, par);
+ return -1;
+}
+
+// Open `file` for line-wise reading.  If the plain file cannot be opened,
+// fall back to the hzipped variant named `file` + HZIP_EXTENSION, read
+// through a Hunzip object created with the optional `key`.  If neither a
+// stream nor a Hunzip object was created, MSG_OPEN is reported via fail().
+FileMgr::FileMgr(const char * file, const char * key) {
+    hin = NULL;
+    fin = fopen(file, "r");
+    if (!fin) {
+        // check hzipped file
+        // + 1: room for the terminating NUL written by strcpy()/strcat();
+        // without it strcat() stores one byte past the allocation
+        char * st = (char *) malloc(strlen(file) + strlen(HZIP_EXTENSION) + 1);
+        if (st) {
+            strcpy(st, file);
+            strcat(st, HZIP_EXTENSION);
+            hin = new Hunzip(st, key);
+            // NOTE(review): st is never freed; free it here if Hunzip keeps
+            // its own copy of the name -- confirm against hunzip.cxx
+        }
+    }
+    if (!fin && !hin) fail(MSG_OPEN, file);
+}
+
+// Release whichever input source the constructor managed to open.
+FileMgr::~FileMgr()
+{
+    if (fin != NULL) {
+        fclose(fin);
+    }
+    delete hin;  // deleting a null pointer is a no-op
+}
+
+// Read the next line into the internal buffer `in` and return that buffer.
+// The plain stream is used when it is open; otherwise the line is fetched
+// from the Hunzip reader and copied into `in`.  Returns NULL when no line
+// could be read.
+char * FileMgr::getline() {
+    if (fin) {
+        return fgets(in, BUFSIZE - 1, fin);
+    }
+    if (!hin) {
+        return NULL;
+    }
+    const char * unzipped = hin->getline();
+    if (!unzipped) {
+        return NULL;
+    }
+    return strcpy(in, unzipped);
+}
diff --git a/src/hunspell/filemgr.hxx b/src/hunspell/filemgr.hxx
new file mode 100644
index 0000000..593228d
--- /dev/null
+++ b/src/hunspell/filemgr.hxx
@@ -0,0 +1,19 @@
+/* file manager class - read lines of files [filename] OR [filename.hz] */
+#ifndef _FILEMGR_HXX_
+#define _FILEMGR_HXX_
+#include "hunzip.hxx"
+
+// Reads a file line by line, either from a plain stdio stream or, when the
+// plain file cannot be opened, from its hzipped counterpart via Hunzip.
+class FileMgr
+{
+protected:
+ FILE * fin; // plain stream from fopen(); NULL if the file could not be opened
+ Hunzip * hin; // hzip reader, created only when fin is NULL; otherwise NULL
+ char in[BUFSIZE + 50]; // input buffer; getline() returns a pointer to it
+ int fail(const char * err, const char * par); // print error to stderr, return -1
+
+public:
+ // key: optional key passed on to Hunzip for the hzipped variant
+ FileMgr(const char * filename, const char * key = NULL);
+ ~FileMgr();
+ // next line of input, or NULL when no line could be read
+ char * getline();
+};
+#endif
diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx
index 08e061c..5f0b169 100644
--- a/src/hunspell/hashmgr.cxx
+++ b/src/hunspell/hashmgr.cxx
@@ -29,7 +29,7 @@ using namespace std;
// build a hash table from a munched word list
-HashMgr::HashMgr(const char * tpath, const char * apath)
+HashMgr::HashMgr(const char * tpath, const char * apath, const char * key)
{
tablesize = 0;
tableptr = NULL;
@@ -48,8 +48,8 @@ HashMgr::HashMgr(const char * tpath, const char * apath)
numaliasm = 0;
aliasm = NULL;
forbiddenword = FORBIDDENWORD; // forbidden word signing flag
- load_config(apath);
- int ec = load_tables(tpath);
+ load_config(apath, key);
+ int ec = load_tables(tpath, key);
if (ec) {
/* error condition - what should we do here */
HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec);
@@ -129,7 +129,7 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
int al, const char * desc, bool onlyupcase)
{
bool upcasehomonym = false;
- int descl = desc ? (aliasm ? sizeof(char *) : strlen(desc) + 1) : 0;
+ int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0;
// variable-length hash record with word and optional fields
struct hentry* hp =
(struct hentry *) malloc (sizeof(struct hentry) + wbl + descl);
@@ -161,7 +161,8 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
hp->var = H_OPT;
if (aliasm) {
hp->var += H_OPT_ALIASM;
- *((char **) (hpw + wbl + 1)) = get_aliasm(atoi(desc));
+// *((char **) (hpw + wbl + 1)) = get_aliasm(atoi(desc));
+ store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc)));
} else {
strcpy(hpw + wbl + 1, desc);
if (complexprefixes) {
@@ -236,12 +237,12 @@ int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl,
if (al) memcpy(flags2, flags, al * sizeof(unsigned short));
flags2[al] = ONLYUPCASEFLAG;
if (utf8) {
- char st[MAXDELEN];
- w_char w[MAXDELEN];
- int wlen = u8_u16(w, MAXDELEN, word);
+ char st[BUFSIZE];
+ w_char w[BUFSIZE];
+ int wlen = u8_u16(w, BUFSIZE, word);
mkallsmall_utf(w, wlen, langnum);
mkallcap_utf(w, 1, langnum);
- u16_u8(st, MAXDELEN, w, wlen);
+ u16_u8(st, BUFSIZE, w, wlen);
return add_word(st,wbl,wcl,flags2,al+1,dp, true);
} else {
mkallsmall(word, csconv);
@@ -256,8 +257,8 @@ int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl,
int HashMgr::get_clen_and_captype(const char * word, int wbl, int * captype) {
int len;
if (utf8) {
- w_char dest_utf[MAXDELEN];
- len = u8_u16(dest_utf, MAXDELEN, word);
+ w_char dest_utf[BUFSIZE];
+ len = u8_u16(dest_utf, BUFSIZE, word);
*captype = get_captype_utf8(dest_utf, len, langnum);
} else {
len = wbl;
@@ -269,8 +270,8 @@ int HashMgr::get_clen_and_captype(const char * word, int wbl, int * captype) {
// remove word with FORBIDDENWORD flag (not implemented)
int HashMgr::remove(const char * word)
{
- struct hentry * dp = lookup(word);
-/*
+/* struct hentry * dp = lookup(word);
+
if (!word || (!dp->astr || !TESTAFF(dp->astr, forbiddenword, pt->alen))) {
int wbl = strlen(word);
int wcl = get_clen_and_captype(word, wbl, &captype);
@@ -344,22 +345,22 @@ struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
}
// load a munched word list and build a hash table on the fly
-int HashMgr::load_tables(const char * tpath)
+int HashMgr::load_tables(const char * tpath, const char * key)
{
int al;
char * ap;
char * dp;
unsigned short * flags;
+ char * ts;
- // raw dictionary - munched file
- FILE * rawdict = fopen(tpath, "r");
- if (rawdict == NULL) return 1;
+ // open dictionary file
+ FileMgr * dict = new FileMgr(tpath, key);
+ if (dict == NULL) return 1;
// first read the first line of file to get hash table size */
- char ts[MAXDELEN];
- if (! fgets(ts, MAXDELEN-1,rawdict)) {
+ if (!(ts = dict->getline())) {
HUNSPELL_WARNING(stderr, "error: empty dic file\n");
- fclose(rawdict);
+ delete dict;
return 2;
}
mychomp(ts);
@@ -373,7 +374,7 @@ int HashMgr::load_tables(const char * tpath)
if ((*ts < '1') || (*ts > '9')) HUNSPELL_WARNING(stderr, "error - missing word count in dictionary file\n");
tablesize = atoi(ts);
if (!tablesize) {
- fclose(rawdict);
+ delete dict;
return 4;
}
tablesize = tablesize + 5 + USERWORD;
@@ -382,7 +383,7 @@ int HashMgr::load_tables(const char * tpath)
// allocate the hash table
tableptr = (struct hentry **) malloc(tablesize * sizeof(struct hentry *));
if (! tableptr) {
- fclose(rawdict);
+ delete dict;
return 3;
}
for (int i=0; i<tablesize; i++) tableptr[i] = NULL;
@@ -390,7 +391,7 @@ int HashMgr::load_tables(const char * tpath)
// loop through all words on much list and add to hash
// table and create word and affix strings
- while (fgets(ts,MAXDELEN-1,rawdict)) {
+ while ((ts = dict->getline())) {
mychomp(ts);
// split each line into word and morphological description
dp = strchr(ts,'\t');
@@ -443,16 +444,15 @@ int HashMgr::load_tables(const char * tpath)
// add the word and its index plus its capitalized form optionally
if (add_word(ts,wbl,wcl,flags,al,dp, false) ||
add_hidden_capitalized_word(ts, wbl, wcl, flags, al, dp, captype)) {
- fclose(rawdict);
+ delete dict;
return 5;
}
}
- fclose(rawdict);
+ delete dict;
return 0;
}
-
// the hash function is a simple load and rotate
// algorithm borrowed
@@ -506,8 +506,8 @@ int HashMgr::decode_flags(unsigned short ** result, char * flags) {
break;
}
case FLAG_UNI: { // UTF-8 characters
- w_char w[MAXDELEN/2];
- len = u8_u16(w, MAXDELEN/2, flags);
+ w_char w[BUFSIZE/2];
+ len = u8_u16(w, BUFSIZE/2, flags);
*result = (unsigned short *) malloc(len * sizeof(short));
if (!*result) return -1;
memcpy(*result, w, len * sizeof(short));
@@ -566,16 +566,13 @@ char * HashMgr::encode_flag(unsigned short f) {
}
// read in aff file and set flag mode
-int HashMgr::load_config(const char * affpath)
+int HashMgr::load_config(const char * affpath, const char * key)
{
+ char * line; // io buffers
int firstline = 1;
-
- // io buffers
- char line[MAXDELEN+1];
// open the affix file
- FILE * afflst;
- afflst = fopen(affpath,"r");
+ FileMgr * afflst = new FileMgr(affpath, key);
if (!afflst) {
HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n",affpath);
return 1;
@@ -584,7 +581,7 @@ int HashMgr::load_config(const char * affpath)
// read in each line ignoring any that do not
// start with a known line type indicator
- while (fgets(line,MAXDELEN,afflst)) {
+ while ((line = afflst->getline())) {
mychomp(line);
/* remove byte order mark */
@@ -608,7 +605,7 @@ int HashMgr::load_config(const char * affpath)
if (strncmp(line,"FORBIDDENWORD",13) == 0) {
char * st = NULL;
if (parse_string(line, &st, "FORBIDDENWORD")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
forbiddenword = decode_flag(st);
@@ -616,7 +613,7 @@ int HashMgr::load_config(const char * affpath)
}
if (strncmp(line, "SET", 3) == 0) {
if (parse_string(line, &enc, "SET")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
if (strcmp(enc, "UTF-8") == 0) {
@@ -630,7 +627,7 @@ int HashMgr::load_config(const char * affpath)
}
if (strncmp(line, "LANG", 4) == 0) {
if (parse_string(line, &lang, "LANG")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
langnum = get_lang_num(lang);
@@ -639,21 +636,21 @@ int HashMgr::load_config(const char * affpath)
/* parse in the ignored characters (for example, Arabic optional diacritics characters */
if (strncmp(line,"IGNORE",6) == 0) {
if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, "IGNORE", utf8)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
if ((strncmp(line,"AF",2) == 0) && isspace(line[2])) {
if (parse_aliasf(line, afflst)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
if ((strncmp(line,"AM",2) == 0) && isspace(line[2])) {
if (parse_aliasm(line, afflst)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -662,12 +659,12 @@ int HashMgr::load_config(const char * affpath)
if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && isspace(line[3])) break;
}
if (csconv == NULL) csconv = get_current_cs("ISO8859-1");
- fclose(afflst);
+ delete afflst;
return 0;
}
/* parse in the ALIAS table */
-int HashMgr::parse_aliasf(char * line, FILE * af)
+int HashMgr::parse_aliasf(char * line, FileMgr * af)
{
if (numaliasf != 0) {
HUNSPELL_WARNING(stderr, "error: duplicate AF (alias for flag vector) tables used\n");
@@ -723,9 +720,9 @@ int HashMgr::parse_aliasf(char * line, FILE * af)
}
/* now parse the numaliasf lines to read in the remainder of the table */
- char * nl = line;
+ char * nl;
for (int j=0; j < numaliasf; j++) {
- if (!fgets(nl,MAXDELEN,af)) return 1;
+ if (!(nl = af->getline())) return 1;
mychomp(nl);
tp = nl;
i = 0;
@@ -788,7 +785,7 @@ int HashMgr::get_aliasf(int index, unsigned short ** fvec) {
}
/* parse morph alias definitions */
-int HashMgr::parse_aliasm(char * line, FILE * af)
+int HashMgr::parse_aliasm(char * line, FileMgr * af)
{
if (numaliasm != 0) {
HUNSPELL_WARNING(stderr, "error: duplicate AM (aliases for morphological descriptions) tables used\n");
@@ -836,7 +833,7 @@ int HashMgr::parse_aliasm(char * line, FILE * af)
/* now parse the numaliasm lines to read in the remainder of the table */
char * nl = line;
for (int j=0; j < numaliasm; j++) {
- if (!fgets(nl,MAXDELEN,af)) return 1;
+ if (!(nl = af->getline())) return 1;
mychomp(nl);
tp = nl;
i = 0;
diff --git a/src/hunspell/hashmgr.hxx b/src/hunspell/hashmgr.hxx
index d88de48..9664e5f 100644
--- a/src/hunspell/hashmgr.hxx
+++ b/src/hunspell/hashmgr.hxx
@@ -8,6 +8,7 @@
#endif
#include "htypes.hxx"
+#include "filemgr.hxx"
enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI };
@@ -35,7 +36,7 @@ class HashMgr
public:
- HashMgr(const char * tpath, const char * apath);
+ HashMgr(const char * tpath, const char * apath, const char * key = NULL);
~HashMgr();
struct hentry * lookup(const char *) const;
@@ -55,14 +56,14 @@ public:
private:
int get_clen_and_captype(const char * word, int wbl, int * captype);
- int load_tables(const char * tpath);
+ int load_tables(const char * tpath, const char * key);
int add_word(const char * word, int wbl, int wcl, unsigned short * ap,
int al, const char * desc, bool onlyupcase);
- int load_config(const char * affpath);
- int parse_aliasf(char * line, FILE * af);
+ int load_config(const char * affpath, const char * key);
+ int parse_aliasf(char * line, FileMgr * af);
int add_hidden_capitalized_word(char * word, int wbl, int wcl,
unsigned short * flags, int al, char * dp, int captype);
- int parse_aliasm(char * line, FILE * af);
+ int parse_aliasm(char * line, FileMgr * af);
};
diff --git a/src/hunspell/htypes.hxx b/src/hunspell/htypes.hxx
index bc078c3..718a0f8 100644
--- a/src/hunspell/htypes.hxx
+++ b/src/hunspell/htypes.hxx
@@ -1,8 +1,6 @@
#ifndef _HTYPES_HXX_
#define _HTYPES_HXX_
-#define MAXDELEN 8192
-
#define ROTATE_LEN 5
#define ROTATE(v,q) \
@@ -15,7 +13,8 @@
#define HENTRY_WORD(h) &(h->word)
#define HENTRY_DATA(h) (h->var ? ((h->var & H_OPT_ALIASM) ? \
- *((char **) (&(h->word) + h->blen + 1)) : &(h->word) + h->blen + 1) : NULL)
+ get_stored_pointer(&(h->word) + h->blen + 1) : &(h->word) + h->blen + 1) : NULL)
+// *((char **) (&(h->word) + h->blen + 1)) : &(h->word) + h->blen + 1) : NULL)
#define HENTRY_FIND(h,p) (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL)
// approx. number of user defined words
diff --git a/src/hunspell/hunspell.cxx b/src/hunspell/hunspell.cxx
index 502b997..f2f7536 100644
--- a/src/hunspell/hunspell.cxx
+++ b/src/hunspell/hunspell.cxx
@@ -21,19 +21,22 @@ using namespace std;
#endif
#endif
-Hunspell::Hunspell(const char * affpath, const char * dpath)
+Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
{
encoding = NULL;
csconv = NULL;
utf8 = 0;
complexprefixes = 0;
+ affixpath = mystrdup(affpath);
+ maxdic = 0;
/* first set up the hash manager */
- pHMgr = new HashMgr(dpath, affpath);
+ pHMgr[0] = new HashMgr(dpath, affpath, key);
+ if (pHMgr[0]) maxdic = 1;
/* next set up the affix manager */
/* it needs access to the hash manager lookup methods */
- pAMgr = new AffixMgr(affpath,pHMgr);
+ pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);
/* get the preferred try string and the dictionary */
/* encoding from the Affix Manager for that dictionary */
@@ -55,18 +58,28 @@ Hunspell::~Hunspell()
{
if (pSMgr) delete pSMgr;
if (pAMgr) delete pAMgr;
- if (pHMgr) delete pHMgr;
+ for (int i = 0; i < maxdic; i++) delete pHMgr[i];
+ maxdic = 0;
pSMgr = NULL;
pAMgr = NULL;
- pHMgr = NULL;
+ pHMgr[0] = NULL;
#ifdef MOZILLA_CLIENT
free(csconv);
#endif
csconv= NULL;
if (encoding) free(encoding);
encoding = NULL;
+ if (affixpath) free(affixpath);
+ affixpath = NULL;
}
+// register one more dictionary (dic file only; the affix path given to the
+// constructor is reused); returns 0 on success, 1 if no slot or no manager
+int Hunspell::add_dic(const char * dpath, const char * key) {
+    if (maxdic == MAXDIC) return 1;
+    HashMgr * extra = new HashMgr(dpath, affixpath, key);
+    pHMgr[maxdic] = extra;
+    if (extra == NULL) return 1;
+    ++maxdic;
+    return 0;
+}
// make a copy of src at destination while removing all leading
// blanks and removing any trailing periods after recording
@@ -334,7 +347,7 @@ int Hunspell::spell(const char * word, int * info, char ** root)
int abbv = 0;
int wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
int info2 = 0;
- if (wl == 0) return 1;
+ if (wl == 0 || maxdic == 0) return 1;
if (root) *root = NULL;
// allow numbers with dots and commas (but forbid double separators: "..", ",," etc.)
@@ -559,7 +572,7 @@ int Hunspell::spell(const char * word, int * info, char ** root)
struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
{
struct hentry * he = NULL;
- int len;
+ int len, i;
char w2[MAXWORDUTF8LEN];
const char * word;
@@ -586,7 +599,8 @@ struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
}
// look word in hash table
- if (pHMgr) he = pHMgr->lookup(word);
+ for (i = 0; (i < maxdic) && !he; i ++) {
+ he = (pHMgr[i])->lookup(word);
// check forbidden and onlyincompound words
if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
@@ -607,6 +621,7 @@ struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
(pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
(info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))
)) he = he->next_homonym;
+ }
// check with affixes
if (!he && pAMgr) {
@@ -668,7 +683,7 @@ int Hunspell::suggest(char*** slst, const char * word)
int onlycmpdsug = 0;
char cw[MAXWORDUTF8LEN];
char wspace[MAXWORDUTF8LEN];
- if (! pSMgr) return 0;
+ if (!pSMgr || maxdic == 0) return 0;
w_char unicw[MAXWORDLEN];
int nc = strlen(word);
if (utf8) {
@@ -820,27 +835,27 @@ int Hunspell::suggest(char*** slst, const char * word)
if ((ns == 0 || onlycmpdsug) && pAMgr && (pAMgr->get_maxngramsugs() != 0)) {
switch(captype) {
case NOCAP: {
- ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr);
+ ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);
break;
}
case HUHCAP: {
memcpy(wspace,cw,(wl+1));
mkallsmall2(wspace, unicw, nc);
- ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr);
+ ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
break;
}
case INITCAP: {
capwords = 1;
memcpy(wspace,cw,(wl+1));
mkallsmall2(wspace, unicw, nc);
- ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr);
+ ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
break;
}
case ALLCAP: {
memcpy(wspace,cw,(wl+1));
mkallsmall2(wspace, unicw, nc);
int oldns = ns;
- ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr);
+ ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
for (int j = oldns; j < ns; j++)
mkallcap((*slst)[j]);
break;
@@ -933,7 +948,7 @@ int Hunspell::suggest_auto(char*** slst, const char * word)
{
char cw[MAXWORDUTF8LEN];
char wspace[MAXWORDUTF8LEN];
- if (! pSMgr) return 0;
+ if (!pSMgr || maxdic == 0) return 0;
int wl = strlen(word);
if (utf8) {
if (wl >= MAXWORDUTF8LEN) return 0;
@@ -1056,13 +1071,13 @@ int Hunspell::stem(char*** slst, char ** desc, int n)
alt = strstr(alt, " | ");
}
int pln = line_tok(tok, &pl, MSEP_ALT);
- for (int i = 0; i < pln; i++) {
+ for (int k = 0; k < pln; k++) {
// add derivational suffixes
- if (strstr(pl[i], MORPH_DERI_SFX)) {
+ if (strstr(pl[k], MORPH_DERI_SFX)) {
// remove inflectional suffixes
- char * is = strstr(pl[i], MORPH_INFL_SFX);
+ char * is = strstr(pl[k], MORPH_INFL_SFX);
if (is) *is = '\0';
- char * sg = pSMgr->suggest_gen(&(pl[i]), 1, pl[i]);
+ char * sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]);
if (sg) {
char ** gen;
int genl = line_tok(sg, &gen, MSEP_REC);
@@ -1075,10 +1090,10 @@ int Hunspell::stem(char*** slst, char ** desc, int n)
}
} else {
sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result);
- if (strstr(pl[i], MORPH_SURF_PFX)) {
- copy_field(result2 + strlen(result2), pl[i], MORPH_SURF_PFX);
+ if (strstr(pl[k], MORPH_SURF_PFX)) {
+ copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX);
}
- copy_field(result2 + strlen(result2), pl[i], MORPH_STEM);
+ copy_field(result2 + strlen(result2), pl[k], MORPH_STEM);
}
}
freelist(&pl, pln);
@@ -1100,7 +1115,7 @@ int Hunspell::suggest_pos_stems(char*** slst, const char * word)
{
char cw[MAXWORDUTF8LEN];
char wspace[MAXWORDUTF8LEN];
- if (! pSMgr) return 0;
+ if (! pSMgr || maxdic == 0) return 0;
int wl = strlen(word);
if (utf8) {
if (wl >= MAXWORDUTF8LEN) return 0;
@@ -1220,13 +1235,13 @@ int Hunspell::mkinitsmall2(char * p, w_char * u, int nc)
int Hunspell::add(const char * word)
{
- if (pHMgr) return pHMgr->add(word, NULL);
+ if (pHMgr[0]) return (pHMgr[0])->add(word, NULL);
return 0;
}
int Hunspell::add_with_affix(const char * word, const char * example)
{
- if (pHMgr) return pHMgr->add_with_affix(word, example);
+ if (pHMgr[0]) return (pHMgr[0])->add_with_affix(word, example);
return 0;
}
@@ -1234,7 +1249,7 @@ int Hunspell::add_with_affix(const char * word, const char * example)
int Hunspell::remove(const char * word)
{
- if (pHMgr) return pHMgr->remove(word);
+ if (pHMgr[0]) return (pHMgr[0])->remove(word);
return 0;
}
@@ -1248,7 +1263,7 @@ struct cs_info * Hunspell::get_csconv()
return csconv;
}
-char * Hunspell::cat_result(char * result, char * st)
+void Hunspell::cat_result(char * result, char * st)
{
if (st) {
if (*result) strcat(result, "\n");
@@ -1261,7 +1276,7 @@ int Hunspell::analyze(char*** slst, const char * word)
{
char cw[MAXWORDUTF8LEN];
char wspace[MAXWORDUTF8LEN];
- if (! pSMgr) return 0;
+ if (! pSMgr || maxdic == 0) return 0;
int wl = strlen(word);
if (utf8) {
if (wl >= MAXWORDUTF8LEN) return 0;
@@ -1530,7 +1545,7 @@ char * Hunspell::morph_with_correction(const char * word)
{
char cw[MAXWORDUTF8LEN];
char wspace[MAXWORDUTF8LEN];
- if (! pSMgr) return NULL;
+ if (! pSMgr || maxdic == 0) return NULL;
int wl = strlen(word);
if (utf8) {
if (wl >= MAXWORDUTF8LEN) return NULL;
@@ -1680,6 +1695,12 @@ Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
return (Hunhandle*)(new Hunspell(affpath, dpath));
}
+Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
+ const char * key)
+{ // like Hunspell_create, but also forwards key to the Hunspell constructor
+ return (Hunhandle*)(new Hunspell(affpath, dpath, key)); // released with Hunspell_destroy
+}
+
void Hunspell_destroy(Hunhandle *pHunspell)
{
delete (Hunspell*)(pHunspell);
diff --git a/src/hunspell/hunspell.h b/src/hunspell/hunspell.h
index 452599c..dc8d501 100644
--- a/src/hunspell/hunspell.h
+++ b/src/hunspell/hunspell.h
@@ -8,6 +8,10 @@ extern "C" {
typedef struct Hunhandle Hunhandle;
Hunhandle *Hunspell_create(const char * affpath, const char * dpath);
+
+Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
+ const char * key);
+
void Hunspell_destroy(Hunhandle *pHunspell);
/* spell(word) - spellcheck word
@@ -65,19 +69,19 @@ int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
/* add word to the run-time dictionary */
-int Hunspell_add(const char * word);
+int Hunspell_add(Hunhandle *pHunspell, const char * word);
/* add word to the run-time dictionary with affix flags of
* the example (a dictionary word): Hunspell will recognize
* affixed forms of the new word, too.
*/
-int Hunspell_add_with_affix(const char * word, const char * example);
+int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word, const char * example);
/* remove word from the run-time dictionary */
/* NOTE: not implemented yet */
-int Hunspell_remove(const char * word);
+int Hunspell_remove(Hunhandle *pHunspell, const char * word);
#ifdef __cplusplus
diff --git a/src/hunspell/hunspell.hxx b/src/hunspell/hunspell.hxx
index 38c141e..8461b54 100644
--- a/src/hunspell/hunspell.hxx
+++ b/src/hunspell/hunspell.hxx
@@ -10,6 +10,7 @@
#define SPELL_NOCAP (1 << 3)
#define SPELL_INITCAP (1 << 4)
+#define MAXDIC 20
#define MAXSUGGESTION 15
#define MAXSHARPS 5
@@ -33,14 +34,17 @@ class Hunspell
#endif
{
AffixMgr* pAMgr;
- HashMgr* pHMgr;
+ HashMgr* pHMgr[MAXDIC];
+ int maxdic;
SuggestMgr* pSMgr;
+ char * affixpath;
char * encoding;
struct cs_info * csconv;
int langnum;
int utf8;
int complexprefixes;
char** wordbreak;
+ char * key;
public:
@@ -48,10 +52,12 @@ public:
* input: path of affix file and dictionary file
*/
- Hunspell(const char * affpath, const char * dpath);
-
+ Hunspell(const char * affpath, const char * dpath, const char * key = NULL);
~Hunspell();
+ /* load extra dictionaries (only dic files) */
+ int add_dic(const char * dpath, const char * key = NULL);
+
/* spell(word) - spellcheck word
* output: 0 = bad word, not 0 = good word
*
@@ -164,7 +170,7 @@ private:
hentry * spellsharps(char * base, char *, int, int, char * tmp, int * info, char **root);
int is_keepcase(const hentry * rv);
int insert_sug(char ***slst, char * word, int ns);
- char * cat_result(char * result, char * st);
+ void cat_result(char * result, char * st);
char * stem_description(const char * desc);
};
diff --git a/src/hunspell/hunzip.cxx b/src/hunspell/hunzip.cxx
new file mode 100644
index 0000000..93912df
--- /dev/null
+++ b/src/hunspell/hunzip.cxx
@@ -0,0 +1,191 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "hunzip.hxx"
+
+#define CODELEN 65536
+#define BASEBITREC 5000
+
+#define UNCOMPRESSED '\002'
+#define MAGIC "hz0"
+#define MAGIC_ENCRYPT "hz1"
+#define MAGICLEN (sizeof(MAGIC) - 1)
+
+int Hunzip::fail(const char * err, const char * par) { // report an error on stderr and hand back -1
+ fprintf(stderr, err, par); // err is the printf format itself (the MSG_* strings take one %s)
+ return -1; // lets callers write "return fail(...)"
+}
+
+Hunzip::Hunzip(const char * file, const char * key) { // open file and decode its first chunk
+    // give every member that the destructor or getline() reads a defined value first
+    bufsiz = 0; lastbit = 0; inc = 0; outc = 0;
+    dec = NULL; fin = NULL;
+    line[0] = '\0'; // getline() copies from the previous line; start with an empty one
+    filename = (char *) malloc(strlen(file) + 1);
+    if (filename) strcpy(filename, file);
+    // bufsiz == -1 marks the stream unusable: getline() then returns NULL
+    if (!filename || getcode(key) == -1) bufsiz = -1; // no name copy: never fopen(NULL)
+    else bufsiz = getbuf();
+}
+
+int Hunzip::getcode(const char * key) {
+ unsigned char c[2];
+ int i, j, n, o, p;
+ int allocatedbit = BASEBITREC;
+ const char * enc = key;
+
+ fin = fopen(filename, "r");
+ if (!fin) return -1;
+
+ // read magic number
+ if ((fread(in, 1, 3, fin) < MAGICLEN)
+ || !(strncmp(MAGIC, in, MAGICLEN) == 0 ||
+ strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0)) {
+ return fail(MSG_FORMAT, filename);
+ }
+
+ // check encryption
+ if (strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0) {
+ unsigned char cs;
+ if (!key) return fail(MSG_KEY, filename);
+ if (fread(&c, 1, 1, fin) < 1) return fail(MSG_FORMAT, filename);
+ for (cs = 0; *enc; enc++) cs ^= *enc;
+ if (cs != c[0]) return fail(MSG_KEY, filename);
+ enc = key;
+ } else key = NULL;
+
+ // read record count
+ if (fread(&c, 1, 2, fin) < 2) return fail(MSG_FORMAT, filename);
+
+ if (key) {
+ c[0] ^= *enc;
+ if (*(++enc) == '\0') enc = key;
+ c[1] ^= *enc;
+ }
+
+ n = ((int) c[0] << 8) + c[1];
+ dec = (struct bit *) malloc(BASEBITREC * sizeof(struct bit));
+ if (!dec) return fail(MSG_MEMORY, filename);
+ dec[0].v[0] = 0;
+ dec[0].v[1] = 0;
+
+ // read codes
+ for (i = 0; i < n; i++) {
+ unsigned char l;
+ if (fread(c, 1, 2, fin) < 2) return fail(MSG_FORMAT, filename);
+ if (key) {
+ if (*(++enc) == '\0') enc = key;
+ c[0] ^= *enc;
+ if (*(++enc) == '\0') enc = key;
+ c[1] ^= *enc;
+ }
+ if (fread(&l, 1, 1, fin) < 1) return fail(MSG_FORMAT, filename);
+ if (key) {
+ if (*(++enc) == '\0') enc = key;
+ l ^= *enc;
+ }
+ if (fread(in, 1, l/8+1, fin) < l/8+1) return fail(MSG_FORMAT, filename);
+ if (key) for (j = 0; j <= l/8; j++) {
+ if (*(++enc) == '\0') enc = key;
+ in[j] ^= *enc;
+ }
+ p = 0;
+ for (j = 0; j < l; j++) {
+ int b = (in[j/8] & (1 << (7 - (j % 8)))) ? 1 : 0;
+ int oldp = p;
+ p = dec[p].v[b];
+ if (p == 0) {
+ lastbit++;
+ if (lastbit == allocatedbit) {
+ allocatedbit += BASEBITREC;
+ dec = (struct bit *) realloc(dec, allocatedbit * sizeof(struct bit));
+ }
+ dec[lastbit].v[0] = 0;
+ dec[lastbit].v[1] = 0;
+ dec[oldp].v[b] = lastbit;
+ p = lastbit;
+ }
+ }
+ dec[p].c[0] = c[0];
+ dec[p].c[1] = c[1];
+ }
+ return 0;
+}
+
+Hunzip::~Hunzip() // release the file name copy, the code table and the input stream
+{
+    free(filename); // free(NULL) is a no-op, so the pointers need no guard
+    free(dec);
+    if (fin) fclose(fin); // fin is already NULL when getbuf() closed it at the end of the data
+}
+
+int Hunzip::getbuf() { // Huffman-decode the next chunk into out; returns bytes written, -1 on format error
+ int p = 0; // current node in dec, 0 = root
+ int o = 0; // bytes written to out so far
+ do {
+ if (inc == 0) inbits = fread(in, 1, BUFSIZE, fin) * 8; // refill in; NOTE(review): a call resuming at bit 0 would also refill - confirm
+ for (; inc < inbits; inc++) {
+ int b = (in[inc / 8] & (1 << (7 - (inc % 8)))) ? 1 : 0; // bits are taken most significant first
+ int oldp = p;
+ p = dec[p].v[b];
+ if (p == 0) { // no child for this bit: oldp ends a code
+ if (oldp == lastbit) { // the most recently created node acts as the end-of-data marker
+ fclose(fin);
+ fin = NULL;
+ // add last odd byte
+ if (dec[lastbit].c[0]) out[o++] = dec[lastbit].c[1];
+ return o;
+ }
+ out[o++] = dec[oldp].c[0]; // every other code expands to two bytes
+ out[o++] = dec[oldp].c[1];
+ if (o == BUFSIZE) return o; // inc is not advanced, so the next call reads this bit again from the root
+ p = dec[p].v[b]; // p is 0 here: restart at the root with the same bit
+ }
+ }
+ inc = 0;
+ } while (inbits == BUFSIZE * 8); // a short read means there is no more input
+ return fail(MSG_FORMAT, filename); // input ran out before the end marker was seen
+}
+
+const char * Hunzip::getline() { // decode and return the next line, or NULL if the stream is unusable
+ char linebuf[BUFSIZE]; // newly decoded part of the line (BUFSIZE bytes on the stack)
+ int l = 0, eol = 0, left = 0, right = 0; // left/right: bytes reused from the start/end of the previous line
+ char end; // unused
+ if (bufsiz == -1) return NULL;
+ while (l < bufsiz && !eol) { // also stops once a refill below has set bufsiz to -1
+ linebuf[l++] = out[outc];
+ switch (out[outc]) {
+ case '\t': break; // tab and space (below) are under 47 but count as ordinary text
+ case 31: { // escape: the next byte is taken literally
+ if (++outc == bufsiz) {
+ bufsiz = getbuf(); // NOTE(review): unlike the refill at the end of the loop, fin is not checked here
+ outc = 0;
+ }
+ linebuf[l - 1] = out[outc]; // overwrite the stored escape byte
+ break;
+ }
+ case ' ': break;
+ default: if (((unsigned char) out[outc]) < 47) { // any other byte below 47 ends the line
+ if (out[outc] > 32) { // 33..46: suffix length marker, followed by the prefix byte
+ right = out[outc] - 31;
+ if (++outc == bufsiz) {
+ bufsiz = getbuf(); // NOTE(review): fin is not checked here either
+ outc = 0;
+ }
+ }
+ if (out[outc] == 30) left = 9; else left = out[outc]; // 30 stands for 9, presumably because 9 would read as '\t'
+ linebuf[l-1] = '\n';
+ eol = 1;
+ }
+ }
+ if (++outc == bufsiz) { // decoded buffer used up
+ outc = 0;
+ bufsiz = fin ? getbuf(): -1; // fin is NULL after getbuf() reached the end of the data
+ }
+ }
+ if (right) strcpy(linebuf + l - 1, line + strlen(line) - right - 1); // replace the '\n' with the previous line's tail
+ else linebuf[l] = '\0';
+ strcpy(line + left, linebuf); // keep the first left bytes of the previous line
+ return line; // points into this object; overwritten by the next call
+}
diff --git a/src/hunspell/hunzip.hxx b/src/hunspell/hunzip.hxx
new file mode 100644
index 0000000..52109d1
--- /dev/null
+++ b/src/hunspell/hunzip.hxx
@@ -0,0 +1,41 @@
+/* hunzip: file decompression for sorted dictionaries with optional encryption,
+ * algorithm: prefix-suffix encoding and 16-bit Huffman encoding */
+
+#ifndef _HUNZIP_HXX_
+#define _HUNZIP_HXX_
+
+#define BUFSIZE 65536
+#define HZIP_EXTENSION ".hz"
+
+#define MSG_OPEN "error: %s: cannot open\n"
+#define MSG_FORMAT "error: %s: not in hzip format\n"
+#define MSG_MEMORY "error: %s: missing memory\n"
+#define MSG_KEY "error: %s: missing or bad password\n"
+
+struct bit { // one node of the decoding tree kept in Hunzip::dec
+ unsigned char c[2]; // the two output bytes of the code that ends at this node
+ int v[2]; // index in dec of the next node for bit 0 / bit 1; 0 = no such node
+};
+
+class Hunzip // reader for hzip files: yields the decoded text line by line
+{
+
+protected:
+ char * filename; // malloc'd copy of the path passed to the constructor
+ FILE * fin; // input file; closed and set to NULL by getbuf() at the end of the data
+ int bufsiz, lastbit, inc, inbits, outc; // bytes in out (-1: unusable), last dec node, bit position in in, bits in in, read position in out
+ struct bit * dec; // code table
+ char in[BUFSIZE]; // input buffer
+ char out[BUFSIZE + 1]; // Huffman-decoded buffer
+ char line[BUFSIZE + 50]; // decoded line
+ int getcode(const char * key); // read header and code table; 0 on success, -1 on error
+ int getbuf(); // decode the next chunk of in into out; returns its length, -1 on error
+ int fail(const char * err, const char * par); // print err (a printf format) with par to stderr, return -1
+
+public:
+ Hunzip(const char * filename, const char * key = NULL);
+ ~Hunzip();
+ const char * getline(); // next line, held in the line member; NULL when the stream is unusable
+};
+
+#endif
diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
index b1a58f3..ce54f57 100644
--- a/src/hunspell/suggestmgr.cxx
+++ b/src/hunspell/suggestmgr.cxx
@@ -1028,7 +1028,7 @@ int SuggestMgr::movechar_utf(char ** wlst, const w_char * word, int wl, int ns,
}
// generate a set of suggestions for very poorly spelled words
-int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr* pHMgr)
+int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr** pHMgr, int md)
{
int i, j;
@@ -1037,8 +1037,6 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr* pHMgr)
int lp, lpphon;
int nonbmp = 0;
- if (!pHMgr) return ns;
-
// exhaustively search through all root words
// keeping track of the MAX_ROOTS most similar root words
struct hentry * roots[MAX_ROOTS];
@@ -1088,8 +1086,9 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr* pHMgr)
mkallcap(candidate, csconv);
phonet(candidate, target, n, *ph);
}
-
- while ((hp = pHMgr->walk_hashtable(col, hp))) {
+
+ for (i = 0; i < md; i++) {
+ while ((hp = (pHMgr[i])->walk_hashtable(col, hp))) {
if ((hp->astr) && (pAMgr) &&
(TESTAFF(hp->astr, pAMgr->get_forbiddenword(), hp->alen) ||
TESTAFF(hp->astr, ONLYUPCASEFLAG, hp->alen) ||
@@ -1135,7 +1134,7 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr* pHMgr)
lval = scoresphon[j];
}
}
- }
+ }}
// find minimum threshhold for a passable suggestion
// mangle original word three differnt ways
@@ -1557,7 +1556,7 @@ char * SuggestMgr::suggest_hentry_gen(hentry * rv, char * pattern)
*result = '\0';
int sfxcount = get_sfxcount(pattern);
-// if (get_sfxcount(HENTRY_DATA(rv)) > sfxcount) return NULL;
+ if (get_sfxcount(HENTRY_DATA(rv)) > sfxcount) return NULL;
if (HENTRY_DATA(rv)) {
char * aff = pAMgr->morphgen(HENTRY_WORD(rv), rv->blen, rv->astr, rv->alen,
diff --git a/src/hunspell/suggestmgr.hxx b/src/hunspell/suggestmgr.hxx
index d22884f..0e61572 100644
--- a/src/hunspell/suggestmgr.hxx
+++ b/src/hunspell/suggestmgr.hxx
@@ -51,7 +51,7 @@ public:
~SuggestMgr();
int suggest(char*** slst, const char * word, int nsug, int * onlycmpdsug);
- int ngsuggest(char ** wlst, char * word, int ns, HashMgr* pHMgr);
+ int ngsuggest(char ** wlst, char * word, int ns, HashMgr** pHMgr, int md);
int suggest_auto(char*** slst, const char * word, int nsug);
int suggest_stems(char*** slst, const char * word, int nsug);
int suggest_pos_stems(char*** slst, const char * word, int nsug);
diff --git a/src/parsers/Makefile.am b/src/parsers/Makefile.am
index ffd13f3..c8e34be 100644
--- a/src/parsers/Makefile.am
+++ b/src/parsers/Makefile.am
@@ -7,4 +7,4 @@ noinst_PROGRAMS=testparser
testparser_SOURCES=firstparser.cxx firstparser.hxx htmlparser.cxx htmlparser.hxx latexparser.cxx latexparser.hxx manparser.cxx manparser.hxx testparser.cxx textparser.cxx textparser.hxx
# need mystrdup()
-LDADD = ../hunspell/libhunspell.la
+LDADD = ../hunspell/libhunspell-1.2.la
diff --git a/src/parsers/Makefile.in b/src/parsers/Makefile.in
index 888958b..08c1d03 100644
--- a/src/parsers/Makefile.in
+++ b/src/parsers/Makefile.in
@@ -74,7 +74,7 @@ am_testparser_OBJECTS = firstparser.$(OBJEXT) htmlparser.$(OBJEXT) \
textparser.$(OBJEXT)
testparser_OBJECTS = $(am_testparser_OBJECTS)
testparser_LDADD = $(LDADD)
-testparser_DEPENDENCIES = ../hunspell/libhunspell.la
+testparser_DEPENDENCIES = ../hunspell/libhunspell-1.2.la
DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__depfiles_maybe = depfiles
@@ -236,7 +236,7 @@ libparsers_a_SOURCES = firstparser.cxx htmlparser.cxx \
testparser_SOURCES = firstparser.cxx firstparser.hxx htmlparser.cxx htmlparser.hxx latexparser.cxx latexparser.hxx manparser.cxx manparser.hxx testparser.cxx textparser.cxx textparser.hxx
# need mystrdup()
-LDADD = ../hunspell/libhunspell.la
+LDADD = ../hunspell/libhunspell-1.2.la
all: all-am
.SUFFIXES:
diff --git a/src/tools/Makefile.am b/src/tools/Makefile.am
index 4ce8357..3e92633 100644
--- a/src/tools/Makefile.am
+++ b/src/tools/Makefile.am
@@ -1,22 +1,26 @@
-bin_PROGRAMS=analyze chmorph example hunspell munch unmunch
+bin_PROGRAMS=analyze chmorph example hunspell munch unmunch hzip hunzip
INCLUDES=-I${top_srcdir}/src/hunspell -I${top_srcdir}/src/parsers
+hzip_SOURCES=hzip.c
+hunzip_SOURCES=hunzip.cxx
+hunzip_LDADD = ../hunspell/libhunspell-1.2.la
+
munch_SOURCES=munch.c
unmunch_SOURCES=unmunch.c
include_HEADERS=munch.h unmunch.h
example_SOURCES=example.cxx
-example_LDADD = ../hunspell/libhunspell.la
+example_LDADD = ../hunspell/libhunspell-1.2.la
hunspell_SOURCES=hunspell.cxx
-hunspell_LDADD = @LIBINTL@ ../hunspell/libhunspell.la \
+hunspell_LDADD = @LIBINTL@ ../hunspell/libhunspell-1.2.la \
../parsers/libparsers.a @CURSESLIB@ @READLINELIB@
analyze_SOURCES=analyze.cxx
-analyze_LDADD = ../hunspell/libhunspell.la
+analyze_LDADD = ../hunspell/libhunspell-1.2.la
chmorph_SOURCES=chmorph.cxx
-chmorph_LDADD = ../hunspell/libhunspell.la ../parsers/libparsers.a
+chmorph_LDADD = ../hunspell/libhunspell-1.2.la ../parsers/libparsers.a
-EXTRA_DIST=makealias
+EXTRA_DIST=makealias affixcompress
diff --git a/src/tools/Makefile.in b/src/tools/Makefile.in
index 4155684..bd5b851 100644
--- a/src/tools/Makefile.in
+++ b/src/tools/Makefile.in
@@ -39,7 +39,8 @@ build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
bin_PROGRAMS = analyze$(EXEEXT) chmorph$(EXEEXT) example$(EXEEXT) \
- hunspell$(EXEEXT) munch$(EXEEXT) unmunch$(EXEEXT)
+ hunspell$(EXEEXT) munch$(EXEEXT) unmunch$(EXEEXT) \
+ hzip$(EXEEXT) hunzip$(EXEEXT)
subdir = src/tools
DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \
$(srcdir)/Makefile.in
@@ -60,18 +61,24 @@ binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
PROGRAMS = $(bin_PROGRAMS)
am_analyze_OBJECTS = analyze.$(OBJEXT)
analyze_OBJECTS = $(am_analyze_OBJECTS)
-analyze_DEPENDENCIES = ../hunspell/libhunspell.la
+analyze_DEPENDENCIES = ../hunspell/libhunspell-1.2.la
am_chmorph_OBJECTS = chmorph.$(OBJEXT)
chmorph_OBJECTS = $(am_chmorph_OBJECTS)
-chmorph_DEPENDENCIES = ../hunspell/libhunspell.la \
+chmorph_DEPENDENCIES = ../hunspell/libhunspell-1.2.la \
../parsers/libparsers.a
am_example_OBJECTS = example.$(OBJEXT)
example_OBJECTS = $(am_example_OBJECTS)
-example_DEPENDENCIES = ../hunspell/libhunspell.la
+example_DEPENDENCIES = ../hunspell/libhunspell-1.2.la
am_hunspell_OBJECTS = hunspell.$(OBJEXT)
hunspell_OBJECTS = $(am_hunspell_OBJECTS)
-hunspell_DEPENDENCIES = ../hunspell/libhunspell.la \
+hunspell_DEPENDENCIES = ../hunspell/libhunspell-1.2.la \
../parsers/libparsers.a
+am_hunzip_OBJECTS = hunzip.$(OBJEXT)
+hunzip_OBJECTS = $(am_hunzip_OBJECTS)
+hunzip_DEPENDENCIES = ../hunspell/libhunspell-1.2.la
+am_hzip_OBJECTS = hzip.$(OBJEXT)
+hzip_OBJECTS = $(am_hzip_OBJECTS)
+hzip_LDADD = $(LDADD)
am_munch_OBJECTS = munch.$(OBJEXT)
munch_OBJECTS = $(am_munch_OBJECTS)
munch_LDADD = $(LDADD)
@@ -98,10 +105,11 @@ CXXLD = $(CXX)
CXXLINK = $(LIBTOOL) --tag=CXX --mode=link $(CXXLD) $(AM_CXXFLAGS) \
$(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
SOURCES = $(analyze_SOURCES) $(chmorph_SOURCES) $(example_SOURCES) \
- $(hunspell_SOURCES) $(munch_SOURCES) $(unmunch_SOURCES)
+ $(hunspell_SOURCES) $(hunzip_SOURCES) $(hzip_SOURCES) \
+ $(munch_SOURCES) $(unmunch_SOURCES)
DIST_SOURCES = $(analyze_SOURCES) $(chmorph_SOURCES) \
- $(example_SOURCES) $(hunspell_SOURCES) $(munch_SOURCES) \
- $(unmunch_SOURCES)
+ $(example_SOURCES) $(hunspell_SOURCES) $(hunzip_SOURCES) \
+ $(hzip_SOURCES) $(munch_SOURCES) $(unmunch_SOURCES)
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
@@ -243,20 +251,23 @@ target_cpu = @target_cpu@
target_os = @target_os@
target_vendor = @target_vendor@
INCLUDES = -I${top_srcdir}/src/hunspell -I${top_srcdir}/src/parsers
+hzip_SOURCES = hzip.c
+hunzip_SOURCES = hunzip.cxx
+hunzip_LDADD = ../hunspell/libhunspell-1.2.la
munch_SOURCES = munch.c
unmunch_SOURCES = unmunch.c
include_HEADERS = munch.h unmunch.h
example_SOURCES = example.cxx
-example_LDADD = ../hunspell/libhunspell.la
+example_LDADD = ../hunspell/libhunspell-1.2.la
hunspell_SOURCES = hunspell.cxx
-hunspell_LDADD = @LIBINTL@ ../hunspell/libhunspell.la \
+hunspell_LDADD = @LIBINTL@ ../hunspell/libhunspell-1.2.la \
../parsers/libparsers.a @CURSESLIB@ @READLINELIB@
analyze_SOURCES = analyze.cxx
-analyze_LDADD = ../hunspell/libhunspell.la
+analyze_LDADD = ../hunspell/libhunspell-1.2.la
chmorph_SOURCES = chmorph.cxx
-chmorph_LDADD = ../hunspell/libhunspell.la ../parsers/libparsers.a
-EXTRA_DIST = makealias
+chmorph_LDADD = ../hunspell/libhunspell-1.2.la ../parsers/libparsers.a
+EXTRA_DIST = makealias affixcompress
all: all-am
.SUFFIXES:
@@ -330,6 +341,12 @@ example$(EXEEXT): $(example_OBJECTS) $(example_DEPENDENCIES)
hunspell$(EXEEXT): $(hunspell_OBJECTS) $(hunspell_DEPENDENCIES)
@rm -f hunspell$(EXEEXT)
$(CXXLINK) $(hunspell_LDFLAGS) $(hunspell_OBJECTS) $(hunspell_LDADD) $(LIBS)
+hunzip$(EXEEXT): $(hunzip_OBJECTS) $(hunzip_DEPENDENCIES)
+ @rm -f hunzip$(EXEEXT)
+ $(CXXLINK) $(hunzip_LDFLAGS) $(hunzip_OBJECTS) $(hunzip_LDADD) $(LIBS)
+hzip$(EXEEXT): $(hzip_OBJECTS) $(hzip_DEPENDENCIES)
+ @rm -f hzip$(EXEEXT)
+ $(LINK) $(hzip_LDFLAGS) $(hzip_OBJECTS) $(hzip_LDADD) $(LIBS)
munch$(EXEEXT): $(munch_OBJECTS) $(munch_DEPENDENCIES)
@rm -f munch$(EXEEXT)
$(LINK) $(munch_LDFLAGS) $(munch_OBJECTS) $(munch_LDADD) $(LIBS)
@@ -347,6 +364,8 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chmorph.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/example.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hunspell.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hunzip.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hzip.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/munch.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unmunch.Po@am__quote@
diff --git a/src/tools/affixcompress b/src/tools/affixcompress
new file mode 100755
index 0000000..a2b9508
--- /dev/null
+++ b/src/tools/affixcompress
@@ -0,0 +1,183 @@
+#!/bin/sh
+# affix compressor utility for Hunspell
+# 2007 (c) László Németh, version 0.2
+# usage: affixcompress sorted_word_list_file
+#
+# Reads the word list named by the first argument and writes
+# <word list>.aff and <word list>.dic next to it.
+
+# Without arguments only the usage text is printed.
+case $# in
+0) echo \
+"affixcompress - compress a huge sorted word list to Hunspell aff and dic file
+Usage: affixcompress sorted_word_list_file
+Note: output may need manually added affix parameters (SET character_encoding,
+TRY suggestion_characters etc., see man(4) hunspell)"
+    exit 0;;
+esac
+
+# profiling
+#AWK="pgawk --profile"
+AWK="gawk"
+
+# Run every tool below in the C locale.
+export LC_ALL=C
+# Remove stale output; the file name is quoted so that names containing
+# blanks or glob characters are not split or expanded.
+rm -f -- "$1.aff" "$1.dic"
+cat $1 | $AWK '
+{
+ # calculate frequent suffixes
+ A[$1] = 1
+ len = length($1)
+ if (len > 2) {
+# print $1, substr($1, 1, len - 1), substr($1, len, 1) >"/dev/stderr"
+ B[substr($1, 1, len - 1)] = substr($1, len, 1);
+ }
+ for(i = 2; i < len; i++) {
+ r = substr($1, 1, i)
+ if (i == 2) {
+ if (prev != r) {
+ delete A
+ delete B
+ print "Deleted roots: ", prev > "/dev/stderr"
+ }
+ prev = r
+ }
+ if (A[r]) {
+# print $1 ": " r " és "substr($1, i + 1, len - i + 1) >"/dev/stderr"
+ sfx[substr($1, i + 1, len - i + 1)]++
+ } else if (B[r] && B[r] != substr($1, i + 1, 1)) {
+ r2 = substr($1, i + 1, len - i + 1)
+ sfy[r2,B[r]]++
+ }
+ }
+}
+END {
+ for (i in sfx) print i, 0, sfx[i]
+ for (i in sfy) print i, sfy[i]
+}
+' | tr '\034' ' ' >affixcompress0.tmp
+sort -rnk 3 affixcompress0.tmp | $AWK '$3 > 20{print $0}' | head -20000 >affixcompress1.tmp
+cat affixcompress1.tmp |
+$AWK '
+function potential_roots() {
+ # potential roots with most frequent suffixes
+ for(word in W) if (W[word]==1) {
+ print word >"word"
+ len = length(word);
+ for(i = 2; i < len; i++) {
+ root = substr(word, 1, i)
+ suff = substr(word, i + 1, len - i + 1)
+ if ((W[root]!="") && (sfxfr[suff] > 100)) C[root]++
+ if (sfz[suff]) {
+ l = split(sfz[suff], a)
+ for (k=1; k <= l; k++) if ((W[root a[k]]!="") && (sfyfr[root a[k]] > 100)) {
+ C[root a[k]]++
+ }
+ }
+ }
+ }
+
+ # calculate roots
+ for(word in W) if (W[word]==1) {
+ print word >"word2"
+ len = length(word);
+ z = 0
+ # choose most frequent root (maybe the original word)
+ max = C[word]
+ maxword = word
+ maxsuff = 0
+ for(i = 2; i < len; i++) {
+ root = substr(word, 1, i)
+ suff = substr(word, i + 1, len - i + 1)
+ if ((sfx[suff] != "") && (C[root] > max)) {
+ max = C[root]
+ maxword = root
+ maxsuff = sfx[suff]
+ }
+ if (sfz[suff] != "") {
+ l = split(sfz[suff], a)
+ for (k=1; k <= l; k++) if (C[root a[k]] > max) {
+ max = C[root a[k]]
+ maxword = root a[k]
+ maxsuff = sfy[suff,a[k]]
+ }
+ }
+ }
+ if (max > 0) {
+ if (maxsuff > 0) print maxword, maxsuff; else print maxword
+ A[maxword]++
+ z=1
+ } else {
+ for(i = 2; i < len; i++) {
+ root = substr(word, 1, i)
+ suff = substr(word, i + 1, len - i + 1)
+ if ((A[root] > 0) && sfx[suff]!="") {
+ print root, sfx[suff]
+ z = 1
+ break
+ }
+ if (sfz[suff]) {
+ l = split(sfz[suff], a)
+ for (k=1; k <= l; k++) if (A[root a[k]]!="") {
+ print root a[k], sfy[suff,a[k]]
+ z = 1
+ break
+ }
+ }
+ }
+ }
+ if (z == 0) {
+ print word
+ A[word]++
+ }
+ }
+ delete A
+ delete C
+}
+FILENAME == "-" {
+ if ($2 == 0) {
+ sfx[$1] = NR
+ sfxfr[$1] = $3
+ } else {
+ sfy[$1,$2] = NR
+ sfyfr[$1,$2] = $3
+ sfz[$1] = sfz[$1] " " $2
+ }
+ maxsuf = NR
+ next
+}
+{
+ cap = substr($1, 1, 3)
+ if (cap != prev) {
+ potential_roots()
+ delete W
+ print "Deleted class:", prev > "/dev/stderr"
+ }
+ prev = cap
+ W[$1] = 1
+}
+END {
+ potential_roots()
+ # write out frequent suffixes
+ out=FILENAME ".aff"
+ print "FLAG num" >out
+ for (i in sfx) if (sfx[i] > 0) {
+ print "SFX", sfx[i], "Y 1" >out
+ print "SFX", sfx[i], "0", i, "." >out
+ }
+ for (i in sfy) if (sfy[i] > 0) {
+ print "SFX", sfy[i], "Y 1" >out
+ split(i, c, "\034");
+ print "SFX", sfy[i], c[2], c[1], c[2] >out
+ }
+}
+' - $1 >affixcompress2.tmp
+sort -nk 2 affixcompress2.tmp >affixcompress3.tmp
+cat affixcompress3.tmp | $AWK -v out="$1.dic" '
+{
+ if (A[$1]=="") A[$1]=$2;
+ else if ($2!="") A[$1] = A[$1] "," $2
+}
+END {
+ for (i in A) n++
+ print n >out
+ for (i in A) {
+ if (A[i]=="") print i
+ else print i "/" A[i]
+ }
+}
+' | sort >>$1.dic
diff --git a/src/tools/chmorph.cxx b/src/tools/chmorph.cxx
index c6c246a..c2f372e 100644
--- a/src/tools/chmorph.cxx
+++ b/src/tools/chmorph.cxx
@@ -37,10 +37,11 @@ main(int argc, char** argv)
}
Hunspell *pMS = new Hunspell(argv[1], argv[2]);
- TextParser * p = new TextParser("qwertzuiopasdfghjklyxcvbnm���������QWERTZUIOPASDFGHJKLYXCVBNM���������");
+ TextParser * p = new TextParser("|${}:/_+qwertzuiopasdfghjklyxcvbnm���������QWERTZUIOPASDFGHJKLYXCVBNM���������");
char buf[MAXLNLEN];
char * next;
+ int num = 0;
while(fgets(buf,MAXLNLEN,f)) {
p->put_line(buf);
@@ -60,18 +61,59 @@ main(int argc, char** argv)
free(pl[i]);
pl[i] = r;
gen = 1;
+ } else {
+// free(pl[i]);
+// pl[i] = NULL;
}
}
if (gen) {
char **pl2;
+// pln = uniqlist(pl, pln);
int pl2n = pMS->generate(&pl2, next, pl, pln);
if (pl2n) {
- p->change_token(pl2[0]);
+// pl2n = uniqlist(pl2, pl2n);
+ char x[MAXLNLEN];
+ char * x2 = pl2[0];
+ num++;
+ if (pl2n>1) {
+ strcpy(x, "$");
+ for (int j = 0; (j < pl2n) && (j < 5); j++) {
+ strcat(x, pl2[j]);
+ if (j < pln && pl[j]) {
+ strcat(x, "{");
+ char * p2 = x + strlen(x);
+ for (char * p3 = pl[j]; *p3; p3++, p2++) {
+ if (*p3 == ' ' || *p3 == '\t') *p2 = '+';
+ else *p2 = *p3;
+ }
+ strcpy(p2, "}");
+ }
+
+ strcat(x, "|");
+ }
+ x[strlen(x) - 1] = '$';
+ x2 = x;
+ }
+// p->change_token(pl2[0]);
+ p->change_token(x2);
freelist(&pl2, pl2n);
// jump over the (possibly un)modified word
free(next);
next=p->next_token();
}
+ } else {
+ char x[MAXLNLEN];
+ strcpy(x, next);
+ strcat(x, "{");
+ char * p2 = x + strlen(x);
+ for (char * p3 = pl[0]; *p3; p3++, p2++) {
+ if (*p3 == ' ' || *p3 == '\t') *p2 = '+';
+ else *p2 = *p3;
+ }
+ strcpy(p2, "}");
+ p->change_token(x);
+ free(next);
+ next=p->next_token();
}
freelist(&pl, pln);
}
diff --git a/src/tools/example.cxx b/src/tools/example.cxx
index 0f53927..029c4e9 100644
--- a/src/tools/example.cxx
+++ b/src/tools/example.cxx
@@ -12,52 +12,33 @@ int
main(int argc, char** argv)
{
- char * af;
- char * df;
- char * wtc;
FILE* wtclst;
- /* first parse the command line options */
- /* arg1 - affix file, arg2 dictionary file, arg3 - file of words to check */
-
- if (argv[1]) {
- af = mystrdup(argv[1]);
- } else {
- fprintf(stderr,"correct syntax is:\n");
- fprintf(stderr,"example affix_file dictionary_file file_of_words_to_check\n");
- exit(1);
- }
- if (argv[2]) {
- df = mystrdup(argv[2]);
- } else {
- fprintf(stderr,"correct syntax is:\n");
- fprintf(stderr,"example affix_file dictionary_file file_of_words_to_check\n");
- exit(1);
- }
- if (argv[3]) {
- wtc = mystrdup(argv[3]);
- } else {
- fprintf(stderr,"correct syntax is:\n");
- fprintf(stderr,"example affix_file dictionary_file file_of_words_to_check\n");
- exit(1);
- }
+ /* first parse the command line options */
+ if (argc < 4) {
+ fprintf(stderr,"example (multiple dictionary version.:\n");
+ fprintf(stderr,"example affix_file dictionary_file(s) file_of_words_to_check\n");
+ exit(1);
+ }
- /* open the words to check list */
- wtclst = fopen(wtc,"r");
- if (!wtclst) {
- fprintf(stderr,"Error - could not open file of words to check\n");
- exit(1);
- }
-
+ /* open the words to check list */
+ wtclst = fopen(argv[argc - 1],"r");
+ if (!wtclst) {
+ fprintf(stderr,"Error - could not open file of words to check\n");
+ exit(1);
+ }
- Hunspell * pMS= new Hunspell(af,df);
-
int k;
int dp;
char buf[101];
- while(fgets(buf,100,wtclst)) {
+ Hunspell * pMS= new Hunspell(argv[1], argv[2]);
+
+ // load extra dictionaries
+ if (argc > 4) for (k = 3; k < argc - 1; k++) pMS->add_dic(argv[k]);
+
+ while(fgets(buf, 100, wtclst)) {
k = strlen(buf);
*(buf + k - 1) = '\0';
dp = pMS->spell(buf);
@@ -80,10 +61,6 @@ main(int argc, char** argv)
delete pMS;
fclose(wtclst);
- free(wtc);
- free(df);
- free(af);
-
return 0;
}
diff --git a/src/tools/hunspell.cxx b/src/tools/hunspell.cxx
index 80d5135..96d17fd 100644
--- a/src/tools/hunspell.cxx
+++ b/src/tools/hunspell.cxx
@@ -169,9 +169,13 @@ char * chenc(char * st, const char * enc1, const char * enc2) {
char * source = st;
char * dest = text_conv;
iconv_t conv = iconv_open(enc2, enc1);
- size_t res = iconv(conv, (ICONV_CONST char **) &source, &c1, &dest, &c2);
- iconv_close(conv);
- if (res != (size_t) -1) out = text_conv;
+ if (conv == (iconv_t) -1) {
+ fprintf(stderr, gettext("error - iconv_open: %s -> %s"), enc2, enc1);
+ } else {
+ size_t res = iconv(conv, (ICONV_CONST char **) &source, &c1, &dest, &c2);
+ iconv_close(conv);
+ if (res != (size_t) -1) out = text_conv;
+ }
}
#endif
return out;
@@ -208,12 +212,18 @@ TextParser * get_parser(int format, char * extension, Hunspell * pMS) {
size_t c2 = MAXLNLEN;
char * dest = text_conv;
iconv_t conv = iconv_open("UTF-8", dic_enc);
- iconv(conv, (ICONV_CONST char **) &wchars, &c1, &dest, &c2);
- iconv_close(conv);
- wordchars_utf16 = (unsigned short *) malloc(sizeof(unsigned short) * wlen);
- int n = u8_u16((w_char *) wordchars_utf16, wlen, text_conv);
- if (n > 0) flag_qsort(wordchars_utf16, 0, n);
- wordchars_utf16_len = n;
+ if (conv == (iconv_t) -1) {
+ fprintf(stderr, gettext("error - iconv_open: UTF-8 -> %s"), dic_enc);
+ wordchars_utf16 = NULL;
+ wordchars_utf16_len = 0;
+ } else {
+ iconv(conv, (ICONV_CONST char **) &wchars, &c1, &dest, &c2);
+ iconv_close(conv);
+ wordchars_utf16 = (unsigned short *) malloc(sizeof(unsigned short) * wlen);
+ int n = u8_u16((w_char *) wordchars_utf16, wlen, text_conv);
+ if (n > 0) flag_qsort(wordchars_utf16, 0, n);
+ wordchars_utf16_len = n;
+ }
}
} else {
// 8-bit input encoding
@@ -223,30 +233,34 @@ TextParser * get_parser(int format, char * extension, Hunspell * pMS) {
char ch[2];
char u8[10];
iconv_t conv = iconv_open("UTF-8", io_enc);
- for (int i = 32; i < 256; i++) {
- size_t c1 = 1;
- size_t c2 = 10;
- char * dest = u8;
- u8[0] = '\0';
- char * ch8bit = ch;
- ch[0] = (char) i;
- ch[1] = '\0';
- size_t res = iconv(conv, (ICONV_CONST char **) &ch8bit, &c1, &dest, &c2);
- if (res != (size_t) -1) {
- unsigned short idx;
- w_char w;
- w.l = 0;
- w.h = 0;
- u8_u16(&w, 1, u8);
- idx = (w.h << 8) + w.l;
- if (unicodeisalpha(idx)) {
- *pletters = (char) i;
- pletters++;
- }
+ if (conv == (iconv_t) -1) {
+ fprintf(stderr, gettext("error - iconv_open: UTF-8 -> %s"), io_enc);
+ } else {
+ for (int i = 32; i < 256; i++) {
+ size_t c1 = 1;
+ size_t c2 = 10;
+ char * dest = u8;
+ u8[0] = '\0';
+ char * ch8bit = ch;
+ ch[0] = (char) i;
+ ch[1] = '\0';
+ size_t res = iconv(conv, (ICONV_CONST char **) &ch8bit, &c1, &dest, &c2);
+ if (res != (size_t) -1) {
+ unsigned short idx;
+ w_char w;
+ w.l = 0;
+ w.h = 0;
+ u8_u16(&w, 1, u8);
+ idx = (w.h << 8) + w.l;
+ if (unicodeisalpha(idx)) {
+ *pletters = (char) i;
+ pletters++;
+ }
+ }
}
+ iconv_close(conv);
}
- *pletters = '\0';
- iconv_close(conv);
+ *pletters = '\0';
// UTF-8 wordchars -> 8 bit wordchars
int len = 0;
@@ -261,10 +275,13 @@ TextParser * get_parser(int format, char * extension, Hunspell * pMS) {
size_t c1 = len + 1;
size_t c2 = len + 1;
iconv_t conv = iconv_open(io_enc, dic_enc);
- iconv(conv, (ICONV_CONST char **) &wchars, &c1, &dest, &c2);
- iconv_close(conv);
+ if (conv == (iconv_t) -1) {
+ fprintf(stderr, gettext("error - iconv_open: %s -> %s"), io_enc, dic_enc);
+ } else {
+ iconv(conv, (ICONV_CONST char **) &wchars, &c1, &dest, &c2);
+ iconv_close(conv);
+ }
}
-
if (*letters) wordchars = mystrdup(letters);
}
#else
@@ -758,6 +775,7 @@ void dialogscreen(TextParser * parser, char * token,
int beginrow = rowindex - pos_tab(parser->get_prevline(0), parser->get_tokenpos()) / x;
if (beginrow >= MAXPREVLINE) beginrow = MAXPREVLINE - 1;
+/*
for (int i = 0; i < MAXPREVLINE; i++) {
char * line = (char *) calloc(1, x + 1);
strncpy(line, lines[prevline] + x * rowindex, x);
@@ -769,24 +787,35 @@ void dialogscreen(TextParser * parser, char * token,
}
free(line);
}
-
+*/
char * line = (char *) calloc(1, x + 1);
+ int poslen;
strncpy(line, lines[0] + x * rowindex, parser->get_tokenpos() % x);
- mvprintw(MAXPREVLINE + 1 - beginrow, 0, "%s", chenc(line, io_enc, ui_enc));
- free(line);
+// fprintf(stderr, "%s\n", line);
+// exit(1);
+ chenc(line, io_enc, ui_enc);
+ if (strcmp(ui_enc, "UTF-8")==0) {
+ char * p;
+ w_char dest_utf[BUFSIZ];
+ poslen = u8_u16(dest_utf, BUFSIZ, line);
+// for (p = line; *p; p++) if (*p == '\t') exit(1);
+ } else poslen = strlen(line);
+// mvprintw(MAXPREVLINE + 1 - beginrow, 0, "%s", chenc(line, io_enc, ui_enc));
attron(A_REVERSE);
- printw("%s", chenc(token, io_enc, ui_enc));
+// mvprintw(MAXPREVLINE + 1 - beginrow, 0, "%s", chenc(line, io_enc, ui_enc));
+ mvprintw(MAXPREVLINE + 1 - beginrow, poslen, "%s", chenc(token, io_enc, ui_enc));
attroff(A_REVERSE);
+ free(line);
- mvprintw(MAXPREVLINE + 2, 0, "\n");
- for (int i = 0; i < ns; i++) {
+// mvprintw(MAXPREVLINE + 2, 0, "\n");
+/* for (int i = 0; i < ns; i++) {
if ((ns > 10) && (i < 10)) {
printw(" 0%d: %s\n", i, chenc(wlst[i], dic_enc, ui_enc));
} else {
printw(" %d: %s\n", i, chenc(wlst[i], dic_enc, ui_enc));
}
}
-
+*/
mvprintw(y-3, 0, "%s\n",
gettext("\n[SP] <number> R)epl A)ccept I)nsert L)ookup U)ncap Q)uit e(X)it or ? for help\n"));
}
@@ -1196,6 +1225,11 @@ char * exist2(char * dir, int len, char * name, char * ext) {
strcat(buf, name);
strcat(buf, ext);
if (exist(buf)) return mystrdup(buf);
+ strcat(buf, HZIP_EXTENSION);
+ if (exist(buf)) {
+ buf[strlen(buf) - strlen(HZIP_EXTENSION)] = '\0';
+ return mystrdup(buf);
+ }
return NULL;
}
@@ -1214,6 +1248,7 @@ char * search(char * begin, char * name, char * ext) {
int main(int argc, char** argv)
{
Hunspell * pMS = NULL;
+ char * key = NULL;
int arg_files = -1; // first filename argumentum position in argv
int format = FMT_TEXT;
@@ -1256,9 +1291,13 @@ int main(int argc, char** argv)
} else if (argstate == 3) {
io_enc = argv[i];
argstate = 0;
+ } else if (argstate == 4) {
+ key = argv[i];
+ argstate = 0;
} else if (strcmp(argv[i],"-d")==0) argstate=1;
else if (strcmp(argv[i],"-p")==0) argstate=2;
else if (strcmp(argv[i],"-i")==0) argstate=3;
+ else if (strcmp(argv[i],"-P")==0) argstate=4;
else if ((strcmp(argv[i],"-h") == 0) || (strcmp(argv[i],"--help") == 0)) {
fprintf(stderr,gettext("Usage: hunspell [OPTION]... [FILE]...\n"));
fprintf(stderr,gettext("Check spelling of each FILE. Without FILE, check standard input.\n"));
@@ -1276,6 +1315,7 @@ int main(int argc, char** argv)
fprintf(stderr,gettext(" -L\t\tprint lines with mispelled words\n"));
fprintf(stderr,gettext(" -n\t\tnroff/troff input file format\n"));
fprintf(stderr,gettext(" -p dict\tset dict custom dictionary\n"));
+ fprintf(stderr,gettext(" -P password\tset password for encrypted dictionaries\n"));
fprintf(stderr,gettext(" -t\t\tTeX/LaTeX input file format\n"));
// experimental functions: missing Unicode support
// fprintf(stderr,gettext(" -u\t\tshow typical misspellings\n"));
@@ -1287,7 +1327,7 @@ int main(int argc, char** argv)
fprintf(stderr,"\n");
fprintf(stderr,gettext("Example: hunspell -d en_US file.txt # interactive spelling\n"));
fprintf(stderr,gettext(" hunspell -l file.txt # print misspelled words\n"));
- fprintf(stderr,gettext(" hunspell -i utf8 file.txt # check UTF-8 encoded file\n"));
+ fprintf(stderr,gettext(" hunspell -i utf-8 file.txt # check UTF-8 encoded file\n"));
fprintf(stderr,"\n");
fprintf(stderr,gettext("Bug reports: http://hunspell.sourceforge.net\n"));
exit(0);
@@ -1296,7 +1336,7 @@ int main(int argc, char** argv)
fprintf(stdout,"\n");
if (strcmp(argv[i],"-vv")!=0) {
fprintf(stdout,"\n");
- fprintf(stdout,gettext("Copyright (C) 2002-2007 Nemeth Laszlo. License: GNU LGPL.\n"));
+ fprintf(stdout,gettext("Copyright (C) 2002-2008 L\303\241szl\303\263 N\303\251meth. License: MPL/GPL/LGPL.\n"));
fprintf(stdout,"\n");
fprintf(stdout,gettext("Based on OpenOffice.org's Myspell library.\n"));
fprintf(stdout,gettext("Myspell's copyright (C) Kevin Hendricks, 2001-2002, License: BSD.\n"));
@@ -1376,7 +1416,7 @@ int main(int argc, char** argv)
char * dic = search(path, dicname, ".dic");
if (aff && dic) {
if (showpath) fprintf(stderr, "%s\n%s\n", aff, dic);
- pMS = new Hunspell(aff, dic);
+ pMS = new Hunspell(aff, dic, key);
} else {
fprintf(stderr,gettext("Can't open affix or dictionary files.\n"));
exit(1);
diff --git a/src/tools/hunzip.cxx b/src/tools/hunzip.cxx
new file mode 100644
index 0000000..5d1581d
--- /dev/null
+++ b/src/tools/hunzip.cxx
@@ -0,0 +1,22 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "hunzip.hxx"
+
+#define DESC "hunzip - decompress a hzip file to the standard output\n" \
+"Usage: hunzip file.hz [password]\n"
+
+/* Write the printf-style message `err` (with `par` as its only argument)
+ * to the standard error; the result is always 1, ready to be used as the
+ * exit status of main(). */
+int fail(const char * err, const char * par) {
+    const int status = 1;
+    (void) fprintf(stderr, err, par);
+    return status;
+}
+
+/* hunzip file.hz [password]: print the decompressed lines of a hzip file
+ * on the standard output.  Without arguments, or with -h, the usage text
+ * is printed and 1 is returned. */
+int main(int argc, char** argv) {
+    if (argc == 1 || strcmp(argv[1], "-h") == 0) return fail(DESC, NULL);
+    // the optional second argument is the password
+    Hunzip * h = new Hunzip(argv[1], (argc > 2) ? argv[2] : NULL);
+    const char * s;
+    // getline() returning NULL ends the loop
+    while ((s = h->getline())) printf("%s", s);
+    delete h;   // the decompressor was leaked before
+    return 0;
+}
diff --git a/src/tools/hzip.c b/src/tools/hzip.c
new file mode 100644
index 0000000..7c63297
--- /dev/null
+++ b/src/tools/hzip.c
@@ -0,0 +1,281 @@
+/* hzip: file compression for sorted dictionaries with optional encryption,
+ * algorithm: prefix-suffix encoding and 16-bit Huffman encoding */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define CODELEN 65536
+#define BUFSIZE 65536
+#define EXTENSION ".hz"
+
+#define ESCAPE 31
+#define MAGIC "hz0"
+#define MAGIC_ENCRYPTED "hz1"
+
+#define DESC "hzip - dictionary compression utility\n" \
+"Usage: hzip [-h | -P password ] [file1 file2 ..]\n" \
+" -P password encrypted compression\n" \
+" -h display this help and exit\n"
+
+enum { code_LEAF, code_TERM, code_NODE}; /* item kinds: 16-bit word, terminal sequence, inner tree node */
+
+struct item { /* one node of the Huffman tree */
+ unsigned short word; /* the 16-bit word of a code_LEAF item; newitem() sets it to 0 */
+ int count; /* frequency of the word; for a code_NODE the sum of both children */
+ char type; /* code_LEAF, code_TERM or code_NODE */
+ struct item * left; /* child reached with the code character '1' (see code2table) */
+ struct item * right; /* child reached with the code character '0' */
+};
+
+/* Write the printf-style message `err` (with `par` as its only argument)
+ * to the standard error; the result is always 1, so callers can write
+ * `return fail(...)`. */
+int fail(const char * err, const char * par) {
+    const int status = 1;
+    (void) fprintf(stderr, err, par);
+    return status;
+}
+
+/* Walk the Huffman tree below `tree` and store the code of every leaf in
+ * `table` as a NUL-terminated string of '1' (left branch) and '0' (right
+ * branch) characters.  A code_LEAF item is stored at table[word], the
+ * code_TERM item at table[CODELEN]; each string is allocated with malloc().
+ *
+ * `code` is the scratch buffer shared by the recursion and `deep` the number
+ * of characters already placed in it.  The top-level call passes NULL and 0;
+ * the buffer is then allocated and released here.
+ * The program is terminated when memory runs out. */
+void code2table(struct item * tree, char **table, char * code, int deep) {
+    int first = 0;
+    if (!code) {
+        first = 1;
+        /* a leaf at depth CODELEN needs CODELEN characters plus the NUL */
+        code = malloc(sizeof(char) * (CODELEN + 1));
+        if (!code) {
+            fprintf(stderr, "hzip: out of memory\n");
+            exit(1);
+        }
+    }
+    code[deep] = '1';
+    if (tree->left) code2table(tree->left, table, code, deep + 1);
+    if (tree->type != code_NODE) {
+        int i = tree->word;
+        code[deep] = '\0';
+        if (tree->type == code_TERM) i = CODELEN; // terminal code
+        table[i] = malloc((deep + 1) * sizeof(char));
+        if (!table[i]) {
+            fprintf(stderr, "hzip: out of memory\n");
+            exit(1);
+        }
+        strcpy(table[i], code);
+    }
+    code[deep] = '0';
+    if (tree->right) code2table(tree->right, table, code, deep + 1);
+    if (first) free(code);
+}
+
+/* Allocate a tree item with frequency `c`, children `l` and `r` and kind `t`
+ * (one of the code_* constants); its word is set to 0.
+ * The program is terminated when memory runs out, because no caller
+ * checks the result for NULL. */
+struct item * newitem(int c, struct item * l, struct item * r, int t) {
+    struct item * ni = malloc(sizeof *ni);
+    if (!ni) {
+        fprintf(stderr, "hzip: out of memory\n");
+        exit(1);
+    }
+    ni->type = t;
+    ni->word = 0;
+    ni->count = c;
+    ni->left = l;
+    ni->right = r;
+    return ni;
+}
+
+/* Count the 16-bit words (byte pairs, in the byte order of the machine) of
+ * the stream `f` and create one code_LEAF item for every word that occurs,
+ * followed by one code_TERM item.
+ *
+ * *dest     receives a malloc()ed array with room for CODELEN + 1 items
+ * *termword receives the terminal word: bytes (1, last byte) when the
+ *           stream has an odd length, bytes (0, 0) otherwise
+ * Returns the number of items stored in *dest, or -1 if the array could
+ * not be allocated. */
+int get_freqdata(struct item *** dest, FILE * f, unsigned short * termword) {
+    int freq[CODELEN];
+    int i, j, k, n;
+    unsigned char c[2];
+    unsigned short w;
+    for (i = 0; i < CODELEN; i++) freq[i] = 0;
+    while((j = getc(f)) != EOF && (k = getc(f)) != EOF) {
+        c[0] = (unsigned char) j;
+        c[1] = (unsigned char) k;
+        /* memcpy instead of a pointer cast: same value, no aliasing or
+         * alignment problem */
+        memcpy(&w, c, sizeof w);
+        freq[w]++;
+    }
+    if (j != EOF) {
+        /* the loop stopped after the first byte of a pair: odd length */
+        c[0] = 1;
+        c[1] = (unsigned char) j;
+    } else {
+        c[0] = 0;
+        c[1] = 0;
+    }
+    *dest = (struct item **) malloc((CODELEN + 1) * sizeof(struct item *));
+    if (!*dest) return -1;
+    for (i = 0, n = 0; i < CODELEN; i++) if (freq[i]) {
+        (*dest)[n] = newitem(freq[i], NULL, NULL, code_LEAF);
+        (*dest)[n]->word = i;
+        n++;
+    }
+    // terminal sequence (also contains the last odd byte of the file)
+    (*dest)[n] = newitem(1, NULL, NULL, code_TERM);
+    memcpy(&w, c, sizeof w);
+    *termword = w;
+    return n + 1;
+}
+
+void get_codetable(struct item **l, int n, char ** table) { /* Build the Huffman tree
+ * of the n items in l[] and store the resulting codes in table[].
+ * The array l[] is consumed: when the loop ends, l[0] is the root of the tree. */
+ int i;
+ while (n > 1) {
+ int min = 0; /* index of the smallest count found so far */
+ int mi2 = 1; /* index of the runner-up */
+ for (i = 1; i < n; i++) {
+ if (l[i]->count < l[min]->count) {
+ mi2 = min;
+ min = i;
+ } else if (l[i]->count < l[mi2]->count) mi2 = i;
+ }
+ l[min] = newitem(l[min]->count + l[mi2]->count, l[min], l[mi2], code_NODE); /* join both items below a new inner node */
+ for (i = mi2 + 1; i < n; i++) l[i - 1] = l[i]; /* close the gap left by l[mi2] */
+ n--;
+ }
+ code2table(l[0], table, NULL, 0); /* NULL: code2table allocates its own scratch buffer */
+}
+
+void write_bits(FILE *f, char * bitbuf, int *bits, char * code) { /* Append the '0'/'1'
+ * characters of `code` to bitbuf as single bits, highest bit of a byte first.
+ * *bits is the number of bits stored in bitbuf; when BUFSIZE bytes are full
+ * they are written to f and *bits starts again at 0. */
+ while (*code) {
+ int b = (*bits) % 8; /* position of the next bit inside its byte */
+ if (!b) bitbuf[(*bits) / 8] = ((*code) - '0') << 7; /* first bit of a byte: overwrite the old content */
+ else bitbuf[(*bits) / 8] |= (((*code) - '0') << (7 - b));
+ (*bits)++;
+ code++;
+ if (*bits == BUFSIZE * 8) {
+ fwrite(bitbuf, sizeof(char), BUFSIZE, f);
+ *bits = 0;
+ }
+ }
+}
+
+void encode_file(char ** table, int n, FILE *f, FILE *f2, unsigned short tw, char * key) { /*
+ * Write the compressed file to f2: header, code table, then the words of f
+ * replaced by their codes from table[].
+ *   table  codes indexed by 16-bit word; table[CODELEN] is the terminal code
+ *   n      number of codes (record count stored in the header)
+ *   f      input stream, read in byte pairs from its current position
+ *   tw     terminal word written as the id of the terminal code
+ *   key    password, or NULL; with a key the header and the code table are
+ *          XORed with the key bytes, which are used cyclically.  The coded
+ *          words after the table are written without using the key. */
+ char bitbuf[BUFSIZE];
+ int i, bits = 0;
+ unsigned char cl, ch;
+ int cx[2];
+ char c[2];
+ char * enc = key; /* current position in the key */
+
+ // header and codes
+ fprintf(f2, "%s", (key ? MAGIC_ENCRYPTED : MAGIC)); // 3-byte HEADER
+ cl = (unsigned char) (n & 0x00ff);
+ ch = (unsigned char) (n >> 8);
+ if (key) {
+ unsigned char cs;
+ for (cs = 0; *enc; enc++) cs ^= *enc; /* XOR of all key bytes */
+ fprintf(f2, "%c", cs); // 1-byte check sum
+ enc = key;
+ ch ^= *enc;
+ if ((*(++enc)) == '\0') enc = key; /* wrap around at the end of the key */
+ cl ^= *enc;
+ }
+ fprintf(f2, "%c%c", ch, cl); // upper and lower byte of record count
+ for (i = 0; i < BUFSIZE; i++) bitbuf[i] = '\0';
+ for (i = 0; i < CODELEN + 1; i++) if (table[i]) {
+ unsigned short * d = (unsigned short *) &c; /* c[] receives the word in the byte order of the machine */
+ *d = (unsigned short) i;
+ if (i == CODELEN) *d = tw; /* the terminal code is stored with the terminal word as id */
+ if (key) {
+ if (*(++enc) == '\0') enc = key;
+ c[0] ^= *enc;
+ if (*(++enc) == '\0') enc = key;
+ c[1] ^= *enc;
+ }
+ fprintf(f2, "%c%c", c[0], c[1]); // 2-character code id
+ bits = 0;
+ write_bits(f2, bitbuf, &bits, table[i]); /* packs the code into bitbuf; bits becomes its length */
+ if (key) {
+ if (*(++enc) == '\0') enc = key;
+ fprintf(f2, "%c", ((unsigned char) bits) ^ *enc);
+ for (cl = 0; cl <= bits/8; cl++) {
+ if (*(++enc) == '\0') enc = key;
+ bitbuf[cl] ^= *enc;
+ }
+ } else fprintf(f2, "%c", (unsigned char) bits); // 1-byte code length
+ fwrite(bitbuf, sizeof(char), bits/8 + 1, f2); // x-byte code
+ }
+
+ // file encoding
+ bits = 0;
+ while((cx[0] = getc(f)) != -1 && (cx[1] = getc(f)) != -1) {
+ c[0] = cx[0];
+ c[1] = cx[1];
+ write_bits(f2, bitbuf, &bits, table[*((unsigned short *) c)]);
+ }
+ // terminal suffixes
+ write_bits(f2, bitbuf, &bits, table[CODELEN]);
+ if (bits > 0) fwrite(bitbuf, sizeof(char), bits/8 + 1, f2); /* flush the bits still in the buffer */
+}
+
+/* Prefix-suffix encoding: copy the lines of `f` to `tempfile`, leaving out
+ * the leading and trailing characters a line shares with the line before it.
+ *
+ * A line ending in '\n' is written as
+ *   - its remaining (middle) characters; a byte below 47 other than tab and
+ *     space is preceded by ESCAPE,
+ *   - one byte m + 31 (33-46) if m > 1 trailing characters are shared
+ *     (at most 15),
+ *   - one byte with the number of shared leading characters (0-29; the
+ *     value 9 is written as 30 so that it cannot be taken for a tab).
+ * Input without a final '\n' (the last line, or a chunk of an overlong line)
+ * is written with escapes only. */
+void prefixcompress(FILE *f, FILE *tempfile) {
+    char buf[BUFSIZE];
+    char buf2[BUFSIZE * 2];
+    char prev[BUFSIZE];
+    int prevlen = 0;
+    while(fgets(buf,BUFSIZE,f)) {
+        int i, j, k, m, c = 0;
+        int pfx = prevlen;
+        char * p = buf2;
+        m = j = 0;
+        /* j = length of the prefix shared with prev, i = length of buf */
+        for (i = 0; buf[i]; i++) {
+            if ((pfx > 0) && (buf[i] == prev[i])) {
+                j++;
+            } else pfx = 0;
+        }
+        if (i > 0 && buf[i - 1] == '\n') {
+            if (j == i) j--; // line duplicate
+            if (j > 29) j = 29;
+            c = j;
+            if (c == '\t') c = 30;
+            // common suffix
+            /* The bounds are tested before the characters are compared:
+             * the former order read buf[-1] for an empty line and
+             * prev[-2] (uninitialised) for the first line. */
+            for (; m < i - j - 1 && m < 15 && m < prevlen - 1 &&
+                   buf[i - m - 2] == prev[prevlen - m - 2]; m++);
+            if (m == 1) m = 0;
+        } else {
+            j = 0;
+            m = -1;  /* makes the loop below copy all i characters */
+        }
+        for (k = j; k < i - m - 1; k++, p++) {
+            if (((unsigned char) buf[k]) < 47 && buf[k] != '\t' && buf[k] != ' ') {
+                *p = ESCAPE;
+                p++;
+            }
+            *p = buf[k];
+        }
+        if (m > 0) {
+            *p = m + 31; // 33-46
+            p++;
+        }
+        if (i > 0 && buf[i - 1] == '\n') {
+            *p = c;
+            fwrite(buf2, 1, p - buf2 + 1, tempfile);
+        } else fwrite(buf2, 1, p - buf2, tempfile);
+        memcpy(prev, buf, i);
+        prevlen = i;
+    }
+}
+
+/* Release a Huffman tree built by get_codetable(). */
+static void free_tree(struct item * t) {
+    if (!t) return;
+    free_tree(t->left);
+    free_tree(t->right);
+    free(t);
+}
+
+/* Compress `filename` into `filename` + EXTENSION.
+ * `key` is the password for encrypted output, or NULL.
+ * Returns 0 on success; on failure a message is printed and 1 is returned.
+ * All streams and all memory allocated for the file are released before
+ * returning, so the function can be called for many files in a row. */
+int hzip(const char * filename, char * key) {
+    struct item ** list = NULL;
+    char * table[CODELEN + 1];
+    int n, rc = 1;
+    char out[BUFSIZE];
+    FILE *f = NULL, *f2 = NULL, *tempfile = NULL;
+    unsigned short termword;
+
+    /* includes table[CODELEN], the slot of the terminal code */
+    for (n = 0; n < CODELEN + 1; n++) table[n] = NULL;
+    if (strlen(filename) + strlen(EXTENSION) >= sizeof(out))
+        return fail("hzip: %s: file name too long\n", filename);
+    strcpy(out, filename);
+    strcat(out, EXTENSION);
+    f = fopen(filename, "r");
+    if (!f) {
+        fail("hzip: %s: Permission denied\n", filename);
+        goto done;
+    }
+    tempfile = tmpfile();
+    if (!tempfile) {
+        fail("hzip: cannot create temporary file\n", NULL);
+        goto done;
+    }
+    f2 = fopen(out, "w");
+    if (!f2) {
+        fail("hzip: %s: Permission denied\n", out);
+        goto done;
+    }
+    /* pass 1: prefix-suffix encoding into the temporary file */
+    prefixcompress(f, tempfile);
+    rewind(tempfile);
+    /* pass 2: word statistics and Huffman codes */
+    n = get_freqdata(&list, tempfile, &termword);
+    if (n < 0) {
+        fail("hzip: out of memory\n", NULL);
+        goto done;
+    }
+    get_codetable(list, n, table);
+    rewind(tempfile);
+    /* pass 3: header, code table and coded words */
+    encode_file(table, n, tempfile, f2, termword, key);
+    rc = ferror(f2) ? fail("hzip: %s: write error\n", out) : 0;
+done:
+    if (f2 && fclose(f2) != 0 && rc == 0)
+        rc = fail("hzip: %s: write error\n", out);
+    if (tempfile) fclose(tempfile);
+    if (f) fclose(f);
+    for (n = 0; n < CODELEN + 1; n++) free(table[n]);
+    if (list) {
+        /* get_codetable() left the root of the tree in list[0] */
+        free_tree(list[0]);
+        free(list);
+    }
+    return rc;
+}
+
+/* hzip [-h | -P password] [file1 file2 ..]
+ * Arguments are handled from left to right: every file name is compressed
+ * at once, so a password set with -P is used for the files that follow it.
+ * Returns 0 if at least one file was given and all were compressed. */
+int main(int argc, char** argv) {
+    char * key = NULL;
+    int compressed = 0;
+    int i = 1;
+    while (i < argc) {
+        char * arg = argv[i++];
+        if (arg[0] != '-') {
+            if (hzip(arg, key) != 0) return 1;
+            compressed = 1;
+            continue;
+        }
+        /* only the character after the dash selects the option */
+        switch (arg[1]) {
+        case 'h':
+            return fail(DESC, NULL);
+        case 'P':
+            if (i == argc)
+                return fail("hzip: missing password\n", NULL);
+            key = argv[i++];
+            break;
+        default:
+            return fail("hzip: no such option: %s\n", arg);
+        }
+    }
+    if (!compressed) return fail("hzip: need a filename parameter\n", NULL);
+    return 0;
+}
diff --git a/src/win_api/hunspelldll.c b/src/win_api/hunspelldll.c
index 583d96e..797359b 100644
--- a/src/win_api/hunspelldll.c
+++ b/src/win_api/hunspelldll.c
@@ -48,6 +48,12 @@ DLLEXPORT void * hunspell_initialize(char *aff_file, char *dict_file)
return pMS;
}
+/* Like hunspell_initialize(), with `key` handed to the Hunspell constructor
+ * as third argument; the new object is returned as an untyped pointer. */
+DLLEXPORT void * hunspell_initialize_key(char *aff_file, char *dict_file, char * key)
+{
+    return new Hunspell(aff_file, dict_file, key);
+}
+
DLLEXPORT void hunspell_uninitialize(Hunspell *pMS)
{
delete pMS;
@@ -82,9 +88,9 @@ DLLEXPORT char * hunspell_get_dic_encoding(Hunspell *pMS)
return pMS->get_dic_encoding();
}
-DLLEXPORT int hunspell_put_word(Hunspell *pMS, char *word)
+DLLEXPORT int hunspell_add(Hunspell *pMS, char *word)
{
- return pMS->put_word(word);
+ return pMS->add(word);
}
diff --git a/tests/IJ.good b/tests/IJ.good
new file mode 100644
index 0000000..5f888f0
--- /dev/null
+++ b/tests/IJ.good
@@ -0,0 +1,2 @@
+ijs
+IJs
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 8e0d947..5fa971e 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -476,6 +476,7 @@ ignoreutf.test \
1463589_utf.wrong \
IJ.aff \
IJ.dic \
+IJ.good \
IJ.sug \
IJ.test \
IJ.wrong \
diff --git a/tests/Makefile.in b/tests/Makefile.in
index 836ddd5..76180ff 100644
--- a/tests/Makefile.in
+++ b/tests/Makefile.in
@@ -661,6 +661,7 @@ ignoreutf.test \
1463589_utf.wrong \
IJ.aff \
IJ.dic \
+IJ.good \
IJ.sug \
IJ.test \
IJ.wrong \
diff --git a/tests/suggestiontest/Makefile.orig b/tests/suggestiontest/Makefile.orig
index 65e24c8..a983776 100644
--- a/tests/suggestiontest/Makefile.orig
+++ b/tests/suggestiontest/Makefile.orig
@@ -1,4 +1,4 @@
-all: aspell.txt hunspell.txt
+all:
./prepare List*txt
./test List*txt
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-openoffice/hunspell.git
Reply to: