[hunspell] 10/98: Imported Upstream version 1.1.0
This is an automated email from the git hooks/post-receive script.
rene pushed a commit to branch master
in repository hunspell.
commit 574b3661f9add7069b41ea48abf8b192485c55b4
Author: Rene Engelhard <rene@debian.org>
Date: Thu Apr 21 14:45:09 2016 +0200
Imported Upstream version 1.1.0
---
ChangeLog | 77 ++
NEWS | 61 ++
README | 17 +-
THANKS | 1 +
TODO | 1 -
configure | 56 +-
configure.ac | 4 +-
man/hunspell.4 | 15 +
po/hu.gmo | Bin 7638 -> 7975 bytes
po/hu.po | 79 +-
src/hunspell/Makefile.am | 2 +-
src/hunspell/Makefile.in | 4 +-
src/hunspell/README | 3 +
src/hunspell/affentry.cxx | 55 +-
src/hunspell/affixmgr.cxx | 453 ++++----
src/hunspell/affixmgr.hxx | 12 +-
src/hunspell/atypes.hxx | 5 +-
src/hunspell/csutil.cxx | 1146 ++++++++++++--------
src/hunspell/csutil.hxx | 2 +-
src/hunspell/dictmgr.cxx | 94 +-
src/hunspell/dictmgr.hxx | 3 +
src/hunspell/hashmgr.cxx | 56 +-
src/hunspell/hashmgr.hxx | 4 +-
src/hunspell/hunspell.cxx | 165 +--
src/hunspell/langnum.hxx | 1 +
src/hunspell/suggestmgr.cxx | 233 ++--
src/hunspell/suggestmgr.hxx | 5 +-
src/tools/hunmorph.cxx | 1 +
src/tools/munch.c | 10 +-
src/tools/munch.h | 2 +-
src/tools/unmunch.c | 11 +-
src/tools/unmunch.h | 2 +-
tests/Makefile.am | 94 +-
tests/Makefile.in | 94 +-
tests/base.aff | 192 ++++
tests/base.dic | 28 +
tests/base.good | 22 +
tests/base.sug | 10 +
tests/{circumfix.test => base.test} | 2 +-
tests/base.wrong | 11 +
tests/checkcompounddup.aff | 3 +
tests/checkcompounddup.dic | 3 +
tests/checkcompounddup.good | 5 +
tests/{circumfix.test => checkcompounddup.test} | 2 +-
tests/checkcompounddup.wrong | 3 +
tests/checkcompoundpattern.aff | 5 +
tests/checkcompoundpattern.dic | 5 +
tests/checkcompoundpattern.good | 2 +
.../{circumfix.test => checkcompoundpattern.test} | 2 +-
tests/checkcompoundpattern.wrong | 4 +
tests/checkcompoundrep.aff | 8 +
tests/checkcompoundrep.dic | 4 +
tests/checkcompoundrep.good | 1 +
tests/{circumfix.test => checkcompoundrep.test} | 2 +-
tests/checkcompoundrep.wrong | 1 +
tests/checkcompoundtriple.aff | 3 +
tests/checkcompoundtriple.dic | 5 +
tests/checkcompoundtriple.good | 6 +
tests/{circumfix.test => checkcompoundtriple.test} | 2 +-
tests/checkcompoundtriple.wrong | 2 +
tests/circumfix.test | 2 +-
tests/complexprefixes.test | 2 +-
tests/complexprefixesutf.aff | 12 +
tests/complexprefixesutf.dic | 2 +
tests/complexprefixesutf.good | 3 +
tests/{circumfix.test => complexprefixesutf.test} | 2 +-
tests/complexprefixesutf.wrong | 2 +
tests/compound.test | 2 +-
tests/compoundaffix.good | 1 +
tests/compoundaffix.test | 2 +-
tests/compoundaffix.wrong | 1 -
tests/compoundaffix2.test | 2 +-
tests/compoundaffix2.wrong | 1 -
tests/compoundaffix3.good | 1 +
tests/compoundaffix3.test | 2 +-
tests/compoundaffix3.wrong | 1 -
tests/conditionalprefix.test | 2 +-
tests/flag.test | 2 +-
tests/flaglong.test | 2 +-
tests/flagnum.test | 2 +-
tests/fogemorpheme.test | 2 +-
tests/forbiddenword.test | 2 +-
tests/germancompounding.test | 2 +-
tests/germansharps.test | 2 +-
tests/germansharpsutf.test | 2 +-
tests/i35725.aff | 199 ++++
tests/i35725.dic | 15 +
tests/i35725.good | 1 +
tests/i35725.sug | 10 +
tests/{circumfix.test => i35725.test} | 2 +-
tests/i35725.wrong | 10 +
tests/i54633.aff | 1 +
tests/i54633.dic | 2 +
tests/i54633.good | 2 +
tests/i54633.sug | 2 +
tests/{circumfix.test => i54633.test} | 2 +-
tests/i54633.wrong | 2 +
tests/map.aff | 8 +
tests/map.dic | 3 +
tests/map.sug | 2 +
tests/{circumfix.test => map.test} | 2 +-
tests/map.wrong | 2 +
tests/maputf.aff | 10 +
tests/maputf.dic | 3 +
tests/maputf.sug | 2 +
tests/{circumfix.test => maputf.test} | 2 +-
tests/maputf.wrong | 2 +
tests/onlyincompound.aff | 5 +
tests/onlyincompound.dic | 3 +
tests/onlyincompound.good | 4 +
tests/{circumfix.test => onlyincompound.test} | 2 +-
tests/onlyincompound.wrong | 2 +
tests/pseudoroot.test | 2 +-
tests/pseudoroot2.test | 2 +-
tests/pseudoroot3.test | 2 +-
tests/pseudoroot4.test | 2 +-
tests/pseudoroot5.aff | 13 +
tests/pseudoroot5.dic | 2 +
tests/pseudoroot5.good | 11 +
tests/{circumfix.test => pseudoroot5.test} | 2 +-
tests/pseudoroot5.wrong | 3 +
tests/rep.aff | 9 +
tests/rep.dic | 4 +
tests/rep.sug | 3 +
tests/{circumfix.test => rep.test} | 2 +-
tests/rep.wrong | 3 +
tests/reputf.aff | 9 +
tests/reputf.dic | 2 +
tests/reputf.sug | 1 +
tests/{circumfix.test => reputf.test} | 2 +-
tests/reputf.wrong | 1 +
tests/test.sh | 108 ++
tests/test_hunmorph | 47 -
tests/utf8.test | 2 +-
tests/utfcompound.test | 2 +-
tests/zeroaffix.test | 2 +-
136 files changed, 2635 insertions(+), 1058 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 5278180..6ddae32 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,80 @@
+2005-09-19 Németh László <nemethl@gyorsposta.hu>:
+ * src/hunspell/suggestmgr.cxx: improved ngram suggestion:
+ - detect not neighboring swap characters (pernament -> permanent)
+ Rationale: ngram method has a significant error with not neighboring
+ swap characters, especially when swap is in the middle of the word.
+ - suggest uppercase forms (unesco -> UNESCO, siggraph's -> SIGGRAPH's)
+ - suggest only ngram swap character and uppercase form, if they exist.
+ Rationale: swap character and casing equivalence give much better
+ suggestions than any other (weighted) ngram suggestions.
+ - add uppercase suggestion (PERMENANT -> PERMANENT)
+
+ * src/hunspell/*: complete comparison with MySpell 3.2 (in OOo beta 2):
+ - affixmgr.cxx: add missing numrep initialization
+ - hashmgr.cxx: add_word(): don't allocate temporary records
+ - hunspell.cxx: in suggest():
+ - check capitalized words first (better sug. order for proper names),
+ - check pSMgr->suggest() return value
+ - set pSMgr->suggest() call to not optional in HUHCAP
+ - csutil.cxx: fix bad KOI8-U -> koi8r_tbl reference in enc_entry encds
+ - csutil.cxx: fix casing data in ISO 8859-2, Windows 1251 and KOI8-U
+ encoding tables. Bug reported by Dmitri Gabinski.
+
+ * src/hunspell/affixmgr.*: improved compound word and other features
+ - generalize hu_HU specific compound word features with new affix file
+ parameters, suggested by Bram Moolenaar:
+ - CHECKCOMPOUNDDUP: forbid word duplication in compounds (eg. foo|foo)
+ - CHECKCOMPOUNDTRIPLE: forbid triple letters in compounds (eg. foo|obar)
+ - CHECKCOMPOUNDPATTERN: forbid patterns at word bounds in compounds
+ - CHECKCOMPOUNDREP: using REP replacement table, forbid presumably bad
+ compounds (useful for languages with unlimited number of compounds)
+ - ONLYINCOMPOUND flag works also with words (see tests/onlyincompound.*)
+ Suggested by Daniel Naber, Björn Jacke, Trón Viktor & Bram Moolenaar.
+ - PSEUDOROOT works also with prefixes and prefix + suffix combinations
+ (see tests/pseudoroot5.*). Suggested by Trón Viktor.
+ - man/hunspell.4: updated man page
+
+ * src/hunspell/affixmgr.*: fix incomplete prefix handling with twofold
+ suffixes (delete unnecessary contclasses[] conditions in
+ prefix_check_twosfx() and prefix_check_twosfx_morph()).
+ Bug reported by Trón Viktor.
+
+ * src/hunspell/affixmgr.*: complete also *_morph() functions with
+ conditions of new Hunspell features (circumfix, pseudoroot etc.).
+
+ * src/hunspell/suggestmgr.cxx:
+ - fix missing suggestions for words with crossed prefix and suffix
+ - fix redundant non compound word checking
+ - fix losing suggestions problem. Bug reported by Dmitri Gabinski.
+
+ * src/hunspell/dictmgr.*:
+ - add new dictionary manager for Hunspell UNO module
+ Problems with eo_ANY Esperanto locale reported by Dmitri Gabinski.
+
+ * src/hunspell/*: use precise constant sizes for 8-bit and 16-bit character
+ arrays with MAXWORDUTF8LEN and MAXSWUTF8L macros.
+
+ * src/hunspell/affixmgr.cxx: fix bad MAXNGRAMSUGS parameter handling
+
+ * src/hunspell/affixmgr.cxx, src/tools/{un}munch.*: fix GCC 4.0 warnings
+ on fgets(), reported by Dvornik László
+
+ * po/hu.po: improved translation by Dvornik László
+
+ * tests/test.sh: improved test environment
+ - add suggestion testing (see tests/*.sug)
+ - add memory debugging environment, based on the excellent Valgrind debugger.
+ Usage on Linux and experimental platforms of Valgrind:
+ VALGRIND=memcheck make check
+ - rename test_hunmorph to test.sh
+
+ * tests/*: new tests:
+ - base.*: base example based on MySpell's checkme.lst.
+ - map{,utf}.*, rep{,utf}: MAP and REP suggestion examples
+ - tests on new CHECKCOMPOUND, ONLYINCOMPOUND and PSEUDOROOT features
+ - i54633.*: capitalized suggestion test for Issue 54633 from OOo's Issuezilla
+ - i35725.*: improved ngram suggestion test for Issue 35725
+
2005-08-26 Németh László <nemethl@gyorsposta.hu>:
improvements:
diff --git a/NEWS b/NEWS
index 6d6c7f5..f36279c 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,64 @@
+2005-09-19: Hunspell 1.1.0 release
+
+* complete comparison with MySpell 3.2 (from OpenOffice.org 2 beta)
+
+* improved ngram suggestion with swap character detection and
+ case insensitivity
+
+------ examples for ngram improvement (input word and suggestions) -----
+
+1. pernament (instead of permanent)
+
+MySpell 3.2: tournaments, tournament, ornaments, ornament's, ornamenting, ornamented,
+ ornament, ornamentals, ornamental, ornamentally
+
+Hunspell 1.0.9: ornamental, ornament, tournament
+
+Hunspell 1.1.0: permanent
+
+Note: swap character detection
+
+
+2. PERNAMENT (instead of PERMANENT)
+
+MySpell 3.2: -
+
+Hunspell 1.0.9: -
+
+Hunspell 1.1.0: PERMANENT
+
+
+3. Unesco (instead of UNESCO)
+
+MySpell 3.2: Genesco, Ionesco, Genesco's, Ionesco's, Frescoing, Fresco's,
+ Frescoed, Fresco, Escorts, Escorting
+
+Hunspell 1.0.9: Genesco, Ionesco, Fresco
+
+Hunspell 1.1.0: UNESCO
+
+
+4. siggraph's (instead of SIGGRAPH's)
+
+MySpell 3.2: serigraph's, photograph's, serigraphs, physiography's,
+ physiography, digraphs, serigraph, stratigraphy's, stratigraphy,
+ epigraphs
+
+Hunspell 1.0.9: serigraph's, epigraph's, digraph's
+
+Hunspell 1.1.0: SIGGRAPH's
+
+--------------- end of examples --------------------
+
+* improved testing environment with suggestion checking and memory debugging
+
+ memory debugging of all tests with a simple command:
+
+ VALGRIND=memcheck make check
+
+* lots of other improvements and bug fixes (see ChangeLog)
+
+
2005-08-26: Hunspell 1.0.9 release
* improved related character map suggestion
diff --git a/README b/README
index 3ed9d70..338d208 100644
--- a/README
+++ b/README
@@ -51,6 +51,21 @@ locale and gettext (but you can also use the
Hunspell distribution uses new Autoconf (2.59) and Automake (1.9).
+Testing
+-------
+
+Testing Hunspell (see tests in tests/ subdirectory):
+
+make check
+
+or with Valgrind debugger:
+
+VALGRIND=[Valgrind_tool] make check
+
+For example:
+
+VALGRIND=memcheck make check
+
Documentation
-------------
@@ -103,5 +118,5 @@ http://lingucomponent.openoffice.org/spell_dic.html
Aspell dictionaries (need some conversion):
ftp://ftp.gnu.org/gnu/aspell/dict
-László Németh
+Németh László
nemethl@gyorsposta.hu
diff --git a/THANKS b/THANKS
index 21f85ee..2ddccb9 100644
--- a/THANKS
+++ b/THANKS
@@ -1,5 +1,6 @@
Bencsáth Boldizsár
Bíró Árpád
+Dmitri Gabinski
Dvornik László
Gefferth András
Godó Ferenc
diff --git a/TODO b/TODO
index 3109034..cd3557b 100644
--- a/TODO
+++ b/TODO
@@ -1,5 +1,4 @@
* shared dictionaries for multi-user environment
-* check less conditions
* new data structure for morphological analysis
* implement morphological generation
* improve compound handling
diff --git a/configure b/configure
index 6fc3191..69f673b 100755
--- a/configure
+++ b/configure
@@ -1,8 +1,8 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.59 for hunspell 1.0.9.
+# Generated by GNU Autoconf 2.59 for hunspell 1.1.0.
#
-# Report bugs to <nemeth@mokk.bme.hu>.
+# Report bugs to <nemethl@gyorsposta.hu>.
#
# Copyright (C) 2003 Free Software Foundation, Inc.
# This configure script is free software; the Free Software Foundation
@@ -269,9 +269,9 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='hunspell'
PACKAGE_TARNAME='hunspell'
-PACKAGE_VERSION='1.0.9'
-PACKAGE_STRING='hunspell 1.0.9'
-PACKAGE_BUGREPORT='nemeth@mokk.bme.hu'
+PACKAGE_VERSION='1.1.0'
+PACKAGE_STRING='hunspell 1.1.0'
+PACKAGE_BUGREPORT='nemethl@gyorsposta.hu'
ac_unique_file="config.h.in"
# Factoring default headers for most tests.
@@ -788,7 +788,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures hunspell 1.0.9 to adapt to many kinds of systems.
+\`configure' configures hunspell 1.1.0 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -854,7 +854,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of hunspell 1.0.9:";;
+ short | recursive ) echo "Configuration of hunspell 1.1.0:";;
esac
cat <<\_ACEOF
@@ -891,7 +891,7 @@ Some influential environment variables:
Use these variables to override the choices made by `configure' or to help
it to find libraries and programs with nonstandard names/locations.
-Report bugs to <nemeth@mokk.bme.hu>.
+Report bugs to <nemethl@gyorsposta.hu>.
_ACEOF
fi
@@ -987,7 +987,7 @@ fi
test -n "$ac_init_help" && exit 0
if $ac_init_version; then
cat <<\_ACEOF
-hunspell configure 1.0.9
+hunspell configure 1.1.0
generated by GNU Autoconf 2.59
Copyright (C) 2003 Free Software Foundation, Inc.
@@ -1001,7 +1001,7 @@ cat >&5 <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by hunspell $as_me 1.0.9, which was
+It was created by hunspell $as_me 1.1.0, which was
generated by GNU Autoconf 2.59. Invocation command line was
$ $0 $@
@@ -1644,7 +1644,7 @@ fi
# Define the identity of the package.
PACKAGE=hunspell
- VERSION=1.0.9
+ VERSION=1.1.0
cat >>confdefs.h <<_ACEOF
@@ -4049,9 +4049,9 @@ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&
echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
(
cat <<\_ASBOX
-## --------------------------------- ##
-## Report this to nemeth@mokk.bme.hu ##
-## --------------------------------- ##
+## ------------------------------------ ##
+## Report this to nemethl@gyorsposta.hu ##
+## ------------------------------------ ##
_ASBOX
) |
sed "s/^/$as_me: WARNING: /" >&2
@@ -4446,9 +4446,9 @@ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&
echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
(
cat <<\_ASBOX
-## --------------------------------- ##
-## Report this to nemeth@mokk.bme.hu ##
-## --------------------------------- ##
+## ------------------------------------ ##
+## Report this to nemethl@gyorsposta.hu ##
+## ------------------------------------ ##
_ASBOX
) |
sed "s/^/$as_me: WARNING: /" >&2
@@ -5815,9 +5815,9 @@ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&
echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
(
cat <<\_ASBOX
-## --------------------------------- ##
-## Report this to nemeth@mokk.bme.hu ##
-## --------------------------------- ##
+## ------------------------------------ ##
+## Report this to nemethl@gyorsposta.hu ##
+## ------------------------------------ ##
_ASBOX
) |
sed "s/^/$as_me: WARNING: /" >&2
@@ -6975,9 +6975,9 @@ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&
echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
(
cat <<\_ASBOX
-## --------------------------------- ##
-## Report this to nemeth@mokk.bme.hu ##
-## --------------------------------- ##
+## ------------------------------------ ##
+## Report this to nemethl@gyorsposta.hu ##
+## ------------------------------------ ##
_ASBOX
) |
sed "s/^/$as_me: WARNING: /" >&2
@@ -8625,9 +8625,9 @@ echo "$as_me: WARNING: readline/readline.h: proceeding with the preprocessor's r
echo "$as_me: WARNING: readline/readline.h: in the future, the compiler will take precedence" >&2;}
(
cat <<\_ASBOX
-## --------------------------------- ##
-## Report this to nemeth@mokk.bme.hu ##
-## --------------------------------- ##
+## ------------------------------------ ##
+## Report this to nemethl@gyorsposta.hu ##
+## ------------------------------------ ##
_ASBOX
) |
sed "s/^/$as_me: WARNING: /" >&2
@@ -9052,7 +9052,7 @@ _ASBOX
} >&5
cat >&5 <<_CSEOF
-This file was extended by hunspell $as_me 1.0.9, which was
+This file was extended by hunspell $as_me 1.1.0, which was
generated by GNU Autoconf 2.59. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -9115,7 +9115,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
-hunspell config.status 1.0.9
+hunspell config.status 1.1.0
configured by $0, generated by GNU Autoconf 2.59,
with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\"
diff --git a/configure.ac b/configure.ac
index ce795fe..eb28bac 100644
--- a/configure.ac
+++ b/configure.ac
@@ -4,8 +4,8 @@
m4_pattern_allow
AC_PREREQ(2.59)
-AC_INIT(hunspell, 1.0.9, nemeth@mokk.bme.hu)
-AM_INIT_AUTOMAKE(hunspell, 1.0.9)
+AC_INIT([hunspell],[1.1.0],[nemethl@gyorsposta.hu])
+AM_INIT_AUTOMAKE(hunspell, 1.1.0)
AC_CONFIG_SRCDIR([config.h.in])
AC_CONFIG_HEADER([config.h])
diff --git a/man/hunspell.4 b/man/hunspell.4
index 4c37fbb..909d92c 100644
--- a/man/hunspell.4
+++ b/man/hunspell.4
@@ -140,6 +140,21 @@ COMPOUNDROOT flag signs the compounds in the dictionary
(Now it is used only in the Hungarian language specific code).
.IP "COMPOUNDWORDMAX number"
Set maximum word count in a compound word. (Default is unlimited.)
+.IP "CHECKCOMPOUNDDUP"
+Forbid word duplication in compounds (eg. foofoo).
+.IP "CHECKCOMPOUNDREP"
+Forbid compounding, if (this usually bad) compound word may be
+a non compound word with a REP fault. Useful for languages with
+`compound friendly' orthography.
+.IP "CHECKCOMPOUNDTRIPLE"
+Forbid compounding, if compound word contains triple letters
+(eg. foo|ox or xo|oof).
+Bug: missing multi-byte character support in UTF-8 encoding
+(works only for 7-bit ASCII characters).
+.IP "CHECKCOMPOUNDPATTERN number_of_checkcompoundpattern_definitions"
+.IP "CHECKCOMPOUNDPATTERN endchars beginchars"
+Forbid compounding, if first word in compound ends with endchars, and
+next word begins with beginchars.
.IP "FORBIDDENWORD flag"
This flag signs forbidden word form. Because affixed forms
are also forbidden, we can substract a subset from set of
diff --git a/po/hu.gmo b/po/hu.gmo
index 9324f2d..b9a87dc 100644
Binary files a/po/hu.gmo and b/po/hu.gmo differ
diff --git a/po/hu.po b/po/hu.po
index f84a17b..f75ea92 100644
--- a/po/hu.po
+++ b/po/hu.po
@@ -1,19 +1,19 @@
-# MySpell's Ispell Interface.
-# Copyright (C) 2002 Laszlo Nemeth
-# This file is distributed under the same license as the Mispell package.
-# Laszlo Nemeth <nemethl@gyorsposta.hu>, 2002.
-#
+# Hungarian translation of hunspell.
+# Copyright (C) 2005 Free Software Foundation, Inc.
+# This file is distributed under the same license as the hunspell package.
+# Laszlo Nemeth <nemethl@gyorsposta.hu>, 2005.
+# Laszlo Dvornik <dvornik@gnome.hu>, 2005.
#
msgid ""
msgstr ""
-"Project-Id-Version: MIspell\n"
+"Project-Id-Version: hunspell\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2005-07-15 13:09+0200\n"
-"PO-Revision-Date: 2002-10-30 23:00+0100\n"
-"Last-Translator: Laszlo Nemeth <nemethl@gyorsposta.hu>\n"
+"PO-Revision-Date: 2005-09-03 11:22+0200\n"
+"Last-Translator: Laszlo Dvornik <dvornik@gnome.hu>\n"
"Language-Team: Hungarian <magyarispell@yahoogroups.com>\n"
"MIME-Version: 1.0\n"
-"Content-Type: text/plain; charset=iso-8859-2\n"
+"Content-Type: text/plain; charset=ISO-8859-2\n"
"Content-Transfer-Encoding: 8bit\n"
#: src/hunspell/hunspell.cxx:383
@@ -31,7 +31,7 @@ msgid ""
"\t%s\t\tFile: %s\n"
"\n"
msgstr ""
-"\t%s\t\t�llom�ny: %s\n"
+"\t%s\t\tFájl: %s\n"
"\n"
#: src/hunspell/hunspell.cxx:562
@@ -62,7 +62,7 @@ msgstr "egyet, de megadhatunk ak
#: src/hunspell/hunspell.cxx:611
msgid "completely, or choosing one of the suggested words.\n"
-msgstr "R�szletes le�r�s a program lehet�s�geir�l: man mispell\n"
+msgstr "Részletes leírás a program lehetőségeiről: man hunspell.\n"
#: src/hunspell/hunspell.cxx:612
msgid ""
@@ -107,12 +107,12 @@ msgstr "0-n\tA javasolt szavak k
#: src/hunspell/hunspell.cxx:620
msgid "L\tLook up words in system dictionary.\n"
-msgstr "L\tSz�, vagy minta alapj�n sz�ri a rendszer sz�t�r�t. (Nincs impl.)\n"
+msgstr "L\tSzó, vagy minta alapján szűri a rendszer szótárát.\n"
#: src/hunspell/hunspell.cxx:621
msgid ""
"X\tWrite the rest of this file, ignoring misspellings, and start next file.\n"
-msgstr "V\tMenti az eddigi jav�t�sokat, �s r�t�r a k�vetkez� �llom�nyra.\n"
+msgstr "V\tMenti az eddigi javításokat, és rátér a következő fájlra.\n"
#: src/hunspell/hunspell.cxx:622
msgid "Q\tQuit immediately. Asks for confirmation. Leaves file unchanged.\n"
@@ -120,7 +120,7 @@ msgstr "M\tKil
#: src/hunspell/hunspell.cxx:623
msgid "!\tShell escape.\n"
-msgstr "H\tIdeiglenes kil�p�s a h�jba. (Nincs m�g implement�lva.)\n"
+msgstr "H\tIdeiglenes kilépés a héjba.\n"
#: src/hunspell/hunspell.cxx:624
msgid "^L\tRedraw screen\n"
@@ -140,7 +140,7 @@ msgid ""
"-- Type space to continue -- \n"
msgstr ""
"\n"
-"-- Tov�bbl�p�s a sz�k�zbillenty� le�t�s�vel -- \n"
+"-- Továbblépés a szóköz billentyű leütésével -- \n"
#: src/hunspell/hunspell.cxx:635
msgid "r"
@@ -162,7 +162,7 @@ msgstr "f"
#: src/hunspell/hunspell.cxx:694 src/hunspell/hunspell.cxx:816
#, c-format
msgid "Cannot update personal dictionary."
-msgstr "Nem tudom a saj�t sz�t�rat m�dos�tani."
+msgstr "Nem lehet frissíteni a saját szótárat."
#: src/hunspell/hunspell.cxx:698
msgid "a"
@@ -203,19 +203,18 @@ msgstr "i"
#: src/hunspell/hunspell.cxx:899
#, c-format
msgid "Can't create tempfile %s.\n"
-msgstr "Nem tudom l�trehozni a %s ideiglenes �llom�nyt.\n"
+msgstr "Nem lehet létrehozni a(z) %s átmeneti fájlt.\n"
#: src/hunspell/hunspell.cxx:986
#, c-format
msgid "Usage: hunspell [OPTION]... [FILE]...\n"
-msgstr "Haszn�lat: hunspell [KAPCSOL�]... [�LLOM�NY]...\n"
+msgstr "Használat: hunspell [KAPCSOLÓ]... [FÁJL]...\n"
#: src/hunspell/hunspell.cxx:987
#, c-format
msgid "Check spelling of each FILE. Without FILE, check standard input.\n"
msgstr ""
-"Az �LLOM�NY(OK) (ennek hi�ny�ban a standard bemenet) helyes�r�s�t "
-"ellen�rzi.\n"
+"A FÁJL(OK) (ennek hiányában a szabványos bemenet) helyesírását ellenőrzi.\n"
#: src/hunspell/hunspell.cxx:989
#, c-format
@@ -236,7 +235,7 @@ msgstr " -d t
#: src/hunspell/hunspell.cxx:992
#, c-format
msgid " -G\t\tprint only correct words or lines\n"
-msgstr " -G\t\tki�rja a bemenet helyes szavait (vagy sorait)\n"
+msgstr " -G\t\tkiírja a bemenet helyes szavait vagy sorait\n"
#: src/hunspell/hunspell.cxx:993
#, c-format
@@ -286,7 +285,7 @@ msgstr " -u2\t\tki
#: src/hunspell/hunspell.cxx:1002
#, c-format
msgid " -U\t\tautomatic correction of typical misspellings to stdout\n"
-msgstr " -U\t\tki�rja az automatikusan jav�tott �llom�nyt\n"
+msgstr " -U\t\tkiírja az automatikusan javított fájlt\n"
#: src/hunspell/hunspell.cxx:1003
#, c-format
@@ -307,12 +306,12 @@ msgstr " -w\t\tki
#, c-format
msgid "Example: hunspell -d english file.txt # interactive spelling\n"
msgstr ""
-"P�lda: hunspell -d english �llom�ny.txt # interakt�v helyes�r�s-ellen�rz�s\n"
+"Példa: hunspell -d english fájl.txt # interaktív helyesírás-ellenőrzés\n"
#: src/hunspell/hunspell.cxx:1008
#, c-format
msgid " hunspell -l file.txt # print misspelled words\n"
-msgstr " hunspell -l �llom�ny.txt # ki�rja a hib�s szavakat\n"
+msgstr " hunspell -l fájl.txt # kiírja a hibás szavakat\n"
#: src/hunspell/hunspell.cxx:1009
#, c-format
@@ -320,8 +319,7 @@ msgid ""
" hunspell -u file.txt # print typical (=serious) "
"misspellings\n"
msgstr ""
-" hunspell -u �llom�ny.txt # ki�rja a tipikus (= s�lyos) "
-"hib�kat\n"
+" hunspell -u fájl.txt # kiírja a tipikus (=súlyos) hibákat\n"
#: src/hunspell/hunspell.cxx:1011
#, c-format
@@ -334,12 +332,12 @@ msgid "Copyright (C) 2002-2005 Nemeth Laszlo. License: GNU LGPL.\n"
msgstr "Copyright (C) 2002-2005 N�meth L�szl�. Licenc: GNU LGPL.\n"
#: src/hunspell/hunspell.cxx:1021
-#, fuzzy, c-format
+#, c-format
msgid "Based on OpenOffice.org's Myspell library.\n"
-msgstr "A Hunspell az OpenOffice.org Myspell f�ggv�nyk�nyvt�r�n alapszik.\n"
+msgstr "Az OpenOffice.org Myspell programkönyvtárán alapul.\n"
#: src/hunspell/hunspell.cxx:1022
-#, fuzzy, c-format
+#, c-format
msgid "Myspell's copyright (C) Kevin Hendricks, 2001-2002, License: BSD.\n"
msgstr "MySpell copyright (C) Kevin Hendricks, 2001-2002, Licenc: BSD.\n"
@@ -363,29 +361,14 @@ msgstr "alkalmazhat
#: src/hunspell/hunspell.cxx:1056 src/hunspell/hunspell.cxx:1138
#, c-format
msgid "Can't open %s.\n"
-msgstr "%s nem nyithat� meg.\n"
+msgstr "Nem lehet megnyitni a(z) %s-t.\n"
#: src/hunspell/hunspell.cxx:1088
#, c-format
msgid "Can't open affix or dictionary files.\n"
-msgstr "A ragoz�si vagy a sz�t�r�llom�ny nem nyithat� meg.\n"
+msgstr "Nem lehet megnyitni a ragozási vagy a szótárfájlt.\n"
#: src/hunspell/hunspell.cxx:1150 src/hunspell/hunspell.cxx:1153
-#, fuzzy, c-format
+#, c-format
msgid "Hunspell has been compiled without Ncurses user interface.\n"
-msgstr "A Hunspell forr�sk�d nem HUNMORPH t�mogat�ssal lett leford�tva.\n"
-
-#~ msgid "Can't move checked temporary file %s to %s.\n"
-#~ msgstr "Az ellen�rz�tt ideiglenes �llom�ny (%s) nem m�solhat� %s hely�re.\n"
-
-#~ msgid " - MySpell's Ispell Interface\n"
-#~ msgstr " - Ispell fel�let a (Magyar) MySpell f�ggv�nyk�nyvt�rhoz\n"
-
-#~ msgid " -s\t\tHunstem stemmer mode\n"
-#~ msgstr " -s\t\tHunstem t�vez� �zemm�d\n"
-
-#~ msgid "-s: STEMMING NOT IMPLEMENTED\n"
-#~ msgstr "-s: A T�VEZ�S NINCS MEGVAL�S�TVA\n"
-
-#~ msgid " -m\t\tHunmorph morphological analyzer mode\n"
-#~ msgstr " -s\t\tHunmorph morfol�giai elemz� �zemm�d\n"
+msgstr "A Hunspell Ncurses felhasználói felület nélkül lett fordítva.\n"
diff --git a/src/hunspell/Makefile.am b/src/hunspell/Makefile.am
index 27b2e2a..9925672 100644
--- a/src/hunspell/Makefile.am
+++ b/src/hunspell/Makefile.am
@@ -7,4 +7,4 @@ include_HEADERS=affentry.hxx htypes.hxx affixmgr.hxx \
csutil.hxx hunspell.hxx atypes.hxx dictmgr.hxx \
suggestmgr.hxx baseaffix.hxx hashmgr.hxx langnum.hxx
-EXTRA_DIST=hunspell.dsp makefile.mk
+EXTRA_DIST=hunspell.dsp makefile.mk README
diff --git a/src/hunspell/Makefile.in b/src/hunspell/Makefile.in
index 3087aeb..5674d67 100644
--- a/src/hunspell/Makefile.in
+++ b/src/hunspell/Makefile.in
@@ -40,7 +40,7 @@ POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
subdir = src/hunspell
-DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \
+DIST_COMMON = README $(include_HEADERS) $(srcdir)/Makefile.am \
$(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/codeset.m4 \
@@ -207,7 +207,7 @@ include_HEADERS = affentry.hxx htypes.hxx affixmgr.hxx \
csutil.hxx hunspell.hxx atypes.hxx dictmgr.hxx \
suggestmgr.hxx baseaffix.hxx hashmgr.hxx langnum.hxx
-EXTRA_DIST = hunspell.dsp makefile.mk
+EXTRA_DIST = hunspell.dsp makefile.mk README
all: all-am
.SUFFIXES:
diff --git a/src/hunspell/README b/src/hunspell/README
new file mode 100644
index 0000000..78b91f2
--- /dev/null
+++ b/src/hunspell/README
@@ -0,0 +1,3 @@
+Hunspell spell checker and morphological analyser
+
+Documentation, tests, examples: http://hunspell.sourceforge.net
diff --git a/src/hunspell/affentry.cxx b/src/hunspell/affentry.cxx
index 2bdab90..b659ac2 100644
--- a/src/hunspell/affentry.cxx
+++ b/src/hunspell/affentry.cxx
@@ -61,10 +61,11 @@ PfxEntry::~PfxEntry()
char * PfxEntry::add(const char * word, int len)
{
int cond;
- char tword[MAXWORDLEN+1];
+ char tword[MAXWORDUTF8LEN + 4];
if ((len > stripl) && (len >= numconds) && test_condition(word) &&
- (!stripl || (strncmp(word, strip, stripl) == 0))) {
+ (!stripl || (strncmp(word, strip, stripl) == 0)) &&
+ ((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) {
/* we have a match so add prefix */
char * pp = tword;
if (appndl) {
@@ -125,7 +126,7 @@ struct hentry * PfxEntry::check(const char * word, int len, char in_compound, co
int tmpl; // length of tmpword
struct hentry * he; // hash entry of root word or NULL
unsigned char * cp;
- char tmpword[MAXWORDLEN+1];
+ char tmpword[MAXWORDUTF8LEN + 4];
// on entry prefix is 0 length or already matches the beginning of the word.
// So if the remaining root word has positive length
@@ -155,10 +156,13 @@ struct hentry * PfxEntry::check(const char * word, int len, char in_compound, co
if ((he = pmyMgr->lookup(tmpword)) != NULL) {
do {
if (TESTAFF(he->astr, aflag, he->alen) &&
+ // forbid single prefixes with pseudoroot flag
+ ! TESTAFF(contclass, pmyMgr->get_pseudoroot(), contclasslen) &&
+ // needflag
((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
(contclass && TESTAFF(contclass, needflag, contclasslen))))
return he;
- } while ((he = he->next_homonym)); // check homonyms (HU)
+ } while ((he = he->next_homonym)); // check homonyms
}
// prefix matched but no root word was found
@@ -184,7 +188,7 @@ struct hentry * PfxEntry::check_twosfx(const char * word, int len,
int tmpl; // length of tmpword
struct hentry * he; // hash entry of root word or NULL
unsigned char * cp;
- char tmpword[MAXWORDLEN+1];
+ char tmpword[MAXWORDUTF8LEN + 4];
// on entry prefix is 0 length or already matches the beginning of the word.
// So if the remaining root word has positive length
@@ -233,7 +237,7 @@ char * PfxEntry::check_twosfx_morph(const char * word, int len,
int cond; // condition number being examined
int tmpl; // length of tmpword
unsigned char * cp;
- char tmpword[MAXWORDLEN+1];
+ char tmpword[MAXWORDUTF8LEN + 4];
// on entry prefix is 0 length or already matches the beginning of the word.
// So if the remaining root word has positive length
@@ -281,7 +285,7 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const
int tmpl; // length of tmpword
struct hentry * he; // hash entry of root word or NULL
unsigned char * cp;
- char tmpword[MAXWORDLEN+1];
+ char tmpword[MAXWORDUTF8LEN + 4];
char result[MAXLNLEN];
char * st;
@@ -315,6 +319,9 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const
if ((he = pmyMgr->lookup(tmpword)) != NULL) {
do {
if (TESTAFF(he->astr, aflag, he->alen) &&
+ // forbid single prefixes with pseudoroot flag
+ ! TESTAFF(contclass, pmyMgr->get_pseudoroot(), contclasslen) &&
+ // needflag
((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
(contclass && TESTAFF(contclass, needflag, contclasslen)))) {
if (morphcode) strcat(result, morphcode); else strcat(result,getKey());
@@ -395,11 +402,12 @@ SfxEntry::~SfxEntry()
char * SfxEntry::add(const char * word, int len)
{
int cond;
- char tword[MAXWORDLEN+1];
+ char tword[MAXWORDUTF8LEN + 4];
/* make sure all conditions match */
if ((len > stripl) && (len >= numconds) && test_condition(word + len, word) &&
- (!stripl || (strcmp(word + len - stripl, strip) == 0))) {
+ (!stripl || (strcmp(word + len - stripl, strip) == 0)) &&
+ ((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) {
/* we have a match so add suffix */
strcpy(tword,word);
if (appndl) {
@@ -464,14 +472,13 @@ struct hentry * SfxEntry::check(const char * word, int len, int optflags,
int cond; // condition beng examined
struct hentry * he; // hash entry pointer
unsigned char * cp;
- char tmpword[MAXWORDLEN+1];
+ char tmpword[MAXWORDUTF8LEN + 4];
PfxEntry* ep = (PfxEntry *) ppfx;
-
// if this suffix is being cross checked with a prefix
// but it does not support cross products skip it
- if ((optflags & XPRODUCT) != 0 && (xpflg & XPRODUCT) == 0)
+ if (((optflags & XPRODUCT) != 0) && ((xpflg & XPRODUCT) == 0))
return NULL;
// upon entry suffix is 0 length or already matches the end of the word.
@@ -505,13 +512,16 @@ struct hentry * SfxEntry::check(const char * word, int len, int optflags,
// root word in the dictionary
if (test_condition((char *) cp, (char *) tmpword)) {
+
#ifdef SZOSZABLYA_POSSIBLE_ROOTS
fprintf(stdout,"%s %s %c\n", word, tmpword, aflag);
#endif
if ((he = pmyMgr->lookup(tmpword)) != NULL) {
do {
- if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() && TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
- ((optflags & XPRODUCT) == 0 ||
+ // check conditional suffix (enabled by prefix)
+ if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() &&
+ TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
+ (((optflags & XPRODUCT) == 0) ||
TESTAFF(he->astr, ep->getFlag(), he->alen) ||
// enabled by prefix
((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
@@ -526,10 +536,11 @@ struct hentry * SfxEntry::check(const char * word, int len, int optflags,
((contclass) && TESTAFF(contclass, needflag, contclasslen)))
)
) return he;
- } while ((he = he->next_homonym)); // check homonyms (HU)
+ } while ((he = he->next_homonym)); // check homonyms
- // store resulting root in wlst (see SuffixMgr:suggest_pos_stems)
-
+ // obsolete stemming code (used only by the
+ // experimental SuffixMgr:suggest_pos_stems)
+ // store resulting root in wlst
} else if ((wlst) && (*ns < maxSug) &&
TESTAFF("ABCUVWKLMQRSTqrst", aflag , 17)) { // XXX (HU) productive suffixes
int cwrd = 1;
@@ -558,7 +569,7 @@ struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
int cond; // condition beng examined
struct hentry * he; // hash entry pointer
unsigned char * cp;
- char tmpword[MAXWORDLEN+1];
+ char tmpword[MAXWORDUTF8LEN + 4];
PfxEntry* ep = (PfxEntry *) ppfx;
@@ -598,7 +609,7 @@ struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
if (test_condition((char *) cp, (char *) tmpword)) {
if (ppfx) {
- // handle confixum (HU)
+ // handle conditional suffix
if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, NULL, (FLAG) aflag, needflag);
else
@@ -620,7 +631,7 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
int tmpl; // length of tmpword
int cond; // condition beng examined
unsigned char * cp;
- char tmpword[MAXWORDLEN+1];
+ char tmpword[MAXWORDUTF8LEN + 4];
PfxEntry* ep = (PfxEntry *) ppfx;
char * st;
@@ -664,7 +675,7 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
if (test_condition((char *) cp, (char *) tmpword)) {
if (ppfx) {
- // handle confixum (HU)
+ // handle conditional suffix
if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) {
st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag);
if (st) {
@@ -711,7 +722,7 @@ struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, Aff
if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() && TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
((optflags & XPRODUCT) == 0 ||
TESTAFF(he->astr, ep->getFlag(), he->alen) ||
- // handle confixum (HU)
+ // handle conditional suffix
((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
) &&
// handle cont. class
diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx
index 03e8a32..c971e2b 100644
--- a/src/hunspell/affixmgr.cxx
+++ b/src/hunspell/affixmgr.cxx
@@ -26,8 +26,11 @@ AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr)
utf_tbl = NULL;
complexprefixes = 0;
maptable = NULL;
+ nummap = 0;
reptable = NULL;
numrep = 0;
+ checkcpdtable = NULL;
+ numcheckcpd = 0;
compoundflag = FLAG_NULL; // permits word in compound forms
compoundbegin = FLAG_NULL; // may be first word in compound forms
compoundmiddle = FLAG_NULL; // may be middle word in compound forms
@@ -35,11 +38,13 @@ AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr)
compoundroot = FLAG_NULL; // compound word signing flag
compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word
compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word
+ checkcompounddup = 0; // forbid double words in compounds
+ checkcompoundrep = 0; // forbid bad compounds
+ checkcompoundtriple = 0; // forbid compounds with triple letters
forbiddenword = FLAG_NULL; // forbidden word signing flag
lang = NULL; // language
langnum = 0; // language code (see http://l10n.openoffice.org/languages.html)
pseudoroot = FLAG_NULL; // forbidden root, allowed only with suffixes
- onlyroot = FLAG_NULL; // forbidden word modify flag
cpdwordmax=0; // default: unlimited wordcount in compound words
cpdmin = 3; // default value
cpdmaxsyllable = 0; // default: unlimited syllablecount in compound words
@@ -135,14 +140,25 @@ AffixMgr::~AffixMgr()
if (reptable) {
for (int j=0; j < numrep; j++) {
free(reptable[j].pattern);
- free(reptable[j].replacement);
+ free(reptable[j].pattern2);
reptable[j].pattern = NULL;
- reptable[j].replacement = NULL;
+ reptable[j].pattern2 = NULL;
}
free(reptable);
reptable = NULL;
}
numrep = 0;
+ if (checkcpdtable) {
+ for (int j=0; j < numcheckcpd; j++) {
+ free(checkcpdtable[j].pattern);
+ free(checkcpdtable[j].pattern2);
+ checkcpdtable[j].pattern = NULL;
+ checkcpdtable[j].pattern2 = NULL;
+ }
+ free(checkcpdtable);
+ checkcpdtable = NULL;
+ }
+ numcheckcpd = 0;
FREE_FLAG(compoundflag);
FREE_FLAG(compoundbegin);
FREE_FLAG(compoundmiddle);
@@ -152,7 +168,6 @@ AffixMgr::~AffixMgr()
FREE_FLAG(compoundroot);
FREE_FLAG(forbiddenword);
FREE_FLAG(pseudoroot);
- FREE_FLAG(onlyroot);
FREE_FLAG(lemma_present);
FREE_FLAG(circumfix);
FREE_FLAG(onlyincompound);
@@ -287,6 +302,18 @@ int AffixMgr::parse_file(const char * affpath)
}
}
+ /* parse CHECKCOMPOUNDDUP */
+ if (strncmp(line,"CHECKCOMPOUNDDUP",16) == 0)
+ checkcompounddup = 1;
+
+ /* parse CHECKCOMPOUNDREP */
+ if (strncmp(line,"CHECKCOMPOUNDREP",16) == 0)
+ checkcompoundrep = 1;
+
+ /* parse CHECKCOMPOUNDTRIPLE */
+ if (strncmp(line,"CHECKCOMPOUNDTRIPLE",19) == 0)
+ checkcompoundtriple = 1;
+
/* parse in the flag used by forbidden words */
if (strncmp(line,"FORBIDDENWORD",13) == 0) {
if (parse_flag(line, &forbiddenword, "FORBIDDENWORD")) {
@@ -322,18 +349,12 @@ int AffixMgr::parse_file(const char * affpath)
}
}
- /* parse in the flag used by `ONLYROOTs' */
- if (strncmp(line,"ONLYROOT",8) == 0) {
- if (parse_flag(line, &onlyroot, "ONLYROOT")) {
- return 1;
- }
- }
-
/* parse in the minimal length for words in compounds */
if (strncmp(line,"COMPOUNDMIN",11) == 0) {
if (parse_num(line, &cpdmin, "COMPOUNDMIN")) {
return 1;
}
+ if (cpdmin < 1) cpdmin = 1;
}
/* parse in the max. words and syllables in compounds */
@@ -369,6 +390,13 @@ int AffixMgr::parse_file(const char * affpath)
}
}
+ /* parse in the checkcompoundpattern table */
+ if (strncmp(line,"CHECKCOMPOUNDPATTERN",20) == 0) {
+ if (parse_checkcpdtable(line, afflst)) {
+ return 1;
+ }
+ }
+
/* parse in the related character map table */
if (strncmp(line,"MAP",3) == 0) {
if (parse_maptable(line, afflst)) {
@@ -391,8 +419,11 @@ int AffixMgr::parse_file(const char * affpath)
}
/* parse MAXNGRAMSUGS */
- if (strncmp(line,"MAXNGRAMSUGS",12) == 0)
- maxngramsugs=1;
+ if (strncmp(line,"MAXNGRAMSUGS",12) == 0) {
+ if (parse_num(line, &maxngramsugs, "MAXNGRAMSUGS")) {
+ return 1;
+ }
+ }
/* parse NOSPLITSUGS */
if (strncmp(line,"NOSPLITSUGS",11) == 0)
@@ -448,7 +479,7 @@ int AffixMgr::parse_file(const char * affpath)
process_pfx_order();
process_sfx_order();
- // expand wordchars string (HU), based on csutil
+ // expand wordchars string, based on csutil (for external tokenization)
struct cs_info * csconv;
char * enc = get_encoding();
@@ -986,11 +1017,8 @@ struct hentry * AffixMgr::prefix_check_twosfx(const char * word, int len,
PfxEntry * pe = (PfxEntry *) pStart[0];
while (pe) {
- if ((contclasses[pe->getFlag()]))
- {
- rv = pe->check_twosfx(word, len, in_compound, needflag);
- if (rv) return rv;
- }
+ rv = pe->check_twosfx(word, len, in_compound, needflag);
+ if (rv) return rv;
pe = pe->getNext();
}
@@ -1000,14 +1028,11 @@ struct hentry * AffixMgr::prefix_check_twosfx(const char * word, int len,
while (pptr) {
if (isSubset(pptr->getKey(),word)) {
- if (contclasses[pptr->getFlag()])
- {
- rv = pptr->check_twosfx(word, len, in_compound, needflag);
- if (rv) {
- pfx = (AffEntry *)pptr;
- return rv;
- }
- }
+ rv = pptr->check_twosfx(word, len, in_compound, needflag);
+ if (rv) {
+ pfx = (AffEntry *)pptr;
+ return rv;
+ }
pptr = pptr->getNextEQ();
} else {
pptr = pptr->getNextNE();
@@ -1084,15 +1109,12 @@ char * AffixMgr::prefix_check_twosfx_morph(const char * word, int len,
// first handle the special case of 0 length prefixes
PfxEntry * pe = (PfxEntry *) pStart[0];
while (pe) {
- if ((contclasses[pe->getFlag()]))
- {
- st = pe->check_twosfx_morph(word,len,in_compound, needflag);
- if (st) {
- strcat(result, st);
- free(st);
- }
+ st = pe->check_twosfx_morph(word,len,in_compound, needflag);
+ if (st) {
+ strcat(result, st);
+ free(st);
}
- pe = pe->getNext();
+ pe = pe->getNext();
}
// now handle the general case
@@ -1101,14 +1123,11 @@ char * AffixMgr::prefix_check_twosfx_morph(const char * word, int len,
while (pptr) {
if (isSubset(pptr->getKey(),word)) {
- if (contclasses[pptr->getFlag()])
- {
- st = pptr->check_twosfx_morph(word, len, in_compound, needflag);
- if (st) {
- strcat(result, st);
- free(st);
- pfx = (AffEntry *)pptr;
- }
+ st = pptr->check_twosfx_morph(word, len, in_compound, needflag);
+ if (st) {
+ strcat(result, st);
+ free(st);
+ pfx = (AffEntry *)pptr;
}
pptr = pptr->getNextEQ();
} else {
@@ -1121,33 +1140,24 @@ char * AffixMgr::prefix_check_twosfx_morph(const char * word, int len,
}
-// check compounds with replacement table
-// rationale: a typical fault may be a `right' compound word,
-// with this function we correct a sort of typical hungarian faults:
-// for example:
-// service = szerviz
- // drug+water = szervíz, this form was `right' compound.
+// check compounds with replacement table (see checkcompoundrep)
int AffixMgr::repl_check(const char * word, int wl)
{
char candidate[MAXLNLEN];
const char * r;
int lenr, lenp;
- if (wl < 2) return 0;
-
- int numrep = get_numrep();
- struct replentry* reptable = get_reptable();
- if (reptable==NULL) return 0;
+ if ((wl < 2) || !numrep) return 0;
for (int i=0; i < numrep; i++ ) {
r = word;
- lenr = strlen(reptable[i].replacement);
+ lenr = strlen(reptable[i].pattern2);
lenp = strlen(reptable[i].pattern);
// search every occurence of the pattern in the word
while ((r=strstr(r, reptable[i].pattern)) != NULL) {
strcpy(candidate, word);
if (r-word + lenr + strlen(r+lenp) >= MAXLNLEN) break;
- strcpy(candidate+(r-word),reptable[i].replacement);
+ strcpy(candidate+(r-word),reptable[i].pattern2);
strcpy(candidate+(r-word)+lenr, r+lenp);
if (candidate_check(candidate,strlen(candidate))) return 1;
if (candidate_check(candidate,strlen(candidate))) return 1;
@@ -1157,30 +1167,17 @@ int AffixMgr::repl_check(const char * word, int wl)
return 0;
}
-#if 0
-// check compounds with adjacent letter were swapped
-int AffixMgr::swap_check(const char * word, int wl)
+// forbid compoundings when there are special patterns at connection.
+int AffixMgr::checkcpd_check(const char * word, int pos)
{
- char candidate[MAXLNLEN];
- char * p;
- char tmpc;
-
- if (wl < 2) return 0;
-
- // try swapping adjacent chars one by one
- strcpy(candidate, word);
- for (p = candidate; p[1] != 0; p++) {
- tmpc = *p;
- *p = p[1];
- p[1] = tmpc;
- if (candidate_check(candidate,wl)) return 1;
- tmpc = *p;
- *p = p[1];
- p[1] = tmpc;
- }
- return 0;
+ int len;
+ for (int i = 0; i < numcheckcpd; i++) {
+ if (isSubset(checkcpdtable[i].pattern2, word + pos) &&
+ (len = strlen(checkcpdtable[i].pattern)) && (pos > len) &&
+ (strncmp(word + pos - len, checkcpdtable[i].pattern, len) == 0)) return 1;
+ }
+ return 0;
}
-#endif
inline int AffixMgr::candidate_check(const char * word, int len)
{
@@ -1229,7 +1226,7 @@ struct hentry * AffixMgr::compound_check(const char * word, int len,
int oldcmpdstemnum = 0;
struct hentry * rv = NULL;
struct hentry * rv_first;
- char st [MAXWORDLEN + 1];
+ char st [MAXWORDUTF8LEN + 4];
char ch;
int cmin;
int cmax;
@@ -1354,8 +1351,7 @@ struct hentry * AffixMgr::compound_check(const char * word, int len,
}
// check forbiddenwords
- if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen))
- && (! TESTAFF(rv->astr, onlyroot, rv->alen))) return NULL;
+ if ((rv) && (rv->astr) && TESTAFF(rv->astr, forbiddenword, rv->alen)) return NULL;
// increment word number, if the second root has a compoundroot flag
if ((rv) && compoundroot &&
@@ -1378,24 +1374,16 @@ struct hentry * AffixMgr::compound_check(const char * word, int len,
)
//-----------
)
- && ! ((langnum == LANG_hu) && (( // test triple letters
+ && ! (( checkcompoundtriple && // test triple letters
(word[i-1]==word[i]) && (
((i>1) && (word[i-1]==word[i-2])) ||
((word[i-1]==word[i+1])) // may word[i+1] == '\0'
)
) ||
- ( // test hungarian consonant
- (i>2) && (
- (strncmp(word+i-3,"ccscs",5)==0) || // *giccscsinálás
- (strncmp(word+i-3,"ggygy",5)==0) || // *meggygyűjtés
- (strncmp(word+i-3,"llyly",5)==0) || // *gallylyukasztás
- (strncmp(word+i-3,"nnyny",5)==0) || // *könnynyalás
- (strncmp(word+i-3,"sszsz",5)==0) || // *dzsesszszak (hibás példa)
- (strncmp(word+i-2,"szszer�",7)==0) || // *m�szszer�
- (strncmp(word+i-2,"szszerű",7)==0) // *m�szszer� UTF-8 k�dol�ssal
- //(strncmp(word+i-3,"ttyty",5)==0) // may word[i+1] == '\0'
- )
- )))
+ (
+ // test CHECKCOMPOUNDPATTERN
+ numcheckcpd && checkcpd_check(word, i)
+ ))
)
//MAGYARISPELL
|| ((!rv) && (langnum == LANG_hu) && hu_mov_rule && (rv = affix_check(st,i)) &&
@@ -1447,8 +1435,7 @@ struct hentry * AffixMgr::compound_check(const char * word, int len,
}
// check forbiddenwords
- if ((rv) && (rv->astr) && (TESTAFF(rv->astr,forbiddenword,rv->alen))
- && (! TESTAFF(rv->astr, onlyroot, rv->alen))) return NULL;
+ if ((rv) && (rv->astr) && TESTAFF(rv->astr,forbiddenword,rv->alen)) return NULL;
// second word is acceptable, as a root?
// hungarian conventions: compounding is acceptable,
// when compound forms consist of 2 words, or if more,
@@ -1464,12 +1451,12 @@ struct hentry * AffixMgr::compound_check(const char * word, int len,
(numsyllable + get_syllable(rv->word,rv->wlen)<=cpdmaxsyllable))
)
&& (
- (rv != rv_first)
+ (!checkcompounddup || (rv != rv_first))
)
)
{
- // bad compound word
- if ((langnum == LANG_hu) && repl_check(word,len)) return NULL;
+ // forbid compound word, if it is a non compound word with typical fault
+ if (checkcompoundrep && repl_check(word,len)) return NULL;
return rv;
}
@@ -1498,8 +1485,7 @@ struct hentry * AffixMgr::compound_check(const char * word, int len,
}
// check forbiddenwords
- if ((rv) && (rv->astr) && (TESTAFF(rv->astr,forbiddenword,rv->alen))
- && (! TESTAFF(rv->astr, onlyroot, rv->alen))) return NULL;
+ if ((rv) && (rv->astr) && TESTAFF(rv->astr,forbiddenword,rv->alen)) return NULL;
// pfxappnd = prefix of word+i, or NULL
// calculate syllable number of prefix.
@@ -1550,10 +1536,10 @@ struct hentry * AffixMgr::compound_check(const char * word, int len,
(numsyllable <= cpdmaxsyllable))
)
&& (
- (rv != rv_first)
+ (!checkcompounddup || (rv != rv_first))
)) {
- // bad compound word
- if ((langnum == LANG_hu) && repl_check(word, len)) return NULL;
+ // forbid compound word, if it is a non compound word with typical fault
+ if (checkcompoundrep && repl_check(word, len)) return NULL;
return rv;
}
@@ -1570,8 +1556,8 @@ struct hentry * AffixMgr::compound_check(const char * word, int len,
rv=NULL;
}
if (rv) {
- // bad compound word
- if ((langnum == LANG_hu) && repl_check(word, len)) return NULL;
+ // forbid compound word, if it is a non compound word with typical fault
+ if (checkcompoundrep && repl_check(word, len)) return NULL;
return rv;
} else {
#ifdef HUNSTEM
@@ -1598,7 +1584,7 @@ int AffixMgr::compound_check_morph(const char * word, int len,
struct hentry * rv = NULL;
struct hentry * rv_first;
- char st [MAXWORDLEN + 1];
+ char st [MAXWORDUTF8LEN + 4];
char ch;
int checked_prefix;
@@ -1659,7 +1645,6 @@ int AffixMgr::compound_check_morph(const char * word, int len,
strcat(presult, st);
strcat(presult, MISSING_DESCRIPTION);
}
- //strcat(presult, "+");
}
if (!rv) {
@@ -1740,8 +1725,7 @@ int AffixMgr::compound_check_morph(const char * word, int len,
}
// check forbiddenwords
- if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen))
- && (! TESTAFF(rv->astr, onlyroot, rv->alen))) continue;
+ if ((rv) && (rv->astr) && TESTAFF(rv->astr, forbiddenword, rv->alen)) continue;
// increment word number, if the second root has a compoundroot flag
if ((rv) && (compoundroot) &&
@@ -1765,24 +1749,16 @@ int AffixMgr::compound_check_morph(const char * word, int len,
)
//------------
)
- && ! ((langnum == LANG_hu) && (( // test triple letters
+ && ! (( checkcompoundtriple && // test triple letters
(word[i-1]==word[i]) && (
((i>1) && (word[i-1]==word[i-2])) ||
((word[i-1]==word[i+1])) // may word[i+1] == '\0'
)
) ||
- ( // test hungarian consonant
- (i>2) && (
- (strncmp(word+i-3,"ccscs",5)==0) || // giccscsinálás
- (strncmp(word+i-3,"ggygy",5)==0) || // meggygyűjtés
- (strncmp(word+i-3,"llyly",5)==0) || // gallylyukasztás
- (strncmp(word+i-3,"nnyny",5)==0) || // könnynyalás
- (strncmp(word+i-3,"sszsz",5)==0) || // dzsesszszak (hibás példa)
- (strncmp(word+i-2,"szszer�",7)==0) ||
- (strncmp(word+i-2,"szszerű",7)==0) // *m�szszer� UTF-8 k�dol�ssal
- //(strncmp(word+i-3,"ttyty",5)==0) // may word[i+1] == '\0'
- )
- )))
+ (
+ // test CHECKCOMPOUNDPATTERN
+ numcheckcpd && checkcpd_check(word, i)
+ ))
)
//MAGYARISPELL
|| ((!rv) && (langnum == LANG_hu) && hu_mov_rule && (rv = affix_check(st,i)) &&
@@ -1830,10 +1806,9 @@ int AffixMgr::compound_check_morph(const char * word, int len,
}
// check forbiddenwords
- if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen))
- && (! TESTAFF(rv->astr, onlyroot, rv->alen))) {
- st[i] = ch;
- continue;
+ if ((rv) && (rv->astr) && TESTAFF(rv->astr, forbiddenword, rv->alen)) {
+ st[i] = ch;
+ continue;
}
// second word is acceptable, as a root?
@@ -1850,7 +1825,7 @@ int AffixMgr::compound_check_morph(const char * word, int len,
(numsyllable+get_syllable(rv->word,rv->wlen)<=cpdmaxsyllable))
)
&& (
- (rv != rv_first)
+ (!checkcompounddup || (rv != rv_first))
)
)
{
@@ -1946,7 +1921,7 @@ int AffixMgr::compound_check_morph(const char * word, int len,
(numsyllable <= cpdmaxsyllable))
)
&& (
- (rv != rv_first)
+ (!checkcompounddup || (rv != rv_first))
)) {
char * m = NULL;
if (compoundflag) m = affix_check_morph((word+i),strlen(word+i), compoundflag);
@@ -1997,6 +1972,8 @@ struct hentry * AffixMgr::suffix_check (const char * word, int len,
struct hentry * rv = NULL;
char result[MAXLNLEN];
+ PfxEntry* ep = (PfxEntry *) ppfx;
+
// first handle the special case of 0 length suffixes
SfxEntry * se = (SfxEntry *) sStart[0];
@@ -2008,19 +1985,23 @@ struct hentry * AffixMgr::suffix_check (const char * word, int len,
(se->getCont() && compoundpermitflag &&
TESTAFF(se->getCont(),compoundpermitflag,se->getContLen()))) && (!circumfix ||
// no circumfix flag in prefix and suffix
- ((!ppfx || !(((PfxEntry *)ppfx)->getCont()) || !TESTAFF(((PfxEntry *)ppfx)->getCont(),
- circumfix, ((PfxEntry *)ppfx)->getContLen())) &&
+ ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
+ circumfix, ep->getContLen())) &&
(!se->getCont() || !(TESTAFF(se->getCont(),circumfix,se->getContLen())))) ||
// circumfix flag in prefix AND suffix
- ((ppfx && (((PfxEntry *)ppfx)->getCont()) && TESTAFF(((PfxEntry *)ppfx)->getCont(),
- circumfix, ((PfxEntry *)ppfx)->getContLen())) &&
+ ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
+ circumfix, ep->getContLen())) &&
(se->getCont() && (TESTAFF(se->getCont(),circumfix,se->getContLen()))))) &&
// fogemorpheme
(in_compound ||
!((se->getCont() && (TESTAFF(se->getCont(), onlyincompound, se->getContLen()))))) &&
- // pseudoroot on first suffix
- (cclass || !(se->getCont() &&
- TESTAFF(se->getCont(), pseudoroot, se->getContLen())))
+ // pseudoroot on prefix or first suffix
+ (cclass ||
+ !(se->getCont() && TESTAFF(se->getCont(), pseudoroot, se->getContLen())) ||
+ (ppfx && !((ep->getCont()) &&
+ TESTAFF(ep->getCont(), pseudoroot,
+ ep->getContLen())))
+ )
) &&
(rv = se->check(word,len, sfxopts, ppfx, wlst, maxSug, ns, (FLAG) cclass, needflag))) {
sfx=(AffEntry *)se; // BUG: sfx not stateless
@@ -2043,19 +2024,23 @@ struct hentry * AffixMgr::suffix_check (const char * word, int len,
(sptr->getCont() && compoundpermitflag &&
TESTAFF(sptr->getCont(),compoundpermitflag,sptr->getContLen()))) && (!circumfix ||
// no circumfix flag in prefix and suffix
- ((!ppfx || !(((PfxEntry *)ppfx)->getCont()) || !TESTAFF(((PfxEntry *)ppfx)->getCont(),
- circumfix, ((PfxEntry *)ppfx)->getContLen())) &&
+ ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
+ circumfix, ep->getContLen())) &&
(!sptr->getCont() || !(TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))) ||
// circumfix flag in prefix AND suffix
- ((ppfx && (((PfxEntry *)ppfx)->getCont()) && TESTAFF(((PfxEntry *)ppfx)->getCont(),
- circumfix, ((PfxEntry *)ppfx)->getContLen())) &&
+ ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
+ circumfix, ep->getContLen())) &&
(sptr->getCont() && (TESTAFF(sptr->getCont(),circumfix,sptr->getContLen()))))) &&
// fogemorpheme
(in_compound ||
!((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) &&
- // pseudoroot on first suffix
- (cclass || !(sptr->getCont() &&
- TESTAFF(sptr->getCont(), pseudoroot, sptr->getContLen())))
+ // pseudoroot on prefix or first suffix
+ (cclass ||
+ !(sptr->getCont() && TESTAFF(sptr->getCont(), pseudoroot, sptr->getContLen())) ||
+ (ppfx && !((ep->getCont()) &&
+ TESTAFF(ep->getCont(), pseudoroot,
+ ep->getContLen())))
+ )
) &&
(rv = sptr->check(word,len, sfxopts, ppfx, wlst, maxSug, ns, cclass, needflag))) {
sfx=(AffEntry *)sptr; // BUG: sfx not stateless
@@ -2110,7 +2095,7 @@ struct hentry * AffixMgr::suffix_check_twosfx(const char * word, int len,
if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
if (contclasses[sptr->getFlag()])
{
- rv = sptr->check_twosfx(word,len, sfxopts, ppfx, needflag); // (HU)
+ rv = sptr->check_twosfx(word,len, sfxopts, ppfx, needflag);
if (rv) {
sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
@@ -2149,11 +2134,9 @@ char * AffixMgr::suffix_check_twosfx_morph(const char * word, int len,
if (st) {
if (ppfx) {
if (((PfxEntry *) ppfx)->getMorph()) strcat(result, ((PfxEntry *) ppfx)->getMorph());
- //strcat(result,"+");
}
strcat(result, st);
free(st);
- //strcat(result, "+");
if (se->getMorph()) strcat(result, se->getMorph());
strcat(result, "\n");
}
@@ -2169,7 +2152,7 @@ char * AffixMgr::suffix_check_twosfx_morph(const char * word, int len,
if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
if (contclasses[sptr->getFlag()])
{
- st = sptr->check_twosfx_morph(word,len, sfxopts, ppfx, needflag); // (HU)
+ st = sptr->check_twosfx_morph(word,len, sfxopts, ppfx, needflag);
if (st) {
sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
@@ -2212,13 +2195,38 @@ char * AffixMgr::suffix_check_morph(const char * word, int len,
result[0] = '\0';
+ PfxEntry* ep = (PfxEntry *) ppfx;
+
// first handle the special case of 0 length suffixes
SfxEntry * se = (SfxEntry *) sStart[0];
while (se) {
- rv = se->check(word,len, sfxopts, ppfx, NULL, 0, 0, cclass, needflag);
- if (rv && (!cclass && se->getCont() &&
- TESTAFF(se->getCont(), pseudoroot, se->getContLen()))) rv=NULL;
- while (rv) {
+ if (!cclass || se->getCont()) {
+ // suffixes are not allowed in beginning of compounds
+ if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
+ // except when signed with compoundpermitflag flag
+ (se->getCont() && compoundpermitflag &&
+ TESTAFF(se->getCont(),compoundpermitflag,se->getContLen()))) && (!circumfix ||
+ // no circumfix flag in prefix and suffix
+ ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
+ circumfix, ep->getContLen())) &&
+ (!se->getCont() || !(TESTAFF(se->getCont(),circumfix,se->getContLen())))) ||
+ // circumfix flag in prefix AND suffix
+ ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
+ circumfix, ep->getContLen())) &&
+ (se->getCont() && (TESTAFF(se->getCont(),circumfix,se->getContLen()))))) &&
+ // fogemorpheme
+ (in_compound ||
+ !((se->getCont() && (TESTAFF(se->getCont(), onlyincompound, se->getContLen()))))) &&
+ // pseudoroot on prefix or first suffix
+ (cclass ||
+ !(se->getCont() && TESTAFF(se->getCont(), pseudoroot, se->getContLen())) ||
+ (ppfx && !((ep->getCont()) &&
+ TESTAFF(ep->getCont(), pseudoroot,
+ ep->getContLen())))
+ )
+ ))
+ rv = se->check(word,len, sfxopts, ppfx, NULL, 0, 0, cclass, needflag);
+ while (rv) {
if (ppfx) {
if (((PfxEntry *) ppfx)->getMorph()) strcat(result, ((PfxEntry *) ppfx)->getMorph());
}
@@ -2229,6 +2237,7 @@ char * AffixMgr::suffix_check_morph(const char * word, int len,
if (se->getMorph()) strcat(result, se->getMorph());
strcat(result, "\n");
rv = se->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
+ }
}
se = se->getNext();
}
@@ -2238,36 +2247,36 @@ char * AffixMgr::suffix_check_morph(const char * word, int len,
SfxEntry * sptr = (SfxEntry *) sStart[sp];
while (sptr) {
- if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
- ////rv = sptr->check(word,len, sfxopts, ppfx);
- rv = NULL;
- if (!circumfix ||
+ if (isRevSubset(sptr->getKey(), word + len - 1, len)
+ ) {
+ // suffixes are not allowed in beginning of compounds
+ if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
+ // except when signed with compoundpermitflag flag
+ (sptr->getCont() && compoundpermitflag &&
+ TESTAFF(sptr->getCont(),compoundpermitflag,sptr->getContLen()))) && (!circumfix ||
// no circumfix flag in prefix and suffix
- ((!ppfx || !(((PfxEntry *)ppfx)->getCont()) || !TESTAFF(((PfxEntry *)ppfx)->getCont(),
- circumfix, ((PfxEntry *)ppfx)->getContLen())) &&
+ ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
+ circumfix, ep->getContLen())) &&
(!sptr->getCont() || !(TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))) ||
// circumfix flag in prefix AND suffix
- ((ppfx && (((PfxEntry *)ppfx)->getCont()) && TESTAFF(((PfxEntry *)ppfx)->getCont(),
- circumfix, ((PfxEntry *)ppfx)->getContLen())) &&
- (sptr->getCont() && (TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))))
- rv = sptr->check(word,len, sfxopts, ppfx, NULL, 0, 0, cclass, needflag); // (HU)
- if (rv && (!cclass && sptr->getCont() &&
- TESTAFF(sptr->getCont(), pseudoroot, sptr->getContLen()))) rv=NULL;
+ ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
+ circumfix, ep->getContLen())) &&
+ (sptr->getCont() && (TESTAFF(sptr->getCont(),circumfix,sptr->getContLen()))))) &&
+ // fogemorpheme
+ (in_compound ||
+ !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) &&
+ // pseudoroot on first suffix
+ (cclass || !(sptr->getCont() &&
+ TESTAFF(sptr->getCont(), pseudoroot, sptr->getContLen())))
+ )) rv = sptr->check(word,len, sfxopts, ppfx, NULL, 0, 0, cclass, needflag);
while (rv) {
- //sfxflag=sptr->getFlag();
- //sfxappnd=sptr->getKey();
if (ppfx) {
-// strcat(result,((PfxEntry * )ppfx)->getKey());
if (((PfxEntry *) ppfx)->getMorph()) strcat(result, ((PfxEntry *) ppfx)->getMorph());
- //strcat(result, "+");
}
if (rv->description && ((!rv->astr) ||
!TESTAFF(rv->astr, lemma_present, rv->alen)))
strcat(result, rv->word);
-
if (rv->description) strcat(result, rv->description);
- //strcat(result, "+");
-// strcat(result, sptr->getAffix());
#ifdef DEBUG
unsigned short flag = sptr->getKey();
char flagch[2] = &flag;
@@ -2280,9 +2289,8 @@ char * AffixMgr::suffix_check_morph(const char * word, int len,
strcat(result, result2);
#endif
- if (sptr->getMorph()) strcat(result, sptr->getMorph());
- strcat(result, "\n");
- //return rv;
+ if (sptr->getMorph()) strcat(result, sptr->getMorph());
+ strcat(result, "\n");
rv = sptr->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
}
sptr = sptr->getNextEQ();
@@ -2538,18 +2546,18 @@ FLAG AffixMgr::get_forbiddenword()
return forbiddenword;
}
-// return the pseudoroot flag
-FLAG AffixMgr::get_onlyroot()
-{
- return onlyroot;
-}
-
// return the forbidden words flag modify flag
FLAG AffixMgr::get_pseudoroot()
{
return pseudoroot;
}
+// return the onlyincompound flag
+FLAG AffixMgr::get_onlyincompound()
+{
+ return onlyincompound;
+}
+
// return the compound word signal flag
FLAG AffixMgr::get_compoundroot()
{
@@ -2915,12 +2923,12 @@ int AffixMgr::parse_reptable(char * line, FILE * af)
/* now parse the numrep lines to read in the remainder of the table */
char * nl = line;
for (int j=0; j < numrep; j++) {
- fgets(nl,MAXLNLEN,af);
+ if (!fgets(nl,MAXLNLEN,af)) return 1;
mychomp(nl);
tp = nl;
i = 0;
reptable[j].pattern = NULL;
- reptable[j].replacement = NULL;
+ reptable[j].pattern2 = NULL;
while ((piece=mystrsep(&tp,' '))) {
if (*piece != '\0') {
switch(i) {
@@ -2933,14 +2941,87 @@ int AffixMgr::parse_reptable(char * line, FILE * af)
break;
}
case 1: { reptable[j].pattern = mystrdup(piece); break; }
- case 2: { reptable[j].replacement = mystrdup(piece); break; }
+ case 2: { reptable[j].pattern2 = mystrdup(piece); break; }
+ default: break;
+ }
+ i++;
+ }
+ free(piece);
+ }
+ if ((!(reptable[j].pattern)) || (!(reptable[j].pattern2))) {
+ fprintf(stderr,"error: replacement table is corrupt\n");
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* parse in the checkcompoundpattern table */
+int AffixMgr::parse_checkcpdtable(char * line, FILE * af)
+{
+ if (numcheckcpd != 0) {
+ fprintf(stderr,"error: duplicate compound pattern tables used\n");
+ return 1;
+ }
+ char * tp = line;
+ char * piece;
+ int i = 0;
+ int np = 0;
+ while ((piece=mystrsep(&tp,' '))) {
+ if (*piece != '\0') {
+ switch(i) {
+ case 0: { np++; break; }
+ case 1: {
+ numcheckcpd = atoi(piece);
+ if (numcheckcpd < 1) {
+ fprintf(stderr,"incorrect number of entries in compound pattern table\n");
+ free(piece);
+ return 1;
+ }
+ checkcpdtable = (replentry *) malloc(numcheckcpd * sizeof(struct replentry));
+ np++;
+ break;
+ }
+ default: break;
+ }
+ i++;
+ }
+ free(piece);
+ }
+ if (np != 2) {
+ fprintf(stderr,"error: missing compound pattern table information\n");
+ return 1;
+ }
+
+ /* now parse the numcheckcpd lines to read in the remainder of the table */
+ char * nl = line;
+ for (int j=0; j < numcheckcpd; j++) {
+ if (!fgets(nl,MAXLNLEN,af)) return 1;
+ mychomp(nl);
+ tp = nl;
+ i = 0;
+ checkcpdtable[j].pattern = NULL;
+ checkcpdtable[j].pattern2 = NULL;
+ while ((piece=mystrsep(&tp,' '))) {
+ if (*piece != '\0') {
+ switch(i) {
+ case 0: {
+ if (strncmp(piece,"CHECKCOMPOUNDPATTERN",3) != 0) {
+ fprintf(stderr,"error: compound pattern table is corrupt\n");
+ free(piece);
+ return 1;
+ }
+ break;
+ }
+ case 1: { checkcpdtable[j].pattern = mystrdup(piece); break; }
+ case 2: { checkcpdtable[j].pattern2 = mystrdup(piece); break; }
default: break;
}
i++;
}
free(piece);
}
- if ((!(reptable[j].pattern)) || (!(reptable[j].replacement))) {
+ if ((!(checkcpdtable[j].pattern)) || (!(checkcpdtable[j].pattern2))) {
fprintf(stderr,"error: replacement table is corrupt\n");
return 1;
}
@@ -2988,7 +3069,7 @@ int AffixMgr::parse_maptable(char * line, FILE * af)
/* now parse the nummap lines to read in the remainder of the table */
char * nl = line;
for (int j=0; j < nummap; j++) {
- fgets(nl,MAXLNLEN,af);
+ if (!fgets(nl,MAXLNLEN,af)) return 1;
mychomp(nl);
tp = nl;
i = 0;
@@ -3011,7 +3092,7 @@ int AffixMgr::parse_maptable(char * line, FILE * af)
maptable[j].set_utf16 = NULL;
if (!utf8) {
maptable[j].set = mystrdup(piece);
- maptable[j].len = strlen(maptable[j].set);
+ maptable[j].len = strlen(maptable[j].set);
} else {
w_char w[MAXWORDLEN];
int n = u8_u16(w, MAXWORDLEN, piece);
@@ -3151,7 +3232,7 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af)
// now parse numents affentries for this affix
for (int j=0; j < numents; j++) {
- fgets(nl,MAXLNLEN,af);
+ if (!fgets(nl,MAXLNLEN,af)) return 1;
mychomp(nl);
tp = nl;
i = 0;
diff --git a/src/hunspell/affixmgr.hxx b/src/hunspell/affixmgr.hxx
index 180ff19..d3a8a78 100644
--- a/src/hunspell/affixmgr.hxx
+++ b/src/hunspell/affixmgr.hxx
@@ -28,14 +28,18 @@ class AffixMgr
FLAG compoundroot;
FLAG compoundforbidflag;
FLAG compoundpermitflag;
+ int checkcompounddup;
+ int checkcompoundrep;
+ int checkcompoundtriple;
FLAG forbiddenword;
FLAG pseudoroot;
- FLAG onlyroot;
int cpdmin;
int numrep;
replentry * reptable;
int nummap;
mapentry * maptable;
+ int numcheckcpd;
+ replentry * checkcpdtable;
int maxngramsugs;
int nosplitsugs;
int sugswithdots;
@@ -99,7 +103,7 @@ public:
int get_syllable (const char * word, int wlen);
int repl_check(const char * word, int len);
-// int swap_check(const char * word, int len);
+ int checkcpd_check(const char * word, int len);
int candidate_check(const char * word, int len);
struct hentry * compound_check(const char * word, int len,
int wordnum, int numsyllable, int maxwordnum, int hu_mov_rule,
@@ -123,7 +127,7 @@ public:
FLAG get_compoundbegin();
FLAG get_forbiddenword();
FLAG get_pseudoroot();
- FLAG get_onlyroot(); // obsolote
+ FLAG get_onlyincompound();
FLAG get_compoundroot();
FLAG get_lemma_present();
int get_checknum();
@@ -149,11 +153,11 @@ private:
int parse_cpdflag(char * line);
int parse_cpdforbid(char * line);
int parse_forbid(char * line);
- int parse_onlyroot(char * line);
int parse_cpdsyllable(char * line);
int parse_syllablenum(char * line);
int parse_reptable(char * line, FILE * af);
int parse_maptable(char * line, FILE * af);
+ int parse_checkcpdtable(char * line, FILE * af);
int parse_affix(char * line, const char at, FILE * af);
int parse_wordchars(char * line);
int parse_lang(char * line);
diff --git a/src/hunspell/atypes.hxx b/src/hunspell/atypes.hxx
index e9eb876..3cba8a9 100644
--- a/src/hunspell/atypes.hxx
+++ b/src/hunspell/atypes.hxx
@@ -9,7 +9,8 @@
#define SETSIZE 256
#define CONTSIZE 65536
-#define MAXWORDLEN 300
+#define MAXWORDLEN 100
+#define MAXWORDUTF8LEN (MAXWORDLEN * 4)
#define XPRODUCT (1 << 0)
enum {IN_CPD_NOT, IN_CPD_BEGIN, IN_CPD_END};
@@ -55,7 +56,7 @@ struct affentry
struct replentry {
char * pattern;
- char * replacement;
+ char * pattern2;
};
struct mapentry {
diff --git a/src/hunspell/csutil.cxx b/src/hunspell/csutil.cxx
index 5b2c3d2..963849b 100644
--- a/src/hunspell/csutil.cxx
+++ b/src/hunspell/csutil.cxx
@@ -254,12 +254,12 @@ int flag_bsearch(unsigned short flags[], unsigned short flag, short length) {
}
- // return 1 if s1 is a ending subset of s2
- int isRevSubset(const char * s1, const char * end_of_word, int len)
+ // return 1 if s1 (reversed) is a leading subset of end of s2
+ int isRevSubset(const char * s1, const char * end_of_s2, int len)
{
- while ((len > 0) && (*s1 == *end_of_word) && *s1) {
+ while ((len > 0) && *s1 && (*s1 == *end_of_s2)) {
s1++;
- end_of_word--;
+ end_of_s2--;
len--;
}
return (*s1 == '\0');
@@ -473,11 +473,11 @@ void mkallsmall_utf(w_char * u, int nc, struct unicode_info2 * utfconv) {
*p = *dest;
*dest = r;
}
- u16_u8(word, MAXWORDLEN, w, l);
+ u16_u8(word, MAXWORDUTF8LEN, w, l);
}
// these are simple character mappings for the
-// encodings supported by MySpell
+// encodings supported
// supplying isupper, tolower, and toupper
struct cs_info iso1_tbl[] = {
@@ -1421,100 +1421,100 @@ struct cs_info iso4_tbl[] = {
{ 0x00, 0x9e, 0x9e },
{ 0x00, 0x9f, 0x9f },
{ 0x00, 0xa0, 0xa0 },
-{ 0x00, 0xa1, 0xa1 },
+{ 0x01, 0xb1, 0xa1 },
{ 0x00, 0xa2, 0xa2 },
-{ 0x00, 0xa3, 0xa3 },
+{ 0x01, 0xb3, 0xa3 },
{ 0x00, 0xa4, 0xa4 },
-{ 0x00, 0xa5, 0xa5 },
-{ 0x00, 0xa6, 0xa6 },
+{ 0x01, 0xb5, 0xa5 },
+{ 0x01, 0xb6, 0xa6 },
{ 0x00, 0xa7, 0xa7 },
{ 0x00, 0xa8, 0xa8 },
-{ 0x00, 0xa9, 0xa9 },
-{ 0x00, 0xaa, 0xaa },
-{ 0x00, 0xab, 0xab },
-{ 0x00, 0xac, 0xac },
+{ 0x01, 0xb9, 0xa9 },
+{ 0x01, 0xba, 0xaa },
+{ 0x01, 0xbb, 0xab },
+{ 0x01, 0xbc, 0xac },
{ 0x00, 0xad, 0xad },
-{ 0x00, 0xae, 0xae },
+{ 0x01, 0xbe, 0xae },
{ 0x00, 0xaf, 0xaf },
{ 0x00, 0xb0, 0xb0 },
-{ 0x00, 0xb1, 0xb1 },
+{ 0x00, 0xb1, 0xa1 },
{ 0x00, 0xb2, 0xb2 },
-{ 0x00, 0xb3, 0xb3 },
+{ 0x00, 0xb3, 0xa3 },
{ 0x00, 0xb4, 0xb4 },
-{ 0x00, 0xb5, 0xb5 },
-{ 0x00, 0xb6, 0xb6 },
+{ 0x00, 0xb5, 0xa5 },
+{ 0x00, 0xb6, 0xa6 },
{ 0x00, 0xb7, 0xb7 },
{ 0x00, 0xb8, 0xb8 },
-{ 0x00, 0xb9, 0xb9 },
-{ 0x00, 0xba, 0xba },
-{ 0x00, 0xbb, 0xbb },
-{ 0x00, 0xbc, 0xbc },
+{ 0x00, 0xb9, 0xa9 },
+{ 0x00, 0xba, 0xaa },
+{ 0x00, 0xbb, 0xab },
+{ 0x00, 0xbc, 0xac },
{ 0x00, 0xbd, 0xbd },
-{ 0x00, 0xbe, 0xbe },
+{ 0x00, 0xbe, 0xae },
{ 0x00, 0xbf, 0xbf },
-{ 0x00, 0xc0, 0xc0 },
-{ 0x00, 0xc1, 0xc1 },
-{ 0x00, 0xc2, 0xc2 },
-{ 0x00, 0xc3, 0xc3 },
-{ 0x00, 0xc4, 0xc4 },
-{ 0x00, 0xc5, 0xc5 },
-{ 0x00, 0xc6, 0xc6 },
-{ 0x00, 0xc7, 0xc7 },
-{ 0x00, 0xc8, 0xc8 },
-{ 0x00, 0xc9, 0xc9 },
-{ 0x00, 0xca, 0xca },
-{ 0x00, 0xcb, 0xcb },
-{ 0x00, 0xcc, 0xcc },
-{ 0x00, 0xcd, 0xcd },
-{ 0x00, 0xce, 0xce },
-{ 0x00, 0xcf, 0xcf },
-{ 0x00, 0xd0, 0xd0 },
-{ 0x00, 0xd1, 0xd1 },
-{ 0x00, 0xd2, 0xd2 },
-{ 0x00, 0xd3, 0xd3 },
-{ 0x00, 0xd4, 0xd4 },
-{ 0x00, 0xd5, 0xd5 },
-{ 0x00, 0xd6, 0xd6 },
+{ 0x01, 0xe0, 0xc0 },
+{ 0x01, 0xe1, 0xc1 },
+{ 0x01, 0xe2, 0xc2 },
+{ 0x01, 0xe3, 0xc3 },
+{ 0x01, 0xe4, 0xc4 },
+{ 0x01, 0xe5, 0xc5 },
+{ 0x01, 0xe6, 0xc6 },
+{ 0x01, 0xe7, 0xc7 },
+{ 0x01, 0xe8, 0xc8 },
+{ 0x01, 0xe9, 0xc9 },
+{ 0x01, 0xea, 0xca },
+{ 0x01, 0xeb, 0xcb },
+{ 0x01, 0xec, 0xcc },
+{ 0x01, 0xed, 0xcd },
+{ 0x01, 0xee, 0xce },
+{ 0x01, 0xef, 0xcf },
+{ 0x01, 0xf0, 0xd0 },
+{ 0x01, 0xf1, 0xd1 },
+{ 0x01, 0xf2, 0xd2 },
+{ 0x01, 0xf3, 0xd3 },
+{ 0x01, 0xf4, 0xd4 },
+{ 0x01, 0xf5, 0xd5 },
+{ 0x01, 0xf6, 0xd6 },
{ 0x00, 0xd7, 0xd7 },
-{ 0x00, 0xd8, 0xd8 },
-{ 0x00, 0xd9, 0xd9 },
-{ 0x00, 0xda, 0xda },
-{ 0x00, 0xdb, 0xdb },
-{ 0x00, 0xdc, 0xdc },
-{ 0x00, 0xdd, 0xdd },
-{ 0x00, 0xde, 0xde },
+{ 0x01, 0xf8, 0xd8 },
+{ 0x01, 0xf9, 0xd9 },
+{ 0x01, 0xfa, 0xda },
+{ 0x01, 0xfb, 0xdb },
+{ 0x01, 0xfc, 0xdc },
+{ 0x01, 0xfd, 0xdd },
+{ 0x01, 0xfe, 0xde },
{ 0x00, 0xdf, 0xdf },
-{ 0x00, 0xe0, 0xe0 },
-{ 0x00, 0xe1, 0xe1 },
-{ 0x00, 0xe2, 0xe2 },
-{ 0x00, 0xe3, 0xe3 },
-{ 0x00, 0xe4, 0xe4 },
-{ 0x00, 0xe5, 0xe5 },
-{ 0x00, 0xe6, 0xe6 },
-{ 0x00, 0xe7, 0xe7 },
-{ 0x00, 0xe8, 0xe8 },
-{ 0x00, 0xe9, 0xe9 },
-{ 0x00, 0xea, 0xea },
-{ 0x00, 0xeb, 0xeb },
-{ 0x00, 0xec, 0xec },
-{ 0x00, 0xed, 0xed },
-{ 0x00, 0xee, 0xee },
-{ 0x00, 0xef, 0xef },
-{ 0x00, 0xf0, 0xf0 },
-{ 0x00, 0xf1, 0xf1 },
-{ 0x00, 0xf2, 0xf2 },
-{ 0x00, 0xf3, 0xf3 },
-{ 0x00, 0xf4, 0xf4 },
-{ 0x00, 0xf5, 0xf5 },
-{ 0x00, 0xf6, 0xf6 },
+{ 0x00, 0xe0, 0xc0 },
+{ 0x00, 0xe1, 0xc1 },
+{ 0x00, 0xe2, 0xc2 },
+{ 0x00, 0xe3, 0xc3 },
+{ 0x00, 0xe4, 0xc4 },
+{ 0x00, 0xe5, 0xc5 },
+{ 0x00, 0xe6, 0xc6 },
+{ 0x00, 0xe7, 0xc7 },
+{ 0x00, 0xe8, 0xc8 },
+{ 0x00, 0xe9, 0xc9 },
+{ 0x00, 0xea, 0xca },
+{ 0x00, 0xeb, 0xcb },
+{ 0x00, 0xec, 0xcc },
+{ 0x00, 0xed, 0xcd },
+{ 0x00, 0xee, 0xce },
+{ 0x00, 0xef, 0xcf },
+{ 0x00, 0xf0, 0xd0 },
+{ 0x00, 0xf1, 0xd1 },
+{ 0x00, 0xf2, 0xd2 },
+{ 0x00, 0xf3, 0xd3 },
+{ 0x00, 0xf4, 0xd4 },
+{ 0x00, 0xf5, 0xd5 },
+{ 0x00, 0xf6, 0xd6 },
{ 0x00, 0xf7, 0xf7 },
-{ 0x00, 0xf8, 0xf8 },
-{ 0x00, 0xf9, 0xf9 },
-{ 0x00, 0xfa, 0xfa },
-{ 0x00, 0xfb, 0xfb },
-{ 0x00, 0xfc, 0xfc },
-{ 0x00, 0xfd, 0xfd },
-{ 0x00, 0xfe, 0xfe },
+{ 0x00, 0xf8, 0xd8 },
+{ 0x00, 0xf9, 0xd9 },
+{ 0x00, 0xfa, 0xda },
+{ 0x00, 0xfb, 0xdb },
+{ 0x00, 0xfc, 0xdc },
+{ 0x00, 0xfd, 0xdd },
+{ 0x00, 0xfe, 0xde },
{ 0x00, 0xff, 0xff },
};
@@ -3331,7 +3331,7 @@ struct cs_info koi8r_tbl[] = {
{ 0x01, 0xdf, 0xff },
};
-struct cs_info cp1251_tbl[] = {
+struct cs_info koi8u_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
@@ -3495,27 +3495,27 @@ struct cs_info cp1251_tbl[] = {
{ 0x00, 0xa0, 0xa0 },
{ 0x00, 0xa1, 0xa1 },
{ 0x00, 0xa2, 0xa2 },
-{ 0x00, 0xa3, 0xa3 },
-{ 0x00, 0xa4, 0xa4 },
+{ 0x00, 0xa3, 0xb3 },
+{ 0x00, 0xa4, 0xb4 }, /* ie */
{ 0x00, 0xa5, 0xa5 },
-{ 0x00, 0xa6, 0xa6 },
-{ 0x00, 0xa7, 0xa7 },
+{ 0x00, 0xa6, 0xb6 }, /* i */
+{ 0x00, 0xa7, 0xb7 }, /* ii */
{ 0x00, 0xa8, 0xa8 },
{ 0x00, 0xa9, 0xa9 },
{ 0x00, 0xaa, 0xaa },
{ 0x00, 0xab, 0xab },
{ 0x00, 0xac, 0xac },
-{ 0x00, 0xad, 0xad },
+{ 0x00, 0xad, 0xbd }, /* g'' */
{ 0x00, 0xae, 0xae },
{ 0x00, 0xaf, 0xaf },
{ 0x00, 0xb0, 0xb0 },
{ 0x00, 0xb1, 0xb1 },
{ 0x00, 0xb2, 0xb2 },
-{ 0x00, 0xb3, 0xb3 },
-{ 0x00, 0xb4, 0xb4 },
+{ 0x01, 0xa3, 0xb3 },
+{ 0x00, 0xb4, 0xb4 }, /* IE */
{ 0x00, 0xb5, 0xb5 },
-{ 0x00, 0xb6, 0xb6 },
-{ 0x00, 0xb7, 0xb7 },
+{ 0x00, 0xb6, 0xb6 }, /* I */
+{ 0x00, 0xb7, 0xb7 }, /* II */
{ 0x00, 0xb8, 0xb8 },
{ 0x00, 0xb9, 0xb9 },
{ 0x00, 0xba, 0xba },
@@ -3524,93 +3524,612 @@ struct cs_info cp1251_tbl[] = {
{ 0x00, 0xbd, 0xbd },
{ 0x00, 0xbe, 0xbe },
{ 0x00, 0xbf, 0xbf },
-{ 0x00, 0xc0, 0xc0 },
-{ 0x00, 0xc1, 0xc1 },
-{ 0x00, 0xc2, 0xc2 },
-{ 0x00, 0xc3, 0xc3 },
-{ 0x00, 0xc4, 0xc4 },
-{ 0x00, 0xc5, 0xc5 },
-{ 0x00, 0xc6, 0xc6 },
-{ 0x00, 0xc7, 0xc7 },
-{ 0x00, 0xc8, 0xc8 },
-{ 0x00, 0xc9, 0xc9 },
-{ 0x00, 0xca, 0xca },
-{ 0x00, 0xcb, 0xcb },
-{ 0x00, 0xcc, 0xcc },
-{ 0x00, 0xcd, 0xcd },
-{ 0x00, 0xce, 0xce },
-{ 0x00, 0xcf, 0xcf },
-{ 0x00, 0xd0, 0xd0 },
-{ 0x00, 0xd1, 0xd1 },
-{ 0x00, 0xd2, 0xd2 },
-{ 0x00, 0xd3, 0xd3 },
-{ 0x00, 0xd4, 0xd4 },
-{ 0x00, 0xd5, 0xd5 },
-{ 0x00, 0xd6, 0xd6 },
-{ 0x00, 0xd7, 0xd7 },
-{ 0x00, 0xd8, 0xd8 },
-{ 0x00, 0xd9, 0xd9 },
-{ 0x00, 0xda, 0xda },
-{ 0x00, 0xdb, 0xdb },
-{ 0x00, 0xdc, 0xdc },
-{ 0x00, 0xdd, 0xdd },
-{ 0x00, 0xde, 0xde },
-{ 0x00, 0xdf, 0xdf },
-{ 0x00, 0xe0, 0xe0 },
-{ 0x00, 0xe1, 0xe1 },
-{ 0x00, 0xe2, 0xe2 },
-{ 0x00, 0xe3, 0xe3 },
-{ 0x00, 0xe4, 0xe4 },
-{ 0x00, 0xe5, 0xe5 },
-{ 0x00, 0xe6, 0xe6 },
-{ 0x00, 0xe7, 0xe7 },
-{ 0x00, 0xe8, 0xe8 },
-{ 0x00, 0xe9, 0xe9 },
-{ 0x00, 0xea, 0xea },
-{ 0x00, 0xeb, 0xeb },
-{ 0x00, 0xec, 0xec },
-{ 0x00, 0xed, 0xed },
-{ 0x00, 0xee, 0xee },
-{ 0x00, 0xef, 0xef },
-{ 0x00, 0xf0, 0xf0 },
-{ 0x00, 0xf1, 0xf1 },
-{ 0x00, 0xf2, 0xf2 },
-{ 0x00, 0xf3, 0xf3 },
-{ 0x00, 0xf4, 0xf4 },
-{ 0x00, 0xf5, 0xf5 },
-{ 0x00, 0xf6, 0xf6 },
-{ 0x00, 0xf7, 0xf7 },
-{ 0x00, 0xf8, 0xf8 },
-{ 0x00, 0xf9, 0xf9 },
-{ 0x00, 0xfa, 0xfa },
-{ 0x00, 0xfb, 0xfb },
-{ 0x00, 0xfc, 0xfc },
-{ 0x00, 0xfd, 0xfd },
-{ 0x00, 0xfe, 0xfe },
-{ 0x00, 0xff, 0xff },
-};
-
-struct cs_info iso14_tbl[] = {
-{ 0x00, 0x00, 0x00 },
-{ 0x00, 0x01, 0x01 },
-{ 0x00, 0x02, 0x02 },
-{ 0x00, 0x03, 0x03 },
-{ 0x00, 0x04, 0x04 },
-{ 0x00, 0x05, 0x05 },
-{ 0x00, 0x06, 0x06 },
-{ 0x00, 0x07, 0x07 },
-{ 0x00, 0x08, 0x08 },
-{ 0x00, 0x09, 0x09 },
-{ 0x00, 0x0a, 0x0a },
-{ 0x00, 0x0b, 0x0b },
-{ 0x00, 0x0c, 0x0c },
-{ 0x00, 0x0d, 0x0d },
-{ 0x00, 0x0e, 0x0e },
-{ 0x00, 0x0f, 0x0f },
-{ 0x00, 0x10, 0x10 },
-{ 0x00, 0x11, 0x11 },
-{ 0x00, 0x12, 0x12 },
-{ 0x00, 0x13, 0x13 },
+{ 0x00, 0xc0, 0xe0 },
+{ 0x00, 0xc1, 0xe1 },
+{ 0x00, 0xc2, 0xe2 },
+{ 0x00, 0xc3, 0xe3 },
+{ 0x00, 0xc4, 0xe4 },
+{ 0x00, 0xc5, 0xe5 },
+{ 0x00, 0xc6, 0xe6 },
+{ 0x00, 0xc7, 0xe7 },
+{ 0x00, 0xc8, 0xe8 },
+{ 0x00, 0xc9, 0xe9 },
+{ 0x00, 0xca, 0xea },
+{ 0x00, 0xcb, 0xeb },
+{ 0x00, 0xcc, 0xec },
+{ 0x00, 0xcd, 0xed },
+{ 0x00, 0xce, 0xee },
+{ 0x00, 0xcf, 0xef },
+{ 0x00, 0xd0, 0xf0 },
+{ 0x00, 0xd1, 0xf1 },
+{ 0x00, 0xd2, 0xf2 },
+{ 0x00, 0xd3, 0xf3 },
+{ 0x00, 0xd4, 0xf4 },
+{ 0x00, 0xd5, 0xf5 },
+{ 0x00, 0xd6, 0xf6 },
+{ 0x00, 0xd7, 0xf7 },
+{ 0x00, 0xd8, 0xf8 },
+{ 0x00, 0xd9, 0xf9 },
+{ 0x00, 0xda, 0xfa },
+{ 0x00, 0xdb, 0xfb },
+{ 0x00, 0xdc, 0xfc },
+{ 0x00, 0xdd, 0xfd },
+{ 0x00, 0xde, 0xfe },
+{ 0x00, 0xdf, 0xff },
+{ 0x01, 0xc0, 0xe0 },
+{ 0x01, 0xc1, 0xe1 },
+{ 0x01, 0xc2, 0xe2 },
+{ 0x01, 0xc3, 0xe3 },
+{ 0x01, 0xc4, 0xe4 },
+{ 0x01, 0xc5, 0xe5 },
+{ 0x01, 0xc6, 0xe6 },
+{ 0x01, 0xc7, 0xe7 },
+{ 0x01, 0xc8, 0xe8 },
+{ 0x01, 0xc9, 0xe9 },
+{ 0x01, 0xca, 0xea },
+{ 0x01, 0xcb, 0xeb },
+{ 0x01, 0xcc, 0xec },
+{ 0x01, 0xcd, 0xed },
+{ 0x01, 0xce, 0xee },
+{ 0x01, 0xcf, 0xef },
+{ 0x01, 0xd0, 0xf0 },
+{ 0x01, 0xd1, 0xf1 },
+{ 0x01, 0xd2, 0xf2 },
+{ 0x01, 0xd3, 0xf3 },
+{ 0x01, 0xd4, 0xf4 },
+{ 0x01, 0xd5, 0xf5 },
+{ 0x01, 0xd6, 0xf6 },
+{ 0x01, 0xd7, 0xf7 },
+{ 0x01, 0xd8, 0xf8 },
+{ 0x01, 0xd9, 0xf9 },
+{ 0x01, 0xda, 0xfa },
+{ 0x01, 0xdb, 0xfb },
+{ 0x01, 0xdc, 0xfc },
+{ 0x01, 0xdd, 0xfd },
+{ 0x01, 0xde, 0xfe },
+{ 0x01, 0xdf, 0xff },
+};
+
+struct cs_info cp1251_tbl[] = {
+{ 0x00, 0x00, 0x00 },
+{ 0x00, 0x01, 0x01 },
+{ 0x00, 0x02, 0x02 },
+{ 0x00, 0x03, 0x03 },
+{ 0x00, 0x04, 0x04 },
+{ 0x00, 0x05, 0x05 },
+{ 0x00, 0x06, 0x06 },
+{ 0x00, 0x07, 0x07 },
+{ 0x00, 0x08, 0x08 },
+{ 0x00, 0x09, 0x09 },
+{ 0x00, 0x0a, 0x0a },
+{ 0x00, 0x0b, 0x0b },
+{ 0x00, 0x0c, 0x0c },
+{ 0x00, 0x0d, 0x0d },
+{ 0x00, 0x0e, 0x0e },
+{ 0x00, 0x0f, 0x0f },
+{ 0x00, 0x10, 0x10 },
+{ 0x00, 0x11, 0x11 },
+{ 0x00, 0x12, 0x12 },
+{ 0x00, 0x13, 0x13 },
+{ 0x00, 0x14, 0x14 },
+{ 0x00, 0x15, 0x15 },
+{ 0x00, 0x16, 0x16 },
+{ 0x00, 0x17, 0x17 },
+{ 0x00, 0x18, 0x18 },
+{ 0x00, 0x19, 0x19 },
+{ 0x00, 0x1a, 0x1a },
+{ 0x00, 0x1b, 0x1b },
+{ 0x00, 0x1c, 0x1c },
+{ 0x00, 0x1d, 0x1d },
+{ 0x00, 0x1e, 0x1e },
+{ 0x00, 0x1f, 0x1f },
+{ 0x00, 0x20, 0x20 },
+{ 0x00, 0x21, 0x21 },
+{ 0x00, 0x22, 0x22 },
+{ 0x00, 0x23, 0x23 },
+{ 0x00, 0x24, 0x24 },
+{ 0x00, 0x25, 0x25 },
+{ 0x00, 0x26, 0x26 },
+{ 0x00, 0x27, 0x27 },
+{ 0x00, 0x28, 0x28 },
+{ 0x00, 0x29, 0x29 },
+{ 0x00, 0x2a, 0x2a },
+{ 0x00, 0x2b, 0x2b },
+{ 0x00, 0x2c, 0x2c },
+{ 0x00, 0x2d, 0x2d },
+{ 0x00, 0x2e, 0x2e },
+{ 0x00, 0x2f, 0x2f },
+{ 0x00, 0x30, 0x30 },
+{ 0x00, 0x31, 0x31 },
+{ 0x00, 0x32, 0x32 },
+{ 0x00, 0x33, 0x33 },
+{ 0x00, 0x34, 0x34 },
+{ 0x00, 0x35, 0x35 },
+{ 0x00, 0x36, 0x36 },
+{ 0x00, 0x37, 0x37 },
+{ 0x00, 0x38, 0x38 },
+{ 0x00, 0x39, 0x39 },
+{ 0x00, 0x3a, 0x3a },
+{ 0x00, 0x3b, 0x3b },
+{ 0x00, 0x3c, 0x3c },
+{ 0x00, 0x3d, 0x3d },
+{ 0x00, 0x3e, 0x3e },
+{ 0x00, 0x3f, 0x3f },
+{ 0x00, 0x40, 0x40 },
+{ 0x01, 0x61, 0x41 },
+{ 0x01, 0x62, 0x42 },
+{ 0x01, 0x63, 0x43 },
+{ 0x01, 0x64, 0x44 },
+{ 0x01, 0x65, 0x45 },
+{ 0x01, 0x66, 0x46 },
+{ 0x01, 0x67, 0x47 },
+{ 0x01, 0x68, 0x48 },
+{ 0x01, 0x69, 0x49 },
+{ 0x01, 0x6a, 0x4a },
+{ 0x01, 0x6b, 0x4b },
+{ 0x01, 0x6c, 0x4c },
+{ 0x01, 0x6d, 0x4d },
+{ 0x01, 0x6e, 0x4e },
+{ 0x01, 0x6f, 0x4f },
+{ 0x01, 0x70, 0x50 },
+{ 0x01, 0x71, 0x51 },
+{ 0x01, 0x72, 0x52 },
+{ 0x01, 0x73, 0x53 },
+{ 0x01, 0x74, 0x54 },
+{ 0x01, 0x75, 0x55 },
+{ 0x01, 0x76, 0x56 },
+{ 0x01, 0x77, 0x57 },
+{ 0x01, 0x78, 0x58 },
+{ 0x01, 0x79, 0x59 },
+{ 0x01, 0x7a, 0x5a },
+{ 0x00, 0x5b, 0x5b },
+{ 0x00, 0x5c, 0x5c },
+{ 0x00, 0x5d, 0x5d },
+{ 0x00, 0x5e, 0x5e },
+{ 0x00, 0x5f, 0x5f },
+{ 0x00, 0x60, 0x60 },
+{ 0x00, 0x61, 0x41 },
+{ 0x00, 0x62, 0x42 },
+{ 0x00, 0x63, 0x43 },
+{ 0x00, 0x64, 0x44 },
+{ 0x00, 0x65, 0x45 },
+{ 0x00, 0x66, 0x46 },
+{ 0x00, 0x67, 0x47 },
+{ 0x00, 0x68, 0x48 },
+{ 0x00, 0x69, 0x49 },
+{ 0x00, 0x6a, 0x4a },
+{ 0x00, 0x6b, 0x4b },
+{ 0x00, 0x6c, 0x4c },
+{ 0x00, 0x6d, 0x4d },
+{ 0x00, 0x6e, 0x4e },
+{ 0x00, 0x6f, 0x4f },
+{ 0x00, 0x70, 0x50 },
+{ 0x00, 0x71, 0x51 },
+{ 0x00, 0x72, 0x52 },
+{ 0x00, 0x73, 0x53 },
+{ 0x00, 0x74, 0x54 },
+{ 0x00, 0x75, 0x55 },
+{ 0x00, 0x76, 0x56 },
+{ 0x00, 0x77, 0x57 },
+{ 0x00, 0x78, 0x58 },
+{ 0x00, 0x79, 0x59 },
+{ 0x00, 0x7a, 0x5a },
+{ 0x00, 0x7b, 0x7b },
+{ 0x00, 0x7c, 0x7c },
+{ 0x00, 0x7d, 0x7d },
+{ 0x00, 0x7e, 0x7e },
+{ 0x00, 0x7f, 0x7f },
+{ 0x01, 0x90, 0x80 },
+{ 0x01, 0x83, 0x81 },
+{ 0x00, 0x82, 0x82 },
+{ 0x00, 0x83, 0x81 },
+{ 0x00, 0x84, 0x84 },
+{ 0x00, 0x85, 0x85 },
+{ 0x00, 0x86, 0x86 },
+{ 0x00, 0x87, 0x87 },
+{ 0x00, 0x88, 0x88 },
+{ 0x00, 0x89, 0x89 },
+{ 0x01, 0x9a, 0x8a },
+{ 0x00, 0x8b, 0x8b },
+{ 0x01, 0x9c, 0x8c },
+{ 0x01, 0x9d, 0x8d },
+{ 0x01, 0x9e, 0x8e },
+{ 0x01, 0x9f, 0x8f },
+{ 0x00, 0x90, 0x80 },
+{ 0x00, 0x91, 0x91 },
+{ 0x00, 0x92, 0x92 },
+{ 0x00, 0x93, 0x93 },
+{ 0x00, 0x94, 0x94 },
+{ 0x00, 0x95, 0x95 },
+{ 0x00, 0x96, 0x96 },
+{ 0x00, 0x97, 0x97 },
+{ 0x00, 0x98, 0x98 },
+{ 0x00, 0x99, 0x99 },
+{ 0x00, 0x9a, 0x8a },
+{ 0x00, 0x9b, 0x9b },
+{ 0x00, 0x9c, 0x8c },
+{ 0x00, 0x9d, 0x8d },
+{ 0x00, 0x9e, 0x8e },
+{ 0x00, 0x9f, 0x8f },
+{ 0x00, 0xa0, 0xa0 },
+{ 0x01, 0xa2, 0xa1 },
+{ 0x00, 0xa2, 0xa1 },
+{ 0x01, 0xbc, 0xa3 },
+{ 0x00, 0xa4, 0xa4 },
+{ 0x01, 0xb4, 0xa5 },
+{ 0x00, 0xa6, 0xa6 },
+{ 0x00, 0xa7, 0xa7 },
+{ 0x01, 0xb8, 0xa8 },
+{ 0x00, 0xa9, 0xa9 },
+{ 0x01, 0xba, 0xaa },
+{ 0x00, 0xab, 0xab },
+{ 0x00, 0xac, 0xac },
+{ 0x00, 0xad, 0xad },
+{ 0x00, 0xae, 0xae },
+{ 0x01, 0xbf, 0xaf },
+{ 0x00, 0xb0, 0xb0 },
+{ 0x00, 0xb1, 0xb1 },
+{ 0x01, 0xb3, 0xb2 },
+{ 0x00, 0xb3, 0xb2 },
+{ 0x00, 0xb4, 0xa5 },
+{ 0x00, 0xb5, 0xb5 },
+{ 0x00, 0xb6, 0xb6 },
+{ 0x00, 0xb7, 0xb7 },
+{ 0x00, 0xb8, 0xa8 },
+{ 0x00, 0xb9, 0xb9 },
+{ 0x00, 0xba, 0xaa },
+{ 0x00, 0xbb, 0xbb },
+{ 0x00, 0xbc, 0xa3 },
+{ 0x01, 0xbe, 0xbd },
+{ 0x00, 0xbe, 0xbd },
+{ 0x00, 0xbf, 0xaf },
+{ 0x01, 0xe0, 0xc0 },
+{ 0x01, 0xe1, 0xc1 },
+{ 0x01, 0xe2, 0xc2 },
+{ 0x01, 0xe3, 0xc3 },
+{ 0x01, 0xe4, 0xc4 },
+{ 0x01, 0xe5, 0xc5 },
+{ 0x01, 0xe6, 0xc6 },
+{ 0x01, 0xe7, 0xc7 },
+{ 0x01, 0xe8, 0xc8 },
+{ 0x01, 0xe9, 0xc9 },
+{ 0x01, 0xea, 0xca },
+{ 0x01, 0xeb, 0xcb },
+{ 0x01, 0xec, 0xcc },
+{ 0x01, 0xed, 0xcd },
+{ 0x01, 0xee, 0xce },
+{ 0x01, 0xef, 0xcf },
+{ 0x01, 0xf0, 0xd0 },
+{ 0x01, 0xf1, 0xd1 },
+{ 0x01, 0xf2, 0xd2 },
+{ 0x01, 0xf3, 0xd3 },
+{ 0x01, 0xf4, 0xd4 },
+{ 0x01, 0xf5, 0xd5 },
+{ 0x01, 0xf6, 0xd6 },
+{ 0x01, 0xf7, 0xd7 },
+{ 0x01, 0xf8, 0xd8 },
+{ 0x01, 0xf9, 0xd9 },
+{ 0x01, 0xfa, 0xda },
+{ 0x01, 0xfb, 0xdb },
+{ 0x01, 0xfc, 0xdc },
+{ 0x01, 0xfd, 0xdd },
+{ 0x01, 0xfe, 0xde },
+{ 0x01, 0xff, 0xdf },
+{ 0x00, 0xe0, 0xc0 },
+{ 0x00, 0xe1, 0xc1 },
+{ 0x00, 0xe2, 0xc2 },
+{ 0x00, 0xe3, 0xc3 },
+{ 0x00, 0xe4, 0xc4 },
+{ 0x00, 0xe5, 0xc5 },
+{ 0x00, 0xe6, 0xc6 },
+{ 0x00, 0xe7, 0xc7 },
+{ 0x00, 0xe8, 0xc8 },
+{ 0x00, 0xe9, 0xc9 },
+{ 0x00, 0xea, 0xca },
+{ 0x00, 0xeb, 0xcb },
+{ 0x00, 0xec, 0xcc },
+{ 0x00, 0xed, 0xcd },
+{ 0x00, 0xee, 0xce },
+{ 0x00, 0xef, 0xcf },
+{ 0x00, 0xf0, 0xd0 },
+{ 0x00, 0xf1, 0xd1 },
+{ 0x00, 0xf2, 0xd2 },
+{ 0x00, 0xf3, 0xd3 },
+{ 0x00, 0xf4, 0xd4 },
+{ 0x00, 0xf5, 0xd5 },
+{ 0x00, 0xf6, 0xd6 },
+{ 0x00, 0xf7, 0xd7 },
+{ 0x00, 0xf8, 0xd8 },
+{ 0x00, 0xf9, 0xd9 },
+{ 0x00, 0xfa, 0xda },
+{ 0x00, 0xfb, 0xdb },
+{ 0x00, 0xfc, 0xdc },
+{ 0x00, 0xfd, 0xdd },
+{ 0x00, 0xfe, 0xde },
+{ 0x00, 0xff, 0xdf },
+};
+
+struct cs_info iso13_tbl[] = {
+{ 0x00, 0x00, 0x00 },
+{ 0x00, 0x01, 0x01 },
+{ 0x00, 0x02, 0x02 },
+{ 0x00, 0x03, 0x03 },
+{ 0x00, 0x04, 0x04 },
+{ 0x00, 0x05, 0x05 },
+{ 0x00, 0x06, 0x06 },
+{ 0x00, 0x07, 0x07 },
+{ 0x00, 0x08, 0x08 },
+{ 0x00, 0x09, 0x09 },
+{ 0x00, 0x0A, 0x0A },
+{ 0x00, 0x0B, 0x0B },
+{ 0x00, 0x0C, 0x0C },
+{ 0x00, 0x0D, 0x0D },
+{ 0x00, 0x0E, 0x0E },
+{ 0x00, 0x0F, 0x0F },
+{ 0x00, 0x10, 0x10 },
+{ 0x00, 0x11, 0x11 },
+{ 0x00, 0x12, 0x12 },
+{ 0x00, 0x13, 0x13 },
+{ 0x00, 0x14, 0x14 },
+{ 0x00, 0x15, 0x15 },
+{ 0x00, 0x16, 0x16 },
+{ 0x00, 0x17, 0x17 },
+{ 0x00, 0x18, 0x18 },
+{ 0x00, 0x19, 0x19 },
+{ 0x00, 0x1A, 0x1A },
+{ 0x00, 0x1B, 0x1B },
+{ 0x00, 0x1C, 0x1C },
+{ 0x00, 0x1D, 0x1D },
+{ 0x00, 0x1E, 0x1E },
+{ 0x00, 0x1F, 0x1F },
+{ 0x00, 0x20, 0x20 },
+{ 0x00, 0x21, 0x21 },
+{ 0x00, 0x22, 0x22 },
+{ 0x00, 0x23, 0x23 },
+{ 0x00, 0x24, 0x24 },
+{ 0x00, 0x25, 0x25 },
+{ 0x00, 0x26, 0x26 },
+{ 0x00, 0x27, 0x27 },
+{ 0x00, 0x28, 0x28 },
+{ 0x00, 0x29, 0x29 },
+{ 0x00, 0x2A, 0x2A },
+{ 0x00, 0x2B, 0x2B },
+{ 0x00, 0x2C, 0x2C },
+{ 0x00, 0x2D, 0x2D },
+{ 0x00, 0x2E, 0x2E },
+{ 0x00, 0x2F, 0x2F },
+{ 0x00, 0x30, 0x30 },
+{ 0x00, 0x31, 0x31 },
+{ 0x00, 0x32, 0x32 },
+{ 0x00, 0x33, 0x33 },
+{ 0x00, 0x34, 0x34 },
+{ 0x00, 0x35, 0x35 },
+{ 0x00, 0x36, 0x36 },
+{ 0x00, 0x37, 0x37 },
+{ 0x00, 0x38, 0x38 },
+{ 0x00, 0x39, 0x39 },
+{ 0x00, 0x3A, 0x3A },
+{ 0x00, 0x3B, 0x3B },
+{ 0x00, 0x3C, 0x3C },
+{ 0x00, 0x3D, 0x3D },
+{ 0x00, 0x3E, 0x3E },
+{ 0x00, 0x3F, 0x3F },
+{ 0x00, 0x40, 0x40 },
+{ 0x01, 0x61, 0x41 },
+{ 0x01, 0x62, 0x42 },
+{ 0x01, 0x63, 0x43 },
+{ 0x01, 0x64, 0x44 },
+{ 0x01, 0x65, 0x45 },
+{ 0x01, 0x66, 0x46 },
+{ 0x01, 0x67, 0x47 },
+{ 0x01, 0x68, 0x48 },
+{ 0x01, 0x69, 0x49 },
+{ 0x01, 0x6A, 0x4A },
+{ 0x01, 0x6B, 0x4B },
+{ 0x01, 0x6C, 0x4C },
+{ 0x01, 0x6D, 0x4D },
+{ 0x01, 0x6E, 0x4E },
+{ 0x01, 0x6F, 0x4F },
+{ 0x01, 0x70, 0x50 },
+{ 0x01, 0x71, 0x51 },
+{ 0x01, 0x72, 0x52 },
+{ 0x01, 0x73, 0x53 },
+{ 0x01, 0x74, 0x54 },
+{ 0x01, 0x75, 0x55 },
+{ 0x01, 0x76, 0x56 },
+{ 0x01, 0x77, 0x57 },
+{ 0x01, 0x78, 0x58 },
+{ 0x01, 0x79, 0x59 },
+{ 0x01, 0x7A, 0x5A },
+{ 0x00, 0x5B, 0x5B },
+{ 0x00, 0x5C, 0x5C },
+{ 0x00, 0x5D, 0x5D },
+{ 0x00, 0x5E, 0x5E },
+{ 0x00, 0x5F, 0x5F },
+{ 0x00, 0x60, 0x60 },
+{ 0x00, 0x61, 0x41 },
+{ 0x00, 0x62, 0x42 },
+{ 0x00, 0x63, 0x43 },
+{ 0x00, 0x64, 0x44 },
+{ 0x00, 0x65, 0x45 },
+{ 0x00, 0x66, 0x46 },
+{ 0x00, 0x67, 0x47 },
+{ 0x00, 0x68, 0x48 },
+{ 0x00, 0x69, 0x49 },
+{ 0x00, 0x6A, 0x4A },
+{ 0x00, 0x6B, 0x4B },
+{ 0x00, 0x6C, 0x4C },
+{ 0x00, 0x6D, 0x4D },
+{ 0x00, 0x6E, 0x4E },
+{ 0x00, 0x6F, 0x4F },
+{ 0x00, 0x70, 0x50 },
+{ 0x00, 0x71, 0x51 },
+{ 0x00, 0x72, 0x52 },
+{ 0x00, 0x73, 0x53 },
+{ 0x00, 0x74, 0x54 },
+{ 0x00, 0x75, 0x55 },
+{ 0x00, 0x76, 0x56 },
+{ 0x00, 0x77, 0x57 },
+{ 0x00, 0x78, 0x58 },
+{ 0x00, 0x79, 0x59 },
+{ 0x00, 0x7A, 0x5A },
+{ 0x00, 0x7B, 0x7B },
+{ 0x00, 0x7C, 0x7C },
+{ 0x00, 0x7D, 0x7D },
+{ 0x00, 0x7E, 0x7E },
+{ 0x00, 0x7F, 0x7F },
+{ 0x00, 0x80, 0x80 },
+{ 0x00, 0x81, 0x81 },
+{ 0x00, 0x82, 0x82 },
+{ 0x00, 0x83, 0x83 },
+{ 0x00, 0x84, 0x84 },
+{ 0x00, 0x85, 0x85 },
+{ 0x00, 0x86, 0x86 },
+{ 0x00, 0x87, 0x87 },
+{ 0x00, 0x88, 0x88 },
+{ 0x00, 0x89, 0x89 },
+{ 0x00, 0x8A, 0x8A },
+{ 0x00, 0x8B, 0x8B },
+{ 0x00, 0x8C, 0x8C },
+{ 0x00, 0x8D, 0x8D },
+{ 0x00, 0x8E, 0x8E },
+{ 0x00, 0x8F, 0x8F },
+{ 0x00, 0x90, 0x90 },
+{ 0x00, 0x91, 0x91 },
+{ 0x00, 0x92, 0x92 },
+{ 0x00, 0x93, 0x93 },
+{ 0x00, 0x94, 0x94 },
+{ 0x00, 0x95, 0x95 },
+{ 0x00, 0x96, 0x96 },
+{ 0x00, 0x97, 0x97 },
+{ 0x00, 0x98, 0x98 },
+{ 0x00, 0x99, 0x99 },
+{ 0x00, 0x9A, 0x9A },
+{ 0x00, 0x9B, 0x9B },
+{ 0x00, 0x9C, 0x9C },
+{ 0x00, 0x9D, 0x9D },
+{ 0x00, 0x9E, 0x9E },
+{ 0x00, 0x9F, 0x9F },
+{ 0x00, 0xA0, 0xA0 },
+{ 0x00, 0xA1, 0xA1 },
+{ 0x00, 0xA2, 0xA2 },
+{ 0x00, 0xA3, 0xA3 },
+{ 0x00, 0xA4, 0xA4 },
+{ 0x00, 0xA5, 0xA5 },
+{ 0x00, 0xA6, 0xA6 },
+{ 0x00, 0xA7, 0xA7 },
+{ 0x00, 0xA8, 0xA8 },
+{ 0x00, 0xA9, 0xA9 },
+{ 0x00, 0xAA, 0xAA },
+{ 0x00, 0xAB, 0xAB },
+{ 0x00, 0xAC, 0xAC },
+{ 0x00, 0xAD, 0xAD },
+{ 0x00, 0xAE, 0xAE },
+{ 0x00, 0xAF, 0xAF },
+{ 0x00, 0xB0, 0xB0 },
+{ 0x00, 0xB1, 0xB1 },
+{ 0x00, 0xB2, 0xB2 },
+{ 0x00, 0xB3, 0xB3 },
+{ 0x00, 0xB4, 0xB4 },
+{ 0x00, 0xB5, 0xB5 },
+{ 0x00, 0xB6, 0xB6 },
+{ 0x00, 0xB7, 0xB7 },
+{ 0x00, 0xB8, 0xB8 },
+{ 0x00, 0xB9, 0xB9 },
+{ 0x00, 0xBA, 0xBA },
+{ 0x00, 0xBB, 0xBB },
+{ 0x00, 0xBC, 0xBC },
+{ 0x00, 0xBD, 0xBD },
+{ 0x00, 0xBE, 0xBE },
+{ 0x00, 0xBF, 0xBF },
+{ 0x00, 0xE0, 0xC0 },
+{ 0x00, 0xE1, 0xC1 },
+{ 0x00, 0xC2, 0xC2 },
+{ 0x00, 0xC3, 0xC3 },
+{ 0x00, 0xC4, 0xC4 },
+{ 0x00, 0xC5, 0xC5 },
+{ 0x00, 0xE6, 0xC6 },
+{ 0x00, 0xC7, 0xC7 },
+{ 0x00, 0xE8, 0xC8 },
+{ 0x00, 0xC9, 0xC9 },
+{ 0x00, 0xCA, 0xCA },
+{ 0x00, 0xEB, 0xCB },
+{ 0x00, 0xCC, 0xCC },
+{ 0x00, 0xCD, 0xCD },
+{ 0x00, 0xCE, 0xCE },
+{ 0x00, 0xCF, 0xCF },
+{ 0x00, 0xF0, 0xD0 },
+{ 0x00, 0xD1, 0xD1 },
+{ 0x00, 0xD2, 0xD2 },
+{ 0x00, 0xD3, 0xD3 },
+{ 0x00, 0xD4, 0xD4 },
+{ 0x00, 0xD5, 0xD5 },
+{ 0x00, 0xD6, 0xD6 },
+{ 0x00, 0xD7, 0xD7 },
+{ 0x00, 0xF8, 0xD8 },
+{ 0x00, 0xD9, 0xD9 },
+{ 0x00, 0xDA, 0xDA },
+{ 0x00, 0xFB, 0xDB },
+{ 0x00, 0xDC, 0xDC },
+{ 0x00, 0xDD, 0xDD },
+{ 0x00, 0xFE, 0xDE },
+{ 0x00, 0xDF, 0xDF },
+{ 0x00, 0xE0, 0xC0 },
+{ 0x00, 0xE1, 0xC1 },
+{ 0x00, 0xE2, 0xE2 },
+{ 0x00, 0xE3, 0xE3 },
+{ 0x00, 0xE4, 0xE4 },
+{ 0x00, 0xE5, 0xE5 },
+{ 0x00, 0xE6, 0xC6 },
+{ 0x00, 0xE7, 0xE7 },
+{ 0x00, 0xE8, 0xC8 },
+{ 0x00, 0xE9, 0xE9 },
+{ 0x00, 0xEA, 0xEA },
+{ 0x00, 0xEB, 0xCB },
+{ 0x00, 0xEC, 0xEC },
+{ 0x00, 0xED, 0xED },
+{ 0x00, 0xEE, 0xEE },
+{ 0x00, 0xEF, 0xEF },
+{ 0x00, 0xF0, 0xD0 },
+{ 0x00, 0xF1, 0xF1 },
+{ 0x00, 0xF2, 0xF2 },
+{ 0x00, 0xF3, 0xF3 },
+{ 0x00, 0xF4, 0xF4 },
+{ 0x00, 0xF5, 0xF5 },
+{ 0x00, 0xF6, 0xF6 },
+{ 0x00, 0xF7, 0xF7 },
+{ 0x00, 0xF8, 0xD8 },
+{ 0x00, 0xF9, 0xF9 },
+{ 0x00, 0xFA, 0xFA },
+{ 0x00, 0xFB, 0xDB },
+{ 0x00, 0xFC, 0xFC },
+{ 0x00, 0xFD, 0xFD },
+{ 0x00, 0xFE, 0xDE },
+{ 0x00, 0xFF, 0xFF },
+};
+
+
+struct cs_info iso14_tbl[] = {
+{ 0x00, 0x00, 0x00 },
+{ 0x00, 0x01, 0x01 },
+{ 0x00, 0x02, 0x02 },
+{ 0x00, 0x03, 0x03 },
+{ 0x00, 0x04, 0x04 },
+{ 0x00, 0x05, 0x05 },
+{ 0x00, 0x06, 0x06 },
+{ 0x00, 0x07, 0x07 },
+{ 0x00, 0x08, 0x08 },
+{ 0x00, 0x09, 0x09 },
+{ 0x00, 0x0a, 0x0a },
+{ 0x00, 0x0b, 0x0b },
+{ 0x00, 0x0c, 0x0c },
+{ 0x00, 0x0d, 0x0d },
+{ 0x00, 0x0e, 0x0e },
+{ 0x00, 0x0f, 0x0f },
+{ 0x00, 0x10, 0x10 },
+{ 0x00, 0x11, 0x11 },
+{ 0x00, 0x12, 0x12 },
+{ 0x00, 0x13, 0x13 },
{ 0x00, 0x14, 0x14 },
{ 0x00, 0x15, 0x15 },
{ 0x00, 0x16, 0x16 },
@@ -4108,267 +4627,6 @@ struct cs_info iscii_devanagari_tbl[] = {
{ 0x00, 0xff, 0xff },
};
-struct cs_info koi8u_tbl[] = {
-{ 0x00, 0x00, 0x00 },
-{ 0x00, 0x01, 0x01 },
-{ 0x00, 0x02, 0x02 },
-{ 0x00, 0x03, 0x03 },
-{ 0x00, 0x04, 0x04 },
-{ 0x00, 0x05, 0x05 },
-{ 0x00, 0x06, 0x06 },
-{ 0x00, 0x07, 0x07 },
-{ 0x00, 0x08, 0x08 },
-{ 0x00, 0x09, 0x09 },
-{ 0x00, 0x0a, 0x0a },
-{ 0x00, 0x0b, 0x0b },
-{ 0x00, 0x0c, 0x0c },
-{ 0x00, 0x0d, 0x0d },
-{ 0x00, 0x0e, 0x0e },
-{ 0x00, 0x0f, 0x0f },
-{ 0x00, 0x10, 0x10 },
-{ 0x00, 0x11, 0x11 },
-{ 0x00, 0x12, 0x12 },
-{ 0x00, 0x13, 0x13 },
-{ 0x00, 0x14, 0x14 },
-{ 0x00, 0x15, 0x15 },
-{ 0x00, 0x16, 0x16 },
-{ 0x00, 0x17, 0x17 },
-{ 0x00, 0x18, 0x18 },
-{ 0x00, 0x19, 0x19 },
-{ 0x00, 0x1a, 0x1a },
-{ 0x00, 0x1b, 0x1b },
-{ 0x00, 0x1c, 0x1c },
-{ 0x00, 0x1d, 0x1d },
-{ 0x00, 0x1e, 0x1e },
-{ 0x00, 0x1f, 0x1f },
-{ 0x00, 0x20, 0x20 },
-{ 0x00, 0x21, 0x21 },
-{ 0x00, 0x22, 0x22 },
-{ 0x00, 0x23, 0x23 },
-{ 0x00, 0x24, 0x24 },
-{ 0x00, 0x25, 0x25 },
-{ 0x00, 0x26, 0x26 },
-{ 0x00, 0x27, 0x27 },
-{ 0x00, 0x28, 0x28 },
-{ 0x00, 0x29, 0x29 },
-{ 0x00, 0x2a, 0x2a },
-{ 0x00, 0x2b, 0x2b },
-{ 0x00, 0x2c, 0x2c },
-{ 0x00, 0x2d, 0x2d },
-{ 0x00, 0x2e, 0x2e },
-{ 0x00, 0x2f, 0x2f },
-{ 0x00, 0x30, 0x30 },
-{ 0x00, 0x31, 0x31 },
-{ 0x00, 0x32, 0x32 },
-{ 0x00, 0x33, 0x33 },
-{ 0x00, 0x34, 0x34 },
-{ 0x00, 0x35, 0x35 },
-{ 0x00, 0x36, 0x36 },
-{ 0x00, 0x37, 0x37 },
-{ 0x00, 0x38, 0x38 },
-{ 0x00, 0x39, 0x39 },
-{ 0x00, 0x3a, 0x3a },
-{ 0x00, 0x3b, 0x3b },
-{ 0x00, 0x3c, 0x3c },
-{ 0x00, 0x3d, 0x3d },
-{ 0x00, 0x3e, 0x3e },
-{ 0x00, 0x3f, 0x3f },
-{ 0x00, 0x40, 0x40 },
-{ 0x01, 0x61, 0x41 },
-{ 0x01, 0x62, 0x42 },
-{ 0x01, 0x63, 0x43 },
-{ 0x01, 0x64, 0x44 },
-{ 0x01, 0x65, 0x45 },
-{ 0x01, 0x66, 0x46 },
-{ 0x01, 0x67, 0x47 },
-{ 0x01, 0x68, 0x48 },
-{ 0x01, 0x69, 0x49 },
-{ 0x01, 0x6a, 0x4a },
-{ 0x01, 0x6b, 0x4b },
-{ 0x01, 0x6c, 0x4c },
-{ 0x01, 0x6d, 0x4d },
-{ 0x01, 0x6e, 0x4e },
-{ 0x01, 0x6f, 0x4f },
-{ 0x01, 0x70, 0x50 },
-{ 0x01, 0x71, 0x51 },
-{ 0x01, 0x72, 0x52 },
-{ 0x01, 0x73, 0x53 },
-{ 0x01, 0x74, 0x54 },
-{ 0x01, 0x75, 0x55 },
-{ 0x01, 0x76, 0x56 },
-{ 0x01, 0x77, 0x57 },
-{ 0x01, 0x78, 0x58 },
-{ 0x01, 0x79, 0x59 },
-{ 0x01, 0x7a, 0x5a },
-{ 0x00, 0x5b, 0x5b },
-{ 0x00, 0x5c, 0x5c },
-{ 0x00, 0x5d, 0x5d },
-{ 0x00, 0x5e, 0x5e },
-{ 0x00, 0x5f, 0x5f },
-{ 0x00, 0x60, 0x60 },
-{ 0x00, 0x61, 0x41 },
-{ 0x00, 0x62, 0x42 },
-{ 0x00, 0x63, 0x43 },
-{ 0x00, 0x64, 0x44 },
-{ 0x00, 0x65, 0x45 },
-{ 0x00, 0x66, 0x46 },
-{ 0x00, 0x67, 0x47 },
-{ 0x00, 0x68, 0x48 },
-{ 0x00, 0x69, 0x49 },
-{ 0x00, 0x6a, 0x4a },
-{ 0x00, 0x6b, 0x4b },
-{ 0x00, 0x6c, 0x4c },
-{ 0x00, 0x6d, 0x4d },
-{ 0x00, 0x6e, 0x4e },
-{ 0x00, 0x6f, 0x4f },
-{ 0x00, 0x70, 0x50 },
-{ 0x00, 0x71, 0x51 },
-{ 0x00, 0x72, 0x52 },
-{ 0x00, 0x73, 0x53 },
-{ 0x00, 0x74, 0x54 },
-{ 0x00, 0x75, 0x55 },
-{ 0x00, 0x76, 0x56 },
-{ 0x00, 0x77, 0x57 },
-{ 0x00, 0x78, 0x58 },
-{ 0x00, 0x79, 0x59 },
-{ 0x00, 0x7a, 0x5a },
-{ 0x00, 0x7b, 0x7b },
-{ 0x00, 0x7c, 0x7c },
-{ 0x00, 0x7d, 0x7d },
-{ 0x00, 0x7e, 0x7e },
-{ 0x00, 0x7f, 0x7f },
-{ 0x00, 0x80, 0x80 },
-{ 0x00, 0x81, 0x81 },
-{ 0x00, 0x82, 0x82 },
-{ 0x00, 0x83, 0x83 },
-{ 0x00, 0x84, 0x84 },
-{ 0x00, 0x85, 0x85 },
-{ 0x00, 0x86, 0x86 },
-{ 0x00, 0x87, 0x87 },
-{ 0x00, 0x88, 0x88 },
-{ 0x00, 0x89, 0x89 },
-{ 0x00, 0x8a, 0x8a },
-{ 0x00, 0x8b, 0x8b },
-{ 0x00, 0x8c, 0x8c },
-{ 0x00, 0x8d, 0x8d },
-{ 0x00, 0x8e, 0x8e },
-{ 0x00, 0x8f, 0x8f },
-{ 0x00, 0x90, 0x90 },
-{ 0x00, 0x91, 0x91 },
-{ 0x00, 0x92, 0x92 },
-{ 0x00, 0x93, 0x93 },
-{ 0x00, 0x94, 0x94 },
-{ 0x00, 0x95, 0x95 },
-{ 0x00, 0x96, 0x96 },
-{ 0x00, 0x97, 0x97 },
-{ 0x00, 0x98, 0x98 },
-{ 0x00, 0x99, 0x99 },
-{ 0x00, 0x9a, 0x9a },
-{ 0x00, 0x9b, 0x9b },
-{ 0x00, 0x9c, 0x9c },
-{ 0x00, 0x9d, 0x9d },
-{ 0x00, 0x9e, 0x9e },
-{ 0x00, 0x9f, 0x9f },
-{ 0x00, 0xa0, 0xa0 },
-{ 0x00, 0xa1, 0xa1 },
-{ 0x00, 0xa2, 0xa2 },
-{ 0x00, 0xa3, 0xb3 },
-{ 0x00, 0xa4, 0xb4 },
-{ 0x00, 0xa5, 0xa5 },
-{ 0x00, 0xa6, 0xb6 },
-{ 0x00, 0xa7, 0xb7 },
-{ 0x00, 0xa8, 0xa8 },
-{ 0x00, 0xa9, 0xa9 },
-{ 0x00, 0xaa, 0xaa },
-{ 0x00, 0xab, 0xab },
-{ 0x00, 0xac, 0xac },
-{ 0x00, 0xad, 0xbd },
-{ 0x00, 0xae, 0xae },
-{ 0x00, 0xaf, 0xaf },
-{ 0x00, 0xb0, 0xb0 },
-{ 0x00, 0xb1, 0xb1 },
-{ 0x00, 0xb2, 0xb2 },
-{ 0x01, 0xa3, 0xb3 },
-{ 0x01, 0xa4, 0xb4 },
-{ 0x00, 0xb5, 0xb5 },
-{ 0x01, 0xa6, 0xb6 },
-{ 0x01, 0xa7, 0xb7 },
-{ 0x00, 0xb8, 0xb8 },
-{ 0x00, 0xb9, 0xb9 },
-{ 0x00, 0xba, 0xba },
-{ 0x00, 0xbb, 0xbb },
-{ 0x00, 0xbc, 0xbc },
-{ 0x01, 0xad, 0xbd },
-{ 0x00, 0xbe, 0xbe },
-{ 0x00, 0xbf, 0xbf },
-{ 0x00, 0xc0, 0xe0 },
-{ 0x00, 0xc1, 0xe1 },
-{ 0x00, 0xc2, 0xe2 },
-{ 0x00, 0xc3, 0xe3 },
-{ 0x00, 0xc4, 0xe4 },
-{ 0x00, 0xc5, 0xe5 },
-{ 0x00, 0xc6, 0xe6 },
-{ 0x00, 0xc7, 0xe7 },
-{ 0x00, 0xc8, 0xe8 },
-{ 0x00, 0xc9, 0xe9 },
-{ 0x00, 0xca, 0xea },
-{ 0x00, 0xcb, 0xeb },
-{ 0x00, 0xcc, 0xec },
-{ 0x00, 0xcd, 0xed },
-{ 0x00, 0xce, 0xee },
-{ 0x00, 0xcf, 0xef },
-{ 0x00, 0xd0, 0xf0 },
-{ 0x00, 0xd1, 0xf1 },
-{ 0x00, 0xd2, 0xf2 },
-{ 0x00, 0xd3, 0xf3 },
-{ 0x00, 0xd4, 0xf4 },
-{ 0x00, 0xd5, 0xf5 },
-{ 0x00, 0xd6, 0xf6 },
-{ 0x00, 0xd7, 0xf7 },
-{ 0x00, 0xd8, 0xf8 },
-{ 0x00, 0xd9, 0xf9 },
-{ 0x00, 0xda, 0xfa },
-{ 0x00, 0xdb, 0xfb },
-{ 0x00, 0xdc, 0xfc },
-{ 0x00, 0xdd, 0xfd },
-{ 0x00, 0xde, 0xfe },
-{ 0x00, 0xdf, 0xff },
-{ 0x01, 0xc0, 0xe0 },
-{ 0x01, 0xc1, 0xe1 },
-{ 0x01, 0xc2, 0xe2 },
-{ 0x01, 0xc3, 0xe3 },
-{ 0x01, 0xc4, 0xe4 },
-{ 0x01, 0xc5, 0xe5 },
-{ 0x01, 0xc6, 0xe6 },
-{ 0x01, 0xc7, 0xe7 },
-{ 0x01, 0xc8, 0xe8 },
-{ 0x01, 0xc9, 0xe9 },
-{ 0x01, 0xca, 0xea },
-{ 0x01, 0xcb, 0xeb },
-{ 0x01, 0xcc, 0xec },
-{ 0x01, 0xcd, 0xed },
-{ 0x01, 0xce, 0xee },
-{ 0x01, 0xcf, 0xef },
-{ 0x01, 0xd0, 0xf0 },
-{ 0x01, 0xd1, 0xf1 },
-{ 0x01, 0xd2, 0xf2 },
-{ 0x01, 0xd3, 0xf3 },
-{ 0x01, 0xd4, 0xf4 },
-{ 0x01, 0xd5, 0xf5 },
-{ 0x01, 0xd6, 0xf6 },
-{ 0x01, 0xd7, 0xf7 },
-{ 0x01, 0xd8, 0xf8 },
-{ 0x01, 0xd9, 0xf9 },
-{ 0x01, 0xda, 0xfa },
-{ 0x01, 0xdb, 0xfb },
-{ 0x01, 0xdc, 0xfc },
-{ 0x01, 0xdd, 0xfd },
-{ 0x01, 0xde, 0xfe },
-{ 0x01, 0xdf, 0xff },
-};
-
-
-
struct enc_entry encds[] = {
{"ISO8859-1",iso1_tbl},
{"ISO8859-2",iso2_tbl},
@@ -4381,8 +4639,9 @@ struct enc_entry encds[] = {
{"ISO8859-9",iso9_tbl},
{"ISO8859-10",iso10_tbl},
{"KOI8-R",koi8r_tbl},
-{"KOI8-U",koi8r_tbl},
+{"KOI8-U",koi8u_tbl},
{"microsoft-cp1251",cp1251_tbl},
+{"ISO8859-13", iso13_tbl},
{"ISO8859-14", iso14_tbl},
{"ISCII-DEVANAGARI", iscii_devanagari_tbl},
};
@@ -4424,6 +4683,7 @@ struct lang_map lang2enc[] = {
{"hu", "ISO8859-2", LANG_hu},
{"it", "ISO8859-1", LANG_it},
{"la", "ISO8859-1", LANG_la},
+{"lv", "ISO8859-13", LANG_lv},
{"nl", "ISO8859-1", LANG_nl},
{"pl", "ISO8859-2", LANG_pl},
{"pt", "ISO8859-1", LANG_pt},
diff --git a/src/hunspell/csutil.hxx b/src/hunspell/csutil.hxx
index 2a4d4cf..903f110 100644
--- a/src/hunspell/csutil.hxx
+++ b/src/hunspell/csutil.hxx
@@ -38,7 +38,7 @@ char * mystrrep(char *, const char *, const char *);
// is one string a leading subset of another
int isSubset(const char * s1, const char * s2);
-// is one string a leading subset of another
+// is one reverse string a leading subset of the end of another
int isRevSubset(const char * s1, const char * s2, int len);
// append s to ends of every lines in text
diff --git a/src/hunspell/dictmgr.cxx b/src/hunspell/dictmgr.cxx
index 628cedf..5d9ef6c 100644
--- a/src/hunspell/dictmgr.cxx
+++ b/src/hunspell/dictmgr.cxx
@@ -1,30 +1,25 @@
+
#include <cstdlib>
#include <cstring>
#include <cctype>
#include <cstdio>
#include "dictmgr.hxx"
-#include "csutil.hxx"
-#ifndef W32
using namespace std;
-#endif
-
DictMgr::DictMgr(const char * dictpath, const char * etype)
{
- // load list of dictionaries
+ // load list of etype entries
numdict = 0;
pdentry = (dictentry *)malloc(MAXDICTIONARIES*sizeof(struct dictentry));
if (pdentry) {
- if (parse_file(dictpath,etype)) {
+ if (parse_file(dictpath, etype)) {
numdict = 0;
- fprintf(stderr,"Failure loading dictionary list\n");
- fflush(stderr);
+ // no dictionary.lst found is okay
}
} else {
- fprintf(stderr,"Error - Insufficient Memory\n");
- fflush(stderr);
+ numdict = 0;
}
}
@@ -57,7 +52,7 @@ DictMgr::~DictMgr()
}
-// read in list of dictionaries and build up structure to describe them
+// read in list of etype entries and build up structure to describe them
int DictMgr::parse_file(const char * dictpath, const char * etype)
{
@@ -69,14 +64,13 @@ int DictMgr::parse_file(const char * dictpath, const char * etype)
FILE * dictlst;
dictlst = fopen(dictpath,"r");
if (!dictlst) {
- fprintf(stderr,"Error - could not open dictionary description file\n");
return 1;
}
// step one is to parse the dictionary list building up the
// descriptive structures
- // read in each line ignoring any that dont start with DICT
+ // read in each line ignoring any that dont start with etype
while (fgets(line,MAXDICTENTRYLEN,dictlst)) {
mychomp(line);
@@ -90,18 +84,25 @@ int DictMgr::parse_file(const char * dictpath, const char * etype)
if (*piece != '\0') {
switch(i) {
case 0: break;
- case 1: { pdict->lang = mystrdup(piece); break; }
- case 2: { pdict->region = mystrdup(piece); break; }
- case 3: { pdict->filename = mystrdup(piece); break; }
+ case 1: pdict->lang = mystrdup(piece); break;
+ case 2: if (strcmp (piece, "ANY") == 0)
+ pdict->region = mystrdup("");
+ else
+ pdict->region = mystrdup(piece);
+ break;
+ case 3: pdict->filename = mystrdup(piece); break;
default: break;
}
i++;
}
free(piece);
- if (i > 3) {
- numdict++;
- pdict++;
- }
+ }
+ if (i == 4) {
+ numdict++;
+ pdict++;
+ } else {
+ fprintf(stderr,"dictionary list corruption in line \"%s\"\n",line);
+ fflush(stderr);
}
}
}
@@ -117,3 +118,56 @@ int DictMgr::get_list(dictentry ** ppentry)
return numdict;
}
+
+
+// Split off the next token of *stringp at the single delimiter character
+// delim (acts like strsep(), but takes a delimiter char instead of a
+// delimiter string).
+//
+// The token is returned as a freshly malloc'ed, NUL-terminated copy that the
+// caller must free(). *stringp is advanced past the token and, if a
+// delimiter was found, past that delimiter; otherwise it is left pointing
+// at the terminating NUL.
+//
+// Returns NULL when there is nothing left to split (*stringp is NULL or
+// points at an empty string) or when the allocation fails; in both cases
+// *stringp is left unchanged.
+char * DictMgr::mystrsep(char ** stringp, const char delim)
+{
+  char * mp = *stringp;
+  if (!mp) return NULL;
+  size_t n = strlen(mp);
+  if (n == 0) return NULL;
+  char * dp = (char *)memchr(mp, (int)((unsigned char)delim), n);
+  // token length: up to the delimiter, or the whole remainder of the string;
+  // plain pointer subtraction avoids truncating casts through unsigned long
+  size_t nc = dp ? (size_t)(dp - mp) : n;
+  char * rv = (char *) malloc(nc + 1);
+  // out of memory: report "no token" instead of copying into NULL
+  if (!rv) return NULL;
+  memcpy(rv, mp, nc);
+  rv[nc] = '\0';
+  // resume after the delimiter, or at the end of the string
+  *stringp = dp ? dp + 1 : mp + n;
+  return rv;
+}
+
+
+// ANSI-only stand-in for strdup(): hands back a malloc'ed copy of s
+// (terminating NUL included), or NULL if s is NULL or malloc fails.
+char * DictMgr::mystrdup(const char * s)
+{
+  if (!s) return NULL;
+  int len = strlen(s);
+  // bytes to allocate and copy, counting the terminating NUL
+  size_t bytes = (len + 1) * sizeof(char);
+  char * copy = (char *) malloc(bytes);
+  if (!copy) return NULL;
+  memcpy(copy, s, bytes);
+  return copy;
+}
+
+
+// remove cross-platform text line end characters ("\n", "\r" or "\r\n")
+// from the end of s, in place.
+// A '\r' in the second-to-last position is only removed when the last
+// character was itself a line terminator, so a string that merely contains
+// a '\r' before an ordinary final character is left intact.
+void DictMgr::mychomp(char * s)
+{
+  size_t k = strlen(s);
+  // nothing to do unless the string ends in a line terminator
+  if ((k == 0) || ((s[k-1] != '\r') && (s[k-1] != '\n'))) return;
+  s[k-1] = '\0';
+  // first half of a two-character ending such as "\r\n"
+  if ((k > 1) && (s[k-2] == '\r')) s[k-2] = '\0';
+}
+
diff --git a/src/hunspell/dictmgr.hxx b/src/hunspell/dictmgr.hxx
index 1f5071b..88eeb7d 100644
--- a/src/hunspell/dictmgr.hxx
+++ b/src/hunspell/dictmgr.hxx
@@ -25,6 +25,9 @@ public:
private:
int parse_file(const char * dictpath, const char * etype);
+ char * mystrsep(char ** stringp, const char delim);
+ char * mystrdup(const char * s);
+ void mychomp(char * s);
};
diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx
index 8b176d7..5043227 100644
--- a/src/hunspell/hashmgr.cxx
+++ b/src/hunspell/hashmgr.cxx
@@ -70,7 +70,7 @@ HashMgr::~HashMgr()
// lookup a root word in the hashtable
-struct hentry * HashMgr::lookup(const char *word)
+struct hentry * HashMgr::lookup(const char *word) const
{
struct hentry * dp;
if (tableptr) {
@@ -87,32 +87,39 @@ struct hentry * HashMgr::lookup(const char *word)
int HashMgr::add_word(const char * word, int wl, unsigned short * aff, int al, const char * desc)
{
- struct hentry* hp = (struct hentry *) malloc (sizeof(struct hentry));
-
- hp->wlen = wl;
- hp->alen = al;
- hp->word = mystrdup(word);
+ char * st = mystrdup(word);
+ if (wl && !st) return 1;
if (complexprefixes) {
- if (utf8) reverseword_utf(hp->word); else reverseword(hp->word);
+ if (utf8) reverseword_utf(st); else reverseword(st);
}
- hp->astr = aff;
- hp->next = NULL;
- hp->next_homonym = NULL;
- hp->description = mystrdup(desc);
-
- int i = hash(hp->word);
+ int i = hash(st);
struct hentry * dp = &tableptr[i];
-
if (dp->word == NULL) {
- *dp = *hp;
- free(hp);
+ dp->wlen = wl;
+ dp->alen = al;
+ dp->word = st;
+ dp->astr = aff;
+ dp->next = NULL;
+ dp->next_homonym = NULL;
+ dp->description = mystrdup(desc);
+ if (desc && !dp->description) return 1;
} else {
- while (dp->next != NULL) {
- if ((!dp->next_homonym) && strcmp(hp->word, dp->word) == 0) dp->next_homonym = hp;
- dp=dp->next;
- }
- if ((!dp->next_homonym) && strcmp(hp->word, dp->word) == 0) dp->next_homonym = hp;
- dp->next = hp;
+ struct hentry* hp = (struct hentry *) malloc (sizeof(struct hentry));
+ if (!hp) return 1;
+ hp->wlen = wl;
+ hp->alen = al;
+ hp->word = st;
+ hp->astr = aff;
+ hp->next = NULL;
+ hp->next_homonym = NULL;
+ hp->description = mystrdup(desc);
+ if (desc && !hp->description) return 1;
+ while (dp->next != NULL) {
+ if ((!dp->next_homonym) && strcmp(hp->word, dp->word) == 0) dp->next_homonym = hp;
+ dp=dp->next;
+ }
+ if ((!dp->next_homonym) && strcmp(hp->word, dp->word) == 0) dp->next_homonym = hp;
+ dp->next = hp;
}
return 0;
}
@@ -188,6 +195,7 @@ int HashMgr::load_tables(const char * tpath)
mychomp(ts);
if ((*ts < '1') || (*ts > '9')) fprintf(stderr, "error - missing word count in dictionary file\n");
tablesize = atoi(ts);
+ if (!tablesize) return 4;
tablesize = tablesize + 5 + USERWORD;
if ((tablesize %2) == 0) tablesize++;
@@ -227,7 +235,7 @@ int HashMgr::load_tables(const char * tpath)
wl = strlen(ts);
// add the word and its index
- add_word(ts,wl,flags,al,dp);
+ if (add_word(ts,wl,flags,al,dp)) return 5;
}
@@ -239,7 +247,7 @@ int HashMgr::load_tables(const char * tpath)
// the hash function is a simple load and rotate
// algorithm borrowed
-int HashMgr::hash(const char * word)
+int HashMgr::hash(const char * word) const
{
long hv = 0;
for (int i=0; i < 4 && *word != 0; i++)
diff --git a/src/hunspell/hashmgr.hxx b/src/hunspell/hashmgr.hxx
index 4e60094..a5b41f3 100644
--- a/src/hunspell/hashmgr.hxx
+++ b/src/hunspell/hashmgr.hxx
@@ -18,8 +18,8 @@ public:
HashMgr(const char * tpath, const char * apath);
~HashMgr();
- struct hentry * lookup(const char *);
- int hash(const char *);
+ struct hentry * lookup(const char *) const;
+ int hash(const char *) const;
struct hentry * walk_hashtable(int & col, struct hentry * hp) const;
int put_word(const char * word, int wl, char * ap);
diff --git a/src/hunspell/hunspell.cxx b/src/hunspell/hunspell.cxx
index 125bcd3..b9441c9 100644
--- a/src/hunspell/hunspell.cxx
+++ b/src/hunspell/hunspell.cxx
@@ -111,13 +111,15 @@ int Hunspell::cleanword2(char * dest, const char * src,
} else {
unsigned short idx;
*nc = u8_u16(dest_utf, MAXWORDLEN, (const char *) q);
+ // don't check too long words
+ if (*nc >= MAXWORDLEN) return 0;
*nc -= *pabbrev;
for (int i = 0; i < *nc; i++) {
idx = (dest_utf[i].h << 8) + dest_utf[i].l;
if (idx != utfconv[idx].clower) ncap++;
if (utfconv[idx].cupper == utfconv[idx].clower) nneutral++;
}
- u16_u8(dest, MAXWORDLEN, dest_utf, *nc);
+ u16_u8(dest, MAXWORDUTF8LEN, dest_utf, *nc);
if (ncap) {
idx = (dest_utf[0].h << 8) + dest_utf[0].l;
firstcap = (idx != utfconv[idx].clower);
@@ -189,7 +191,7 @@ int Hunspell::cleanword(char * dest, const char * src,
if (idx != utfconv[idx].clower) ncap++;
if (utfconv[idx].cupper == utfconv[idx].clower) nneutral++;
}
- u16_u8(dest, MAXWORDLEN, t, nc);
+ u16_u8(dest, MAXWORDUTF8LEN, t, nc);
if (ncap) {
idx = (t[0].h << 8) + t[0].l;
firstcap = (idx != utfconv[idx].clower);
@@ -224,7 +226,7 @@ void Hunspell::mkallcap(char * p)
u[i].l = (unsigned char) (utfconv[idx].cupper & 0x00FF);
}
}
- u16_u8(p, MAXWORDLEN, u, nc);
+ u16_u8(p, MAXWORDUTF8LEN, u, nc);
} else {
while (*p != '\0') {
*p = csconv[((unsigned char) *p)].cupper;
@@ -244,7 +246,7 @@ int Hunspell::mkallcap2(char * p, w_char * u, int nc)
u[i].l = (unsigned char) (utfconv[idx].cupper & 0x00FF);
}
}
- u16_u8(p, MAXWORDLEN, u, nc);
+ u16_u8(p, MAXWORDUTF8LEN, u, nc);
return strlen(p);
} else {
while (*p != '\0') {
@@ -275,7 +277,7 @@ int Hunspell::mkallsmall2(char * p, w_char * u, int nc)
u[i].l = (unsigned char) (utfconv[idx].clower & 0x00FF);
}
}
- u16_u8(p, MAXWORDLEN, u, nc);
+ u16_u8(p, MAXWORDUTF8LEN, u, nc);
return strlen(p);
} else {
while (*p != '\0') {
@@ -317,16 +319,18 @@ hentry * Hunspell::spellsharps(char * base, char * pos, int n, int repnum, char
int Hunspell::spell(const char * word)
{
struct hentry * rv=NULL;
- char cw[MAXWORDLEN+1];
- char wspace[MAXWORDLEN+1];
- w_char unicw[MAXWORDLEN];
- int nc = strlen(word);
- int wl2;
-
// need larger vector. For example, Turkish capital letter I converted a
// 2-byte UTF-8 character (dotless i) by mkallsmall.
- if (nc > ((MAXWORDLEN - 1) / 3)) return 0;
-
+ char cw[MAXWORDUTF8LEN + 4];
+ char wspace[MAXWORDUTF8LEN + 4];
+ w_char unicw[MAXWORDLEN + 1];
+ int nc = strlen(word);
+ int wl2;
+ if (utf8) {
+ if (nc >= MAXWORDUTF8LEN) return 0;
+ } else {
+ if (nc >= MAXWORDLEN) return 0;
+ }
int captype = 0;
int abbv = 0;
int wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
@@ -384,7 +388,7 @@ int Hunspell::spell(const char * word)
(!utf8 && strchr(cw, '�')))) break;
// LANG_de section: German Sharp S (ß)
if ((langnum == LANG_de) && strstr(cw, "SS")) {
- char tmpword[MAXWORDLEN];
+ char tmpword[MAXWORDUTF8LEN];
wl = mkallsmall2(cw, unicw, nc);
memcpy(wspace,cw,(wl+1));
rv = spellsharps(wspace, wspace, 0, 0, tmpword);
@@ -480,7 +484,7 @@ struct hentry * Hunspell::check(const char * w)
{
struct hentry * he = NULL;
int len;
- char w2[MAXWORDLEN];
+ char w2[MAXWORDUTF8LEN];
const char * word = w;
// word reversing wrapper for complex prefixes
@@ -497,7 +501,7 @@ struct hentry * Hunspell::check(const char * w)
// look word in hash table
if (pHMgr) he = pHMgr->lookup(word);
- // check forbidden words (FIXME: don't check forbiddenword homonyms)
+ // check forbidden and onlyincompound words
if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
// LANG_hu section: set dash information for suggestions
if (langnum == LANG_hu) {
@@ -510,9 +514,11 @@ struct hentry * Hunspell::check(const char * w)
return NULL;
}
- // he = next not pseudoroot homonym or NULL
- while (he && (he->astr) && TESTAFF(he->astr, pAMgr->get_pseudoroot(), he->alen))
- he = he->next_homonym;
+ // he = next not pseudoroot and not onlyincompound homonym or NULL
+ while (he && (he->astr) &&
+ ((pAMgr->get_pseudoroot() && TESTAFF(he->astr, pAMgr->get_pseudoroot(), he->alen)) ||
+ (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen))
+ )) he = he->next_homonym;
// check with affixes
if (!he && pAMgr) {
@@ -520,14 +526,16 @@ struct hentry * Hunspell::check(const char * w)
len = strlen(word);
he = pAMgr->affix_check(word, len, 0);
+ // check compound restriction
+ if (he && he->astr && pAMgr->get_onlyincompound() &&
+ TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) he = NULL;
+
// try check compound word
if (he) {
- if ((he->astr) && (pAMgr) &&
- TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)
- && (! TESTAFF(he->astr, pAMgr->get_onlyroot(), he->alen))) {
- forbidden_compound = 1; // LANG_hu
- return NULL;
- }
+ if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
+ forbidden_compound = 1; // LANG_hu
+ return NULL;
+ }
prevroot = he->word;
} else if (pAMgr->get_compoundflag() || pAMgr->get_compoundbegin()) {
he = pAMgr->compound_check(word, len,
@@ -554,12 +562,16 @@ struct hentry * Hunspell::check(const char * w)
int Hunspell::suggest(char*** slst, const char * word)
{
- char cw[MAXWORDLEN+1];
- char wspace[MAXWORDLEN+1];
+ char cw[MAXWORDUTF8LEN + 4];
+ char wspace[MAXWORDUTF8LEN + 4];
if (! pSMgr) return 0;
- w_char unicw[MAXWORDLEN];
+ w_char unicw[MAXWORDLEN + 1];
int nc = strlen(word);
- if (nc > (MAXWORDLEN-1) / 3) return 0;
+ if (utf8) {
+ if (nc >= MAXWORDUTF8LEN) return 0;
+ } else {
+ if (nc >= MAXWORDLEN) return 0;
+ }
int captype = 0;
int abbv = 0;
int wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
@@ -571,27 +583,21 @@ int Hunspell::suggest(char*** slst, const char * word)
switch(captype) {
case NOCAP: {
ns = pSMgr->suggest(slst, cw, ns);
- if (ns>0) break;
break;
}
case INITCAP: {
+ capwords = 1;
+ ns = pSMgr->suggest(slst, cw, ns);
+ if (ns == -1) break;
memcpy(wspace,cw,(wl+1));
mkallsmall2(wspace, unicw, nc);
ns = pSMgr->suggest(slst, wspace, ns);
-
-// for (int j=0; j < ns; j++)
-// mkinitcap((*slst)[j]);
- capwords = 1;
-
- ns = pSMgr->suggest(slst, cw, ns);
break;
-
}
-
case HUHCAP: {
ns = pSMgr->suggest(slst, cw, ns);
- if (ns == 0) {
+ if (ns != -1) {
memcpy(wspace,cw,(wl+1));
mkallsmall2(wspace, unicw, nc);
ns = pSMgr->suggest(slst, wspace, ns);
@@ -603,6 +609,7 @@ int Hunspell::suggest(char*** slst, const char * word)
memcpy(wspace, cw, (wl+1));
mkallsmall2(wspace, unicw, nc);
ns = pSMgr->suggest(slst, wspace, ns);
+ if (ns == -1) break;
mkinitcap2(wspace, unicw, nc);
ns = pSMgr->suggest(slst, wspace, ns);
for (int j=0; j < ns; j++) {
@@ -637,25 +644,29 @@ int Hunspell::suggest(char*** slst, const char * word)
}
// try ngram approach since found nothing
- if (ns == 0) {
+ if ((ns == 0) && pAMgr && (pAMgr->get_maxngramsugs() != 0)) {
switch(captype) {
- case NOCAP:
- case HUHCAP: {
+ case NOCAP: {
ns = pSMgr->ngsuggest(*slst, cw, pHMgr);
break;
}
- case INITCAP: {
+ case HUHCAP: {
memcpy(wspace,cw,(wl+1));
mkallsmall2(wspace, unicw, nc);
ns = pSMgr->ngsuggest(*slst, wspace, pHMgr);
+ break;
+ }
+ case INITCAP: {
capwords = 1;
-
-// for (int j=0; j < ns; j++)
-// mkinitcap((*slst)[j]);
+ memcpy(wspace,cw,(wl+1));
+ mkallsmall2(wspace, unicw, nc);
+ ns = pSMgr->ngsuggest(*slst, wspace, pHMgr);
break;
}
case ALLCAP: {
- ns = pSMgr->ngsuggest(*slst, cw, pHMgr);
+ memcpy(wspace,cw,(wl+1));
+ mkallsmall2(wspace, unicw, nc);
+ ns = pSMgr->ngsuggest(*slst, wspace, pHMgr);
for (int j=0; j < ns; j++)
mkallcap((*slst)[j]);
break;
@@ -698,13 +709,18 @@ int Hunspell::suggest(char*** slst, const char * word)
return ns;
}
+// XXX need UTF-8 support
int Hunspell::suggest_auto(char*** slst, const char * word)
{
- char cw[MAXWORDLEN+1];
- char wspace[MAXWORDLEN+1];
+ char cw[MAXWORDUTF8LEN + 4];
+ char wspace[MAXWORDUTF8LEN + 4];
if (! pSMgr) return 0;
int wl = strlen(word);
- if (wl > (MAXWORDLEN-1)) return 0;
+ if (utf8) {
+ if (wl >= MAXWORDUTF8LEN) return 0;
+ } else {
+ if (wl >= MAXWORDLEN) return 0;
+ }
int captype = 0;
int abbv = 0;
wl = cleanword(cw, word, &captype, &abbv);
@@ -779,14 +795,18 @@ int Hunspell::suggest_auto(char*** slst, const char * word)
return ns;
}
-
+// XXX need UTF-8 support
int Hunspell::stem(char*** slst, const char * word)
{
- char cw[MAXWORDLEN+1];
- char wspace[MAXWORDLEN+1];
+ char cw[MAXWORDUTF8LEN + 4];
+ char wspace[MAXWORDUTF8LEN + 4];
if (! pSMgr) return 0;
int wl = strlen(word);
- if (wl > (MAXWORDLEN-1)) return 0;
+ if (utf8) {
+ if (wl >= MAXWORDUTF8LEN) return 0;
+ } else {
+ if (wl >= MAXWORDLEN) return 0;
+ }
int captype = 0;
int abbv = 0;
wl = cleanword(cw, word, &captype, &abbv);
@@ -865,11 +885,15 @@ int Hunspell::stem(char*** slst, const char * word)
int Hunspell::suggest_pos_stems(char*** slst, const char * word)
{
- char cw[MAXWORDLEN+1];
- char wspace[MAXWORDLEN+1];
+ char cw[MAXWORDUTF8LEN + 4];
+ char wspace[MAXWORDUTF8LEN + 4];
if (! pSMgr) return 0;
int wl = strlen(word);
- if (wl > (MAXWORDLEN-1)) return 0;
+ if (utf8) {
+ if (wl >= MAXWORDUTF8LEN) return 0;
+ } else {
+ if (wl >= MAXWORDLEN) return 0;
+ }
int captype = 0;
int abbv = 0;
wl = cleanword(cw, word, &captype, &abbv);
@@ -963,7 +987,7 @@ void Hunspell::mkinitcap(char * p)
unsigned short i = utfconv[(u[0].h << 8) + u[0].l].cupper;
u[0].h = (unsigned char) (i >> 8);
u[0].l = (unsigned char) (i & 0x00FF);
- u16_u8(p, MAXWORDLEN, u, len);
+ u16_u8(p, MAXWORDUTF8LEN, u, len);
}
}
@@ -975,7 +999,7 @@ int Hunspell::mkinitcap2(char * p, w_char * u, int nc)
unsigned short i = utfconv[(u[0].h << 8) + u[0].l].cupper;
u[0].h = (unsigned char) (i >> 8);
u[0].l = (unsigned char) (i & 0x00FF);
- u16_u8(p, MAXWORDLEN, u, nc);
+ u16_u8(p, MAXWORDUTF8LEN, u, nc);
return strlen(p);
}
return nc;
@@ -1015,14 +1039,18 @@ const char * Hunspell::get_version()
return pAMgr->get_version();
}
-
+// XXX need UTF-8 support
char * Hunspell::morph(const char * word)
{
- char cw[MAXWORDLEN+1];
- char wspace[MAXWORDLEN+1];
+ char cw[MAXWORDUTF8LEN + 4];
+ char wspace[MAXWORDUTF8LEN + 4];
if (! pSMgr) return 0;
int wl = strlen(word);
- if (wl > (MAXWORDLEN-1)) return 0;
+ if (utf8) {
+ if (wl >= MAXWORDUTF8LEN) return 0;
+ } else {
+ if (wl >= MAXWORDLEN) return 0;
+ }
int captype = 0;
int abbv = 0;
wl = cleanword(cw, word, &captype, &abbv);
@@ -1287,13 +1315,18 @@ char * Hunspell::morph(const char * word)
return NULL;
}
+// XXX need UTF-8 support
char * Hunspell::morph_with_correction(const char * word)
{
- char cw[MAXWORDLEN+1];
- char wspace[MAXWORDLEN+1];
+ char cw[MAXWORDUTF8LEN + 4];
+ char wspace[MAXWORDUTF8LEN + 4];
if (! pSMgr) return 0;
int wl = strlen(word);
- if (wl > (MAXWORDLEN-1)) return 0;
+ if (utf8) {
+ if (wl >= MAXWORDUTF8LEN) return 0;
+ } else {
+ if (wl >= MAXWORDLEN) return 0;
+ }
int captype = 0;
int abbv = 0;
wl = cleanword(cw, word, &captype, &abbv);
diff --git a/src/hunspell/langnum.hxx b/src/hunspell/langnum.hxx
index bad6891..19c3c5e 100644
--- a/src/hunspell/langnum.hxx
+++ b/src/hunspell/langnum.hxx
@@ -23,6 +23,7 @@ LANG_hr=78,
LANG_hu=36,
LANG_it=39,
LANG_la=99, // custom number
+LANG_lv=101, // custom number
LANG_nl=31,
LANG_pl=48,
LANG_pt=03,
diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
index a3a8134..58684ce 100644
--- a/src/hunspell/suggestmgr.cxx
+++ b/src/hunspell/suggestmgr.cxx
@@ -81,7 +81,7 @@ int SuggestMgr::suggest(char*** slst, const char * w, int nsug)
w_char word_utf[MAXSWL];
int wl;
- char w2[MAXSWL];
+ char w2[MAXWORDUTF8LEN];
const char * word = w;
// word reversing wrapper for complex prefixes
@@ -113,9 +113,11 @@ int SuggestMgr::suggest(char*** slst, const char * w, int nsug)
if ((nsug < maxSug) && (nsug > -1))
nsug = mapchars(wlst, word, nsug, cpdsuggest);
- // perhaps we made a special pattern mistake
- // if ((nsug < maxSug) && (nsug > -1))
- // nsug = doubledsyllable(wlst, word, nsug);
+ // did we swap the order of chars by mistake
+ if ((nsug < maxSug) && (nsug > -1)) {
+ nsug = (utf8) ? swapchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
+ swapchar(wlst, word, nsug, cpdsuggest);
+ }
// did we forget to add a char
if ((nsug < maxSug) && (nsug > -1)) {
@@ -123,13 +125,6 @@ int SuggestMgr::suggest(char*** slst, const char * w, int nsug)
forgotchar(wlst, word, nsug, cpdsuggest);
}
-
- // did we swap the order of chars by mistake
- if ((nsug < maxSug) && (nsug > -1)) {
- nsug = (utf8) ? swapchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
- swapchar(wlst, word, nsug, cpdsuggest);
- }
-
// did we add a char that should not be there
if ((nsug < maxSug) && (nsug > -1)) {
nsug = (utf8) ? extrachar_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
@@ -142,6 +137,7 @@ int SuggestMgr::suggest(char*** slst, const char * w, int nsug)
badchar(wlst, word, nsug, cpdsuggest);
}
+ // only suggest compound words when no other suggestion
if ((cpdsuggest==0) && (nsug>0)) nocompoundtwowords=1;
// perhaps we forgot to hit space and two words ran together
@@ -152,10 +148,11 @@ int SuggestMgr::suggest(char*** slst, const char * w, int nsug)
} // repeating ``for'' statement compounding support
if (nsug < 0) {
- for (int i=0;i<maxSug; i++)
+ // we ran out of memory - we should free up as much as possible
+ for (int i = 0; i < maxSug; i++)
if (wlst[i] != NULL) free(wlst[i]);
free(wlst);
- return -1;
+ wlst = NULL;
}
*slst = wlst;
@@ -170,7 +167,7 @@ int SuggestMgr::suggest_auto(char*** slst, const char * w, int nsug)
int nocompoundtwowords = 0;
char ** wlst;
- char w2[MAXSWL];
+ char w2[MAXWORDUTF8LEN];
const char * word = w;
// word reversing wrapper for complex prefixes
@@ -245,12 +242,14 @@ int SuggestMgr::mapchars(char** wlst, const char * word, int ns, int cpdsuggest)
int SuggestMgr::map_related(const char * word, int i, char** wlst, int ns,
const mapentry* maptable, int nummap, int * timer, time_t * timelimit)
{
- char c = *(word + i);
+ char c = *(word + i);
if (c == 0) {
- int cwrd = 1;
+ int cwrd = 1;
+ int wl;
for (int m=0; m < ns; m++)
if (strcmp(word,wlst[m]) == 0) cwrd = 0;
- if ((cwrd) && check(word,strlen(word), 1, timer, timelimit)) {
+ if ((cwrd) && (wl = strlen(word)) && (check(word, wl, 0, timer, timelimit) ||
+ check(word, wl, 1, timer, timelimit))) {
if (ns < maxSug) {
wlst[ns] = mystrdup(word);
if (wlst[ns] == NULL) return -1;
@@ -284,11 +283,13 @@ int SuggestMgr::map_related_utf(w_char * word, int len, int i, char** wlst, int
{
if (i == len) {
int cwrd = 1;
- char s[MAXSWL];
- u16_u8(s, MAXSWL, word, len);
+ int wl;
+ char s[MAXSWUTF8L];
+ u16_u8(s, MAXSWUTF8L, word, len);
for (int m=0; m < ns; m++)
if (strcmp(s,wlst[m]) == 0) cwrd = 0;
- if ((cwrd) && check(s,strlen(s), 1, timer, timelimit)) {
+ if ((cwrd) && (wl = strlen(s)) && (check(s, wl, 0, timer, timelimit) ||
+ check(s, wl, 1, timer, timelimit))) {
if (ns < maxSug) {
wlst[ns] = mystrdup(s);
if (wlst[ns] == NULL) return -1;
@@ -323,27 +324,27 @@ int SuggestMgr::map_related_utf(w_char * word, int len, int i, char** wlst, int
// differs with more, than 1 letter from the right form.
int SuggestMgr::replchars(char** wlst, const char * word, int ns, int cpdsuggest)
{
- char candidate[MAXSWL];
+ char candidate[MAXSWUTF8L];
const char * r;
int lenr, lenp;
int cwrd;
int wl = strlen(word);
- if (wl < 2 || ! pAMgr) return 0;
+ if (wl < 2 || ! pAMgr) return ns;
int numrep = pAMgr->get_numrep();
struct replentry* reptable = pAMgr->get_reptable();
- if (reptable==NULL) return 0;
+ if (reptable==NULL) return ns;
for (int i=0; i < numrep; i++ ) {
r = word;
- lenr = strlen(reptable[i].replacement);
+ lenr = strlen(reptable[i].pattern2);
lenp = strlen(reptable[i].pattern);
// search every occurence of the pattern in the word
while ((r=strstr(r, reptable[i].pattern)) != NULL) {
strcpy(candidate, word);
- if (r-word + lenr + strlen(r+lenp) >= MAXSWL) break;
- strcpy(candidate+(r-word),reptable[i].replacement);
+ if (r-word + lenr + strlen(r+lenp) >= MAXSWUTF8L) break;
+ strcpy(candidate+(r-word),reptable[i].pattern2);
strcpy(candidate+(r-word)+lenr, r+lenp);
cwrd = 1;
for (int k=0; k < ns; k++)
@@ -368,7 +369,7 @@ int SuggestMgr::replchars(char** wlst, const char * word, int ns, int cpdsuggest
// for example: vacation -> vacacation (doubled `ac')
int SuggestMgr::doubledsyllable(char** wlst, const char * word, int ns, int cpdsuggest)
{
- char candidate[MAXSWL];
+ char candidate[MAXSWUTF8L];
int state=0;
int cwrd;
@@ -407,7 +408,7 @@ int SuggestMgr::doubledsyllable(char** wlst, const char * word, int ns, int cpds
int SuggestMgr::badchar(char ** wlst, const char * word, int ns, int cpdsuggest)
{
char tmpc;
- char candidate[MAXSWL];
+ char candidate[MAXSWUTF8L];
time_t timelimit = time(NULL);
int timer = MINTIMER;
@@ -444,7 +445,7 @@ int SuggestMgr::badchar_utf(char ** wlst, const w_char * word, int wl, int ns, i
{
w_char tmpc;
w_char candidate_utf[MAXSWL];
- char candidate[MAXSWL];
+ char candidate[MAXSWUTF8L];
int cwrd;
time_t timelimit = time(NULL);
int timer = MINTIMER;
@@ -459,7 +460,7 @@ int SuggestMgr::badchar_utf(char ** wlst, const w_char * word, int wl, int ns, i
if ((ctry_utf[j].l == tmpc.l) && (ctry_utf[j].h == tmpc.h)) continue;
candidate_utf[i] = ctry_utf[j];
cwrd = 1;
- u16_u8(candidate, MAXSWL, candidate_utf, wl);
+ u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl);
for (int k=0; k < ns; k++)
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
if ((cwrd) && check(candidate, strlen(candidate), cpdsuggest, &timer, &timelimit)) {
@@ -479,7 +480,7 @@ int SuggestMgr::badchar_utf(char ** wlst, const w_char * word, int wl, int ns, i
// error is word has an extra letter it does not need
int SuggestMgr::extrachar_utf(char** wlst, const w_char * word, int wl, int ns, int cpdsuggest)
{
- char candidate[MAXSWL];
+ char candidate[MAXSWUTF8L];
w_char candidate_utf[MAXSWL];
const w_char * p;
@@ -492,7 +493,7 @@ int SuggestMgr::extrachar_utf(char** wlst, const w_char * word, int wl, int ns,
memcpy(candidate_utf, word + 1, (wl - 1) * sizeof(w_char));
for (p = word, r = candidate_utf; p < word + wl; ) {
cwrd = 1;
- u16_u8(candidate, MAXSWL, candidate_utf, wl - 1);
+ u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl - 1);
for (int k=0; k < ns; k++)
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
if ((cwrd) && check(candidate, strlen(candidate), cpdsuggest, NULL, NULL)) {
@@ -510,7 +511,7 @@ int SuggestMgr::extrachar_utf(char** wlst, const w_char * word, int wl, int ns,
// error is word has an extra letter it does not need
int SuggestMgr::extrachar(char** wlst, const char * word, int ns, int cpdsuggest)
{
- char candidate[MAXSWL];
+ char candidate[MAXSWUTF8L];
const char * p;
char * r;
int cwrd;
@@ -540,7 +541,7 @@ int SuggestMgr::extrachar(char** wlst, const char * word, int ns, int cpdsuggest
// error is missing a letter it needs
int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns, int cpdsuggest)
{
- char candidate[MAXSWL];
+ char candidate[MAXSWUTF8L];
const char * p;
char * q;
int cwrd;
@@ -590,7 +591,7 @@ int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns, int cpdsugge
int SuggestMgr::forgotchar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest)
{
w_char candidate_utf[MAXSWL];
- char candidate[MAXSWL];
+ char candidate[MAXSWUTF8L];
const w_char * p;
w_char * q;
int cwrd;
@@ -603,7 +604,7 @@ int SuggestMgr::forgotchar_utf(char ** wlst, const w_char * word, int wl, int ns
for (int i = 0; i < ctryl; i++) {
*q = ctry_utf[i];
cwrd = 1;
- u16_u8(candidate, MAXSWL, candidate_utf, wl + 1);
+ u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl + 1);
for (int k=0; k < ns; k++)
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
if ((cwrd) && check(candidate, strlen(candidate), cpdsuggest, &timer, &timelimit)) {
@@ -622,7 +623,7 @@ int SuggestMgr::forgotchar_utf(char ** wlst, const w_char * word, int wl, int ns
for (int i = 0; i < ctryl; i++) {
*q = ctry_utf[i];
cwrd = 1;
- u16_u8(candidate, MAXSWL, candidate_utf, wl + 1);
+ u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl + 1);
for (int k=0; k < ns; k++)
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
if ((cwrd) && check(candidate, strlen(candidate), cpdsuggest, NULL, NULL)) {
@@ -640,7 +641,7 @@ int SuggestMgr::forgotchar_utf(char ** wlst, const w_char * word, int wl, int ns
/* error is should have been two words */
int SuggestMgr::twowords(char ** wlst, const char * word, int ns, int cpdsuggest)
{
- char candidate[MAXSWL];
+ char candidate[MAXSWUTF8L];
char * p;
int c1, c2, cwrd;
int forbidden = 0;
@@ -694,7 +695,7 @@ int SuggestMgr::twowords(char ** wlst, const char * word, int ns, int cpdsuggest
// error is adjacent letter were swapped
int SuggestMgr::swapchar(char ** wlst, const char * word, int ns, int cpdsuggest)
{
- char candidate[MAXSWL];
+ char candidate[MAXSWUTF8L];
char * p;
char tmpc;
int cwrd;
@@ -728,7 +729,7 @@ int SuggestMgr::swapchar(char ** wlst, const char * word, int ns, int cpdsuggest
int SuggestMgr::swapchar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest)
{
w_char candidate_utf[MAXSWL];
- char candidate[MAXSWL];
+ char candidate[MAXSWUTF8L];
w_char * p;
w_char tmpc;
int cwrd;
@@ -740,7 +741,7 @@ int SuggestMgr::swapchar_utf(char ** wlst, const w_char * word, int wl, int ns,
*p = p[1];
p[1] = tmpc;
cwrd = 1;
- u16_u8(candidate, MAXSWL, candidate_utf, wl);
+ u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl);
for (int k=0; k < ns; k++)
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
if ((cwrd) && check(candidate, strlen(candidate), cpdsuggest, NULL, NULL)) {
@@ -778,7 +779,7 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, HashMgr* pHMgr)
}
lp = MAX_ROOTS - 1;
- char w2[MAXSWL];
+ char w2[MAXWORDUTF8LEN];
char * word = w;
// word reversing wrapper for complex prefixes
@@ -788,7 +789,7 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, HashMgr* pHMgr)
word = w2;
}
- char mw[MAXSWL];
+ char mw[MAXSWUTF8L];
w_char u8[MAXSWL];
int nc = strlen(word);
int n = (utf8) ? u8_u16(u8, MAXSWL, word) : nc;
@@ -815,11 +816,10 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, HashMgr* pHMgr)
// mangle original word three differnt ways
// and score them to generate a minimum acceptable score
int thresh = 0;
-// int n = (utf8) ? u8_u16(u8, MAXSWL, word) : strlen(word);
for (int sp = 1; sp < 4; sp++) {
if (utf8) {
for (int k=sp; k < n; k+=4) *((unsigned short *) u8 + k) = '*';
- u16_u8(mw, MAXSWL, u8, n);
+ u16_u8(mw, MAXSWUTF8L, u8, n);
thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH);
} else {
strcpy(mw, word);
@@ -871,7 +871,7 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, HashMgr* pHMgr)
}
}
}
- if (glst) free(glst);
+ free(glst);
// now we are done generating guesses
// sort in order of decreasing score
@@ -881,16 +881,41 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, HashMgr* pHMgr)
// weight suggestions with a similarity index, based on
// the longest common subsequent algorithm and resort
+ int is_swap;
for (i=0; i < MAX_GUESS; i++) {
if (guess[i]) {
- int lcs = lcslen(word, guess[i]);
- gscore[i] +=
- // length of longest common subsequent minus lenght difference
- 2 * lcs - abs((int) (n - mystrlen(guess[i]))) +
- // weight equal first letter
- equalfirstletter(word, guess[i]) +
- // weight equal character positions
- ((lcs == commoncharacterpositions(word, guess[i])) ? 1: 0);
+ // lowering guess[i]
+ char gl[MAXSWUTF8L];
+ int len;
+ if (utf8) {
+ w_char w[MAXSWL];
+ len = u8_u16(w, MAXSWL, guess[i]);
+ mkallsmall_utf(w, len, utfconv);
+ u16_u8(gl, MAXSWUTF8L, w, len);
+ } else {
+ strcpy(gl, guess[i]);
+ mkallsmall(gl, csconv);
+ len = strlen(guess[i]);
+ }
+
+ int lcs = lcslen(word, gl);
+
+ // same characters with different casing
+ if ((n == len) && (n == lcs)) {
+ gscore[i] += 2000;
+ break;
+ }
+
+ // heuristic weigthing of ngram scores
+ gscore[i] +=
+ // length of longest common subsequent minus lenght difference
+ 2 * lcs - abs((int) (n - len)) +
+ // weight equal first letter
+ equalfirstletter(word, gl) +
+ // weight equal character positions
+ ((lcs == commoncharacterpositions(word, gl, &is_swap)) ? 1: 0) +
+ // swap character (not neighboring)
+ ((is_swap) ? 1000 : 0);
}
}
@@ -899,10 +924,13 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, HashMgr* pHMgr)
// copy over
int ns = 0;
+ int same = 0;
for (i=0; i < MAX_GUESS; i++) {
if (guess[i]) {
- if ((ns < maxngramsugs) && (ns < maxSug)) {
+ if ((ns < maxngramsugs) && (ns < maxSug) && (!same || (gscore[i] > 1000))) {
int unique = 1;
+ // we have excellent suggestion(s)
+ if (gscore[i] > 1000) same = 1;
for (j=0; j < ns; j++)
// don't suggest previous suggestions or a previous suggestion with prefixes or affixes
if (strstr(guess[i], wlst[j]) ||
@@ -941,13 +969,20 @@ int SuggestMgr::check(const char * word, int len, int cpdsuggest, int * timer, t
}
if (pAMgr) {
+ if (cpdsuggest==1) {
+ if (pAMgr->get_compoundflag() || pAMgr->get_compoundbegin()) {
+ rv = pAMgr->compound_check(word,len,0,0,0,0,NULL,NULL);
+ if (rv) return 3; // XXX obsolote categorisation
+ }
+ return 0;
+ }
rv = pAMgr->lookup(word);
if (rv) {
if ((rv->astr) && (TESTAFF(rv->astr,pAMgr->get_forbiddenword(),rv->alen))) return 0;
if (rv->astr && TESTAFF(rv->astr,pAMgr->get_pseudoroot(),rv->alen)) rv = NULL;
- } else rv = pAMgr->prefix_check(word,len,1); // only prefix, and prefix + suffix XXX
+ } else rv = pAMgr->prefix_check(word, len, 0); // only prefix, and prefix + suffix XXX
if (rv) {
nosuffix=1;
@@ -961,24 +996,15 @@ int SuggestMgr::check(const char * word, int len, int cpdsuggest, int * timer, t
}
// check forbidden words
- if ((rv) && (rv->astr) && TESTAFF(rv->astr,pAMgr->get_forbiddenword(),rv->alen)
- && (! TESTAFF(rv->astr,pAMgr->get_onlyroot(),rv->alen))
- ) return 0;
-
- if (cpdsuggest==1) {
- if ((!rv) && (pAMgr->get_compoundflag() || pAMgr->get_compoundbegin())) {
- rv = pAMgr->compound_check(word,len,0,0,0,0,NULL,NULL);
- if (rv) return 3; // XXX obsolote
- }
- }
- }
+ if ((rv) && (rv->astr) && TESTAFF(rv->astr,pAMgr->get_forbiddenword(),rv->alen)) return 0;
- if (rv) { // XXX obsolote
+ if (rv) { // XXX obsolote
if ((pAMgr->get_compoundflag()) &&
TESTAFF(rv->astr, pAMgr->get_compoundflag(), rv->alen)) {
return 2 + nosuffix;
}
return 1;
+ }
}
return 0;
}
@@ -1001,11 +1027,11 @@ int SuggestMgr::check_forbidden(const char * word, int len)
// suggest stems, XXX experimental code
int SuggestMgr::suggest_stems(char*** slst, const char * w, int nsug)
{
- char buf[MAXSWL];
+ char buf[MAXSWUTF8L];
char ** wlst;
int prevnsug = nsug;
- char w2[MAXSWL];
+ char w2[MAXWORDUTF8LEN];
const char * word = w;
// word reversing wrapper for complex prefixes
@@ -1065,9 +1091,9 @@ int SuggestMgr::suggest_stems(char*** slst, const char * w, int nsug)
// there are fix stems in dictionary
int SuggestMgr::fixstems(char ** wlst, const char * word, int ns)
{
- char fix[MAXSWL];
- char buf[MAXSWL];
- char prefix[MAXSWL] = "";
+ char fix[MAXSWUTF8L];
+ char buf[MAXSWUTF8L];
+ char prefix[MAXSWUTF8L] = "";
char * p;
int dicstem = 1; // 0 = lookup, 1= affix, 2 = compound
@@ -1221,7 +1247,7 @@ while (rv) {
if ((rv2) && (rv2->astr) && (ns < maxSug))
if ((rv2) && (rv2->astr) && (ns < maxSug))
if (0) {
- char buf2[MAXSWL];
+ char buf2[MAXSWUTF8L];
strcpy(buf2, prefix);
@@ -1298,7 +1324,7 @@ int SuggestMgr::suggest_pos_stems(char*** slst, const char * w, int nsug)
struct hentry * rv = NULL;
- char w2[MAXSWL];
+ char w2[MAXSWUTF8L];
const char * word = w;
// word reversing wrapper for complex prefixes
@@ -1344,7 +1370,7 @@ char * SuggestMgr::suggest_morph(const char * w)
if (! pAMgr) return NULL;
- char w2[MAXSWL];
+ char w2[MAXSWUTF8L];
const char * word = w;
// word reversing wrapper for complex prefixes
@@ -1437,7 +1463,7 @@ int SuggestMgr::ngram(int n, char * s1, const char * s2, int uselen)
if (ns < 2) break;
}
} else {
- char t[MAXSWL];
+ char t[MAXSWUTF8L];
l1 = strlen(s1);
l2 = strlen(s2);
if (!l2) return 0;
@@ -1445,7 +1471,8 @@ int SuggestMgr::ngram(int n, char * s1, const char * s2, int uselen)
if (complexprefixes) {
*(t+l2-1) = csconv[((unsigned char)*(t+l2-1))].clower;
} else {
- *t = csconv[((unsigned char)*t)].clower;
+ mkallsmall(t, csconv);
+/// *t = csconv[((unsigned char)*t)].clower;
}
for (int j = 1; j <= n; j++) {
ns = 0;
@@ -1474,54 +1501,58 @@ int SuggestMgr::equalfirstletter(char * s1, const char * s2) {
if (complexprefixes) {
int l1 = u8_u16(su1, MAXSWL, s1);
int l2 = u8_u16(su2, MAXSWL, s2);
- mkallsmall_utf(su2+l2-1, 1, utfconv);
if (*((short *)su1+l1-1) == *((short *)su2+l2-1)) return 1;
} else {
u8_u16(su1, 1, s1);
u8_u16(su2, 1, s2);
- mkallsmall_utf(su2, 1, utfconv);
if (*((short *)su1) == *((short *)su2)) return 1;
}
} else {
if (complexprefixes) {
int l1 = strlen(s1);
int l2 = strlen(s2);
- if (*(s2+l1-1) == csconv[((unsigned char)*(s2+l2-1))].clower) return 1;
+ if (l1 && l2 && (*(s1+l1-1) == *(s2+l2-1))) return 1; // last char of s1 vs. last char of s2 (was s2 vs. s2); skip empty strings
} else {
- if (*s1 == csconv[((unsigned char)*s2)].clower) return 1;
+ if (*s1 == *s2) return 1;
}
}
return 0;
}
-int SuggestMgr::commoncharacterpositions(char * s1, const char * s2) {
+int SuggestMgr::commoncharacterpositions(char * s1, const char * s2, int * is_swap) {
int num = 0;
+ int diff = 0;
+ int diffpos[2];
+ *is_swap = 0;
if (utf8) {
w_char su1[MAXSWL];
w_char su2[MAXSWL];
int l1 = u8_u16(su1, MAXSWL, s1);
int l2 = u8_u16(su2, MAXSWL, s2);
- // decapitalize dictionary word
- if (complexprefixes) {
- mkallsmall_utf(su2+l2-1, 1, utfconv);
- char x[MAXSWL];
- u16_u8(x, MAXSWL, su2, l2);
- } else {
- mkallsmall_utf(su2, 1, utfconv);
+ for (int i = 0; (i < l1) && (i < l2); i++) {
+ if (((short *) su1)[i] == ((short *) su2)[i]) {
+ num++;
+ } else {
+ if (diff < 2) diffpos[diff] = i;
+ diff++;
+ }
}
- for (int i = 0; (i < l1) && (i < l2); i++)
- if (((short *) su1)[i] == ((short *) su2)[i]) num++;
+ if ((diff == 2) && (l1 == l2) &&
+ (((short *) su1)[diffpos[0]] == ((short *) su2)[diffpos[1]]) &&
+ (((short *) su1)[diffpos[1]] == ((short *) su2)[diffpos[0]])) *is_swap = 1;
} else {
- char t[MAXSWL];
- strcpy(t, s2);
- if (complexprefixes) {
- int l = strlen(t);
- *(t+l-1) = csconv[((unsigned char)*(t+l-1))].clower;
- } else {
- *t = csconv[((unsigned char)*t)].clower;
+ int i;
+ for (i = 0; (*(s1+i) != 0) && (*(s2+i) != 0); i++) {
+ if (*(s1+i) == *(s2+i)) {
+ num++;
+ } else {
+ if (diff < 2) diffpos[diff] = i;
+ diff++;
+ }
}
- for (int i = 0; (*(s1+i) != 0) && (*(t+i) != 0); i++)
- if (*(s1+i) == *(t+i)) num++;
+ if ((diff == 2) && (*(s1+i) == 0) && (*(s2+i) == 0) &&
+ (*(s1+diffpos[0]) == *(s2+diffpos[1])) &&
+ (*(s1+diffpos[1]) == *(s2+diffpos[0]))) *is_swap = 1;
}
return num;
}
diff --git a/src/hunspell/suggestmgr.hxx b/src/hunspell/suggestmgr.hxx
index d249d24..6d39a41 100644
--- a/src/hunspell/suggestmgr.hxx
+++ b/src/hunspell/suggestmgr.hxx
@@ -1,7 +1,8 @@
#ifndef _SUGGESTMGR_HXX_
#define _SUGGESTMGR_HXX_
-#define MAXSWL 300
+#define MAXSWL 100
+#define MAXSWUTF8L (MAXSWL * 4)
#define MAX_ROOTS 50
#define MAX_WORDS 200
#define MAX_GUESS 200
@@ -75,7 +76,7 @@ private:
int ngram(int n, char * s1, const char * s2, int uselen);
int mystrlen(const char * word);
int equalfirstletter(char * s1, const char * s2);
- int commoncharacterpositions(char * s1, const char * s2);
+ int commoncharacterpositions(char * s1, const char * s2, int * is_swap);
void bubblesort( char ** rwd, int * rsc, int n);
void lcs(const char * s, const char * s2, int * l1, int * l2, char ** result);
int lcslen(const char * s, const char* s2);
diff --git a/src/tools/hunmorph.cxx b/src/tools/hunmorph.cxx
index 03615e2..5032cc1 100644
--- a/src/tools/hunmorph.cxx
+++ b/src/tools/hunmorph.cxx
@@ -57,5 +57,6 @@ int main(int argc, char **argv)
}
}
delete pMS;
+ fclose(wtclst);
return 0;
}
diff --git a/src/tools/munch.c b/src/tools/munch.c
index d695b04..63d6933 100644
--- a/src/tools/munch.c
+++ b/src/tools/munch.c
@@ -66,7 +66,10 @@ int main(int argc, char** argv)
numpfx = 0;
numsfx = 0;
- parse_aff_file(afflst);
+ if (parse_aff_file(afflst)) {
+ fprintf(stderr,"Error - in affix file loading\n");
+ exit(1);
+ }
fclose(afflst);
fprintf(stderr,"parsed in %d prefixes and %d suffixes\n",numpfx,numsfx);
@@ -201,7 +204,7 @@ int main(int argc, char** argv)
}
-void parse_aff_file(FILE * afflst)
+int parse_aff_file(FILE * afflst)
{
int i, j;
int numents = 0;
@@ -245,7 +248,7 @@ void parse_aff_file(FILE * afflst)
/* now parse all of the sub entries*/
nptr = ptr;
for (j=0; j < numents; j++) {
- fgets(line,MAX_LN_LEN,afflst);
+ if (!fgets(line,MAX_LN_LEN,afflst)) return 1;
mychomp(line);
tp = line;
i = 0;
@@ -306,6 +309,7 @@ void parse_aff_file(FILE * afflst)
}
}
free(line);
+ return 0;
}
diff --git a/src/tools/munch.h b/src/tools/munch.h
index 8dc7aea..ee75878 100644
--- a/src/tools/munch.h
+++ b/src/tools/munch.h
@@ -87,7 +87,7 @@ struct dwords wlist[MAX_WORDS]; /* list words found */
/* the routines */
-void parse_aff_file(FILE* afflst);
+int parse_aff_file(FILE* afflst);
void encodeit(struct affent * ptr, char * cs);
diff --git a/src/tools/unmunch.c b/src/tools/unmunch.c
index 12725c3..d8180cd 100644
--- a/src/tools/unmunch.c
+++ b/src/tools/unmunch.c
@@ -63,7 +63,11 @@ int main(int argc, char** argv)
numpfx = 0;
numsfx = 0;
- parse_aff_file(afflst);
+ if (parse_aff_file(afflst)) {
+ fprintf(stderr,"Error - in affix file loading\n");
+ exit(1);
+ }
+
fclose(afflst);
fprintf(stderr,"parsed in %d prefixes and %d suffixes\n",numpfx,numsfx);
@@ -120,7 +124,7 @@ int main(int argc, char** argv)
-void parse_aff_file(FILE * afflst)
+int parse_aff_file(FILE * afflst)
{
int i, j;
int numents=0;
@@ -164,7 +168,7 @@ void parse_aff_file(FILE * afflst)
/* now parse all of the sub entries*/
nptr = ptr;
for (j=0; j < numents; j++) {
- fgets(line,MAX_LN_LEN,afflst);
+ if (!fgets(line,MAX_LN_LEN,afflst)) return 1;
mychomp(line);
tp = line;
i = 0;
@@ -225,6 +229,7 @@ void parse_aff_file(FILE * afflst)
}
}
free(line);
+ return 0;
}
diff --git a/src/tools/unmunch.h b/src/tools/unmunch.h
index 4f2bfa3..a8d99fb 100644
--- a/src/tools/unmunch.h
+++ b/src/tools/unmunch.h
@@ -59,7 +59,7 @@ struct dwords wlist[MAX_WORDS]; /* list words found */
/* the routines */
-void parse_aff_file(FILE* afflst);
+int parse_aff_file(FILE* afflst);
void encodeit(struct affent * ptr, char * cs);
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 47277a4..856b326 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -1,6 +1,9 @@
## Process this file with automake to create Makefile.in
TESTS = \
+base.test \
+map.test \
+rep.test \
flag.test \
flaglong.test \
flagnum.test \
@@ -9,9 +12,12 @@ pseudoroot.test \
pseudoroot2.test \
pseudoroot3.test \
pseudoroot4.test \
+pseudoroot5.test \
circumfix.test \
fogemorpheme.test \
+onlyincompound.test \
complexprefixes.test \
+complexprefixesutf.test \
conditionalprefix.test \
zeroaffix.test \
utf8.test \
@@ -19,16 +25,40 @@ compound.test \
compoundaffix.test \
compoundaffix2.test \
compoundaffix3.test \
+checkcompounddup.test \
+checkcompoundtriple.test \
+checkcompoundrep.test \
+checkcompoundpattern.test \
utfcompound.test \
germansharps.test \
germansharpsutf.test \
-germancompounding.test
+germancompounding.test \
+i35725.test \
+i54633.test \
+maputf.test \
+reputf.test
distclean-local:
-rm -rf testSubDir
EXTRA_DIST = \
-test_hunmorph \
+test.sh \
+base.aff \
+base.dic \
+base.good \
+base.sug \
+base.test \
+base.wrong \
+map.aff \
+map.dic \
+map.sug \
+map.test \
+map.wrong \
+rep.aff \
+rep.dic \
+rep.sug \
+rep.test \
+rep.wrong \
circumfix.aff \
circumfix.dic \
circumfix.good \
@@ -40,6 +70,11 @@ fogemorpheme.dic \
fogemorpheme.good \
fogemorpheme.test \
fogemorpheme.wrong \
+onlyincompound.aff \
+onlyincompound.dic \
+onlyincompound.good \
+onlyincompound.test \
+onlyincompound.wrong \
forbiddenword.aff \
forbiddenword.dic \
forbiddenword.good \
@@ -64,6 +99,11 @@ pseudoroot4.aff \
pseudoroot4.dic \
pseudoroot4.good \
pseudoroot4.test \
+pseudoroot5.aff \
+pseudoroot5.dic \
+pseudoroot5.good \
+pseudoroot5.test \
+pseudoroot5.wrong \
pseudoroot.aff \
pseudoroot.dic \
pseudoroot.good \
@@ -97,12 +137,31 @@ compoundaffix2.aff \
compoundaffix2.dic \
compoundaffix2.good \
compoundaffix2.test \
-compoundaffix2.wrong \
compoundaffix3.aff \
compoundaffix3.dic \
compoundaffix3.good \
compoundaffix3.test \
compoundaffix3.wrong \
+checkcompounddup.aff \
+checkcompounddup.dic \
+checkcompounddup.good \
+checkcompounddup.test \
+checkcompounddup.wrong \
+checkcompoundrep.aff \
+checkcompoundrep.dic \
+checkcompoundrep.good \
+checkcompoundrep.test \
+checkcompoundrep.wrong \
+checkcompoundtriple.aff \
+checkcompoundtriple.dic \
+checkcompoundtriple.good \
+checkcompoundtriple.test \
+checkcompoundtriple.wrong \
+checkcompoundpattern.aff \
+checkcompoundpattern.dic \
+checkcompoundpattern.good \
+checkcompoundpattern.test \
+checkcompoundpattern.wrong \
germansharps.aff \
germansharps.dic \
germansharps.good \
@@ -135,4 +194,31 @@ complexprefixes.aff \
complexprefixes.dic \
complexprefixes.good \
complexprefixes.wrong \
-complexprefixes.test
+complexprefixes.test \
+complexprefixesutf.aff \
+complexprefixesutf.dic \
+complexprefixesutf.good \
+complexprefixesutf.wrong \
+complexprefixesutf.test \
+i35725.aff \
+i35725.dic \
+i35725.good \
+i35725.sug \
+i35725.test \
+i35725.wrong \
+i54633.aff \
+i54633.dic \
+i54633.good \
+i54633.sug \
+i54633.test \
+i54633.wrong \
+maputf.aff \
+maputf.dic \
+maputf.sug \
+maputf.wrong \
+maputf.test \
+reputf.aff \
+reputf.dic \
+reputf.sug \
+reputf.wrong \
+reputf.test
diff --git a/tests/Makefile.in b/tests/Makefile.in
index 079d549..3325abd 100644
--- a/tests/Makefile.in
+++ b/tests/Makefile.in
@@ -165,6 +165,9 @@ sharedstatedir = @sharedstatedir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
TESTS = \
+base.test \
+map.test \
+rep.test \
flag.test \
flaglong.test \
flagnum.test \
@@ -173,9 +176,12 @@ pseudoroot.test \
pseudoroot2.test \
pseudoroot3.test \
pseudoroot4.test \
+pseudoroot5.test \
circumfix.test \
fogemorpheme.test \
+onlyincompound.test \
complexprefixes.test \
+complexprefixesutf.test \
conditionalprefix.test \
zeroaffix.test \
utf8.test \
@@ -183,13 +189,37 @@ compound.test \
compoundaffix.test \
compoundaffix2.test \
compoundaffix3.test \
+checkcompounddup.test \
+checkcompoundtriple.test \
+checkcompoundrep.test \
+checkcompoundpattern.test \
utfcompound.test \
germansharps.test \
germansharpsutf.test \
-germancompounding.test
+germancompounding.test \
+i35725.test \
+i54633.test \
+maputf.test \
+reputf.test
EXTRA_DIST = \
-test_hunmorph \
+test.sh \
+base.aff \
+base.dic \
+base.good \
+base.sug \
+base.test \
+base.wrong \
+map.aff \
+map.dic \
+map.sug \
+map.test \
+map.wrong \
+rep.aff \
+rep.dic \
+rep.sug \
+rep.test \
+rep.wrong \
circumfix.aff \
circumfix.dic \
circumfix.good \
@@ -201,6 +231,11 @@ fogemorpheme.dic \
fogemorpheme.good \
fogemorpheme.test \
fogemorpheme.wrong \
+onlyincompound.aff \
+onlyincompound.dic \
+onlyincompound.good \
+onlyincompound.test \
+onlyincompound.wrong \
forbiddenword.aff \
forbiddenword.dic \
forbiddenword.good \
@@ -225,6 +260,11 @@ pseudoroot4.aff \
pseudoroot4.dic \
pseudoroot4.good \
pseudoroot4.test \
+pseudoroot5.aff \
+pseudoroot5.dic \
+pseudoroot5.good \
+pseudoroot5.test \
+pseudoroot5.wrong \
pseudoroot.aff \
pseudoroot.dic \
pseudoroot.good \
@@ -258,12 +298,31 @@ compoundaffix2.aff \
compoundaffix2.dic \
compoundaffix2.good \
compoundaffix2.test \
-compoundaffix2.wrong \
compoundaffix3.aff \
compoundaffix3.dic \
compoundaffix3.good \
compoundaffix3.test \
compoundaffix3.wrong \
+checkcompounddup.aff \
+checkcompounddup.dic \
+checkcompounddup.good \
+checkcompounddup.test \
+checkcompounddup.wrong \
+checkcompoundrep.aff \
+checkcompoundrep.dic \
+checkcompoundrep.good \
+checkcompoundrep.test \
+checkcompoundrep.wrong \
+checkcompoundtriple.aff \
+checkcompoundtriple.dic \
+checkcompoundtriple.good \
+checkcompoundtriple.test \
+checkcompoundtriple.wrong \
+checkcompoundpattern.aff \
+checkcompoundpattern.dic \
+checkcompoundpattern.good \
+checkcompoundpattern.test \
+checkcompoundpattern.wrong \
germansharps.aff \
germansharps.dic \
germansharps.good \
@@ -296,7 +355,34 @@ complexprefixes.aff \
complexprefixes.dic \
complexprefixes.good \
complexprefixes.wrong \
-complexprefixes.test
+complexprefixes.test \
+complexprefixesutf.aff \
+complexprefixesutf.dic \
+complexprefixesutf.good \
+complexprefixesutf.wrong \
+complexprefixesutf.test \
+i35725.aff \
+i35725.dic \
+i35725.good \
+i35725.sug \
+i35725.test \
+i35725.wrong \
+i54633.aff \
+i54633.dic \
+i54633.good \
+i54633.sug \
+i54633.test \
+i54633.wrong \
+maputf.aff \
+maputf.dic \
+maputf.sug \
+maputf.wrong \
+maputf.test \
+reputf.aff \
+reputf.dic \
+reputf.sug \
+reputf.wrong \
+reputf.test
all: all-am
diff --git a/tests/base.aff b/tests/base.aff
new file mode 100644
index 0000000..f0c4dd0
--- /dev/null
+++ b/tests/base.aff
@@ -0,0 +1,192 @@
+# OpenOffice.org's en_US.aff file
+
+SET ISO8859-1
+TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ'
+
+WORDCHARS '
+
+PFX A Y 1
+PFX A 0 re .
+
+PFX I Y 1
+PFX I 0 in .
+
+PFX U Y 1
+PFX U 0 un .
+
+PFX C Y 1
+PFX C 0 de .
+
+PFX E Y 1
+PFX E 0 dis .
+
+PFX F Y 1
+PFX F 0 con .
+
+PFX K Y 1
+PFX K 0 pro .
+
+SFX V N 2
+SFX V e ive e
+SFX V 0 ive [^e]
+
+SFX N Y 3
+SFX N e ion e
+SFX N y ication y
+SFX N 0 en [^ey]
+
+SFX X Y 3
+SFX X e ions e
+SFX X y ications y
+SFX X 0 ens [^ey]
+
+SFX H N 2
+SFX H y ieth y
+SFX H 0 th [^y]
+
+SFX Y Y 1
+SFX Y 0 ly .
+
+SFX G Y 2
+SFX G e ing e
+SFX G 0 ing [^e]
+
+SFX J Y 2
+SFX J e ings e
+SFX J 0 ings [^e]
+
+SFX D Y 4
+SFX D 0 d e
+SFX D y ied [^aeiou]y
+SFX D 0 ed [^ey]
+SFX D 0 ed [aeiou]y
+
+SFX T N 4
+SFX T 0 st e
+SFX T y iest [^aeiou]y
+SFX T 0 est [aeiou]y
+SFX T 0 est [^ey]
+
+SFX R Y 4
+SFX R 0 r e
+SFX R y ier [^aeiou]y
+SFX R 0 er [aeiou]y
+SFX R 0 er [^ey]
+
+SFX Z Y 4
+SFX Z 0 rs e
+SFX Z y iers [^aeiou]y
+SFX Z 0 ers [aeiou]y
+SFX Z 0 ers [^ey]
+
+SFX S Y 4
+SFX S y ies [^aeiou]y
+SFX S 0 s [aeiou]y
+SFX S 0 es [sxzh]
+SFX S 0 s [^sxzhy]
+
+SFX P Y 3
+SFX P y iness [^aeiou]y
+SFX P 0 ness [aeiou]y
+SFX P 0 ness [^y]
+
+SFX M Y 1
+SFX M 0 's .
+
+SFX B Y 3
+SFX B 0 able [^aeiou]
+SFX B 0 able ee
+SFX B e able [^aeiou]e
+
+SFX L Y 1
+SFX L 0 ment .
+
+REP 88
+REP a ei
+REP ei a
+REP a ey
+REP ey a
+REP ai ie
+REP ie ai
+REP are air
+REP are ear
+REP are eir
+REP air are
+REP air ere
+REP ere air
+REP ere ear
+REP ere eir
+REP ear are
+REP ear air
+REP ear ere
+REP eir are
+REP eir ere
+REP ch te
+REP te ch
+REP ch ti
+REP ti ch
+REP ch tu
+REP tu ch
+REP ch s
+REP s ch
+REP ch k
+REP k ch
+REP f ph
+REP ph f
+REP gh f
+REP f gh
+REP i igh
+REP igh i
+REP i uy
+REP uy i
+REP i ee
+REP ee i
+REP j di
+REP di j
+REP j gg
+REP gg j
+REP j ge
+REP ge j
+REP s ti
+REP ti s
+REP s ci
+REP ci s
+REP k cc
+REP cc k
+REP k qu
+REP qu k
+REP kw qu
+REP o eau
+REP eau o
+REP o ew
+REP ew o
+REP oo ew
+REP ew oo
+REP ew ui
+REP ui ew
+REP oo ui
+REP ui oo
+REP ew u
+REP u ew
+REP oo u
+REP u oo
+REP u oe
+REP oe u
+REP u ieu
+REP ieu u
+REP ue ew
+REP ew ue
+REP uff ough
+REP oo ieu
+REP ieu oo
+REP ier ear
+REP ear ier
+REP ear air
+REP air ear
+REP w qu
+REP qu w
+REP z ss
+REP ss z
+REP shun tion
+REP shun sion
+REP shun cion
diff --git a/tests/base.dic b/tests/base.dic
new file mode 100644
index 0000000..9d78959
--- /dev/null
+++ b/tests/base.dic
@@ -0,0 +1,28 @@
+27
+created/U
+create/XKVNGADS
+imply/GNSDX
+natural/PUY
+like/USPBY
+convey/BDGS
+look/GZRDS
+text
+hello
+said
+sawyer
+NASA
+rotten
+day
+tomorrow
+seven
+FAQ/SM
+can't
+doesn't
+etc
+won't
+lip
+text
+horrifying
+speech
+suggest
+uncreate/V
diff --git a/tests/base.good b/tests/base.good
new file mode 100644
index 0000000..cf2d6cf
--- /dev/null
+++ b/tests/base.good
@@ -0,0 +1,22 @@
+created
+uncreate
+uncreated
+imply
+implied
+unnatural
+conveyed
+sawyer
+NASA
+FAQs
+can't
+doesn't
+won't
+Created
+Hello
+HELLO
+NASA
+etc.
+etc
+HELLO
+lip.
+text.
\ No newline at end of file
diff --git a/tests/base.sug b/tests/base.sug
new file mode 100644
index 0000000..a4a8186
--- /dev/null
+++ b/tests/base.sug
@@ -0,0 +1,10 @@
+looked
+text
+hello
+said
+rotten day
+tomorrow
+NASA
+horrifying
+speech
+suggest
diff --git a/tests/circumfix.test b/tests/base.test
similarity index 69%
copy from tests/circumfix.test
copy to tests/base.test
index 89ca772..7f44369 100755
--- a/tests/circumfix.test
+++ b/tests/base.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/base.wrong b/tests/base.wrong
new file mode 100644
index 0000000..23b9fa4
--- /dev/null
+++ b/tests/base.wrong
@@ -0,0 +1,11 @@
+loooked
+texxt
+hlelo
+seid
+rottenday
+tomorow
+sezzven
+Nasa
+horrorfying
+peech
+sugesst
diff --git a/tests/checkcompounddup.aff b/tests/checkcompounddup.aff
new file mode 100644
index 0000000..5cd357a
--- /dev/null
+++ b/tests/checkcompounddup.aff
@@ -0,0 +1,3 @@
+# Forbid word duplication in compounds (e.g. foofoo)
+CHECKCOMPOUNDDUP
+COMPOUNDFLAG A
diff --git a/tests/checkcompounddup.dic b/tests/checkcompounddup.dic
new file mode 100644
index 0000000..8ac75f4
--- /dev/null
+++ b/tests/checkcompounddup.dic
@@ -0,0 +1,3 @@
+2
+foo/A
+bar/A
diff --git a/tests/checkcompounddup.good b/tests/checkcompounddup.good
new file mode 100644
index 0000000..3866f24
--- /dev/null
+++ b/tests/checkcompounddup.good
@@ -0,0 +1,5 @@
+barfoo
+foobar
+foofoobar
+foobarfoo
+barfoobarfoo
diff --git a/tests/circumfix.test b/tests/checkcompounddup.test
similarity index 69%
copy from tests/circumfix.test
copy to tests/checkcompounddup.test
index 89ca772..7f44369 100755
--- a/tests/circumfix.test
+++ b/tests/checkcompounddup.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/checkcompounddup.wrong b/tests/checkcompounddup.wrong
new file mode 100644
index 0000000..5e809b3
--- /dev/null
+++ b/tests/checkcompounddup.wrong
@@ -0,0 +1,3 @@
+foofoo
+foofoofoo
+foobarbar
diff --git a/tests/checkcompoundpattern.aff b/tests/checkcompoundpattern.aff
new file mode 100644
index 0000000..dfda51a
--- /dev/null
+++ b/tests/checkcompoundpattern.aff
@@ -0,0 +1,5 @@
+# forbid compounds with spec. pattern at word bounds
+COMPOUNDFLAG A
+CHECKCOMPOUNDPATTERN 2
+CHECKCOMPOUNDPATTERN nny ny
+CHECKCOMPOUNDPATTERN ssz sz
diff --git a/tests/checkcompoundpattern.dic b/tests/checkcompoundpattern.dic
new file mode 100644
index 0000000..09300f0
--- /dev/null
+++ b/tests/checkcompoundpattern.dic
@@ -0,0 +1,5 @@
+4
+k�nny/A
+nyel�s/A
+hossz/A
+sz�m�t�s/A
diff --git a/tests/checkcompoundpattern.good b/tests/checkcompoundpattern.good
new file mode 100644
index 0000000..0f99c52
--- /dev/null
+++ b/tests/checkcompoundpattern.good
@@ -0,0 +1,2 @@
+k�nnysz�m�t�s
+hossznyel�s
diff --git a/tests/circumfix.test b/tests/checkcompoundpattern.test
similarity index 69%
copy from tests/circumfix.test
copy to tests/checkcompoundpattern.test
index 89ca772..7f44369 100755
--- a/tests/circumfix.test
+++ b/tests/checkcompoundpattern.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/checkcompoundpattern.wrong b/tests/checkcompoundpattern.wrong
new file mode 100644
index 0000000..5edd115
--- /dev/null
+++ b/tests/checkcompoundpattern.wrong
@@ -0,0 +1,4 @@
+k�nnynyel�s
+hosszsz�m�t�s
+hosszk�nnynyel�s
+k�nnynyel�shossz
diff --git a/tests/checkcompoundrep.aff b/tests/checkcompoundrep.aff
new file mode 100644
index 0000000..4fb7ff5
--- /dev/null
+++ b/tests/checkcompoundrep.aff
@@ -0,0 +1,8 @@
+// forbid compound word, if it is also a non compound word with a REP fault
+// In example: Hungarian `szerv�z' (szer+v�z) compound word is forbidden, because
+// this word is also a dictionary word (szerviz) with typical fault (i->�)
+CHECKCOMPOUNDREP
+COMPOUNDFLAG A
+
+REP 1
+REP � i
diff --git a/tests/checkcompoundrep.dic b/tests/checkcompoundrep.dic
new file mode 100644
index 0000000..5b5d227
--- /dev/null
+++ b/tests/checkcompoundrep.dic
@@ -0,0 +1,4 @@
+3
+szer/A
+v�z/A
+szerviz
diff --git a/tests/checkcompoundrep.good b/tests/checkcompoundrep.good
new file mode 100644
index 0000000..e7ef1f8
--- /dev/null
+++ b/tests/checkcompoundrep.good
@@ -0,0 +1 @@
+v�zszer
diff --git a/tests/circumfix.test b/tests/checkcompoundrep.test
similarity index 69%
copy from tests/circumfix.test
copy to tests/checkcompoundrep.test
index 89ca772..7f44369 100755
--- a/tests/circumfix.test
+++ b/tests/checkcompoundrep.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/checkcompoundrep.wrong b/tests/checkcompoundrep.wrong
new file mode 100644
index 0000000..843f4b8
--- /dev/null
+++ b/tests/checkcompoundrep.wrong
@@ -0,0 +1 @@
+szerv�z
diff --git a/tests/checkcompoundtriple.aff b/tests/checkcompoundtriple.aff
new file mode 100644
index 0000000..7159cf5
--- /dev/null
+++ b/tests/checkcompoundtriple.aff
@@ -0,0 +1,3 @@
+# Forbid compound word with triple letters
+CHECKCOMPOUNDTRIPLE
+COMPOUNDFLAG A
diff --git a/tests/checkcompoundtriple.dic b/tests/checkcompoundtriple.dic
new file mode 100644
index 0000000..607c489
--- /dev/null
+++ b/tests/checkcompoundtriple.dic
@@ -0,0 +1,5 @@
+4
+foo/A
+opera/A
+eel/A
+bare/A
diff --git a/tests/checkcompoundtriple.good b/tests/checkcompoundtriple.good
new file mode 100644
index 0000000..1293f74
--- /dev/null
+++ b/tests/checkcompoundtriple.good
@@ -0,0 +1,6 @@
+operafoo
+operaeel
+operabare
+eelbare
+eelfoo
+eelopera
diff --git a/tests/circumfix.test b/tests/checkcompoundtriple.test
similarity index 69%
copy from tests/circumfix.test
copy to tests/checkcompoundtriple.test
index 89ca772..7f44369 100755
--- a/tests/circumfix.test
+++ b/tests/checkcompoundtriple.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/checkcompoundtriple.wrong b/tests/checkcompoundtriple.wrong
new file mode 100644
index 0000000..ae2d02b
--- /dev/null
+++ b/tests/checkcompoundtriple.wrong
@@ -0,0 +1,2 @@
+fooopera
+bareeel
diff --git a/tests/circumfix.test b/tests/circumfix.test
index 89ca772..7f44369 100755
--- a/tests/circumfix.test
+++ b/tests/circumfix.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/complexprefixes.test b/tests/complexprefixes.test
index 89ca772..7f44369 100755
--- a/tests/complexprefixes.test
+++ b/tests/complexprefixes.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/complexprefixesutf.aff b/tests/complexprefixesutf.aff
new file mode 100644
index 0000000..3991e9f
--- /dev/null
+++ b/tests/complexprefixesutf.aff
@@ -0,0 +1,12 @@
+# Coptic example by Moheb Mekhaiel
+# Encoded with the new Coptic character encoding of Unicode 4.1
+SET UTF-8
+
+# set twofold prefix stripping
+COMPLEXPREFIXES
+
+PFX A Y 1
+PFX A 0 ⲧⲉⲕ .
+
+PFX B Y 1
+PFX B 0 ⲙⲉⲧ/A .
diff --git a/tests/complexprefixesutf.dic b/tests/complexprefixesutf.dic
new file mode 100644
index 0000000..bd0eb6d
--- /dev/null
+++ b/tests/complexprefixesutf.dic
@@ -0,0 +1,2 @@
+1
+ⲟⲩⲣⲟ/B
diff --git a/tests/complexprefixesutf.good b/tests/complexprefixesutf.good
new file mode 100644
index 0000000..9fc6833
--- /dev/null
+++ b/tests/complexprefixesutf.good
@@ -0,0 +1,3 @@
+ⲟⲩⲣⲟ
+ⲙⲉⲧⲟⲩⲣⲟ
+ⲧⲉⲕⲙⲉⲧⲟⲩⲣⲟ
diff --git a/tests/circumfix.test b/tests/complexprefixesutf.test
similarity index 69%
copy from tests/circumfix.test
copy to tests/complexprefixesutf.test
index 89ca772..4d9d1a8 100755
--- a/tests/circumfix.test
+++ b/tests/complexprefixesutf.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME -1
diff --git a/tests/complexprefixesutf.wrong b/tests/complexprefixesutf.wrong
new file mode 100644
index 0000000..2fc991c
--- /dev/null
+++ b/tests/complexprefixesutf.wrong
@@ -0,0 +1,2 @@
+ⲧⲉⲕⲟⲩⲣⲟ
+ⲙⲉⲧⲧⲉⲕⲟⲩⲣⲟ
diff --git a/tests/compound.test b/tests/compound.test
index 89ca772..7f44369 100755
--- a/tests/compound.test
+++ b/tests/compound.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/compoundaffix.good b/tests/compoundaffix.good
index ce45599..af1f001 100644
--- a/tests/compoundaffix.good
+++ b/tests/compoundaffix.good
@@ -1,4 +1,5 @@
foo
+foofoo
prefoo
foosuf
prefoosuf
diff --git a/tests/compoundaffix.test b/tests/compoundaffix.test
index 89ca772..7f44369 100755
--- a/tests/compoundaffix.test
+++ b/tests/compoundaffix.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/compoundaffix.wrong b/tests/compoundaffix.wrong
index 31e1cd9..b7e4067 100644
--- a/tests/compoundaffix.wrong
+++ b/tests/compoundaffix.wrong
@@ -1,4 +1,3 @@
-foofoo
foosufbar
fooprebarsuf
prefooprebarsuf
diff --git a/tests/compoundaffix2.test b/tests/compoundaffix2.test
index 89ca772..7f44369 100755
--- a/tests/compoundaffix2.test
+++ b/tests/compoundaffix2.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/compoundaffix2.wrong b/tests/compoundaffix2.wrong
deleted file mode 100644
index 55b5f1f..0000000
--- a/tests/compoundaffix2.wrong
+++ /dev/null
@@ -1 +0,0 @@
-foofoo
diff --git a/tests/compoundaffix3.good b/tests/compoundaffix3.good
index 3e37077..76cc08e 100644
--- a/tests/compoundaffix3.good
+++ b/tests/compoundaffix3.good
@@ -1,4 +1,5 @@
foo
+foofoo
prefoo
foosuf
prefoosuf
diff --git a/tests/compoundaffix3.test b/tests/compoundaffix3.test
index 89ca772..7f44369 100755
--- a/tests/compoundaffix3.test
+++ b/tests/compoundaffix3.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/compoundaffix3.wrong b/tests/compoundaffix3.wrong
index 3b5cd13..d92b90b 100644
--- a/tests/compoundaffix3.wrong
+++ b/tests/compoundaffix3.wrong
@@ -1,4 +1,3 @@
-foofoo
prefoobarsuf
foosufbar
fooprebar
diff --git a/tests/conditionalprefix.test b/tests/conditionalprefix.test
index 89ca772..7f44369 100755
--- a/tests/conditionalprefix.test
+++ b/tests/conditionalprefix.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/flag.test b/tests/flag.test
index 89ca772..7f44369 100755
--- a/tests/flag.test
+++ b/tests/flag.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/flaglong.test b/tests/flaglong.test
index 89ca772..7f44369 100755
--- a/tests/flaglong.test
+++ b/tests/flaglong.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/flagnum.test b/tests/flagnum.test
index 89ca772..7f44369 100755
--- a/tests/flagnum.test
+++ b/tests/flagnum.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/fogemorpheme.test b/tests/fogemorpheme.test
index 89ca772..7f44369 100755
--- a/tests/fogemorpheme.test
+++ b/tests/fogemorpheme.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/forbiddenword.test b/tests/forbiddenword.test
index 89ca772..7f44369 100755
--- a/tests/forbiddenword.test
+++ b/tests/forbiddenword.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/germancompounding.test b/tests/germancompounding.test
index 89ca772..7f44369 100755
--- a/tests/germancompounding.test
+++ b/tests/germancompounding.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/germansharps.test b/tests/germansharps.test
index c18806a..4d9d1a8 100755
--- a/tests/germansharps.test
+++ b/tests/germansharps.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME -1
+$DIR/test.sh $NAME -1
diff --git a/tests/germansharpsutf.test b/tests/germansharpsutf.test
index c18806a..4d9d1a8 100755
--- a/tests/germansharpsutf.test
+++ b/tests/germansharpsutf.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME -1
+$DIR/test.sh $NAME -1
diff --git a/tests/i35725.aff b/tests/i35725.aff
new file mode 100644
index 0000000..f847cc6
--- /dev/null
+++ b/tests/i35725.aff
@@ -0,0 +1,199 @@
+# Ngram suggestions
+# - fix case problem
+# - detect character swapping (keep only these suggestions)
+# - lesser suggestions
+# - weight with common subsequence algorithm
+# - suggest uppercased words
+
+# OpenOffice.org's en_US.aff file
+
+SET ISO8859-1
+TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ'
+
+WORDCHARS '
+
+PFX A Y 1
+PFX A 0 re .
+
+PFX I Y 1
+PFX I 0 in .
+
+PFX U Y 1
+PFX U 0 un .
+
+PFX C Y 1
+PFX C 0 de .
+
+PFX E Y 1
+PFX E 0 dis .
+
+PFX F Y 1
+PFX F 0 con .
+
+PFX K Y 1
+PFX K 0 pro .
+
+SFX V N 2
+SFX V e ive e
+SFX V 0 ive [^e]
+
+SFX N Y 3
+SFX N e ion e
+SFX N y ication y
+SFX N 0 en [^ey]
+
+SFX X Y 3
+SFX X e ions e
+SFX X y ications y
+SFX X 0 ens [^ey]
+
+SFX H N 2
+SFX H y ieth y
+SFX H 0 th [^y]
+
+SFX Y Y 1
+SFX Y 0 ly .
+
+SFX G Y 2
+SFX G e ing e
+SFX G 0 ing [^e]
+
+SFX J Y 2
+SFX J e ings e
+SFX J 0 ings [^e]
+
+SFX D Y 4
+SFX D 0 d e
+SFX D y ied [^aeiou]y
+SFX D 0 ed [^ey]
+SFX D 0 ed [aeiou]y
+
+SFX T N 4
+SFX T 0 st e
+SFX T y iest [^aeiou]y
+SFX T 0 est [aeiou]y
+SFX T 0 est [^ey]
+
+SFX R Y 4
+SFX R 0 r e
+SFX R y ier [^aeiou]y
+SFX R 0 er [aeiou]y
+SFX R 0 er [^ey]
+
+SFX Z Y 4
+SFX Z 0 rs e
+SFX Z y iers [^aeiou]y
+SFX Z 0 ers [aeiou]y
+SFX Z 0 ers [^ey]
+
+SFX S Y 4
+SFX S y ies [^aeiou]y
+SFX S 0 s [aeiou]y
+SFX S 0 es [sxzh]
+SFX S 0 s [^sxzhy]
+
+SFX P Y 3
+SFX P y iness [^aeiou]y
+SFX P 0 ness [aeiou]y
+SFX P 0 ness [^y]
+
+SFX M Y 1
+SFX M 0 's .
+
+SFX B Y 3
+SFX B 0 able [^aeiou]
+SFX B 0 able ee
+SFX B e able [^aeiou]e
+
+SFX L Y 1
+SFX L 0 ment .
+
+REP 88
+REP a ei
+REP ei a
+REP a ey
+REP ey a
+REP ai ie
+REP ie ai
+REP are air
+REP are ear
+REP are eir
+REP air are
+REP air ere
+REP ere air
+REP ere ear
+REP ere eir
+REP ear are
+REP ear air
+REP ear ere
+REP eir are
+REP eir ere
+REP ch te
+REP te ch
+REP ch ti
+REP ti ch
+REP ch tu
+REP tu ch
+REP ch s
+REP s ch
+REP ch k
+REP k ch
+REP f ph
+REP ph f
+REP gh f
+REP f gh
+REP i igh
+REP igh i
+REP i uy
+REP uy i
+REP i ee
+REP ee i
+REP j di
+REP di j
+REP j gg
+REP gg j
+REP j ge
+REP ge j
+REP s ti
+REP ti s
+REP s ci
+REP ci s
+REP k cc
+REP cc k
+REP k qu
+REP qu k
+REP kw qu
+REP o eau
+REP eau o
+REP o ew
+REP ew o
+REP oo ew
+REP ew oo
+REP ew ui
+REP ui ew
+REP oo ui
+REP ui oo
+REP ew u
+REP u ew
+REP oo u
+REP u oo
+REP u oe
+REP oe u
+REP u ieu
+REP ieu u
+REP ue ew
+REP ew ue
+REP uff ough
+REP oo ieu
+REP ieu oo
+REP ier ear
+REP ear ier
+REP ear air
+REP air ear
+REP w qu
+REP qu w
+REP z ss
+REP ss z
+REP shun tion
+REP shun sion
+REP shun cion
diff --git a/tests/i35725.dic b/tests/i35725.dic
new file mode 100644
index 0000000..0c61f00
--- /dev/null
+++ b/tests/i35725.dic
@@ -0,0 +1,15 @@
+15
+endangerment/SM
+ferment/FSCM
+preferment/SM
+impermanent/Y
+permanent/YSP
+semipermanent/Y
+empowerment/MS
+supermen
+tournament/MS
+ornamental/SY
+ornament/GSDM
+supernatant
+pimpernel
+UNESCO/M
diff --git a/tests/i35725.good b/tests/i35725.good
new file mode 100644
index 0000000..052ba84
--- /dev/null
+++ b/tests/i35725.good
@@ -0,0 +1 @@
+permanent
diff --git a/tests/i35725.sug b/tests/i35725.sug
new file mode 100644
index 0000000..eea2e92
--- /dev/null
+++ b/tests/i35725.sug
@@ -0,0 +1,10 @@
+permanent
+permanent
+permanent, supernatant, pimpernel
+Permanent
+Permanent
+Permanent, Supernatant, Pimpernel
+UNESCO
+UNESCO
+UNESCO's
+UNESCO's
diff --git a/tests/circumfix.test b/tests/i35725.test
similarity index 69%
copy from tests/circumfix.test
copy to tests/i35725.test
index 89ca772..7f44369 100755
--- a/tests/circumfix.test
+++ b/tests/i35725.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/i35725.wrong b/tests/i35725.wrong
new file mode 100644
index 0000000..573e195
--- /dev/null
+++ b/tests/i35725.wrong
@@ -0,0 +1,10 @@
+permenant
+pernament
+pernemant
+Permenant
+Pernament
+Pernemant
+unesco
+Unesco
+unesco's
+Unesco's
diff --git a/tests/i54633.aff b/tests/i54633.aff
new file mode 100644
index 0000000..e70073b
--- /dev/null
+++ b/tests/i54633.aff
@@ -0,0 +1 @@
+# Missing capitalized suggestion for capitalized bad words
diff --git a/tests/i54633.dic b/tests/i54633.dic
new file mode 100644
index 0000000..e26d6f9
--- /dev/null
+++ b/tests/i54633.dic
@@ -0,0 +1,2 @@
+1
+éditer
diff --git a/tests/i54633.good b/tests/i54633.good
new file mode 100644
index 0000000..a115f67
--- /dev/null
+++ b/tests/i54633.good
@@ -0,0 +1,2 @@
+éditer
+Éditer
diff --git a/tests/i54633.sug b/tests/i54633.sug
new file mode 100644
index 0000000..a115f67
--- /dev/null
+++ b/tests/i54633.sug
@@ -0,0 +1,2 @@
+éditer
+Éditer
diff --git a/tests/circumfix.test b/tests/i54633.test
similarity index 69%
copy from tests/circumfix.test
copy to tests/i54633.test
index 89ca772..7f44369 100755
--- a/tests/circumfix.test
+++ b/tests/i54633.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/i54633.wrong b/tests/i54633.wrong
new file mode 100644
index 0000000..579a45d
--- /dev/null
+++ b/tests/i54633.wrong
@@ -0,0 +1,2 @@
+editer
+Editer
diff --git a/tests/map.aff b/tests/map.aff
new file mode 100644
index 0000000..83d99a4
--- /dev/null
+++ b/tests/map.aff
@@ -0,0 +1,8 @@
+# With MAP suggestion, Hunspell can add missing accents to a word.
+
+# switch off ngram suggestion for testing
+MAXNGRAMSUGS 0
+
+MAP 2
+MAP uúü
+MAP oóö
diff --git a/tests/map.dic b/tests/map.dic
new file mode 100644
index 0000000..82079c6
--- /dev/null
+++ b/tests/map.dic
@@ -0,0 +1,3 @@
+2
+Frühstück
+tükörfúró
diff --git a/tests/map.sug b/tests/map.sug
new file mode 100644
index 0000000..9ecad05
--- /dev/null
+++ b/tests/map.sug
@@ -0,0 +1,2 @@
+Frühstück
+tükörfúró
diff --git a/tests/circumfix.test b/tests/map.test
similarity index 69%
copy from tests/circumfix.test
copy to tests/map.test
index 89ca772..7f44369 100755
--- a/tests/circumfix.test
+++ b/tests/map.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/map.wrong b/tests/map.wrong
new file mode 100644
index 0000000..fa913e4
--- /dev/null
+++ b/tests/map.wrong
@@ -0,0 +1,2 @@
+Fruhstuck
+tukorfuro
diff --git a/tests/maputf.aff b/tests/maputf.aff
new file mode 100644
index 0000000..aefedfa
--- /dev/null
+++ b/tests/maputf.aff
@@ -0,0 +1,10 @@
+# With MAP suggestion, Hunspell can add missing accents to a word.
+
+SET UTF-8
+
+# switch off ngram suggestion for testing
+MAXNGRAMSUGS 0
+
+MAP 2
+MAP uúü
+MAP oóö
diff --git a/tests/maputf.dic b/tests/maputf.dic
new file mode 100644
index 0000000..3e83065
--- /dev/null
+++ b/tests/maputf.dic
@@ -0,0 +1,3 @@
+2
+Frühstück
+tükörfúró
diff --git a/tests/maputf.sug b/tests/maputf.sug
new file mode 100644
index 0000000..c45a7ae
--- /dev/null
+++ b/tests/maputf.sug
@@ -0,0 +1,2 @@
+Frühstück
+tükörfúró
diff --git a/tests/circumfix.test b/tests/maputf.test
similarity index 69%
copy from tests/circumfix.test
copy to tests/maputf.test
index 89ca772..7f44369 100755
--- a/tests/circumfix.test
+++ b/tests/maputf.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/maputf.wrong b/tests/maputf.wrong
new file mode 100644
index 0000000..fa913e4
--- /dev/null
+++ b/tests/maputf.wrong
@@ -0,0 +1,2 @@
+Fruhstuck
+tukorfuro
diff --git a/tests/onlyincompound.aff b/tests/onlyincompound.aff
new file mode 100644
index 0000000..e700b0e
--- /dev/null
+++ b/tests/onlyincompound.aff
@@ -0,0 +1,5 @@
+# words only in compounds (see also fogemorpheme example)
+ONLYINCOMPOUND O
+COMPOUNDFLAG A
+SFX B Y 1
+SFX B 0 s .
diff --git a/tests/onlyincompound.dic b/tests/onlyincompound.dic
new file mode 100644
index 0000000..dc742f7
--- /dev/null
+++ b/tests/onlyincompound.dic
@@ -0,0 +1,3 @@
+2
+foo/A
+pseudo/OAB
diff --git a/tests/onlyincompound.good b/tests/onlyincompound.good
new file mode 100644
index 0000000..151d597
--- /dev/null
+++ b/tests/onlyincompound.good
@@ -0,0 +1,4 @@
+foo
+pseudofoo
+foopseudo
+foopseudos
diff --git a/tests/circumfix.test b/tests/onlyincompound.test
similarity index 69%
copy from tests/circumfix.test
copy to tests/onlyincompound.test
index 89ca772..7f44369 100755
--- a/tests/circumfix.test
+++ b/tests/onlyincompound.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/onlyincompound.wrong b/tests/onlyincompound.wrong
new file mode 100644
index 0000000..115d0c6
--- /dev/null
+++ b/tests/onlyincompound.wrong
@@ -0,0 +1,2 @@
+pseudo
+pseudos
diff --git a/tests/pseudoroot.test b/tests/pseudoroot.test
index 89ca772..7f44369 100755
--- a/tests/pseudoroot.test
+++ b/tests/pseudoroot.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/pseudoroot2.test b/tests/pseudoroot2.test
index 89ca772..7f44369 100755
--- a/tests/pseudoroot2.test
+++ b/tests/pseudoroot2.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/pseudoroot3.test b/tests/pseudoroot3.test
index 89ca772..7f44369 100755
--- a/tests/pseudoroot3.test
+++ b/tests/pseudoroot3.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/pseudoroot4.test b/tests/pseudoroot4.test
index 89ca772..7f44369 100755
--- a/tests/pseudoroot4.test
+++ b/tests/pseudoroot4.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/pseudoroot5.aff b/tests/pseudoroot5.aff
new file mode 100644
index 0000000..bfa1168
--- /dev/null
+++ b/tests/pseudoroot5.aff
@@ -0,0 +1,13 @@
+# pseudoaffix
+PSEUDOROOT X
+
+SFX A Y 2
+SFX A 0 suf/B .
+SFX A 0 pseudosuf/XB .
+
+SFX B Y 1
+SFX B 0 bar .
+
+PFX C Y 2
+PFX C 0 pre .
+PFX C 0 pseudopre/X .
diff --git a/tests/pseudoroot5.dic b/tests/pseudoroot5.dic
new file mode 100644
index 0000000..83131e2
--- /dev/null
+++ b/tests/pseudoroot5.dic
@@ -0,0 +1,2 @@
+1
+foo/AC
diff --git a/tests/pseudoroot5.good b/tests/pseudoroot5.good
new file mode 100644
index 0000000..d1b86bf
--- /dev/null
+++ b/tests/pseudoroot5.good
@@ -0,0 +1,11 @@
+foo
+prefoo
+foosuf
+prefoosuf
+foosufbar
+prefoosufbar
+pseudoprefoosuf
+pseudoprefoosufbar
+pseudoprefoopseudosufbar
+prefoopseudosuf
+prefoopseudosufbar
diff --git a/tests/circumfix.test b/tests/pseudoroot5.test
similarity index 69%
copy from tests/circumfix.test
copy to tests/pseudoroot5.test
index 89ca772..7f44369 100755
--- a/tests/circumfix.test
+++ b/tests/pseudoroot5.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/pseudoroot5.wrong b/tests/pseudoroot5.wrong
new file mode 100644
index 0000000..fdd1797
--- /dev/null
+++ b/tests/pseudoroot5.wrong
@@ -0,0 +1,3 @@
+pseudoprefoo
+foopseudosuf
+pseudoprefoopseudosuf
diff --git a/tests/rep.aff b/tests/rep.aff
new file mode 100644
index 0000000..f61aa34
--- /dev/null
+++ b/tests/rep.aff
@@ -0,0 +1,9 @@
+# With REP suggestions, we can fix typical language specific misspellings.
+
+# switch off ngram suggestion for testing
+MAXNGRAMSUGS 0
+
+REP 3
+REP f ph
+REP ph f
+REP shun tion
diff --git a/tests/rep.dic b/tests/rep.dic
new file mode 100644
index 0000000..ce904f9
--- /dev/null
+++ b/tests/rep.dic
@@ -0,0 +1,4 @@
+3
+form
+phantom
+vacation
diff --git a/tests/rep.sug b/tests/rep.sug
new file mode 100644
index 0000000..d1bd218
--- /dev/null
+++ b/tests/rep.sug
@@ -0,0 +1,3 @@
+form
+phantom
+vacation
diff --git a/tests/circumfix.test b/tests/rep.test
similarity index 69%
copy from tests/circumfix.test
copy to tests/rep.test
index 89ca772..7f44369 100755
--- a/tests/circumfix.test
+++ b/tests/rep.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/rep.wrong b/tests/rep.wrong
new file mode 100644
index 0000000..2b43ec5
--- /dev/null
+++ b/tests/rep.wrong
@@ -0,0 +1,3 @@
+phorm
+fantom
+vacashun
diff --git a/tests/reputf.aff b/tests/reputf.aff
new file mode 100644
index 0000000..ac434a4
--- /dev/null
+++ b/tests/reputf.aff
@@ -0,0 +1,9 @@
+# With REP suggestions, we can fix typical language specific misspellings.
+
+SET UTF-8
+
+# switch off ngram suggestion for testing
+MAXNGRAMSUGS 0
+
+REP 1
+REP oo őő
diff --git a/tests/reputf.dic b/tests/reputf.dic
new file mode 100644
index 0000000..1890fcb
--- /dev/null
+++ b/tests/reputf.dic
@@ -0,0 +1,2 @@
+1
+főő
diff --git a/tests/reputf.sug b/tests/reputf.sug
new file mode 100644
index 0000000..8a00bc3
--- /dev/null
+++ b/tests/reputf.sug
@@ -0,0 +1 @@
+főő
diff --git a/tests/circumfix.test b/tests/reputf.test
similarity index 69%
copy from tests/circumfix.test
copy to tests/reputf.test
index 89ca772..7f44369 100755
--- a/tests/circumfix.test
+++ b/tests/reputf.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
diff --git a/tests/reputf.wrong b/tests/reputf.wrong
new file mode 100644
index 0000000..257cc56
--- /dev/null
+++ b/tests/reputf.wrong
@@ -0,0 +1 @@
+foo
diff --git a/tests/test.sh b/tests/test.sh
new file mode 100755
index 0000000..4b16c81
--- /dev/null
+++ b/tests/test.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+
+function check_valgrind_log () {
+if [ "$VALGRIND" != "" ]; then
+ if [ -f $TEMPDIR/test.pid* ]; then
+ log=`ls $TEMPDIR/test.pid*`
+ if ! grep -q 'ERROR SUMMARY: 0 error' $log; then
+ echo "Fail in $NAME $1 checking detected by Valgrind"
+ echo "$log Valgrind log file moved to $TEMPDIR/badlogs"
+ mv $log $TEMPDIR/badlogs
+ exit 1
+ fi
+ if grep -q 'LEAK SUMMARY' $log; then
+ echo "Memory leak in $NAME $1 checking detected by Valgrind"
+ echo "$log Valgrind log file moved to $TEMPDIR/badlogs"
+ mv $log $TEMPDIR/badlogs
+ exit 1
+ fi
+ rm -f $log
+ fi
+fi
+}
+
+TESTDIR=.
+TEMPDIR=$TESTDIR/testSubDir
+NAME="$1"
+shift
+
+if [ ! -d $TEMPDIR ]; then
+ mkdir $TEMPDIR
+fi
+
+shopt -s expand_aliases
+
+alias hunspell='../src/tools/hunspell'
+alias hunmorph='../src/tools/hunmorph'
+
+if [ "$VALGRIND" != "" ]; then
+ rm -f $TEMPDIR/test.pid*
+ if [ ! -d $TEMPDIR/badlogs ]; then
+ mkdir $TEMPDIR/badlogs
+ fi
+ alias hunspell='valgrind --tool=$VALGRIND --leak-check=yes --show-reachable=yes --log-file=$TEMPDIR/test ../src/tools/hunspell'
+ alias hunmorph='valgrind --tool=$VALGRIND --leak-check=yes --show-reachable=yes --log-file=$TEMPDIR/test ../src/tools/hunmorph'
+fi
+
+# Tests good words
+if test -f $TESTDIR/$NAME.good; then
+ hunspell -l $* -d $TESTDIR/$NAME <$TESTDIR/$NAME.good >$TEMPDIR/$NAME.good
+ if test -s $TEMPDIR/$NAME.good; then
+ echo "============================================="
+ echo "Fail in $NAME.good. Good words recognised as wrong:"
+ cat $TEMPDIR/$NAME.good
+ rm -f $TEMPDIR/$NAME.good
+ exit 1
+ fi
+ rm -f $TEMPDIR/$NAME.good
+fi
+
+check_valgrind_log "good words"
+
+# Tests bad words
+if test -f $TESTDIR/$NAME.wrong; then
+ hunspell -l $* -d $TESTDIR/$NAME <$TESTDIR/$NAME.wrong >$TEMPDIR/$NAME.wrong
+ tr -d ' ' <$TESTDIR/$NAME.wrong >$TEMPDIR/$NAME.wrong.detab
+ if ! cmp $TEMPDIR/$NAME.wrong $TEMPDIR/$NAME.wrong.detab >/dev/null; then
+ echo "============================================="
+ echo "Fail in $NAME.wrong. Bad words recognised as good:"
+ tr -d ' ' <$TESTDIR/$NAME.wrong >$TEMPDIR/$NAME.wrong.detab
+ diff $TEMPDIR/$NAME.wrong.detab $TEMPDIR/$NAME.wrong | grep '^<' | sed 's/^..//'
+ rm -f $TEMPDIR/$NAME.wrong $TEMPDIR/$NAME.wrong.detab
+ exit 1
+ fi
+ rm -f $TEMPDIR/$NAME.wrong $TEMPDIR/$NAME.wrong.detab
+fi
+
+check_valgrind_log "bad words"
+
+# Tests morphological analysis
+if test -f $TESTDIR/$NAME.morph; then
+ hunmorph $TESTDIR/$NAME.aff $TESTDIR/$NAME.dic $TESTDIR/$NAME.good >$TEMPDIR/$NAME.morph
+ if ! cmp $TEMPDIR/$NAME.morph $TESTDIR/$NAME.morph >/dev/null; then
+ echo "============================================="
+ echo "Fail in $NAME.morph. Bad analysis?"
+ diff $TESTDIR/$NAME.morph $TEMPDIR/$NAME.morph | grep '^<' | sed 's/^..//'
+ rm -f $TEMPDIR/$NAME.morph
+ exit 1
+ fi
+ rm -f $TEMPDIR/$NAME.morph
+fi
+
+check_valgrind_log "morphological analysis"
+
+# Tests suggestions
+if test -f $TESTDIR/$NAME.sug; then
+ hunspell -d $TESTDIR/$NAME <$TESTDIR/$NAME.wrong | tail +2 | grep '^&' | \
+ sed 's/^[^:]*: //' >$TEMPDIR/$NAME.sug
+ if ! cmp $TEMPDIR/$NAME.sug $TESTDIR/$NAME.sug >/dev/null; then
+ echo "============================================="
+ echo "Fail in $NAME.sug. Bad suggestion?"
+ diff $TESTDIR/$NAME.sug $TEMPDIR/$NAME.sug
+ rm -f $TEMPDIR/$NAME.sug
+ exit 1
+ fi
+ rm -f $TEMPDIR/$NAME.sug
+fi
+
+check_valgrind_log "suggestion"
diff --git a/tests/test_hunmorph b/tests/test_hunmorph
deleted file mode 100755
index 726719b..0000000
--- a/tests/test_hunmorph
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/sh
-
-TESTDIR=.
-TEMPDIR=/tmp
-NAME="$1"
-shift
-
-# Tests good words
-if test -f $TESTDIR/$NAME.good; then
- ../src/tools/hunspell -l $* -d $TESTDIR/$NAME <$TESTDIR/$NAME.good >$TEMPDIR/$NAME.good
- if test -s $TEMPDIR/$NAME.good; then
- echo "============================================="
- echo "Fail in $NAME.good. Good words recognised as wrong:"
- cat $TEMPDIR/$NAME.good
- rm -f $TEMPDIR/$NAME.good
- exit 1
- fi
- rm -f $TEMPDIR/$NAME.good
-fi
-
-# Tests bad words
-if test -f $TESTDIR/$NAME.wrong; then
- ../src/tools/hunspell -l $* -d $TESTDIR/$NAME <$TESTDIR/$NAME.wrong >$TEMPDIR/$NAME.wrong
- tr -d ' ' <$TESTDIR/$NAME.wrong >$TEMPDIR/$NAME.wrong.detab
- if ! cmp $TEMPDIR/$NAME.wrong $TEMPDIR/$NAME.wrong.detab >/dev/null; then
- echo "============================================="
- echo "Fail in $NAME.wrong. Bad words recognised as good:"
- tr -d ' ' <$TESTDIR/$NAME.wrong >$TEMPDIR/$NAME.wrong.detab
- diff $TEMPDIR/$NAME.wrong.detab $TEMPDIR/$NAME.wrong | grep '^<' | sed 's/^..//'
- rm -f $TEMPDIR/$NAME.wrong $TEMPDIR/$NAME.wrong.detab
- exit 1
- fi
- rm -f $TEMPDIR/$NAME.wrong $TEMPDIR/$NAME.wrong.detab
-fi
-
-# Tests morphological analysis
-if test -f $TESTDIR/$NAME.morph; then
- ../src/tools/hunmorph $TESTDIR/$NAME.aff $TESTDIR/$NAME.dic $TESTDIR/$NAME.good >$TEMPDIR/$NAME.morph
- if ! cmp $TEMPDIR/$NAME.morph $TESTDIR/$NAME.morph >/dev/null; then
- echo "============================================="
- echo "Fail in $NAME.morph. Bad analysis?"
- diff $TESTDIR/$NAME.morph $TEMPDIR/$NAME.morph | grep '^<' | sed 's/^..//'
- rm -f $TEMPDIR/$NAME.morph
- exit 1
- fi
- rm -f $TEMPDIR/$NAME.morph
-fi
diff --git a/tests/utf8.test b/tests/utf8.test
index c18806a..4d9d1a8 100755
--- a/tests/utf8.test
+++ b/tests/utf8.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME -1
+$DIR/test.sh $NAME -1
diff --git a/tests/utfcompound.test b/tests/utfcompound.test
index c18806a..4d9d1a8 100755
--- a/tests/utfcompound.test
+++ b/tests/utfcompound.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME -1
+$DIR/test.sh $NAME -1
diff --git a/tests/zeroaffix.test b/tests/zeroaffix.test
index 89ca772..7f44369 100755
--- a/tests/zeroaffix.test
+++ b/tests/zeroaffix.test
@@ -1,4 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
-$DIR/test_hunmorph $NAME
+$DIR/test.sh $NAME
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-openoffice/hunspell.git
Reply to: