summaryrefslogtreecommitdiffstats
path: root/extensions/spellcheck/hunspell
diff options
context:
space:
mode:
Diffstat (limited to 'extensions/spellcheck/hunspell')
-rw-r--r--extensions/spellcheck/hunspell/glue/PRemoteSpellcheckEngine.ipdl22
-rw-r--r--extensions/spellcheck/hunspell/glue/RemoteSpellCheckEngineChild.cpp21
-rw-r--r--extensions/spellcheck/hunspell/glue/RemoteSpellCheckEngineChild.h27
-rw-r--r--extensions/spellcheck/hunspell/glue/RemoteSpellCheckEngineParent.cpp63
-rw-r--r--extensions/spellcheck/hunspell/glue/RemoteSpellCheckEngineParent.h39
-rw-r--r--extensions/spellcheck/hunspell/glue/hunspell_alloc_hooks.h58
-rw-r--r--extensions/spellcheck/hunspell/glue/hunspell_fopen_hooks.h87
-rw-r--r--extensions/spellcheck/hunspell/glue/moz.build39
-rw-r--r--extensions/spellcheck/hunspell/glue/mozHunspell.cpp623
-rw-r--r--extensions/spellcheck/hunspell/glue/mozHunspell.h125
-rw-r--r--extensions/spellcheck/hunspell/glue/mozHunspellAllocator.h16
-rw-r--r--extensions/spellcheck/hunspell/glue/mozHunspellDirProvider.cpp140
-rw-r--r--extensions/spellcheck/hunspell/glue/mozHunspellDirProvider.h77
-rw-r--r--extensions/spellcheck/hunspell/moz.build12
-rw-r--r--extensions/spellcheck/hunspell/src/README21
-rw-r--r--extensions/spellcheck/hunspell/src/README.mozilla2
-rw-r--r--extensions/spellcheck/hunspell/src/affentry.cxx1068
-rw-r--r--extensions/spellcheck/hunspell/src/affentry.hxx232
-rw-r--r--extensions/spellcheck/hunspell/src/affixmgr.cxx5117
-rw-r--r--extensions/spellcheck/hunspell/src/affixmgr.hxx390
-rw-r--r--extensions/spellcheck/hunspell/src/atypes.hxx145
-rw-r--r--extensions/spellcheck/hunspell/src/baseaffix.hxx77
-rw-r--r--extensions/spellcheck/hunspell/src/csutil.cxx2850
-rw-r--r--extensions/spellcheck/hunspell/src/csutil.hxx325
-rw-r--r--extensions/spellcheck/hunspell/src/filemgr.cxx120
-rw-r--r--extensions/spellcheck/hunspell/src/filemgr.hxx101
-rw-r--r--extensions/spellcheck/hunspell/src/hashmgr.cxx1147
-rw-r--r--extensions/spellcheck/hunspell/src/hashmgr.hxx149
-rw-r--r--extensions/spellcheck/hunspell/src/htypes.hxx71
-rw-r--r--extensions/spellcheck/hunspell/src/hunspell.cxx1895
-rw-r--r--extensions/spellcheck/hunspell/src/hunspell.h162
-rw-r--r--extensions/spellcheck/hunspell/src/hunspell.hxx258
-rw-r--r--extensions/spellcheck/hunspell/src/hunvisapi.h18
-rw-r--r--extensions/spellcheck/hunspell/src/hunzip.cxx263
-rw-r--r--extensions/spellcheck/hunspell/src/hunzip.hxx87
-rw-r--r--extensions/spellcheck/hunspell/src/langnum.hxx78
-rw-r--r--extensions/spellcheck/hunspell/src/license.hunspell61
-rw-r--r--extensions/spellcheck/hunspell/src/license.myspell61
-rw-r--r--extensions/spellcheck/hunspell/src/moz.build38
-rw-r--r--extensions/spellcheck/hunspell/src/patches/132266624
-rw-r--r--extensions/spellcheck/hunspell/src/phonet.cxx274
-rw-r--r--extensions/spellcheck/hunspell/src/phonet.hxx52
-rw-r--r--extensions/spellcheck/hunspell/src/replist.cxx193
-rw-r--r--extensions/spellcheck/hunspell/src/replist.hxx107
-rw-r--r--extensions/spellcheck/hunspell/src/suggestmgr.cxx2192
-rw-r--r--extensions/spellcheck/hunspell/src/suggestmgr.hxx198
-rw-r--r--extensions/spellcheck/hunspell/src/w_char.hxx75
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.aff4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.sug5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.wrong5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1463589.aff3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1463589.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1463589.sug5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1463589.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1463589.wrong5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1592880.aff20
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1592880.dic4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1592880.good3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1592880.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1695964.aff10
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1695964.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1695964.sug3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1695964.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1695964.wrong3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1706659.aff13
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1706659.dic4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1706659.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1706659.wrong3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1975530.aff6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1975530.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1975530.good3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1975530.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/1975530.wrong1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/2970240.aff5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/2970240.dic4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/2970240.good1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/2970240.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/2970240.wrong1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/2970242.aff4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/2970242.dic4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/2970242.good5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/2970242.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/2970242.wrong1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/2999225.aff6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/2999225.dic4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/2999225.good2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/2999225.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/IJ.aff8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/IJ.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/IJ.good2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/IJ.sug1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/IJ.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/IJ.wrong1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/Makefile.am693
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/Makefile.in1416
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/affixes.aff7
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/affixes.dic4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/affixes.good7
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/affixes.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/alias.aff12
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/alias.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/alias.good4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/alias.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/alias2.aff17
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/alias2.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/alias2.good4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/alias2.morph12
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/alias2.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/alias3.aff18
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/alias3.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/alias3.good4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/alias3.morph8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/alias3.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.aff6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.good4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.sug3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.wrong3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps.aff5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps.good4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps.sug3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps.wrong3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps2.aff6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps2.dic4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps2.good4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps2.sug2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps2.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps2.wrong2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps3.aff10
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps3.dic7
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps3.good13
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps3.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/allcaps3.wrong4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/arabic.aff6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/arabic.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/arabic.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/arabic.wrong1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/base-utf.aff198
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/base-utf.dic29
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/base-utf.good27
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/base-utf.sug11
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/base-utf.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/base-utf.wrong11
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/base.aff192
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/base.dic29
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/base.good27
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/base.sug11
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/base.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/base.wrong11
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/break.aff8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/break.dic4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/break.good7
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/break.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/break.wrong12
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/breakdefault.aff6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/breakdefault.dic6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/breakdefault.good7
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/breakdefault.sug3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/breakdefault.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/breakdefault.wrong3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/breakoff.aff7
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/breakoff.dic6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/breakoff.good3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/breakoff.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/breakoff.wrong5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.aff3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.dic5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.good5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.wrong3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.aff3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.good2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.wrong1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.aff3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.good2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.wrong1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.aff3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.good5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.wrong3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.aff5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.dic5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.good2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.wrong4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.aff7
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.good3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.wrong1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.aff6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.dic5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.good9
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.wrong8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.aff8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.dic6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.good2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.wrong2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.aff8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.dic5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.good2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.wrong3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.aff3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.dic5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.good6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.wrong2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checksharps.aff4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checksharps.dic7
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checksharps.good13
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checksharps.sug1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checksharps.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checksharps.wrong1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.aff5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.dic7
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.good13
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.sug1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.wrong1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/circumfix.aff16
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/circumfix.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/circumfix.good4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/circumfix.morph12
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/circumfix.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/circumfix.wrong2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/colons-in-words.aff3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/colons-in-words.dic4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/colons-in-words.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.aff9
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.good3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.wrong2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.aff12
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.good4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.aff12
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.good3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.wrong2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.aff7
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.good6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.wrong3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.aff8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.good8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.aff8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.good5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.wrong6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundflag.aff3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundflag.dic5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundflag.good3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundflag.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundflag.wrong4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule.aff3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule.dic5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule.good2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule.wrong39
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.aff3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.dic5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.good37
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.wrong8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.aff3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.dic5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.good7
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.wrong41
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.aff7
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.dic24
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.good29
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.test6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.wrong5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.aff7
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.dic14
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.good7
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.morph21
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.wrong1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.aff4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.dic5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.good4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.wrong4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.aff8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.dic24
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.good29
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.test6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.wrong5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.aff8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.dic24
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.good29
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.test6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.wrong5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/condition-utf.aff42
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/condition-utf.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/condition-utf.good19
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/condition-utf.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/condition-utf.wrong18
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/condition.aff62
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/condition.dic6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/condition.good26
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/condition.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/condition.wrong21
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.aff11
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.good6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.morph20
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.wrong2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.aff9
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.dic12
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.wrong1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/encoding.aff1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/encoding.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/encoding.good4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/encoding.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/flag.aff13
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/flag.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/flag.good8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/flag.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/flaglong.aff14
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/flaglong.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/flaglong.good8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/flaglong.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/flagnum.aff14
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/flagnum.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/flagnum.good8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/flagnum.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/flagutf8.aff15
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/flagutf8.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/flagutf8.good8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/flagutf8.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.aff12
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.good3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.wrong3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.aff11
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.dic8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.good3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.wrong4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/forceucase.aff4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/forceucase.dic4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/forceucase.good7
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/forceucase.sug2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/forceucase.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/forceucase.wrong2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/fullstrip.aff15
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/fullstrip.dic4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/fullstrip.good9
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/fullstrip.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/germancompounding.aff91
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/germancompounding.dic5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/germancompounding.good20
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/germancompounding.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/germancompounding.wrong50
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.aff96
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.dic5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.good14
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.wrong50
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i35725.aff203
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i35725.dic15
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i35725.good1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i35725.sug10
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i35725.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i35725.wrong10
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i53643.aff2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i53643.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i53643.good19
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i53643.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i53643.wrong4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i54633.aff2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i54633.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i54633.good2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i54633.sug2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i54633.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i54633.wrong2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i54980.aff2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i54980.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i54980.good4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i54980.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i58202.aff4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i58202.dic5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i58202.good10
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i58202.sug13
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i58202.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i58202.wrong13
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i68568.aff7
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i68568.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i68568.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i68568.wrong5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i68568utf.aff8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i68568utf.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i68568utf.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/i68568utf.wrong5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/iconv.aff10
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/iconv.dic5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/iconv.good6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/iconv.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/ignore.aff5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/ignore.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/ignore.good6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/ignore.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.aff6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.dic10
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.good9
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/keepcase.aff3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/keepcase.dic5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/keepcase.good4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/keepcase.sug8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/keepcase.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/keepcase.wrong8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/korean.aff1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/korean.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/korean.good2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/korean.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/korean.wrong1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/map.aff9
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/map.dic4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/map.sug3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/map.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/map.wrong3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/maputf.aff11
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/maputf.dic4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/maputf.sug3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/maputf.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/maputf.wrong3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/morph.aff12
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/morph.dic10
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/morph.good26
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/morph.morph48
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/morph.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix.aff5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix.good3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix.wrong1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix2.aff2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix2.dic5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix2.good5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix2.morph13
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix2.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix3.aff8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix3.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix3.good2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix3.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix3.wrong1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix4.aff2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix4.dic5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix4.good5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix4.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix5.aff13
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix5.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix5.good11
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix5.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/needaffix5.wrong3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.aff21
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.good1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.sug2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.wrong2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/nosuggest.aff5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/nosuggest.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/nosuggest.good3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/nosuggest.sug0
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/nosuggest.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/nosuggest.wrong3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/oconv.aff12
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/oconv.dic4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/oconv.good2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/oconv.sug3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/oconv.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/oconv.wrong3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.aff5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.good4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.sug0
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.wrong2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.aff12
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.good3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.wrong3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.aff13
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.dic4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.good1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.wrong1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.aff27
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.dic4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.good1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.wrong1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.aff9
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.dic5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.good3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.sug1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.wrong5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.aff7
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.dic5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.good4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.sug1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.wrong5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.aff8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.dic7
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.good4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.sug8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.wrong8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/phone.aff255
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/phone.dic11
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/phone.sug1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/phone.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/phone.wrong1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/rep.aff21
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/rep.dic15
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/rep.sug8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/rep.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/rep.wrong11
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/reputf.aff9
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/reputf.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/reputf.sug1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/reputf.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/reputf.wrong1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.aff8
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.good3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.wrong1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/slash.aff4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/slash.dic5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/slash.good4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/slash.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/sug.aff15
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/sug.dic11
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/sug.sug12
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/sug.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/sug.wrong12
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/List_of_common_misspellings.txt4020
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/Makefile.am6
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/Makefile.in435
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/README16
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/prepare40
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/test25
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/sugutf.aff15
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/sugutf.dic11
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/sugutf.sug12
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/sugutf.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/sugutf.wrong12
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/test.sh111
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.aff3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.good2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.aff3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.dic2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.good2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.aff1
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.dic5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.good5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.sug2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.wrong2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utf8.aff10
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utf8.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utf8.good9
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utf8.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utfcompound.aff3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utfcompound.dic9
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utfcompound.good5
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utfcompound.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/utfcompound.wrong7
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/warn.aff13
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/warn.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/warn.good2
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/warn.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.aff12
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.dic3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.good3
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.morph13
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.test4
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/test_hunspell.js220
-rw-r--r--extensions/spellcheck/hunspell/tests/unit/xpcshell.ini7
614 files changed, 30726 insertions, 0 deletions
diff --git a/extensions/spellcheck/hunspell/glue/PRemoteSpellcheckEngine.ipdl b/extensions/spellcheck/hunspell/glue/PRemoteSpellcheckEngine.ipdl
new file mode 100644
index 000000000..7fd918364
--- /dev/null
+++ b/extensions/spellcheck/hunspell/glue/PRemoteSpellcheckEngine.ipdl
@@ -0,0 +1,22 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+include protocol PContent;
+
+namespace mozilla {
+// Spell-check requests sent to the parent side; the actor is managed by PContent.
+sync protocol PRemoteSpellcheckEngine {
+ manager PContent;
+
+parent:
+ async __delete__();
+ // Checks one word; the parent replies false if the check itself fails.
+ sync Check(nsString aWord) returns (bool aIsMisspelled);
+ // Like Check, plus suggestions; the parent clears them if the check fails.
+ sync CheckAndSuggest(nsString aWord) returns (bool aIsMisspelled, nsString[] aSuggestions);
+ // Sets the current dictionary; success tells whether the parent's call succeeded.
+ sync SetDictionary(nsString aDictionary) returns (bool success);
+};
+
+} // namespace mozilla
diff --git a/extensions/spellcheck/hunspell/glue/RemoteSpellCheckEngineChild.cpp b/extensions/spellcheck/hunspell/glue/RemoteSpellCheckEngineChild.cpp
new file mode 100644
index 000000000..dadfb2963
--- /dev/null
+++ b/extensions/spellcheck/hunspell/glue/RemoteSpellCheckEngineChild.cpp
@@ -0,0 +1,21 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "RemoteSpellCheckEngineChild.h"
+
+namespace mozilla {
+
+// Keeps a raw pointer to the spell checker that owns this actor.
+RemoteSpellcheckEngineChild::RemoteSpellcheckEngineChild(mozSpellChecker *aOwner)
+  : mOwner(aOwner) {}
+
+// Tells the owner to drop its reference to this actor, which prevents state
+// corruption during shutdown.
+RemoteSpellcheckEngineChild::~RemoteSpellcheckEngineChild()
+{
+  mozSpellChecker* const owner = mOwner;
+  owner->DeleteRemoteEngine();
+}
+
+} // namespace mozilla
diff --git a/extensions/spellcheck/hunspell/glue/RemoteSpellCheckEngineChild.h b/extensions/spellcheck/hunspell/glue/RemoteSpellCheckEngineChild.h
new file mode 100644
index 000000000..2cc40ce1f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/glue/RemoteSpellCheckEngineChild.h
@@ -0,0 +1,27 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef RemoteSpellcheckEngineChild_h_
+#define RemoteSpellcheckEngineChild_h_
+
+#include "mozilla/PRemoteSpellcheckEngineChild.h"
+#include "mozSpellChecker.h"
+
+class mozSpellChecker;
+
+namespace mozilla {
+// Child-side PRemoteSpellcheckEngine actor that remembers the mozSpellChecker it was given.
+class RemoteSpellcheckEngineChild : public mozilla::PRemoteSpellcheckEngineChild
+{
+public:
+ explicit RemoteSpellcheckEngineChild(mozSpellChecker *aOwner); // stored in mOwner
+ virtual ~RemoteSpellcheckEngineChild(); // calls mOwner->DeleteRemoteEngine()
+
+private:
+ mozSpellChecker *mOwner; // raw pointer, not deleted by this class
+};
+
+} //namespace mozilla
+
+#endif // RemoteSpellcheckEngineChild_h_
diff --git a/extensions/spellcheck/hunspell/glue/RemoteSpellCheckEngineParent.cpp b/extensions/spellcheck/hunspell/glue/RemoteSpellCheckEngineParent.cpp
new file mode 100644
index 000000000..ce20b4e19
--- /dev/null
+++ b/extensions/spellcheck/hunspell/glue/RemoteSpellCheckEngineParent.cpp
@@ -0,0 +1,63 @@
+/* vim: set ts=2 sw=2 sts=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "RemoteSpellCheckEngineParent.h"
+#include "nsISpellChecker.h"
+#include "nsServiceManagerUtils.h"
+
+namespace mozilla {
+
+// Creates the spell checker instance that serves every request on this actor.
+RemoteSpellcheckEngineParent::RemoteSpellcheckEngineParent()
+  : mSpellChecker(do_CreateInstance(NS_SPELLCHECKER_CONTRACTID))
+{}
+
+RemoteSpellcheckEngineParent::~RemoteSpellcheckEngineParent() {}
+
+// Switches the current dictionary and reports the outcome through *success.
+bool
+RemoteSpellcheckEngineParent::RecvSetDictionary(const nsString& aDictionary,
+                                                bool* success)
+{
+  const nsresult res = mSpellChecker->SetCurrentDictionary(aDictionary);
+  *success = NS_SUCCEEDED(res);
+  return true;
+}
+
+// Checks one word without collecting suggestions.
+bool
+RemoteSpellcheckEngineParent::RecvCheck(const nsString& aWord,
+                                        bool* aIsMisspelled)
+{
+  const nsresult res = mSpellChecker->CheckWord(aWord, aIsMisspelled, nullptr);
+  if (NS_FAILED(res)) {
+    // A failed check says nothing about the spelling, so do not flag the word.
+    *aIsMisspelled = false;
+  }
+  return true;
+}
+
+// Checks one word and collects suggestions for it.
+bool
+RemoteSpellcheckEngineParent::RecvCheckAndSuggest(
+  const nsString& aWord,
+  bool* aIsMisspelled,
+  InfallibleTArray<nsString>* aSuggestions)
+{
+  const nsresult res =
+    mSpellChecker->CheckWord(aWord, aIsMisspelled, aSuggestions);
+  if (NS_FAILED(res)) {
+    // On failure, reply with an unflagged word and no suggestions.
+    *aIsMisspelled = false;
+    aSuggestions->Clear();
+  }
+  return true;
+}
+
+// No cleanup is performed when the actor is destroyed.
+void
+RemoteSpellcheckEngineParent::ActorDestroy(ActorDestroyReason aWhy) {}
+
+} // namespace mozilla
diff --git a/extensions/spellcheck/hunspell/glue/RemoteSpellCheckEngineParent.h b/extensions/spellcheck/hunspell/glue/RemoteSpellCheckEngineParent.h
new file mode 100644
index 000000000..c0bb51139
--- /dev/null
+++ b/extensions/spellcheck/hunspell/glue/RemoteSpellCheckEngineParent.h
@@ -0,0 +1,39 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef RemoteSpellcheckEngineParent_h_
+#define RemoteSpellcheckEngineParent_h_
+
+#include "mozilla/PRemoteSpellcheckEngineParent.h"
+#include "nsCOMPtr.h"
+
+class nsISpellChecker;
+
+namespace mozilla {
+// Parent-side PRemoteSpellcheckEngine actor; answers requests via mSpellChecker.
+class RemoteSpellcheckEngineParent : public PRemoteSpellcheckEngineParent
+{
+public:
+ RemoteSpellcheckEngineParent(); // creates mSpellChecker
+
+ virtual ~RemoteSpellcheckEngineParent();
+ // Empty in the implementation.
+ virtual void ActorDestroy(ActorDestroyReason aWhy) override;
+ // Calls SetCurrentDictionary; *success is true when that call succeeds.
+ virtual bool RecvSetDictionary(const nsString& aDictionary,
+ bool* success) override;
+ // Calls CheckWord without suggestions; *aIsMisspelled is false on failure.
+ virtual bool RecvCheck(const nsString& aWord, bool* aIsMisspelled) override;
+ // As RecvCheck but fills aSuggestions, which is cleared on failure.
+ virtual bool RecvCheckAndSuggest(const nsString& aWord,
+ bool* aIsMisspelled,
+ InfallibleTArray<nsString>* aSuggestions)
+ override;
+
+private:
+ nsCOMPtr<nsISpellChecker> mSpellChecker; // created via NS_SPELLCHECKER_CONTRACTID
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/extensions/spellcheck/hunspell/glue/hunspell_alloc_hooks.h b/extensions/spellcheck/hunspell/glue/hunspell_alloc_hooks.h
new file mode 100644
index 000000000..729e66b8e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/glue/hunspell_alloc_hooks.h
@@ -0,0 +1,58 @@
+/******* BEGIN LICENSE BLOCK *******
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Initial Developers of the Original Code is Mozilla Foundation.
+ * Portions created by the Initial Developers
+ * are Copyright (C) 2011 the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s):
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ ******* END LICENSE BLOCK *******/
+
+#ifndef alloc_hooks_h__
+#define alloc_hooks_h__
+
+/**
+ * This file is force-included in hunspell code. Its purpose is to add memory
+ * reporting to hunspell without modifying its code, in order to ease future
+ * upgrades.
+ *
+ * This file is force-included through mozilla-config.h which is generated
+ * during the configure step.
+ *
+ * Currently, the memory allocated using operator new/new[] is not being
+ * tracked, but that's OK, since almost all of the memory used by Hunspell is
+ * allocated using C memory allocation functions.
+ */
+
+#include "mozilla/mozalloc.h"
+#include "mozHunspellAllocator.h"
+// Function-like macros: later calls spelled malloc/calloc/free/realloc expand to HunspellAllocator.
+#define malloc(size) HunspellAllocator::CountingMalloc(size)
+#define calloc(count, size) HunspellAllocator::CountingCalloc(count, size)
+#define free(ptr) HunspellAllocator::CountingFree(ptr)
+#define realloc(ptr, size) HunspellAllocator::CountingRealloc(ptr, size)
+
+#endif
diff --git a/extensions/spellcheck/hunspell/glue/hunspell_fopen_hooks.h b/extensions/spellcheck/hunspell/glue/hunspell_fopen_hooks.h
new file mode 100644
index 000000000..a841567b9
--- /dev/null
+++ b/extensions/spellcheck/hunspell/glue/hunspell_fopen_hooks.h
@@ -0,0 +1,87 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef fopen_hooks_h__
+#define fopen_hooks_h__
+
+/**
+ * This file is force-included in hunspell code. Its purpose is to add
+ * readahead to fopen() calls in hunspell without modifying its code, in order
+ * to ease future upgrades.
+ *
+ * This file is force-included through mozilla-config.h which is generated
+ * during the configure step.
+ */
+
+#include "mozilla/FileUtils.h"
+#include <stdio.h>
+#include <string.h>
+
+#if defined(XP_WIN)
+#include "nsNativeCharsetUtils.h"
+#include "nsString.h"
+
+#include <fcntl.h>
+#include <windows.h>
+// Hunspell defines a function named near. Windef.h #defines near.
+#undef near
+// mozHunspell defines a function named RemoveDirectory.
+#undef RemoveDirectory
+#endif /* defined(XP_WIN) */
+
+// fopen() replacement: read-only opens go through mozilla::ReadAheadFile and are
+// wrapped in a FILE*; all other modes use libc fopen. Returns nullptr on failure.
+inline FILE*
+hunspell_fopen_readahead(const char* filename, const char* mode)
+{
+  if (filename == nullptr || mode == nullptr) {
+    return nullptr;
+  }
+  // ReadAheadFile only serves read-only opens; every other mode (no 'r', or
+  // an update mode containing '+') is handed to libc's fopen unchanged.
+  const bool readOnly = strchr(mode, 'r') && !strchr(mode, '+');
+  if (!readOnly) {
+    return fopen(filename, mode);
+  }
+  int descriptor = -1;
+#if defined(XP_WIN)
+  // filename is obtained via the nsIFile::nativePath attribute, so
+  // it is using the Windows ANSI code page, NOT UTF-8!
+  nsAutoString widePath;
+  const nsresult converted =
+    NS_CopyNativeToUnicode(nsDependentCString(filename), widePath);
+  if (NS_FAILED(converted)) {
+    return nullptr;
+  }
+  HANDLE osHandle = INVALID_HANDLE_VALUE;
+  mozilla::ReadAheadFile(widePath.get(), 0, SIZE_MAX, &osHandle);
+  if (osHandle == INVALID_HANDLE_VALUE) {
+    return nullptr;
+  }
+  // MSVC CRT's _open_osfhandle only supports adding _O_TEXT, not _O_BINARY;
+  // the Win32 handle is imported into the CRT in translated mode only for 't'.
+  const int crtFlags = strchr(mode, 't') ? (_O_RDONLY | _O_TEXT) : _O_RDONLY;
+  descriptor = _open_osfhandle((intptr_t)osHandle, crtFlags);
+  if (descriptor < 0) {
+    CloseHandle(osHandle);
+    return nullptr;
+  }
+#else
+  mozilla::ReadAheadFile(filename, 0, SIZE_MAX, &descriptor);
+  if (descriptor < 0) {
+    return nullptr;
+  }
+#endif /* defined(XP_WIN) */
+
+  FILE* stream = fdopen(descriptor, mode);
+  if (stream == nullptr) {
+    close(descriptor);
+  }
+  return stream;
+}
+
+#define fopen(filename, mode) hunspell_fopen_readahead(filename, mode)
+
+#endif /* fopen_hooks_h__ */
+
diff --git a/extensions/spellcheck/hunspell/glue/moz.build b/extensions/spellcheck/hunspell/glue/moz.build
new file mode 100644
index 000000000..1e1d9711a
--- /dev/null
+++ b/extensions/spellcheck/hunspell/glue/moz.build
@@ -0,0 +1,39 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+SOURCES += [
+ 'mozHunspell.cpp',
+ 'mozHunspellDirProvider.cpp',
+ 'RemoteSpellCheckEngineChild.cpp',
+ 'RemoteSpellCheckEngineParent.cpp',
+]
+# These sources are linked into the 'xul' library.
+FINAL_LIBRARY = 'xul'
+# A system hunspell supplies its own compiler flags; otherwise use the headers in ../src.
+if CONFIG['MOZ_SYSTEM_HUNSPELL']:
+ CXXFLAGS += CONFIG['MOZ_HUNSPELL_CFLAGS']
+else:
+ LOCAL_INCLUDES += ['../src']
+
+LOCAL_INCLUDES += [
+ '/dom/base',
+ '/extensions/spellcheck/src',
+]
+
+include('/ipc/chromium/chromium-config.mozbuild')
+# Protocol definition for the remote spell-check engine actors.
+IPDL_SOURCES = [
+ 'PRemoteSpellcheckEngine.ipdl',
+]
+# Actor headers exported for inclusion as "mozilla/<name>.h".
+EXPORTS.mozilla += [
+ 'RemoteSpellCheckEngineChild.h',
+ 'RemoteSpellCheckEngineParent.h',
+]
+
+# This variable is referenced in configure.in. Make sure to change that file
+# too if you need to change this variable.
+DEFINES['HUNSPELL_STATIC'] = True
diff --git a/extensions/spellcheck/hunspell/glue/mozHunspell.cpp b/extensions/spellcheck/hunspell/glue/mozHunspell.cpp
new file mode 100644
index 000000000..87ffbc661
--- /dev/null
+++ b/extensions/spellcheck/hunspell/glue/mozHunspell.cpp
@@ -0,0 +1,623 @@
+/******* BEGIN LICENSE BLOCK *******
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
+ * and László Németh (Hunspell). Portions created by the Initial Developers
+ * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
+ * David Einstein (deinst@world.std.com)
+ * Michiel van Leeuwen (mvl@exedo.nl)
+ * Caolan McNamara (cmc@openoffice.org)
+ * László Németh (nemethl@gyorsposta.hu)
+ * Davide Prina
+ * Giuseppe Modugno
+ * Gianluca Turconi
+ * Simon Brouwer
+ * Noll Janos
+ * Biro Arpad
+ * Goldman Eleonora
+ * Sarlos Tamas
+ * Bencsath Boldizsar
+ * Halacsy Peter
+ * Dvornik Laszlo
+ * Gefferth Andras
+ * Nagy Viktor
+ * Varga Daniel
+ * Chris Halls
+ * Rene Engelhard
+ * Bram Moolenaar
+ * Dafydd Jones
+ * Harri Pitkanen
+ * Andras Timar
+ * Tor Lillqvist
+ * Jesper Kristensen (mail@jesperkristensen.dk)
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ ******* END LICENSE BLOCK *******/
+
+#include "mozHunspell.h"
+#include "nsReadableUtils.h"
+#include "nsXPIDLString.h"
+#include "nsIObserverService.h"
+#include "nsISimpleEnumerator.h"
+#include "nsIDirectoryEnumerator.h"
+#include "nsIFile.h"
+#include "nsDirectoryServiceUtils.h"
+#include "nsDirectoryServiceDefs.h"
+#include "mozISpellI18NManager.h"
+#include "nsUnicharUtilCIID.h"
+#include "nsUnicharUtils.h"
+#include "nsCRT.h"
+#include "mozInlineSpellChecker.h"
+#include "mozilla/Services.h"
+#include <stdlib.h>
+#include "nsIPrefService.h"
+#include "nsIPrefBranch.h"
+#include "mozilla/dom/EncodingUtils.h"
+#include "mozilla/dom/ContentParent.h"
+
+using mozilla::dom::ContentParent;
+using mozilla::dom::EncodingUtils;
+
+// AddRef/Release for mozHunspell are generated by the cycle-collecting
+// variants of the reference-counting macros.
+NS_IMPL_CYCLE_COLLECTING_ADDREF(mozHunspell)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(mozHunspell)
+
+// Interface map for mozHunspell. nsISupports is listed through the AMBIGUOUS
+// entry and resolved via mozISpellCheckingEngine.
+NS_INTERFACE_MAP_BEGIN(mozHunspell)
+ NS_INTERFACE_MAP_ENTRY(mozISpellCheckingEngine)
+ NS_INTERFACE_MAP_ENTRY(nsIObserver)
+ NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
+ NS_INTERFACE_MAP_ENTRY(nsIMemoryReporter)
+ NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, mozISpellCheckingEngine)
+ NS_INTERFACE_MAP_ENTRIES_CYCLE_COLLECTION(mozHunspell)
+NS_INTERFACE_MAP_END
+
+// Members handed to the cycle-collection implementation macro.
+NS_IMPL_CYCLE_COLLECTION(mozHunspell,
+ mPersonalDictionary,
+ mEncoder,
+ mDecoder)
+
+// Definition of the static sAmount counter for the HunspellAllocator
+// specialisation of CountingAllocatorBase, initialised to 0.
+template<> mozilla::Atomic<size_t> mozilla::CountingAllocatorBase<HunspellAllocator>::sAmount(0);
+
+// Constructs the engine with no Hunspell instance loaded (mHunspell is null).
+// Debug builds additionally assert that this constructor runs only once.
+mozHunspell::mozHunspell()
+ : mHunspell(nullptr)
+{
+#ifdef DEBUG
+ // There must be only one instance of this class: it reports memory based on
+ // a single static count in HunspellAllocator.
+ static bool hasRun = false;
+ MOZ_ASSERT(!hasRun);
+ hasRun = true;
+#endif
+}
+
+// Builds the initial dictionary list, subscribes this object to the two
+// profile-change notifications and registers it as a weak memory reporter.
+// Always returns NS_OK.
+nsresult
+mozHunspell::Init()
+{
+  LoadDictionaryList(false);
+
+  // Topics on which the dictionary list is rebuilt (handled in Observe()).
+  static const char* const kProfileTopics[] = {
+    "profile-do-change",
+    "profile-after-change",
+  };
+
+  nsCOMPtr<nsIObserverService> observerService =
+    mozilla::services::GetObserverService();
+  if (observerService) {
+    for (const char* topic : kProfileTopics) {
+      observerService->AddObserver(this, topic, true);
+    }
+  }
+
+  mozilla::RegisterWeakMemoryReporter(this);
+
+  return NS_OK;
+}
+
+// Unregisters the memory reporter registered in Init(), drops the personal
+// dictionary reference and destroys the Hunspell instance (if any).
+mozHunspell::~mozHunspell()
+{
+ mozilla::UnregisterWeakMemoryReporter(this);
+
+ mPersonalDictionary = nullptr;
+ delete mHunspell;
+}
+
+// Returns a newly allocated copy of the current dictionary name in
+// *aDictionary. Fails with NS_ERROR_OUT_OF_MEMORY if the copy is null.
+NS_IMETHODIMP mozHunspell::GetDictionary(char16_t **aDictionary)
+{
+  NS_ENSURE_ARG_POINTER(aDictionary);
+
+  char16_t* nameCopy = ToNewUnicode(mDictionary);
+  *aDictionary = nameCopy;
+  if (!nameCopy) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+
+  return NS_OK;
+}
+
+/* Set the dictionary.
+ * This also loads the dictionary and initializes the encoder/decoder from the
+ * dictionary's declared encoding.
+ *
+ * aDictionary: name of a dictionary present in mDictionaries, or the empty
+ *              string to unload the current dictionary.
+ *
+ * Returns NS_OK on success (including when the requested dictionary is
+ * already loaded), NS_ERROR_FILE_NOT_FOUND if the name is unknown,
+ * NS_ERROR_FAILURE if the affix path has no extension, and
+ * NS_ERROR_UCONV_NOCONV if the dictionary's encoding label is not recognised.
+ * In the last case the engine is left with no dictionary loaded, so that
+ * stale converters are never paired with the new dictionary and a retry is
+ * not mistaken for "already loaded".
+ */
+NS_IMETHODIMP mozHunspell::SetDictionary(const char16_t *aDictionary)
+{
+  NS_ENSURE_ARG_POINTER(aDictionary);
+
+  if (nsDependentString(aDictionary).IsEmpty()) {
+    delete mHunspell;
+    mHunspell = nullptr;
+    mDictionary.Truncate();
+    mAffixFileName.Truncate();
+    mLanguage.Truncate();
+    mDecoder = nullptr;
+    mEncoder = nullptr;
+
+    return NS_OK;
+  }
+
+  nsIFile* affFile = mDictionaries.GetWeak(nsDependentString(aDictionary));
+  if (!affFile)
+    return NS_ERROR_FILE_NOT_FOUND;
+
+  nsAutoCString dictFileName, affFileName;
+
+  // XXX This isn't really good. nsIFile->NativePath isn't safe for all
+  // character sets on Windows.
+  // A better way would be to QI to nsIFile, and get a filehandle
+  // from there. Only problem is that hunspell wants a path
+
+  nsresult rv = affFile->GetNativePath(affFileName);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // Same affix file as the one already loaded: nothing to do.
+  if (mAffixFileName.Equals(affFileName.get()))
+    return NS_OK;
+
+  // The .dic file lives next to the .aff file and shares its base name.
+  dictFileName = affFileName;
+  int32_t dotPos = dictFileName.RFindChar('.');
+  if (dotPos == -1)
+    return NS_ERROR_FAILURE;
+
+  dictFileName.SetLength(dotPos);
+  dictFileName.AppendLiteral(".dic");
+
+  // SetDictionary can be called multiple times, so we might have a
+  // valid mHunspell instance which needs cleaned up. The converters belong
+  // to that old instance, so drop them together with it.
+  delete mHunspell;
+  mHunspell = nullptr;
+  mDecoder = nullptr;
+  mEncoder = nullptr;
+
+  mDictionary = aDictionary;
+  mAffixFileName = affFileName;
+
+  mHunspell = new Hunspell(affFileName.get(),
+                           dictFileName.get());
+  if (!mHunspell)
+    return NS_ERROR_OUT_OF_MEMORY;
+
+  nsAutoCString encoding;
+  bool knownEncoding;
+  {
+    // 'label' points into memory owned by mHunspell; keep it scoped so it
+    // cannot outlive the instance on the rollback path below.
+    nsDependentCString label(mHunspell->get_dic_encoding());
+    knownEncoding =
+      EncodingUtils::FindEncodingForLabelNoReplacement(label, encoding);
+  }
+  if (!knownEncoding) {
+    // Roll back to the "no dictionary" state instead of keeping a Hunspell
+    // instance that has no matching encoder/decoder.
+    delete mHunspell;
+    mHunspell = nullptr;
+    mDictionary.Truncate();
+    mAffixFileName.Truncate();
+    mLanguage.Truncate();
+    return NS_ERROR_UCONV_NOCONV;
+  }
+  mEncoder = EncodingUtils::EncoderForEncoding(encoding);
+  mDecoder = EncodingUtils::DecoderForEncoding(encoding);
+
+  if (mEncoder)
+    mEncoder->SetOutputErrorBehavior(mEncoder->kOnError_Signal, nullptr, '?');
+
+  // The language is the part of the dictionary name before the first '-'
+  // (or, failing that, the first '_'); the whole name if neither occurs.
+  int32_t pos = mDictionary.FindChar('-');
+  if (pos == -1)
+    pos = mDictionary.FindChar('_');
+
+  if (pos == -1)
+    mLanguage.Assign(mDictionary);
+  else
+    mLanguage = Substring(mDictionary, 0, pos);
+
+  return NS_OK;
+}
+
+// Returns a newly allocated copy of the current language in *aLanguage.
+// Fails with NS_ERROR_NOT_INITIALIZED while no dictionary is selected and
+// with NS_ERROR_OUT_OF_MEMORY if the copy is null.
+NS_IMETHODIMP mozHunspell::GetLanguage(char16_t **aLanguage)
+{
+  NS_ENSURE_ARG_POINTER(aLanguage);
+
+  if (mDictionary.IsEmpty()) {
+    return NS_ERROR_NOT_INITIALIZED;
+  }
+
+  char16_t* languageCopy = ToNewUnicode(mLanguage);
+  *aLanguage = languageCopy;
+  if (!languageCopy) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+
+  return NS_OK;
+}
+
+// Always reports false in *aProvidesPersonalDictionary.
+NS_IMETHODIMP mozHunspell::GetProvidesPersonalDictionary(bool *aProvidesPersonalDictionary)
+{
+ NS_ENSURE_ARG_POINTER(aProvidesPersonalDictionary);
+
+ *aProvidesPersonalDictionary = false;
+ return NS_OK;
+}
+
+// Always reports false in *aProvidesWordUtils.
+NS_IMETHODIMP mozHunspell::GetProvidesWordUtils(bool *aProvidesWordUtils)
+{
+ NS_ENSURE_ARG_POINTER(aProvidesWordUtils);
+
+ *aProvidesWordUtils = false;
+ return NS_OK;
+}
+
+// Not implemented: always returns NS_ERROR_NOT_IMPLEMENTED and leaves *aName
+// untouched.
+NS_IMETHODIMP mozHunspell::GetName(char16_t * *aName)
+{
+ return NS_ERROR_NOT_IMPLEMENTED;
+}
+
+// Not implemented: always returns NS_ERROR_NOT_IMPLEMENTED and leaves
+// *aCopyright untouched.
+NS_IMETHODIMP mozHunspell::GetCopyright(char16_t * *aCopyright)
+{
+ return NS_ERROR_NOT_IMPLEMENTED;
+}
+
+// Returns the personal dictionary currently attached to this engine (may be
+// null) in *aPersonalDictionary, with an added reference when non-null.
+// Rejects a null out-parameter like the other getters of this class.
+NS_IMETHODIMP mozHunspell::GetPersonalDictionary(mozIPersonalDictionary * *aPersonalDictionary)
+{
+  NS_ENSURE_ARG_POINTER(aPersonalDictionary);
+
+  *aPersonalDictionary = mPersonalDictionary;
+  NS_IF_ADDREF(*aPersonalDictionary);
+  return NS_OK;
+}
+
+// Stores aPersonalDictionary (null is accepted) as the dictionary consulted by
+// Check() for words Hunspell itself rejects.
+NS_IMETHODIMP mozHunspell::SetPersonalDictionary(mozIPersonalDictionary * aPersonalDictionary)
+{
+ mPersonalDictionary = aPersonalDictionary;
+ return NS_OK;
+}
+
+// Returns the names of all known dictionaries as a newly allocated array of
+// newly allocated strings. On success *aDictionaries receives the array and
+// *aCount its length. If copying a name yields null, everything allocated so
+// far is freed and NS_ERROR_OUT_OF_MEMORY is returned.
+NS_IMETHODIMP mozHunspell::GetDictionaryList(char16_t ***aDictionaries,
+                                             uint32_t *aCount)
+{
+  if (!aDictionaries || !aCount)
+    return NS_ERROR_NULL_POINTER;
+
+  char16_t** names =
+    (char16_t**) moz_xmalloc(sizeof(char16_t*) * mDictionaries.Count());
+  uint32_t filled = 0;
+
+  for (auto iter = mDictionaries.Iter(); !iter.Done(); iter.Next()) {
+    char16_t* name = ToNewUnicode(iter.Key());
+    names[filled] = name;
+    if (!name) {
+      // Release the entries copied so far, newest first, then the array.
+      for (uint32_t i = filled; i > 0; --i) {
+        free(names[i - 1]);
+      }
+      free(names);
+      return NS_ERROR_OUT_OF_MEMORY;
+    }
+
+    ++filled;
+  }
+
+  *aDictionaries = names;
+  *aCount = filled;
+
+  return NS_OK;
+}
+
+// Rebuilds mDictionaries from every known dictionary location and then
+// re-validates the currently selected dictionary.
+//
+// Locations are scanned in this order, later ones overriding earlier ones
+// for the same dictionary name:
+//   1. the directory named by the spellchecker.dictionary_path pref, or else
+//      DICTIONARY_SEARCH_DIRECTORY, or else <gredir>/dictionaries and
+//      <appdir>/dictionaries;
+//   2. the directories listed in the DICPATH environment variable
+//      (right-to-left);
+//   3. DICTIONARY_SEARCH_DIRECTORY_LIST (extensions requiring restart);
+//   4. mDynamicDirectories (restartless extensions).
+//
+// aNotifyChildProcesses: when true, content processes are told that the
+// dictionary list changed.
+void
+mozHunspell::LoadDictionaryList(bool aNotifyChildProcesses)
+{
+  mDictionaries.Clear();
+
+  nsresult rv;
+
+  nsCOMPtr<nsIProperties> dirSvc =
+    do_GetService(NS_DIRECTORY_SERVICE_CONTRACTID);
+  if (!dirSvc)
+    return;
+
+  // find built in dictionaries, or dictionaries specified in
+  // spellchecker.dictionary_path in prefs
+  nsCOMPtr<nsIFile> dictDir;
+
+  // check preferences first
+  nsCOMPtr<nsIPrefBranch> prefs(do_GetService(NS_PREFSERVICE_CONTRACTID));
+  if (prefs) {
+    nsCString extDictPath;
+    rv = prefs->GetCharPref("spellchecker.dictionary_path", getter_Copies(extDictPath));
+    if (NS_SUCCEEDED(rv)) {
+      // set the spellchecker.dictionary_path
+      rv = NS_NewNativeLocalFile(extDictPath, true, getter_AddRefs(dictDir));
+    }
+  }
+  if (!dictDir) {
+    // spellchecker.dictionary_path not found, set internal path
+    rv = dirSvc->Get(DICTIONARY_SEARCH_DIRECTORY,
+                     NS_GET_IID(nsIFile), getter_AddRefs(dictDir));
+  }
+  if (dictDir) {
+    LoadDictionariesFromDir(dictDir);
+  }
+  else {
+    // try to load gredir/dictionaries
+    nsCOMPtr<nsIFile> greDir;
+    rv = dirSvc->Get(NS_GRE_DIR,
+                     NS_GET_IID(nsIFile), getter_AddRefs(greDir));
+    if (NS_SUCCEEDED(rv) && greDir) {
+      greDir->AppendNative(NS_LITERAL_CSTRING("dictionaries"));
+      LoadDictionariesFromDir(greDir);
+    } else {
+      greDir = nullptr;
+    }
+
+    // try to load appdir/dictionaries only if different than gredir
+    nsCOMPtr<nsIFile> appDir;
+    rv = dirSvc->Get(NS_XPCOM_CURRENT_PROCESS_DIR,
+                     NS_GET_IID(nsIFile), getter_AddRefs(appDir));
+    if (NS_SUCCEEDED(rv) && appDir) {
+      // Append before comparing: greDir already points at its "dictionaries"
+      // subdirectory, so both sides must be at the same level for Equals()
+      // to detect that the two locations coincide.
+      appDir->AppendNative(NS_LITERAL_CSTRING("dictionaries"));
+
+      bool sameAsGre = false;
+      if (greDir && NS_FAILED(appDir->Equals(greDir, &sameAsGre))) {
+        // Could not compare; rescanning an identical directory is harmless
+        // because it re-adds the same entries.
+        sameAsGre = false;
+      }
+      if (!sameAsGre) {
+        LoadDictionariesFromDir(appDir);
+      }
+    }
+  }
+
+  // find dictionaries in DICPATH
+  char* dicEnv = PR_GetEnv("DICPATH");
+  if (dicEnv) {
+    // do a two-pass dance so dictionaries are loaded right-to-left as preference
+    nsTArray<nsCOMPtr<nsIFile>> dirs;
+    nsAutoCString env(dicEnv); // assume dicEnv is UTF-8
+
+    char* currPath = nullptr;
+    char* nextPaths = env.BeginWriting();
+    while ((currPath = NS_strtok(":", &nextPaths))) {
+      nsCOMPtr<nsIFile> dir;
+      rv = NS_NewNativeLocalFile(nsCString(currPath), true, getter_AddRefs(dir));
+      if (NS_SUCCEEDED(rv)) {
+        dirs.AppendElement(dir);
+      }
+    }
+
+    // load them in reverse order so they override each other properly
+    for (int32_t i = dirs.Length() - 1; i >= 0; i--) {
+      LoadDictionariesFromDir(dirs[i]);
+    }
+  }
+
+  // find dictionaries from extensions requiring restart
+  // NOTE(review): a failure here returns before the dynamic directories are
+  // scanned and before the current dictionary is re-validated - confirm that
+  // this early exit is intended.
+  nsCOMPtr<nsISimpleEnumerator> dictDirs;
+  rv = dirSvc->Get(DICTIONARY_SEARCH_DIRECTORY_LIST,
+                   NS_GET_IID(nsISimpleEnumerator), getter_AddRefs(dictDirs));
+  if (NS_FAILED(rv))
+    return;
+
+  bool hasMore;
+  while (NS_SUCCEEDED(dictDirs->HasMoreElements(&hasMore)) && hasMore) {
+    nsCOMPtr<nsISupports> elem;
+    dictDirs->GetNext(getter_AddRefs(elem));
+
+    dictDir = do_QueryInterface(elem);
+    if (dictDir)
+      LoadDictionariesFromDir(dictDir);
+  }
+
+  // find dictionaries from restartless extensions
+  for (int32_t i = 0; i < mDynamicDirectories.Count(); i++) {
+    LoadDictionariesFromDir(mDynamicDirectories[i]);
+  }
+
+  // Now we have finished updating the list of dictionaries, update the current
+  // dictionary and any editors which may use it.
+  mozInlineSpellChecker::UpdateCanEnableInlineSpellChecking();
+
+  if (aNotifyChildProcesses) {
+    ContentParent::NotifyUpdatedDictionaries();
+  }
+
+  // Check if the current dictionary is still available by selecting it again
+  // under the same name; this also picks up a replacement file that a
+  // higher-priority directory now provides for that name.
+  if (!mDictionary.IsEmpty()) {
+    rv = SetDictionary(mDictionary.get());
+    if (NS_SUCCEEDED(rv))
+      return;
+  }
+
+  // If the current dictionary has gone, and we don't have a good replacement,
+  // set no current dictionary.
+  if (!mDictionary.IsEmpty()) {
+    SetDictionary(EmptyString().get());
+  }
+}
+
+// Registers every dictionary found directly inside aDir. A dictionary is a
+// "<name>.dic" file that has a sibling "<name>.aff" file; it is stored in
+// mDictionaries under <name> with '_' replaced by '-', mapped to the .aff
+// file. Returns NS_ERROR_UNEXPECTED if aDir does not exist, is not a
+// directory, or cannot be enumerated; NS_OK otherwise.
+NS_IMETHODIMP
+mozHunspell::LoadDictionariesFromDir(nsIFile* aDir)
+{
+  bool flag = false;
+
+  nsresult rv = aDir->Exists(&flag);
+  if (NS_FAILED(rv) || !flag) {
+    return NS_ERROR_UNEXPECTED;
+  }
+
+  rv = aDir->IsDirectory(&flag);
+  if (NS_FAILED(rv) || !flag) {
+    return NS_ERROR_UNEXPECTED;
+  }
+
+  nsCOMPtr<nsISimpleEnumerator> entries;
+  rv = aDir->GetDirectoryEntries(getter_AddRefs(entries));
+  if (NS_FAILED(rv)) {
+    return NS_ERROR_UNEXPECTED;
+  }
+
+  nsCOMPtr<nsIDirectoryEnumerator> dirEntries(do_QueryInterface(entries));
+  if (!dirEntries) {
+    return NS_ERROR_UNEXPECTED;
+  }
+
+  nsCOMPtr<nsIFile> entry;
+  while (NS_SUCCEEDED(dirEntries->GetNextFile(getter_AddRefs(entry))) && entry) {
+    nsAutoString leafName;
+    entry->GetLeafName(leafName);
+    if (!StringEndsWith(leafName, NS_LITERAL_STRING(".dic"))) {
+      continue;
+    }
+
+    // Strip the four characters of ".dic" to obtain the dictionary name.
+    nsAutoString dict(leafName);
+    dict.SetLength(dict.Length() - 4);
+
+    // Re-point the entry at the matching .aff file and require it to exist.
+    leafName = dict;
+    leafName.AppendLiteral(".aff");
+    entry->SetLeafName(leafName);
+    rv = entry->Exists(&flag);
+    if (NS_FAILED(rv) || !flag) {
+      continue;
+    }
+
+#ifdef DEBUG_bsmedberg
+    printf("Adding dictionary: %s\n", NS_ConvertUTF16toUTF8(dict).get());
+#endif
+
+    // Replace '_' separator with '-'
+    dict.ReplaceChar("_", '-');
+
+    mDictionaries.Put(dict, entry);
+  }
+
+  return NS_OK;
+}
+
+// Converts the NUL-terminated UTF-16 string aStr to the encoding of the
+// current dictionary.
+//
+// aStr: word to convert; must not be null.
+// aDst: receives a newly allocated, NUL-terminated buffer that the caller
+//       owns. The buffer is handed out even when the conversion itself
+//       fails, so the caller must release it in either case.
+//
+// Returns NS_ERROR_NULL_POINTER if no encoder is available (no dictionary
+// set), an invalid-pointer error for null arguments, or the encoder's result.
+nsresult mozHunspell::ConvertCharset(const char16_t* aStr, char ** aDst)
+{
+  NS_ENSURE_ARG_POINTER(aStr);
+  NS_ENSURE_ARG_POINTER(aDst);
+  NS_ENSURE_TRUE(mEncoder, NS_ERROR_NULL_POINTER);
+
+  int32_t outLength;
+  int32_t inLength = NS_strlen(aStr);
+  nsresult rv = mEncoder->GetMaxLength(aStr, inLength, &outLength);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // One extra byte for the terminating NUL written below.
+  *aDst = (char *) moz_xmalloc(sizeof(char) * (outLength+1));
+  NS_ENSURE_TRUE(*aDst, NS_ERROR_OUT_OF_MEMORY);
+
+  rv = mEncoder->Convert(aStr, &inLength, *aDst, &outLength);
+  if (NS_SUCCEEDED(rv))
+    (*aDst)[outLength] = '\0';
+
+  return rv;
+}
+
+// Memory reporter callback: reports the byte count returned by
+// HunspellAllocator::MemoryAllocated() under "explicit/spell-check".
+// Always returns NS_OK.
+NS_IMETHODIMP
+mozHunspell::CollectReports(nsIHandleReportCallback* aHandleReport,
+ nsISupports* aData, bool aAnonymize)
+{
+ MOZ_COLLECT_REPORT(
+ "explicit/spell-check", KIND_HEAP, UNITS_BYTES,
+ HunspellAllocator::MemoryAllocated(),
+ "Memory used by the spell-checking engine.");
+
+ return NS_OK;
+}
+
+// Spell-checks aWord. *aResult is set to true when Hunspell accepts the word;
+// otherwise, if a personal dictionary is attached, its verdict for the
+// current language is stored in *aResult and its status is returned.
+// Fails with NS_ERROR_FAILURE while no dictionary is loaded.
+NS_IMETHODIMP mozHunspell::Check(const char16_t *aWord, bool *aResult)
+{
+  NS_ENSURE_ARG_POINTER(aWord);
+  NS_ENSURE_ARG_POINTER(aResult);
+  NS_ENSURE_TRUE(mHunspell, NS_ERROR_FAILURE);
+
+  nsXPIDLCString encodedWord;
+  nsresult rv = ConvertCharset(aWord, getter_Copies(encodedWord));
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  const bool acceptedByHunspell = mHunspell->spell(encodedWord) != 0;
+  *aResult = acceptedByHunspell;
+
+  if (acceptedByHunspell || !mPersonalDictionary) {
+    return rv;
+  }
+
+  // Hunspell rejected the word: defer to the personal dictionary.
+  return mPersonalDictionary->Check(aWord, mLanguage.get(), aResult);
+}
+
+// Produces spelling suggestions for aWord.
+//
+// aWord:            word to look up; must not be null.
+// aSuggestions:     on success with a non-zero count, receives a newly
+//                   allocated array of newly allocated UTF-16 strings owned
+//                   by the caller; otherwise it is set to null.
+// aSuggestionCount: receives the number of entries in *aSuggestions
+//                   (0 on failure).
+//
+// Fails with NS_ERROR_FAILURE while no dictionary is loaded and with
+// NS_ERROR_NULL_POINTER if no decoder is available. If converting a
+// suggestion fails, everything allocated for the result is released and the
+// conversion error is returned.
+NS_IMETHODIMP mozHunspell::Suggest(const char16_t *aWord, char16_t ***aSuggestions, uint32_t *aSuggestionCount)
+{
+  NS_ENSURE_ARG_POINTER(aWord);
+  NS_ENSURE_ARG_POINTER(aSuggestions);
+  NS_ENSURE_ARG_POINTER(aSuggestionCount);
+  NS_ENSURE_TRUE(mHunspell, NS_ERROR_FAILURE);
+  NS_ENSURE_TRUE(mDecoder, NS_ERROR_NULL_POINTER);
+
+  *aSuggestions = nullptr;
+  *aSuggestionCount = 0;
+
+  nsXPIDLCString charsetWord;
+  nsresult rv = ConvertCharset(aWord, getter_Copies(charsetWord));
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // Initialised so that the unconditional cleanup at the end is safe even if
+  // suggest() returns without storing a list.
+  char ** wlst = nullptr;
+  int rawCount = mHunspell->suggest(&wlst, charsetWord);
+  uint32_t count = rawCount > 0 ? uint32_t(rawCount) : 0;
+
+  if (count) {
+    char16_t** converted = (char16_t **)moz_xmalloc(count * sizeof(char16_t *));
+    if (converted) {
+      // Number of leading entries of 'converted' that own a buffer; only
+      // these may be freed if a later step fails.
+      uint32_t owned = 0;
+      for (uint32_t index = 0; index < count && NS_SUCCEEDED(rv); ++index) {
+        // Convert the suggestion to utf16
+        int32_t inLength = strlen(wlst[index]);
+        int32_t outLength;
+        rv = mDecoder->GetMaxLength(wlst[index], inLength, &outLength);
+        if (NS_FAILED(rv))
+          break;
+
+        converted[index] = (char16_t *) moz_xmalloc(sizeof(char16_t) * (outLength+1));
+        if (!converted[index]) {
+          rv = NS_ERROR_OUT_OF_MEMORY;
+          break;
+        }
+        owned = index + 1;
+
+        rv = mDecoder->Convert(wlst[index], &inLength, converted[index], &outLength);
+        if (NS_SUCCEEDED(rv))
+          converted[index][outLength] = 0;
+      }
+
+      if (NS_FAILED(rv)) {
+        // free the char16_t strings that were actually allocated, and the array
+        NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(owned, converted);
+      } else {
+        *aSuggestions = converted;
+        *aSuggestionCount = count;
+      }
+    }
+    else // if (converted)
+      rv = NS_ERROR_OUT_OF_MEMORY;
+  }
+
+  // Release Hunspell's own suggestion list.
+  NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(count, wlst);
+  return rv;
+}
+
+// Observer callback for the topics registered in Init(): rebuilds the
+// dictionary list without notifying child processes. Any other topic only
+// trips the assertion. Always returns NS_OK.
+NS_IMETHODIMP
+mozHunspell::Observe(nsISupports* aSubj, const char *aTopic,
+ const char16_t *aData)
+{
+ NS_ASSERTION(!strcmp(aTopic, "profile-do-change")
+ || !strcmp(aTopic, "profile-after-change"),
+ "Unexpected observer topic");
+
+ LoadDictionaryList(false);
+
+ return NS_OK;
+}
+
+// Adds aDir to the dynamic dictionary directories, then rebuilds the
+// dictionary list and notifies child processes. Always returns NS_OK.
+NS_IMETHODIMP mozHunspell::AddDirectory(nsIFile *aDir)
+{
+ mDynamicDirectories.AppendObject(aDir);
+ LoadDictionaryList(true);
+ return NS_OK;
+}
+
+// Removes aDir from the dynamic dictionary directories, then rebuilds the
+// dictionary list and notifies child processes. In MOZ_THUNDERBIRD builds a
+// dictionary-removed notification is broadcast as well. Always returns NS_OK.
+NS_IMETHODIMP mozHunspell::RemoveDirectory(nsIFile *aDir)
+{
+ mDynamicDirectories.RemoveObject(aDir);
+ LoadDictionaryList(true);
+
+#ifdef MOZ_THUNDERBIRD
+ /*
+ * This notification is needed for Thunderbird. Thunderbird derives the dictionary
+ * from the document's "lang" attribute. If a dictionary is removed,
+ * we need to change the "lang" attribute.
+ */
+ nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService();
+ if (obs) {
+ obs->NotifyObservers(nullptr,
+ SPELLCHECK_DICTIONARY_REMOVE_NOTIFICATION,
+ nullptr);
+ }
+#endif
+ return NS_OK;
+}
diff --git a/extensions/spellcheck/hunspell/glue/mozHunspell.h b/extensions/spellcheck/hunspell/glue/mozHunspell.h
new file mode 100644
index 000000000..01ef741aa
--- /dev/null
+++ b/extensions/spellcheck/hunspell/glue/mozHunspell.h
@@ -0,0 +1,125 @@
+/******* BEGIN LICENSE BLOCK *******
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
+ * and László Németh (Hunspell). Portions created by the Initial Developers
+ * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
+ * David Einstein (deinst@world.std.com)
+ * Michiel van Leeuwen (mvl@exedo.nl)
+ * Caolan McNamara (cmc@openoffice.org)
+ * László Németh (nemethl@gyorsposta.hu)
+ * Davide Prina
+ * Giuseppe Modugno
+ * Gianluca Turconi
+ * Simon Brouwer
+ * Noll Janos
+ * Biro Arpad
+ * Goldman Eleonora
+ * Sarlos Tamas
+ * Bencsath Boldizsar
+ * Halacsy Peter
+ * Dvornik Laszlo
+ * Gefferth Andras
+ * Nagy Viktor
+ * Varga Daniel
+ * Chris Halls
+ * Rene Engelhard
+ * Bram Moolenaar
+ * Dafydd Jones
+ * Harri Pitkanen
+ * Andras Timar
+ * Tor Lillqvist
+ * Jesper Kristensen (mail@jesperkristensen.dk)
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ ******* END LICENSE BLOCK *******/
+
+#ifndef mozHunspell_h__
+#define mozHunspell_h__
+
+#include <hunspell.hxx>
+#include "mozISpellCheckingEngine.h"
+#include "mozIPersonalDictionary.h"
+#include "nsString.h"
+#include "nsCOMPtr.h"
+#include "nsCOMArray.h"
+#include "nsIMemoryReporter.h"
+#include "nsIObserver.h"
+#include "nsIUnicodeEncoder.h"
+#include "nsIUnicodeDecoder.h"
+#include "nsInterfaceHashtable.h"
+#include "nsWeakReference.h"
+#include "nsCycleCollectionParticipant.h"
+#include "mozHunspellAllocator.h"
+
+#define MOZ_HUNSPELL_CONTRACTID "@mozilla.org/spellchecker/engine;1"
+#define MOZ_HUNSPELL_CID \
+/* 56c778e4-1bee-45f3-a689-886692a97fe7 */ \
+{ 0x56c778e4, 0x1bee, 0x45f3, \
+ { 0xa6, 0x89, 0x88, 0x66, 0x92, 0xa9, 0x7f, 0xe7 } }
+
+// Spell-checking engine backed by Hunspell. Implements
+// mozISpellCheckingEngine, observes profile changes to rescan dictionary
+// directories, and reports Hunspell's memory use as a memory reporter.
+class mozHunspell final : public mozISpellCheckingEngine,
+ public nsIObserver,
+ public nsSupportsWeakReference,
+ public nsIMemoryReporter
+{
+public:
+ NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+ NS_DECL_MOZISPELLCHECKINGENGINE
+ NS_DECL_NSIOBSERVER
+ NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(mozHunspell, mozISpellCheckingEngine)
+
+ mozHunspell();
+
+ // Second-stage initialisation; see the definition in mozHunspell.cpp.
+ nsresult Init();
+
+ // Rebuilds mDictionaries; aNotifyChildProcesses selects whether content
+ // processes are told about the update.
+ void LoadDictionaryList(bool aNotifyChildProcesses);
+
+ // helper method for converting a word to the charset of the dictionary
+ nsresult ConvertCharset(const char16_t* aStr, char ** aDst);
+
+ NS_DECL_NSIMEMORYREPORTER
+
+protected:
+ virtual ~mozHunspell();
+
+ // Optional personal dictionary consulted for words Hunspell rejects.
+ nsCOMPtr<mozIPersonalDictionary> mPersonalDictionary;
+ // Converters between UTF-16 and the current dictionary's encoding.
+ nsCOMPtr<nsIUnicodeEncoder> mEncoder;
+ nsCOMPtr<nsIUnicodeDecoder> mDecoder;
+
+ // Hashtable matches dictionary name to .aff file
+ nsInterfaceHashtable<nsStringHashKey, nsIFile> mDictionaries;
+ // Name of the currently selected dictionary (empty when none).
+ nsString mDictionary;
+ // Language part of mDictionary.
+ nsString mLanguage;
+ // Native path of the .aff file of the currently loaded dictionary.
+ nsCString mAffixFileName;
+
+ // dynamic dirs used to search for dictionaries
+ nsCOMArray<nsIFile> mDynamicDirectories;
+
+ // Hunspell instance for the current dictionary; null when none is loaded.
+ Hunspell *mHunspell;
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/glue/mozHunspellAllocator.h b/extensions/spellcheck/hunspell/glue/mozHunspellAllocator.h
new file mode 100644
index 000000000..219d4a5dd
--- /dev/null
+++ b/extensions/spellcheck/hunspell/glue/mozHunspellAllocator.h
@@ -0,0 +1,16 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozHunspellAllocator_h__
+#define mozHunspellAllocator_h__
+
+#include "mozilla/CountingAllocatorBase.h"
+
+// Tag type that instantiates mozilla::CountingAllocatorBase for Hunspell; it
+// adds no members of its own.
+class HunspellAllocator : public mozilla::CountingAllocatorBase<HunspellAllocator>
+{
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/glue/mozHunspellDirProvider.cpp b/extensions/spellcheck/hunspell/glue/mozHunspellDirProvider.cpp
new file mode 100644
index 000000000..00758b0c3
--- /dev/null
+++ b/extensions/spellcheck/hunspell/glue/mozHunspellDirProvider.cpp
@@ -0,0 +1,140 @@
+/******* BEGIN LICENSE BLOCK *******
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
+ * and László Németh (Hunspell). Portions created by the Initial Developers
+ * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): Benjamin Smedberg (benjamin@smedbergs.us) (Original Code)
+ * László Németh (nemethl@gyorsposta.hu)
+ * Ryan VanderMeulen (ryanvm@gmail.com)
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ ******* END LICENSE BLOCK *******/
+
+#include "mozHunspellDirProvider.h"
+#include "nsXULAppAPI.h"
+#include "nsString.h"
+
+#include "mozISpellCheckingEngine.h"
+#include "nsICategoryManager.h"
+
+NS_IMPL_ISUPPORTS(mozHunspellDirProvider,
+ nsIDirectoryServiceProvider,
+ nsIDirectoryServiceProvider2)
+
+// This provider supplies no single-file locations: every key is rejected
+// with NS_ERROR_FAILURE.
+NS_IMETHODIMP
+mozHunspellDirProvider::GetFile(const char *aKey, bool *aPersist,
+ nsIFile* *aResult)
+{
+ return NS_ERROR_FAILURE;
+}
+
+// Answers requests for DICTIONARY_SEARCH_DIRECTORY_LIST with an enumerator
+// over the "dictionaries" subdirectories of the extension directories; any
+// other key is rejected with NS_ERROR_FAILURE. On success the enumerator is
+// returned in *aResult together with NS_SUCCESS_AGGREGATE_RESULT.
+NS_IMETHODIMP
+mozHunspellDirProvider::GetFiles(const char *aKey,
+                                 nsISimpleEnumerator* *aResult)
+{
+  const bool isDictionaryListKey =
+    strcmp(aKey, DICTIONARY_SEARCH_DIRECTORY_LIST) == 0;
+  if (!isDictionaryListKey) {
+    return NS_ERROR_FAILURE;
+  }
+
+  nsCOMPtr<nsIProperties> directoryService =
+    do_GetService(NS_DIRECTORY_SERVICE_CONTRACTID);
+  if (!directoryService) {
+    return NS_ERROR_FAILURE;
+  }
+
+  nsCOMPtr<nsISimpleEnumerator> extensionDirs;
+  nsresult rv = directoryService->Get(XRE_EXTENSIONS_DIR_LIST,
+                                      NS_GET_IID(nsISimpleEnumerator),
+                                      getter_AddRefs(extensionDirs));
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+
+  // Wrap the extension list so that each entry yields its "dictionaries"
+  // subdirectory.
+  nsCOMPtr<nsISimpleEnumerator> wrapped = new AppendingEnumerator(extensionDirs);
+  if (!wrapped) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+
+  *aResult = nullptr;
+  wrapped.swap(*aResult);
+  return NS_SUCCESS_AGGREGATE_RESULT;
+}
+
+NS_IMPL_ISUPPORTS(mozHunspellDirProvider::AppendingEnumerator,
+ nsISimpleEnumerator)
+
+// Reports in *aResult whether a directory is queued in mNext for the next
+// GetNext() call. Always returns NS_OK.
+NS_IMETHODIMP
+mozHunspellDirProvider::AppendingEnumerator::HasMoreElements(bool *aResult)
+{
+  if (mNext) {
+    *aResult = true;
+  } else {
+    *aResult = false;
+  }
+  return NS_OK;
+}
+
+// Hands out the directory queued in mNext (when aResult is non-null) and then
+// advances mNext to the next base entry whose "dictionaries" subdirectory
+// exists, or to null when the base enumerator is exhausted.
+//
+// aResult: receives an added reference to the queued directory. May be null,
+//          in which case the call only advances (used by the constructor).
+//
+// Returns NS_ERROR_FAILURE, with *aResult set to null, when a result is
+// requested but nothing is queued; NS_OK otherwise. Errors from the base
+// enumerator are ignored while advancing.
+NS_IMETHODIMP
+mozHunspellDirProvider::AppendingEnumerator::GetNext(nsISupports* *aResult)
+{
+  if (aResult) {
+    if (!mNext) {
+      // Nothing left to hand out; NS_ADDREF must not be applied to null.
+      *aResult = nullptr;
+      return NS_ERROR_FAILURE;
+    }
+    NS_ADDREF(*aResult = mNext);
+  }
+
+  mNext = nullptr;
+
+  nsresult rv;
+
+  // Ignore all errors
+
+  bool more;
+  while (NS_SUCCEEDED(mBase->HasMoreElements(&more)) && more) {
+    nsCOMPtr<nsISupports> nextbasesupp;
+    mBase->GetNext(getter_AddRefs(nextbasesupp));
+
+    nsCOMPtr<nsIFile> nextbase(do_QueryInterface(nextbasesupp));
+    if (!nextbase)
+      continue;
+
+    // Work on a clone so the entry owned by the base enumerator is not
+    // modified by the append below.
+    nextbase->Clone(getter_AddRefs(mNext));
+    if (!mNext)
+      continue;
+
+    mNext->AppendNative(NS_LITERAL_CSTRING("dictionaries"));
+
+    bool exists;
+    rv = mNext->Exists(&exists);
+    if (NS_SUCCEEDED(rv) && exists)
+      break;
+
+    mNext = nullptr;
+  }
+
+  return NS_OK;
+}
+
+// Wraps aBase and immediately queues the first usable directory, so that
+// HasMoreElements() is meaningful before the first GetNext() call.
+mozHunspellDirProvider::AppendingEnumerator::AppendingEnumerator
+ (nsISimpleEnumerator* aBase) :
+ mBase(aBase)
+{
+ // Initialize mNext to begin
+ GetNext(nullptr);
+}
+
+// Contract ID string exposed as mozHunspellDirProvider::kContractID.
+char const *const
+mozHunspellDirProvider::kContractID = "@mozilla.org/spellcheck/dir-provider;1";
diff --git a/extensions/spellcheck/hunspell/glue/mozHunspellDirProvider.h b/extensions/spellcheck/hunspell/glue/mozHunspellDirProvider.h
new file mode 100644
index 000000000..60ab23be8
--- /dev/null
+++ b/extensions/spellcheck/hunspell/glue/mozHunspellDirProvider.h
@@ -0,0 +1,77 @@
+/******* BEGIN LICENSE BLOCK *******
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
+ * and László Németh (Hunspell). Portions created by the Initial Developers
+ * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): Benjamin Smedberg (benjamin@smedbergs.us) (Original Code)
+ * László Németh (nemethl@gyorsposta.hu)
+ * Ryan VanderMeulen (ryanvm@gmail.com)
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ ******* END LICENSE BLOCK *******/
+
+#ifndef mozHunspellDirProvider_h__
+#define mozHunspellDirProvider_h__
+
+#include "nsIDirectoryService.h"
+#include "nsIFile.h"
+#include "nsISimpleEnumerator.h"
+#include "mozilla/Attributes.h"
+
+// Directory service provider for dictionary search directories. Its
+// implementation (mozHunspellDirProvider.cpp) answers only the multi-file
+// GetFiles() query; GetFile() always fails.
+class mozHunspellDirProvider final :
+ public nsIDirectoryServiceProvider2
+{
+public:
+ NS_DECL_ISUPPORTS
+ NS_DECL_NSIDIRECTORYSERVICEPROVIDER
+ NS_DECL_NSIDIRECTORYSERVICEPROVIDER2
+
+ static char const *const kContractID;
+
+private:
+ ~mozHunspellDirProvider() {}
+
+ // Enumerator adaptor over a base enumerator of directories.
+ class AppendingEnumerator final : public nsISimpleEnumerator
+ {
+ public:
+ NS_DECL_ISUPPORTS
+ NS_DECL_NSISIMPLEENUMERATOR
+
+ explicit AppendingEnumerator(nsISimpleEnumerator* aBase);
+
+ private:
+ ~AppendingEnumerator() {}
+
+ // Underlying enumerator being adapted.
+ nsCOMPtr<nsISimpleEnumerator> mBase;
+ // Element that the next GetNext() call will return; null when exhausted.
+ nsCOMPtr<nsIFile> mNext;
+ };
+};
+
+#define HUNSPELLDIRPROVIDER_CID \
+{ 0x64d6174c, 0x1496, 0x4ffd, \
+ { 0x87, 0xf2, 0xda, 0x26, 0x70, 0xf8, 0x89, 0x34 } }
+
+#endif // mozHunspellDirProvider
diff --git a/extensions/spellcheck/hunspell/moz.build b/extensions/spellcheck/hunspell/moz.build
new file mode 100644
index 000000000..61955e506
--- /dev/null
+++ b/extensions/spellcheck/hunspell/moz.build
@@ -0,0 +1,12 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# The glue code is always built; the bundled Hunspell sources in 'src' are
+# built only when MOZ_SYSTEM_HUNSPELL is not set.
+DIRS += ['glue']
+if not CONFIG['MOZ_SYSTEM_HUNSPELL']:
+ DIRS += ['src']
+
+# Register the xpcshell test manifest when tests are enabled.
+if CONFIG['ENABLE_TESTS']:
+ XPCSHELL_TESTS_MANIFESTS += ['tests/unit/xpcshell.ini']
diff --git a/extensions/spellcheck/hunspell/src/README b/extensions/spellcheck/hunspell/src/README
new file mode 100644
index 000000000..b97a112fd
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/README
@@ -0,0 +1,21 @@
+Hunspell spell checker and morphological analyser library
+
+Documentation, tests, examples: http://hunspell.github.io/
+
+Author of Hunspell:
+László Németh (nemethl (at) gyorsposta.hu)
+
+Hunspell is based on OpenOffice.org's MySpell. MySpell's author:
+Kevin Hendricks (kevin.hendricks (at) sympatico.ca)
+
+License: GPL 2.0/LGPL 2.1/MPL 1.1 tri-license
+
+The contents of this library may be used under the terms of
+the GNU General Public License Version 2 or later (the "GPL"), or
+the GNU Lesser General Public License Version 2.1 or later (the "LGPL",
+see http://gnu.org/copyleft/lesser.html) or the Mozilla Public License
+Version 1.1 or later (the "MPL", see http://mozilla.org/MPL/MPL-1.1.html).
+
+Software distributed under these licenses is distributed on an "AS IS" basis,
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the licences
+for the specific language governing rights and limitations under the licenses.
diff --git a/extensions/spellcheck/hunspell/src/README.mozilla b/extensions/spellcheck/hunspell/src/README.mozilla
new file mode 100644
index 000000000..79a9f54d1
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/README.mozilla
@@ -0,0 +1,2 @@
+Hunspell Version: 1.4.1
+Additional Patches: See patches directory.
diff --git a/extensions/spellcheck/hunspell/src/affentry.cxx b/extensions/spellcheck/hunspell/src/affentry.cxx
new file mode 100644
index 000000000..bd2827436
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/affentry.cxx
@@ -0,0 +1,1068 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#include "affentry.hxx"
+#include "csutil.hxx"
+
+PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
+ // register affix manager; next/nexteq/nextne/flgnxt all start out NULL
+ : pmyMgr(pmgr),
+ next(NULL),
+ nexteq(NULL),
+ nextne(NULL),
+ flgnxt(NULL) {
+ // set up its initial values from the affix description *dp
+ aflag = dp->aflag; // flag
+ strip = dp->strip; // string to strip
+ appnd = dp->appnd; // string to append
+ numconds = dp->numconds; // length of the condition
+ opts = dp->opts; // cross product flag and the other ae* option bits tested in this file
+ // then copy over all of the conditions
+ if (opts & aeLONGCOND) {
+ memcpy(c.conds, dp->c.l.conds1, MAXCONDLEN_1); // first MAXCONDLEN_1 bytes are copied
+ c.l.conds2 = dp->c.l.conds2; // pointer is taken over as-is; ~PfxEntry() frees it
+ } else
+ memcpy(c.conds, dp->c.conds, MAXCONDLEN);
+ morphcode = dp->morphcode; // pointer copied, not duplicated
+ contclass = dp->contclass; // pointer copied, not duplicated
+ contclasslen = dp->contclasslen;
+}
+
+PfxEntry::~PfxEntry() {
+  // morphcode / contclass are freed here unless aeALIASM / aeALIASF is set
+  const bool free_morph = morphcode && !(opts & aeALIASM);
+  const bool free_cont = contclass && !(opts & aeALIASF);
+  aflag = 0;
+  pmyMgr = NULL;
+  if (opts & aeLONGCOND) free(c.l.conds2);
+  if (free_morph) free(morphcode);
+  if (free_cont) free(contclass);
+}
+
+// add prefix to this word assuming conditions hold (NULL when they do not)
+char* PfxEntry::add(const char* word, size_t len) {
+  const size_t nstrip = strip.size();
+  const bool usable = len > nstrip || (len == 0 && pmyMgr->get_fullstrip());
+  if (!usable || !(len >= numconds) || !test_condition(word))
+    return NULL;
+  // the word must start with the characters this rule strips
+  if (nstrip && strncmp(word, strip.c_str(), nstrip) != 0)
+    return NULL;
+  std::string prefixed(appnd);  // prefix, then the word minus its stripped head
+  return mystrdup(prefixed.append(word + nstrip).c_str());
+}
+
+// advance to the next byte of the condition pattern; NULL at its end
+inline char* PfxEntry::nextchar(char* p) {
+  if (!p)
+    return NULL;
+  char* const succ = p + 1;
+  const bool longcond = (opts & aeLONGCOND) != 0;
+  // long condition: jump to the 2nd part of the condition
+  if (longcond && succ == c.conds + MAXCONDLEN_1)
+    return c.l.conds2;
+  // short condition: end of the MAXCONDLEN length condition
+  if (!longcond && succ == c.conds + MAXCONDLEN)
+    return NULL;
+  return *succ ? succ : NULL;
+}
+
+inline int PfxEntry::test_condition(const char* st) {
+ const char* pos = NULL; // non-NULL while inside a [...] group: input position where the group began
+ bool neg = false; // complementer: set when '^' is seen
+ bool ingroup = false; // a character of the current group matched the input
+ if (numconds == 0)
+ return 1; // no condition: always matches
+ char* p = c.conds; // cursor in the condition pattern, advanced with nextchar()
+ while (1) {
+ switch (*p) {
+ case '\0':
+ return 1; // pattern exhausted without a mismatch
+ case '[': {
+ neg = false;
+ ingroup = false;
+ p = nextchar(p);
+ pos = st; // remember where the group is tested
+ break;
+ }
+ case '^': {
+ p = nextchar(p);
+ neg = true;
+ break;
+ }
+ case ']': {
+ if ((neg && ingroup) || (!neg && !ingroup))
+ return 0; // negated group matched, or plain group did not
+ pos = NULL;
+ p = nextchar(p);
+ // skip the next character (with aeUTF8 also its continuation bytes)
+ if (!ingroup && *st)
+ for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++)
+ ;
+ if (*st == '\0' && p)
+ return 0; // word <= condition
+ break;
+ }
+ case '.':
+ if (!pos) { // dots are not metacharacters in groups: [.]
+ p = nextchar(p);
+ // skip the next character (with aeUTF8 also its continuation bytes)
+ for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++)
+ ;
+ if (*st == '\0' && p)
+ return 0; // word <= condition
+ break;
+ }
+ /* FALLTHROUGH: inside a group '.' is compared like any other character */
+ default: {
+ if (*st == *p) {
+ st++;
+ p = nextchar(p);
+ if ((opts & aeUTF8) && (*(st - 1) & 0x80)) { // multibyte
+ while (p && (*p & 0xc0) == 0x80) { // character
+ if (*p != *st) {
+ if (!pos)
+ return 0;
+ st = pos; // inside a group: rewind input to the group start
+ break;
+ }
+ p = nextchar(p);
+ st++;
+ }
+ if (pos && st != pos) {
+ ingroup = true;
+ while (p && *p != ']' && ((p = nextchar(p)) != NULL)) { // skip to the closing ']'
+ }
+ }
+ } else if (pos) {
+ ingroup = true;
+ while (p && *p != ']' && ((p = nextchar(p)) != NULL)) { // skip to the closing ']'
+ }
+ }
+ } else if (pos) { // group
+ p = nextchar(p); // try the next character of the group
+ } else
+ return 0;
+ }
+ }
+ if (!p)
+ return 1; // nextchar() reported the end of the pattern
+ }
+}
+
+// check if this prefix entry matches; returns the hash entry that was found, or NULL
+struct hentry* PfxEntry::checkword(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag) {
+ struct hentry* he; // hash entry of root word or NULL
+
+ // on entry prefix is 0 length or already matches the beginning of the word.
+ // So if the remaining root word has positive length
+ // and if there are enough chars in root word and added back strip chars
+ // to meet the number of characters conditions, then test it
+
+ int tmpl = len - appnd.size(); // length of tmpword
+
+ if (tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) { // no numconds length pre-check in this method
+ // generate new root word by removing prefix and adding
+ // back any characters that would have been stripped
+
+ std::string tmpword(strip);
+ tmpword.append(word + appnd.size());
+
+ // now make sure all of the conditions on characters
+ // are met. Please see the appendix at the end of
+ // this file for more info on exactly what is being
+ // tested
+
+ // if all conditions are met then check if resulting
+ // root word in the dictionary
+
+ if (test_condition(tmpword.c_str())) {
+ tmpl += strip.size(); // tmpl now also counts the re-added strip characters
+ if ((he = pmyMgr->lookup(tmpword.c_str())) != NULL) {
+ do {
+ if (TESTAFF(he->astr, aflag, he->alen) &&
+ // forbid single prefixes with needaffix flag
+ !TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) &&
+ // needflag
+ ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
+ (contclass && TESTAFF(contclass, needflag, contclasslen))))
+ return he;
+ he = he->next_homonym; // check homonyms
+ } while (he);
+ }
+
+ // prefix matched but no root word was found
+ // if aeXPRODUCT is allowed, try again but now
+ // cross checked combined with a suffix
+
+ // if ((opts & aeXPRODUCT) && in_compound) {
+ if ((opts & aeXPRODUCT)) {
+ he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, aeXPRODUCT, this,
+ NULL, 0, NULL, FLAG_NULL, needflag,
+ in_compound);
+ if (he)
+ return he;
+ }
+ }
+ }
+ return NULL;
+}
+
+// check if this prefix entry matches in combination with a two-level suffix; returns the entry or NULL
+struct hentry* PfxEntry::check_twosfx(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag) {
+ struct hentry* he; // hash entry of root word or NULL
+
+ // on entry prefix is 0 length or already matches the beginning of the word.
+ // So if the remaining root word has positive length
+ // and if there are enough chars in root word and added back strip chars
+ // to meet the number of characters conditions, then test it
+
+ int tmpl = len - appnd.size(); // length of tmpword
+
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + strip.size() >= numconds)) {
+ // generate new root word by removing prefix and adding
+ // back any characters that would have been stripped
+
+ std::string tmpword(strip);
+ tmpword.append(word + appnd.size());
+
+ // now make sure all of the conditions on characters
+ // are met. Please see the appendix at the end of
+ // this file for more info on exactly what is being
+ // tested
+
+ // if all conditions are met then hand the resulting
+ // root word to the two-level suffix check
+
+ if (test_condition(tmpword.c_str())) {
+ tmpl += strip.size(); // tmpl now also counts the re-added strip characters
+
+ // prefix matched (no dictionary lookup is done in this method);
+ // if aeXPRODUCT is allowed and we are not at a compound begin,
+ // cross check combined with a two-level suffix
+
+ if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
+ he = pmyMgr->suffix_check_twosfx(tmpword.c_str(), tmpl, aeXPRODUCT, this,
+ needflag);
+ if (he)
+ return he;
+ }
+ }
+ }
+ return NULL;
+}
+
+// check if this prefix entry matches with a two-level suffix; returns suffix_check_twosfx_morph()'s string or NULL
+char* PfxEntry::check_twosfx_morph(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag) {
+ // on entry prefix is 0 length or already matches the beginning of the word.
+ // So if the remaining root word has positive length
+ // and if there are enough chars in root word and added back strip chars
+ // to meet the number of characters conditions, then test it
+
+ int tmpl = len - appnd.size(); // length of tmpword
+
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + strip.size() >= numconds)) {
+ // generate new root word by removing prefix and adding
+ // back any characters that would have been stripped
+
+ std::string tmpword(strip);
+ tmpword.append(word + appnd.size());
+
+ // now make sure all of the conditions on characters
+ // are met. Please see the appendix at the end of
+ // this file for more info on exactly what is being
+ // tested
+
+ // if all conditions are met then hand the resulting
+ // root word to the two-level suffix check
+
+ if (test_condition(tmpword.c_str())) {
+ tmpl += strip.size(); // tmpl now also counts the re-added strip characters
+
+ // prefix matched (no dictionary lookup is done in this method);
+ // if aeXPRODUCT is allowed and we are not at a compound begin,
+ // cross check combined with a two-level suffix
+
+ if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
+ return pmyMgr->suffix_check_twosfx_morph(tmpword.c_str(), tmpl,
+ aeXPRODUCT,
+ this, needflag);
+ }
+ }
+ }
+ return NULL;
+}
+
+// check if this prefix entry matches; returns a mystrdup'ed morphological description or NULL
+char* PfxEntry::check_morph(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag) {
+ struct hentry* he; // hash entry of root word or NULL
+ char* st; // string returned by suffix_check_morph(); freed below after appending
+
+ // on entry prefix is 0 length or already matches the beginning of the word.
+ // So if the remaining root word has positive length
+ // and if there are enough chars in root word and added back strip chars
+ // to meet the number of characters conditions, then test it
+
+ int tmpl = len - appnd.size(); // length of tmpword
+
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + strip.size() >= numconds)) {
+ // generate new root word by removing prefix and adding
+ // back any characters that would have been stripped
+
+ std::string tmpword(strip);
+ tmpword.append(word + appnd.size());
+
+ // now make sure all of the conditions on characters
+ // are met. Please see the appendix at the end of
+ // this file for more info on exactly what is being
+ // tested
+
+ // if all conditions are met then check if resulting
+ // root word in the dictionary
+
+ if (test_condition(tmpword.c_str())) {
+ std::string result; // accumulates one newline-terminated record per accepted homonym
+
+ tmpl += strip.size(); // tmpl now also counts the re-added strip characters
+ if ((he = pmyMgr->lookup(tmpword.c_str())) != NULL) {
+ do {
+ if (TESTAFF(he->astr, aflag, he->alen) &&
+ // forbid single prefixes with needaffix flag
+ !TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) &&
+ // needflag
+ ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
+ (contclass && TESTAFF(contclass, needflag, contclasslen)))) {
+ if (morphcode) {
+ result.append(" ");
+ result.append(morphcode);
+ } else
+ result.append(getKey()); // no morphcode: getKey() is appended without a leading space
+ if (!HENTRY_FIND(he, MORPH_STEM)) {
+ result.append(" ");
+ result.append(MORPH_STEM);
+ result.append(HENTRY_WORD(he));
+ }
+ // append the data of the hash entry if it has any
+ if (HENTRY_DATA(he)) {
+ result.append(" ");
+ result.append(HENTRY_DATA2(he));
+ } else {
+ // return with debug information
+ char* flag = pmyMgr->encode_flag(getFlag());
+ result.append(" ");
+ result.append(MORPH_FLAG);
+ result.append(flag);
+ free(flag);
+ }
+ result.append("\n");
+ }
+ he = he->next_homonym;
+ } while (he);
+ }
+
+ // independently of the lookup result above:
+ // if aeXPRODUCT is allowed, try again but now
+ // cross checked combined with a suffix
+
+ if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
+ st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, aeXPRODUCT, this,
+ FLAG_NULL, needflag);
+ if (st) {
+ result.append(st);
+ free(st);
+ }
+ }
+
+ if (!result.empty())
+ return mystrdup(result.c_str());
+ }
+ }
+
+ return NULL;
+}
+
+SfxEntry::SfxEntry(AffixMgr* pmgr, affentry* dp)
+ : pmyMgr(pmgr) // register affix manager
+ ,
+ next(NULL),
+ nexteq(NULL),
+ nextne(NULL),
+ flgnxt(NULL),
+ l_morph(NULL),
+ r_morph(NULL),
+ eq_morph(NULL) {
+ // set up its initial values from the affix description *dp
+ aflag = dp->aflag; // char flag
+ strip = dp->strip; // string to strip
+ appnd = dp->appnd; // string to append
+ numconds = dp->numconds; // length of the condition
+ opts = dp->opts; // cross product flag and the other ae* option bits tested in this file
+
+ // then copy over all of the conditions
+ if (opts & aeLONGCOND) {
+ memcpy(c.l.conds1, dp->c.l.conds1, MAXCONDLEN_1); // first MAXCONDLEN_1 bytes are copied
+ c.l.conds2 = dp->c.l.conds2; // pointer is taken over as-is; ~SfxEntry() frees it
+ } else
+ memcpy(c.conds, dp->c.conds, MAXCONDLEN);
+ rappnd = appnd; // rappnd starts as a copy of the suffix ...
+ reverseword(rappnd); // ... and is passed to reverseword() (presumably reversed in place - confirm in csutil)
+ morphcode = dp->morphcode; // pointer copied, not duplicated
+ contclass = dp->contclass; // pointer copied, not duplicated
+ contclasslen = dp->contclasslen;
+}
+
+SfxEntry::~SfxEntry() {
+  aflag = 0;
+  pmyMgr = NULL;
+  if ((opts & aeLONGCOND) != 0) free(c.l.conds2);
+  // morphcode / contclass are released unless aeALIASM / aeALIASF is set
+  const bool keep_morph = (opts & aeALIASM) != 0;
+  const bool keep_cont = (opts & aeALIASF) != 0;
+  if (morphcode && !keep_morph) free(morphcode);
+  if (contclass && !keep_cont) free(contclass);
+}
+
+// add suffix to this word assuming conditions hold (NULL if the rule does not apply)
+char* SfxEntry::add(const char* word, size_t len) {
+  const size_t nstrip = strip.size();
+  // `len >= nstrip` keeps `word + len - nstrip` inside the word: with len == 0
+  // (fullstrip) and a non-empty strip it used to point before the buffer
+  if ((len > nstrip || (len == 0 && pmyMgr->get_fullstrip())) &&
+      (len >= numconds) && test_condition(word + len, word) &&
+      (!nstrip || (len >= nstrip && strcmp(word + len - nstrip, strip.c_str()) == 0))) {
+    std::string tword(word);
+    tword.replace(len - nstrip, std::string::npos, appnd);  // swap stripped tail for the suffix
+    return mystrdup(tword.c_str());
+  }
+  return NULL;
+}
+
+// step to the following byte of the condition string; NULL once it is used up
+inline char* SfxEntry::nextchar(char* p) {
+  if (p == NULL)
+    return NULL;
+  ++p;
+  if (opts & aeLONGCOND) {
+    if (p == c.l.conds1 + MAXCONDLEN_1)
+      return c.l.conds2;  // jump to the 2nd part of the condition
+  } else {
+    if (p == c.conds + MAXCONDLEN)
+      return NULL;  // end of the MAXCONDLEN length condition
+  }
+  return (*p != '\0') ? p : NULL;
+}
+
+inline int SfxEntry::test_condition(const char* st, const char* beg) {
+ const char* pos = NULL; // non-NULL while inside a [...] group: input position where the group began
+ bool neg = false; // complementer: set when '^' is seen
+ bool ingroup = false; // a character of the current group matched the input
+ if (numconds == 0)
+ return 1; // no condition: always matches
+ char* p = c.conds; // cursor in the condition pattern, advanced with nextchar()
+ st--; // callers in this file pass the end of the word; step back onto its last byte
+ int i = 1; // 1-based count of the condition being tested, compared with numconds
+ while (1) {
+ switch (*p) {
+ case '\0':
+ return 1; // pattern exhausted without a mismatch
+ case '[':
+ p = nextchar(p);
+ pos = st; // remember where the group is tested
+ break;
+ case '^':
+ p = nextchar(p);
+ neg = true;
+ break;
+ case ']':
+ if (!neg && !ingroup)
+ return 0; // plain group without a matching character
+ i++;
+ // skip the next character (with aeUTF8 also its continuation bytes)
+ if (!ingroup) {
+ for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--)
+ ;
+ st--;
+ }
+ pos = NULL; // group finished: reset the per-group state
+ neg = false;
+ ingroup = false;
+ p = nextchar(p);
+ if (st < beg && p)
+ return 0; // word <= condition
+ break;
+ case '.':
+ if (!pos) {
+ // dots are not metacharacters in groups: [.]
+ p = nextchar(p);
+ // skip the next character (with aeUTF8 also its continuation bytes)
+ for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80;
+ st--)
+ ;
+ if (st < beg) { // word <= condition
+ if (p)
+ return 0;
+ else
+ return 1;
+ }
+ if ((opts & aeUTF8) && (*st & 0x80)) { // head of the UTF-8 character
+ st--;
+ if (st < beg) { // word <= condition
+ if (p)
+ return 0;
+ else
+ return 1;
+ }
+ }
+ break;
+ }
+ /* FALLTHROUGH: inside a group '.' is compared like any other character */
+ default: {
+ if (*st == *p) {
+ p = nextchar(p);
+ if ((opts & aeUTF8) && (*st & 0x80)) {
+ st--;
+ while (p && (st >= beg)) {
+ if (*p != *st) {
+ if (!pos)
+ return 0;
+ st = pos; // inside a group: rewind input to the group position
+ break;
+ }
+ // first byte of the UTF-8 multibyte character
+ if ((*p & 0xc0) != 0x80)
+ break;
+ p = nextchar(p);
+ st--;
+ }
+ if (pos && st != pos) {
+ if (neg)
+ return 0;
+ else if (i == numconds)
+ return 1; // last condition matched
+ ingroup = true;
+ while (p && *p != ']' && ((p = nextchar(p)) != NULL)) { // skip to the closing ']'
+ }
+ st--;
+ }
+ if (p && *p != ']')
+ p = nextchar(p);
+ } else if (pos) {
+ if (neg)
+ return 0;
+ else if (i == numconds)
+ return 1; // last condition matched
+ ingroup = true;
+ while (p && *p != ']' && ((p = nextchar(p)) != NULL)) { // skip to the closing ']'
+ }
+ // if (p && *p != ']') p = nextchar(p);
+ st--;
+ }
+ if (!pos) {
+ i++;
+ st--;
+ }
+ if (st < beg && p && *p != ']')
+ return 0; // word <= condition
+ } else if (pos) { // group
+ p = nextchar(p); // try the next character of the group
+ } else
+ return 0;
+ }
+ }
+ if (!p)
+ return 1; // nextchar() reported the end of the pattern
+ }
+}
+
+// see if this suffix is present in the word; returns the matching hash entry or NULL
+struct hentry* SfxEntry::checkword(const char* word,
+ int len,
+ int optflags,
+ PfxEntry* ppfx,
+ char** wlst,
+ int maxSug,
+ int* ns,
+ const FLAG cclass,
+ const FLAG needflag,
+ const FLAG badflag) {
+ struct hentry* he; // hash entry pointer
+ PfxEntry* ep = ppfx;
+
+ // if this suffix is being cross checked with a prefix
+ // but it does not support cross products skip it
+
+ if (((optflags & aeXPRODUCT) != 0) && ((opts & aeXPRODUCT) == 0))
+ return NULL;
+
+ // upon entry suffix is 0 length or already matches the end of the word.
+ // So if the remaining root word has positive length
+ // and if there are enough chars in root word and added back strip chars
+ // to meet the number of characters conditions, then test it
+
+ int tmpl = len - appnd.size(); // length of tmpword
+ // the second condition is not enough for UTF-8 strings
+ // it checked in test_condition()
+
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + strip.size() >= numconds)) {
+ // generate new root word by removing suffix and adding
+ // back any characters that would have been stripped
+ // (the first tmpl characters of word, then strip)
+
+ std::string tmpstring(word, tmpl);
+ if (strip.size()) {
+ tmpstring.append(strip);
+ }
+
+ const char* tmpword = tmpstring.c_str();
+ const char* endword = tmpword + tmpstring.size();
+
+ // now make sure all of the conditions on characters
+ // are met. Please see the appendix at the end of
+ // this file for more info on exactly what is being
+ // tested
+
+ // if all conditions are met then check if resulting
+ // root word in the dictionary
+
+ if (test_condition(endword, tmpword)) {
+#ifdef SZOSZABLYA_POSSIBLE_ROOTS
+ fprintf(stdout, "%s %s %c\n", word, tmpword, aflag);
+#endif
+ if ((he = pmyMgr->lookup(tmpword)) != NULL) {
+ do {
+ // check conditional suffix (enabled by prefix)
+ if ((TESTAFF(he->astr, aflag, he->alen) ||
+ (ep && ep->getCont() &&
+ TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
+ (((optflags & aeXPRODUCT) == 0) ||
+ (ep && TESTAFF(he->astr, ep->getFlag(), he->alen)) ||
+ // enabled by prefix
+ ((contclass) &&
+ (ep && TESTAFF(contclass, ep->getFlag(), contclasslen)))) &&
+ // handle cont. class
+ ((!cclass) ||
+ ((contclass) && TESTAFF(contclass, cclass, contclasslen))) &&
+ // check only in compound homonyms (bad flags)
+ (!badflag || !TESTAFF(he->astr, badflag, he->alen)) &&
+ // handle required flag
+ ((!needflag) ||
+ (TESTAFF(he->astr, needflag, he->alen) ||
+ ((contclass) && TESTAFF(contclass, needflag, contclasslen)))))
+ return he;
+ he = he->next_homonym; // check homonyms
+ } while (he);
+
+ // obsolete stemming code (used only by the
+ // experimental SuffixMgr:suggest_pos_stems)
+ // store resulting root in wlst
+ } else if (wlst && (*ns < maxSug)) {
+ int cwrd = 1; // stays 1 when tmpword is not yet in wlst
+ for (int k = 0; k < *ns; k++)
+ if (strcmp(tmpword, wlst[k]) == 0) {
+ cwrd = 0;
+ break;
+ }
+ if (cwrd) {
+ wlst[*ns] = mystrdup(tmpword);
+ if (wlst[*ns] == NULL) {
+ for (int j = 0; j < *ns; j++) // copy failed: free the list and signal it with *ns = -1
+ free(wlst[j]);
+ *ns = -1;
+ return NULL;
+ }
+ (*ns)++;
+ }
+ }
+ }
+ }
+ return NULL;
+}
+
+// see if two-level suffix is present in the word; returns suffix_check()'s entry or NULL
+struct hentry* SfxEntry::check_twosfx(const char* word,
+ int len,
+ int optflags,
+ PfxEntry* ppfx,
+ const FLAG needflag) {
+ struct hentry* he; // hash entry pointer
+ PfxEntry* ep = ppfx;
+
+ // if this suffix is being cross checked with a prefix
+ // but it does not support cross products skip it
+
+ if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0)
+ return NULL;
+
+ // upon entry suffix is 0 length or already matches the end of the word.
+ // So if the remaining root word has positive length
+ // and if there are enough chars in root word and added back strip chars
+ // to meet the number of characters conditions, then test it
+
+ int tmpl = len - appnd.size(); // length of tmpword
+
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + strip.size() >= numconds)) {
+ // generate new root word by removing suffix and adding
+ // back any characters that would have been stripped
+ // (word is cut to tmpl characters, then strip is appended)
+
+ std::string tmpword(word);
+ tmpword.resize(tmpl);
+ tmpword.append(strip);
+ tmpl += strip.size(); // tmpl now also counts the re-added strip characters
+
+ const char* beg = tmpword.c_str();
+ const char* end = beg + tmpl;
+
+ // now make sure all of the conditions on characters
+ // are met. Please see the appendix at the end of
+ // this file for more info on exactly what is being
+ // tested
+
+ // if all conditions are met then recall suffix_check
+
+ if (test_condition(end, beg)) {
+ if (ppfx) {
+ // handle conditional suffix
+ if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
+ he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL, NULL, 0, NULL,
+ (FLAG)aflag, needflag);
+ else
+ he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, optflags, ppfx, NULL, 0,
+ NULL, (FLAG)aflag, needflag);
+ } else {
+ he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL, NULL, 0, NULL,
+ (FLAG)aflag, needflag);
+ }
+ if (he)
+ return he;
+ }
+ }
+ return NULL;
+}
+
+// see if two-level suffix is present in the word; returns a mystrdup'ed morphological string or NULL
+char* SfxEntry::check_twosfx_morph(const char* word,
+ int len,
+ int optflags,
+ PfxEntry* ppfx,
+ const FLAG needflag) {
+ PfxEntry* ep = ppfx;
+ char* st; // string returned by suffix_check_morph(); freed after it is appended
+
+ char result[MAXLNLEN]; // fixed-size buffer; every mystrcat() below is given MAXLNLEN as its limit
+
+ *result = '\0';
+
+ // if this suffix is being cross checked with a prefix
+ // but it does not support cross products skip it
+
+ if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0)
+ return NULL;
+
+ // upon entry suffix is 0 length or already matches the end of the word.
+ // So if the remaining root word has positive length
+ // and if there are enough chars in root word and added back strip chars
+ // to meet the number of characters conditions, then test it
+
+ int tmpl = len - appnd.size(); // length of tmpword
+
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + strip.size() >= numconds)) {
+ // generate new root word by removing suffix and adding
+ // back any characters that would have been stripped
+ // (word is cut to tmpl characters, then strip is appended)
+
+ std::string tmpword(word);
+ tmpword.resize(tmpl);
+ tmpword.append(strip);
+ tmpl += strip.size(); // tmpl now also counts the re-added strip characters
+
+ const char* beg = tmpword.c_str();
+ const char* end = beg + tmpl;
+
+ // now make sure all of the conditions on characters
+ // are met. Please see the appendix at the end of
+ // this file for more info on exactly what is being
+ // tested
+
+ // if all conditions are met then recall suffix_check
+
+ if (test_condition(end, beg)) {
+ if (ppfx) {
+ // handle conditional suffix
+ if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) {
+ st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag,
+ needflag);
+ if (st) {
+ if (ppfx->getMorph()) { // put the prefix's morph string first
+ mystrcat(result, ppfx->getMorph(), MAXLNLEN);
+ mystrcat(result, " ", MAXLNLEN);
+ }
+ mystrcat(result, st, MAXLNLEN);
+ free(st);
+ mychomp(result);
+ }
+ } else {
+ st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, optflags, ppfx, aflag,
+ needflag);
+ if (st) {
+ mystrcat(result, st, MAXLNLEN);
+ free(st);
+ mychomp(result);
+ }
+ }
+ } else {
+ st =
+ pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag, needflag);
+ if (st) {
+ mystrcat(result, st, MAXLNLEN);
+ free(st);
+ mychomp(result);
+ }
+ }
+ if (*result)
+ return mystrdup(result);
+ }
+ }
+ return NULL;
+}
+
+// get next homonym with same affix: first later entry in he's next_homonym chain passing the flag tests, else NULL
+struct hentry* SfxEntry::get_next_homonym(struct hentry* he,
+ int optflags,
+ PfxEntry* ppfx,
+ const FLAG cclass,
+ const FLAG needflag) {
+ PfxEntry* ep = ppfx;
+ FLAG eFlag = ep ? ep->getFlag() : FLAG_NULL; // prefix flag, or FLAG_NULL without a prefix
+
+ while (he->next_homonym) {
+ he = he->next_homonym;
+ if ((TESTAFF(he->astr, aflag, he->alen) ||
+ (ep && ep->getCont() &&
+ TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
+ ((optflags & aeXPRODUCT) == 0 || TESTAFF(he->astr, eFlag, he->alen) ||
+ // handle conditional suffix
+ ((contclass) && TESTAFF(contclass, eFlag, contclasslen))) &&
+ // handle cont. class
+ ((!cclass) ||
+ ((contclass) && TESTAFF(contclass, cclass, contclasslen))) &&
+ // handle required flag
+ ((!needflag) ||
+ (TESTAFF(he->astr, needflag, he->alen) ||
+ ((contclass) && TESTAFF(contclass, needflag, contclasslen)))))
+ return he;
+ }
+ return NULL;
+}
+
+#if 0
+
+Appendix: Understanding Affix Code
+
+
+An affix is either a prefix or a suffix attached to root words to make
+other words.
+
+Basically a Prefix or a Suffix is set of AffEntry objects
+which store information about the prefix or suffix along
+with supporting routines to check if a word has a particular
+prefix or suffix or a combination.
+
+The structure affentry is defined as follows:
+
+struct affentry
+{
+ unsigned short aflag; // ID used to represent the affix
+ std::string strip; // string to strip before adding affix
+ std::string appnd; // the affix string to add
+ char numconds; // the number of conditions that must be met
+ char opts; // flag: aeXPRODUCT- combine both prefix and suffix
+ char conds[SETSIZE]; // array which encodes the conditions to be met
+};
+
+
+Here is a suffix borrowed from the en_US.aff file. This file
+is whitespace delimited.
+
+SFX D Y 4
+SFX D 0 d e
+SFX D y ied [^aeiou]y
+SFX D 0 ed [^ey]
+SFX D 0 ed [aeiou]y
+
+This information can be interpreted as follows:
+
+The first line has 4 fields
+
+Field
+-----
+1 SFX - indicates this is a suffix
+2 D - is the name of the character flag which represents this suffix
+3 Y - indicates it can be combined with prefixes (cross product)
+4 4 - indicates that sequence of 4 affentry structures are needed to
+ properly store the affix information
+
+The remaining lines describe the unique information for the 4 SfxEntry
+objects that make up this affix. Each line can be interpreted
+as follows: (note fields 1 and 2 are as a check against line 1 info)
+
+Field
+-----
+1 SFX - indicates this is a suffix
+2 D - is the name of the character flag for this affix
+3 y - the string of chars to strip off before adding affix
+ (a 0 here indicates the NULL string)
+4 ied - the string of affix characters to add
+5 [^aeiou]y - the conditions which must be met before the affix
+ can be applied
+
+Field 5 is interesting. Since this is a suffix, field 5 tells us that
+there are 2 conditions that must be met. The first condition is that
+the next to the last character in the word must *NOT* be any of the
+following "a", "e", "i", "o" or "u". The second condition is that
+the last character of the word must end in "y".
+
+So how can we encode this information concisely and be able to
+test for both conditions in a fast manner? The answer is found
+by studying the wonderful ispell code of Geoff Kuenning, et.al.
+(now available under a normal BSD license).
+
+If we set up a conds array of 256 bytes indexed (0 to 255) and access it
+using a character (cast to an unsigned char) of a string, we have 8 bits
+of information we can store about that character. Specifically we
+could use each bit to say if that character is allowed in any of the
+last (or first for prefixes) 8 characters of the word.
+
+Basically, each character at one end of the word (up to the number
+of conditions) is used to index into the conds array and the resulting
+value found there says whether that character is valid for a
+specific character position in the word.
+
+For prefixes, it does this by setting bit 0 if that char is valid
+in the first position, bit 1 if valid in the second position, and so on.
+
+If a bit is not set, then that char is not valid for that position in the
+word.
+
+If working with suffixes bit 0 is used for the character closest
+to the front, bit 1 for the next character towards the end, ...,
+with bit numconds-1 representing the last char at the end of the string.
+
+Note: since entries in the conds[] are 8 bits, only 8 conditions
+(read that only 8 character positions) can be examined at one
+end of a word (the beginning for prefixes and the end for suffixes).
+
+So to make this clearer, lets encode the conds array values for the
+first two affentries for the suffix D described earlier.
+
+
+ For the first affentry:
+ numconds = 1 (only examine the last character)
+
+ conds['e'] = (1 << 0) (the word must end in an E)
+ all others are all 0
+
+ For the second affentry:
+ numconds = 2 (only examine the last two characters)
+
+ conds[X] = conds[X] | (1 << 0) (aeiou are not allowed)
+ where X is all characters *but* a, e, i, o, or u
+
+
+ conds['y'] = (1 << 1) (the last char must be a y)
+ all other bits for all other entries in the conds array are zero
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/affentry.hxx b/extensions/spellcheck/hunspell/src/affentry.hxx
new file mode 100644
index 000000000..6311d83ff
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/affentry.hxx
@@ -0,0 +1,232 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _AFFIX_HXX_
+#define _AFFIX_HXX_
+
+#include "hunvisapi.h"
+
+#include "atypes.hxx"
+#include "baseaffix.hxx"
+#include "affixmgr.hxx"
+
+/* A Prefix Entry
+ *
+ * One prefix rule, layered on the shared AffEntry data (flag, option bits,
+ * appended string, morphological code, continuation classes).  Entries are
+ * chained through four intrusive links: `next`, `nexteq`, `nextne` and the
+ * per-flag chain `flgnxt`.
+ */
+
+class LIBHUNSPELL_DLL_EXPORTED PfxEntry : protected AffEntry {
+ private:
+  // copying is disabled: declared but intentionally left private
+  PfxEntry(const PfxEntry&);
+  PfxEntry& operator=(const PfxEntry&);
+
+  AffixMgr* pmyMgr;
+
+  PfxEntry* next;
+  PfxEntry* nexteq;
+  PfxEntry* nextne;
+  PfxEntry* flgnxt;
+
+ public:
+  PfxEntry(AffixMgr* pmgr, affentry* dp);
+  ~PfxEntry();
+
+  // ---- checking entry points (defined out of line) ----
+
+  struct hentry* checkword(const char* word,
+                           int len,
+                           char in_compound,
+                           const FLAG needflag = FLAG_NULL);
+
+  struct hentry* check_twosfx(const char* word,
+                              int len,
+                              char in_compound,
+                              const FLAG needflag = FLAG_NULL);
+
+  char* check_morph(const char* word,
+                    int len,
+                    char in_compound,
+                    const FLAG needflag = FLAG_NULL);
+
+  char* check_twosfx_morph(const char* word,
+                           int len,
+                           char in_compound,
+                           const FLAG needflag = FLAG_NULL);
+
+  char* add(const char* word, size_t len);
+
+  // ---- read-only views of the inherited affix data ----
+
+  // true when the aeXPRODUCT bit is set in the option bits
+  bool allowCross() { return (opts & aeXPRODUCT) != 0; }
+
+  FLAG getFlag() { return aflag; }
+
+  // the prefix string itself doubles as the lookup key
+  const char* getKey() { return appnd.c_str(); }
+  short getKeyLen() { return appnd.size(); }
+
+  const char* getMorph() { return morphcode; }
+
+  const unsigned short* getCont() { return contclass; }
+  short getContLen() { return contclasslen; }
+
+  // ---- intrusive link accessors ----
+
+  PfxEntry* getNext() { return next; }
+  PfxEntry* getNextNE() { return nextne; }
+  PfxEntry* getNextEQ() { return nexteq; }
+  PfxEntry* getFlgNxt() { return flgnxt; }
+
+  void setNext(PfxEntry* ptr) { next = ptr; }
+  void setNextNE(PfxEntry* ptr) { nextne = ptr; }
+  void setNextEQ(PfxEntry* ptr) { nexteq = ptr; }
+  void setFlgNxt(PfxEntry* ptr) { flgnxt = ptr; }
+
+  // ---- helpers declared inline here, defined out of line ----
+
+  inline char* nextchar(char* p);
+  inline int test_condition(const char* st);
+};
+
+/* A Suffix Entry
+ *
+ * One suffix rule, layered on the shared AffEntry data.  Besides the
+ * inherited `appnd` it keeps a second string, `rappnd`, which is what
+ * getKey() exposes (presumably the reversed suffix -- confirm against the
+ * constructor).  Entries are chained through `next`, `nexteq`, `nextne`,
+ * the per-flag chain `flgnxt`, and three morphology links.
+ */
+
+class LIBHUNSPELL_DLL_EXPORTED SfxEntry : protected AffEntry {
+ private:
+  // copying is disabled: declared but intentionally left private
+  SfxEntry(const SfxEntry&);
+  SfxEntry& operator=(const SfxEntry&);
+
+  AffixMgr* pmyMgr;
+  std::string rappnd;
+
+  SfxEntry* next;
+  SfxEntry* nexteq;
+  SfxEntry* nextne;
+  SfxEntry* flgnxt;
+
+  SfxEntry* l_morph;
+  SfxEntry* r_morph;
+  SfxEntry* eq_morph;
+
+ public:
+  SfxEntry(AffixMgr* pmgr, affentry* dp);
+  ~SfxEntry();
+
+  // ---- checking entry points (defined out of line) ----
+
+  struct hentry* checkword(const char* word,
+                           int len,
+                           int optflags,
+                           PfxEntry* ppfx,
+                           char** wlst,
+                           int maxSug,
+                           int* ns,
+                           const FLAG cclass = FLAG_NULL,
+                           const FLAG needflag = FLAG_NULL,
+                           const FLAG badflag = FLAG_NULL);
+
+  struct hentry* check_twosfx(const char* word,
+                              int len,
+                              int optflags,
+                              PfxEntry* ppfx,
+                              const FLAG needflag = FLAG_NULL);
+
+  char* check_twosfx_morph(const char* word,
+                           int len,
+                           int optflags,
+                           PfxEntry* ppfx,
+                           const FLAG needflag = FLAG_NULL);
+
+  struct hentry* get_next_homonym(struct hentry* he);
+  struct hentry* get_next_homonym(struct hentry* word,
+                                  int optflags,
+                                  PfxEntry* ppfx,
+                                  const FLAG cclass,
+                                  const FLAG needflag);
+
+  char* add(const char* word, size_t len);
+
+  // ---- read-only views of the affix data ----
+
+  // true when the aeXPRODUCT bit is set in the option bits
+  bool allowCross() { return (opts & aeXPRODUCT) != 0; }
+
+  FLAG getFlag() { return aflag; }
+
+  // lookup key comes from rappnd, the affix text and key length from appnd
+  const char* getKey() { return rappnd.c_str(); }
+  const char* getAffix() { return appnd.c_str(); }
+  short getKeyLen() { return appnd.size(); }
+
+  const char* getMorph() { return morphcode; }
+
+  const unsigned short* getCont() { return contclass; }
+  short getContLen() { return contclasslen; }
+
+  // ---- intrusive link accessors ----
+
+  SfxEntry* getNext() { return next; }
+  SfxEntry* getNextNE() { return nextne; }
+  SfxEntry* getNextEQ() { return nexteq; }
+  SfxEntry* getFlgNxt() { return flgnxt; }
+
+  SfxEntry* getLM() { return l_morph; }
+  SfxEntry* getRM() { return r_morph; }
+  SfxEntry* getEQM() { return eq_morph; }
+
+  void setNext(SfxEntry* ptr) { next = ptr; }
+  void setNextNE(SfxEntry* ptr) { nextne = ptr; }
+  void setNextEQ(SfxEntry* ptr) { nexteq = ptr; }
+  void setFlgNxt(SfxEntry* ptr) { flgnxt = ptr; }
+
+  // ---- helpers declared inline here, defined out of line ----
+
+  inline char* nextchar(char* p);
+  inline int test_condition(const char* st, const char* begin);
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/affixmgr.cxx b/extensions/spellcheck/hunspell/src/affixmgr.cxx
new file mode 100644
index 000000000..d6bb67798
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/affixmgr.cxx
@@ -0,0 +1,5117 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#include <algorithm>
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "affixmgr.hxx"
+#include "affentry.hxx"
+#include "langnum.hxx"
+
+#include "csutil.hxx"
+
+// Build an affix manager: put every member into its "nothing configured"
+// state, then load the affix file.
+//
+//   affpath  path of the affix file, passed on to parse_file()
+//   ptr      array of hash managers; element 0 becomes the primary one
+//   md       pointer stored as `maxdic`
+//   key      passed on to parse_file()
+//
+// A parse failure is only reported through HUNSPELL_WARNING; construction
+// still completes.
+AffixMgr::AffixMgr(const char* affpath,
+                   HashMgr** ptr,
+                   int* md,
+                   const char* key) {
+  // --- dictionaries --------------------------------------------------------
+  alldic = ptr;
+  pHMgr = ptr[0];
+  maxdic = md;
+
+  // --- strings and encoding ------------------------------------------------
+  keystring = NULL;
+  trystring = NULL;
+  encoding = NULL;
+  csconv = NULL;
+  utf8 = 0;
+  lang = NULL;     // language
+  langnum = 0;     // language code
+                   // (see http://l10n.openoffice.org/languages.html)
+  version = NULL;  // affix and dictionary file version string
+  wordchars = NULL;    // letters + special word characters
+  ignorechars = NULL;  // characters to be ignored
+
+  // --- tables --------------------------------------------------------------
+  maptable = NULL;
+  nummap = 0;
+  breaktable = NULL;
+  numbreak = -1;  // -1: no BREAK definition seen yet
+  reptable = NULL;
+  numrep = 0;
+  iconvtable = NULL;
+  oconvtable = NULL;
+  checkcpdtable = NULL;
+  numcheckcpd = 0;
+  // allow simplified compound forms (3rd field of CHECKCOMPOUNDPATTERN)
+  simplifiedcpd = 0;
+  defcpdtable = NULL;
+  numdefcpd = 0;
+  phone = NULL;
+
+  // --- compounding flags ---------------------------------------------------
+  compoundflag = FLAG_NULL;        // permits word in compound forms
+  compoundbegin = FLAG_NULL;       // may be first word in compound forms
+  compoundmiddle = FLAG_NULL;      // may be middle word in compound forms
+  compoundend = FLAG_NULL;         // may be last word in compound forms
+  compoundroot = FLAG_NULL;        // compound word signing flag
+  compoundpermitflag = FLAG_NULL;  // compound permitting flag for suffixed
+                                   // word
+  compoundforbidflag = FLAG_NULL;  // compound forbidden flag for suffixed
+                                   // word
+  onlyincompound = FLAG_NULL;
+
+  // --- compounding switches ------------------------------------------------
+  complexprefixes = 0;
+  compoundmoresuffixes = 0;  // allow more suffixes within compound words
+  checkcompounddup = 0;      // forbid double words in compounds
+  checkcompoundrep = 0;      // forbid bad compounds (may be non compound
+                             // word with a REP substitution)
+  checkcompoundcase = 0;     // forbid upper and lowercase combinations at
+                             // word bounds
+  checkcompoundtriple = 0;   // forbid compounds with triple letters
+  simplifiedtriple = 0;      // allow simplified triple letters in compounds
+                             // (Schiff+fahrt -> Schiffahrt)
+
+  // --- compounding limits --------------------------------------------------
+  cpdwordmax = -1;     // default: unlimited wordcount in compound words
+  cpdmin = -1;         // undefined; replaced by MINCPDLEN below if unset
+  cpdmaxsyllable = 0;  // default: unlimited syllablecount in compound words
+  cpdvowels = NULL;    // vowels (for calculating of Hungarian compounding
+                       // limit, O(n) search! XXX)
+  cpdvowels_utf16 = NULL;   // vowels for UTF-8 encoding (bsearch instead of
+                            // O(n) search)
+  cpdvowels_utf16_len = 0;  // number of entries in cpdvowels_utf16
+  cpdsyllablenum = NULL;    // syllable count incrementing flag
+  pfxappnd = NULL;  // previous prefix for counting syllables of the prefix
+                    // BUG
+  sfxappnd = NULL;  // previous suffix for counting syllables of the suffix
+                    // BUG
+  sfxextra = 0;     // modifier for syllable count of sfxappnd BUG
+
+  // --- word-level flags ----------------------------------------------------
+  forbiddenword = FORBIDDENWORD;  // forbidden word signing flag
+  nosuggest = FLAG_NULL;  // don't suggest words signed with NOSUGGEST flag
+  nongramsuggest = FLAG_NULL;
+  needaffix = FLAG_NULL;  // forbidden root, allowed only with suffixes
+  // LEMMA_PRESENT: not put root into the morphological output. Lemma
+  // presents in morphological description in dictionary file. It's often
+  // combined with PSEUDOROOT.
+  lemma_present = FLAG_NULL;
+  circumfix = FLAG_NULL;
+  substandard = FLAG_NULL;
+  keepcase = 0;
+  forceucase = 0;
+  warn = 0;
+
+  // --- suggestion tuning ---------------------------------------------------
+  maxngramsugs = -1;  // undefined
+  maxdiff = -1;       // undefined
+  maxcpdsugs = -1;    // undefined
+  onlymaxdiff = 0;
+  nosplitsugs = 0;
+  sugswithdots = 0;
+
+  // --- miscellaneous switches ----------------------------------------------
+  checknum = 0;       // checking numbers, and word with numbers
+  havecontclass = 0;  // flags of possible continuing classes (double affix)
+  forbidwarn = 0;
+  checksharps = 0;
+  fullstrip = 0;
+
+  // --- affix storage -------------------------------------------------------
+  sfx = NULL;
+  pfx = NULL;
+
+  for (int slot = 0; slot < SETSIZE; ++slot) {
+    pStart[slot] = NULL;
+    pFlag[slot] = NULL;
+    sStart[slot] = NULL;
+    sFlag[slot] = NULL;
+  }
+
+  for (int cls = 0; cls < CONTSIZE; ++cls) {
+    contclasses[cls] = 0;
+  }
+
+  // --- load the affix file -------------------------------------------------
+  const int parseFailed = parse_file(affpath, key);
+  if (parseFailed) {
+    HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n", affpath);
+  }
+
+  // no COMPOUNDMIN seen: use the built-in minimum
+  if (cpdmin == -1) {
+    cpdmin = MINCPDLEN;
+  }
+}
+
+// Release everything the manager owns: the prefix/suffix entry chains, every
+// table filled in by parse_file(), and all heap-allocated option strings.
+//
+// free() and delete accept a null pointer, so pointers are released without
+// a preceding test.
+AffixMgr::~AffixMgr() {
+  // pass through linked prefix entries and clean up
+  for (int i = 0; i < SETSIZE; i++) {
+    pFlag[i] = NULL;
+    PfxEntry* ptr = pStart[i];
+    while (ptr) {
+      PfxEntry* nptr = ptr->getNext();
+      delete ptr;
+      ptr = nptr;
+    }
+    pStart[i] = NULL;
+  }
+
+  // pass through linked suffix entries and clean up
+  for (int j = 0; j < SETSIZE; j++) {
+    sFlag[j] = NULL;
+    SfxEntry* ptr = sStart[j];
+    while (ptr) {
+      SfxEntry* nptr = ptr->getNext();
+      delete ptr;
+      ptr = nptr;
+    }
+    sStart[j] = NULL;
+  }
+
+  free(keystring);
+  keystring = NULL;
+  free(trystring);
+  trystring = NULL;
+  free(encoding);
+  encoding = NULL;
+
+  // MAP table: each row owns an array of strings
+  if (maptable) {
+    for (int j = 0; j < nummap; j++) {
+      for (int k = 0; k < maptable[j].len; k++) {
+        free(maptable[j].set[k]);
+      }
+      free(maptable[j].set);
+      maptable[j].set = NULL;
+      maptable[j].len = 0;
+    }
+    free(maptable);
+    maptable = NULL;
+  }
+  nummap = 0;
+
+  // BREAK table (numbreak may still be -1, in which case nothing is walked)
+  if (breaktable) {
+    for (int j = 0; j < numbreak; j++) {
+      free(breaktable[j]);
+      breaktable[j] = NULL;
+    }
+    free(breaktable);
+    breaktable = NULL;
+  }
+  numbreak = 0;
+
+  // REP table
+  if (reptable) {
+    for (int j = 0; j < numrep; j++) {
+      free(reptable[j].pattern);
+      free(reptable[j].pattern2);
+    }
+    free(reptable);
+    reptable = NULL;
+  }
+  numrep = 0;
+
+  // ICONV / OCONV tables
+  delete iconvtable;
+  iconvtable = NULL;
+  delete oconvtable;
+  oconvtable = NULL;
+
+  // PHONE table.  The struct itself is released even when it has no rules
+  // array; previously it was leaked in that case.
+  if (phone) {
+    if (phone->rules) {
+      // num + 1 pairs are released, i.e. one pair beyond the num rules
+      for (int j = 0; j < phone->num + 1; j++) {
+        free(phone->rules[j * 2]);
+        free(phone->rules[j * 2 + 1]);
+      }
+      free(phone->rules);
+    }
+    free(phone);
+    phone = NULL;
+  }
+
+  // COMPOUNDRULE table
+  if (defcpdtable) {
+    for (int j = 0; j < numdefcpd; j++) {
+      free(defcpdtable[j].def);
+      defcpdtable[j].def = NULL;
+    }
+    free(defcpdtable);
+    defcpdtable = NULL;
+  }
+  numdefcpd = 0;
+
+  // CHECKCOMPOUNDPATTERN table
+  if (checkcpdtable) {
+    for (int j = 0; j < numcheckcpd; j++) {
+      free(checkcpdtable[j].pattern);
+      free(checkcpdtable[j].pattern2);
+      free(checkcpdtable[j].pattern3);
+      checkcpdtable[j].pattern = NULL;
+      checkcpdtable[j].pattern2 = NULL;
+      checkcpdtable[j].pattern3 = NULL;
+    }
+    free(checkcpdtable);
+    checkcpdtable = NULL;
+  }
+  numcheckcpd = 0;
+
+  FREE_FLAG(compoundflag);
+  FREE_FLAG(compoundbegin);
+  FREE_FLAG(compoundmiddle);
+  FREE_FLAG(compoundend);
+  FREE_FLAG(compoundpermitflag);
+  FREE_FLAG(compoundforbidflag);
+  FREE_FLAG(compoundroot);
+  FREE_FLAG(forbiddenword);
+  FREE_FLAG(nosuggest);
+  FREE_FLAG(nongramsuggest);
+  FREE_FLAG(needaffix);
+  FREE_FLAG(lemma_present);
+  FREE_FLAG(circumfix);
+  FREE_FLAG(onlyincompound);
+
+  cpdwordmax = 0;
+  pHMgr = NULL;
+  cpdmin = 0;
+  cpdmaxsyllable = 0;
+  free(cpdvowels);
+  free(cpdvowels_utf16);
+  free(cpdsyllablenum);
+  free_utf_tbl();
+  free(lang);
+  free(wordchars);
+  free(ignorechars);
+  free(version);
+  checknum = 0;
+#ifdef MOZILLA_CLIENT
+  delete[] csconv;
+#endif
+}
+
+void AffixMgr::finishFileMgr(FileMgr* afflst) {
+  // the affix-file reader is not needed any more
+  delete afflst;
+
+  // flatten the prefix tree, then the suffix tree, into sorted lists
+  process_pfx_tree_to_list();
+  process_sfx_tree_to_list();
+}
+
+// Read in the aff file and build up prefix and suffix entry objects.
+//
+//   affpath  path of the affix file
+//   key      handed to FileMgr together with affpath
+//
+// Returns 0 on success and 1 as soon as any directive fails to parse (or an
+// allocation for the default BREAK table fails).  On every exit that happens
+// after the file was opened, finishFileMgr() has been called, so the reader
+// is destroyed and the affix trees are converted to lists.
+//
+// Each line is tested against every known keyword with an independent `if`
+// (no else-chain), so the tests must never modify `line` itself.
+int AffixMgr::parse_file(const char* affpath, const char* key) {
+  char* line;  // io buffers
+  char ft;     // affix type
+
+  // checking flag duplication
+  char dupflags[CONTSIZE];
+  char dupflags_ini = 1;
+
+  // first line indicator for removing byte order mark
+  int firstline = 1;
+
+  // open the affix file
+  FileMgr* afflst = new FileMgr(affpath, key);
+  if (!afflst) {
+    HUNSPELL_WARNING(
+        stderr, "error: could not open affix description file %s\n", affpath);
+    return 1;
+  }
+
+  // step one is to parse the affix file building up the internal
+  // affix data structures
+
+  // read in each line ignoring any that do not
+  // start with a known line type indicator
+  while ((line = afflst->getline()) != NULL) {
+    mychomp(line);
+
+    /* remove byte order mark */
+    if (firstline) {
+      firstline = 0;
+      // Affix file begins with byte order mark: possible incompatibility with
+      // old Hunspell versions
+      if (strncmp(line, "\xEF\xBB\xBF", 3) == 0) {
+        memmove(line, line + 3, strlen(line + 3) + 1);
+      }
+    }
+
+    /* parse in the keyboard string */
+    if (strncmp(line, "KEY", 3) == 0) {
+      if (parse_string(line, &keystring, afflst->getlinenum())) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the try string */
+    if (strncmp(line, "TRY", 3) == 0) {
+      if (parse_string(line, &trystring, afflst->getlinenum())) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the name of the character set used by the .dict and .aff */
+    if (strncmp(line, "SET", 3) == 0) {
+      if (parse_string(line, &encoding, afflst->getlinenum())) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+      if (strcmp(encoding, "UTF-8") == 0) {
+        utf8 = 1;
+#ifndef OPENOFFICEORG
+#ifndef MOZILLA_CLIENT
+        if (initialize_utf_tbl()) {
+          finishFileMgr(afflst);
+          return 1;
+        }
+#endif
+#endif
+      }
+    }
+
+    /* parse COMPLEXPREFIXES for agglutinative languages with right-to-left
+     * writing system */
+    if (strncmp(line, "COMPLEXPREFIXES", 15) == 0)
+      complexprefixes = 1;
+
+    /* parse in the flag used by the controlled compound words */
+    if (strncmp(line, "COMPOUNDFLAG", 12) == 0) {
+      if (parse_flag(line, &compoundflag, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the flag used by compound words; with COMPLEXPREFIXES the
+     * begin and end roles are swapped */
+    if (strncmp(line, "COMPOUNDBEGIN", 13) == 0) {
+      if (parse_flag(line, complexprefixes ? &compoundend : &compoundbegin,
+                     afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the flag used by compound words */
+    if (strncmp(line, "COMPOUNDMIDDLE", 14) == 0) {
+      if (parse_flag(line, &compoundmiddle, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the flag used by compound words; with COMPLEXPREFIXES the
+     * begin and end roles are swapped */
+    if (strncmp(line, "COMPOUNDEND", 11) == 0) {
+      if (parse_flag(line, complexprefixes ? &compoundbegin : &compoundend,
+                     afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the data used by compound_check() method */
+    if (strncmp(line, "COMPOUNDWORDMAX", 15) == 0) {
+      if (parse_num(line, &cpdwordmax, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the flag sign compounds in dictionary */
+    if (strncmp(line, "COMPOUNDROOT", 12) == 0) {
+      if (parse_flag(line, &compoundroot, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the flag used by compound_check() method */
+    if (strncmp(line, "COMPOUNDPERMITFLAG", 18) == 0) {
+      if (parse_flag(line, &compoundpermitflag, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the flag used by compound_check() method */
+    if (strncmp(line, "COMPOUNDFORBIDFLAG", 18) == 0) {
+      if (parse_flag(line, &compoundforbidflag, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    if (strncmp(line, "COMPOUNDMORESUFFIXES", 20) == 0) {
+      compoundmoresuffixes = 1;
+    }
+
+    if (strncmp(line, "CHECKCOMPOUNDDUP", 16) == 0) {
+      checkcompounddup = 1;
+    }
+
+    if (strncmp(line, "CHECKCOMPOUNDREP", 16) == 0) {
+      checkcompoundrep = 1;
+    }
+
+    if (strncmp(line, "CHECKCOMPOUNDTRIPLE", 19) == 0) {
+      checkcompoundtriple = 1;
+    }
+
+    if (strncmp(line, "SIMPLIFIEDTRIPLE", 16) == 0) {
+      simplifiedtriple = 1;
+    }
+
+    if (strncmp(line, "CHECKCOMPOUNDCASE", 17) == 0) {
+      checkcompoundcase = 1;
+    }
+
+    if (strncmp(line, "NOSUGGEST", 9) == 0) {
+      if (parse_flag(line, &nosuggest, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    if (strncmp(line, "NONGRAMSUGGEST", 14) == 0) {
+      if (parse_flag(line, &nongramsuggest, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the flag used by forbidden words */
+    if (strncmp(line, "FORBIDDENWORD", 13) == 0) {
+      if (parse_flag(line, &forbiddenword, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the LEMMA_PRESENT flag */
+    if (strncmp(line, "LEMMA_PRESENT", 13) == 0) {
+      if (parse_flag(line, &lemma_present, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the flag used by circumfixes */
+    if (strncmp(line, "CIRCUMFIX", 9) == 0) {
+      if (parse_flag(line, &circumfix, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the flag used by fogemorphemes */
+    if (strncmp(line, "ONLYINCOMPOUND", 14) == 0) {
+      if (parse_flag(line, &onlyincompound, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the `needaffix' flag (PSEUDOROOT stores into the same
+     * member as NEEDAFFIX) */
+    if (strncmp(line, "PSEUDOROOT", 10) == 0) {
+      if (parse_flag(line, &needaffix, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the `needaffix' flag */
+    if (strncmp(line, "NEEDAFFIX", 9) == 0) {
+      if (parse_flag(line, &needaffix, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the minimal length for words in compounds */
+    if (strncmp(line, "COMPOUNDMIN", 11) == 0) {
+      if (parse_num(line, &cpdmin, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+      // values below 1 are clamped to 1
+      if (cpdmin < 1)
+        cpdmin = 1;
+    }
+
+    /* parse in the max. words and syllables in compounds */
+    if (strncmp(line, "COMPOUNDSYLLABLE", 16) == 0) {
+      if (parse_cpdsyllable(line, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the flag used by compound_check() method */
+    if (strncmp(line, "SYLLABLENUM", 11) == 0) {
+      if (parse_string(line, &cpdsyllablenum, afflst->getlinenum())) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* enable checking of numbers and words with numbers */
+    if (strncmp(line, "CHECKNUM", 8) == 0) {
+      checknum = 1;
+    }
+
+    /* parse in the extra word characters (a false result from parse_array
+     * is treated as failure here) */
+    if (strncmp(line, "WORDCHARS", 9) == 0) {
+      if (!parse_array(line, &wordchars, wordchars_utf16,
+                       utf8, afflst->getlinenum())) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the ignored characters (for example, Arabic optional
+     * diacritic characters) */
+    if (strncmp(line, "IGNORE", 6) == 0) {
+      if (!parse_array(line, &ignorechars, ignorechars_utf16,
+                       utf8, afflst->getlinenum())) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the typical fault correcting table */
+    if (strncmp(line, "REP", 3) == 0) {
+      if (parse_reptable(line, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the input conversion table */
+    if (strncmp(line, "ICONV", 5) == 0) {
+      if (parse_convtable(line, afflst, &iconvtable, "ICONV")) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the output conversion table */
+    if (strncmp(line, "OCONV", 5) == 0) {
+      if (parse_convtable(line, afflst, &oconvtable, "OCONV")) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the phonetic translation table */
+    if (strncmp(line, "PHONE", 5) == 0) {
+      if (parse_phonetable(line, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the checkcompoundpattern table */
+    if (strncmp(line, "CHECKCOMPOUNDPATTERN", 20) == 0) {
+      if (parse_checkcpdtable(line, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the defcompound table */
+    if (strncmp(line, "COMPOUNDRULE", 12) == 0) {
+      if (parse_defcpdtable(line, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the related character map table */
+    if (strncmp(line, "MAP", 3) == 0) {
+      if (parse_maptable(line, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the word breakpoints table */
+    if (strncmp(line, "BREAK", 5) == 0) {
+      if (parse_breaktable(line, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the language for language specific codes */
+    if (strncmp(line, "LANG", 4) == 0) {
+      if (parse_string(line, &lang, afflst->getlinenum())) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+      langnum = get_lang_num(lang);
+    }
+
+    /* store the version string: everything after the keyword and the
+     * blanks/tabs that follow it */
+    if (strncmp(line, "VERSION", 7) == 0) {
+      // Scan with a separate pointer.  Advancing `line` itself would make
+      // all keyword tests below run against the version text, so that e.g.
+      // "VERSION SFX ..." would be parsed as an affix line.
+      const char* vtext = line + 7;
+      while (*vtext == ' ' || *vtext == '\t')
+        ++vtext;
+      // drop the string stored by an earlier VERSION line, if any
+      free(version);
+      version = mystrdup(vtext);
+    }
+
+    if (strncmp(line, "MAXNGRAMSUGS", 12) == 0) {
+      if (parse_num(line, &maxngramsugs, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    if (strncmp(line, "ONLYMAXDIFF", 11) == 0)
+      onlymaxdiff = 1;
+
+    if (strncmp(line, "MAXDIFF", 7) == 0) {
+      if (parse_num(line, &maxdiff, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    if (strncmp(line, "MAXCPDSUGS", 10) == 0) {
+      if (parse_num(line, &maxcpdsugs, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    if (strncmp(line, "NOSPLITSUGS", 11) == 0) {
+      nosplitsugs = 1;
+    }
+
+    if (strncmp(line, "FULLSTRIP", 9) == 0) {
+      fullstrip = 1;
+    }
+
+    if (strncmp(line, "SUGSWITHDOTS", 12) == 0) {
+      sugswithdots = 1;
+    }
+
+    /* parse in the flag used by `keepcase' */
+    if (strncmp(line, "KEEPCASE", 8) == 0) {
+      if (parse_flag(line, &keepcase, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the flag used by `forceucase' */
+    if (strncmp(line, "FORCEUCASE", 10) == 0) {
+      if (parse_flag(line, &forceucase, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    /* parse in the flag used by `warn' */
+    if (strncmp(line, "WARN", 4) == 0) {
+      if (parse_flag(line, &warn, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    if (strncmp(line, "FORBIDWARN", 10) == 0) {
+      forbidwarn = 1;
+    }
+
+    /* parse in the flag used by the affix generator */
+    if (strncmp(line, "SUBSTANDARD", 11) == 0) {
+      if (parse_flag(line, &substandard, afflst)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+
+    if (strncmp(line, "CHECKSHARPS", 11) == 0) {
+      checksharps = 1;
+    }
+
+    /* parse this affix: P - prefix, S - suffix
+     * (the two are swapped when COMPLEXPREFIXES is active) */
+    ft = ' ';
+    if (strncmp(line, "PFX", 3) == 0)
+      ft = complexprefixes ? 'S' : 'P';
+    if (strncmp(line, "SFX", 3) == 0)
+      ft = complexprefixes ? 'P' : 'S';
+    if (ft != ' ') {
+      // the duplicate-flag bookkeeping is cleared lazily, on the first affix
+      if (dupflags_ini) {
+        memset(dupflags, 0, sizeof(dupflags));
+        dupflags_ini = 0;
+      }
+      if (parse_affix(line, ft, afflst, dupflags)) {
+        finishFileMgr(afflst);
+        return 1;
+      }
+    }
+  }
+
+  finishFileMgr(afflst);
+  // affix trees are sorted now
+
+  // now we can speed up performance greatly taking advantage of the
+  // relationship between the affixes and the idea of "subsets".
+
+  // View each prefix as a potential leading subset of another and view
+  // each suffix (reversed) as a potential trailing subset of another.
+
+  // To illustrate this relationship: if we know the prefix "ab" is found in
+  // the word to examine, only prefixes that "ab" is a leading subset of need
+  // be examined.
+  // Furthermore, if "ab" is not present then none of the prefixes that "ab"
+  // is a subset of need be examined.
+  // The same argument goes for suffix strings that are reversed.
+
+  // Then to top this off why not examine the first char of the word to
+  // quickly limit the set of prefixes to examine (i.e. the prefixes to
+  // examine must be leading supersets of the first character of the word,
+  // if they exist).
+
+  // To take advantage of this "subset" relationship, we need to add two
+  // links from each entry: one to take next if the current prefix is found
+  // (call it nexteq) and one to take next if the current prefix is not found
+  // (call it nextne).
+
+  // Since we have built ordered lists, all that remains is to properly
+  // initialize the nextne and nexteq pointers that relate them.
+
+  process_pfx_order();
+  process_sfx_order();
+
+  /* get encoding for CHECKCOMPOUNDCASE */
+  if (!utf8) {
+    char* enc = get_encoding();
+    csconv = get_current_cs(enc);
+    free(enc);
+    enc = NULL;
+
+    // start from the WORDCHARS given in the file, if any
+    std::string expw;
+    if (wordchars) {
+      expw.assign(wordchars);
+      free(wordchars);
+    }
+
+    // add every byte whose upper- and lower-case forms differ and that is
+    // not listed yet
+    for (int i = 0; i <= 255; i++) {
+      if ((csconv[i].cupper != csconv[i].clower) &&
+          (expw.find((char)i) == std::string::npos)) {
+        expw.push_back((char)i);
+      }
+    }
+
+    wordchars = mystrdup(expw.c_str());
+  }
+
+  // default BREAK definition (numbreak is still -1 when no BREAK was parsed)
+  if (numbreak == -1) {
+    breaktable = (char**)malloc(sizeof(char*) * 3);
+    if (!breaktable)
+      return 1;
+    breaktable[0] = mystrdup("-");
+    breaktable[1] = mystrdup("^-");
+    breaktable[2] = mystrdup("-$");
+    if (breaktable[0] && breaktable[1] && breaktable[2]) {
+      numbreak = 3;
+    } else {
+      // A copy failed.  Release what was obtained here instead of leaving a
+      // table behind whose entries are not covered by numbreak.
+      free(breaktable[0]);
+      free(breaktable[1]);
+      free(breaktable[2]);
+      free(breaktable);
+      breaktable = NULL;
+      return 1;
+    }
+  }
+  return 0;
+}
+
+// Register one prefix entry in both prefix indexes:
+//   - pFlag[]  : chain of entries sharing the low byte of the affix flag
+//   - pStart[] : per-first-byte binary tree ordered by the prefix string
+// Entries with an empty key go to the head of the plain list at pStart[0].
+// Always returns 0.
+
+int AffixMgr::build_pfxtree(PfxEntry* pfxptr) {
+  PfxEntry* const entry = pfxptr;
+  const char* const key = entry->getKey();
+
+  // flag index: push the entry onto the head of its flag chain
+  const unsigned char flagIdx = (unsigned char)(entry->getFlag() & 0x00FF);
+  entry->setFlgNxt(pFlag[flagIdx]);
+  pFlag[flagIdx] = entry;
+
+  // null affix string: always inserted at the head of list 0
+  if (key[0] == '\0') {
+    entry->setNext(pStart[0]);
+    pStart[0] = entry;
+    return 0;
+  }
+
+  // normal case: the new entry starts out as a leaf
+  entry->setNextEQ(NULL);
+  entry->setNextNE(NULL);
+
+  const unsigned char first = *((const unsigned char*)key);
+  PfxEntry* node = pStart[first];
+
+  // first insert for this leading byte: the entry becomes the root
+  if (node == NULL) {
+    pStart[first] = entry;
+    return 0;
+  }
+
+  // binary tree insertion, so that a sorted list can be produced later:
+  // keys comparing <= go down the EQ link, greater keys down the NE link
+  for (;;) {
+    const bool goEQ = strcmp(entry->getKey(), node->getKey()) <= 0;
+    PfxEntry* child = goEQ ? node->getNextEQ() : node->getNextNE();
+    if (child) {
+      node = child;
+      continue;
+    }
+    if (goEQ)
+      node->setNextEQ(entry);
+    else
+      node->setNextNE(entry);
+    break;
+  }
+  return 0;
+}
+
+// Register one suffix entry in both suffix indexes:
+//   - sFlag[]  : chain of entries sharing the low byte of the affix flag
+//   - sStart[] : per-first-byte binary tree ordered by getKey() (per the
+//                original note, the reversed suffix string)
+// Entries with an empty key go to the head of the plain list at sStart[0].
+// Always returns 0.
+int AffixMgr::build_sfxtree(SfxEntry* sfxptr) {
+  SfxEntry* const entry = sfxptr;
+  const char* const key = entry->getKey();
+
+  // flag index: push the entry onto the head of its flag chain
+  const unsigned char flagIdx = (unsigned char)(entry->getFlag() & 0x00FF);
+  entry->setFlgNxt(sFlag[flagIdx]);
+  sFlag[flagIdx] = entry;
+
+  // null affix string: always inserted at the head of list 0
+  if (key[0] == '\0') {
+    entry->setNext(sStart[0]);
+    sStart[0] = entry;
+    return 0;
+  }
+
+  // normal case: the new entry starts out as a leaf
+  entry->setNextEQ(NULL);
+  entry->setNextNE(NULL);
+
+  const unsigned char first = *((const unsigned char*)key);
+  SfxEntry* node = sStart[first];
+
+  // first insert for this leading byte: the entry becomes the root
+  if (node == NULL) {
+    sStart[first] = entry;
+    return 0;
+  }
+
+  // binary tree insertion, so that a sorted list can be produced later:
+  // keys comparing <= go down the EQ link, greater keys down the NE link
+  for (;;) {
+    const bool goEQ = strcmp(entry->getKey(), node->getKey()) <= 0;
+    SfxEntry* child = goEQ ? node->getNextEQ() : node->getNextNE();
+    if (child) {
+      node = child;
+      continue;
+    }
+    if (goEQ)
+      node->setNextEQ(entry);
+    else
+      node->setNextNE(entry);
+    break;
+  }
+  return 0;
+}
+
+// Flatten each prefix tree in slots 1..SETSIZE-1 of pStart[] into a linked
+// list (slot 0 is left alone). Always returns 0.
+int AffixMgr::process_pfx_tree_to_list() {
+  int slot = 1;
+  while (slot < SETSIZE) {
+    pStart[slot] = process_pfx_in_order(pStart[slot], NULL);
+    ++slot;
+  }
+  return 0;
+}
+
+// Recursive helper for the tree-to-list conversion. Visits the NE subtree
+// first, links `ptr` in front of what that produced, then continues with the
+// EQ subtree using `ptr` as the list built so far. Returns the new list head
+// (or `nptr` unchanged for an empty subtree).
+PfxEntry* AffixMgr::process_pfx_in_order(PfxEntry* ptr, PfxEntry* nptr) {
+  if (!ptr)
+    return nptr;
+
+  PfxEntry* const rest = process_pfx_in_order(ptr->getNextNE(), nptr);
+  ptr->setNext(rest);
+  return process_pfx_in_order(ptr->getNextEQ(), ptr);
+}
+
+// Flatten each suffix tree in slots 1..SETSIZE-1 of sStart[] into a linked
+// list (slot 0 is left alone). Always returns 0.
+int AffixMgr::process_sfx_tree_to_list() {
+  int slot = 1;
+  while (slot < SETSIZE) {
+    sStart[slot] = process_sfx_in_order(sStart[slot], NULL);
+    ++slot;
+  }
+  return 0;
+}
+
+// Recursive helper for the tree-to-list conversion. Visits the NE subtree
+// first, links `ptr` in front of what that produced, then continues with the
+// EQ subtree using `ptr` as the list built so far. Returns the new list head
+// (or `nptr` unchanged for an empty subtree).
+SfxEntry* AffixMgr::process_sfx_in_order(SfxEntry* ptr, SfxEntry* nptr) {
+  if (!ptr)
+    return nptr;
+
+  SfxEntry* const rest = process_sfx_in_order(ptr->getNextNE(), nptr);
+  ptr->setNext(rest);
+  return process_sfx_in_order(ptr->getNextEQ(), ptr);
+}
+
+// Recompute the NextEQ / NextNE links of every prefix list in slots
+// 1..SETSIZE-1 so that searches can skip entries, using the "leading subset"
+// relation between keys (see isSubset). Always returns 0.
+int AffixMgr::process_pfx_order() {
+  for (int i = 1; i < SETSIZE; i++) {
+    // Pass 1: for each entry,
+    //   NextNE = first later entry whose key the current key is NOT a
+    //            leading subset of (NULL if there is none),
+    //   NextEQ = the immediate successor if the current key is a leading
+    //            subset of it, otherwise NULL.
+    for (PfxEntry* cur = pStart[i]; cur != NULL; cur = cur->getNext()) {
+      PfxEntry* const succ = cur->getNext();
+
+      PfxEntry* miss = succ;
+      while (miss != NULL && isSubset(cur->getKey(), miss->getKey()))
+        miss = miss->getNext();
+      cur->setNextNE(miss);
+
+      if (succ != NULL && isSubset(cur->getKey(), succ->getKey()))
+        cur->setNextEQ(succ);
+      else
+        cur->setNextEQ(NULL);
+    }
+
+    // Pass 2: add search terminators. The last entry of each run of keys
+    // that the current key is a leading subset of gets NextNE = NULL, so a
+    // search that fails there stops.
+    for (PfxEntry* cur = pStart[i]; cur != NULL; cur = cur->getNext()) {
+      PfxEntry* last = NULL;
+      for (PfxEntry* scan = cur->getNext();
+           scan != NULL && isSubset(cur->getKey(), scan->getKey());
+           scan = scan->getNext()) {
+        last = scan;
+      }
+      if (last != NULL)
+        last->setNextNE(NULL);
+    }
+  }
+  return 0;
+}
+
+// Recompute the NextEQ / NextNE links of every suffix list in slots
+// 1..SETSIZE-1 so that searches can skip entries, using the "leading subset"
+// relation between keys (see isSubset). Always returns 0.
+int AffixMgr::process_sfx_order() {
+  for (int i = 1; i < SETSIZE; i++) {
+    // Pass 1: for each entry,
+    //   NextNE = first later entry whose key the current key is NOT a
+    //            leading subset of (NULL if there is none),
+    //   NextEQ = the immediate successor if the current key is a leading
+    //            subset of it, otherwise NULL.
+    for (SfxEntry* cur = sStart[i]; cur != NULL; cur = cur->getNext()) {
+      SfxEntry* const succ = cur->getNext();
+
+      SfxEntry* miss = succ;
+      while (miss != NULL && isSubset(cur->getKey(), miss->getKey()))
+        miss = miss->getNext();
+      cur->setNextNE(miss);
+
+      if (succ != NULL && isSubset(cur->getKey(), succ->getKey()))
+        cur->setNextEQ(succ);
+      else
+        cur->setNextEQ(NULL);
+    }
+
+    // Pass 2: add search terminators. The last entry of each run of keys
+    // that the current key is a leading subset of gets NextNE = NULL, so a
+    // search that fails there stops.
+    for (SfxEntry* cur = sStart[i]; cur != NULL; cur = cur->getNext()) {
+      SfxEntry* last = NULL;
+      for (SfxEntry* scan = cur->getNext();
+           scan != NULL && isSubset(cur->getKey(), scan->getKey());
+           scan = scan->getNext()) {
+        last = scan;
+      }
+      if (last != NULL)
+        last->setNextNE(NULL);
+    }
+  }
+  return 0;
+}
+
+// Dictionary debugging aid: append " " + MORPH_FLAG + <encoded flag> to the
+// C string `result` (each append bounded by MAXLNLEN through mystrcat). When
+// encode_flag() yields NULL only the separator and MORPH_FLAG are appended.
+void AffixMgr::debugflag(char* result, unsigned short flag) {
+  char* encoded = encode_flag(flag);
+  mystrcat(result, " ", MAXLNLEN);
+  mystrcat(result, MORPH_FLAG, MAXLNLEN);
+  if (!encoded)
+    return;
+  mystrcat(result, encoded, MAXLNLEN);
+  free(encoded);  // the encoded flag is released here after use
+}
+
+// Dictionary debugging aid, std::string flavour: append
+// " " + MORPH_FLAG + <encoded flag> to `result` and return `result`. When
+// encode_flag() yields NULL only the separator and MORPH_FLAG are appended.
+std::string& AffixMgr::debugflag(std::string& result, unsigned short flag) {
+  char* encoded = encode_flag(flag);
+  result.append(" ").append(MORPH_FLAG);
+  if (encoded) {
+    result.append(encoded);
+    free(encoded);  // the encoded flag is released here after use
+  }
+  return result;
+}
+
+// Compute the length of an affix condition string: a whole [...] group
+// counts once (at its '['), and bytes outside groups count individually.
+// In UTF-8 mode only bytes without the high bit, or bytes of the form
+// 10xxxxxx, are counted outside groups.
+int AffixMgr::condlen(const char* st) {
+  int count = 0;
+  bool inGroup = false;
+  while (*st) {
+    const char c = *st++;
+    switch (c) {
+      case '[':
+        inGroup = true;
+        ++count;
+        break;
+      case ']':
+        inGroup = false;
+        break;
+      default:
+        if (inGroup)
+          break;
+        if (!utf8 || !(c & 0x80) || ((c & 0xc0) == 0x80))
+          ++count;
+        break;
+    }
+  }
+  return count;
+}
+
+int AffixMgr::encodeit(affentry& entry, const char* cs) {  // store the condition string cs in entry; returns 0, or 1 when mystrdup fails
+ if (strcmp(cs, ".") != 0) {  // anything other than "." is stored as a real condition
+ entry.numconds = (char)condlen(cs);  // length as computed by condlen(), narrowed to char
+ // coverity[buffer_size_warning] - deliberate use of lack of end of conds
+ // padded by strncpy as long condition flag
+ strncpy(entry.c.conds, cs, MAXCONDLEN);  // copies at most MAXCONDLEN bytes; shorter input is zero-padded, longer input is left unterminated
+ if (entry.c.conds[MAXCONDLEN - 1] && cs[MAXCONDLEN]) {  // last slot is non-zero and cs continues: the condition did not fit
+ entry.opts += aeLONGCOND;  // mark the entry as carrying a long condition
+ entry.c.l.conds2 = mystrdup(cs + MAXCONDLEN_1);  // separate copy of cs from offset MAXCONDLEN_1 onwards
+ if (!entry.c.l.conds2)
+ return 1;  // copy failed
+ }
+ } else {  // cs is exactly ".": no condition
+ entry.numconds = 0;
+ entry.c.conds[0] = '\0';
+ }
+ return 0;
+}
+
+// Return 1 when s1 is a leading subset of s2, i.e. every character of s1
+// equals the character of s2 at the same position; a '.' in s1 matches
+// whatever s2 holds there (dots are for infixes). Return 0 otherwise.
+inline int AffixMgr::isSubset(const char* s1, const char* s2) {
+  for (; *s1 != '\0'; ++s1, ++s2) {
+    if ((*s1 != *s2) && (*s1 != '.'))
+      break;
+  }
+  return (*s1 == '\0');
+}
+
+// Look for a prefix entry under which `word` checks out as a dictionary
+// form. Returns the hentry reported by the first matching entry's
+// checkword() and records that entry in `pfx`; returns NULL when nothing
+// matches. Resets pfx, pfxappnd, sfxappnd and sfxextra on entry.
+struct hentry* AffixMgr::prefix_check(const char* word,
+                                      int len,
+                                      char in_compound,
+                                      const FLAG needflag) {
+  pfx = NULL;
+  pfxappnd = NULL;
+  sfxappnd = NULL;
+  sfxextra = 0;
+
+  // special case first: 0 length prefixes are kept in the list at pStart[0]
+  for (PfxEntry* pe = pStart[0]; pe; pe = pe->getNext()) {
+    // fogemorpheme: outside compounds skip entries flagged onlyincompound
+    if ((in_compound == IN_CPD_NOT) && pe->getCont() &&
+        (TESTAFF(pe->getCont(), onlyincompound, pe->getContLen())))
+      continue;
+    // at the end of a compound only entries carrying compoundpermitflag
+    // are allowed
+    if ((in_compound == IN_CPD_END) &&
+        !(pe->getCont() &&
+          (TESTAFF(pe->getCont(), compoundpermitflag, pe->getContLen()))))
+      continue;
+
+    struct hentry* hit = pe->checkword(word, len, in_compound, needflag);
+    if (hit) {
+      pfx = pe;  // BUG: pfx not stateless
+      return hit;
+    }
+  }
+
+  // general case: walk the list selected by the first byte of the word
+  const unsigned char first = *((const unsigned char*)word);
+  PfxEntry* node = pStart[first];
+
+  while (node) {
+    if (!isSubset(node->getKey(), word)) {
+      node = node->getNextNE();
+      continue;
+    }
+
+    // fogemorpheme: outside compounds skip entries flagged onlyincompound
+    bool eligible =
+        !((in_compound == IN_CPD_NOT) && node->getCont() &&
+          (TESTAFF(node->getCont(), onlyincompound, node->getContLen())));
+    // at the end of a compound only entries carrying compoundpermitflag
+    // are allowed
+    if (eligible && (in_compound == IN_CPD_END)) {
+      eligible = node->getCont() && (TESTAFF(node->getCont(), compoundpermitflag,
+                                             node->getContLen()));
+    }
+
+    if (eligible) {
+      struct hentry* hit = node->checkword(word, len, in_compound, needflag);
+      if (hit) {
+        pfx = node;  // BUG: pfx not stateless
+        return hit;
+      }
+    }
+    node = node->getNextEQ();
+  }
+
+  return NULL;
+}
+
+// Prefix search that delegates to each candidate entry's check_twosfx().
+// Returns the first non-NULL result, or NULL. Resets pfx, sfxappnd and
+// sfxextra on entry; `pfx` is recorded only for a hit in the general
+// (non-empty prefix) case.
+struct hentry* AffixMgr::prefix_check_twosfx(const char* word,
+                                             int len,
+                                             char in_compound,
+                                             const FLAG needflag) {
+  pfx = NULL;
+  sfxappnd = NULL;
+  sfxextra = 0;
+
+  // special case first: 0 length prefixes are kept in the list at pStart[0]
+  for (PfxEntry* pe = pStart[0]; pe; pe = pe->getNext()) {
+    struct hentry* hit = pe->check_twosfx(word, len, in_compound, needflag);
+    if (hit)
+      return hit;
+  }
+
+  // general case: walk the list selected by the first byte of the word
+  const unsigned char first = *((const unsigned char*)word);
+  PfxEntry* node = pStart[first];
+
+  while (node) {
+    if (!isSubset(node->getKey(), word)) {
+      node = node->getNextNE();
+      continue;
+    }
+    struct hentry* hit = node->check_twosfx(word, len, in_compound, needflag);
+    if (hit) {
+      pfx = node;
+      return hit;
+    }
+    node = node->getNextEQ();
+  }
+
+  return NULL;
+}
+
+// Collect the morphological descriptions produced by check_morph() of every
+// prefix entry that applies to `word`. The pieces are concatenated into a
+// MAXLNLEN-bounded buffer; the result is returned as a mystrdup() copy, or
+// NULL when nothing was collected. Resets pfx, sfxappnd and sfxextra on
+// entry.
+char* AffixMgr::prefix_check_morph(const char* word,
+                                   int len,
+                                   char in_compound,
+                                   const FLAG needflag) {
+  char result[MAXLNLEN];
+  result[0] = '\0';
+
+  pfx = NULL;
+  sfxappnd = NULL;
+  sfxextra = 0;
+
+  // special case first: 0 length prefixes are kept in the list at pStart[0]
+  for (PfxEntry* pe = pStart[0]; pe; pe = pe->getNext()) {
+    char* morph = pe->check_morph(word, len, in_compound, needflag);
+    if (!morph)
+      continue;
+    mystrcat(result, morph, MAXLNLEN);
+    free(morph);
+  }
+
+  // general case: walk the list selected by the first byte of the word
+  const unsigned char first = *((const unsigned char*)word);
+  PfxEntry* node = pStart[first];
+
+  while (node) {
+    if (!isSubset(node->getKey(), word)) {
+      node = node->getNextNE();
+      continue;
+    }
+
+    char* morph = node->check_morph(word, len, in_compound, needflag);
+    if (morph) {
+      // fogemorpheme: outside compounds, output of entries flagged
+      // onlyincompound is discarded
+      const bool outside = (in_compound == IN_CPD_NOT);
+      if (!(outside && node->getCont() &&
+            (TESTAFF(node->getCont(), onlyincompound, node->getContLen())))) {
+        mystrcat(result, morph, MAXLNLEN);
+        pfx = node;
+      }
+      free(morph);
+    }
+    node = node->getNextEQ();
+  }
+
+  return *result ? mystrdup(result) : NULL;
+}
+
+// Collect the morphological descriptions produced by check_twosfx_morph()
+// of every prefix entry that applies to `word`. The pieces are concatenated
+// into a MAXLNLEN-bounded buffer; the result is returned as a mystrdup()
+// copy, or NULL when nothing was collected. Resets pfx, sfxappnd and
+// sfxextra on entry.
+char* AffixMgr::prefix_check_twosfx_morph(const char* word,
+                                          int len,
+                                          char in_compound,
+                                          const FLAG needflag) {
+  char result[MAXLNLEN];
+  result[0] = '\0';
+
+  pfx = NULL;
+  sfxappnd = NULL;
+  sfxextra = 0;
+
+  // special case first: 0 length prefixes are kept in the list at pStart[0]
+  for (PfxEntry* pe = pStart[0]; pe; pe = pe->getNext()) {
+    char* morph = pe->check_twosfx_morph(word, len, in_compound, needflag);
+    if (!morph)
+      continue;
+    mystrcat(result, morph, MAXLNLEN);
+    free(morph);
+  }
+
+  // general case: walk the list selected by the first byte of the word
+  const unsigned char first = *((const unsigned char*)word);
+  PfxEntry* node = pStart[first];
+
+  while (node) {
+    if (!isSubset(node->getKey(), word)) {
+      node = node->getNextNE();
+      continue;
+    }
+
+    char* morph = node->check_twosfx_morph(word, len, in_compound, needflag);
+    if (morph) {
+      mystrcat(result, morph, MAXLNLEN);
+      free(morph);
+      pfx = node;
+    }
+    node = node->getNextEQ();
+  }
+
+  return *result ? mystrdup(result) : NULL;
+}
+
+// Is word a non compound with a REP substitution (see checkcompoundrep)?
+//
+// For every entry of reptable and every occurrence of its `pattern` in
+// `word`, the occurrence is replaced by `pattern2` and the candidate is
+// passed to candidate_check(). Returns 1 as soon as a candidate is accepted,
+// otherwise 0. Words shorter than 2 bytes, or an empty table, return 0.
+int AffixMgr::cpdrep_check(const char* word, int wl) {
+  if ((wl < 2) || !numrep)
+    return 0;
+
+  for (int i = 0; i < numrep; i++) {
+    const char* r = word;
+    const size_t lenp = strlen(reptable[i].pattern);
+    // search every occurrence of the pattern in the word
+    while ((r = strstr(r, reptable[i].pattern)) != NULL) {
+      std::string candidate(word);
+      candidate.replace(r - word, lenp, reptable[i].pattern2);
+      if (candidate_check(candidate.c_str(), candidate.size()))
+        return 1;
+      // strstr() with an empty pattern also matches at the terminating NUL;
+      // stepping past it would read beyond the end of the word, so stop here
+      if (*r == '\0')
+        break;
+      r++;  // search for the next letter
+    }
+  }
+  return 0;
+}
+
+// forbid compoundings when there are special patterns at word bound
+//
+// word : the whole compound candidate
+// pos  : byte offset of the boundary between the two parts
+// r1   : dictionary entry of the part before the boundary (may be NULL)
+// r2   : dictionary entry of the part after the boundary (may be NULL)
+// The last (unnamed) parameter is unused.
+//
+// Returns 1 when an entry of checkcpdtable matches at the boundary, else 0.
+// An entry matches when all of the following hold:
+//   * pattern2 is a leading subset of word + pos,
+//   * cond / cond2, when set, are among the flags of r1 / r2 (skipped for a
+//     NULL entry),
+//   * the end pattern fits in front of the boundary: an empty pattern always
+//     fits, "0..." requires the unmodified stem of r1 to end at pos, any
+//     other pattern must be the text immediately before pos.
+int AffixMgr::cpdpat_check(const char* word,
+                           int pos,
+                           hentry* r1,
+                           hentry* r2,
+                           const char /*affixed*/) {
+  for (int i = 0; i < numcheckcpd; i++) {
+    if (!isSubset(checkcpdtable[i].pattern2, word + pos))
+      continue;
+    if (r1 && checkcpdtable[i].cond &&
+        !(r1->astr && TESTAFF(r1->astr, checkcpdtable[i].cond, r1->alen)))
+      continue;
+    if (r2 && checkcpdtable[i].cond2 &&
+        !(r2->astr && TESTAFF(r2->astr, checkcpdtable[i].cond2, r2->alen)))
+      continue;
+
+    const char* endpat = checkcpdtable[i].pattern;
+
+    // zero length pattern => only TESTAFF
+    if (!*endpat)
+      return 1;
+
+    if (*endpat == '0') {
+      // zero pattern (0/flag) => unmodified stem (zero affixes allowed);
+      // r1 is checked because the flag tests above accept a NULL r1
+      if (r1 && r1->blen <= pos &&
+          strncmp(word + pos - r1->blen, r1->word, r1->blen) == 0)
+        return 1;
+    } else {
+      // a pattern longer than the text in front of the boundary cannot
+      // match; without this bound the comparison would start before `word`
+      const int len = strlen(endpat);
+      if (len <= pos && strncmp(word + pos - len, endpat, len) == 0)
+        return 1;
+    }
+  }
+  return 0;
+}
+
+// forbid compounding with neighbouring upper and lower case characters at word
+// bounds
+//
+// word : the whole compound candidate
+// pos  : byte offset of the boundary between the two parts
+// Returns 1 when the boundary is to be rejected, 0 otherwise. A '-' on
+// either side of the boundary never triggers a rejection.
+int AffixMgr::cpdcase_check(const char* word, int pos) {
+  if (utf8) {
+    // step back from the byte before the boundary to the lead byte of that
+    // character, but never past the beginning of the word (malformed input
+    // made only of continuation bytes would otherwise run off the buffer)
+    const char* p = word + pos - 1;
+    while (p > word && (*p & 0xc0) == 0x80)
+      p--;
+    // decode from there: element 0 is the character before the boundary,
+    // element 1 the character after it
+    std::string pair(p);
+    std::vector<w_char> pair_u;
+    u8_u16(pair_u, pair);
+    unsigned short a = pair_u.size() > 1 ? ((pair_u[1].h << 8) + pair_u[1].l) : 0;
+    unsigned short b = !pair_u.empty() ? ((pair_u[0].h << 8) + pair_u[0].l) : 0;
+    if (((unicodetoupper(a, langnum) == a) ||
+         (unicodetoupper(b, langnum) == b)) &&
+        (a != '-') && (b != '-'))
+      return 1;
+  } else {
+    // single-byte encodings: consult the ccase field of the csconv table
+    unsigned char a = *(word + pos - 1);
+    unsigned char b = *(word + pos);
+    if ((csconv[a].ccase || csconv[b].ccase) && (a != '-') && (b != '-'))
+      return 1;
+  }
+  return 0;
+}
+
+struct metachar_data {
+ signed short btpp; // pattern position just past the metacharacter (*, ?), restored when backtracking
+ signed short btwp; // word position at which the metacharacter started matching
+ int btnum; // number of words currently matched by the metacharacter
+};
+
+// check compound patterns: put rv at (*words)[wnum] and match the word list against the rules in defcpdtable; returns 1 on a match (slot kept), otherwise 0
+int AffixMgr::defcpd_check(hentry*** words,
+ short wnum,
+ hentry* rv,
+ hentry** def,
+ char all) {
+ int w = 0;  // becomes 1 when *words is borrowed from def
+
+ if (!*words) {
+ w = 1;
+ *words = def;  // no caller array yet: fall back to def
+ }
+
+ if (!*words) {  // def was NULL as well: nothing to store into
+ return 0;
+ }
+
+ std::vector<metachar_data> btinfo(1);  // backtracking records; grows as metacharacters consume words
+
+ short bt = 0;  // number of backtracking records in use
+ int i, j;
+
+ (*words)[wnum] = rv;  // tentatively store rv as word number wnum
+
+ // has the last word COMPOUNDRULE flag?
+ if (rv->alen == 0) {  // rv has no flags at all: undo and fail
+ (*words)[wnum] = NULL;
+ if (w)
+ *words = NULL;  // give the borrowed array back
+ return 0;
+ }
+ int ok = 0;
+ for (i = 0; i < numdefcpd; i++) {  // quick reject: rv must carry at least one non-metacharacter flag of some rule
+ for (j = 0; j < defcpdtable[i].len; j++) {
+ if (defcpdtable[i].def[j] != '*' && defcpdtable[i].def[j] != '?' &&
+ TESTAFF(rv->astr, defcpdtable[i].def[j], rv->alen)) {
+ ok = 1;
+ break;  // leaves only the inner loop; the outer loop keeps scanning
+ }
+ }
+ }
+ if (ok == 0) {
+ (*words)[wnum] = NULL;
+ if (w)
+ *words = NULL;
+ return 0;
+ }
+ // main matching pass: try every rule in turn
+ for (i = 0; i < numdefcpd; i++) {
+ signed short pp = 0; // pattern position
+ signed short wp = 0; // "words" position
+ int ok2;
+ ok = 1;
+ ok2 = 1;
+ do {
+ while ((pp < defcpdtable[i].len) && (wp <= wnum)) {  // advance through pattern and word list together
+ if (((pp + 1) < defcpdtable[i].len) &&
+ ((defcpdtable[i].def[pp + 1] == '*') ||
+ (defcpdtable[i].def[pp + 1] == '?'))) {  // flag followed by a metacharacter
+ int wend = (defcpdtable[i].def[pp + 1] == '?') ? wp : wnum;  // '?' may take at most the word at wp, '*' any up to wnum
+ ok2 = 1;
+ pp += 2;  // step over the flag and its metacharacter
+ btinfo[bt].btpp = pp;
+ btinfo[bt].btwp = wp;
+ while (wp <= wend) {  // consume words carrying the flag at def[pp - 2]; NOTE(review): (*words)[wp] is not NULL-checked here, unlike the literal branch below
+ if (!(*words)[wp]->alen ||
+ !TESTAFF((*words)[wp]->astr, defcpdtable[i].def[pp - 2],
+ (*words)[wp]->alen)) {
+ ok2 = 0;
+ break;
+ }
+ wp++;
+ }
+ if (wp <= wnum)  // words remain after this run
+ ok2 = 0;
+ btinfo[bt].btnum = wp - btinfo[bt].btwp;  // how many words this metacharacter consumed
+ if (btinfo[bt].btnum > 0) {  // keep the record only if something can be given back later
+ ++bt;
+ btinfo.resize(bt+1);
+ }
+ if (ok2)
+ break;
+ } else {  // plain flag: the current word must carry it
+ ok2 = 1;
+ if (!(*words)[wp] || !(*words)[wp]->alen ||
+ !TESTAFF((*words)[wp]->astr, defcpdtable[i].def[pp],
+ (*words)[wp]->alen)) {
+ ok = 0;
+ break;
+ }
+ pp++;
+ wp++;
+ if ((defcpdtable[i].len == pp) && !(wp > wnum))  // pattern exhausted but words remain
+ ok = 0;
+ }
+ }
+ if (ok && ok2) {  // everything matched so far: skip trailing flag+metacharacter pairs
+ int r = pp;
+ while ((defcpdtable[i].len > r) && ((r + 1) < defcpdtable[i].len) &&
+ ((defcpdtable[i].def[r + 1] == '*') ||
+ (defcpdtable[i].def[r + 1] == '?')))
+ r += 2;
+ if (defcpdtable[i].len <= r)  // whole rule covered
+ return 1;
+ }
+ // backtrack: take one word back from the newest record; records whose count drops below zero are discarded
+ if (bt)
+ do {
+ ok = 1;
+ btinfo[bt - 1].btnum--;
+ pp = btinfo[bt - 1].btpp;
+ wp = btinfo[bt - 1].btwp + (signed short)btinfo[bt - 1].btnum;
+ } while ((btinfo[bt - 1].btnum < 0) && --bt);
+ } while (bt);
+ // with all == 0 a match that stops before the end of the rule is accepted too
+ if (ok && ok2 && (!all || (defcpdtable[i].len <= pp)))
+ return 1;
+
+ // check zero ending: remaining flag+metacharacter pairs are stepped over
+ while (ok && ok2 && (defcpdtable[i].len > pp) &&
+ ((pp + 1) < defcpdtable[i].len) &&
+ ((defcpdtable[i].def[pp + 1] == '*') ||
+ (defcpdtable[i].def[pp + 1] == '?')))
+ pp += 2;
+ if (ok && ok2 && (defcpdtable[i].len <= pp))
+ return 1;
+ }
+ (*words)[wnum] = NULL;  // no rule matched: undo the tentative store
+ if (w)
+ *words = NULL;
+ return 0;
+}
+
+// Return 1 when `word` is accepted either by a direct lookup() or by
+// affix_check(word, len); return 0 otherwise.
+inline int AffixMgr::candidate_check(const char* word, int len) {
+  if (lookup(word))
+    return 1;
+
+  // rv = prefix_check(word,len,1);
+  // if (rv) return 1;
+
+  return affix_check(word, len) ? 1 : 0;
+}
+
+// calculate number of syllable for compound-checking
+//
+// Counts the characters of `word` that occur in the configured vowel set
+// (cpdvowels for single-byte encodings, cpdvowels_utf16 in UTF-8 mode).
+// Returns 0 when syllable checking is off (cpdmaxsyllable == 0) or when no
+// vowel set is available for the active encoding.
+short AffixMgr::get_syllable(const std::string& word) {
+  if (cpdmaxsyllable == 0)
+    return 0;
+
+  short num = 0;
+
+  if (!utf8) {
+    // guard in case no vowel list is set - TODO confirm it can be NULL here
+    if (!cpdvowels)
+      return 0;
+    for (size_t i = 0; i < word.size(); ++i) {
+      // strchr() treats the terminating NUL as part of the searched string,
+      // so a NUL byte inside `word` would otherwise be counted as a vowel
+      if (word[i] != '\0' && strchr(cpdvowels, word[i]))
+        num++;
+    }
+  } else if (cpdvowels_utf16) {
+    std::vector<w_char> w;
+    // walk the decoded characters from last to first; the search requires
+    // cpdvowels_utf16 to be sorted
+    int i = u8_u16(w, word);
+    for (; i > 0; i--) {
+      if (std::binary_search(cpdvowels_utf16,
+                             cpdvowels_utf16 + cpdvowels_utf16_len,
+                             w[i - 1])) {
+        ++num;
+      }
+    }
+  }
+  return num;
+}
+
+// Compute the range of byte offsets [*cmin, *cmax) at which `word` may be
+// split into compound parts, based on cpdmin.
+// Single-byte encodings: *cmin = cpdmin, *cmax = len - cpdmin + 1.
+// UTF-8: *cmin is reached by skipping cpdmin characters from the start and
+// *cmax by stepping cpdmin - 1 characters back from the end, where bytes of
+// the form 10xxxxxx are treated as continuations of a character.
+void AffixMgr::setcminmax(int* cmin, int* cmax, const char* word, int len) {
+  if (!utf8) {
+    *cmin = cpdmin;
+    *cmax = len - cpdmin + 1;
+    return;
+  }
+
+  // forward: skip cpdmin characters (or stop at the end of the word)
+  int lo = 0;
+  for (int n = 0; (n < cpdmin) && lo < len; ++n) {
+    ++lo;
+    while (lo < len && (word[lo] & 0xc0) == 0x80)
+      ++lo;
+  }
+  *cmin = lo;
+
+  // backward: step cpdmin - 1 characters back from the end
+  int hi = len;
+  for (int n = 0; (n < (cpdmin - 1)) && hi >= 0; ++n) {
+    --hi;
+    while (hi >= 0 && (word[hi] & 0xc0) == 0x80)
+      --hi;
+  }
+  *cmax = hi;
+}
+
+// check if compound word is correctly spelled
+// hu_mov_rule = spec. Hungarian rule (XXX)
+struct hentry* AffixMgr::compound_check(const char* word,
+ int len,
+ short wordnum,
+ short numsyllable,
+ short maxwordnum,
+ short wnum,
+ hentry** words = NULL,
+ hentry** rwords = NULL,
+ char hu_mov_rule = 0,
+ char is_sug = 0,
+ int* info = NULL) {
+ int i;
+ short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2;
+ struct hentry* rv = NULL;
+ struct hentry* rv_first;
+ std::string st;
+ char ch = '\0';
+ int cmin;
+ int cmax;
+ int striple = 0;
+ int scpd = 0;
+ int soldi = 0;
+ int oldcmin = 0;
+ int oldcmax = 0;
+ int oldlen = 0;
+ int checkedstriple = 0;
+ int onlycpdrule;
+ char affixed = 0;
+ hentry** oldwords = words;
+
+ int checked_prefix;
+
+ setcminmax(&cmin, &cmax, word, len);
+
+ st.assign(word);
+
+ for (i = cmin; i < cmax; i++) {
+ // go to end of the UTF-8 character
+ if (utf8) {
+ for (; (st[i] & 0xc0) == 0x80; i++)
+ ;
+ if (i >= cmax)
+ return NULL;
+ }
+
+ words = oldwords;
+ onlycpdrule = (words) ? 1 : 0;
+
+ do { // onlycpdrule loop
+
+ oldnumsyllable = numsyllable;
+ oldwordnum = wordnum;
+ checked_prefix = 0;
+
+ do { // simplified checkcompoundpattern loop
+
+ if (scpd > 0) {
+ for (; scpd <= numcheckcpd &&
+ (!checkcpdtable[scpd - 1].pattern3 ||
+ strncmp(word + i, checkcpdtable[scpd - 1].pattern3,
+ strlen(checkcpdtable[scpd - 1].pattern3)) != 0);
+ scpd++)
+ ;
+
+ if (scpd > numcheckcpd)
+ break; // break simplified checkcompoundpattern loop
+ st.replace(i, std::string::npos, checkcpdtable[scpd - 1].pattern);
+ soldi = i;
+ i += strlen(checkcpdtable[scpd - 1].pattern);
+ st.replace(i, std::string::npos, checkcpdtable[scpd - 1].pattern2);
+ st.replace(i + strlen(checkcpdtable[scpd - 1].pattern2), std::string::npos,
+ word + soldi + strlen(checkcpdtable[scpd - 1].pattern3));
+
+ oldlen = len;
+ len += strlen(checkcpdtable[scpd - 1].pattern) +
+ strlen(checkcpdtable[scpd - 1].pattern2) -
+ strlen(checkcpdtable[scpd - 1].pattern3);
+ oldcmin = cmin;
+ oldcmax = cmax;
+ setcminmax(&cmin, &cmax, st.c_str(), len);
+
+ cmax = len - cpdmin + 1;
+ }
+
+ ch = st[i];
+ st[i] = '\0';
+
+ sfx = NULL;
+ pfx = NULL;
+
+ // FIRST WORD
+
+ affixed = 1;
+ rv = lookup(st.c_str()); // perhaps without prefix
+
+ // search homonym with compound flag
+ while ((rv) && !hu_mov_rule &&
+ ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
+ !((compoundflag && !words && !onlycpdrule &&
+ TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+ (compoundbegin && !wordnum && !onlycpdrule &&
+ TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
+ (compoundmiddle && wordnum && !words && !onlycpdrule &&
+ TESTAFF(rv->astr, compoundmiddle, rv->alen)) ||
+ (numdefcpd && onlycpdrule &&
+ ((!words && !wordnum &&
+ defcpd_check(&words, wnum, rv, rwords, 0)) ||
+ (words &&
+ defcpd_check(&words, wnum, rv, rwords, 0))))) ||
+ (scpd != 0 && checkcpdtable[scpd - 1].cond != FLAG_NULL &&
+ !TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond, rv->alen)))) {
+ rv = rv->next_homonym;
+ }
+
+ if (rv)
+ affixed = 0;
+
+ if (!rv) {
+ if (onlycpdrule)
+ break;
+ if (compoundflag &&
+ !(rv = prefix_check(st.c_str(), i,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN,
+ compoundflag))) {
+ if (((rv = suffix_check(
+ st.c_str(), i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundflag,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+ (compoundmoresuffixes &&
+ (rv = suffix_check_twosfx(st.c_str(), i, 0, NULL, compoundflag)))) &&
+ !hu_mov_rule && sfx->getCont() &&
+ ((compoundforbidflag &&
+ TESTAFF(sfx->getCont(), compoundforbidflag,
+ sfx->getContLen())) ||
+ (compoundend &&
+ TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())))) {
+ rv = NULL;
+ }
+ }
+
+ if (rv ||
+ (((wordnum == 0) && compoundbegin &&
+ ((rv = suffix_check(
+ st.c_str(), i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+ (compoundmoresuffixes &&
+ (rv = suffix_check_twosfx(
+ st.c_str(), i, 0, NULL,
+ compoundbegin))) || // twofold suffixes + compound
+ (rv = prefix_check(st.c_str(), i,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN,
+ compoundbegin)))) ||
+ ((wordnum > 0) && compoundmiddle &&
+ ((rv = suffix_check(
+ st.c_str(), i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+ (compoundmoresuffixes &&
+ (rv = suffix_check_twosfx(
+ st.c_str(), i, 0, NULL,
+ compoundmiddle))) || // twofold suffixes + compound
+ (rv = prefix_check(st.c_str(), i,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN,
+ compoundmiddle))))))
+ checked_prefix = 1;
+ // else check forbiddenwords and needaffix
+ } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+ TESTAFF(rv->astr, needaffix, rv->alen) ||
+ TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
+ (is_sug && nosuggest &&
+ TESTAFF(rv->astr, nosuggest, rv->alen)))) {
+ st[i] = ch;
+ // continue;
+ break;
+ }
+
+ // check non_compound flag in suffix and prefix
+ if ((rv) && !hu_mov_rule &&
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundforbidflag, pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundforbidflag,
+ sfx->getContLen())))) {
+ rv = NULL;
+ }
+
+ // check compoundend flag in suffix and prefix
+ if ((rv) && !checked_prefix && compoundend && !hu_mov_rule &&
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundend, pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())))) {
+ rv = NULL;
+ }
+
+ // check compoundmiddle flag in suffix and prefix
+ if ((rv) && !checked_prefix && (wordnum == 0) && compoundmiddle &&
+ !hu_mov_rule &&
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundmiddle, pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundmiddle, sfx->getContLen())))) {
+ rv = NULL;
+ }
+
+ // check forbiddenwords
+ if ((rv) && (rv->astr) &&
+ (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+ TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
+ (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) {
+ return NULL;
+ }
+
+ // increment word number, if the second root has a compoundroot flag
+ if ((rv) && compoundroot &&
+ (TESTAFF(rv->astr, compoundroot, rv->alen))) {
+ wordnum++;
+ }
+
+ // first word is acceptable in compound words?
+ if (((rv) &&
+ (checked_prefix || (words && words[wnum]) ||
+ (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+ ((oldwordnum == 0) && compoundbegin &&
+ TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
+ ((oldwordnum > 0) && compoundmiddle &&
+ TESTAFF(rv->astr, compoundmiddle, rv->alen)) // ||
+ // (numdefcpd && )
+
+ // LANG_hu section: spec. Hungarian rule
+ || ((langnum == LANG_hu) && hu_mov_rule &&
+ (TESTAFF(
+ rv->astr, 'F',
+ rv->alen) || // XXX hardwired Hungarian dictionary codes
+ TESTAFF(rv->astr, 'G', rv->alen) ||
+ TESTAFF(rv->astr, 'H', rv->alen)))
+ // END of LANG_hu section
+ ) &&
+ (
+ // test CHECKCOMPOUNDPATTERN conditions
+ scpd == 0 || checkcpdtable[scpd - 1].cond == FLAG_NULL ||
+ TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond, rv->alen)) &&
+ !((checkcompoundtriple && scpd == 0 &&
+ !words && // test triple letters
+ (word[i - 1] == word[i]) &&
+ (((i > 1) && (word[i - 1] == word[i - 2])) ||
+ ((word[i - 1] == word[i + 1])) // may be word[i+1] == '\0'
+ )) ||
+ (checkcompoundcase && scpd == 0 && !words &&
+ cpdcase_check(word, i))))
+ // LANG_hu section: spec. Hungarian rule
+ || ((!rv) && (langnum == LANG_hu) && hu_mov_rule &&
+ (rv = affix_check(st.c_str(), i)) &&
+ (sfx && sfx->getCont() &&
+ ( // XXX hardwired Hungarian dic. codes
+ TESTAFF(sfx->getCont(), (unsigned short)'x',
+ sfx->getContLen()) ||
+ TESTAFF(
+ sfx->getCont(), (unsigned short)'%',
+ sfx->getContLen()))))) { // first word is ok condition
+
+ // LANG_hu section: spec. Hungarian rule
+ if (langnum == LANG_hu) {
+ // calculate syllable number of the word
+ numsyllable += get_syllable(st.substr(i));
+ // + 1 word, if syllable number of the prefix > 1 (hungarian
+ // convention)
+ if (pfx && (get_syllable(pfx->getKey()) > 1))
+ wordnum++;
+ }
+ // END of LANG_hu section
+
+ // NEXT WORD(S)
+ rv_first = rv;
+ st[i] = ch;
+
+ do { // striple loop
+
+ // check simplifiedtriple
+ if (simplifiedtriple) {
+ if (striple) {
+ checkedstriple = 1;
+ i--; // check "fahrt" instead of "ahrt" in "Schiffahrt"
+ } else if (i > 2 && *(word + i - 1) == *(word + i - 2))
+ striple = 1;
+ }
+
+ rv = lookup(st.c_str() + i); // perhaps without prefix
+
+ // search homonym with compound flag
+ while ((rv) &&
+ ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
+ !((compoundflag && !words &&
+ TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+ (compoundend && !words &&
+ TESTAFF(rv->astr, compoundend, rv->alen)) ||
+ (numdefcpd && words &&
+ defcpd_check(&words, wnum + 1, rv, NULL, 1))) ||
+ (scpd != 0 && checkcpdtable[scpd - 1].cond2 != FLAG_NULL &&
+ !TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond2,
+ rv->alen)))) {
+ rv = rv->next_homonym;
+ }
+
+ // check FORCEUCASE
+ if (rv && forceucase && (rv) &&
+ (TESTAFF(rv->astr, forceucase, rv->alen)) &&
+ !(info && *info & SPELL_ORIGCAP))
+ rv = NULL;
+
+ if (rv && words && words[wnum + 1])
+ return rv_first;
+
+ oldnumsyllable2 = numsyllable;
+ oldwordnum2 = wordnum;
+
+ // LANG_hu section: spec. Hungarian rule, XXX hardwired dictionary
+ // code
+ if ((rv) && (langnum == LANG_hu) &&
+ (TESTAFF(rv->astr, 'I', rv->alen)) &&
+ !(TESTAFF(rv->astr, 'J', rv->alen))) {
+ numsyllable--;
+ }
+ // END of LANG_hu section
+
+ // increment word number, if the second root has a compoundroot flag
+ if ((rv) && (compoundroot) &&
+ (TESTAFF(rv->astr, compoundroot, rv->alen))) {
+ wordnum++;
+ }
+
+ // check forbiddenwords
+ if ((rv) && (rv->astr) &&
+ (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+ TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
+ (is_sug && nosuggest &&
+ TESTAFF(rv->astr, nosuggest, rv->alen))))
+ return NULL;
+
+ // second word is acceptable, as a root?
+ // hungarian conventions: compounding is acceptable,
+ // when compound forms consist of 2 words, or if more,
+ // then the syllable number of root words must be 6, or lesser.
+
+ if ((rv) &&
+ ((compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+ (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))) &&
+ (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) ||
+ ((cpdmaxsyllable != 0) &&
+ (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->clen)) <=
+ cpdmaxsyllable))) &&
+ (
+ // test CHECKCOMPOUNDPATTERN
+ !numcheckcpd || scpd != 0 ||
+ !cpdpat_check(word, i, rv_first, rv, 0)) &&
+ ((!checkcompounddup || (rv != rv_first)))
+ // test CHECKCOMPOUNDPATTERN conditions
+ &&
+ (scpd == 0 || checkcpdtable[scpd - 1].cond2 == FLAG_NULL ||
+ TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond2, rv->alen))) {
+ // forbid compound word, if it is a non compound word with typical
+ // fault
+ if (checkcompoundrep && cpdrep_check(word, len))
+ return NULL;
+ return rv_first;
+ }
+
+ numsyllable = oldnumsyllable2;
+ wordnum = oldwordnum2;
+
+ // perhaps second word has prefix or/and suffix
+ sfx = NULL;
+ sfxflag = FLAG_NULL;
+ rv = (compoundflag && !onlycpdrule)
+ ? affix_check((word + i), strlen(word + i), compoundflag,
+ IN_CPD_END)
+ : NULL;
+ if (!rv && compoundend && !onlycpdrule) {
+ sfx = NULL;
+ pfx = NULL;
+ rv = affix_check((word + i), strlen(word + i), compoundend,
+ IN_CPD_END);
+ }
+
+ if (!rv && numdefcpd && words) {
+ rv = affix_check((word + i), strlen(word + i), 0, IN_CPD_END);
+ if (rv && defcpd_check(&words, wnum + 1, rv, NULL, 1))
+ return rv_first;
+ rv = NULL;
+ }
+
+ // test CHECKCOMPOUNDPATTERN conditions (allowed forms)
+ if (rv &&
+ !(scpd == 0 || checkcpdtable[scpd - 1].cond2 == FLAG_NULL ||
+ TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond2, rv->alen)))
+ rv = NULL;
+
+ // test CHECKCOMPOUNDPATTERN conditions (forbidden compounds)
+ if (rv && numcheckcpd && scpd == 0 &&
+ cpdpat_check(word, i, rv_first, rv, affixed))
+ rv = NULL;
+
+ // check non_compound flag in suffix and prefix
+ if ((rv) && ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundforbidflag,
+ pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundforbidflag,
+ sfx->getContLen())))) {
+ rv = NULL;
+ }
+
+ // check FORCEUCASE
+ if (rv && forceucase && (rv) &&
+ (TESTAFF(rv->astr, forceucase, rv->alen)) &&
+ !(info && *info & SPELL_ORIGCAP))
+ rv = NULL;
+
+ // check forbiddenwords
+ if ((rv) && (rv->astr) &&
+ (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+ TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
+ (is_sug && nosuggest &&
+ TESTAFF(rv->astr, nosuggest, rv->alen))))
+ return NULL;
+
+ // pfxappnd = prefix of word+i, or NULL
+ // calculate syllable number of prefix.
+ // hungarian convention: when syllable number of prefix is more,
+ // than 1, the prefix+word counts as two words.
+
+ if (langnum == LANG_hu) {
+ // calculate syllable number of the word
+ numsyllable += get_syllable(word + i);
+
+ // - affix syllable num.
+ // XXX only second suffix (inflections, not derivations)
+ if (sfxappnd) {
+ std::string tmp(sfxappnd);
+ reverseword(tmp);
+ numsyllable -= get_syllable(tmp) + sfxextra;
+ }
+
+ // + 1 word, if syllable number of the prefix > 1 (hungarian
+ // convention)
+ if (pfx && (get_syllable(pfx->getKey()) > 1))
+ wordnum++;
+
+ // increment syllable num, if last word has a SYLLABLENUM flag
+ // and the suffix is beginning `s'
+
+ if (cpdsyllablenum) {
+ switch (sfxflag) {
+ case 'c': {
+ numsyllable += 2;
+ break;
+ }
+ case 'J': {
+ numsyllable += 1;
+ break;
+ }
+ case 'I': {
+ if (rv && TESTAFF(rv->astr, 'J', rv->alen))
+ numsyllable += 1;
+ break;
+ }
+ }
+ }
+ }
+
+ // increment word number, if the second word has a compoundroot flag
+ if ((rv) && (compoundroot) &&
+ (TESTAFF(rv->astr, compoundroot, rv->alen))) {
+ wordnum++;
+ }
+
+ // second word is acceptable, as a word with prefix or/and suffix?
+ // hungarian conventions: compounding is acceptable,
+ // when compound forms consist 2 word, otherwise
+ // the syllable number of root words is 6, or lesser.
+ if ((rv) &&
+ (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) ||
+ ((cpdmaxsyllable != 0) && (numsyllable <= cpdmaxsyllable))) &&
+ ((!checkcompounddup || (rv != rv_first)))) {
+ // forbid compound word, if it is a non compound word with typical
+ // fault
+ if (checkcompoundrep && cpdrep_check(word, len))
+ return NULL;
+ return rv_first;
+ }
+
+ numsyllable = oldnumsyllable2;
+ wordnum = oldwordnum2;
+
+ // perhaps second word is a compound word (recursive call)
+ if (wordnum < maxwordnum) {
+ rv = compound_check(st.c_str() + i, strlen(st.c_str() + i), wordnum + 1,
+ numsyllable, maxwordnum, wnum + 1, words, rwords, 0,
+ is_sug, info);
+
+ if (rv && numcheckcpd &&
+ ((scpd == 0 &&
+ cpdpat_check(word, i, rv_first, rv, affixed)) ||
+ (scpd != 0 &&
+ !cpdpat_check(word, i, rv_first, rv, affixed))))
+ rv = NULL;
+ } else {
+ rv = NULL;
+ }
+ if (rv) {
+ // forbid compound word, if it is a non compound word with typical
+ // fault
+ if (checkcompoundrep || forbiddenword) {
+ struct hentry* rv2 = NULL;
+
+ if (checkcompoundrep && cpdrep_check(word, len))
+ return NULL;
+
+ // check first part
+ if (strncmp(rv->word, word + i, rv->blen) == 0) {
+ char r = st[i + rv->blen];
+ st[i + rv->blen] = '\0';
+
+ if (checkcompoundrep && cpdrep_check(st.c_str(), i + rv->blen)) {
+ st[ + i + rv->blen] = r;
+ continue;
+ }
+
+ if (forbiddenword) {
+ rv2 = lookup(word);
+ if (!rv2)
+ rv2 = affix_check(word, len);
+ if (rv2 && rv2->astr &&
+ TESTAFF(rv2->astr, forbiddenword, rv2->alen) &&
+ (strncmp(rv2->word, st.c_str(), i + rv->blen) == 0)) {
+ return NULL;
+ }
+ }
+ st[i + rv->blen] = r;
+ }
+ }
+ return rv_first;
+ }
+ } while (striple && !checkedstriple); // end of striple loop
+
+ if (checkedstriple) {
+ i++;
+ checkedstriple = 0;
+ striple = 0;
+ }
+
+ } // first word is ok condition
+
+ if (soldi != 0) {
+ i = soldi;
+ soldi = 0;
+ len = oldlen;
+ cmin = oldcmin;
+ cmax = oldcmax;
+ }
+ scpd++;
+
+ } while (!onlycpdrule && simplifiedcpd &&
+ scpd <= numcheckcpd); // end of simplifiedcpd loop
+
+ scpd = 0;
+ wordnum = oldwordnum;
+ numsyllable = oldnumsyllable;
+
+ if (soldi != 0) {
+ i = soldi;
+ st.assign(word); // XXX add more optim.
+ soldi = 0;
+ } else
+ st[i] = ch;
+
+ } while (numdefcpd && oldwordnum == 0 &&
+ onlycpdrule++ < 1); // end of onlycpd loop
+ }
+
+ return NULL;
+}
+
+ // Morphological-analysis variant of the compound check.
+ //
+ // Tries every split position i in [cmin, cmax) of `word` (limits come from
+ // setcminmax()). For each split whose first part is accepted as a compound
+ // member, the description of that part is collected in the local buffer
+ // `presult`; when the remainder is accepted too, `presult` plus the
+ // description of the remainder is appended to *result with mystrcat().
+ // If the remainder is not accepted directly, the function recurses on it,
+ // passing `presult` down as `partresult`.
+ //
+ // word, len    word to analyse and its length
+ // wordnum      number of compound members found so far
+ // numsyllable  syllable count so far (only updated for LANG_hu)
+ // maxwordnum   recursion is only attempted while wordnum < maxwordnum
+ // wnum         index into `words` used by defcpd_check()
+ // words/rwords state handed to defcpd_check()
+ // hu_mov_rule  spec. Hungarian rule (XXX)
+ // result       in/out: accumulated analysis text; despite the NULL default
+ //              it is dereferenced unconditionally, so callers must pass a
+ //              valid buffer of MAXLNLEN bytes
+ // partresult   analysis of the parts preceding `word`, or NULL
+ //
+ // Every return path yields 0; the output is delivered through *result.
+ // Side effects: writes the members sfx, pfx and sfxflag.
+ int AffixMgr::compound_check_morph(const char* word,
+ int len,
+ short wordnum,
+ short numsyllable,
+ short maxwordnum,
+ short wnum,
+ hentry** words,
+ hentry** rwords,
+ char hu_mov_rule = 0,
+ char** result = NULL,
+ char* partresult = NULL) {
+ int i;
+ short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2;
+ int ok = 0;
+
+ struct hentry* rv = NULL;
+ struct hentry* rv_first;
+ std::string st;
+ char ch;
+
+ int checked_prefix;
+ char presult[MAXLNLEN];
+
+ int cmin;
+ int cmax;
+
+ int onlycpdrule;
+ char affixed = 0;
+ hentry** oldwords = words;
+
+ setcminmax(&cmin, &cmax, word, len);
+
+ // st is a scratch copy of word that gets cut at position i
+ st.assign(word);
+
+ for (i = cmin; i < cmax; i++) {
+ // go to end of the UTF-8 character
+ if (utf8) {
+ for (; (st[i] & 0xc0) == 0x80; i++)
+ ;
+ if (i >= cmax)
+ return 0;
+ }
+
+ words = oldwords;
+ onlycpdrule = (words) ? 1 : 0;
+
+ do { // onlycpdrule loop
+
+ oldnumsyllable = numsyllable;
+ oldwordnum = wordnum;
+ checked_prefix = 0;
+
+ // temporarily terminate st at the split point; ch keeps the byte
+ ch = st[i];
+ st[i] = '\0';
+ sfx = NULL;
+
+ // FIRST WORD
+
+ affixed = 1;
+
+ *presult = '\0';
+ if (partresult)
+ mystrcat(presult, partresult, MAXLNLEN);
+
+ rv = lookup(st.c_str()); // perhaps without prefix
+
+ // search homonym with compound flag
+ while ((rv) && !hu_mov_rule &&
+ ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
+ !((compoundflag && !words && !onlycpdrule &&
+ TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+ (compoundbegin && !wordnum && !onlycpdrule &&
+ TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
+ (compoundmiddle && wordnum && !words && !onlycpdrule &&
+ TESTAFF(rv->astr, compoundmiddle, rv->alen)) ||
+ (numdefcpd && onlycpdrule &&
+ ((!words && !wordnum &&
+ defcpd_check(&words, wnum, rv, rwords, 0)) ||
+ (words &&
+ defcpd_check(&words, wnum, rv, rwords, 0))))))) {
+ rv = rv->next_homonym;
+ }
+
+ // found as a plain dictionary entry: not an affixed form
+ if (rv)
+ affixed = 0;
+
+ if (rv) {
+ sprintf(presult + strlen(presult), "%c%s%s", MSEP_FLD, MORPH_PART, st.c_str());
+ if (!HENTRY_FIND(rv, MORPH_STEM)) {
+ sprintf(presult + strlen(presult), "%c%s%s", MSEP_FLD, MORPH_STEM,
+ st.c_str());
+ }
+ // store the pointer of the hash entry
+ // sprintf(presult + strlen(presult), "%c%s%p", MSEP_FLD,
+ // MORPH_HENTRY, rv);
+ if (HENTRY_DATA(rv)) {
+ sprintf(presult + strlen(presult), "%c%s", MSEP_FLD,
+ HENTRY_DATA2(rv));
+ }
+ }
+
+ if (!rv) {
+ if (onlycpdrule && strlen(*result) > MAXLNLEN / 10)
+ break;
+ if (compoundflag &&
+ !(rv =
+ prefix_check(st.c_str(), i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN,
+ compoundflag))) {
+ if (((rv = suffix_check(st.c_str(), i, 0, NULL, NULL, 0, NULL, FLAG_NULL,
+ compoundflag,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+ (compoundmoresuffixes &&
+ (rv = suffix_check_twosfx(st.c_str(), i, 0, NULL, compoundflag)))) &&
+ !hu_mov_rule && sfx->getCont() &&
+ ((compoundforbidflag &&
+ TESTAFF(sfx->getCont(), compoundforbidflag,
+ sfx->getContLen())) ||
+ (compoundend &&
+ TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())))) {
+ rv = NULL;
+ }
+ }
+
+ if (rv ||
+ (((wordnum == 0) && compoundbegin &&
+ ((rv = suffix_check(st.c_str(), i, 0, NULL, NULL, 0, NULL, FLAG_NULL,
+ compoundbegin,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+ (compoundmoresuffixes &&
+ (rv = suffix_check_twosfx(
+ st.c_str(), i, 0, NULL,
+ compoundbegin))) || // twofold suffix+compound
+ (rv = prefix_check(st.c_str(), i,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN,
+ compoundbegin)))) ||
+ ((wordnum > 0) && compoundmiddle &&
+ ((rv = suffix_check(st.c_str(), i, 0, NULL, NULL, 0, NULL, FLAG_NULL,
+ compoundmiddle,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+ (compoundmoresuffixes &&
+ (rv = suffix_check_twosfx(
+ st.c_str(), i, 0, NULL,
+ compoundmiddle))) || // twofold suffix+compound
+ (rv = prefix_check(st.c_str(), i,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN,
+ compoundmiddle)))))) {
+ // char * p = prefix_check_morph(st, i, 0, compound);
+ char* p = NULL;
+ if (compoundflag)
+ p = affix_check_morph(st.c_str(), i, compoundflag);
+ if (!p || (*p == '\0')) {
+ if (p)
+ free(p);
+ p = NULL;
+ if ((wordnum == 0) && compoundbegin) {
+ p = affix_check_morph(st.c_str(), i, compoundbegin);
+ } else if ((wordnum > 0) && compoundmiddle) {
+ p = affix_check_morph(st.c_str(), i, compoundmiddle);
+ }
+ }
+ if (p && (*p != '\0')) {
+ sprintf(presult + strlen(presult), "%c%s%s%s", MSEP_FLD, MORPH_PART,
+ st.c_str(), line_uniq_app(&p, MSEP_REC));
+ }
+ if (p)
+ free(p);
+ checked_prefix = 1;
+ }
+ // else check forbiddenwords
+ } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+ TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
+ TESTAFF(rv->astr, needaffix, rv->alen))) {
+ st[i] = ch;
+ continue;
+ }
+
+ // check non_compound flag in suffix and prefix
+ if ((rv) && !hu_mov_rule &&
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundforbidflag, pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundforbidflag, sfx->getContLen())))) {
+ continue;
+ }
+
+ // check compoundend flag in suffix and prefix
+ if ((rv) && !checked_prefix && compoundend && !hu_mov_rule &&
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundend, pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())))) {
+ continue;
+ }
+
+ // check compoundmiddle flag in suffix and prefix
+ if ((rv) && !checked_prefix && (wordnum == 0) && compoundmiddle &&
+ !hu_mov_rule &&
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundmiddle, pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundmiddle, sfx->getContLen())))) {
+ rv = NULL;
+ }
+
+ // check forbiddenwords
+ if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+ TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen)))
+ continue;
+
+ // increment word number, if the second root has a compoundroot flag
+ if ((rv) && (compoundroot) &&
+ (TESTAFF(rv->astr, compoundroot, rv->alen))) {
+ wordnum++;
+ }
+
+ // first word is acceptable in compound words?
+ if (((rv) &&
+ (checked_prefix || (words && words[wnum]) ||
+ (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+ ((oldwordnum == 0) && compoundbegin &&
+ TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
+ ((oldwordnum > 0) && compoundmiddle &&
+ TESTAFF(rv->astr, compoundmiddle, rv->alen))
+ // LANG_hu section: spec. Hungarian rule
+ || ((langnum == LANG_hu) && // hu_mov_rule
+ hu_mov_rule && (TESTAFF(rv->astr, 'F', rv->alen) ||
+ TESTAFF(rv->astr, 'G', rv->alen) ||
+ TESTAFF(rv->astr, 'H', rv->alen)))
+ // END of LANG_hu section
+ ) &&
+ !((checkcompoundtriple && !words && // test triple letters
+ (word[i - 1] == word[i]) &&
+ (((i > 1) && (word[i - 1] == word[i - 2])) ||
+ ((word[i - 1] == word[i + 1])) // may be word[i+1] == '\0'
+ )) ||
+ (
+ // test CHECKCOMPOUNDPATTERN
+ numcheckcpd && !words &&
+ cpdpat_check(word, i, rv, NULL, affixed)) ||
+ (checkcompoundcase && !words && cpdcase_check(word, i))))
+ // LANG_hu section: spec. Hungarian rule
+ ||
+ ((!rv) && (langnum == LANG_hu) && hu_mov_rule &&
+ (rv = affix_check(st.c_str(), i)) &&
+ (sfx && sfx->getCont() &&
+ (TESTAFF(sfx->getCont(), (unsigned short)'x', sfx->getContLen()) ||
+ TESTAFF(sfx->getCont(), (unsigned short)'%', sfx->getContLen()))))
+ // END of LANG_hu section
+ ) {
+ // LANG_hu section: spec. Hungarian rule
+ if (langnum == LANG_hu) {
+ // calculate syllable number of the word
+ numsyllable += get_syllable(st.substr(i));
+
+ // + 1 word, if syllable number of the prefix > 1 (hungarian
+ // convention)
+ if (pfx && (get_syllable(pfx->getKey()) > 1))
+ wordnum++;
+ }
+ // END of LANG_hu section
+
+ // NEXT WORD(S)
+ rv_first = rv;
+ rv = lookup((word + i)); // perhaps without prefix
+
+ // search homonym with compound flag
+ while ((rv) && ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
+ !((compoundflag && !words &&
+ TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+ (compoundend && !words &&
+ TESTAFF(rv->astr, compoundend, rv->alen)) ||
+ (numdefcpd && words &&
+ defcpd_check(&words, wnum + 1, rv, NULL, 1))))) {
+ rv = rv->next_homonym;
+ }
+
+ // remainder accepted through the COMPOUNDRULE state in words:
+ // emit the analysis and stop
+ if (rv && words && words[wnum + 1]) {
+ mystrcat(*result, presult, MAXLNLEN);
+ mystrcat(*result, " ", MAXLNLEN);
+ mystrcat(*result, MORPH_PART, MAXLNLEN);
+ mystrcat(*result, word + i, MAXLNLEN);
+ if (complexprefixes && HENTRY_DATA(rv))
+ mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
+ if (!HENTRY_FIND(rv, MORPH_STEM)) {
+ mystrcat(*result, " ", MAXLNLEN);
+ mystrcat(*result, MORPH_STEM, MAXLNLEN);
+ mystrcat(*result, HENTRY_WORD(rv), MAXLNLEN);
+ }
+ // store the pointer of the hash entry
+ // sprintf(*result + strlen(*result), " %s%p",
+ // MORPH_HENTRY, rv);
+ if (!complexprefixes && HENTRY_DATA(rv)) {
+ mystrcat(*result, " ", MAXLNLEN);
+ mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
+ }
+ mystrcat(*result, "\n", MAXLNLEN);
+ return 0;
+ }
+
+ oldnumsyllable2 = numsyllable;
+ oldwordnum2 = wordnum;
+
+ // LANG_hu section: spec. Hungarian rule
+ if ((rv) && (langnum == LANG_hu) &&
+ (TESTAFF(rv->astr, 'I', rv->alen)) &&
+ !(TESTAFF(rv->astr, 'J', rv->alen))) {
+ numsyllable--;
+ }
+ // END of LANG_hu section
+ // increment word number, if the second root has a compoundroot flag
+ if ((rv) && (compoundroot) &&
+ (TESTAFF(rv->astr, compoundroot, rv->alen))) {
+ wordnum++;
+ }
+
+ // check forbiddenwords
+ if ((rv) && (rv->astr) &&
+ (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+ TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))) {
+ st[i] = ch;
+ continue;
+ }
+
+ // second word is acceptable, as a root?
+ // hungarian conventions: compounding is acceptable,
+ // when compound forms consist of 2 words, or if more,
+ // then the syllable number of root words must be 6, or lesser.
+ if ((rv) &&
+ ((compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+ (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))) &&
+ (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) ||
+ ((cpdmaxsyllable != 0) &&
+ (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->blen)) <=
+ cpdmaxsyllable))) &&
+ ((!checkcompounddup || (rv != rv_first)))) {
+ // bad compound word
+ mystrcat(*result, presult, MAXLNLEN);
+ mystrcat(*result, " ", MAXLNLEN);
+ mystrcat(*result, MORPH_PART, MAXLNLEN);
+ mystrcat(*result, word + i, MAXLNLEN);
+
+ if (HENTRY_DATA(rv)) {
+ if (complexprefixes)
+ mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
+ if (!HENTRY_FIND(rv, MORPH_STEM)) {
+ mystrcat(*result, " ", MAXLNLEN);
+ mystrcat(*result, MORPH_STEM, MAXLNLEN);
+ mystrcat(*result, HENTRY_WORD(rv), MAXLNLEN);
+ }
+ // store the pointer of the hash entry
+ // sprintf(*result + strlen(*result), "
+ // %s%p", MORPH_HENTRY, rv);
+ if (!complexprefixes) {
+ mystrcat(*result, " ", MAXLNLEN);
+ mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
+ }
+ }
+ mystrcat(*result, "\n", MAXLNLEN);
+ ok = 1;
+ }
+
+ numsyllable = oldnumsyllable2;
+ wordnum = oldwordnum2;
+
+ // perhaps second word has prefix or/and suffix
+ sfx = NULL;
+ sfxflag = FLAG_NULL;
+
+ if (compoundflag && !onlycpdrule)
+ rv = affix_check((word + i), strlen(word + i), compoundflag);
+ else
+ rv = NULL;
+
+ if (!rv && compoundend && !onlycpdrule) {
+ sfx = NULL;
+ pfx = NULL;
+ rv = affix_check((word + i), strlen(word + i), compoundend);
+ }
+
+ if (!rv && numdefcpd && words) {
+ rv = affix_check((word + i), strlen(word + i), 0, IN_CPD_END);
+ if (rv && words && defcpd_check(&words, wnum + 1, rv, NULL, 1)) {
+ char* m = NULL;
+ if (compoundflag)
+ m = affix_check_morph((word + i), strlen(word + i), compoundflag);
+ if ((!m || *m == '\0') && compoundend) {
+ if (m)
+ free(m);
+ m = affix_check_morph((word + i), strlen(word + i), compoundend);
+ }
+ mystrcat(*result, presult, MAXLNLEN);
+ // m stays NULL when neither compoundflag nor compoundend is set,
+ // so it must be non-NULL *and* non-empty before it is formatted
+ // (the former `m || *m` test dereferenced a NULL m)
+ if (m && (*m != '\0')) {
+ char m2[MAXLNLEN];
+ sprintf(m2, "%c%s%s%s", MSEP_FLD, MORPH_PART, word + i,
+ line_uniq_app(&m, MSEP_REC));
+ mystrcat(*result, m2, MAXLNLEN);
+ }
+ if (m)
+ free(m);
+ mystrcat(*result, "\n", MAXLNLEN);
+ ok = 1;
+ }
+ }
+
+ // check non_compound flag in suffix and prefix
+ if ((rv) &&
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundforbidflag, pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundforbidflag,
+ sfx->getContLen())))) {
+ rv = NULL;
+ }
+
+ // check forbiddenwords
+ if ((rv) && (rv->astr) &&
+ (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+ TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen)) &&
+ (!TESTAFF(rv->astr, needaffix, rv->alen))) {
+ st[i] = ch;
+ continue;
+ }
+
+ if (langnum == LANG_hu) {
+ // calculate syllable number of the word
+ numsyllable += get_syllable(word + i);
+
+ // - affix syllable num.
+ // XXX only second suffix (inflections, not derivations)
+ if (sfxappnd) {
+ std::string tmp(sfxappnd);
+ reverseword(tmp);
+ numsyllable -= get_syllable(tmp) + sfxextra;
+ }
+
+ // + 1 word, if syllable number of the prefix > 1 (hungarian
+ // convention)
+ if (pfx && (get_syllable(pfx->getKey()) > 1))
+ wordnum++;
+
+ // increment syllable num, if last word has a SYLLABLENUM flag
+ // and the suffix is beginning `s'
+
+ if (cpdsyllablenum) {
+ switch (sfxflag) {
+ case 'c': {
+ numsyllable += 2;
+ break;
+ }
+ case 'J': {
+ numsyllable += 1;
+ break;
+ }
+ case 'I': {
+ if (rv && TESTAFF(rv->astr, 'J', rv->alen))
+ numsyllable += 1;
+ break;
+ }
+ }
+ }
+ }
+
+ // increment word number, if the second word has a compoundroot flag
+ if ((rv) && (compoundroot) &&
+ (TESTAFF(rv->astr, compoundroot, rv->alen))) {
+ wordnum++;
+ }
+ // second word is acceptable, as a word with prefix or/and suffix?
+ // hungarian conventions: compounding is acceptable,
+ // when compound forms consist 2 word, otherwise
+ // the syllable number of root words is 6, or lesser.
+ if ((rv) &&
+ (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) ||
+ ((cpdmaxsyllable != 0) && (numsyllable <= cpdmaxsyllable))) &&
+ ((!checkcompounddup || (rv != rv_first)))) {
+ char* m = NULL;
+ if (compoundflag)
+ m = affix_check_morph((word + i), strlen(word + i), compoundflag);
+ if ((!m || *m == '\0') && compoundend) {
+ if (m)
+ free(m);
+ m = affix_check_morph((word + i), strlen(word + i), compoundend);
+ }
+ mystrcat(*result, presult, MAXLNLEN);
+ if (m && (*m != '\0')) {
+ char m2[MAXLNLEN];
+ sprintf(m2, "%c%s%s%s", MSEP_FLD, MORPH_PART, word + i,
+ line_uniq_app(&m, MSEP_REC));
+ mystrcat(*result, m2, MAXLNLEN);
+ }
+ if (m)
+ free(m);
+ if (strlen(*result) + 1 < MAXLNLEN)
+ sprintf(*result + strlen(*result), "%c", MSEP_REC);
+ ok = 1;
+ }
+
+ numsyllable = oldnumsyllable2;
+ wordnum = oldwordnum2;
+
+ // perhaps second word is a compound word (recursive call)
+ if ((wordnum < maxwordnum) && (ok == 0)) {
+ compound_check_morph((word + i), strlen(word + i), wordnum + 1,
+ numsyllable, maxwordnum, wnum + 1, words, rwords, 0,
+ result, presult);
+ } else {
+ rv = NULL;
+ }
+ }
+ // undo the temporary cut and the per-split counters
+ st[i] = ch;
+ wordnum = oldwordnum;
+ numsyllable = oldnumsyllable;
+
+ } while (numdefcpd && oldwordnum == 0 &&
+ onlycpdrule++ < 1); // end of onlycpd loop
+ }
+ return 0;
+ }
+
+
+// return 1 if s1 (reversed) is a leading subset of end of s2
+/* inline int AffixMgr::isRevSubset(const char * s1, const char * end_of_s2, int
+ len)
+ {
+ while ((len > 0) && *s1 && (*s1 == *end_of_s2)) {
+ s1++;
+ end_of_s2--;
+ len--;
+ }
+ return (*s1 == '\0');
+ }
+ */
+
+ // Compare s1 (read forwards) with the text ending at end_of_s2 (read
+ // backwards), looking at no more than len bytes. A '.' in s1 matches any
+ // byte. Returns 1 when all of s1 was consumed, 0 otherwise.
+ inline int AffixMgr::isRevSubset(const char* s1,
+ const char* end_of_s2,
+ int len) {
+ for (; (len > 0) && (*s1 != '\0'); ++s1, --end_of_s2, --len) {
+ // stop at the first byte that neither matches nor is a wildcard
+ if ((*s1 != *end_of_s2) && (*s1 != '.'))
+ break;
+ }
+ return (*s1 == '\0') ? 1 : 0;
+ }
+
+ // check word for suffixes: returns the first dictionary entry produced by an applicable suffix entry's checkword(), or NULL when none applies
+
+ struct hentry* AffixMgr::suffix_check(const char* word,  // word form to test
+ int len,  // length of word; 0 stops after the zero-length suffix pass
+ int sfxopts,  // forwarded unchanged to SfxEntry::checkword()
+ PfxEntry* ppfx,  // prefix entry consulted by the circumfix/needaffix filters; may be NULL
+ char** wlst,  // forwarded unchanged to checkword()
+ int maxSug,  // forwarded unchanged to checkword()
+ int* ns,  // forwarded unchanged to checkword()
+ const FLAG cclass,  // when set: zero-length suffixes must carry continuation flags, and the needaffix filter is bypassed
+ const FLAG needflag,  // forwarded unchanged to checkword()
+ char in_compound) {  // compared with IN_CPD_BEGIN / IN_CPD_END; any non-zero value lifts the onlyincompound filter
+ struct hentry* rv = NULL;
+ PfxEntry* ep = ppfx;  // alias of ppfx, only dereferenced after ppfx was tested
+
+ // first handle the special case of 0 length suffixes
+ SfxEntry* se = sStart[0];
+
+ while (se) {
+ if (!cclass || se->getCont()) {  // with cclass set, skip entries that have no continuation flags
+ // suffixes are not allowed in beginning of compounds
+ if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass
+ // except when signed with compoundpermitflag flag
+ (se->getCont() && compoundpermitflag &&
+ TESTAFF(se->getCont(), compoundpermitflag, se->getContLen()))) &&
+ (!circumfix ||
+ // no circumfix flag in prefix and suffix
+ ((!ppfx || !(ep->getCont()) ||
+ !TESTAFF(ep->getCont(), circumfix, ep->getContLen())) &&
+ (!se->getCont() ||
+ !(TESTAFF(se->getCont(), circumfix, se->getContLen())))) ||
+ // circumfix flag in prefix AND suffix
+ ((ppfx && (ep->getCont()) &&
+ TESTAFF(ep->getCont(), circumfix, ep->getContLen())) &&
+ (se->getCont() &&
+ (TESTAFF(se->getCont(), circumfix, se->getContLen()))))) &&
+ // fogemorpheme
+ (in_compound ||
+ !(se->getCont() &&
+ (TESTAFF(se->getCont(), onlyincompound, se->getContLen())))) &&
+ // needaffix on prefix or first suffix
+ (cclass ||
+ !(se->getCont() &&
+ TESTAFF(se->getCont(), needaffix, se->getContLen())) ||
+ (ppfx &&
+ !((ep->getCont()) &&
+ TESTAFF(ep->getCont(), needaffix, ep->getContLen()))))) {
+ rv = se->checkword(word, len, sfxopts, ppfx, wlst, maxSug, ns,
+ (FLAG)cclass, needflag,
+ (in_compound ? 0 : onlyincompound));  // last argument is onlyincompound only outside compounds
+ if (rv) {
+ sfx = se; // BUG: sfx not stateless (note: sfxflag/sfxappnd are not updated on this path)
+ return rv;
+ }
+ }
+ }
+ se = se->getNext();
+ }
+
+ // now handle the general case
+ if (len == 0)
+ return NULL; // FULLSTRIP
+ unsigned char sp = *((const unsigned char*)(word + len - 1));  // last byte of word selects the entry list
+ SfxEntry* sptr = sStart[sp];
+
+ while (sptr) {
+ if (isRevSubset(sptr->getKey(), word + len - 1, len)) {  // entry key compared against the end of word, scanning backwards
+ // suffixes are not allowed in beginning of compounds
+ if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass
+ // except when signed with compoundpermitflag flag
+ (sptr->getCont() && compoundpermitflag &&
+ TESTAFF(sptr->getCont(), compoundpermitflag,
+ sptr->getContLen()))) &&
+ (!circumfix ||
+ // no circumfix flag in prefix and suffix
+ ((!ppfx || !(ep->getCont()) ||
+ !TESTAFF(ep->getCont(), circumfix, ep->getContLen())) &&
+ (!sptr->getCont() ||
+ !(TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())))) ||
+ // circumfix flag in prefix AND suffix
+ ((ppfx && (ep->getCont()) &&
+ TESTAFF(ep->getCont(), circumfix, ep->getContLen())) &&
+ (sptr->getCont() &&
+ (TESTAFF(sptr->getCont(), circumfix, sptr->getContLen()))))) &&
+ // fogemorpheme
+ (in_compound ||
+ !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound,
+ sptr->getContLen()))))) &&
+ // needaffix on prefix or first suffix
+ (cclass ||
+ !(sptr->getCont() &&
+ TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) ||
+ (ppfx &&
+ !((ep->getCont()) &&
+ TESTAFF(ep->getCont(), needaffix, ep->getContLen())))))
+ if (in_compound != IN_CPD_END || ppfx ||  // extra filter: at a compound end without a prefix, reject onlyincompound suffixes
+ !(sptr->getCont() &&
+ TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))) {
+ rv = sptr->checkword(word, len, sfxopts, ppfx, wlst, maxSug, ns,
+ cclass, needflag,
+ (in_compound ? 0 : onlyincompound));
+ if (rv) {
+ sfx = sptr; // BUG: sfx not stateless
+ sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
+ if (!sptr->getCont())
+ sfxappnd = sptr->getKey(); // BUG: sfxappnd not stateless
+ // LANG_hu section: spec. Hungarian rule
+ else if (langnum == LANG_hu && sptr->getKeyLen() &&
+ sptr->getKey()[0] == 'i' && sptr->getKey()[1] != 'y' &&
+ sptr->getKey()[1] != 't') {
+ sfxextra = 1;  // member state, like sfx/sfxflag above; it is not reset anywhere in this function
+ }
+ // END of LANG_hu section
+ return rv;
+ }
+ }
+ sptr = sptr->getNextEQ();  // NOTE(review): presumably the next entry to try after a key match - confirm in SfxEntry
+ } else {
+ sptr = sptr->getNextNE();  // NOTE(review): presumably the next entry to try after a key mismatch - confirm in SfxEntry
+ }
+ }
+
+ return NULL;
+ }
+
+ // check word for two-level suffixes
+ //
+ // Only suffix entries whose flag is marked in contclasses[] are tried.
+ // Returns the first entry found by SfxEntry::check_twosfx(), or NULL.
+ // A hit in the keyed pass also records sfxflag and (for entries without
+ // continuation flags) sfxappnd.
+
+ struct hentry* AffixMgr::suffix_check_twosfx(const char* word,
+ int len,
+ int sfxopts,
+ PfxEntry* ppfx,
+ const FLAG needflag) {
+ // pass 1: the entries kept in slot 0 (0 length suffixes)
+ for (SfxEntry* zero = sStart[0]; zero; zero = zero->getNext()) {
+ if (!contclasses[zero->getFlag()])
+ continue;
+ struct hentry* hit = zero->check_twosfx(word, len, sfxopts, ppfx, needflag);
+ if (hit)
+ return hit;
+ }
+
+ // pass 2: entries selected by the last byte of the word
+ if (len == 0)
+ return NULL; // FULLSTRIP
+ const unsigned char* lastbyte = (const unsigned char*)(word + len - 1);
+ SfxEntry* cand = sStart[*lastbyte];
+
+ while (cand) {
+ if (!isRevSubset(cand->getKey(), word + len - 1, len)) {
+ cand = cand->getNextNE();
+ continue;
+ }
+ if (contclasses[cand->getFlag()]) {
+ struct hentry* hit = cand->check_twosfx(word, len, sfxopts, ppfx, needflag);
+ if (hit) {
+ sfxflag = cand->getFlag(); // BUG: sfxflag not stateless
+ if (!cand->getCont())
+ sfxappnd = cand->getKey(); // BUG: sfxappnd not stateless
+ return hit;
+ }
+ }
+ cand = cand->getNextEQ();
+ }
+
+ return NULL;
+ }
+
+ // Morphological-analysis variant of suffix_check_twosfx(): collects one
+ // line per successful SfxEntry::check_twosfx_morph() call and returns the
+ // lines as a mystrdup()'ed string, or NULL when nothing was collected.
+ // With len == 0 the function returns NULL right after the first pass.
+ char* AffixMgr::suffix_check_twosfx_morph(const char* word,
+ int len,
+ int sfxopts,
+ PfxEntry* ppfx,
+ const FLAG needflag) {
+ std::string collected;
+
+ // pass 1: the entries kept in slot 0 (0 length suffixes)
+ for (SfxEntry* zero = sStart[0]; zero; zero = zero->getNext()) {
+ if (!contclasses[zero->getFlag()])
+ continue;
+ char* analysis = zero->check_twosfx_morph(word, len, sfxopts, ppfx, needflag);
+ if (!analysis)
+ continue;
+ // prefix description (or its debug flag) goes first
+ if (ppfx) {
+ if (ppfx->getMorph()) {
+ collected.append(ppfx->getMorph());
+ collected.append(" ");
+ } else {
+ debugflag(collected, ppfx->getFlag());
+ }
+ }
+ collected.append(analysis);
+ free(analysis);
+ if (zero->getMorph()) {
+ collected.append(" ");
+ collected.append(zero->getMorph());
+ } else {
+ debugflag(collected, zero->getFlag());
+ }
+ collected.append("\n");
+ }
+
+ // pass 2: entries selected by the last byte of the word
+ if (len == 0)
+ return NULL; // FULLSTRIP
+ const unsigned char* lastbyte = (const unsigned char*)(word + len - 1);
+ SfxEntry* cand = sStart[*lastbyte];
+
+ while (cand) {
+ if (!isRevSubset(cand->getKey(), word + len - 1, len)) {
+ cand = cand->getNextNE();
+ continue;
+ }
+ if (contclasses[cand->getFlag()]) {
+ char* analysis = cand->check_twosfx_morph(word, len, sfxopts, ppfx, needflag);
+ if (analysis) {
+ sfxflag = cand->getFlag(); // BUG: sfxflag not stateless
+ if (!cand->getCont())
+ sfxappnd = cand->getKey(); // BUG: sfxappnd not stateless
+ std::string lines(analysis);
+ free(analysis);
+
+ // description of this suffix, attached to the lines via strlinecat()
+ std::string own;
+ if (cand->getMorph()) {
+ own.append(" ");
+ own.append(cand->getMorph());
+ } else {
+ debugflag(own, cand->getFlag());
+ }
+ strlinecat(lines, own);
+ lines.append("\n");
+ collected.append(lines);
+ }
+ }
+ cand = cand->getNextEQ();
+ }
+
+ if (collected.empty())
+ return NULL;
+ return mystrdup(collected.c_str());
+ }
+
+ char* AffixMgr::suffix_check_morph(const char* word,  // word form to analyse
+ int len,  // length of word; 0 makes the function return NULL after the zero-length pass
+ int sfxopts,  // forwarded to checkword() and get_next_homonym()
+ PfxEntry* ppfx,  // prefix entry whose morph/flag is emitted first on each line; may be NULL
+ const FLAG cclass,  // when set: zero-length suffixes must carry continuation flags, and the needaffix filter is bypassed
+ const FLAG needflag,  // forwarded to checkword() and get_next_homonym()
+ char in_compound) {  // compared with IN_CPD_BEGIN; any non-zero value lifts the onlyincompound filter
+ char result[MAXLNLEN];  // accumulates one "\n"-terminated line per analysis; appended via mystrcat() with MAXLNLEN as limit
+
+ struct hentry* rv = NULL;  // NULL again whenever a `while (rv)` loop below finishes, so a failed filter leaves nothing to emit
+
+ result[0] = '\0';
+
+ PfxEntry* ep = ppfx;  // alias of ppfx, only dereferenced after ppfx was tested
+
+ // first handle the special case of 0 length suffixes
+ SfxEntry* se = sStart[0];
+ while (se) {
+ if (!cclass || se->getCont()) {
+ // suffixes are not allowed in beginning of compounds
+ if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
+ // except when signed with compoundpermitflag flag
+ (se->getCont() && compoundpermitflag &&
+ TESTAFF(se->getCont(), compoundpermitflag, se->getContLen()))) &&
+ (!circumfix ||
+ // no circumfix flag in prefix and suffix
+ ((!ppfx || !(ep->getCont()) ||
+ !TESTAFF(ep->getCont(), circumfix, ep->getContLen())) &&
+ (!se->getCont() ||
+ !(TESTAFF(se->getCont(), circumfix, se->getContLen())))) ||
+ // circumfix flag in prefix AND suffix
+ ((ppfx && (ep->getCont()) &&
+ TESTAFF(ep->getCont(), circumfix, ep->getContLen())) &&
+ (se->getCont() &&
+ (TESTAFF(se->getCont(), circumfix, se->getContLen()))))) &&
+ // fogemorpheme
+ (in_compound ||
+ !((se->getCont() &&
+ (TESTAFF(se->getCont(), onlyincompound, se->getContLen()))))) &&
+ // needaffix on prefix or first suffix
+ (cclass ||
+ !(se->getCont() &&
+ TESTAFF(se->getCont(), needaffix, se->getContLen())) ||
+ (ppfx &&
+ !((ep->getCont()) &&
+ TESTAFF(ep->getCont(), needaffix, ep->getContLen()))))))
+ rv = se->checkword(word, len, sfxopts, ppfx, NULL, 0, 0, cclass,
+ needflag);
+ while (rv) {  // one output line per homonym returned for this suffix
+ if (ppfx) {
+ if (ppfx->getMorph()) {
+ mystrcat(result, ppfx->getMorph(), MAXLNLEN);
+ mystrcat(result, " ", MAXLNLEN);
+ } else
+ debugflag(result, ppfx->getFlag());
+ }
+ if (complexprefixes && HENTRY_DATA(rv))
+ mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
+ if (!HENTRY_FIND(rv, MORPH_STEM)) {  // add a stem field built from the entry's word when the entry has none
+ mystrcat(result, " ", MAXLNLEN);
+ mystrcat(result, MORPH_STEM, MAXLNLEN);
+ mystrcat(result, HENTRY_WORD(rv), MAXLNLEN);
+ }
+ // store the pointer of the hash entry
+ // sprintf(result + strlen(result), " %s%p", MORPH_HENTRY,
+ // rv);
+
+ if (!complexprefixes && HENTRY_DATA(rv)) {
+ mystrcat(result, " ", MAXLNLEN);
+ mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
+ }
+ if (se->getMorph()) {
+ mystrcat(result, " ", MAXLNLEN);
+ mystrcat(result, se->getMorph(), MAXLNLEN);
+ } else
+ debugflag(result, se->getFlag());
+ mystrcat(result, "\n", MAXLNLEN);
+ rv = se->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
+ }
+ }
+ se = se->getNext();
+ }
+
+ // now handle the general case
+ if (len == 0)
+ return NULL; // FULLSTRIP; NOTE(review): this also drops anything the zero-length pass put in result - confirm intended
+ unsigned char sp = *((const unsigned char*)(word + len - 1));  // last byte of word selects the entry list
+ SfxEntry* sptr = sStart[sp];
+
+ while (sptr) {
+ if (isRevSubset(sptr->getKey(), word + len - 1, len)) {  // entry key compared against the end of word, scanning backwards
+ // suffixes are not allowed in beginning of compounds
+ if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
+ // except when signed with compoundpermitflag flag
+ (sptr->getCont() && compoundpermitflag &&
+ TESTAFF(sptr->getCont(), compoundpermitflag,
+ sptr->getContLen()))) &&
+ (!circumfix ||
+ // no circumfix flag in prefix and suffix
+ ((!ppfx || !(ep->getCont()) ||
+ !TESTAFF(ep->getCont(), circumfix, ep->getContLen())) &&
+ (!sptr->getCont() ||
+ !(TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())))) ||
+ // circumfix flag in prefix AND suffix
+ ((ppfx && (ep->getCont()) &&
+ TESTAFF(ep->getCont(), circumfix, ep->getContLen())) &&
+ (sptr->getCont() &&
+ (TESTAFF(sptr->getCont(), circumfix, sptr->getContLen()))))) &&
+ // fogemorpheme
+ (in_compound ||
+ !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound,
+ sptr->getContLen()))))) &&
+ // needaffix on first suffix (unlike the zero-length pass, the prefix is not consulted here)
+ (cclass ||
+ !(sptr->getCont() &&
+ TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())))))
+ rv = sptr->checkword(word, len, sfxopts, ppfx, NULL, 0, 0, cclass,
+ needflag);
+ while (rv) {  // one output line per homonym returned for this suffix
+ if (ppfx) {
+ if (ppfx->getMorph()) {
+ mystrcat(result, ppfx->getMorph(), MAXLNLEN);
+ mystrcat(result, " ", MAXLNLEN);
+ } else
+ debugflag(result, ppfx->getFlag());
+ }
+ if (complexprefixes && HENTRY_DATA(rv))
+ mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
+ if (!HENTRY_FIND(rv, MORPH_STEM)) {
+ mystrcat(result, " ", MAXLNLEN);
+ mystrcat(result, MORPH_STEM, MAXLNLEN);
+ mystrcat(result, HENTRY_WORD(rv), MAXLNLEN);
+ }
+ // store the pointer of the hash entry
+ // sprintf(result + strlen(result), " %s%p",
+ // MORPH_HENTRY, rv);
+
+ if (!complexprefixes && HENTRY_DATA(rv)) {
+ mystrcat(result, " ", MAXLNLEN);
+ mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
+ }
+
+ if (sptr->getMorph()) {
+ mystrcat(result, " ", MAXLNLEN);
+ mystrcat(result, sptr->getMorph(), MAXLNLEN);
+ } else
+ debugflag(result, sptr->getFlag());
+ mystrcat(result, "\n", MAXLNLEN);
+ rv = sptr->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
+ }
+ sptr = sptr->getNextEQ();
+ } else {
+ sptr = sptr->getNextNE();
+ }
+ }
+
+ if (*result)
+ return mystrdup(result);  // caller receives a copy made by mystrdup(); callers in this file release such results with free()
+ return NULL;  // nothing collected
+ }
+
+ // check if word with affixes is correctly spelled
+ //
+ // Tries, in this order: prefix_check(), suffix_check() and, only when
+ // havecontclass is set, suffix_check_twosfx() and prefix_check_twosfx().
+ // Returns the first entry found or NULL.
+ struct hentry* AffixMgr::affix_check(const char* word,
+ int len,
+ const FLAG needflag,
+ char in_compound) {
+ // prefixes first (also crossed with suffixes if allowed)
+ struct hentry* found = prefix_check(word, len, in_compound, needflag);
+ if (found)
+ return found;
+
+ // then plain suffixes
+ found = suffix_check(word, len, 0, NULL, NULL, 0, NULL, FLAG_NULL, needflag,
+ in_compound);
+
+ // without continuation classes there is nothing more to try
+ if (!havecontclass)
+ return found;
+
+ // sfx/pfx are cleared here even when suffix_check() just succeeded
+ sfx = NULL;
+ pfx = NULL;
+
+ if (found)
+ return found;
+
+ // two-level suffixes
+ found = suffix_check_twosfx(word, len, 0, NULL, needflag);
+ if (found)
+ return found;
+
+ // last resort: prefix_check_twosfx(), always with IN_CPD_NOT
+ return prefix_check_twosfx(word, len, IN_CPD_NOT, needflag);
+ }
+
+ // Morphological-analysis variant of affix_check(): concatenates the output
+ // of every *_morph checker into one buffer and returns a mystrdup()'ed copy
+ // (an empty string when no checker produced anything).
+ char* AffixMgr::affix_check_morph(const char* word,
+ int len,
+ const FLAG needflag,
+ char in_compound) {
+ char collected[MAXLNLEN];
+ collected[0] = '\0';
+
+ // prefixes (also crossed with suffixes if allowed); needflag is not
+ // passed to this call
+ char* part = prefix_check_morph(word, len, in_compound);
+ if (part) {
+ mystrcat(collected, part, MAXLNLEN);
+ free(part);
+ }
+
+ // plain suffixes
+ part = suffix_check_morph(word, len, 0, NULL, '\0', needflag, in_compound);
+ if (part) {
+ mystrcat(collected, part, MAXLNLEN);
+ free(part);
+ }
+
+ // the two-level checks only run when continuation classes exist
+ if (!havecontclass)
+ return mystrdup(collected);
+
+ sfx = NULL;
+ pfx = NULL;
+
+ // two-level suffixes
+ part = suffix_check_twosfx_morph(word, len, 0, NULL, needflag);
+ if (part) {
+ mystrcat(collected, part, MAXLNLEN);
+ free(part);
+ }
+
+ // prefix_check_twosfx_morph(), always with IN_CPD_NOT
+ part = prefix_check_twosfx_morph(word, len, IN_CPD_NOT, needflag);
+ if (part) {
+ mystrcat(collected, part, MAXLNLEN);
+ free(part);
+ }
+
+ return mystrdup(collected);
+ }
+
+char* AffixMgr::morphgen(const char* ts,
+ int wl,
+ const unsigned short* ap,
+ unsigned short al,
+ const char* morph,
+ const char* targetmorph,
+ int level) {
+ // Generate a form of ts (length wl, affix flags ap[0..al)) whose morphological description matches targetmorph; returns a heap-allocated word or NULL when nothing matches.
+ if (!morph)
+ return NULL;
+
+ // check substandard flag
+ if (TESTAFF(ap, substandard, al))
+ return NULL;
+
+ if (morphcmp(morph, targetmorph) == 0)
+ return mystrdup(ts); // the unmodified word already matches the target
+
+ size_t stemmorphcatpos; // offset in mymorph where each suffix's morph text is spliced in, or npos
+ std::string mymorph;
+
+ // use input suffix fields, if exist
+ if (strstr(morph, MORPH_INFL_SFX) || strstr(morph, MORPH_DERI_SFX)) {
+ mymorph.assign(morph);
+ mymorph.append(" ");
+ stemmorphcatpos = mymorph.size();
+ } else {
+ stemmorphcatpos = std::string::npos;
+ }
+
+ for (int i = 0; i < al; i++) {
+ const unsigned char c = (unsigned char)(ap[i] & 0x00FF); // low byte of the flag selects the sFlag chain
+ SfxEntry* sptr = sFlag[c];
+ while (sptr) {
+ if (sptr->getFlag() == ap[i] && sptr->getMorph() &&
+ ((sptr->getContLen() == 0) ||
+ // don't generate forms with substandard affixes
+ !TESTAFF(sptr->getCont(), substandard, sptr->getContLen()))) {
+ const char* stemmorph;
+ if (stemmorphcatpos != std::string::npos) {
+ mymorph.replace(stemmorphcatpos, std::string::npos, sptr->getMorph()); // overwrite the previous suffix's text after the fixed prefix
+ stemmorph = mymorph.c_str();
+ } else {
+ stemmorph = sptr->getMorph();
+ }
+
+ int cmp = morphcmp(stemmorph, targetmorph); // 0 is handled as a match below, 1 triggers the secondary-suffix recursion
+
+ if (cmp == 0) {
+ char* newword = sptr->add(ts, wl);
+ if (newword) {
+ hentry* check = pHMgr->lookup(newword); // XXX extra dic
+ if (!check || !check->astr ||
+ !(TESTAFF(check->astr, forbiddenword, check->alen) ||
+ TESTAFF(check->astr, ONLYUPCASEFLAG, check->alen))) {
+ return newword; // accepted unless the dictionary marks it forbiddenword or ONLYUPCASEFLAG
+ }
+ free(newword);
+ }
+ }
+
+ // recursive call for secondary suffixes (only from level 0, so the recursion is one level deep)
+ if ((level == 0) && (cmp == 1) && (sptr->getContLen() > 0) &&
+ // (get_sfxcount(stemmorph) < targetcount) &&
+ !TESTAFF(sptr->getCont(), substandard, sptr->getContLen())) {
+ char* newword = sptr->add(ts, wl);
+ if (newword) {
+ char* newword2 =
+ morphgen(newword, strlen(newword), sptr->getCont(),
+ sptr->getContLen(), stemmorph, targetmorph, 1);
+
+ if (newword2) {
+ free(newword); // the intermediate form is no longer needed
+ return newword2;
+ }
+ free(newword);
+ newword = NULL;
+ }
+ }
+ }
+ sptr = sptr->getFlgNxt();
+ }
+ }
+ return NULL;
+}
+
+int AffixMgr::expand_rootword(struct guessword* wlst,
+ int maxn,
+ const char* ts,
+ int wl,
+ const unsigned short* ap,
+ unsigned short al,
+ const char* bad,
+ int badl,
+ const char* phon) {
+ int nh = 0; // number of wlst entries filled so far
+ // Fill wlst (capacity maxn) with the root word ts and its affixed forms that are compatible with the misspelling bad; returns the number of entries written. First add root word to list
+ if ((nh < maxn) &&
+ !(al && ((needaffix && TESTAFF(ap, needaffix, al)) ||
+ (onlyincompound && TESTAFF(ap, onlyincompound, al))))) {
+ wlst[nh].word = mystrdup(ts);
+ if (!wlst[nh].word)
+ return 0;
+ wlst[nh].allow = (1 == 0); // false: no prefix cross product for this entry
+ wlst[nh].orig = NULL;
+ nh++;
+ // add special phonetic version
+ if (phon && (nh < maxn)) {
+ wlst[nh].word = mystrdup(phon);
+ if (!wlst[nh].word)
+ return nh - 1; // NOTE(review): reports one entry fewer than already stored - confirm the caller frees it
+ wlst[nh].allow = (1 == 0);
+ wlst[nh].orig = mystrdup(ts); // orig holds the real word behind the phonetic form
+ if (!wlst[nh].orig)
+ return nh - 1; // NOTE(review): wlst[nh].word is not freed on this path - confirm
+ nh++;
+ }
+ }
+
+ // handle suffixes
+ for (int i = 0; i < al; i++) {
+ const unsigned char c = (unsigned char)(ap[i] & 0x00FF); // low byte of the flag selects the sFlag chain
+ SfxEntry* sptr = sFlag[c];
+ while (sptr) {
+ if ((sptr->getFlag() == ap[i]) &&
+ (!sptr->getKeyLen() ||
+ ((badl > sptr->getKeyLen()) &&
+ (strcmp(sptr->getAffix(), bad + badl - sptr->getKeyLen()) == 0))) && // the suffix text must equal the tail of bad
+ // check needaffix flag
+ !(sptr->getCont() &&
+ ((needaffix &&
+ TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) ||
+ (circumfix &&
+ TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())) ||
+ (onlyincompound &&
+ TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) {
+ char* newword = sptr->add(ts, wl);
+ if (newword) {
+ if (nh < maxn) {
+ wlst[nh].word = newword; // wlst takes ownership of newword
+ wlst[nh].allow = sptr->allowCross();
+ wlst[nh].orig = NULL;
+ nh++;
+ // add special phonetic version
+ if (phon && (nh < maxn)) {
+ std::string prefix(phon);
+ std::string key(sptr->getKey());
+ reverseword(key);
+ prefix.append(key); // phonetic root followed by the reversed suffix key
+ wlst[nh].word = mystrdup(prefix.c_str());
+ if (!wlst[nh].word)
+ return nh - 1; // NOTE(review): drops the entry just stored above - confirm caller cleanup
+ wlst[nh].allow = (1 == 0);
+ wlst[nh].orig = mystrdup(newword);
+ if (!wlst[nh].orig)
+ return nh - 1;
+ nh++;
+ }
+ } else {
+ free(newword); // list is full: discard the generated form
+ }
+ }
+ }
+ sptr = sptr->getFlgNxt();
+ }
+ }
+
+ int n = nh; // only entries created so far take part in the cross product
+
+ // handle cross products of prefixes and suffixes
+ for (int j = 1; j < n; j++) // starts at 1, so entry 0 is never crossed
+ if (wlst[j].allow) {
+ for (int k = 0; k < al; k++) {
+ const unsigned char c = (unsigned char)(ap[k] & 0x00FF);
+ PfxEntry* cptr = pFlag[c];
+ while (cptr) {
+ if ((cptr->getFlag() == ap[k]) && cptr->allowCross() &&
+ (!cptr->getKeyLen() ||
+ ((badl > cptr->getKeyLen()) &&
+ (strncmp(cptr->getKey(), bad, cptr->getKeyLen()) == 0)))) { // the prefix key must equal the start of bad
+ int l1 = strlen(wlst[j].word);
+ char* newword = cptr->add(wlst[j].word, l1);
+ if (newword) {
+ if (nh < maxn) {
+ wlst[nh].word = newword;
+ wlst[nh].allow = cptr->allowCross();
+ wlst[nh].orig = NULL;
+ nh++;
+ } else {
+ free(newword);
+ }
+ }
+ }
+ cptr = cptr->getFlgNxt();
+ }
+ }
+ }
+
+ // now handle pure prefixes (applied to the root word ts itself)
+ for (int m = 0; m < al; m++) {
+ const unsigned char c = (unsigned char)(ap[m] & 0x00FF);
+ PfxEntry* ptr = pFlag[c];
+ while (ptr) {
+ if ((ptr->getFlag() == ap[m]) &&
+ (!ptr->getKeyLen() ||
+ ((badl > ptr->getKeyLen()) &&
+ (strncmp(ptr->getKey(), bad, ptr->getKeyLen()) == 0))) &&
+ // check needaffix flag
+ !(ptr->getCont() &&
+ ((needaffix &&
+ TESTAFF(ptr->getCont(), needaffix, ptr->getContLen())) ||
+ (circumfix &&
+ TESTAFF(ptr->getCont(), circumfix, ptr->getContLen())) ||
+ (onlyincompound &&
+ TESTAFF(ptr->getCont(), onlyincompound, ptr->getContLen()))))) {
+ char* newword = ptr->add(ts, wl);
+ if (newword) {
+ if (nh < maxn) {
+ wlst[nh].word = newword;
+ wlst[nh].allow = ptr->allowCross();
+ wlst[nh].orig = NULL;
+ nh++;
+ } else {
+ free(newword);
+ }
+ }
+ }
+ ptr = ptr->getFlgNxt();
+ }
+ }
+
+ return nh;
+}
+
+// return the number of entries in the replacement (REP) table
+int AffixMgr::get_numrep() const {
+ return numrep;
+}
+
+// return the replacement (REP) table; the pointer may be NULL
+struct replentry* AffixMgr::get_reptable() const {
+  // a null table is handed back unchanged, so no separate branch is needed
+  return reptable;
+}
+
+// return the input conversion table; the pointer may be NULL
+RepList* AffixMgr::get_iconvtable() const {
+  // a null table is handed back unchanged, so no separate branch is needed
+  return iconvtable;
+}
+
+// return the output conversion table; the pointer may be NULL
+RepList* AffixMgr::get_oconvtable() const {
+  // a null table is handed back unchanged, so no separate branch is needed
+  return oconvtable;
+}
+
+// return the phonetic (PHONE) table; the pointer may be NULL
+struct phonetable* AffixMgr::get_phonetable() const {
+  // a null table is handed back unchanged, so no separate branch is needed
+  return phone;
+}
+
+// return the number of entries in the character map (MAP) table
+int AffixMgr::get_nummap() const {
+ return nummap;
+}
+
+// return the character map (MAP) table; the pointer may be NULL
+struct mapentry* AffixMgr::get_maptable() const {
+  // a null table is handed back unchanged, so no separate branch is needed
+  return maptable;
+}
+
+// return the number of entries in the word break (BREAK) table
+int AffixMgr::get_numbreak() const {
+ return numbreak;
+}
+
+// return the word break (BREAK) table; the pointer may be NULL
+char** AffixMgr::get_breaktable() const {
+  // a null table is handed back unchanged, so no separate branch is needed
+  return breaktable;
+}
+
+// return text encoding of dictionary (as a mystrdup() copy)
+char* AffixMgr::get_encoding() {
+  // fall back to the built-in default the first time no encoding is set
+  if (encoding == NULL) {
+    encoding = mystrdup(SPELL_ENCODING);
+  }
+  return mystrdup(encoding);
+}
+
+// return the language number of the dictionary (langnum)
+int AffixMgr::get_langnum() const {
+ return langnum;
+}
+
+// return the complexprefixes (double prefix) option
+int AffixMgr::get_complexprefixes() const {
+ return complexprefixes;
+}
+
+// return the value of the FULLSTRIP option
+int AffixMgr::get_fullstrip() const {
+ return fullstrip;
+}
+
+FLAG AffixMgr::get_keepcase() const { // return the keepcase flag
+ return keepcase;
+}
+
+FLAG AffixMgr::get_forceucase() const { // return the forceucase flag
+ return forceucase;
+}
+
+FLAG AffixMgr::get_warn() const { // return the warn flag
+ return warn;
+}
+
+int AffixMgr::get_forbidwarn() const { // return the forbidwarn option
+ return forbidwarn;
+}
+
+int AffixMgr::get_checksharps() const { // return the checksharps option
+ return checksharps;
+}
+
+char* AffixMgr::encode_flag(unsigned short aflag) const { // delegate to the hash manager's flag encoder; callers in this file free() the result
+ return pHMgr->encode_flag(aflag);
+}
+
+// return the string of ignored characters; the pointer may be NULL
+char* AffixMgr::get_ignore() const {
+  // a null string is handed back unchanged, so no separate branch is needed
+  return ignorechars;
+}
+
+// return the ignored characters as a w_char vector (ignorechars_utf16)
+const std::vector<w_char>& AffixMgr::get_ignore_utf16() const {
+ return ignorechars_utf16;
+}
+
+// return the keyboard string for suggestions (as a mystrdup() copy)
+char* AffixMgr::get_key_string() {
+  // fall back to the built-in default the first time no key string is set
+  if (keystring == NULL) {
+    keystring = mystrdup(SPELL_KEYSTRING);
+  }
+  return mystrdup(keystring);
+}
+
+// return a mystrdup() copy of the preferred try string for suggestions,
+// or NULL when no try string is set
+char* AffixMgr::get_try_string() const {
+  return trystring ? mystrdup(trystring) : NULL;
+}
+
+// return the wordchars string
+const char* AffixMgr::get_wordchars() const {
+ return wordchars;
+}
+
+const std::vector<w_char>& AffixMgr::get_wordchars_utf16() const { // return the word characters as a w_char vector
+ return wordchars_utf16;
+}
+
+// is there compounding? non-zero when compoundflag, compoundbegin or numdefcpd is set
+int AffixMgr::get_compound() const {
+ return compoundflag || compoundbegin || numdefcpd;
+}
+
+// return the compoundflag flag
+FLAG AffixMgr::get_compoundflag() const {
+ return compoundflag;
+}
+
+// return the forbiddenword flag
+FLAG AffixMgr::get_forbiddenword() const {
+ return forbiddenword;
+}
+
+// return the nosuggest flag
+FLAG AffixMgr::get_nosuggest() const {
+ return nosuggest;
+}
+
+// return the nongramsuggest flag
+FLAG AffixMgr::get_nongramsuggest() const {
+ return nongramsuggest;
+}
+
+// return the needaffix flag
+FLAG AffixMgr::get_needaffix() const {
+ return needaffix;
+}
+
+// return the onlyincompound flag (member onlyincompound)
+FLAG AffixMgr::get_onlyincompound() const {
+ return onlyincompound;
+}
+
+// return the compoundroot flag
+FLAG AffixMgr::get_compoundroot() const {
+ return compoundroot;
+}
+
+// return the compoundbegin flag
+FLAG AffixMgr::get_compoundbegin() const {
+ return compoundbegin;
+}
+
+// return the value of the checknum option
+int AffixMgr::get_checknum() const {
+ return checknum;
+}
+
+// return the key of the currently stored prefix entry (pfx),
+// or NULL when no prefix entry is stored
+const char* AffixMgr::get_prefix() const {
+  return pfx ? pfx->getKey() : NULL;
+}
+
+// return the stored suffix string (sfxappnd)
+const char* AffixMgr::get_suffix() const {
+ return sfxappnd;
+}
+
+// return the version string
+const char* AffixMgr::get_version() const {
+ return version;
+}
+
+// return the lemma_present flag (member lemma_present)
+FLAG AffixMgr::get_lemma_present() const {
+ return lemma_present;
+}
+
+// utility method to look up root words in hash table
+//
+// Queries the dictionaries in alldic in order and returns the first hit,
+// or NULL when none of the *maxdic dictionaries contains the word.
+struct hentry* AffixMgr::lookup(const char* word) {
+  for (int d = 0; d < *maxdic; ++d) {
+    struct hentry* hit = (alldic[d])->lookup(word);
+    if (hit)
+      return hit;
+  }
+  return NULL;
+}
+
+// return havecontclass (set by parse_affix when an affix entry carries continuation flags)
+int AffixMgr::have_contclass() const {
+ return havecontclass;
+}
+
+// return the utf8 option
+int AffixMgr::get_utf8() const {
+ return utf8;
+}
+
+int AffixMgr::get_maxngramsugs(void) const { // return the maxngramsugs setting
+ return maxngramsugs;
+}
+
+int AffixMgr::get_maxcpdsugs(void) const { // return the maxcpdsugs setting
+ return maxcpdsugs;
+}
+
+int AffixMgr::get_maxdiff(void) const { // return the maxdiff setting
+ return maxdiff;
+}
+
+int AffixMgr::get_onlymaxdiff(void) const { // return the onlymaxdiff setting
+ return onlymaxdiff;
+}
+
+// return the nosplitsugs setting
+int AffixMgr::get_nosplitsugs(void) const {
+ return nosplitsugs;
+}
+
+// return the sugswithdots setting
+int AffixMgr::get_sugswithdots(void) const {
+ return sugswithdots;
+}
+
+/* parse flag
+ *
+ * Reads the string value of an affix-file line with parse_string(), decodes
+ * it through the hash manager and stores the flag in *out.  Returns 0 on
+ * success and 1 when the parameter was already set or the line cannot be
+ * parsed.
+ */
+int AffixMgr::parse_flag(char* line, unsigned short* out, FileMgr* af) {
+  // values at or above DEFAULTFLAGS may be overwritten; anything else that
+  // is not FLAG_NULL counts as an earlier definition
+  const bool already_defined = (*out != FLAG_NULL) && (*out < DEFAULTFLAGS);
+  if (already_defined) {
+    HUNSPELL_WARNING(
+        stderr,
+        "error: line %d: multiple definitions of an affix file parameter\n",
+        af->getlinenum());
+    return 1;
+  }
+
+  char* text = NULL;
+  if (parse_string(line, &text, af->getlinenum()) != 0)
+    return 1;
+
+  *out = pHMgr->decode_flag(text);
+  free(text);
+  return 0;
+}
+
+/* parse num
+ *
+ * Reads the string value of an affix-file line with parse_string() and
+ * stores its atoi() value in *out.  *out must still hold -1; otherwise the
+ * parameter counts as already defined.  Returns 0 on success, 1 on error.
+ */
+int AffixMgr::parse_num(char* line, int* out, FileMgr* af) {
+  const bool already_defined = (*out != -1);
+  if (already_defined) {
+    HUNSPELL_WARNING(
+        stderr,
+        "error: line %d: multiple definitions of an affix file parameter\n",
+        af->getlinenum());
+    return 1;
+  }
+
+  char* text = NULL;
+  if (parse_string(line, &text, af->getlinenum()) != 0)
+    return 1;
+
+  *out = atoi(text);
+  free(text);
+  return 0;
+}
+
+/* parse in the max syllablecount of compound words and the vowel set
+ *
+ * Field 0 of `line` is the keyword, field 1 the maximum syllable count
+ * (stored in cpdmaxsyllable) and the optional field 2 the list of vowels.
+ * In UTF-8 mode the vowels are stored sorted in cpdvowels_utf16, otherwise
+ * as a plain string in cpdvowels.  When the vowel field is missing the
+ * default "aeiouAEIOU" is used.  Returns 0 on success, 1 on error.
+ */
+int AffixMgr::parse_cpdsyllable(char* line, FileMgr* af) {
+  char* tp = line;
+  char* piece;
+  int i = 0;
+  int np = 0;
+  piece = mystrsep(&tp, 0);
+  while (piece) {
+    if (*piece != '\0') {
+      switch (i) {
+        case 0: {
+          np++;
+          break;
+        }
+        case 1: {
+          cpdmaxsyllable = atoi(piece);
+          np++;
+          break;
+        }
+        case 2: {
+          if (!utf8) {
+            cpdvowels = mystrdup(piece);
+          } else {
+            std::vector<w_char> w;
+            u8_u16(w, piece);
+            if (!w.empty()) {
+              std::sort(w.begin(), w.end());
+              cpdvowels_utf16 = (w_char*)malloc(w.size() * sizeof(w_char));
+              if (!cpdvowels_utf16)
+                return 1;
+              // copy whole w_char elements: the byte count is the element
+              // count times sizeof(w_char), not the element count alone
+              memcpy(cpdvowels_utf16, &w[0], w.size() * sizeof(w_char));
+            }
+            cpdvowels_utf16_len = w.size();
+          }
+          np++;
+          break;
+        }
+        default:
+          break;
+      }
+      i++;
+    }
+    piece = mystrsep(&tp, 0);
+  }
+  if (np < 2) {
+    HUNSPELL_WARNING(stderr,
+                     "error: line %d: missing compoundsyllable information\n",
+                     af->getlinenum());
+    return 1;
+  }
+  // no vowel field given: fall back to the default vowel set
+  if (np == 2)
+    cpdvowels = mystrdup("aeiouAEIOU");
+  return 0;
+}
+
+/* parse in the typical fault correcting (REP) table
+ *
+ * `line` is the table header: keyword plus the number of entries.  The
+ * following numrep lines are read from `af`; each must start with "REP"
+ * and carry a pattern and a replacement.  A leading '^' in the pattern sets
+ * the entry's `start` member and a trailing '$' sets `end`; both markers are
+ * removed from the stored pattern.  Both strings are passed through
+ * mystrrep(..., "_", " ").  Returns 0 on success, 1 on error.
+ */
+int AffixMgr::parse_reptable(char* line, FileMgr* af) {
+  if (numrep != 0) {
+    HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
+                     af->getlinenum());
+    return 1;
+  }
+  char* tp = line;
+  char* piece;
+  int i = 0;
+  int np = 0;
+  piece = mystrsep(&tp, 0);
+  while (piece) {
+    if (*piece != '\0') {
+      switch (i) {
+        case 0: {
+          np++;
+          break;
+        }
+        case 1: {
+          numrep = atoi(piece);
+          if (numrep < 1) {
+            HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n",
+                             af->getlinenum());
+            return 1;
+          }
+          reptable = (replentry*)malloc(numrep * sizeof(struct replentry));
+          if (!reptable)
+            return 1;
+          np++;
+          break;
+        }
+        default:
+          break;
+      }
+      i++;
+    }
+    piece = mystrsep(&tp, 0);
+  }
+  if (np != 2) {
+    HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+                     af->getlinenum());
+    return 1;
+  }
+
+  /* now parse the numrep lines to read in the remainder of the table */
+  char* nl;
+  for (int j = 0; j < numrep; j++) {
+    if ((nl = af->getline()) == NULL)
+      return 1;
+    mychomp(nl);
+    tp = nl;
+    i = 0;
+    reptable[j].pattern = NULL;
+    reptable[j].pattern2 = NULL;
+    piece = mystrsep(&tp, 0);
+    while (piece) {
+      if (*piece != '\0') {
+        switch (i) {
+          case 0: {
+            if (strncmp(piece, "REP", 3) != 0) {
+              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+                               af->getlinenum());
+              numrep = 0;
+              return 1;
+            }
+            break;
+          }
+          case 1: {
+            if (*piece == '^')
+              reptable[j].start = true;
+            else
+              reptable[j].start = false;
+            reptable[j].end = false;
+            reptable[j].pattern =
+                mystrrep(mystrdup(piece + int(reptable[j].start)), "_", " ");
+            // Only look for the '$' marker in a non-empty pattern: for an
+            // empty one (e.g. the field was just "^") strlen() - 1 would
+            // index before the buffer, and a failed duplicate would be a
+            // NULL dereference.  A NULL pattern is rejected after the line.
+            if (reptable[j].pattern && *reptable[j].pattern) {
+              size_t lr = strlen(reptable[j].pattern) - 1;
+              if (reptable[j].pattern[lr] == '$') {
+                reptable[j].end = true;
+                reptable[j].pattern[lr] = '\0';
+              }
+            }
+            break;
+          }
+          case 2: {
+            reptable[j].pattern2 = mystrrep(mystrdup(piece), "_", " ");
+            break;
+          }
+          default:
+            break;
+        }
+        i++;
+      }
+      piece = mystrsep(&tp, 0);
+    }
+    // both the pattern and its replacement are mandatory
+    if ((!(reptable[j].pattern)) || (!(reptable[j].pattern2))) {
+      HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+                       af->getlinenum());
+      numrep = 0;
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/* parse a conversion table into a new RepList stored in *rl: `line` is the header (keyword and entry count), followed by that many "<keyword> <pattern> <replacement>" lines read from af; returns 0 on success, 1 on error */
+int AffixMgr::parse_convtable(char* line,
+ FileMgr* af,
+ RepList** rl,
+ const char* keyword) {
+ if (*rl) {
+ HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
+ af->getlinenum());
+ return 1;
+ }
+ char* tp = line;
+ char* piece;
+ int i = 0; // index of the current non-empty field
+ int np = 0; // number of mandatory header fields seen
+ int numrl = 0; // announced number of entries
+ piece = mystrsep(&tp, 0);
+ while (piece) {
+ if (*piece != '\0') {
+ switch (i) {
+ case 0: {
+ np++;
+ break;
+ }
+ case 1: {
+ numrl = atoi(piece);
+ if (numrl < 1) {
+ HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n",
+ af->getlinenum());
+ return 1;
+ }
+ *rl = new RepList(numrl);
+ if (!*rl)
+ return 1;
+ np++;
+ break;
+ }
+ default:
+ break;
+ }
+ i++;
+ }
+ piece = mystrsep(&tp, 0);
+ }
+ if (np != 2) {
+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+ af->getlinenum());
+ return 1;
+ }
+
+ /* now parse the num lines to read in the remainder of the table */
+ char* nl;
+ for (int j = 0; j < numrl; j++) {
+ if (!(nl = af->getline()))
+ return 1;
+ mychomp(nl);
+ tp = nl;
+ i = 0;
+ char* pattern = NULL;
+ char* pattern2 = NULL;
+ piece = mystrsep(&tp, 0);
+ while (piece) {
+ if (*piece != '\0') {
+ switch (i) {
+ case 0: {
+ if (strncmp(piece, keyword, strlen(keyword)) != 0) { // every entry line must start with the table keyword
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ delete *rl;
+ *rl = NULL;
+ return 1;
+ }
+ break;
+ }
+ case 1: {
+ pattern = mystrrep(mystrdup(piece), "_", " ");
+ break;
+ }
+ case 2: {
+ pattern2 = mystrrep(mystrdup(piece), "_", " ");
+ break;
+ }
+ default:
+ break;
+ }
+ i++;
+ }
+ piece = mystrsep(&tp, 0);
+ }
+ if (!pattern || !pattern2) {
+ if (pattern)
+ free(pattern);
+ if (pattern2)
+ free(pattern2);
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ return 1; // NOTE(review): unlike the keyword-mismatch path above, *rl is left allocated here - confirm intended
+ }
+ (*rl)->add(pattern, pattern2); // presumably the list takes ownership of both strings - verify against RepList::add
+ }
+ return 0;
+}
+
+/* parse in the phonetic (PHONE) table: `line` is the header (keyword and rule count), followed by that many "PHONE <rule> <replacement>" lines read from af; returns 0 on success, 1 on error */
+int AffixMgr::parse_phonetable(char* line, FileMgr* af) {
+ if (phone) {
+ HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
+ af->getlinenum());
+ return 1;
+ }
+ char* tp = line;
+ char* piece;
+ int i = 0; // index of the current non-empty field
+ int np = 0; // number of mandatory header fields seen
+ piece = mystrsep(&tp, 0);
+ while (piece) {
+ if (*piece != '\0') {
+ switch (i) {
+ case 0: {
+ np++;
+ break;
+ }
+ case 1: {
+ phone = (phonetable*)malloc(sizeof(struct phonetable));
+ if (!phone)
+ return 1;
+ phone->num = atoi(piece);
+ phone->rules = NULL;
+ phone->utf8 = (char)utf8;
+ if (phone->num < 1) {
+ HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
+ af->getlinenum());
+ return 1;
+ }
+ phone->rules = (char**)malloc(2 * (phone->num + 1) * sizeof(char*)); // two strings per rule, plus one extra pair for the empty terminator stored at the end
+ if (!phone->rules) {
+ free(phone);
+ phone = NULL;
+ return 1;
+ }
+ np++;
+ break;
+ }
+ default:
+ break;
+ }
+ i++;
+ }
+ piece = mystrsep(&tp, 0);
+ }
+ if (np != 2) {
+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+ af->getlinenum());
+ return 1;
+ }
+
+ /* now parse the phone->num lines to read in the remainder of the table */
+ char* nl;
+ for (int j = 0; j < phone->num; j++) {
+ if (!(nl = af->getline()))
+ return 1;
+ mychomp(nl);
+ tp = nl;
+ i = 0;
+ phone->rules[j * 2] = NULL;
+ phone->rules[j * 2 + 1] = NULL;
+ piece = mystrsep(&tp, 0);
+ while (piece) {
+ if (*piece != '\0') {
+ switch (i) {
+ case 0: {
+ if (strncmp(piece, "PHONE", 5) != 0) {
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ phone->num = 0;
+ return 1;
+ }
+ break;
+ }
+ case 1: {
+ phone->rules[j * 2] = mystrrep(mystrdup(piece), "_", ""); // even slot: the rule; mystrrep is called with "_" and an empty replacement
+ break;
+ }
+ case 2: {
+ phone->rules[j * 2 + 1] = mystrrep(mystrdup(piece), "_", ""); // odd slot: the replacement
+ break;
+ }
+ default:
+ break;
+ }
+ i++;
+ }
+ piece = mystrsep(&tp, 0);
+ }
+ if ((!(phone->rules[j * 2])) || (!(phone->rules[j * 2 + 1]))) {
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ phone->num = 0;
+ return 1;
+ }
+ }
+ phone->rules[phone->num * 2] = mystrdup(""); // terminate the rule list with a pair of empty strings
+ phone->rules[phone->num * 2 + 1] = mystrdup("");
+ init_phonet_hash(*phone);
+ return 0;
+}
+
+/* parse in the checkcompoundpattern table: `line` is the header (keyword and entry count), followed by that many "CHECKCOMPOUNDPATTERN <pattern>[/flag] <pattern2>[/flag] [<pattern3>]" lines read from af; returns 0 on success, 1 on error */
+int AffixMgr::parse_checkcpdtable(char* line, FileMgr* af) {
+ if (numcheckcpd != 0) {
+ HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
+ af->getlinenum());
+ return 1;
+ }
+ char* tp = line;
+ char* piece;
+ int i = 0; // index of the current non-empty field
+ int np = 0; // number of mandatory header fields seen
+ piece = mystrsep(&tp, 0);
+ while (piece) {
+ if (*piece != '\0') {
+ switch (i) {
+ case 0: {
+ np++;
+ break;
+ }
+ case 1: {
+ numcheckcpd = atoi(piece);
+ if (numcheckcpd < 1) {
+ HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
+ af->getlinenum());
+ return 1;
+ }
+ checkcpdtable =
+ (patentry*)malloc(numcheckcpd * sizeof(struct patentry));
+ if (!checkcpdtable)
+ return 1;
+ np++;
+ break;
+ }
+ default:
+ break;
+ }
+ i++;
+ }
+ piece = mystrsep(&tp, 0);
+ }
+ if (np != 2) {
+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+ af->getlinenum());
+ return 1;
+ }
+
+ /* now parse the numcheckcpd lines to read in the remainder of the table */
+ char* nl;
+ for (int j = 0; j < numcheckcpd; j++) {
+ if (!(nl = af->getline()))
+ return 1;
+ mychomp(nl);
+ tp = nl;
+ i = 0;
+ checkcpdtable[j].pattern = NULL;
+ checkcpdtable[j].pattern2 = NULL;
+ checkcpdtable[j].pattern3 = NULL;
+ checkcpdtable[j].cond = FLAG_NULL;
+ checkcpdtable[j].cond2 = FLAG_NULL;
+ piece = mystrsep(&tp, 0);
+ while (piece) {
+ if (*piece != '\0') {
+ switch (i) {
+ case 0: {
+ if (strncmp(piece, "CHECKCOMPOUNDPATTERN", 20) != 0) {
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ numcheckcpd = 0;
+ return 1;
+ }
+ break;
+ }
+ case 1: {
+ checkcpdtable[j].pattern = mystrdup(piece);
+ char* p = strchr(checkcpdtable[j].pattern, '/');
+ if (p) {
+ *p = '\0'; // cut the pattern at '/', the rest is decoded as the flag for cond
+ checkcpdtable[j].cond = pHMgr->decode_flag(p + 1);
+ }
+ break;
+ }
+ case 2: {
+ checkcpdtable[j].pattern2 = mystrdup(piece);
+ char* p = strchr(checkcpdtable[j].pattern2, '/');
+ if (p) {
+ *p = '\0'; // same "/flag" handling for the second pattern
+ checkcpdtable[j].cond2 = pHMgr->decode_flag(p + 1);
+ }
+ break;
+ }
+ case 3: {
+ checkcpdtable[j].pattern3 = mystrdup(piece);
+ simplifiedcpd = 1; // any entry with a third pattern sets simplifiedcpd
+ break;
+ }
+ default:
+ break;
+ }
+ i++;
+ }
+ piece = mystrsep(&tp, 0);
+ }
+ if ((!(checkcpdtable[j].pattern)) || (!(checkcpdtable[j].pattern2))) { // the first two patterns are mandatory, pattern3 is optional
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ numcheckcpd = 0;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* parse in the compound rule table: `line` is the header (keyword and entry count), followed by that many "COMPOUNDRULE <rule>" lines read from af; each rule is stored as a FLAG array in defcpdtable[j]; returns 0 on success, 1 on error */
+int AffixMgr::parse_defcpdtable(char* line, FileMgr* af) {
+ if (numdefcpd != 0) {
+ HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
+ af->getlinenum());
+ return 1;
+ }
+ char* tp = line;
+ char* piece;
+ int i = 0; // index of the current non-empty field
+ int np = 0; // number of mandatory header fields seen
+ piece = mystrsep(&tp, 0);
+ while (piece) {
+ if (*piece != '\0') {
+ switch (i) {
+ case 0: {
+ np++;
+ break;
+ }
+ case 1: {
+ numdefcpd = atoi(piece);
+ if (numdefcpd < 1) {
+ HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
+ af->getlinenum());
+ return 1;
+ }
+ defcpdtable = (flagentry*)malloc(numdefcpd * sizeof(flagentry));
+ if (!defcpdtable)
+ return 1;
+ np++;
+ break;
+ }
+ default:
+ break;
+ }
+ i++;
+ }
+ piece = mystrsep(&tp, 0);
+ }
+ if (np != 2) {
+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+ af->getlinenum());
+ return 1;
+ }
+
+ /* now parse the numdefcpd lines to read in the remainder of the table */
+ char* nl;
+ for (int j = 0; j < numdefcpd; j++) {
+ if (!(nl = af->getline()))
+ return 1;
+ mychomp(nl);
+ tp = nl;
+ i = 0;
+ defcpdtable[j].def = NULL;
+ defcpdtable[j].len = 0;
+ piece = mystrsep(&tp, 0);
+ while (piece) {
+ if (*piece != '\0') {
+ switch (i) {
+ case 0: {
+ if (strncmp(piece, "COMPOUNDRULE", 12) != 0) {
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ numdefcpd = 0;
+ return 1;
+ }
+ break;
+ }
+ case 1: { // handle parenthesized flags
+ if (strchr(piece, '(')) {
+ defcpdtable[j].def = (FLAG*)malloc(strlen(piece) * sizeof(FLAG)); // NOTE(review): result is not checked for NULL before it is written below
+ defcpdtable[j].len = 0;
+ int end = 0;
+ FLAG* conv;
+ while (!end) {
+ char* par = piece + 1; // NOTE(review): if piece already points at the terminator (rule ending in ')'), this scan starts one byte past it - confirm it cannot over-read
+ while (*par != '(' && *par != ')' && *par != '\0')
+ par++;
+ if (*par == '\0')
+ end = 1;
+ else
+ *par = '\0'; // cut the current segment at the parenthesis
+ if (*piece == '(')
+ piece++;
+ if (*piece == '*' || *piece == '?') {
+ defcpdtable[j].def[defcpdtable[j].len++] = (FLAG)*piece; // '*' and '?' are stored as-is in the flag array
+ } else if (*piece != '\0') {
+ int l = pHMgr->decode_flags(&conv, piece, af);
+ for (int k = 0; k < l; k++)
+ defcpdtable[j].def[defcpdtable[j].len++] = conv[k];
+ free(conv);
+ }
+ piece = par + 1; // continue after the parenthesis just consumed
+ }
+ } else {
+ defcpdtable[j].len =
+ pHMgr->decode_flags(&(defcpdtable[j].def), piece, af); // no parentheses: decode the whole field in one call
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ i++;
+ }
+ piece = mystrsep(&tp, 0);
+ }
+ if (!defcpdtable[j].len) { // a rule with no flags is treated as a corrupt table
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ numdefcpd = 0;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* parse in the character map table: `line` is the header (keyword and entry count), followed by that many "MAP <characters>" lines read from af; each line becomes an array of strings in maptable[j].set; returns 0 on success, 1 on error */
+int AffixMgr::parse_maptable(char* line, FileMgr* af) {
+ if (nummap != 0) {
+ HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
+ af->getlinenum());
+ return 1;
+ }
+ char* tp = line;
+ char* piece;
+ int i = 0; // index of the current non-empty field
+ int np = 0; // number of mandatory header fields seen
+ piece = mystrsep(&tp, 0);
+ while (piece) {
+ if (*piece != '\0') {
+ switch (i) {
+ case 0: {
+ np++;
+ break;
+ }
+ case 1: {
+ nummap = atoi(piece);
+ if (nummap < 1) {
+ HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
+ af->getlinenum());
+ return 1;
+ }
+ maptable = (mapentry*)malloc(nummap * sizeof(struct mapentry));
+ if (!maptable)
+ return 1;
+ np++;
+ break;
+ }
+ default:
+ break;
+ }
+ i++;
+ }
+ piece = mystrsep(&tp, 0);
+ }
+ if (np != 2) {
+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+ af->getlinenum());
+ return 1;
+ }
+
+ /* now parse the nummap lines to read in the remainder of the table */
+ char* nl;
+ for (int j = 0; j < nummap; j++) {
+ if (!(nl = af->getline()))
+ return 1;
+ mychomp(nl);
+ tp = nl;
+ i = 0;
+ maptable[j].set = NULL;
+ maptable[j].len = 0;
+ piece = mystrsep(&tp, 0);
+ while (piece) {
+ if (*piece != '\0') {
+ switch (i) {
+ case 0: {
+ if (strncmp(piece, "MAP", 3) != 0) {
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ nummap = 0;
+ return 1;
+ }
+ break;
+ }
+ case 1: {
+ int setn = 0; // number of elements stored so far
+ maptable[j].len = strlen(piece); // byte length: an upper bound for the element count, replaced by setn below
+ maptable[j].set = (char**)malloc(maptable[j].len * sizeof(char*));
+ if (!maptable[j].set)
+ return 1;
+ for (int k = 0; k < maptable[j].len; k++) {
+ int chl = 1; // length in bytes of the current element
+ int chb = k; // start offset of the current element
+ if (piece[k] == '(') {
+ char* parpos = strchr(piece + k, ')');
+ if (parpos != NULL) { // "(...)": the text between the parentheses is one element; an unmatched '(' is kept as a single byte
+ chb = k + 1;
+ chl = (int)(parpos - piece) - k - 1;
+ k = k + chl + 1;
+ }
+ } else {
+ if (utf8 && (piece[k] & 0xc0) == 0xc0) { // UTF-8 lead byte: take it with its continuation bytes as one element
+ for (k++; utf8 && (piece[k] & 0xc0) == 0x80; k++)
+ ;
+ chl = k - chb;
+ k--; // step back so the loop increment lands on the next element
+ }
+ }
+ maptable[j].set[setn] = (char*)malloc(chl + 1);
+ if (!maptable[j].set[setn])
+ return 1;
+ strncpy(maptable[j].set[setn], piece + chb, chl);
+ maptable[j].set[setn][chl] = '\0';
+ setn++;
+ }
+ maptable[j].len = setn;
+ break;
+ }
+ default:
+ break;
+ }
+ i++;
+ }
+ piece = mystrsep(&tp, 0);
+ }
+ if (!maptable[j].set || !maptable[j].len) {
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ nummap = 0;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* parse in the word breakpoint table
+ *
+ * `line` is the table header: keyword plus the number of entries.  A count
+ * of 0 is accepted and leaves the table empty.  Otherwise the following
+ * numbreak lines are read from `af`; each must start with "BREAK" and carry
+ * one break pattern, which is stored in breaktable[j].  Returns 0 on
+ * success, 1 on error.
+ */
+int AffixMgr::parse_breaktable(char* line, FileMgr* af) {
+  if (numbreak > -1) {
+    HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
+                     af->getlinenum());
+    return 1;
+  }
+  char* tp = line;
+  char* piece;
+  int i = 0;
+  int np = 0;
+  piece = mystrsep(&tp, 0);
+  while (piece) {
+    if (*piece != '\0') {
+      switch (i) {
+        case 0: {
+          np++;
+          break;
+        }
+        case 1: {
+          numbreak = atoi(piece);
+          if (numbreak < 0) {
+            HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
+                             af->getlinenum());
+            return 1;
+          }
+          if (numbreak == 0)
+            return 0;
+          breaktable = (char**)malloc(numbreak * sizeof(char*));
+          if (!breaktable)
+            return 1;
+          // start with every slot empty so that no slot ever holds an
+          // indeterminate pointer, even if parsing stops half-way
+          for (int b = 0; b < numbreak; b++)
+            breaktable[b] = NULL;
+          np++;
+          break;
+        }
+        default:
+          break;
+      }
+      i++;
+    }
+    piece = mystrsep(&tp, 0);
+  }
+  if (np != 2) {
+    HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+                     af->getlinenum());
+    return 1;
+  }
+
+  /* now parse the numbreak lines to read in the remainder of the table */
+  char* nl;
+  for (int j = 0; j < numbreak; j++) {
+    if (!(nl = af->getline()))
+      return 1;
+    mychomp(nl);
+    tp = nl;
+    i = 0;
+    piece = mystrsep(&tp, 0);
+    while (piece) {
+      if (*piece != '\0') {
+        switch (i) {
+          case 0: {
+            if (strncmp(piece, "BREAK", 5) != 0) {
+              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+                               af->getlinenum());
+              numbreak = 0;
+              return 1;
+            }
+            break;
+          }
+          case 1: {
+            breaktable[j] = mystrdup(piece);
+            break;
+          }
+          default:
+            break;
+        }
+        i++;
+      }
+      piece = mystrsep(&tp, 0);
+    }
+    // the entry itself must have been filled in: a "BREAK" line without a
+    // pattern (or a failed duplicate) leaves the slot NULL
+    if (!breaktable[j]) {
+      HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+                       af->getlinenum());
+      numbreak = 0;
+      return 1;
+    }
+  }
+  return 0;
+}
+
+// Repair the bracket expressions of a condition string that has already
+// been reversed character by character: a reversed class such as "]cba^["
+// is rewritten in place to "[^cba]".  The string is walked from its last
+// character to its first; `neg` is set while inside a negated class, whose
+// members are each moved one position towards the end so that '^' can be
+// placed directly after the new opening bracket.
+void AffixMgr::reverse_condition(std::string& piece) {
+  if (piece.empty())
+    return;
+
+  int neg = 0;
+  for (std::string::reverse_iterator k = piece.rbegin(); k != piece.rend(); ++k) {
+    switch (*k) {
+      case '[': {
+        if (neg)
+          *(k - 1) = '[';
+        else
+          *k = ']';
+        break;
+      }
+      case ']': {
+        *k = '[';
+        if (neg)
+          *(k - 1) = '^';
+        neg = 0;
+        break;
+      }
+      case '^': {
+        // k - 1 is only valid when '^' is not the last character of the
+        // string; dereferencing it at rbegin() would step past the end
+        if (k != piece.rbegin()) {
+          if (*(k - 1) == ']')
+            neg = 1;
+          else if (neg)
+            // shift only inside a negated class; a literal '^' elsewhere
+            // must not overwrite the character that follows it
+            *(k - 1) = *k;
+        }
+        break;
+      }
+      default: {
+        if (neg)
+          *(k - 1) = *k;
+      }
+    }
+  }
+}
+
+int AffixMgr::parse_affix(char* line,
+ const char at,
+ FileMgr* af,
+ char* dupflags) {
+ int numents = 0; // number of affentry structures to parse
+
+ unsigned short aflag = 0; // affix char identifier
+
+ char ff = 0;
+ std::vector<affentry> affentries;
+
+ char* tp = line;
+ char* nl = line;
+ char* piece;
+ int i = 0;
+
+// checking lines with bad syntax
+#ifdef DEBUG
+ int basefieldnum = 0;
+#endif
+
+ // split affix header line into pieces
+
+ int np = 0;
+
+ piece = mystrsep(&tp, 0);
+ while (piece) {
+ if (*piece != '\0') {
+ switch (i) {
+ // piece 1 - is type of affix
+ case 0: {
+ np++;
+ break;
+ }
+
+ // piece 2 - is affix char
+ case 1: {
+ np++;
+ aflag = pHMgr->decode_flag(piece);
+ if (((at == 'S') && (dupflags[aflag] & dupSFX)) ||
+ ((at == 'P') && (dupflags[aflag] & dupPFX))) {
+ HUNSPELL_WARNING(
+ stderr,
+ "error: line %d: multiple definitions of an affix flag\n",
+ af->getlinenum());
+ // return 1; XXX permissive mode for bad dictionaries
+ }
+ dupflags[aflag] += (char)((at == 'S') ? dupSFX : dupPFX);
+ break;
+ }
+ // piece 3 - is cross product indicator
+ case 2: {
+ np++;
+ if (*piece == 'Y')
+ ff = aeXPRODUCT;
+ break;
+ }
+
+ // piece 4 - is number of affentries
+ case 3: {
+ np++;
+ numents = atoi(piece);
+ if ((numents <= 0) || ((std::numeric_limits<size_t>::max() /
+ sizeof(struct affentry)) < static_cast<size_t>(numents))) {
+ char* err = pHMgr->encode_flag(aflag);
+ if (err) {
+ HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
+ af->getlinenum());
+ free(err);
+ }
+ return 1;
+ }
+ affentries.resize(numents);
+ affentries[0].opts = ff;
+ if (utf8)
+ affentries[0].opts += aeUTF8;
+ if (pHMgr->is_aliasf())
+ affentries[0].opts += aeALIASF;
+ if (pHMgr->is_aliasm())
+ affentries[0].opts += aeALIASM;
+ affentries[0].aflag = aflag;
+ }
+
+ default:
+ break;
+ }
+ i++;
+ }
+ piece = mystrsep(&tp, 0);
+ }
+ // check to make sure we parsed enough pieces
+ if (np != 4) {
+ char* err = pHMgr->encode_flag(aflag);
+ if (err) {
+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+ af->getlinenum());
+ free(err);
+ }
+ return 1;
+ }
+
+ // now parse numents affentries for this affix
+ std::vector<affentry>::iterator start = affentries.begin();
+ std::vector<affentry>::iterator end = affentries.end();
+ for (std::vector<affentry>::iterator entry = start; entry != end; ++entry) {
+ if ((nl = af->getline()) == NULL)
+ return 1;
+ mychomp(nl);
+ tp = nl;
+ i = 0;
+ np = 0;
+
+ // split line into pieces
+ piece = mystrsep(&tp, 0);
+ while (piece) {
+ if (*piece != '\0') {
+ switch (i) {
+ // piece 1 - is type
+ case 0: {
+ np++;
+ if (entry != start)
+ entry->opts = start->opts &
+ (char)(aeXPRODUCT + aeUTF8 + aeALIASF + aeALIASM);
+ break;
+ }
+
+ // piece 2 - is affix char
+ case 1: {
+ np++;
+ if (pHMgr->decode_flag(piece) != aflag) {
+ char* err = pHMgr->encode_flag(aflag);
+ if (err) {
+ HUNSPELL_WARNING(stderr,
+ "error: line %d: affix %s is corrupt\n",
+ af->getlinenum(), err);
+ free(err);
+ }
+ return 1;
+ }
+
+ if (entry != start)
+ entry->aflag = start->aflag;
+ break;
+ }
+
+ // piece 3 - is string to strip or 0 for null
+ case 2: {
+ np++;
+ entry->strip = piece;
+ if (complexprefixes) {
+ if (utf8)
+ reverseword_utf(entry->strip);
+ else
+ reverseword(entry->strip);
+ }
+ if (entry->strip.compare("0") == 0) {
+ entry->strip.clear();
+ }
+ break;
+ }
+
+ // piece 4 - is affix string or 0 for null
+ case 3: {
+ char* dash;
+ entry->morphcode = NULL;
+ entry->contclass = NULL;
+ entry->contclasslen = 0;
+ np++;
+ dash = strchr(piece, '/');
+ if (dash) {
+ *dash = '\0';
+
+ entry->appnd = piece;
+
+ if (ignorechars) {
+ if (utf8) {
+ remove_ignored_chars_utf(entry->appnd, ignorechars_utf16);
+ } else {
+ remove_ignored_chars(entry->appnd, ignorechars);
+ }
+ }
+
+ if (complexprefixes) {
+ if (utf8)
+ reverseword_utf(entry->appnd);
+ else
+ reverseword(entry->appnd);
+ }
+
+ if (pHMgr->is_aliasf()) {
+ int index = atoi(dash + 1);
+ entry->contclasslen = (unsigned short)pHMgr->get_aliasf(
+ index, &(entry->contclass), af);
+ if (!entry->contclasslen)
+ HUNSPELL_WARNING(stderr,
+ "error: bad affix flag alias: \"%s\"\n",
+ dash + 1);
+ } else {
+ entry->contclasslen = (unsigned short)pHMgr->decode_flags(
+ &(entry->contclass), dash + 1, af);
+ std::sort(entry->contclass, entry->contclass + entry->contclasslen);
+ }
+ *dash = '/';
+
+ havecontclass = 1;
+ for (unsigned short _i = 0; _i < entry->contclasslen; _i++) {
+ contclasses[(entry->contclass)[_i]] = 1;
+ }
+ } else {
+ entry->appnd = piece;
+
+ if (ignorechars) {
+ if (utf8) {
+ remove_ignored_chars_utf(entry->appnd, ignorechars_utf16);
+ } else {
+ remove_ignored_chars(entry->appnd, ignorechars);
+ }
+ }
+
+ if (complexprefixes) {
+ if (utf8)
+ reverseword_utf(entry->appnd);
+ else
+ reverseword(entry->appnd);
+ }
+ }
+
+ if (entry->appnd.compare("0") == 0) {
+ entry->appnd.clear();
+ }
+ break;
+ }
+
+ // piece 5 - is the conditions descriptions
+ case 4: {
+ std::string chunk(piece);
+ np++;
+ if (complexprefixes) {
+ if (utf8)
+ reverseword_utf(chunk);
+ else
+ reverseword(chunk);
+ reverse_condition(chunk);
+ }
+ if (!entry->strip.empty() && chunk != "." &&
+ redundant_condition(at, entry->strip.c_str(), entry->strip.size(), chunk.c_str(),
+ af->getlinenum()))
+ chunk = ".";
+ if (at == 'S') {
+ reverseword(chunk);
+ reverse_condition(chunk);
+ }
+ if (encodeit(*entry, chunk.c_str()))
+ return 1;
+ break;
+ }
+
+ case 5: {
+ std::string chunk(piece);
+ np++;
+ if (pHMgr->is_aliasm()) {
+ int index = atoi(chunk.c_str());
+ entry->morphcode = pHMgr->get_aliasm(index);
+ } else {
+ if (complexprefixes) { // XXX - fix me for morph. gen.
+ if (utf8)
+ reverseword_utf(chunk);
+ else
+ reverseword(chunk);
+ }
+ // add the remaining of the line
+ if (*tp) {
+ *(tp - 1) = ' ';
+ chunk.push_back(' ');
+ chunk.append(tp);
+ }
+ entry->morphcode = mystrdup(chunk.c_str());
+ if (!entry->morphcode)
+ return 1;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ i++;
+ }
+ piece = mystrsep(&tp, 0);
+ }
+ // check to make sure we parsed enough pieces
+ if (np < 4) {
+ char* err = pHMgr->encode_flag(aflag);
+ if (err) {
+ HUNSPELL_WARNING(stderr, "error: line %d: affix %s is corrupt\n",
+ af->getlinenum(), err);
+ free(err);
+ }
+ return 1;
+ }
+
+#ifdef DEBUG
+ // detect unnecessary fields, excepting comments
+ if (basefieldnum) {
+ int fieldnum =
+ !(entry->morphcode) ? 5 : ((*(entry->morphcode) == '#') ? 5 : 6);
+ if (fieldnum != basefieldnum)
+ HUNSPELL_WARNING(stderr, "warning: line %d: bad field number\n",
+ af->getlinenum());
+ } else {
+ basefieldnum =
+ !(entry->morphcode) ? 5 : ((*(entry->morphcode) == '#') ? 5 : 6);
+ }
+#endif
+ }
+
+ // now create SfxEntry or PfxEntry objects and use links to
+ // build an ordered (sorted by affix string) list
+ for (std::vector<affentry>::iterator entry = start; entry != end; ++entry) {
+ if (at == 'P') {
+ PfxEntry* pfxptr = new PfxEntry(this, &(*entry));
+ build_pfxtree(pfxptr);
+ } else {
+ SfxEntry* sfxptr = new SfxEntry(this, &(*entry));
+ build_sfxtree(sfxptr);
+ }
+ }
+ return 0;
+}
+
+// Report whether an affix rule's condition adds nothing beyond its strip string.
+//   ft       'P' selects prefix handling; any other value is treated as suffix
+//   strip    characters the rule strips, stripl bytes long
+//   cond     NUL-terminated condition; "[...]" is a class, "[^...]" negated
+//   linenum  affix-file line number, used only in diagnostics
+// Returns 1 if the condition is redundant, otherwise 0 (also when strip and
+// condition disagree or a bracket is unbalanced). With utf8 set, only the
+// plain string comparison is attempted.
+int AffixMgr::redundant_condition(char ft, const char* strip, int stripl,
+                                  const char* cond, int linenum) {
+  int condl = strlen(cond);
+  int i, j, neg, in;
+  if (ft == 'P') {  // prefix: match from the front of both strings
+    // literal condition that strip already starts with
+    if (strncmp(strip, cond, condl) == 0)
+      return 1;
+    if (!utf8) {
+      for (i = 0, j = 0; (i < stripl) && (j < condl); i++, j++) {
+        if (cond[j] != '[') {
+          if (cond[j] != strip[i]) {
+            HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping "
+                             "characters and condition\n", linenum);
+            return 0;
+          }
+        } else {
+          // character class: scan to ']' noting whether strip[i] is listed
+          neg = (cond[j + 1] == '^') ? 1 : 0;
+          in = 0;
+          do {
+            j++;
+            if (strip[i] == cond[j])
+              in = 1;
+          } while ((j < (condl - 1)) && (cond[j] != ']'));
+          if (j == (condl - 1) && (cond[j] != ']')) {
+            HUNSPELL_WARNING(stderr,
+                             "error: line %d: missing ] in condition:\n%s\n",
+                             linenum, cond);
+            return 0;
+          }
+          if ((!neg && !in) || (neg && in)) {
+            HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping "
+                             "characters and condition\n", linenum);
+            return 0;
+          }
+        }
+      }
+      // every condition character was matched against strip
+      if (j >= condl)
+        return 1;
+    }
+  } else {  // suffix: match from the end of both strings
+    // literal condition that strip already ends with
+    if ((stripl >= condl) && strcmp(strip + stripl - condl, cond) == 0)
+      return 1;
+    if (!utf8) {
+      for (i = stripl - 1, j = condl - 1; (i >= 0) && (j >= 0); i--, j--) {
+        if (cond[j] != ']') {
+          if (cond[j] != strip[i]) {
+            HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping "
+                             "characters and condition\n", linenum);
+            return 0;
+          }
+        } else {
+          // Character class: walk left to its '['. The index is tested before
+          // each step, so a ']' at cond[0] is reported as unbalanced instead
+          // of reading cond[-1].
+          in = 0;
+          while ((j > 0) && (cond[j] != '[')) {
+            j--;
+            if (strip[i] == cond[j])
+              in = 1;
+          }
+          if (cond[j] != '[') {
+            HUNSPELL_WARNING(stderr,
+                             "error: line %d: missing [ in condition:\n%s\n",
+                             linenum, cond);
+            return 0;
+          }
+          neg = (cond[j + 1] == '^') ? 1 : 0;
+          if ((!neg && !in) || (neg && in)) {
+            HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping "
+                             "characters and condition\n", linenum);
+            return 0;
+          }
+        }
+      }
+      // every condition character was matched against strip
+      if (j < 0)
+        return 1;
+    }
+  }
+  return 0;
+}
+
+// For every suffix entry whose flag occurs in suff[0..len), try root_word plus
+// the entry's affix string with checkword() and mystrdup() each accepted form
+// into slst. Returns the number of entries written. slst has no size
+// parameter, so the caller must supply enough room.
+int AffixMgr::get_suffix_words(short unsigned* suff, int len,
+                               const char* root_word, char** slst) {
+  int suff_words_cnt = 0;
+  for (int j = 0; j < SETSIZE; j++) {
+    for (SfxEntry* ptr = sStart[j]; ptr; ptr = ptr->getNext()) {
+      for (int i = 0; i < len; i++) {
+        if (suff[i] != ptr->getFlag())
+          continue;
+        std::string nw(root_word);
+        nw.append(ptr->getAffix());
+        if (!ptr->checkword(nw.c_str(), nw.size(), 0, NULL, NULL, 0, NULL, 0,
+                            0, 0))
+          continue;
+        // mystrdup() can return NULL (parse_affix checks for it too); skip
+        // that form so slst only ever receives valid strings.
+        char* copy = mystrdup(nw.c_str());
+        if (copy)
+          slst[suff_words_cnt++] = copy;
+      }
+    }
+  }
+  return suff_words_cnt;
+}
diff --git a/extensions/spellcheck/hunspell/src/affixmgr.hxx b/extensions/spellcheck/hunspell/src/affixmgr.hxx
new file mode 100644
index 000000000..d70e85338
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/affixmgr.hxx
@@ -0,0 +1,390 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _AFFIXMGR_HXX_
+#define _AFFIXMGR_HXX_
+
+#include "hunvisapi.h"
+
+#include <stdio.h>
+
+#include <string>
+
+#include "atypes.hxx"
+#include "baseaffix.hxx"
+#include "hashmgr.hxx"
+#include "phonet.hxx"
+#include "replist.hxx"
+
+// check flag duplication: bits kept per affix flag in parse_affix()'s dupflags table
+#define dupSFX (1 << 0)  // the flag already has an SFX definition
+#define dupPFX (1 << 1)  // the flag already has a PFX definition
+
+class PfxEntry;  // forward declarations; the definitions are not in this header
+class SfxEntry;
+
+class LIBHUNSPELL_DLL_EXPORTED AffixMgr {  // affix-file settings plus the prefix/suffix entries created by parse_affix()
+ PfxEntry* pStart[SETSIZE];  // presumably the prefix counterpart of sStart — TODO confirm
+ SfxEntry* sStart[SETSIZE];  // list heads; get_suffix_words() follows each list via getNext()
+ PfxEntry* pFlag[SETSIZE];
+ SfxEntry* sFlag[SETSIZE];
+ HashMgr* pHMgr;  // used by parse_affix() to decode/encode flags and to resolve flag/morph aliases
+ HashMgr** alldic;
+ int* maxdic;
+ char* keystring;
+ char* trystring;
+ char* encoding;
+ struct cs_info* csconv;
+ int utf8;  // nonzero: parse_affix() uses the *_utf string helpers
+ int complexprefixes;  // nonzero: parse_affix() reverses the strip, affix and condition strings
+ FLAG compoundflag;
+ FLAG compoundbegin;
+ FLAG compoundmiddle;
+ FLAG compoundend;
+ FLAG compoundroot;
+ FLAG compoundforbidflag;
+ FLAG compoundpermitflag;
+ int compoundmoresuffixes;
+ int checkcompounddup;
+ int checkcompoundrep;
+ int checkcompoundcase;
+ int checkcompoundtriple;
+ int simplifiedtriple;
+ FLAG forbiddenword;
+ FLAG nosuggest;
+ FLAG nongramsuggest;
+ FLAG needaffix;
+ int cpdmin;
+ int numrep;
+ replentry* reptable;
+ RepList* iconvtable;
+ RepList* oconvtable;
+ int nummap;
+ mapentry* maptable;
+ int numbreak;
+ char** breaktable;
+ int numcheckcpd;
+ patentry* checkcpdtable;
+ int simplifiedcpd;
+ int numdefcpd;
+ flagentry* defcpdtable;
+ phonetable* phone;
+ int maxngramsugs;
+ int maxcpdsugs;
+ int maxdiff;
+ int onlymaxdiff;
+ int nosplitsugs;
+ int sugswithdots;
+ int cpdwordmax;
+ int cpdmaxsyllable;
+ char* cpdvowels;
+ w_char* cpdvowels_utf16;
+ int cpdvowels_utf16_len;
+ char* cpdsyllablenum;
+ const char* pfxappnd; // BUG: not stateless
+ const char* sfxappnd; // BUG: not stateless
+ int sfxextra; // BUG: not stateless
+ FLAG sfxflag; // BUG: not stateless
+ char* derived; // BUG: not stateless
+ SfxEntry* sfx; // BUG: not stateless
+ PfxEntry* pfx; // BUG: not stateless
+ int checknum;
+ char* wordchars;
+ std::vector<w_char> wordchars_utf16;
+ char* ignorechars;  // if non-NULL, parse_affix() removes these characters from affix strings
+ std::vector<w_char> ignorechars_utf16;  // form handed to remove_ignored_chars_utf() when utf8 is set
+ char* version;
+ char* lang;
+ int langnum;
+ FLAG lemma_present;
+ FLAG circumfix;
+ FLAG onlyincompound;
+ FLAG keepcase;
+ FLAG forceucase;
+ FLAG warn;
+ int forbidwarn;
+ FLAG substandard;
+ int checksharps;
+ int fullstrip;
+
+ int havecontclass; // boolean variable; parse_affix() sets it to 1 when an entry carries a '/' flag list
+ char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold
+ // affix); parse_affix() sets contclasses[f] = 1 for each such flag f
+
+ public:
+ AffixMgr(const char* affpath, HashMgr** ptr, int* md, const char* key = NULL);
+ ~AffixMgr();
+ struct hentry* affix_check(const char* word,
+ int len,
+ const unsigned short needflag = (unsigned short)0,
+ char in_compound = IN_CPD_NOT);
+ struct hentry* prefix_check(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag = FLAG_NULL);
+ inline int isSubset(const char* s1, const char* s2);
+ struct hentry* prefix_check_twosfx(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag = FLAG_NULL);
+ inline int isRevSubset(const char* s1, const char* end_of_s2, int len);
+ struct hentry* suffix_check(const char* word,
+ int len,
+ int sfxopts,
+ PfxEntry* ppfx,
+ char** wlst,
+ int maxSug,
+ int* ns,
+ const FLAG cclass = FLAG_NULL,
+ const FLAG needflag = FLAG_NULL,
+ char in_compound = IN_CPD_NOT);
+ struct hentry* suffix_check_twosfx(const char* word,
+ int len,
+ int sfxopts,
+ PfxEntry* ppfx,
+ const FLAG needflag = FLAG_NULL);
+
+ char* affix_check_morph(const char* word,
+ int len,
+ const FLAG needflag = FLAG_NULL,
+ char in_compound = IN_CPD_NOT);
+ char* prefix_check_morph(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag = FLAG_NULL);
+ char* suffix_check_morph(const char* word,
+ int len,
+ int sfxopts,
+ PfxEntry* ppfx,
+ const FLAG cclass = FLAG_NULL,
+ const FLAG needflag = FLAG_NULL,
+ char in_compound = IN_CPD_NOT);
+
+ char* prefix_check_twosfx_morph(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag = FLAG_NULL);
+ char* suffix_check_twosfx_morph(const char* word,
+ int len,
+ int sfxopts,
+ PfxEntry* ppfx,
+ const FLAG needflag = FLAG_NULL);
+
+ char* morphgen(const char* ts,
+ int wl,
+ const unsigned short* ap,
+ unsigned short al,
+ const char* morph,
+ const char* targetmorph,
+ int level);
+
+ int expand_rootword(struct guessword* wlst,
+ int maxn,
+ const char* ts,
+ int wl,
+ const unsigned short* ap,
+ unsigned short al,
+ const char* bad,
+ int,
+ const char*);
+
+ short get_syllable(const std::string& word);
+ int cpdrep_check(const char* word, int len);
+ int cpdpat_check(const char* word,
+ int len,
+ hentry* r1,
+ hentry* r2,
+ const char affixed);
+ int defcpd_check(hentry*** words,
+ short wnum,
+ hentry* rv,
+ hentry** rwords,
+ char all);
+ int cpdcase_check(const char* word, int len);
+ inline int candidate_check(const char* word, int len);
+ void setcminmax(int* cmin, int* cmax, const char* word, int len);
+ struct hentry* compound_check(const char* word,
+ int len,
+ short wordnum,
+ short numsyllable,
+ short maxwordnum,
+ short wnum,
+ hentry** words,
+ hentry** rwords,
+ char hu_mov_rule,
+ char is_sug,
+ int* info);
+
+ int compound_check_morph(const char* word,
+ int len,
+ short wordnum,
+ short numsyllable,
+ short maxwordnum,
+ short wnum,
+ hentry** words,
+ hentry** rwords,
+ char hu_mov_rule,
+ char** result,
+ char* partresult);
+
+ int get_suffix_words(short unsigned* suff,
+ int len,
+ const char* root_word,
+ char** slst);  // fills slst with mystrdup'd words and returns how many were stored
+
+ struct hentry* lookup(const char* word);
+ int get_numrep() const;
+ struct replentry* get_reptable() const;
+ RepList* get_iconvtable() const;
+ RepList* get_oconvtable() const;
+ struct phonetable* get_phonetable() const;
+ int get_nummap() const;
+ struct mapentry* get_maptable() const;
+ int get_numbreak() const;
+ char** get_breaktable() const;
+ char* get_encoding();
+ int get_langnum() const;
+ char* get_key_string();
+ char* get_try_string() const;
+ const char* get_wordchars() const;
+ const std::vector<w_char>& get_wordchars_utf16() const;
+ char* get_ignore() const;
+ const std::vector<w_char>& get_ignore_utf16() const;
+ int get_compound() const;
+ FLAG get_compoundflag() const;
+ FLAG get_compoundbegin() const;
+ FLAG get_forbiddenword() const;
+ FLAG get_nosuggest() const;
+ FLAG get_nongramsuggest() const;
+ FLAG get_needaffix() const;
+ FLAG get_onlyincompound() const;
+ FLAG get_compoundroot() const;
+ FLAG get_lemma_present() const;
+ int get_checknum() const;
+ const char* get_prefix() const;
+ const char* get_suffix() const;
+ const char* get_derived() const;
+ const char* get_version() const;
+ int have_contclass() const;
+ int get_utf8() const;
+ int get_complexprefixes() const;
+ char* get_suffixed(char) const;
+ int get_maxngramsugs() const;
+ int get_maxcpdsugs() const;
+ int get_maxdiff() const;
+ int get_onlymaxdiff() const;
+ int get_nosplitsugs() const;
+ int get_sugswithdots(void) const;
+ FLAG get_keepcase(void) const;
+ FLAG get_forceucase(void) const;
+ FLAG get_warn(void) const;
+ int get_forbidwarn(void) const;
+ int get_checksharps(void) const;
+ char* encode_flag(unsigned short aflag) const;
+ int get_fullstrip() const;
+
+ private:
+ int parse_file(const char* affpath, const char* key);
+ int parse_flag(char* line, unsigned short* out, FileMgr* af);
+ int parse_num(char* line, int* out, FileMgr* af);
+ int parse_cpdsyllable(char* line, FileMgr* af);
+ int parse_reptable(char* line, FileMgr* af);
+ int parse_convtable(char* line,
+ FileMgr* af,
+ RepList** rl,
+ const char* keyword);
+ int parse_phonetable(char* line, FileMgr* af);
+ int parse_maptable(char* line, FileMgr* af);
+ int parse_breaktable(char* line, FileMgr* af);
+ int parse_checkcpdtable(char* line, FileMgr* af);
+ int parse_defcpdtable(char* line, FileMgr* af);
+ int parse_affix(char* line, const char at, FileMgr* af, char* dupflags);  // at is 'P' or 'S'; dupflags holds dupPFX/dupSFX bits per flag; returns 1 on error, 0 on success
+
+ void reverse_condition(std::string&);
+ void debugflag(char* result, unsigned short flag);
+ std::string& debugflag(std::string& result, unsigned short flag);
+ int condlen(const char*);
+ int encodeit(affentry& entry, const char* cs);  // a nonzero return makes parse_affix() fail
+ int build_pfxtree(PfxEntry* pfxptr);
+ int build_sfxtree(SfxEntry* sfxptr);
+ int process_pfx_order();
+ int process_sfx_order();
+ PfxEntry* process_pfx_in_order(PfxEntry* ptr, PfxEntry* nptr);
+ SfxEntry* process_sfx_in_order(SfxEntry* ptr, SfxEntry* nptr);
+ int process_pfx_tree_to_list();
+ int process_sfx_tree_to_list();
+ int redundant_condition(char, const char* strip, int stripl, const char* cond, int);  // returns 1 when cond is implied by strip; parse_affix() then uses "." instead
+ void finishFileMgr(FileMgr* afflst);
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/atypes.hxx b/extensions/spellcheck/hunspell/src/atypes.hxx
new file mode 100644
index 000000000..60826af20
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/atypes.hxx
@@ -0,0 +1,145 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef _ATYPES_HXX_
+#define _ATYPES_HXX_
+
+#ifndef HUNSPELL_WARNING  // skipped when a HUNSPELL_WARNING macro is already defined
+#include <stdio.h>
+#ifdef HUNSPELL_WARNING_ON
+#define HUNSPELL_WARNING fprintf  // diagnostics go to the FILE* passed as first argument
+#else
+// Warnings off: an empty inline function accepts and discards the
+// fprintf-style arguments (used instead of the C99 standard variadic macros).
+static inline void HUNSPELL_WARNING(FILE*, const char*, ...) {}
+#endif  // HUNSPELL_WARNING_ON
+#endif  // HUNSPELL_WARNING
+
+// HUNSTEM def.
+#define HUNSTEM
+
+#include "hashmgr.hxx"
+#include "w_char.hxx"
+#include <algorithm>
+#include <string>
+
+#define SETSIZE 256  // length of AffixMgr's pStart/sStart/pFlag/sFlag arrays
+#define CONTSIZE 65536  // length of AffixMgr::contclasses, which is indexed by flag value
+
+// affentry options (bits of affentry::opts)
+#define aeXPRODUCT (1 << 0)  // set by parse_affix() when the cross-product field is 'Y'
+#define aeUTF8 (1 << 1)  // set by parse_affix() when AffixMgr::utf8 is nonzero
+#define aeALIASF (1 << 2)  // set by parse_affix() when pHMgr->is_aliasf() is true
+#define aeALIASM (1 << 3)  // set by parse_affix() when pHMgr->is_aliasm() is true
+#define aeLONGCOND (1 << 4)  // presumably marks conditions stored through c.l.conds2 — TODO confirm
+
+// compound options (values of the in_compound parameters; IN_CPD_NOT is their default)
+#define IN_CPD_NOT 0
+#define IN_CPD_BEGIN 1
+#define IN_CPD_END 2
+#define IN_CPD_OTHER 3
+
+// info options (bit flags; presumably reported through the int* info parameters — TODO confirm)
+#define SPELL_COMPOUND (1 << 0)
+#define SPELL_FORBIDDEN (1 << 1)
+#define SPELL_ALLCAP (1 << 2)
+#define SPELL_NOCAP (1 << 3)
+#define SPELL_INITCAP (1 << 4)
+#define SPELL_ORIGCAP (1 << 5)
+#define SPELL_WARN (1 << 6)
+
+#define MAXLNLEN 8192
+
+#define MINCPDLEN 3
+#define MAXCOMPOUND 10
+#define MAXCONDLEN 20  // size of the conds buffer in affentry and AffEntry
+#define MAXCONDLEN_1 (MAXCONDLEN - sizeof(char*))  // size of conds1, so conds1 plus the conds2 pointer total MAXCONDLEN bytes
+
+#define MAXACC 1000
+
+#define FLAG unsigned short  // integer type used for a single affix flag
+#define FLAG_NULL 0x00       // default value of the needflag/cclass parameters
+#define FREE_FLAG(a) ((a) = 0)
+// TESTAFF(a, b, c): nonzero if flag b occurs in the sorted array a of c flags.
+#define TESTAFF(a, b, c) (std::binary_search((a), (a) + (c), (b)))
+
+struct affentry {  // one parsed SFX/PFX entry line, filled in by AffixMgr::parse_affix()
+ std::string strip;  // field 3: characters to strip; cleared when the file has "0"
+ std::string appnd;  // field 4: affix string (text before any '/'); cleared when it is "0"
+ char numconds;  // not written by parse_affix() itself — presumably set by encodeit(); TODO confirm
+ char opts;  // bit mask of the ae* options
+ unsigned short aflag;  // decoded affix flag, copied from the first entry of the rule
+ unsigned short* contclass;  // flags that follow '/' in field 4, or NULL when there is none
+ short contclasslen;  // number of flags in contclass
+ union {
+ char conds[MAXCONDLEN];
+ struct {
+ char conds1[MAXCONDLEN_1];
+ char* conds2;
+ } l;  // alternative view of the same bytes: shorter buffer followed by a pointer
+ } c;  // condition storage — presumably filled by encodeit(); TODO confirm
+ char* morphcode;  // field 6: morphological data (alias lookup or mystrdup'd copy), or NULL
+};
+
+struct guessword {  // element type of the wlst array passed to AffixMgr::expand_rootword()
+ char* word;
+ bool allow;
+ char* orig;
+};
+
+struct mapentry {  // element type of AffixMgr::maptable
+ char** set;  // array of C strings
+ int len;  // presumably the number of strings in set — TODO confirm
+};
+
+struct flagentry {  // element type of AffixMgr::defcpdtable
+ FLAG* def;  // array of flags
+ int len;  // presumably the number of flags in def — TODO confirm
+};
+
+struct patentry {  // element type of AffixMgr::checkcpdtable
+ char* pattern;
+ char* pattern2;
+ char* pattern3;
+ FLAG cond;  // NOTE(review): presumably a flag condition tied to pattern — confirm against parse_checkcpdtable()
+ FLAG cond2;
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/baseaffix.hxx b/extensions/spellcheck/hunspell/src/baseaffix.hxx
new file mode 100644
index 000000000..59256e92f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/baseaffix.hxx
@@ -0,0 +1,77 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef _BASEAFF_HXX_
+#define _BASEAFF_HXX_
+
+#include "hunvisapi.h"
+#include <string>
+
+class LIBHUNSPELL_DLL_EXPORTED AffEntry {  // shared affix data members; presumably the base of PfxEntry/SfxEntry — TODO confirm
+ private:
+ AffEntry(const AffEntry&);  // copy operations are declared private; no definition is visible here
+ AffEntry& operator=(const AffEntry&);
+
+ protected:
+ AffEntry()  // protected: only derived classes can construct; the union c is not initialised here
+ : numconds(0),
+ opts(0),
+ aflag(0),
+ morphcode(0),
+ contclass(NULL),
+ contclasslen(0) {}
+ std::string appnd;
+ std::string strip;
+ unsigned char numconds;  // note: struct affentry in atypes.hxx declares this field as plain char
+ char opts;
+ unsigned short aflag;
+ union {
+ char conds[MAXCONDLEN];  // MAXCONDLEN is defined in atypes.hxx, which this header does not include itself
+ struct {
+ char conds1[MAXCONDLEN_1];
+ char* conds2;
+ } l;  // alternative view of the same bytes: shorter buffer followed by a pointer
+ } c;
+ char* morphcode;
+ unsigned short* contclass;
+ short contclasslen;
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/csutil.cxx b/extensions/spellcheck/hunspell/src/csutil.cxx
new file mode 100644
index 000000000..1948e4a3b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/csutil.cxx
@@ -0,0 +1,2850 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <algorithm>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#include "csutil.hxx"
+#include "atypes.hxx"
+#include "langnum.hxx"
+
+// Unicode character encoding information
+// One record of case information: a 16-bit value `c` together with two
+// further 16-bit values named cupper and clower.
+// NOTE(review): presumably c is the code point and cupper/clower its
+// upper/lower-case mappings - confirm against utf_info.cxx.
+struct unicode_info {
+ unsigned short c;
+ unsigned short cupper;
+ unsigned short clower;
+};
+
+#ifdef _WIN32
+#include <windows.h>
+#include <wchar.h>
+#endif
+
+#ifdef OPENOFFICEORG
+#include <unicode/uchar.h>
+#else
+#ifndef MOZILLA_CLIENT
+#include "utf_info.cxx"
+#define UTF_LST_LEN (sizeof(utf_lst) / (sizeof(unicode_info)))
+#endif
+#endif
+
+#ifdef MOZILLA_CLIENT
+#include "nsCOMPtr.h"
+#include "nsIUnicodeEncoder.h"
+#include "nsIUnicodeDecoder.h"
+#include "nsUnicharUtils.h"
+#include "mozilla/dom/EncodingUtils.h"
+
+using mozilla::dom::EncodingUtils;
+#endif
+
+// Element type of the file-static utf_tbl array: a one-byte cletter field
+// plus 16-bit cupper/clower values.
+// NOTE(review): presumably cletter is a "this is a letter" marker and the
+// table is indexed by code point - confirm where utf_tbl is filled.
+struct unicode_info2 {
+ char cletter;
+ unsigned short cupper;
+ unsigned short clower;
+};
+
+// File-local case table, initially unset, and an integer counter for it.
+// NOTE(review): presumably utf_tbl_count is a usage/reference count that
+// lets several Hunspell instances share one table - confirm against the
+// code that allocates and frees utf_tbl (outside this view).
+static struct unicode_info2* utf_tbl = NULL;
+static int utf_tbl_count =
+ 0; // utf_tbl can be used by multiple Hunspell instances
+
+// Open `path` with `mode` and return the stream, or NULL on failure.
+//
+// On Windows, a path that starts with the "\\?\" prefix is treated as UTF-8,
+// converted to wide characters, normalized with _wfullpath() and opened with
+// _wfopen(); the wide mode is L"r" when `mode` is exactly "r" and L"rb" for
+// every other mode. All other paths (and all non-Windows builds) go straight
+// to fopen().
+FILE* myfopen(const char* path, const char* mode) {
+#ifdef _WIN32
+#define WIN32_LONG_PATH_PREFIX "\\\\?\\"
+  if (strncmp(path, WIN32_LONG_PATH_PREFIX, 4) == 0) {
+    FILE* f = NULL;
+    // first call only asks for the required length (in wide chars, incl. NUL)
+    int len = MultiByteToWideChar(CP_UTF8, 0, path, -1, NULL, 0);
+    if (len <= 0)
+      return NULL;  // path is not convertible
+    wchar_t* buff = (wchar_t*)malloc(len * sizeof(wchar_t));
+    wchar_t* buff2 = (wchar_t*)malloc(len * sizeof(wchar_t));
+    if (buff && buff2 &&
+        MultiByteToWideChar(CP_UTF8, 0, path, -1, buff, len) != 0 &&
+        _wfullpath(buff2, buff, len) != NULL) {
+      f = _wfopen(buff2, (strcmp(mode, "r") == 0) ? L"r" : L"rb");
+    }
+    // free unconditionally: free(NULL) is a no-op, and this also releases
+    // the buffer that did get allocated when the other allocation failed
+    free(buff);
+    free(buff2);
+    return f;
+  }
+#endif
+  return fopen(path, mode);
+}
+
+// Encode the 16-bit characters of `src` (high byte in .h, low byte in .l) as
+// UTF-8 into `dest`, replacing its previous content. Returns `dest`.
+// Each input unit becomes 1, 2 or 3 bytes; 4-byte sequences are not produced.
+std::string& u16_u8(std::string& dest, const std::vector<w_char>& src) {
+  dest.clear();
+  for (std::vector<w_char>::const_iterator wc = src.begin(); wc < src.end();
+       ++wc) {
+    if (!wc->h && !(wc->l & 0x80)) {
+      // < 0x80: plain 1-byte character
+      dest.push_back(static_cast<signed char>(wc->l));
+    } else if (wc->h >= 0x08) {
+      // >= 0x800: 3-byte sequence
+      dest.push_back(static_cast<signed char>(0xe0 + (wc->h >> 4)));
+      dest.push_back(
+          static_cast<signed char>(0x80 + ((wc->h & 0xf) << 2) + (wc->l >> 6)));
+      dest.push_back(static_cast<signed char>(0x80 + (wc->l & 0x3f)));
+    } else {
+      // 0x80..0x7FF: 2-byte sequence (for h == 0 the high-byte term is zero)
+      dest.push_back(
+          static_cast<signed char>(0xc0 + (wc->h << 2) + (wc->l >> 6)));
+      dest.push_back(static_cast<signed char>(0x80 + (wc->l & 0x3f)));
+    }
+  }
+  return dest;
+}
+
+// Decode the UTF-8 text in `src` into 16-bit characters stored in `dest`
+// (previous content is discarded).
+//
+// Malformed input (a stray continuation byte, or a lead byte that is not
+// followed by the continuation bytes it needs) produces the replacement
+// value h=0xff, l=0xfd and a warning, and decoding continues.
+// A 4-byte (or longer) lead byte cannot be represented: the replacement
+// value is appended and the function returns -1 immediately.
+// Otherwise the number of characters written to `dest` is returned.
+int u8_u16(std::vector<w_char>& dest, const std::string& src) {
+  dest.clear();
+  std::string::const_iterator u8 = src.begin();
+  std::string::const_iterator u8_max = src.end();
+
+  while (u8 < u8_max) {
+    w_char u2;
+    switch ((*u8) & 0xf0) {
+      case 0x00:
+      case 0x10:
+      case 0x20:
+      case 0x30:
+      case 0x40:
+      case 0x50:
+      case 0x60:
+      case 0x70: {
+        u2.h = 0;
+        u2.l = *u8;
+        break;
+      }
+      case 0x80:
+      case 0x90:
+      case 0xa0:
+      case 0xb0: {
+        HUNSPELL_WARNING(stderr,
+                         "UTF-8 encoding error. Unexpected continuation bytes "
+                         "in %ld. character position\n%s\n",
+                         static_cast<long>(std::distance(src.begin(), u8)),
+                         src.c_str());
+        u2.h = 0xff;
+        u2.l = 0xfd;
+        break;
+      }
+      case 0xc0:
+      case 0xd0: {  // 2-byte UTF-8 codes
+        // Only look at the next byte if there is one: a lead byte at the
+        // very end of the input is a truncated sequence, and dereferencing
+        // the past-the-end iterator is not allowed.
+        if ((u8 + 1) != u8_max && (*(u8 + 1) & 0xc0) == 0x80) {
+          u2.h = (*u8 & 0x1f) >> 2;
+          u2.l = (*u8 << 6) + (*(u8 + 1) & 0x3f);
+          ++u8;
+        } else {
+          HUNSPELL_WARNING(stderr,
+                           "UTF-8 encoding error. Missing continuation byte in "
+                           "%ld. character position:\n%s\n",
+                           static_cast<long>(std::distance(src.begin(), u8)),
+                           src.c_str());
+          u2.h = 0xff;
+          u2.l = 0xfd;
+        }
+        break;
+      }
+      case 0xe0: {  // 3-byte UTF-8 codes
+        if ((u8 + 1) != u8_max && (*(u8 + 1) & 0xc0) == 0x80) {
+          u2.h = ((*u8 & 0x0f) << 4) + ((*(u8 + 1) & 0x3f) >> 2);
+          ++u8;
+          // same end-of-input guard for the second continuation byte
+          if ((u8 + 1) != u8_max && (*(u8 + 1) & 0xc0) == 0x80) {
+            u2.l = (static_cast<unsigned char>(*u8) << 6) + (*(u8 + 1) & 0x3f);
+            ++u8;
+          } else {
+            HUNSPELL_WARNING(stderr,
+                             "UTF-8 encoding error. Missing continuation byte "
+                             "in %ld. character position:\n%s\n",
+                             static_cast<long>(std::distance(src.begin(), u8)),
+                             src.c_str());
+            u2.h = 0xff;
+            u2.l = 0xfd;
+          }
+        } else {
+          HUNSPELL_WARNING(stderr,
+                           "UTF-8 encoding error. Missing continuation byte in "
+                           "%ld. character position:\n%s\n",
+                           static_cast<long>(std::distance(src.begin(), u8)),
+                           src.c_str());
+          u2.h = 0xff;
+          u2.l = 0xfd;
+        }
+        break;
+      }
+      case 0xf0: {  // 4 or more byte UTF-8 codes
+        HUNSPELL_WARNING(stderr,
+                         "This UTF-8 encoding can't convert to UTF-16:\n%s\n",
+                         src.c_str());
+        u2.h = 0xff;
+        u2.l = 0xfd;
+        dest.push_back(u2);
+        return -1;
+      }
+    }
+    dest.push_back(u2);
+    ++u8;
+  }
+
+  return dest.size();
+}
+
+// Cut the next token off the front of *stringp, strsep()-style, but with a
+// single delimiter character instead of a delimiter set.
+//   delim != 0 : tokens are separated by that character
+//   delim == 0 : tokens are separated by a space or a tab
+// The delimiter is overwritten with NUL and *stringp is moved just past it
+// (or to the terminating NUL when no delimiter is left).
+// Returns the token, or NULL when *stringp already points at the end.
+
+char* mystrsep(char** stringp, const char delim) {
+  char* token = *stringp;
+  if (*token == '\0')
+    return NULL;
+
+  char* cut = NULL;
+  if (delim) {
+    cut = strchr(token, delim);
+  } else {
+    // isspace() is deliberately avoided: the text may be in a charset that
+    // has nothing to do with the current locale
+    char* scan = token;
+    while (*scan && *scan != ' ' && *scan != '\t')
+      ++scan;
+    if (*scan)
+      cut = scan;
+  }
+
+  if (!cut) {
+    // last token: leave *stringp on the terminator
+    *stringp = token + strlen(token);
+    return token;
+  }
+  *stringp = cut + 1;
+  *cut = '\0';
+  return token;
+}
+
+// Portable strdup(): returns a malloc()ed copy of `s` that the caller must
+// free(). Returns NULL when `s` is NULL or when the allocation fails (a
+// warning is emitted in the latter case).
+char* mystrdup(const char* s) {
+  if (!s)
+    return NULL;
+
+  const size_t bytes = strlen(s) + 1;  // include the terminator
+  char* copy = (char*)malloc(bytes);
+  if (!copy) {
+    HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");
+    return NULL;
+  }
+  memcpy(copy, s, bytes);
+  return copy;
+}
+
+// Bounded strcat: append `st` to `dest` only if the result, including its
+// terminator, fits into `max` bytes; otherwise `dest` is left untouched.
+// NULL arguments are tolerated. Always returns `dest`.
+char* mystrcat(char* dest, const char* st, int max) {
+  if (dest != NULL && st != NULL) {
+    const int used = strlen(dest);
+    const int extra = strlen(st);
+    if (used + extra + 1 <= max)
+      strcpy(dest + used, st);
+  }
+  return dest;
+}
+
+// Remove one trailing line end from `s` in place: "\r\n", "\n" or "\r".
+// Only characters at the very end are touched; a '\r' that is followed by
+// an ordinary character is part of the text and is kept (it used to
+// truncate the string, dropping the final character).
+void mychomp(char* s) {
+  size_t k = strlen(s);
+  if (k == 0)
+    return;
+  if (s[k - 1] == '\n') {
+    s[--k] = '\0';
+    // "\r\n": also drop the carriage return in front of the newline
+    if (k > 0 && s[k - 1] == '\r')
+      s[k - 1] = '\0';
+  } else if (s[k - 1] == '\r') {
+    s[k - 1] = '\0';
+  }
+}
+
+// Split `text` at every `breakchar` and store malloc()ed copies of the
+// non-empty pieces in a newly allocated array returned through *lines.
+// Returns the number of stored lines. When the result is 0 (NULL text, no
+// non-empty piece, or an allocation failure) *lines is set to NULL and
+// nothing is left allocated, so the caller has nothing to free.
+int line_tok(const char* text, char*** lines, char breakchar) {
+  *lines = NULL;
+  if (!text) {
+    return 0;
+  }
+  char* dup = mystrdup(text);
+  if (!dup) {
+    return 0;  // out of memory: do not hand NULL to strchr()
+  }
+
+  // Pass 1: turn every break character into a terminator and count pieces.
+  // A NUL breakchar would make strchr() find the terminator itself and then
+  // search past the buffer, so it is treated as "no separator".
+  int linenum = 1;
+  for (char* p = breakchar ? strchr(dup, breakchar) : NULL; p;
+       p = strchr(p + 1, breakchar)) {
+    linenum++;
+    *p = '\0';
+  }
+
+  *lines = (char**)malloc(linenum * sizeof(char*));
+  if (!(*lines)) {
+    free(dup);
+    return 0;
+  }
+
+  // Pass 2: copy the non-empty pieces.
+  char* p = dup;
+  int l = 0;
+  for (int i = 0; i < linenum; i++) {
+    if (*p != '\0') {
+      (*lines)[l] = mystrdup(p);
+      if (!(*lines)[l]) {
+        // roll back completely, including the array itself
+        for (i = 0; i < l; i++)
+          free((*lines)[i]);
+        free(*lines);
+        *lines = NULL;
+        free(dup);
+        return 0;
+      }
+      l++;
+    }
+    p += strlen(p) + 1;
+  }
+  free(dup);
+  if (!l) {
+    free(*lines);
+    *lines = NULL;
+  }
+  return l;
+}
+
+// Remove duplicate lines from `text` in place. Lines are separated by
+// `breakchar`; empty lines are dropped by line_tok(). The first occurrence
+// of each line is kept, in the original order. Returns `text`.
+// If line_tok() yields no lines (NULL/blank text or an allocation failure)
+// `text` is returned unchanged, since there is nothing to rebuild it from.
+char* line_uniq(char* text, char breakchar) {
+  char** lines = NULL;
+  int linenum = line_tok(text, &lines, breakchar);
+  if (linenum <= 0 || !lines)
+    return text;  // lines[0] does not exist
+
+  int i;
+  // the result is never longer than the input, so it is rebuilt in `text`
+  strcpy(text, lines[0]);
+  for (i = 1; i < linenum; i++) {
+    int dup = 0;
+    for (int j = 0; j < i; j++) {
+      if (strcmp(lines[i], lines[j]) == 0) {
+        dup = 1;
+        break;
+      }
+    }
+    if (!dup) {
+      if ((i > 1) || (*(lines[0]) != '\0')) {
+        sprintf(text + strlen(text), "%c", breakchar);
+      }
+      strcat(text, lines[i]);
+    }
+  }
+  for (i = 0; i < linenum; i++) {
+    free(lines[i]);
+  }
+  free(lines);
+  return text;
+}
+
+// uniq and boundary for compound analysis: "1\n\2\n\1" -> " ( \1 | \2 ) "
+//
+// *text must be a malloc()ed string. If it contains no `breakchar` it is
+// returned as is. Otherwise duplicate lines are removed; a single remaining
+// line is copied back into *text, several remaining lines are written as
+// " ( a | b ) " into a new buffer that replaces *text (the old one is
+// freed). Returns *text; on allocation failure the original *text is kept.
+char* line_uniq_app(char** text, char breakchar) {
+  if (!strchr(*text, breakchar)) {
+    return *text;
+  }
+
+  char** lines = NULL;
+  int i;
+  int linenum = line_tok(*text, &lines, breakchar);
+  int dup = 0;
+  for (i = 0; i < linenum; i++) {
+    // compare with ALL earlier lines, the directly preceding one included
+    for (int j = 0; j < i; j++) {
+      if (strcmp(lines[i], lines[j]) == 0) {
+        *(lines[i]) = '\0';  // mark as duplicate
+        dup++;
+        break;
+      }
+    }
+  }
+  if ((linenum - dup) == 1) {
+    strcpy(*text, lines[0]);
+    freelist(&lines, linenum);
+    return *text;
+  }
+  // Worst case (no duplicates): " ( " + each line followed by " | " + NUL
+  //   = 3 + sum(len) + 3 * linenum + 1
+  // and strlen(*text) >= sum(len) + (linenum - 1), hence the "+ 2" below.
+  char* newtext = (char*)malloc(strlen(*text) + 2 * linenum + 3 + 2);
+  if (newtext) {
+    free(*text);
+    *text = newtext;
+  } else {
+    freelist(&lines, linenum);
+    return *text;
+  }
+  strcpy(*text, " ( ");
+  for (i = 0; i < linenum; i++)
+    if (*(lines[i])) {
+      sprintf(*text + strlen(*text), "%s%s", lines[i], " | ");
+    }
+  (*text)[strlen(*text) - 2] = ')';  // " ) "
+  freelist(&lines, linenum);
+  return *text;
+}
+
+// Append `apd` to the end of every line of `str`: it is inserted in front of
+// each '\n' and once more at the very end. Returns `str`.
+std::string& strlinecat(std::string& str, const std::string& apd) {
+  for (size_t nl = str.find('\n', 0); nl != std::string::npos;
+       nl = str.find('\n', nl + apd.length() + 1)) {
+    // the next search resumes behind the inserted text and this newline
+    str.insert(nl, apd);
+  }
+  str.append(apd);
+  return str;
+}
+
+// morphcmp(): compare MORPH_DERI_SFX, MORPH_INFL_SFX and MORPH_TERM_SFX fields
+// in the first line of the inputs
+// return 0, if inputs equal
+// return 1, if inputs may equal with a secondary suffix
+// otherwise return -1
+int morphcmp(const char* s, const char* t) {
+ // se / te: set once the value currently compared in s / t is followed by a
+ // field terminator (space, newline, tab or NUL)
+ int se = 0;
+ int te = 0;
+ // sl / tl: first '\n' in s / t, i.e. the end of the first line (or NULL)
+ const char* sl;
+ const char* tl;
+ // olds / oldt: start of the current tag search; olds is reset to NULL when
+ // the search had to fall back to MORPH_TERM_SFX, and that decides between
+ // the -1 and 1 results on a mismatch below
+ const char* olds;
+ const char* oldt;
+ // a missing input is reported as "may be equal"
+ if (!s || !t)
+ return 1;
+ // locate the first suffix tag of s: derivational first, then inflectional
+ // (if none, or if the hit lies behind the first line), then terminal
+ olds = s;
+ sl = strchr(s, '\n');
+ s = strstr(s, MORPH_DERI_SFX);
+ if (!s || (sl && sl < s))
+ s = strstr(olds, MORPH_INFL_SFX);
+ if (!s || (sl && sl < s)) {
+ s = strstr(olds, MORPH_TERM_SFX);
+ olds = NULL;
+ }
+ // same lookup for t
+ oldt = t;
+ tl = strchr(t, '\n');
+ t = strstr(t, MORPH_DERI_SFX);
+ if (!t || (tl && tl < t))
+ t = strstr(oldt, MORPH_INFL_SFX);
+ if (!t || (tl && tl < t)) {
+ t = strstr(oldt, MORPH_TERM_SFX);
+ oldt = NULL;
+ }
+ // walk both inputs tag by tag while each still has a tag on its first line
+ while (s && t && (!sl || sl > s) && (!tl || tl > t)) {
+ // skip the tag itself so that s / t point at the field values
+ s += MORPH_TAG_LEN;
+ t += MORPH_TAG_LEN;
+ se = 0;
+ te = 0;
+ // advance in lockstep while the characters agree; the terminator test is
+ // made on the character reached after advancing
+ while ((*s == *t) && !se && !te) {
+ s++;
+ t++;
+ switch (*s) {
+ case ' ':
+ case '\n':
+ case '\t':
+ case '\0':
+ se = 1;
+ }
+ switch (*t) {
+ case ' ':
+ case '\n':
+ case '\t':
+ case '\0':
+ te = 1;
+ }
+ }
+ // the two values did not end together, so they differ
+ if (!se || !te) {
+ // not terminal suffix difference
+ if (olds)
+ return -1;
+ return 1;
+ }
+ // values matched: look up the next tag in s, continuing from here, with
+ // the same derivational -> inflectional -> terminal fallback as above
+ olds = s;
+ s = strstr(s, MORPH_DERI_SFX);
+ if (!s || (sl && sl < s))
+ s = strstr(olds, MORPH_INFL_SFX);
+ if (!s || (sl && sl < s)) {
+ s = strstr(olds, MORPH_TERM_SFX);
+ olds = NULL;
+ }
+ // and the next tag in t
+ oldt = t;
+ t = strstr(t, MORPH_DERI_SFX);
+ if (!t || (tl && tl < t))
+ t = strstr(oldt, MORPH_INFL_SFX);
+ if (!t || (tl && tl < t)) {
+ t = strstr(oldt, MORPH_TERM_SFX);
+ oldt = NULL;
+ }
+ }
+ // equal only if both inputs ran out of tags and the last compared values
+ // ended together
+ if (!s && !t && se && te)
+ return 0;
+ return 1;
+}
+
+// Count the suffix tags in `morph`. Starting at the beginning, the next tag
+// is looked up in the order MORPH_DERI_SFX, MORPH_INFL_SFX, MORPH_TERM_SFX
+// (first kind that is found anywhere ahead wins); the search then resumes
+// one character behind the hit. Returns 0 for NULL or empty input.
+int get_sfxcount(const char* morph) {
+  if (!morph || !*morph)
+    return 0;
+
+  int count = 0;
+  const char* from = morph;
+  for (;;) {
+    const char* hit = strstr(from, MORPH_DERI_SFX);
+    if (!hit)
+      hit = strstr(from, MORPH_INFL_SFX);
+    if (!hit)
+      hit = strstr(from, MORPH_TERM_SFX);
+    if (!hit)
+      break;
+    ++count;
+    from = hit + 1;
+  }
+  return count;
+}
+
+// Length of the field starting at `r`: the number of characters before the
+// first space, tab, newline or terminating NUL. Returns 0 for NULL.
+int fieldlen(const char* r) {
+  if (!r)
+    return 0;
+
+  int len = 0;
+  for (;; ++len) {
+    const char c = r[len];
+    if (c == ' ' || c == '\t' || c == '\0' || c == '\n')
+      break;
+  }
+  return len;
+}
+
+// Copy the value of the first field tagged `var` from `morph` into `dest`.
+// The value starts MORPH_TAG_LEN characters after the position where `var`
+// is found and runs up to the next space, tab, newline or the end.
+// Returns false (leaving `dest` untouched) when `morph` is empty or does not
+// contain `var`; returns true otherwise. A tag cut off at the very end of
+// `morph` yields an empty value instead of an out_of_range exception.
+bool copy_field(std::string& dest,
+                const std::string& morph,
+                const std::string& var) {
+  if (morph.empty())
+    return false;
+  size_t pos = morph.find(var);
+  if (pos == std::string::npos)
+    return false;
+  dest.clear();
+
+  // read straight from `morph`; no temporary copy of the tail is needed
+  for (size_t i = pos + MORPH_TAG_LEN; i < morph.size(); ++i) {
+    const char c(morph[i]);
+    if (c == ' ' || c == '\t' || c == '\n')
+      break;
+    dest.push_back(c);
+  }
+
+  return true;
+}
+
+// Replace every occurrence of `search` in `str` with `replace`, scanning
+// left to right and continuing behind each replacement (so text produced by
+// a replacement is never matched again). An empty `search` matches at every
+// position and would never terminate, so it leaves `str` unchanged.
+// Returns `str`.
+std::string& mystrrep(std::string& str,
+                      const std::string& search,
+                      const std::string& replace) {
+  if (search.empty())
+    return str;
+  size_t pos = 0;
+  while ((pos = str.find(search, pos)) != std::string::npos) {
+    str.replace(pos, search.length(), replace);
+    pos += replace.length();
+  }
+  return str;
+}
+
+// In-place variant for C strings: replace every occurrence of `pat` in
+// `word` with `rep` and return `word`.
+// The caller must make sure the buffer behind `word` is large enough for
+// the result when `rep` is longer than `pat`.
+// Like the std::string overload, the scan continues behind each
+// replacement, so a `rep` that contains `pat` cannot loop forever, and an
+// empty `pat` is ignored.
+char* mystrrep(char* word, const char* pat, const char* rep) {
+  const size_t patlen = strlen(pat);
+  if (patlen == 0)
+    return word;
+  const size_t replen = strlen(rep);
+
+  char* pos = strstr(word, pat);
+  while (pos) {
+    if (replen != patlen) {
+      // shift the tail (terminator included) to open or close the gap;
+      // memmove handles the overlapping ranges
+      memmove(pos + replen, pos + patlen, strlen(pos + patlen) + 1);
+    }
+    memcpy(pos, rep, replen);
+    pos = strstr(pos + replen, pat);
+  }
+  return word;
+}
+
+// Reverse `word` byte by byte, in place; returns its length.
+size_t reverseword(std::string& word) {
+  std::string flipped(word.rbegin(), word.rend());
+  word.swap(flipped);
+  return word.size();
+}
+
+// Reverse a UTF-8 `word` character by character: decode it with u8_u16(),
+// reverse the decoded units, and encode the result back into `word` with
+// u16_u8(). Returns the number of decoded units.
+size_t reverseword_utf(std::string& word) {
+  std::vector<w_char> units;
+  u8_u16(units, word);
+  std::vector<w_char> flipped(units.rbegin(), units.rend());
+  u16_u8(word, flipped);
+  return flipped.size();
+}
+
+// Remove duplicate strings from the first `n` slots of `list`.
+// A later copy of an earlier string is free()d; the survivors are then
+// packed towards the front in their original order. Slot 0 is always kept.
+// Returns the new element count (`n` itself when n < 2). Slots behind the
+// returned count are not cleared.
+int uniqlist(char** list, int n) {
+  if (n < 2)
+    return n;
+
+  // pass 1: drop every entry that equals an earlier, still present entry
+  for (int cur = 1; cur < n; ++cur) {
+    if (!list[cur])
+      continue;
+    for (int prev = 0; prev < cur; ++prev) {
+      if (list[prev] && strcmp(list[prev], list[cur]) == 0) {
+        free(list[cur]);
+        list[cur] = NULL;
+        break;
+      }
+    }
+  }
+
+  // pass 2: close the gaps
+  int kept = 1;
+  for (int cur = 1; cur < n; ++cur) {
+    if (list[cur])
+      list[kept++] = list[cur];
+  }
+  return kept;
+}
+
+// Free the first `n` strings of the array *list, then the array itself, and
+// reset *list to NULL. Does nothing when `list` or *list is NULL.
+void freelist(char*** list, int n) {
+  if (!list || !*list)
+    return;
+
+  char** items = *list;
+  for (int k = 0; k < n; ++k)
+    free(items[k]);
+  free(items);
+  *list = NULL;
+}
+
+namespace {
+// Range-checked accessors for a cs_info table: for an index in 0..255 they
+// return the requested field of csconv[nIndex]; any other index is returned
+// as is (converted to unsigned char).
+
+// upper-case field
+unsigned char cupper(const struct cs_info* csconv, int nIndex) {
+  const bool in_table = (nIndex >= 0) && (nIndex <= 255);
+  if (in_table)
+    return csconv[nIndex].cupper;
+  return nIndex;
+}
+
+// lower-case field
+unsigned char clower(const struct cs_info* csconv, int nIndex) {
+  const bool in_table = (nIndex >= 0) && (nIndex <= 255);
+  if (in_table)
+    return csconv[nIndex].clower;
+  return nIndex;
+}
+
+// case-flag field
+unsigned char ccase(const struct cs_info* csconv, int nIndex) {
+  const bool in_table = (nIndex >= 0) && (nIndex <= 255);
+  if (in_table)
+    return csconv[nIndex].ccase;
+  return nIndex;
+}
+}
+
+// Return the upper-case form of `u` as given by unicodetoupper() for
+// language `langnum`; `u` is returned unchanged when it maps to itself.
+w_char upper_utf(w_char u, int langnum) {
+  const unsigned short code = (u.h << 8) + u.l;
+  const unsigned short mapped = unicodetoupper(code, langnum);
+  if (mapped != code) {
+    u.h = (unsigned char)(mapped >> 8);
+    u.l = (unsigned char)(mapped & 0x00FF);
+  }
+  return u;
+}
+
+// Return the lower-case form of `u` as given by unicodetolower() for
+// language `langnum`; `u` is returned unchanged when it maps to itself.
+w_char lower_utf(w_char u, int langnum) {
+  const unsigned short code = (u.h << 8) + u.l;
+  const unsigned short mapped = unicodetolower(code, langnum);
+  if (mapped != code) {
+    u.h = (unsigned char)(mapped >> 8);
+    u.l = (unsigned char)(mapped & 0x00FF);
+  }
+  return u;
+}
+
+// Upper-case every byte of `s` in place through the `csconv` table.
+// Returns `s`.
+std::string& mkallcap(std::string& s, const struct cs_info* csconv) {
+  const size_t len = s.size();
+  for (size_t k = 0; k != len; ++k) {
+    s[k] = cupper(csconv, static_cast<unsigned char>(s[k]));
+  }
+  return s;
+}
+
+// Lower-case every byte of `s` in place through the `csconv` table.
+// Returns `s`.
+std::string& mkallsmall(std::string& s, const struct cs_info* csconv) {
+  const size_t len = s.size();
+  for (size_t k = 0; k != len; ++k) {
+    s[k] = clower(csconv, static_cast<unsigned char>(s[k]));
+  }
+  return s;
+}
+
+// Lower-case every character of `u` in place for language `langnum`.
+// Returns `u`.
+std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u,
+                                    int langnum) {
+  for (std::vector<w_char>::iterator wc = u.begin(); wc != u.end(); ++wc) {
+    // lower_utf() hands back the character itself when it has no mapping
+    *wc = lower_utf(*wc, langnum);
+  }
+  return u;
+}
+
+// Upper-case every character of `u` in place for language `langnum`.
+// Returns `u`.
+std::vector<w_char>& mkallcap_utf(std::vector<w_char>& u, int langnum) {
+  for (std::vector<w_char>::iterator wc = u.begin(); wc != u.end(); ++wc) {
+    // upper_utf() hands back the character itself when it has no mapping
+    *wc = upper_utf(*wc, langnum);
+  }
+  return u;
+}
+
+// Upper-case the first byte of `s` through the `csconv` table; an empty
+// string is left alone. Returns `s`.
+std::string& mkinitcap(std::string& s, const struct cs_info* csconv) {
+  if (s.empty())
+    return s;
+  std::string::iterator first = s.begin();
+  *first = cupper(csconv, static_cast<unsigned char>(*first));
+  return s;
+}
+
+// Upper-case the first character of `u` for language `langnum`; an empty
+// vector is left alone. Returns `u`.
+std::vector<w_char>& mkinitcap_utf(std::vector<w_char>& u, int langnum) {
+  if (u.empty())
+    return u;
+  u.front() = upper_utf(u.front(), langnum);
+  return u;
+}
+
+// Lower-case the first byte of `s` through the `csconv` table; an empty
+// string is left alone. Returns `s`.
+std::string& mkinitsmall(std::string& s, const struct cs_info* csconv) {
+  if (s.empty())
+    return s;
+  std::string::iterator first = s.begin();
+  *first = clower(csconv, static_cast<unsigned char>(*first));
+  return s;
+}
+
+// Lower-case the first character of `u` for language `langnum`; an empty
+// vector is left alone. Returns `u`.
+std::vector<w_char>& mkinitsmall_utf(std::vector<w_char>& u, int langnum) {
+  if (u.empty())
+    return u;
+  u.front() = lower_utf(u.front(), langnum);
+  return u;
+}
+
+// Write the pointer value `source` byte-wise into the buffer at `dest`
+// (sizeof(char*) bytes). memcpy is used, so `dest` needs no particular
+// alignment.
+void store_pointer(char* dest, char* source) {
+  char* const value = source;
+  memcpy(dest, &value, sizeof(value));
+}
+
+// Read back a pointer value that was stored byte-wise at `s`
+// (sizeof(char*) bytes), the inverse of store_pointer().
+char* get_stored_pointer(const char* s) {
+  char* stored = NULL;
+  memcpy(&stored, s, sizeof(stored));
+  return stored;
+}
+
+#ifndef MOZILLA_CLIENT
+
+// The tables below are simple per-byte character mappings, one table for
+// each supported 8-bit encoding. They supply the data for the isupper,
+// tolower and toupper equivalents used with these encodings.
+
+static struct cs_info iso1_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9},
+ {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc},
+ {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf},
+ {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2},
+ {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5},
+ {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8},
+ {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb},
+ {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce},
+ {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1},
+ {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4},
+ {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7},
+ {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda},
+ {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd},
+ {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0},
+ {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3},
+ {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6},
+ {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9},
+ {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc},
+ {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf},
+ {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2},
+ {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5},
+ {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8},
+ {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb},
+ {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde},
+ {0x00, 0xff, 0xff}};
+
+static struct cs_info iso2_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xb1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x01, 0xb3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x01, 0xb5, 0xa5}, {0x01, 0xb6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x01, 0xb9, 0xa9}, {0x01, 0xba, 0xaa},
+ {0x01, 0xbb, 0xab}, {0x01, 0xbc, 0xac}, {0x00, 0xad, 0xad},
+ {0x01, 0xbe, 0xae}, {0x01, 0xbf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xa1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xa3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xa5}, {0x00, 0xb6, 0xa6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xa9},
+ {0x00, 0xba, 0xaa}, {0x00, 0xbb, 0xab}, {0x00, 0xbc, 0xac},
+ {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xae}, {0x00, 0xbf, 0xaf},
+ {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2},
+ {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5},
+ {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8},
+ {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb},
+ {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce},
+ {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1},
+ {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4},
+ {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7},
+ {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda},
+ {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd},
+ {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0},
+ {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3},
+ {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6},
+ {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9},
+ {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc},
+ {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf},
+ {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2},
+ {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5},
+ {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8},
+ {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb},
+ {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde},
+ {0x00, 0xff, 0xff}};
+
+static struct cs_info iso3_tbl[] = {  // 256-entry per-byte case table; presumably for ISO-8859-3 (going by the name -- confirm against the encoding lookup).
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},  // Entry i describes byte value i.
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},  // Fields appear to be {is-upper flag, lower-case byte, upper-case byte};
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},  // inferred from the ASCII rows, struct cs_info is declared elsewhere -- confirm.
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},  // 0x41..0x5a: ASCII upper case, lower case is 0x20 above
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},  // 0x61..0x7a: ASCII lower case
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},  // 0x80..0xa0: every byte maps to itself
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xb1, 0xa1},  // 0xa1..0xaf: cased bytes pair with the byte 0x10 above (not 0x20)
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x00, 0xa5, 0xa5}, {0x01, 0xb6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x01, 0x69, 0xa9}, {0x01, 0xba, 0xaa},  // 0xa9 lowercases to ASCII 0x69 rather than to 0xb9
+ {0x01, 0xbb, 0xab}, {0x01, 0xbc, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x01, 0xbf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xa1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xa6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0x49},  // 0xb9 uppercases to ASCII 0x49 rather than to 0xa9
+ {0x00, 0xba, 0xaa}, {0x00, 0xbb, 0xab}, {0x00, 0xbc, 0xac},
+ {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xaf},
+ {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2},  // 0xc0..0xde: upper case, lower case is 0x20 above (exceptions map to themselves)
+ {0x00, 0xc3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5},  // 0xc3 has no case mapping
+ {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8},
+ {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb},
+ {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce},
+ {0x01, 0xef, 0xcf}, {0x00, 0xd0, 0xd0}, {0x01, 0xf1, 0xd1},  // 0xd0 has no case mapping
+ {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4},
+ {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7},
+ {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda},
+ {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd},
+ {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0},  // 0xe0..0xfe: lower-case counterparts
+ {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xe3},
+ {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6},
+ {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9},
+ {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc},
+ {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf},
+ {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2},
+ {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5},
+ {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8},
+ {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb},
+ {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde},
+ {0x00, 0xff, 0xff}};
+
+static struct cs_info iso4_tbl[] = {  // 256-entry per-byte case table; presumably for ISO-8859-4 (going by the name -- confirm against the encoding lookup).
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},  // Entry i describes byte value i.
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},  // Fields appear to be {is-upper flag, lower-case byte, upper-case byte};
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},  // inferred from the ASCII rows, struct cs_info is declared elsewhere -- confirm.
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},  // 0x41..0x5a: ASCII upper case, lower case is 0x20 above
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},  // 0x61..0x7a: ASCII lower case
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},  // 0x80..0xa0: every byte maps to itself
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xb1, 0xa1},  // 0xa1..0xae: cased bytes pair with the byte 0x10 above (not 0x20)
+ {0x00, 0xa2, 0xa2}, {0x01, 0xb3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x01, 0xb5, 0xa5}, {0x01, 0xb6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x01, 0xb9, 0xa9}, {0x01, 0xba, 0xaa},
+ {0x01, 0xbb, 0xab}, {0x01, 0xbc, 0xac}, {0x00, 0xad, 0xad},
+ {0x01, 0xbe, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xa1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xa3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xa5}, {0x00, 0xb6, 0xa6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xa9},
+ {0x00, 0xba, 0xaa}, {0x00, 0xbb, 0xab}, {0x00, 0xbc, 0xac},
+ {0x01, 0xbf, 0xbd}, {0x00, 0xbe, 0xae}, {0x00, 0xbf, 0xbd},  // 0xbd/0xbf are capital/small ENG in ISO-8859-4: the one pair that sits 2 apart
+ {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2},  // 0xc0..0xde: upper case, lower case is 0x20 above (0xd7 maps to itself)
+ {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5},
+ {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8},
+ {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb},
+ {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce},
+ {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1},
+ {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4},
+ {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7},
+ {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda},
+ {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd},
+ {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0},  // 0xe0..0xfe: lower-case counterparts
+ {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3},
+ {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6},
+ {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9},
+ {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc},
+ {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf},
+ {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2},
+ {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5},
+ {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8},
+ {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb},
+ {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde},
+ {0x00, 0xff, 0xff}};
+
+static struct cs_info iso5_tbl[] = {  // 256-entry per-byte case table; presumably for ISO-8859-5 (going by the name -- confirm against the encoding lookup).
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},  // Entry i describes byte value i.
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},  // Fields appear to be {is-upper flag, lower-case byte, upper-case byte};
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},  // inferred from the ASCII rows, struct cs_info is declared elsewhere -- confirm.
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},  // 0x41..0x5a: ASCII upper case, lower case is 0x20 above
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},  // 0x61..0x7a: ASCII lower case
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},  // 0x80..0xa0: every byte maps to itself
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xf1, 0xa1},  // 0xa1..0xaf: upper case, lower case is 0x50 above (0xad maps to itself)
+ {0x01, 0xf2, 0xa2}, {0x01, 0xf3, 0xa3}, {0x01, 0xf4, 0xa4},
+ {0x01, 0xf5, 0xa5}, {0x01, 0xf6, 0xa6}, {0x01, 0xf7, 0xa7},
+ {0x01, 0xf8, 0xa8}, {0x01, 0xf9, 0xa9}, {0x01, 0xfa, 0xaa},
+ {0x01, 0xfb, 0xab}, {0x01, 0xfc, 0xac}, {0x00, 0xad, 0xad},
+ {0x01, 0xfe, 0xae}, {0x01, 0xff, 0xaf}, {0x01, 0xd0, 0xb0},  // 0xb0..0xcf: upper case, lower case is 0x20 above
+ {0x01, 0xd1, 0xb1}, {0x01, 0xd2, 0xb2}, {0x01, 0xd3, 0xb3},
+ {0x01, 0xd4, 0xb4}, {0x01, 0xd5, 0xb5}, {0x01, 0xd6, 0xb6},
+ {0x01, 0xd7, 0xb7}, {0x01, 0xd8, 0xb8}, {0x01, 0xd9, 0xb9},
+ {0x01, 0xda, 0xba}, {0x01, 0xdb, 0xbb}, {0x01, 0xdc, 0xbc},
+ {0x01, 0xdd, 0xbd}, {0x01, 0xde, 0xbe}, {0x01, 0xdf, 0xbf},
+ {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2},
+ {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5},
+ {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8},
+ {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb},
+ {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce},
+ {0x01, 0xef, 0xcf}, {0x00, 0xd0, 0xb0}, {0x00, 0xd1, 0xb1},  // 0xd0..0xef: lower-case counterparts of 0xb0..0xcf
+ {0x00, 0xd2, 0xb2}, {0x00, 0xd3, 0xb3}, {0x00, 0xd4, 0xb4},
+ {0x00, 0xd5, 0xb5}, {0x00, 0xd6, 0xb6}, {0x00, 0xd7, 0xb7},
+ {0x00, 0xd8, 0xb8}, {0x00, 0xd9, 0xb9}, {0x00, 0xda, 0xba},
+ {0x00, 0xdb, 0xbb}, {0x00, 0xdc, 0xbc}, {0x00, 0xdd, 0xbd},
+ {0x00, 0xde, 0xbe}, {0x00, 0xdf, 0xbf}, {0x00, 0xe0, 0xc0},
+ {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3},
+ {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6},
+ {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9},
+ {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc},
+ {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf},
+ {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xa1}, {0x00, 0xf2, 0xa2},  // 0xf1..0xff: lower-case counterparts of 0xa1..0xaf; 0xf0 maps to itself
+ {0x00, 0xf3, 0xa3}, {0x00, 0xf4, 0xa4}, {0x00, 0xf5, 0xa5},
+ {0x00, 0xf6, 0xa6}, {0x00, 0xf7, 0xa7}, {0x00, 0xf8, 0xa8},
+ {0x00, 0xf9, 0xa9}, {0x00, 0xfa, 0xaa}, {0x00, 0xfb, 0xab},
+ {0x00, 0xfc, 0xac}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xae},  // 0xfd maps to itself (its counterpart slot 0xad is caseless too)
+ {0x00, 0xff, 0xaf}};
+
+static struct cs_info iso6_tbl[] = {  // 256-entry per-byte case table; presumably for ISO-8859-6 (going by the name -- confirm against the encoding lookup).
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},  // Entry i describes byte value i.
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},  // Fields appear to be {is-upper flag, lower-case byte, upper-case byte};
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},  // inferred from the ASCII rows, struct cs_info is declared elsewhere -- confirm.
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},  // Only the ASCII letters carry case mappings in this table.
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},  // 0x41..0x5a: ASCII upper case, lower case is 0x20 above
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},  // 0x61..0x7a: ASCII lower case
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},  // 0x80..0xff: every byte maps to itself with the flag clear
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9},
+ {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc},
+ {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf},
+ {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2},
+ {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5},
+ {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8},
+ {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb},
+ {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce},
+ {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1},
+ {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4},
+ {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7},
+ {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda},
+ {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd},
+ {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0},
+ {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3},
+ {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6},
+ {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9},
+ {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec},
+ {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef},
+ {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2},
+ {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5},
+ {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8},
+ {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb},
+ {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe},
+ {0x00, 0xff, 0xff}};
+
+static struct cs_info iso7_tbl[] = {  // 256-entry per-byte case table; presumably for ISO-8859-7 (going by the name -- confirm against the encoding lookup).
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},  // Entry i describes byte value i.
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},  // Fields appear to be {is-upper flag, lower-case byte, upper-case byte};
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},  // inferred from the ASCII rows, struct cs_info is declared elsewhere -- confirm.
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},  // 0x41..0x5a: ASCII upper case, lower case is 0x20 above
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},  // 0x61..0x7a: ASCII lower case
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},  // 0x80..0xb5: every byte maps to itself
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x01, 0xdc, 0xb6},  // 0xb6, 0xb8..0xba: upper case whose lower-case bytes are 0xdc..0xdf
+ {0x00, 0xb7, 0xb7}, {0x01, 0xdd, 0xb8}, {0x01, 0xde, 0xb9},
+ {0x01, 0xdf, 0xba}, {0x00, 0xbb, 0xbb}, {0x01, 0xfc, 0xbc},  // 0xbc, 0xbe, 0xbf: upper case whose lower-case bytes are 0xfc..0xfe
+ {0x00, 0xbd, 0xbd}, {0x01, 0xfd, 0xbe}, {0x01, 0xfe, 0xbf},
+ {0x00, 0xc0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2},  // 0xc1..0xdb: upper case, lower case is 0x20 above (0xc0 and 0xd2 map to themselves)
+ {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5},
+ {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8},
+ {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb},
+ {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce},
+ {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1},
+ {0x00, 0xd2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4},
+ {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x01, 0xf7, 0xd7},
+ {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda},
+ {0x01, 0xfb, 0xdb}, {0x00, 0xdc, 0xb6}, {0x00, 0xdd, 0xb8},  // 0xdc..0xdf: lower-case counterparts of 0xb6, 0xb8..0xba
+ {0x00, 0xde, 0xb9}, {0x00, 0xdf, 0xba}, {0x00, 0xe0, 0xe0},
+ {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3},
+ {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6},
+ {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9},
+ {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc},
+ {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf},
+ {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd3},  // 0xf2 and 0xf3 both uppercase to 0xd3 (0xd3 lowercases to 0xf3 only)
+ {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5},
+ {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xd7}, {0x00, 0xf8, 0xd8},
+ {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb},
+ {0x00, 0xfc, 0xbc}, {0x00, 0xfd, 0xbe}, {0x00, 0xfe, 0xbf},  // 0xfc..0xfe: lower-case counterparts of 0xbc, 0xbe, 0xbf
+ {0x00, 0xff, 0xff}};
+
+static struct cs_info iso8_tbl[] = {  // 256-entry per-byte case table; presumably for ISO-8859-8 (going by the name -- confirm against the encoding lookup).
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},  // Entry i describes byte value i.
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},  // Fields appear to be {is-upper flag, lower-case byte, upper-case byte};
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},  // inferred from the ASCII rows, struct cs_info is declared elsewhere -- confirm.
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},  // Only the ASCII letters carry case mappings in this table.
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},  // 0x41..0x5a: ASCII upper case, lower case is 0x20 above
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},  // 0x61..0x7a: ASCII lower case
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},  // 0x80..0xff: every byte maps to itself with the flag clear
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9},
+ {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc},
+ {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf},
+ {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2},
+ {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5},
+ {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8},
+ {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb},
+ {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce},
+ {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1},
+ {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4},
+ {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7},
+ {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda},
+ {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd},
+ {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0},
+ {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3},
+ {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6},
+ {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9},
+ {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec},
+ {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef},
+ {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2},
+ {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5},
+ {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8},
+ {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb},
+ {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe},
+ {0x00, 0xff, 0xff}};
+
+static struct cs_info iso9_tbl[] = { /* Per-byte case table: 256 entries, entry i describes byte value i.
+ * Each entry is a triple. Judging by the ASCII rows (0x41 -> {0x01, 0x61, 0x41},
+ * 0x61 -> {0x00, 0x61, 0x41}) the layout is {is-uppercase flag, lowercase byte,
+ * uppercase byte}; the member names are declared with struct cs_info, which is
+ * outside this chunk.
+ * NOTE(review): the name suggests ISO-8859-9 (Latin-5, Turkish); the lookup
+ * that binds this table to an encoding name is not visible here - confirm.
+ * Unlike the sibling tables, 0x49 and 0x69 are not paired with each other:
+ * 0x49 pairs with 0xfd and 0x69 pairs with 0xdd, which is what Turkish
+ * dotless/dotted I casing would require.
+ * 0xc0-0xde pair with 0xe0-0xfe except for the entries called out below;
+ * 0xd7, 0xdf, 0xf7 and 0xff are caseless (identity in both directions). */
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0xfd, 0x49}, {0x01, 0x6a, 0x4a}, /* 0x49 lowercases to 0xfd, not 0x69 */
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0xdd}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, /* 0x69 uppercases to 0xdd, not 0x49 */
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9},
+ {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc},
+ {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf},
+ {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2},
+ {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5},
+ {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8},
+ {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb},
+ {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce},
+ {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1},
+ {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4},
+ {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7}, /* 0xd7 is caseless */
+ {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda},
+ {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0x69, 0xdd}, /* 0xdd lowercases to 0x69 */
+ {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0},
+ {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3},
+ {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6},
+ {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9},
+ {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc},
+ {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf},
+ {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2},
+ {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5},
+ {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8}, /* 0xf7 is caseless */
+ {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb},
+ {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0x49}, {0x00, 0xfe, 0xde}, /* 0xfd uppercases to 0x49 */
+ {0x00, 0xff, 0xff}};
+
+static struct cs_info iso10_tbl[] = { /* Per-byte case table: 256 entries, entry i describes byte value i.
+ * Each entry is a triple. Judging by the ASCII rows (0x41 -> {0x01, 0x61, 0x41},
+ * 0x61 -> {0x00, 0x61, 0x41}) the layout is {is-uppercase flag, lowercase byte,
+ * uppercase byte}; the member names are declared with struct cs_info, which is
+ * outside this chunk.
+ * Only 0x41-0x5a / 0x61-0x7a carry case information here. Every other byte,
+ * including the whole 0x80-0xff range, maps to itself with flag 0x00.
+ * NOTE(review): if this table is meant for ISO-8859-10 (Latin-6), that
+ * charset does have cased letters above 0x9f, so case conversion through this
+ * table is a no-op for them - confirm whether that is intentional. */
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, /* from here on every entry is identity */
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9},
+ {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc},
+ {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf},
+ {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2},
+ {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5},
+ {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8},
+ {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb},
+ {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce},
+ {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1},
+ {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4},
+ {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7},
+ {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda},
+ {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd},
+ {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0},
+ {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3},
+ {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6},
+ {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9},
+ {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec},
+ {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef},
+ {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2},
+ {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5},
+ {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8},
+ {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb},
+ {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe},
+ {0x00, 0xff, 0xff}};
+
+static struct cs_info koi8r_tbl[] = { /* Per-byte case table: 256 entries, entry i describes byte value i.
+ * Each entry is a triple. Judging by the ASCII rows (0x41 -> {0x01, 0x61, 0x41},
+ * 0x61 -> {0x00, 0x61, 0x41}) the layout is {is-uppercase flag, lowercase byte,
+ * uppercase byte}; the member names are declared with struct cs_info, which is
+ * outside this chunk.
+ * NOTE(review): the name suggests KOI8-R; the lookup that binds this table
+ * to an encoding name is not visible here - confirm.
+ * Above ASCII the case layout is the reverse of the ASCII one: the lowercase
+ * forms sit at the lower codes (0xc0-0xdf, plus 0xa3) and their uppercase
+ * partners 0x20 higher (0xe0-0xff) or 0x10 higher (0xb3 for 0xa3). */
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xb3}, {0x00, 0xa4, 0xa4}, /* 0xa3 uppercases to 0xb3 */
+ {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x01, 0xa3, 0xb3}, /* 0xb3 lowercases to 0xa3 */
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9},
+ {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc},
+ {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf},
+ {0x00, 0xc0, 0xe0}, {0x00, 0xc1, 0xe1}, {0x00, 0xc2, 0xe2}, /* lowercase run 0xc0-0xdf starts */
+ {0x00, 0xc3, 0xe3}, {0x00, 0xc4, 0xe4}, {0x00, 0xc5, 0xe5},
+ {0x00, 0xc6, 0xe6}, {0x00, 0xc7, 0xe7}, {0x00, 0xc8, 0xe8},
+ {0x00, 0xc9, 0xe9}, {0x00, 0xca, 0xea}, {0x00, 0xcb, 0xeb},
+ {0x00, 0xcc, 0xec}, {0x00, 0xcd, 0xed}, {0x00, 0xce, 0xee},
+ {0x00, 0xcf, 0xef}, {0x00, 0xd0, 0xf0}, {0x00, 0xd1, 0xf1},
+ {0x00, 0xd2, 0xf2}, {0x00, 0xd3, 0xf3}, {0x00, 0xd4, 0xf4},
+ {0x00, 0xd5, 0xf5}, {0x00, 0xd6, 0xf6}, {0x00, 0xd7, 0xf7},
+ {0x00, 0xd8, 0xf8}, {0x00, 0xd9, 0xf9}, {0x00, 0xda, 0xfa},
+ {0x00, 0xdb, 0xfb}, {0x00, 0xdc, 0xfc}, {0x00, 0xdd, 0xfd},
+ {0x00, 0xde, 0xfe}, {0x00, 0xdf, 0xff}, {0x01, 0xc0, 0xe0}, /* uppercase run 0xe0-0xff starts */
+ {0x01, 0xc1, 0xe1}, {0x01, 0xc2, 0xe2}, {0x01, 0xc3, 0xe3},
+ {0x01, 0xc4, 0xe4}, {0x01, 0xc5, 0xe5}, {0x01, 0xc6, 0xe6},
+ {0x01, 0xc7, 0xe7}, {0x01, 0xc8, 0xe8}, {0x01, 0xc9, 0xe9},
+ {0x01, 0xca, 0xea}, {0x01, 0xcb, 0xeb}, {0x01, 0xcc, 0xec},
+ {0x01, 0xcd, 0xed}, {0x01, 0xce, 0xee}, {0x01, 0xcf, 0xef},
+ {0x01, 0xd0, 0xf0}, {0x01, 0xd1, 0xf1}, {0x01, 0xd2, 0xf2},
+ {0x01, 0xd3, 0xf3}, {0x01, 0xd4, 0xf4}, {0x01, 0xd5, 0xf5},
+ {0x01, 0xd6, 0xf6}, {0x01, 0xd7, 0xf7}, {0x01, 0xd8, 0xf8},
+ {0x01, 0xd9, 0xf9}, {0x01, 0xda, 0xfa}, {0x01, 0xdb, 0xfb},
+ {0x01, 0xdc, 0xfc}, {0x01, 0xdd, 0xfd}, {0x01, 0xde, 0xfe},
+ {0x01, 0xdf, 0xff}};
+
+/* Per-byte case table: 256 entries, entry i describes byte value i.
+ * Each entry is a triple. Judging by the ASCII rows (0x41 -> {0x01, 0x61, 0x41},
+ * 0x61 -> {0x00, 0x61, 0x41}) the layout is {is-uppercase flag, lowercase byte,
+ * uppercase byte}; the member names are declared with struct cs_info.
+ * NOTE(review): the name suggests KOI8-U; the lookup that binds this table
+ * to an encoding name lives elsewhere - confirm.
+ * Above ASCII the lowercase forms sit at the lower codes (0xc0-0xdf) and
+ * their uppercase partners 0x20 higher (0xe0-0xff). The extra letters in the
+ * 0xa0-0xbf block are paired 0x10 apart: 0xa3/0xb3, 0xa4/0xb4 (ie),
+ * 0xa6/0xb6 (i), 0xa7/0xb7 (ii) and 0xad/0xbd (g'').
+ * The uppercase halves 0xb4, 0xb6, 0xb7 and 0xbd carry the uppercase flag and
+ * point back at their lowercase partners, so that lowercasing is the inverse
+ * of the uppercasing already declared on 0xa4, 0xa6, 0xa7 and 0xad (the same
+ * way 0xa3/0xb3 are paired).
+ */
+static struct cs_info koi8u_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xb3}, {0x00, 0xa4, 0xb4}, /* ie */
+ {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xb6}, /* i */
+ {0x00, 0xa7, 0xb7}, /* ii */
+ {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xbd}, /* g'' */
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x01, 0xa3, 0xb3},
+ {0x01, 0xa4, 0xb4}, /* IE: uppercase, lowercases to 0xa4 */
+ {0x00, 0xb5, 0xb5}, {0x01, 0xa6, 0xb6}, /* I: uppercase, lowercases to 0xa6 */
+ {0x01, 0xa7, 0xb7}, /* II: uppercase, lowercases to 0xa7 */
+ {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, {0x00, 0xba, 0xba},
+ {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, {0x01, 0xad, 0xbd}, /* G'': uppercase, lowercases to 0xad */
+ {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, {0x00, 0xc0, 0xe0},
+ {0x00, 0xc1, 0xe1}, {0x00, 0xc2, 0xe2}, {0x00, 0xc3, 0xe3},
+ {0x00, 0xc4, 0xe4}, {0x00, 0xc5, 0xe5}, {0x00, 0xc6, 0xe6},
+ {0x00, 0xc7, 0xe7}, {0x00, 0xc8, 0xe8}, {0x00, 0xc9, 0xe9},
+ {0x00, 0xca, 0xea}, {0x00, 0xcb, 0xeb}, {0x00, 0xcc, 0xec},
+ {0x00, 0xcd, 0xed}, {0x00, 0xce, 0xee}, {0x00, 0xcf, 0xef},
+ {0x00, 0xd0, 0xf0}, {0x00, 0xd1, 0xf1}, {0x00, 0xd2, 0xf2},
+ {0x00, 0xd3, 0xf3}, {0x00, 0xd4, 0xf4}, {0x00, 0xd5, 0xf5},
+ {0x00, 0xd6, 0xf6}, {0x00, 0xd7, 0xf7}, {0x00, 0xd8, 0xf8},
+ {0x00, 0xd9, 0xf9}, {0x00, 0xda, 0xfa}, {0x00, 0xdb, 0xfb},
+ {0x00, 0xdc, 0xfc}, {0x00, 0xdd, 0xfd}, {0x00, 0xde, 0xfe},
+ {0x00, 0xdf, 0xff}, {0x01, 0xc0, 0xe0}, {0x01, 0xc1, 0xe1},
+ {0x01, 0xc2, 0xe2}, {0x01, 0xc3, 0xe3}, {0x01, 0xc4, 0xe4},
+ {0x01, 0xc5, 0xe5}, {0x01, 0xc6, 0xe6}, {0x01, 0xc7, 0xe7},
+ {0x01, 0xc8, 0xe8}, {0x01, 0xc9, 0xe9}, {0x01, 0xca, 0xea},
+ {0x01, 0xcb, 0xeb}, {0x01, 0xcc, 0xec}, {0x01, 0xcd, 0xed},
+ {0x01, 0xce, 0xee}, {0x01, 0xcf, 0xef}, {0x01, 0xd0, 0xf0},
+ {0x01, 0xd1, 0xf1}, {0x01, 0xd2, 0xf2}, {0x01, 0xd3, 0xf3},
+ {0x01, 0xd4, 0xf4}, {0x01, 0xd5, 0xf5}, {0x01, 0xd6, 0xf6},
+ {0x01, 0xd7, 0xf7}, {0x01, 0xd8, 0xf8}, {0x01, 0xd9, 0xf9},
+ {0x01, 0xda, 0xfa}, {0x01, 0xdb, 0xfb}, {0x01, 0xdc, 0xfc},
+ {0x01, 0xdd, 0xfd}, {0x01, 0xde, 0xfe}, {0x01, 0xdf, 0xff}};
+
+static struct cs_info cp1251_tbl[] = { /* Per-byte case table: 256 entries, entry i describes byte value i.
+ * Each entry is a triple. Judging by the ASCII rows (0x41 -> {0x01, 0x61, 0x41},
+ * 0x61 -> {0x00, 0x61, 0x41}) the layout is {is-uppercase flag, lowercase byte,
+ * uppercase byte}; the member names are declared with struct cs_info, which is
+ * outside this chunk.
+ * NOTE(review): the name suggests Windows code page 1251; the lookup that
+ * binds this table to an encoding name is not visible here - confirm.
+ * 0xc0-0xdf are uppercase with lowercase partners 0x20 higher (0xe0-0xff).
+ * The 0x80-0xbf block holds irregularly spaced pairs (upper/lower):
+ * 0x80/0x90, 0x81/0x83, 0x8a/0x9a, 0x8c/0x9c, 0x8d/0x9d, 0x8e/0x9e,
+ * 0x8f/0x9f, 0xa1/0xa2, 0xa3/0xbc, 0xa5/0xb4, 0xa8/0xb8, 0xaa/0xba,
+ * 0xaf/0xbf, 0xb2/0xb3, 0xbd/0xbe. Every pair is declared in both
+ * directions. */
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x01, 0x90, 0x80}, /* 0x80 pairs with 0x90 */
+ {0x01, 0x83, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x81}, /* 0x81 pairs with 0x83 */
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x01, 0x9a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x01, 0x9c, 0x8c},
+ {0x01, 0x9d, 0x8d}, {0x01, 0x9e, 0x8e}, {0x01, 0x9f, 0x8f},
+ {0x00, 0x90, 0x80}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x8a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x8c}, {0x00, 0x9d, 0x8d}, {0x00, 0x9e, 0x8e},
+ {0x00, 0x9f, 0x8f}, {0x00, 0xa0, 0xa0}, {0x01, 0xa2, 0xa1}, /* 0xa1 pairs with 0xa2 */
+ {0x00, 0xa2, 0xa1}, {0x01, 0xbc, 0xa3}, {0x00, 0xa4, 0xa4}, /* 0xa3 pairs with 0xbc */
+ {0x01, 0xb4, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, /* 0xa5 pairs with 0xb4 */
+ {0x01, 0xb8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x01, 0xba, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x01, 0xbf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x01, 0xb3, 0xb2}, {0x00, 0xb3, 0xb2}, /* 0xb2 pairs with 0xb3 */
+ {0x00, 0xb4, 0xa5}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xa8}, {0x00, 0xb9, 0xb9},
+ {0x00, 0xba, 0xaa}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xa3},
+ {0x01, 0xbe, 0xbd}, {0x00, 0xbe, 0xbd}, {0x00, 0xbf, 0xaf}, /* 0xbd pairs with 0xbe */
+ {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2},
+ {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5},
+ {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8},
+ {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb},
+ {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce},
+ {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1},
+ {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4},
+ {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x01, 0xf7, 0xd7},
+ {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda},
+ {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd},
+ {0x01, 0xfe, 0xde}, {0x01, 0xff, 0xdf}, {0x00, 0xe0, 0xc0},
+ {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3},
+ {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6},
+ {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9},
+ {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc},
+ {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf},
+ {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2},
+ {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5},
+ {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xd7}, {0x00, 0xf8, 0xd8},
+ {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb},
+ {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde},
+ {0x00, 0xff, 0xdf}};
+
+static struct cs_info iso13_tbl[] = { /* Per-byte case table: 256 entries, entry i describes byte value i.
+ * Each entry is a triple. Judging by the ASCII rows (0x41 -> {0x01, 0x61, 0x41},
+ * 0x61 -> {0x00, 0x61, 0x41}) the layout is {is-uppercase flag, lowercase byte,
+ * uppercase byte}; the member names are declared with struct cs_info, which is
+ * outside this chunk.
+ * NOTE(review): the name suggests ISO-8859-13 (Latin-7, Baltic); the lookup
+ * that binds this table to an encoding name is not visible here - confirm.
+ * Cased pairs above ASCII (upper/lower): 0xA8/0xB8, 0xAA/0xBA, 0xAF/0xBF and
+ * 0xC0-0xDE / 0xE0-0xFE. 0xD7, 0xDF, 0xF7 and 0xFF are caseless.
+ * This table spells its hex digits in upper case, unlike its siblings; the
+ * values are unaffected. */
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0A, 0x0A}, {0x00, 0x0B, 0x0B},
+ {0x00, 0x0C, 0x0C}, {0x00, 0x0D, 0x0D}, {0x00, 0x0E, 0x0E},
+ {0x00, 0x0F, 0x0F}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1A, 0x1A},
+ {0x00, 0x1B, 0x1B}, {0x00, 0x1C, 0x1C}, {0x00, 0x1D, 0x1D},
+ {0x00, 0x1E, 0x1E}, {0x00, 0x1F, 0x1F}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2A, 0x2A}, {0x00, 0x2B, 0x2B}, {0x00, 0x2C, 0x2C},
+ {0x00, 0x2D, 0x2D}, {0x00, 0x2E, 0x2E}, {0x00, 0x2F, 0x2F},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3A, 0x3A}, {0x00, 0x3B, 0x3B},
+ {0x00, 0x3C, 0x3C}, {0x00, 0x3D, 0x3D}, {0x00, 0x3E, 0x3E},
+ {0x00, 0x3F, 0x3F}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6A, 0x4A},
+ {0x01, 0x6B, 0x4B}, {0x01, 0x6C, 0x4C}, {0x01, 0x6D, 0x4D},
+ {0x01, 0x6E, 0x4E}, {0x01, 0x6F, 0x4F}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7A, 0x5A}, {0x00, 0x5B, 0x5B}, {0x00, 0x5C, 0x5C},
+ {0x00, 0x5D, 0x5D}, {0x00, 0x5E, 0x5E}, {0x00, 0x5F, 0x5F},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6A, 0x4A}, {0x00, 0x6B, 0x4B},
+ {0x00, 0x6C, 0x4C}, {0x00, 0x6D, 0x4D}, {0x00, 0x6E, 0x4E},
+ {0x00, 0x6F, 0x4F}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7A, 0x5A},
+ {0x00, 0x7B, 0x7B}, {0x00, 0x7C, 0x7C}, {0x00, 0x7D, 0x7D},
+ {0x00, 0x7E, 0x7E}, {0x00, 0x7F, 0x7F}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8A, 0x8A}, {0x00, 0x8B, 0x8B}, {0x00, 0x8C, 0x8C},
+ {0x00, 0x8D, 0x8D}, {0x00, 0x8E, 0x8E}, {0x00, 0x8F, 0x8F},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9A, 0x9A}, {0x00, 0x9B, 0x9B},
+ {0x00, 0x9C, 0x9C}, {0x00, 0x9D, 0x9D}, {0x00, 0x9E, 0x9E},
+ {0x00, 0x9F, 0x9F}, {0x00, 0xA0, 0xA0}, {0x00, 0xA1, 0xA1},
+ {0x00, 0xA2, 0xA2}, {0x00, 0xA3, 0xA3}, {0x00, 0xA4, 0xA4},
+ {0x00, 0xA5, 0xA5}, {0x00, 0xA6, 0xA6}, {0x00, 0xA7, 0xA7},
+ {0x01, 0xB8, 0xA8}, {0x00, 0xA9, 0xA9}, {0x01, 0xBA, 0xAA}, /* 0xA8 pairs with 0xB8, 0xAA with 0xBA */
+ {0x00, 0xAB, 0xAB}, {0x00, 0xAC, 0xAC}, {0x00, 0xAD, 0xAD},
+ {0x00, 0xAE, 0xAE}, {0x01, 0xBF, 0xAF}, {0x00, 0xB0, 0xB0}, /* 0xAF pairs with 0xBF */
+ {0x00, 0xB1, 0xB1}, {0x00, 0xB2, 0xB2}, {0x00, 0xB3, 0xB3},
+ {0x00, 0xB4, 0xB4}, {0x00, 0xB5, 0xB5}, {0x00, 0xB6, 0xB6},
+ {0x00, 0xB7, 0xB7}, {0x00, 0xB8, 0xA8}, {0x00, 0xB9, 0xB9},
+ {0x00, 0xBA, 0xAA}, {0x00, 0xBB, 0xBB}, {0x00, 0xBC, 0xBC},
+ {0x00, 0xBD, 0xBD}, {0x00, 0xBE, 0xBE}, {0x00, 0xBF, 0xAF},
+ {0x01, 0xE0, 0xC0}, {0x01, 0xE1, 0xC1}, {0x01, 0xE2, 0xC2},
+ {0x01, 0xE3, 0xC3}, {0x01, 0xE4, 0xC4}, {0x01, 0xE5, 0xC5},
+ {0x01, 0xE6, 0xC6}, {0x01, 0xE7, 0xC7}, {0x01, 0xE8, 0xC8},
+ {0x01, 0xE9, 0xC9}, {0x01, 0xEA, 0xCA}, {0x01, 0xEB, 0xCB},
+ {0x01, 0xEC, 0xCC}, {0x01, 0xED, 0xCD}, {0x01, 0xEE, 0xCE},
+ {0x01, 0xEF, 0xCF}, {0x01, 0xF0, 0xD0}, {0x01, 0xF1, 0xD1},
+ {0x01, 0xF2, 0xD2}, {0x01, 0xF3, 0xD3}, {0x01, 0xF4, 0xD4},
+ {0x01, 0xF5, 0xD5}, {0x01, 0xF6, 0xD6}, {0x00, 0xD7, 0xD7}, /* 0xD7 is caseless */
+ {0x01, 0xF8, 0xD8}, {0x01, 0xF9, 0xD9}, {0x01, 0xFA, 0xDA},
+ {0x01, 0xFB, 0xDB}, {0x01, 0xFC, 0xDC}, {0x01, 0xFD, 0xDD},
+ {0x01, 0xFE, 0xDE}, {0x00, 0xDF, 0xDF}, {0x00, 0xE0, 0xC0}, /* 0xDF is caseless */
+ {0x00, 0xE1, 0xC1}, {0x00, 0xE2, 0xC2}, {0x00, 0xE3, 0xC3},
+ {0x00, 0xE4, 0xC4}, {0x00, 0xE5, 0xC5}, {0x00, 0xE6, 0xC6},
+ {0x00, 0xE7, 0xC7}, {0x00, 0xE8, 0xC8}, {0x00, 0xE9, 0xC9},
+ {0x00, 0xEA, 0xCA}, {0x00, 0xEB, 0xCB}, {0x00, 0xEC, 0xCC},
+ {0x00, 0xED, 0xCD}, {0x00, 0xEE, 0xCE}, {0x00, 0xEF, 0xCF},
+ {0x00, 0xF0, 0xD0}, {0x00, 0xF1, 0xD1}, {0x00, 0xF2, 0xD2},
+ {0x00, 0xF3, 0xD3}, {0x00, 0xF4, 0xD4}, {0x00, 0xF5, 0xD5},
+ {0x00, 0xF6, 0xD6}, {0x00, 0xF7, 0xF7}, {0x00, 0xF8, 0xD8}, /* 0xF7 is caseless */
+ {0x00, 0xF9, 0xD9}, {0x00, 0xFA, 0xDA}, {0x00, 0xFB, 0xDB},
+ {0x00, 0xFC, 0xDC}, {0x00, 0xFD, 0xDD}, {0x00, 0xFE, 0xDE},
+ {0x00, 0xFF, 0xFF}};
+
+// Case table for ISO-8859-14, indexed by byte value.  Judging by the ASCII
+// rows, each entry is {is-upper-case flag, lower-case byte, upper-case byte}.
+// Three entries were inconsistent with their partner entries and with the
+// ISO-8859-14 code chart and are corrected here: 0xa6 (lower/upper were
+// swapped), 0xb9 (upper pointed at 0xb6 instead of 0xb7) and 0xff (upper
+// pointed at itself instead of 0xaf).
+static struct cs_info iso14_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xa2, 0xa1},
+ {0x00, 0xa2, 0xa1}, {0x00, 0xa3, 0xa3}, {0x01, 0xa5, 0xa4},
+ {0x00, 0xa5, 0xa4}, {0x01, 0xab, 0xa6}, {0x00, 0xa7, 0xa7},  // 0xa6: upper-case, lower-case form is 0xab
+ {0x01, 0xb8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x01, 0xba, 0xaa},
+ {0x00, 0xab, 0xa6}, {0x01, 0xbc, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x01, 0xff, 0xaf}, {0x01, 0xb1, 0xb0},
+ {0x00, 0xb1, 0xb0}, {0x01, 0xb3, 0xb2}, {0x00, 0xb3, 0xb2},
+ {0x01, 0xb5, 0xb4}, {0x00, 0xb5, 0xb4}, {0x00, 0xb6, 0xb6},
+ {0x01, 0xb9, 0xb7}, {0x00, 0xb8, 0xa8}, {0x00, 0xb9, 0xb7},  // 0xb9: lower-case partner of 0xb7
+ {0x00, 0xba, 0xaa}, {0x01, 0xbf, 0xbb}, {0x00, 0xbc, 0xac},
+ {0x01, 0xbe, 0xbd}, {0x00, 0xbe, 0xbd}, {0x00, 0xbf, 0xbb},
+ {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2},
+ {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5},
+ {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8},
+ {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb},
+ {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce},
+ {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1},
+ {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4},
+ {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x01, 0xf7, 0xd7},
+ {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda},
+ {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd},
+ {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0},
+ {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3},
+ {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6},
+ {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9},
+ {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc},
+ {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf},
+ {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2},
+ {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5},
+ {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xd7}, {0x00, 0xf8, 0xd8},
+ {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb},
+ {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde},
+ {0x00, 0xff, 0xaf}};  // 0xff: lower-case partner of 0xaf
+
+static struct cs_info iso15_tbl[] = {  // ISO-8859-15 case table, one entry per byte 0x00-0xff
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},  // entries look like {is-upper flag, lower byte, upper byte}
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},  // ASCII A-Z start here: flag set, lower = byte + 0x20
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x00, 0xa5, 0xa5}, {0x01, 0xa8, 0xa6}, {0x00, 0xa7, 0xa7},  // 0xa6 (upper) pairs with 0xa8 (lower)
+ {0x00, 0xa8, 0xa6}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3},
+ {0x01, 0xb8, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6},  // 0xb4 (upper) pairs with 0xb8 (lower)
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb4}, {0x00, 0xb9, 0xb9},
+ {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x01, 0xbd, 0xbc},  // 0xbc (upper) pairs with 0xbd (lower)
+ {0x00, 0xbd, 0xbc}, {0x01, 0xff, 0xbe}, {0x00, 0xbf, 0xbf},  // 0xbe (upper) pairs with 0xff (lower)
+ {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2},
+ {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5},
+ {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8},
+ {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb},
+ {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce},
+ {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1},
+ {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4},
+ {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7},  // 0xd7 has no case pair in this table
+ {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda},
+ {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd},
+ {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0},
+ {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3},
+ {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6},
+ {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9},
+ {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc},
+ {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf},
+ {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2},
+ {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5},
+ {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8},  // 0xf7 has no case pair in this table
+ {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb},
+ {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde},
+ {0x00, 0xff, 0xbe}};
+
+static struct cs_info iscii_devanagari_tbl[] = {  // ISCII-DEVANAGARI case table, one entry per byte 0x00-0xff
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},  // only ASCII A-Z/a-z carry case mappings in this table
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},  // from 0x80 on every byte maps to itself
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9},
+ {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc},
+ {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf},
+ {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2},
+ {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5},
+ {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8},
+ {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb},
+ {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce},
+ {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1},
+ {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4},
+ {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7},
+ {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda},
+ {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd},
+ {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0},
+ {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3},
+ {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6},
+ {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9},
+ {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec},
+ {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef},
+ {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2},
+ {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5},
+ {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8},
+ {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb},
+ {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe},
+ {0x00, 0xff, 0xff}};
+
+static struct cs_info tis620_tbl[] = {  // TIS-620 / ISO-8859-11 case table, one entry per byte 0x00-0xff
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},  // only ASCII A-Z/a-z carry case mappings in this table
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},  // from 0x80 on every byte maps to itself
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9},
+ {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc},
+ {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf},
+ {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2},
+ {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5},
+ {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8},
+ {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb},
+ {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce},
+ {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1},
+ {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4},
+ {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7},
+ {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda},
+ {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd},
+ {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0},
+ {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3},
+ {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6},
+ {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9},
+ {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec},
+ {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef},
+ {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2},
+ {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5},
+ {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8},
+ {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb},
+ {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe},
+ {0x00, 0xff, 0xff}};
+
+struct enc_entry {  // one row of the encoding-name -> case-table lookup used by get_current_cs()
+ const char* enc_name;  // name as compared by get_current_cs(): lower-case ASCII letters and digits only
+ struct cs_info* cs_table;  // case table returned when enc_name matches
+};
+
+static struct enc_entry encds[] = {  // searched linearly by get_current_cs(); entry 0 is its fallback
+ {"iso88591", iso1_tbl}, // ISO-8859-1
+ {"iso88592", iso2_tbl}, // ISO-8859-2
+ {"iso88593", iso3_tbl}, // ISO-8859-3
+ {"iso88594", iso4_tbl}, // ISO-8859-4
+ {"iso88595", iso5_tbl}, // ISO-8859-5
+ {"iso88596", iso6_tbl}, // ISO-8859-6
+ {"iso88597", iso7_tbl}, // ISO-8859-7
+ {"iso88598", iso8_tbl}, // ISO-8859-8
+ {"iso88599", iso9_tbl}, // ISO-8859-9
+ {"iso885910", iso10_tbl}, // ISO-8859-10
+ {"tis620", tis620_tbl}, // TIS-620/ISO-8859-11
+ {"tis6202533", tis620_tbl}, // TIS-620/ISO-8859-11
+ {"iso885911", tis620_tbl}, // TIS-620/ISO-8859-11
+ {"iso885913", iso13_tbl}, // ISO-8859-13
+ {"iso885914", iso14_tbl}, // ISO-8859-14
+ {"iso885915", iso15_tbl}, // ISO-8859-15
+ {"koi8r", koi8r_tbl}, // KOI8-R
+ {"koi8u", koi8u_tbl}, // KOI8-U
+ {"cp1251", cp1251_tbl}, // CP-1251
+ {"microsoftcp1251", cp1251_tbl}, // microsoft-cp1251
+ {"xisciias", iscii_devanagari_tbl}, // x-iscii-as
+ {"isciidevanagari", iscii_devanagari_tbl} // ISCII-DEVANAGARI
+};
+
+/* Copy pName into pBuf, keeping only ASCII letters and digits and folding
+ * A-Z to a-z.  Every other byte is dropped.  pBuf is always NUL-terminated;
+ * the caller must provide room for strlen(pName) + 1 bytes. */
+static void toAsciiLowerAndRemoveNonAlphanumeric(const char* pName,
+                                                 char* pBuf) {
+  char* dst = pBuf;
+  for (const char* src = pName; *src != '\0'; ++src) {
+    const char ch = *src;
+    const bool isUpper = (ch >= 0x41) && (ch <= 0x5A); /* A-Z */
+    const bool isLower = (ch >= 0x61) && (ch <= 0x7A); /* a-z */
+    const bool isDigit = (ch >= 0x30) && (ch <= 0x39); /* 0-9 */
+
+    if (isUpper) {
+      *dst++ = static_cast<char>(ch + 0x20); /* fold to lower case */
+    } else if (isLower || isDigit) {
+      *dst++ = ch;
+    }
+  }
+
+  *dst = '\0';
+}
+
+// Look up the static case table for encoding name `es`.  The name is first
+// normalized (ASCII lower case, non-alphanumerics removed) and compared
+// against encds; when nothing matches, a warning is emitted and the first
+// entry of encds is returned instead.
+struct cs_info* get_current_cs(const char* es) {
+  // Scratch buffer for the normalized name; it can never be longer than es.
+  std::vector<char> normalized(strlen(es) + 1);
+  toAsciiLowerAndRemoveNonAlphanumeric(es, &normalized[0]);
+
+  const size_t entries = sizeof(encds) / sizeof(encds[0]);
+  for (size_t idx = 0; idx < entries; ++idx) {
+    if (strcmp(&normalized[0], encds[idx].enc_name) == 0)
+      return encds[idx].cs_table;
+  }
+
+  HUNSPELL_WARNING(stderr,
+                   "error: unknown encoding %s: using %s as fallback\n", es,
+                   encds[0].enc_name);
+  return encds[0].cs_table;
+}
+#else
+// XXX This function was rewritten for Mozilla. Instead of storing the
+// conversion tables statically in this file, a 256-entry table is allocated with new[] on every
+// call and filled with the help of the Mozilla backend; get_casechars() releases it with delete[].
+struct cs_info* get_current_cs(const char* es) {
+ struct cs_info* ccs = new cs_info[256];
+ // Initialize the array with dummy (identity, caseless) data so that we
+ // never need to return null in case of failures.
+ for (int i = 0; i <= 0xff; ++i) {
+ ccs[i].ccase = false;
+ ccs[i].clower = i;
+ ccs[i].cupper = i;
+ }
+
+ nsCOMPtr<nsIUnicodeEncoder> encoder;
+ nsCOMPtr<nsIUnicodeDecoder> decoder;
+
+ nsresult rv;
+
+ nsAutoCString label(es);
+ nsAutoCString encoding;
+ if (!EncodingUtils::FindEncodingForLabelNoReplacement(label, encoding)) {
+ return ccs;  // unknown label: hand back the identity table built above
+ }
+ encoder = EncodingUtils::EncoderForEncoding(encoding);  // NOTE(review): result is used without a null check - confirm these cannot fail
+ decoder = EncodingUtils::DecoderForEncoding(encoding);
+ encoder->SetOutputErrorBehavior(encoder->kOnError_Signal, nullptr, '?');
+ decoder->SetInputErrorBehavior(decoder->kOnError_Signal);
+
+ for (unsigned int i = 0; i <= 0xff; ++i) {
+ bool success = false;
+ // We want to find the upper/lowercase equivalents of each byte
+ // in this 1-byte character encoding. Call our encoding/decoding
+ // APIs separately for each byte since they may reject some of the
+ // bytes, and we want to handle errors separately for each byte.
+ char lower, upper;
+ do {  // single-pass block: `break` jumps to the failure handling below
+ if (i == 0)
+ break;  // byte 0 is never converted; it keeps the identity mapping
+ const char source = char(i);
+ char16_t uni, uniCased;
+ int32_t charLength = 1, uniLength = 1;
+
+ rv = decoder->Convert(&source, &charLength, &uni, &uniLength);
+ // Explicitly check NS_OK because we don't want to allow
+ // NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT.
+ if (rv != NS_OK || charLength != 1 || uniLength != 1)
+ break;
+ uniCased = ToLowerCase(uni);
+ rv = encoder->Convert(&uniCased, &uniLength, &lower, &charLength);
+ // Explicitly check NS_OK because we don't want to allow
+ // NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT.
+ if (rv != NS_OK || charLength != 1 || uniLength != 1)
+ break;
+
+ uniCased = ToUpperCase(uni);
+ rv = encoder->Convert(&uniCased, &uniLength, &upper, &charLength);
+ // Explicitly check NS_OK because we don't want to allow
+ // NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT.
+ if (rv != NS_OK || charLength != 1 || uniLength != 1)
+ break;
+
+ success = true;  // both case forms fit into a single byte of this encoding
+ } while (0);
+
+ if (success) {
+ ccs[i].cupper = upper;
+ ccs[i].clower = lower;
+ } else {
+ ccs[i].cupper = i;
+ ccs[i].clower = i;
+ }
+
+ if (ccs[i].clower != (unsigned char)i)  // a byte whose lower-case form differs from itself is flagged as upper case
+ ccs[i].ccase = true;
+ else
+ ccs[i].ccase = false;
+ }
+
+ return ccs;
+}
+#endif
+
+// Primitive isalpha() replacement for tokenization: collects every byte of
+// encoding `enc` whose upper- and lower-case forms differ and returns the
+// collected bytes as a NUL-terminated copy made with mystrdup().
+char* get_casechars(const char* enc) {
+  struct cs_info* table = get_current_cs(enc);
+  char cased[MAXLNLEN];
+  size_t used = 0;
+  int code = 0;
+  while (code <= 255) {
+    if (cupper(table, code) != clower(table, code)) {
+      cased[used] = static_cast<char>(code);
+      ++used;
+    }
+    ++code;
+  }
+  cased[used] = '\0';
+#ifdef MOZILLA_CLIENT
+  // In this build the table was allocated with new[] by get_current_cs().
+  delete[] table;
+#endif
+  return mystrdup(cased);
+}
+
+// language code to language number (LANG_*) map, used by get_lang_num()
+
+struct lang_map {
+ const char* lang;  // language code compared with strcmp(), e.g. "hu" or "hu_HU"
+ int num;  // LANG_* constant returned for that code
+};
+
+static struct lang_map lang2enc[] =  // scanned front to back by get_lang_num(); codes not listed here yield LANG_xx
+ {{"ar", LANG_ar}, {"az", LANG_az},
+ {"az_AZ", LANG_az}, // for back-compatibility
+ {"bg", LANG_bg}, {"ca", LANG_ca},
+ {"cs", LANG_cs}, {"da", LANG_da},
+ {"de", LANG_de}, {"el", LANG_el},
+ {"en", LANG_en}, {"es", LANG_es},
+ {"eu", LANG_eu}, {"gl", LANG_gl},
+ {"fr", LANG_fr}, {"hr", LANG_hr},
+ {"hu", LANG_hu}, {"hu_HU", LANG_hu}, // for back-compatibility
+ {"it", LANG_it}, {"la", LANG_la},
+ {"lv", LANG_lv}, {"nl", LANG_nl},
+ {"pl", LANG_pl}, {"pt", LANG_pt},
+ {"sv", LANG_sv}, {"tr", LANG_tr},
+ {"tr_TR", LANG_tr}, // for back-compatibility
+ {"ru", LANG_ru}, {"uk", LANG_uk}};
+
+// Returns the LANG_* number registered in lang2enc for `lang` (exact,
+// case-sensitive match), or LANG_xx when the code is not listed.
+int get_lang_num(const char* lang) {
+  const struct lang_map* const last =
+      lang2enc + sizeof(lang2enc) / sizeof(lang2enc[0]);
+  for (const struct lang_map* entry = lang2enc; entry != last; ++entry) {
+    if (strcmp(lang, entry->lang) == 0)
+      return entry->num;
+  }
+  return LANG_xx;
+}
+
+#ifndef OPENOFFICEORG
+#ifndef MOZILLA_CLIENT
+// Reference-counted creation of the global utf_tbl case table.  Every call
+// bumps utf_tbl_count; the table itself is built only when it does not exist
+// yet.  Returns 0 on success and 1 when the allocation fails.
+int initialize_utf_tbl() {
+  ++utf_tbl_count;
+  if (utf_tbl)
+    return 0;
+
+  utf_tbl = (unicode_info2*)malloc(CONTSIZE * sizeof(unicode_info2));
+  if (!utf_tbl)
+    return 1;
+
+  // Start from "not a letter, maps to itself" for every code unit ...
+  for (size_t code = 0; code < CONTSIZE; ++code) {
+    unicode_info2& slot = utf_tbl[code];
+    slot.cletter = 0;
+    slot.clower = (unsigned short)code;
+    slot.cupper = (unsigned short)code;
+  }
+  // ... then overlay the entries listed in utf_lst.
+  for (size_t k = 0; k < UTF_LST_LEN; ++k) {
+    unicode_info2& slot = utf_tbl[utf_lst[k].c];
+    slot.cletter = 1;
+    slot.clower = utf_lst[k].clower;
+    slot.cupper = utf_lst[k].cupper;
+  }
+  return 0;
+}
+#endif
+#endif
+
+// Drops one reference to the global utf_tbl and frees the table once the
+// reference count has reached zero.
+void free_utf_tbl() {
+  if (utf_tbl_count > 0)
+    --utf_tbl_count;
+
+  if (utf_tbl_count != 0 || !utf_tbl)
+    return;
+
+  free(utf_tbl);
+  utf_tbl = NULL;
+}
+
+unsigned short unicodetoupper(unsigned short c, int langnum) {
+ // In Azeri and Turkish, I and i are distinct letters:
+ // the lower-case pair of upper `I' is a dotless i,
+ // and the upper-case pair of lower `i' is an I with dot (0x0130), returned here.
+ if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr)))
+ return 0x0130;
+#ifdef OPENOFFICEORG
+ return static_cast<unsigned short>(u_toupper(c));
+#else
+#ifdef MOZILLA_CLIENT
+ return ToUpperCase((char16_t)c);
+#else
+ return (utf_tbl) ? utf_tbl[c].cupper : c;  // without a table the code unit is returned unchanged
+#endif
+#endif
+}
+
+unsigned short unicodetolower(unsigned short c, int langnum) {
+ // In Azeri and Turkish, I and i are distinct letters:
+ // the lower-case pair of upper `I' is a dotless i (0x0131), returned here,
+ // and the upper-case pair of lower `i' is an I with dot.
+ if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr)))
+ return 0x0131;
+#ifdef OPENOFFICEORG
+ return static_cast<unsigned short>(u_tolower(c));
+#else
+#ifdef MOZILLA_CLIENT
+ return ToLowerCase((char16_t)c);
+#else
+ return (utf_tbl) ? utf_tbl[c].clower : c;  // without a table the code unit is returned unchanged
+#endif
+#endif
+}
+
+int unicodeisalpha(unsigned short c) {  // non-zero when c counts as a letter
+#ifdef OPENOFFICEORG
+ return u_isalpha(c);
+#else
+ return (utf_tbl) ? utf_tbl[c].cletter : 0;  // NOTE(review): yields 0 whenever utf_tbl is unset; initialize_utf_tbl() in this file is compiled out under MOZILLA_CLIENT - confirm the table is populated elsewhere in that build
+#endif
+}
+
+/* Classify the capitalization of an 8-bit encoded word using the case table
+ * csconv.  Returns NOCAP (also when csconv is NULL), INITCAP, ALLCAP,
+ * HUHINITCAP or HUHCAP. */
+int get_captype(const std::string& word, cs_info* csconv) {
+  if (csconv == NULL)
+    return NOCAP;
+
+  // Count upper-case bytes and bytes that have no case distinction at all.
+  size_t upperCount = 0;
+  size_t neutralCount = 0;
+  for (size_t pos = 0; pos < word.size(); ++pos) {
+    const unsigned char byte = static_cast<unsigned char>(word[pos]);
+    if (ccase(csconv, byte))
+      ++upperCount;
+    if (cupper(csconv, byte) == clower(csconv, byte))
+      ++neutralCount;
+  }
+
+  if (upperCount == 0)
+    return NOCAP;
+
+  // At least one upper-case byte exists, so the word is not empty.
+  const unsigned char first = static_cast<unsigned char>(word[0]);
+  const bool startsUpper = csconv[first].ccase != 0;
+
+  if (upperCount == 1 && startsUpper)
+    return INITCAP;
+  if (upperCount == word.size() || upperCount + neutralCount == word.size())
+    return ALLCAP;
+  if (upperCount > 1 && startsUpper)
+    return HUHINITCAP;
+  return HUHCAP;
+}
+
+// Classify the capitalization of a UTF-16 word (vector of w_char) for the
+// language `langnum`.  Returns NOCAP, INITCAP, ALLCAP, HUHINITCAP or HUHCAP.
+int get_captype_utf8(const std::vector<w_char>& word, int langnum) {
+  size_t upperCount = 0;
+  size_t neutralCount = 0;
+  bool startsUpper = false;
+
+  for (size_t pos = 0; pos < word.size(); ++pos) {
+    const unsigned short code = (word[pos].h << 8) + word[pos].l;
+    const unsigned short lowered = unicodetolower(code, langnum);
+    const bool isUpper = (code != lowered);
+    if (isUpper)
+      ++upperCount;
+    if (unicodetoupper(code, langnum) == lowered)
+      ++neutralCount;
+    if (pos == 0)
+      startsUpper = isUpper;
+  }
+
+  if (upperCount == 0)
+    return NOCAP;
+  if (upperCount == 1 && startsUpper)
+    return INITCAP;
+  if (upperCount == word.size() || upperCount + neutralCount == word.size())
+    return ALLCAP;
+  if (upperCount > 1 && startsUpper)
+    return HUHINITCAP;
+  return HUHCAP;
+}
+
+// Strip every character listed in ignored_chars from the UTF-8 string `word`
+// (rewritten in place) and return the number of UTF-16 units that remain.
+// ignored_chars is searched with std::binary_search, so it must be sorted.
+size_t remove_ignored_chars_utf(std::string& word,
+                                const std::vector<w_char>& ignored_chars) {
+  std::vector<w_char> decoded;
+  u8_u16(decoded, word);
+
+  std::vector<w_char> kept;
+  for (std::vector<w_char>::const_iterator it = decoded.begin();
+       it != decoded.end(); ++it) {
+    const bool ignored =
+        std::binary_search(ignored_chars.begin(), ignored_chars.end(), *it);
+    if (ignored)
+      continue;
+    kept.push_back(*it);
+  }
+
+  u16_u8(word, kept);
+  return kept.size();
+}
+
+namespace {
+// Unary predicate: true when the tested character occurs in the set of
+// characters supplied at construction time.
+class is_any_of {
+ public:
+  is_any_of(const std::string& in) : chars(in) {}
+
+  bool operator()(char c) {
+    const std::string::size_type where = chars.find(c);
+    return std::string::npos != where;
+  }
+
+ private:
+  std::string chars;  // private copy of the character set
+};
+}
+
+// Strip every byte that occurs in ignored_chars from `word` (in place) and
+// return the new length of `word`.
+size_t remove_ignored_chars(std::string& word,
+                            const std::string& ignored_chars) {
+  is_any_of should_drop(ignored_chars);
+  std::string::iterator new_end =
+      std::remove_if(word.begin(), word.end(), should_drop);
+  word.erase(new_end, word.end());
+  return word.size();
+}
+
+// Parse a "KEYWORD value" line: the second non-empty piece returned by
+// mystrsep() is duplicated into *out.  Returns 0 on success and 1 when *out
+// is already set, the copy fails, or the line does not contain two pieces.
+// `ln` is only used in the warning messages.
+int parse_string(char* line, char** out, int ln) {
+  if (*out) {
+    HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions\n", ln);
+    return 1;
+  }
+
+  char* cursor = line;
+  int index = 0;  // number of non-empty pieces seen so far
+  int found = 0;  // how many of the two expected pieces were seen
+  for (char* piece = mystrsep(&cursor, 0); piece;
+       piece = mystrsep(&cursor, 0)) {
+    if (*piece == '\0')
+      continue;
+
+    if (index == 0) {
+      // the keyword itself: only counted
+      found++;
+    } else if (index == 1) {
+      *out = mystrdup(piece);
+      if (!*out)
+        return 1;
+      found++;
+    }
+    index++;
+  }
+
+  if (found != 2) {
+    HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", ln);
+    return 1;
+  }
+  return 0;
+}
+
+// Parse a "KEYWORD chars" line into *out via parse_string().  When utf8 is
+// non-zero the characters are also converted into out_utf16 and sorted.
+// Returns false when parse_string() reports an error, true otherwise.
+bool parse_array(char* line,
+                 char** out,
+                 std::vector<w_char>& out_utf16,
+                 int utf8,
+                 int ln) {
+  const bool failed = parse_string(line, out, ln) != 0;
+  if (failed)
+    return false;
+  if (!utf8)
+    return true;
+
+  u8_u16(out_utf16, *out);
+  std::sort(out_utf16.begin(), out_utf16.end());
+  return true;
+}
diff --git a/extensions/spellcheck/hunspell/src/csutil.hxx b/extensions/spellcheck/hunspell/src/csutil.hxx
new file mode 100644
index 000000000..ce7091df5
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/csutil.hxx
@@ -0,0 +1,325 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef __CSUTILHXX__
+#define __CSUTILHXX__
+
+#include "hunvisapi.h"
+
+// First some base level utility routines
+
+#include <string>
+#include <vector>
+#include <string.h>
+#include "w_char.hxx"
+#include "htypes.hxx"
+
+#ifdef MOZILLA_CLIENT
+#include "nscore.h" // for mozalloc headers
+#endif
+
+// casing
+#define NOCAP 0
+#define INITCAP 1
+#define ALLCAP 2
+#define HUHCAP 3
+#define HUHINITCAP 4
+
+// default encoding and keystring
+#define SPELL_ENCODING "ISO8859-1"
+#define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm"
+
+// default morphological fields
+#define MORPH_STEM "st:"
+#define MORPH_ALLOMORPH "al:"
+#define MORPH_POS "po:"
+#define MORPH_DERI_PFX "dp:"
+#define MORPH_INFL_PFX "ip:"
+#define MORPH_TERM_PFX "tp:"
+#define MORPH_DERI_SFX "ds:"
+#define MORPH_INFL_SFX "is:"
+#define MORPH_TERM_SFX "ts:"
+#define MORPH_SURF_PFX "sp:"
+#define MORPH_FREQ "fr:"
+#define MORPH_PHON "ph:"
+#define MORPH_HYPH "hy:"
+#define MORPH_PART "pa:"
+#define MORPH_FLAG "fl:"
+#define MORPH_HENTRY "_H:"
+#define MORPH_TAG_LEN strlen(MORPH_STEM)
+
+#define MSEP_FLD ' '
+#define MSEP_REC '\n'
+#define MSEP_ALT '\v'
+
+// default flags
+#define DEFAULTFLAGS 65510
+#define FORBIDDENWORD 65510
+#define ONLYUPCASEFLAG 65511
+
+// fopen or optional _wfopen to fix long pathname problem of WIN32
+LIBHUNSPELL_DLL_EXPORTED FILE* myfopen(const char* path, const char* mode);
+
+// convert UTF-16 characters to UTF-8
+LIBHUNSPELL_DLL_EXPORTED std::string& u16_u8(std::string& dest,
+ const std::vector<w_char>& src);
+
+// convert UTF-8 characters to UTF-16
+LIBHUNSPELL_DLL_EXPORTED int u8_u16(std::vector<w_char>& dest,
+ const std::string& src);
+
+// remove end of line char(s)
+LIBHUNSPELL_DLL_EXPORTED void mychomp(char* s);
+
+// duplicate string
+LIBHUNSPELL_DLL_EXPORTED char* mystrdup(const char* s);
+
+// strcat for limited length destination string
+LIBHUNSPELL_DLL_EXPORTED char* mystrcat(char* dest, const char* st, int max);
+
+// parse into tokens with char delimiter
+LIBHUNSPELL_DLL_EXPORTED char* mystrsep(char** sptr, const char delim);
+
+// replace pat by rep in word and return word
+LIBHUNSPELL_DLL_EXPORTED char* mystrrep(char* word,
+ const char* pat,
+ const char* rep);
+LIBHUNSPELL_DLL_EXPORTED std::string& mystrrep(std::string& str,
+ const std::string& search,
+ const std::string& replace);
+
+// append apd to the end of every line in str
+LIBHUNSPELL_DLL_EXPORTED std::string& strlinecat(std::string& str,
+ const std::string& apd);
+
+// tokenize into lines with new line
+LIBHUNSPELL_DLL_EXPORTED int line_tok(const char* text,
+ char*** lines,
+ char breakchar);
+
+// tokenize into lines with new line and uniq in place
+LIBHUNSPELL_DLL_EXPORTED char* line_uniq(char* text, char breakchar);
+LIBHUNSPELL_DLL_EXPORTED char* line_uniq_app(char** text, char breakchar);
+
+// reverse word
+LIBHUNSPELL_DLL_EXPORTED size_t reverseword(std::string& word);
+
+// reverse word
+LIBHUNSPELL_DLL_EXPORTED size_t reverseword_utf(std::string&);
+
+// remove duplicates
+LIBHUNSPELL_DLL_EXPORTED int uniqlist(char** list, int n);
+
+// free character array list
+LIBHUNSPELL_DLL_EXPORTED void freelist(char*** list, int n);
+
+// character encoding information
+// One record of a case-conversion table; the functions taking a
+// `const struct cs_info*` (mkallcap, mkallsmall, ...) receive such tables.
+// NOTE(review): field meanings below are inferred from the names - confirm
+// against the table definitions in csutil.cxx.
+struct cs_info {
+ unsigned char ccase; // presumably a "has case / is uppercase" marker
+ unsigned char clower; // presumably the lowercase form of the character
+ unsigned char cupper; // presumably the uppercase form of the character
+};
+
+LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();
+LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
+LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c,
+ int langnum);
+LIBHUNSPELL_DLL_EXPORTED w_char upper_utf(w_char u, int langnum);
+LIBHUNSPELL_DLL_EXPORTED w_char lower_utf(w_char u, int langnum);
+LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c,
+ int langnum);
+LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
+
+LIBHUNSPELL_DLL_EXPORTED struct cs_info* get_current_cs(const char* es);
+
+// get language identifiers of language codes
+LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char* lang);
+
+// get characters of the given 8bit encoding with lower- and uppercase forms
+LIBHUNSPELL_DLL_EXPORTED char* get_casechars(const char* enc);
+
+// convert std::string to all caps
+LIBHUNSPELL_DLL_EXPORTED std::string& mkallcap(std::string& s,
+ const struct cs_info* csconv);
+
+// convert std::string to all lowercase
+LIBHUNSPELL_DLL_EXPORTED std::string& mkallsmall(std::string& s,
+ const struct cs_info* csconv);
+
+// convert first letter of string to lowercase
+LIBHUNSPELL_DLL_EXPORTED std::string& mkinitsmall(std::string& s,
+ const struct cs_info* csconv);
+
+// convert first letter of string to capital
+LIBHUNSPELL_DLL_EXPORTED std::string& mkinitcap(std::string& s,
+ const struct cs_info* csconv);
+
+// convert first letter of UTF-16 (w_char) string to capital
+LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
+mkinitcap_utf(std::vector<w_char>& u, int langnum);
+
+// convert UTF-16 (w_char) string to lowercase
+LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
+mkallsmall_utf(std::vector<w_char>& u, int langnum);
+
+// convert first letter of UTF-16 (w_char) string to lowercase
+LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
+mkinitsmall_utf(std::vector<w_char>& u, int langnum);
+
+// convert UTF-16 (w_char) string to capital
+LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
+mkallcap_utf(std::vector<w_char>& u, int langnum);
+
+// get type of capitalization
+LIBHUNSPELL_DLL_EXPORTED int get_captype(const std::string& q, cs_info*);
+
+// get type of capitalization (UTF-8)
+LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(const std::vector<w_char>& q, int langnum);
+
+// strip all ignored characters in the string
+LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars_utf(
+ std::string& word,
+ const std::vector<w_char>& ignored_chars);
+
+// strip all ignored characters in the string
+LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars(
+ std::string& word,
+ const std::string& ignored_chars);
+
+LIBHUNSPELL_DLL_EXPORTED int parse_string(char* line, char** out, int ln);
+
+LIBHUNSPELL_DLL_EXPORTED bool parse_array(char* line,
+ char** out,
+ std::vector<w_char>& out_utf16,
+ int utf8,
+ int ln);
+
+LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char* r);
+
+LIBHUNSPELL_DLL_EXPORTED bool copy_field(std::string& dest,
+ const std::string& morph,
+ const std::string& var);
+
+LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char* s, const char* t);
+
+LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char* morph);
+
+// conversion function for protected memory
+LIBHUNSPELL_DLL_EXPORTED void store_pointer(char* dest, char* source);
+
+// conversion function for protected memory
+LIBHUNSPELL_DLL_EXPORTED char* get_stored_pointer(const char* s);
+
+// hash entry macros
+// Return the optional data of `h`: NULL when h->var is 0, otherwise the bytes
+// that follow the word and its terminator. When H_OPT_ALIASM is set those
+// bytes are passed to get_stored_pointer() instead of being returned directly.
+LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry* h) {
+  if (!h->var)
+    return NULL;
+  char* after_word = HENTRY_WORD(h) + h->blen + 1;
+  if (h->var & H_OPT_ALIASM)
+    return get_stored_pointer(after_word);
+  return after_word;
+}
+
+// Const overload: same lookup as the mutable HENTRY_DATA, for read-only
+// entries. Returns NULL when the entry carries no optional data.
+LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA(
+    const struct hentry* h) {
+  if (!h->var)
+    return NULL;
+  const char* after_word = HENTRY_WORD(h) + h->blen + 1;
+  if (h->var & H_OPT_ALIASM)
+    return get_stored_pointer(after_word);
+  return after_word;
+}
+
+// NULL-free version for warning-free OOo build
+// Same as the const HENTRY_DATA, except that an entry without optional data
+// yields the empty string instead of NULL.
+LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(
+    const struct hentry* h) {
+  if (!h->var)
+    return "";
+  const char* after_word = HENTRY_WORD(h) + h->blen + 1;
+  if (h->var & H_OPT_ALIASM)
+    return get_stored_pointer(after_word);
+  return after_word;
+}
+
+// Search the optional data of `h` for the substring `p`.
+// Returns a pointer to the first match, or NULL when the entry has no
+// optional data or `p` does not occur in it.
+LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry* h,
+                                                  const char* p) {
+  char* data = HENTRY_DATA(h);
+  if (!data)
+    return NULL;
+  return strstr(data, p);
+}
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/filemgr.cxx b/extensions/spellcheck/hunspell/src/filemgr.cxx
new file mode 100644
index 000000000..2218bc79e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/filemgr.cxx
@@ -0,0 +1,120 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "filemgr.hxx"
+#include "csutil.hxx"
+
+int FileMgr::fail(const char* err, const char* par) {
+ fprintf(stderr, err, par);
+ return -1;
+}
+
+// Open `file` for reading. When the plain file cannot be opened, fall back to
+// a Hunzip reader on `file` + HZIP_EXTENSION, passing `key` through to it.
+FileMgr::FileMgr(const char* file, const char* key) : hin(NULL), linenum(0) {
+  in[0] = '\0';
+
+  fin = myfopen(file, "r");
+  if (fin)
+    return;
+
+  // check hzipped file
+  std::string zipped_path(file);
+  zipped_path.append(HZIP_EXTENSION);
+  hin = new Hunzip(zipped_path.c_str(), key);
+
+  // NOTE(review): with a throwing operator new this branch looks unreachable;
+  // whether Hunzip exposes an "opened" state to test instead is not visible
+  // here - confirm.
+  if (!hin)
+    fail(MSG_OPEN, file);
+}
+
+// Close the plain file if one was opened and destroy the Hunzip reader.
+FileMgr::~FileMgr() {
+  if (fin != NULL)
+    fclose(fin);
+  delete hin;  // delete on a null pointer is a no-op
+}
+
+// Read the next line into the internal buffer and return a pointer to it.
+// With a plain file the result of fgets() is returned as is (NULL at end of
+// file or on error; the line counter stays incremented in that case).
+// Otherwise the line comes from the Hunzip reader; when no line is available
+// the counter is rolled back and NULL is returned.
+char* FileMgr::getline() {
+  ++linenum;
+  if (fin)
+    return fgets(in, BUFSIZE - 1, fin);
+
+  if (hin) {
+    const char* unzipped = hin->getline();
+    if (unzipped != NULL)
+      return strcpy(in, unzipped);
+  }
+
+  --linenum;
+  return NULL;
+}
+
+// Current value of the line counter maintained by getline().
+int FileMgr::getlinenum() {
+  return this->linenum;
+}
diff --git a/extensions/spellcheck/hunspell/src/filemgr.hxx b/extensions/spellcheck/hunspell/src/filemgr.hxx
new file mode 100644
index 000000000..8b69931dd
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/filemgr.hxx
@@ -0,0 +1,101 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* file manager class - read lines of files [filename] OR [filename.hz] */
+#ifndef _FILEMGR_HXX_
+#define _FILEMGR_HXX_
+
+#include "hunvisapi.h"
+
+#include "hunzip.hxx"
+#include <stdio.h>
+
+// Line reader over either a plain file (fin) or a Hunzip reader (hin).
+class LIBHUNSPELL_DLL_EXPORTED FileMgr {
+ private:
+ // copy construction and assignment are declared private to forbid copying
+ FileMgr(const FileMgr&);
+ FileMgr& operator=(const FileMgr&);
+
+ protected:
+ FILE* fin; // plain-file handle; NULL when the file could not be opened
+ Hunzip* hin; // fallback reader, created only when fin is NULL
+ char in[BUFSIZE + 50]; // input buffer
+ // print `err` (used as a printf format) with `par` to stderr; returns -1
+ int fail(const char* err, const char* par);
+ int linenum; // line counter maintained by getline()
+
+ public:
+ // open `filename`, or `filename` + HZIP_EXTENSION through Hunzip with `key`
+ FileMgr(const char* filename, const char* key = NULL);
+ ~FileMgr();
+ // next line in the internal buffer, or NULL when nothing could be read
+ char* getline();
+ // current value of the line counter
+ int getlinenum();
+};
+#endif
diff --git a/extensions/spellcheck/hunspell/src/hashmgr.cxx b/extensions/spellcheck/hunspell/src/hashmgr.cxx
new file mode 100644
index 000000000..c3cd95420
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/hashmgr.cxx
@@ -0,0 +1,1147 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <limits>
+#include <sstream>
+
+#include "hashmgr.hxx"
+#include "csutil.hxx"
+#include "atypes.hxx"
+
+// build a hash table from a munched word list
+
+// Build the hash manager: load_config() is run on `apath`, then
+// load_tables() on `tpath`; `key` is handed to both. When load_tables()
+// reports an error a warning is emitted and the table is reset to empty.
+HashMgr::HashMgr(const char* tpath, const char* apath, const char* key)
+    : tablesize(0),
+      tableptr(NULL),
+      flag_mode(FLAG_CHAR),
+      complexprefixes(0),
+      utf8(0),
+      forbiddenword(FORBIDDENWORD),  // forbidden word signing flag
+      numaliasf(0),
+      aliasf(NULL),
+      aliasflen(0),
+      numaliasm(0),
+      aliasm(NULL) {
+  // members that are not part of the initializer list
+  csconv = 0;
+  ignorechars = NULL;
+  enc = NULL;
+  lang = NULL;
+  langnum = 0;
+
+  load_config(apath, key);
+  const int ec = load_tables(tpath, key);
+  if (!ec)
+    return;
+
+  /* error condition - what should we do here */
+  HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n", ec);
+  if (tableptr) {
+    free(tableptr);
+    tableptr = NULL;
+  }
+  tablesize = 0;
+}
+
+// Release the hash table, the alias tables and the configuration strings.
+HashMgr::~HashMgr() {
+  if (tableptr) {
+    // walk every bucket and free each chained entry
+    for (int bucket = 0; bucket < tablesize; ++bucket) {
+      for (struct hentry* entry = tableptr[bucket]; entry;) {
+        struct hentry* following = entry->next;
+        // With an alias table only vectors carrying ONLYUPCASEFLAG are freed
+        // here (presumably the others point into aliasf, freed below).
+        if (entry->astr &&
+            (!aliasf || TESTAFF(entry->astr, ONLYUPCASEFLAG, entry->alen)))
+          free(entry->astr);
+        free(entry);
+        entry = following;
+      }
+    }
+    free(tableptr);
+  }
+  tablesize = 0;
+
+  if (aliasf) {
+    for (int idx = 0; idx < numaliasf; ++idx)
+      free(aliasf[idx]);
+    free(aliasf);
+    aliasf = NULL;
+    if (aliasflen) {
+      free(aliasflen);
+      aliasflen = NULL;
+    }
+  }
+  if (aliasm) {
+    for (int idx = 0; idx < numaliasm; ++idx)
+      free(aliasm[idx]);
+    free(aliasm);
+    aliasm = NULL;
+  }
+
+#if !defined(OPENOFFICEORG) && !defined(MOZILLA_CLIENT)
+  if (utf8)
+    free_utf_tbl();
+#endif
+
+  if (enc)
+    free(enc);
+  if (lang)
+    free(lang);
+  if (ignorechars)
+    free(ignorechars);
+
+#ifdef MOZILLA_CLIENT
+  delete[] csconv;
+#endif
+}
+
+// lookup a root word in the hashtable
+// Returns the first entry in the word's bucket whose stored word equals
+// `word`, or NULL when there is no table or no such entry.
+struct hentry* HashMgr::lookup(const char* word) const {
+  if (!tableptr)
+    return NULL;
+
+  struct hentry* cur = tableptr[hash(word)];
+  while (cur) {
+    if (strcmp(word, cur->word) == 0)
+      return cur;
+    cur = cur->next;
+  }
+  return NULL;
+}
+
+// add a word to the hash table (private)
+//
+// word       - the word to insert; wbl is its length in bytes
+// wcl        - length stored in hentry::clen (recomputed below for UTF-8
+//              when characters are ignored or the word is reversed)
+// aff / al   - flag vector and its length; stored in the entry as-is, so on
+//              success the table refers to the caller's array
+// desc       - optional description; with an alias table (aliasm) it is read
+//              with atoi() and resolved through get_aliasm()
+// onlyupcase - true for the hidden capitalized entries
+//
+// Returns 1 when the entry cannot be allocated (aff is left untouched in that
+// case) and 0 otherwise.
+int HashMgr::add_word(const char* word,
+ int wbl,
+ int wcl,
+ unsigned short* aff,
+ int al,
+ const char* desc,
+ bool onlyupcase) {
+
+ // Temporary copies, needed only when the word/description must be edited
+ // (ignored characters removed, or reversed for complexprefixes).
+ std::string *word_copy = NULL;
+ std::string *desc_copy = NULL;
+ if (ignorechars || complexprefixes) {
+ word_copy = new std::string(word, wbl);
+
+ if (ignorechars != NULL) {
+ if (utf8) {
+ wcl = remove_ignored_chars_utf(*word_copy, ignorechars_utf16);
+ } else {
+ // NOTE(review): wcl is not updated in this branch although characters
+ // may have been removed - confirm this is intended.
+ remove_ignored_chars(*word_copy, ignorechars);
+ }
+ }
+
+ if (complexprefixes) {
+ if (utf8)
+ wcl = reverseword_utf(*word_copy);
+ else
+ reverseword(*word_copy);
+
+ // a literal description (no alias table) is reversed as well
+ if (desc && !aliasm) {
+ desc_copy = new std::string(desc);
+
+ if (complexprefixes) {
+ if (utf8)
+ reverseword_utf(*desc_copy);
+ else
+ reverseword(*desc_copy);
+ }
+ desc = desc_copy->c_str();
+ }
+ }
+
+ // from here on work with the edited copy
+ wbl = word_copy->size();
+ word = word_copy->c_str();
+ }
+
+ bool upcasehomonym = false;
+ // space for the description: a pointer slot with aliasm, else the string
+ // including its terminator
+ int descl = desc ? (aliasm ? sizeof(char*) : strlen(desc) + 1) : 0;
+ // variable-length hash record with word and optional fields
+ // (presumably sizeof(struct hentry) already covers the word terminator -
+ // TODO confirm against the hentry declaration)
+ struct hentry* hp =
+ (struct hentry*)malloc(sizeof(struct hentry) + wbl + descl);
+ if (!hp) {
+ delete desc_copy;
+ delete word_copy;
+ return 1;
+ }
+
+ char* hpw = hp->word;
+ strcpy(hpw, word);
+
+ int i = hash(hpw);
+
+ // NOTE(review): blen/clen are narrowed to unsigned char and alen to short;
+ // larger values would be truncated.
+ hp->blen = (unsigned char)wbl;
+ hp->clen = (unsigned char)wcl;
+ hp->alen = (short)al;
+ hp->astr = aff;
+ hp->next = NULL;
+ hp->next_homonym = NULL;
+
+ // store the description string or its pointer
+ if (desc) {
+ hp->var = H_OPT;
+ if (aliasm) {
+ hp->var += H_OPT_ALIASM;
+ store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc)));
+ } else {
+ strcpy(hpw + wbl + 1, desc);
+ }
+ if (strstr(HENTRY_DATA(hp), MORPH_PHON))
+ hp->var += H_OPT_PHON;
+ } else
+ hp->var = 0;
+
+ // empty bucket: the new entry becomes its head
+ struct hentry* dp = tableptr[i];
+ if (!dp) {
+ tableptr[i] = hp;
+ delete desc_copy;
+ delete word_copy;
+ return 0;
+ }
+ // walk the chain up to (not including) its last entry, handling homonyms
+ while (dp->next != NULL) {
+ if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) {
+ // remove hidden onlyupcase homonym
+ if (!onlyupcase) {
+ if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
+ // the existing hidden entry takes over the new flags; the new record
+ // itself is discarded
+ free(dp->astr);
+ dp->astr = hp->astr;
+ dp->alen = hp->alen;
+ free(hp);
+ delete desc_copy;
+ delete word_copy;
+ return 0;
+ } else {
+ dp->next_homonym = hp;
+ }
+ } else {
+ upcasehomonym = true;
+ }
+ }
+ dp = dp->next;
+ }
+ // same homonym handling for the last entry of the chain (note: no
+ // next_homonym test here)
+ if (strcmp(hp->word, dp->word) == 0) {
+ // remove hidden onlyupcase homonym
+ if (!onlyupcase) {
+ if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
+ free(dp->astr);
+ dp->astr = hp->astr;
+ dp->alen = hp->alen;
+ free(hp);
+ delete desc_copy;
+ delete word_copy;
+ return 0;
+ } else {
+ dp->next_homonym = hp;
+ }
+ } else {
+ upcasehomonym = true;
+ }
+ }
+ if (!upcasehomonym) {
+ // append the new entry to the end of the bucket chain
+ dp->next = hp;
+ } else {
+ // remove hidden onlyupcase homonym
+ // (the word already exists, so the new record and its flag vector are
+ // dropped; 0 is still returned)
+ if (hp->astr)
+ free(hp->astr);
+ free(hp);
+ }
+
+ delete desc_copy;
+ delete word_copy;
+ return 0;
+}
+
+// Add the hidden "Initcap" form of `word`, tagged with ONLYUPCASEFLAG, so
+// that the all-caps form of a mixed-case or affixed all-caps word is accepted.
+//
+// word            - the dictionary word as written
+// wcl             - length passed through to add_word() as its wcl argument
+// flags/flagslen  - flag vector of the word (NULL means no flags); it is
+//                   copied, never stored
+// dp              - description passed through to add_word()
+// captype         - capitalization type of `word` (NOCAP, INITCAP, ...)
+//
+// Returns 0 when nothing had to be added or the entry was added, non-zero on
+// allocation failure.
+int HashMgr::add_hidden_capitalized_word(const std::string& word,
+                                         int wcl,
+                                         unsigned short* flags,
+                                         int flagslen,
+                                         char* dp,
+                                         int captype) {
+  if (flags == NULL)
+    flagslen = 0;
+
+  // add inner capitalized forms to handle the following allcap forms:
+  // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG
+  // Allcaps with suffixes: CIA's -> CIA'S
+  if (((captype == HUHCAP) || (captype == HUHINITCAP) ||
+       ((captype == ALLCAP) && (flagslen != 0))) &&
+      !((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) {
+    // private copy of the flags with ONLYUPCASEFLAG appended
+    unsigned short* flags2 =
+        (unsigned short*)malloc(sizeof(unsigned short) * (flagslen + 1));
+    if (!flags2)
+      return 1;
+    if (flagslen)
+      memcpy(flags2, flags, flagslen * sizeof(unsigned short));
+    flags2[flagslen] = ONLYUPCASEFLAG;
+
+    int ret;
+    if (utf8) {
+      std::string st;
+      std::vector<w_char> w;
+      u8_u16(w, word);
+      mkallsmall_utf(w, langnum);
+      mkinitcap_utf(w, langnum);
+      u16_u8(st, w);
+      ret = add_word(st.c_str(), st.size(), wcl, flags2, flagslen + 1, dp,
+                     true);
+    } else {
+      std::string new_word(word);
+      mkallsmall(new_word, csconv);
+      mkinitcap(new_word, csconv);
+      ret = add_word(new_word.c_str(), new_word.size(), wcl, flags2,
+                     flagslen + 1, dp, true);
+    }
+
+    // add_word() only fails before it has stored or freed the flag vector,
+    // so on failure flags2 is still ours and must be released here.
+    if (ret)
+      free(flags2);
+    return ret;
+  }
+  return 0;
+}
+
+// detect captype and modify word length for UTF-8 encoding
+// Stores the capitalization type of `word` in *captype and returns its
+// length: the value returned by u8_u16() in UTF-8 mode, word.size() otherwise.
+int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
+  if (!utf8) {
+    *captype = get_captype(word, csconv);
+    return word.size();
+  }
+
+  std::vector<w_char> utf16;
+  const int units = u8_u16(utf16, word);
+  *captype = get_captype_utf8(utf16, langnum);
+  return units;
+}
+
+// remove word (personal dictionary function for standalone applications)
+// The word is not unlinked from the table: every homonym that is not already
+// forbidden gets a new flag vector with `forbiddenword` appended (and sorted).
+// Returns 0 on success (also when the word is unknown), 1 on allocation
+// failure.
+int HashMgr::remove(const char* word) {
+  struct hentry* dp = lookup(word);
+  while (dp) {
+    if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {
+      unsigned short* flags =
+          (unsigned short*)malloc(sizeof(unsigned short) * (dp->alen + 1));
+      if (!flags)
+        return 1;
+      for (int i = 0; i < dp->alen; i++)
+        flags[i] = dp->astr[i];
+      flags[dp->alen] = forbiddenword;
+      // Release the vector being replaced, using the same ownership rule as
+      // the destructor: without an alias table every entry owns its vector;
+      // with one, only vectors carrying ONLYUPCASEFLAG are freed.
+      if (dp->astr &&
+          (!aliasf || TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)))
+        free(dp->astr);
+      dp->astr = flags;
+      dp->alen++;
+      std::sort(flags, flags + dp->alen);
+    }
+    dp = dp->next_homonym;
+  }
+  return 0;
+}
+
+/* remove forbidden flag to add a personal word to the hash */
+// Strips `forbiddenword` from every homonym of `word`.
+// Returns 1 when the word is not in the table at all (the caller then has to
+// add it) or when a replacement flag vector cannot be allocated; returns 0
+// when the word exists.
+int HashMgr::remove_forbidden_flag(const std::string& word) {
+  struct hentry* dp = lookup(word.c_str());
+  if (!dp)
+    return 1;
+  for (; dp; dp = dp->next_homonym) {
+    if (!dp->astr || !TESTAFF(dp->astr, forbiddenword, dp->alen))
+      continue;
+    if (dp->alen == 1) {
+      dp->alen = 0;  // XXX forbidden words of personal dic.
+      continue;
+    }
+    unsigned short* flags2 =
+        (unsigned short*)malloc(sizeof(unsigned short) * (dp->alen - 1));
+    if (!flags2)
+      return 1;
+    // TESTAFF matched, so at least one element is skipped and `kept` never
+    // exceeds alen - 1.
+    int kept = 0;
+    for (int i = 0; i < dp->alen; i++) {
+      if (dp->astr[i] != forbiddenword)
+        flags2[kept++] = dp->astr[i];
+    }
+    // Release the vector being replaced instead of leaking it. With flag
+    // aliases (aliasf) vectors are shared, so leave the old pointer alone.
+    if (!aliasf)
+      free(dp->astr);
+    dp->astr = flags2;  // XXX allowed forbidden words
+    // Use the number of flags actually copied: a plain decrement would
+    // expose uninitialized slots if the flag occurred more than once.
+    dp->alen = (short)kept;
+  }
+  return 0;
+}
+
+// add a custom dic. word to the hash table (public)
+// If the word is already present, only its forbidden flag is cleared and 0 is
+// returned. Otherwise the word is added without flags (the add_word() result
+// is not checked) and the result of add_hidden_capitalized_word() is returned.
+int HashMgr::add(const std::string& word) {
+  if (remove_forbidden_flag(word) == 0)
+    return 0;
+
+  unsigned short* no_flags = NULL;
+  const int no_flags_len = 0;
+  int captype;
+  const int byte_len = word.size();
+  const int char_len = get_clen_and_captype(word, &captype);
+  add_word(word.c_str(), byte_len, char_len, no_flags, no_flags_len, NULL,
+           false);
+  return add_hidden_capitalized_word(word, char_len, no_flags, no_flags_len,
+                                     NULL, captype);
+}
+
+// Add `word` with the same affix flags as the existing entry `example`.
+// Any forbidden flag on `word` is cleared first. Returns 1 when `example` is
+// unknown or has no flag vector, or when the flag copy cannot be allocated;
+// otherwise returns the result of add_hidden_capitalized_word().
+int HashMgr::add_with_affix(const char* word, const char* example) {
+  struct hentry* model = lookup(example);
+  remove_forbidden_flag(word);
+  if (!model || !model->astr)
+    return 1;
+
+  // detect captype and modify word length for UTF-8 encoding
+  int captype;
+  const int byte_len = strlen(word);
+  const int char_len = get_clen_and_captype(word, &captype);
+
+  if (aliasf) {
+    // with flag aliases the model's vector is passed on as is
+    add_word(word, byte_len, char_len, model->astr, model->alen, NULL, false);
+  } else {
+    // otherwise the new entry gets its own copy of the vector
+    unsigned short* own_flags =
+        (unsigned short*)malloc(model->alen * sizeof(unsigned short));
+    if (!own_flags)
+      return 1;
+    memcpy((void*)own_flags, (void*)model->astr,
+           model->alen * sizeof(unsigned short));
+    add_word(word, byte_len, char_len, own_flags, model->alen, NULL, false);
+  }
+  return add_hidden_capitalized_word(word, char_len, model->astr, model->alen,
+                                     NULL, captype);
+}
+
+// walk the hash table entry by entry - null at end
+// initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp);
+// `col` is the bucket index of the current position and is updated in place;
+// it is reset to -1 once the end of the table has been reached.
+struct hentry* HashMgr::walk_hashtable(int& col, struct hentry* hp) const {
+  // still inside a bucket chain: just follow it
+  if (hp && hp->next != NULL)
+    return hp->next;
+
+  // otherwise move on to the next non-empty bucket
+  while (++col < tablesize) {
+    struct hentry* head = tableptr[col];
+    if (head)
+      return head;
+  }
+
+  // null at end and reset to start
+  col = -1;
+  return NULL;
+}
+
+// load a munched word list and build a hash table on the fly
+//
+// tpath - path of the dictionary (.dic) file, handed to FileMgr
+// key   - passed through to FileMgr unchanged
+//
+// Return codes as produced below:
+//   0 - success
+//   1 - FileMgr object is NULL
+//   2 - the file has no first line
+//   3 - the hash table could not be allocated
+//   4 - the word count on line 1 is missing, non-positive or too large
+//   5 - add_word() or add_hidden_capitalized_word() reported an error
+//   6 - decode_flags() could not allocate memory
+int HashMgr::load_tables(const char* tpath, const char* key) {
+ int al;
+ char* ap;
+ char* dp;
+ char* dp2;
+ unsigned short* flags;
+ char* ts;
+
+ // open dictionary file
+ FileMgr* dict = new FileMgr(tpath, key);
+ if (dict == NULL)
+ return 1;
+
+ // first read the first line of file to get hash table size
+ if ((ts = dict->getline()) == NULL) {
+ HUNSPELL_WARNING(stderr, "error: empty dic file %s\n", tpath);
+ delete dict;
+ return 2;
+ }
+ mychomp(ts);
+
+ /* remove byte order mark */
+ if (strncmp(ts, "\xEF\xBB\xBF", 3) == 0) {
+ memmove(ts, ts + 3, strlen(ts + 3) + 1);
+ // warning: dic file begins with byte order mark: possible incompatibility
+ // with old Hunspell versions
+ }
+
+ tablesize = atoi(ts);
+
+ // headroom added on top of the declared word count
+ int nExtra = 5 + USERWORD;
+
+ // reject counts that are not positive or that would overflow once nExtra
+ // is added and the size is multiplied by the pointer size
+ if (tablesize <= 0 ||
+ (tablesize >= (std::numeric_limits<int>::max() - 1 - nExtra) /
+ int(sizeof(struct hentry*)))) {
+ HUNSPELL_WARNING(
+ stderr, "error: line 1: missing or bad word count in the dic file\n");
+ delete dict;
+ return 4;
+ }
+ tablesize += nExtra;
+ // keep the table size odd
+ if ((tablesize % 2) == 0)
+ tablesize++;
+
+ // allocate the hash table
+ tableptr = (struct hentry**)calloc(tablesize, sizeof(struct hentry*));
+ if (!tableptr) {
+ delete dict;
+ return 3;
+ }
+
+ // loop through all words on the munched list and add to hash
+ // table and create word and affix strings
+
+ while ((ts = dict->getline()) != NULL) {
+ mychomp(ts);
+ // split each line into word and morphological description
+ // A description is recognized by a ':' that has a space or tab three
+ // characters before it (presumably a two-character field id such as
+ // "po:" - TODO confirm against the dictionary format documentation).
+ dp = ts;
+ while ((dp = strchr(dp, ':')) != NULL) {
+ if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) {
+ // step back over the blanks that separate word and description
+ for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--)
+ ;
+ if (dp < ts) { // missing word
+ dp = NULL;
+ } else {
+ // terminate the word after its last non-blank character; dp then
+ // points just past that terminator
+ *(dp + 1) = '\0';
+ dp = dp + 2;
+ }
+ break;
+ }
+ dp++;
+ }
+
+ // tabulator is the old morphological field separator
+ // it wins when it occurs before the description found above
+ dp2 = strchr(ts, '\t');
+ if (dp2 && (!dp || dp2 < dp)) {
+ *dp2 = '\0';
+ dp = dp2 + 1;
+ }
+
+ // split each line into word and affix char strings
+ // "\/" signs slash in words (not affix separator)
+ // "/" at beginning of the line is word character (not affix separator)
+ ap = strchr(ts, '/');
+ while (ap) {
+ if (ap == ts) {
+ ap++;
+ continue;
+ } else if (*(ap - 1) != '\\')
+ break;
+ // replace "\/" with "/"
+ // (shifts the rest of the string one position to the left, over the
+ // backslash)
+ for (char *sp = ap - 1; *sp; *sp = *(sp + 1), sp++)
+ ;
+ ap = strchr(ap, '/');
+ }
+
+ if (ap) {
+ // cut the word off at the separator; the flag field starts at ap + 1
+ *ap = '\0';
+ if (aliasf) {
+ // with an alias table the flag field is a 1-based alias index
+ int index = atoi(ap + 1);
+ al = get_aliasf(index, &flags, dict);
+ if (!al) {
+ HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n",
+ dict->getlinenum());
+ *ap = '\0';
+ }
+ } else {
+ al = decode_flags(&flags, ap + 1, dict);
+ if (al == -1) {
+ HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");
+ delete dict;
+ return 6;
+ }
+ // flag vectors are kept sorted
+ std::sort(flags, flags + al);
+ }
+ } else {
+ // no flag field on this line
+ al = 0;
+ ap = NULL;
+ flags = NULL;
+ }
+
+ int captype;
+ int wbl = strlen(ts);
+ int wcl = get_clen_and_captype(ts, &captype);
+ // add the word and its index plus its capitalized form optionally
+ if (add_word(ts, wbl, wcl, flags, al, dp, false) ||
+ add_hidden_capitalized_word(ts, wcl, flags, al, dp, captype)) {
+ delete dict;
+ return 5;
+ }
+ }
+
+ delete dict;
+ return 0;
+}
+
+// the hash function is a simple load and rotate
+// algorithm borrowed
+// Returns the bucket index of `word`, i.e. the hash value modulo tablesize.
+int HashMgr::hash(const char* word) const {
+  unsigned long hv = 0;
+
+  // load: shift in up to the first four bytes
+  int loaded = 0;
+  while (loaded < 4 && *word != 0) {
+    hv = (hv << 8) | (*word);
+    ++word;
+    ++loaded;
+  }
+
+  // rotate: mix in every remaining byte
+  for (; *word != 0; ++word) {
+    ROTATE(hv, ROTATE_LEN);
+    hv ^= (*word);
+  }
+
+  return hv % tablesize;
+}
+
+// Decode the textual flag field `flags` into a newly malloc()ed array of
+// 16-bit flag ids, interpreting the text according to flag_mode.
+//   result - receives the array; set to NULL when `flags` is empty or when
+//            the allocation fails
+//   flags  - NUL-terminated flag field
+//   af     - file being read, used only for line numbers in warnings
+// Returns the number of decoded flags, or -1 when the array cannot be
+// allocated.
+int HashMgr::decode_flags(unsigned short** result, char* flags, FileMgr* af) {
+  int len;
+  if (*flags == '\0') {
+    *result = NULL;
+    return 0;
+  }
+  // Read the text as unsigned bytes. With a signed plain char, bytes >= 0x80
+  // would be sign-extended and corrupt the upper half of a two-character
+  // flag, so that encode_flag() could no longer reproduce it.
+  const unsigned char* bytes = (const unsigned char*)flags;
+  switch (flag_mode) {
+    case FLAG_LONG: {  // two-character flags (1x2yZz -> 1x 2y Zz)
+      len = strlen(flags);
+      if (len % 2 == 1) {
+        HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n",
+                         af->getlinenum());
+      }
+      len /= 2;
+      // len is 0 for a single stray character. malloc(0) is allowed to
+      // return NULL, which must not be reported as out of memory, so at
+      // least one element is always requested.
+      *result = (unsigned short*)malloc((len > 0 ? len : 1) *
+                                        sizeof(unsigned short));
+      if (!*result)
+        return -1;
+      for (int i = 0; i < len; i++) {
+        (*result)[i] =
+            (unsigned short)((bytes[i * 2] << 8) | bytes[i * 2 + 1]);
+      }
+      break;
+    }
+    case FLAG_NUM: {  // decimal numbers separated by comma (4521,23,233 -> 4521
+                      // 23 233)
+      // one more number than there are commas
+      len = 1;
+      for (const char* p = flags; *p; p++) {
+        if (*p == ',')
+          len++;
+      }
+      *result = (unsigned short*)malloc(len * sizeof(unsigned short));
+      if (!*result)
+        return -1;
+      unsigned short* dest = *result;
+      const char* src = flags;
+      // convert the number in front of every comma and in front of the
+      // terminating NUL
+      for (const char* p = flags;; p++) {
+        if (*p != ',' && *p != '\0')
+          continue;
+        int i = atoi(src);
+        if (i >= DEFAULTFLAGS) {
+          HUNSPELL_WARNING(
+              stderr, "error: line %d: flag id %d is too large (max: %d)\n",
+              af->getlinenum(), i, DEFAULTFLAGS - 1);
+        }
+        *dest = (unsigned short)i;
+        if (*dest == 0) {
+          HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
+                           af->getlinenum());
+        }
+        if (*p == '\0')
+          break;
+        src = p + 1;
+        dest++;
+      }
+      break;
+    }
+    case FLAG_UNI: {  // UTF-8 characters
+      std::vector<w_char> w;
+      u8_u16(w, flags);
+      len = w.size();
+      // the conversion may yield nothing; see FLAG_LONG about malloc(0)
+      *result = (unsigned short*)malloc((len > 0 ? len : 1) *
+                                        sizeof(unsigned short));
+      if (!*result)
+        return -1;
+      // &w[0] must not be formed for an empty vector
+      if (len > 0)
+        memcpy(*result, &w[0], len * sizeof(unsigned short));
+      break;
+    }
+    default: {  // Ispell's one-character flags (erfg -> e r f g)
+      len = strlen(flags);
+      *result = (unsigned short*)malloc(len * sizeof(unsigned short));
+      if (!*result)
+        return -1;
+      unsigned short* dest = *result;
+      for (const unsigned char* p = bytes; *p; p++) {
+        *dest = (unsigned short)*p;
+        dest++;
+      }
+    }
+  }
+  return len;
+}
+
+// Decode a single textual flag `f` into its 16-bit id according to
+// flag_mode. A warning is issued when the result is 0, which is not a valid
+// flag id; the value is returned in any case.
+unsigned short HashMgr::decode_flag(const char* f) {
+  unsigned short s = 0;
+  int i;
+  // Read the text as unsigned bytes so that bytes >= 0x80 are not
+  // sign-extended into the upper half of a two-character flag.
+  const unsigned char* bytes = (const unsigned char*)f;
+  switch (flag_mode) {
+    case FLAG_LONG:
+      // For an empty string bytes[1] would lie behind the terminator, so the
+      // second byte is only read when the first one is present.
+      if (bytes[0] != 0)
+        s = (unsigned short)((bytes[0] << 8) | bytes[1]);
+      break;
+    case FLAG_NUM:
+      i = atoi(f);
+      if (i >= DEFAULTFLAGS) {
+        HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n",
+                         i, DEFAULTFLAGS - 1);
+      }
+      s = (unsigned short)i;
+      break;
+    case FLAG_UNI: {
+      std::vector<w_char> w;
+      u8_u16(w, f);
+      if (!w.empty())
+        memcpy(&s, &w[0], 1 * sizeof(short));
+      break;
+    }
+    default:
+      s = (unsigned short)bytes[0];
+  }
+  if (s == 0)
+    HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");
+  return s;
+}
+
+// Turn the flag id `f` back into its textual form for the current flag_mode.
+// The result is produced by mystrdup(); the id 0 is rendered as "(NULL)".
+char* HashMgr::encode_flag(unsigned short f) {
+  if (f == 0)
+    return mystrdup("(NULL)");
+
+  std::string text;
+  switch (flag_mode) {
+    case FLAG_LONG:
+      // high byte first, then low byte
+      text.push_back((unsigned char)(f >> 8));
+      text.push_back((unsigned char)(f & 0xFF));
+      break;
+    case FLAG_NUM: {
+      std::ostringstream digits;
+      digits << f;
+      text = digits.str();
+      break;
+    }
+    case FLAG_UNI: {
+      // view the 16-bit id as a single w_char and convert it to UTF-8
+      const w_char* as_wchar = (const w_char*)&f;
+      std::vector<w_char> single(as_wchar, as_wchar + 1);
+      u16_u8(text, single);
+      break;
+    }
+    default:
+      text.push_back((unsigned char)(f));
+  }
+  return mystrdup(text.c_str());
+}
+
+// read in aff file and set flag mode
+//
+// Scans the affix file line by line and stops at the first SFX or PFX line.
+// Of the lines before that, only FLAG, FORBIDDENWORD, SET, LANG, IGNORE, AF,
+// AM and COMPLEXPREFIXES are evaluated. If no character set conversion table
+// has been selected by then, the one for SPELL_ENCODING is used.
+//   affpath - path of the affix file, handed to FileMgr
+//   key     - passed through to FileMgr unchanged
+// Returns 0 on success and 1 when the file object is NULL or one of the
+// evaluated lines cannot be parsed.
+int HashMgr::load_config(const char* affpath, const char* key) {
+  char* line;  // io buffers
+  int firstline = 1;
+
+  // open the affix file
+  FileMgr* afflst = new FileMgr(affpath, key);
+  if (!afflst) {
+    HUNSPELL_WARNING(
+        stderr, "Error - could not open affix description file %s\n", affpath);
+    return 1;
+  }
+
+  // read in each line ignoring any that do not
+  // start with a known line type indicator
+
+  // Note on the isspace() calls below: the argument is converted to unsigned
+  // char first, because passing a negative plain char is undefined behavior.
+
+  while ((line = afflst->getline()) != NULL) {
+    mychomp(line);
+
+    /* remove byte order mark */
+    if (firstline) {
+      firstline = 0;
+      if (strncmp(line, "\xEF\xBB\xBF", 3) == 0)
+        memmove(line, line + 3, strlen(line + 3) + 1);
+    }
+
+    /* parse in the flag mode */
+    if ((strncmp(line, "FLAG", 4) == 0) && isspace((unsigned char)line[4])) {
+      if (flag_mode != FLAG_CHAR) {
+        HUNSPELL_WARNING(stderr,
+                         "error: line %d: multiple definitions of the FLAG "
+                         "affix file parameter\n",
+                         afflst->getlinenum());
+      }
+      if (strstr(line, "long"))
+        flag_mode = FLAG_LONG;
+      if (strstr(line, "num"))
+        flag_mode = FLAG_NUM;
+      if (strstr(line, "UTF-8"))
+        flag_mode = FLAG_UNI;
+      if (flag_mode == FLAG_CHAR) {
+        HUNSPELL_WARNING(
+            stderr,
+            "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n",
+            afflst->getlinenum());
+      }
+    }
+
+    /* parse in the flag that marks forbidden words */
+    if (strncmp(line, "FORBIDDENWORD", 13) == 0) {
+      char* st = NULL;
+      if (parse_string(line, &st, afflst->getlinenum())) {
+        delete afflst;
+        return 1;
+      }
+      forbiddenword = decode_flag(st);
+      free(st);
+    }
+
+    /* parse in the character set */
+    if (strncmp(line, "SET", 3) == 0) {
+      if (parse_string(line, &enc, afflst->getlinenum())) {
+        delete afflst;
+        return 1;
+      }
+      if (strcmp(enc, "UTF-8") == 0) {
+        utf8 = 1;
+#ifndef OPENOFFICEORG
+#ifndef MOZILLA_CLIENT
+        initialize_utf_tbl();
+#endif
+#endif
+      } else
+        csconv = get_current_cs(enc);
+    }
+
+    /* parse in the language */
+    if (strncmp(line, "LANG", 4) == 0) {
+      if (parse_string(line, &lang, afflst->getlinenum())) {
+        delete afflst;
+        return 1;
+      }
+      langnum = get_lang_num(lang);
+    }
+
+    /* parse in the ignored characters (for example, Arabic optional diacritics
+     * characters) */
+    if (strncmp(line, "IGNORE", 6) == 0) {
+      if (!parse_array(line, &ignorechars, ignorechars_utf16,
+                       utf8, afflst->getlinenum())) {
+        delete afflst;
+        return 1;
+      }
+    }
+
+    /* parse in the flag alias table */
+    if ((strncmp(line, "AF", 2) == 0) && isspace((unsigned char)line[2])) {
+      if (parse_aliasf(line, afflst)) {
+        delete afflst;
+        return 1;
+      }
+    }
+
+    /* parse in the morphological alias table */
+    if ((strncmp(line, "AM", 2) == 0) && isspace((unsigned char)line[2])) {
+      if (parse_aliasm(line, afflst)) {
+        delete afflst;
+        return 1;
+      }
+    }
+
+    if (strncmp(line, "COMPLEXPREFIXES", 15) == 0)
+      complexprefixes = 1;
+
+    /* the first affix definition ends the part evaluated here */
+    if (((strncmp(line, "SFX", 3) == 0) || (strncmp(line, "PFX", 3) == 0)) &&
+        isspace((unsigned char)line[3]))
+      break;
+  }
+  if (csconv == NULL)
+    csconv = get_current_cs(SPELL_ENCODING);
+  delete afflst;
+  return 0;
+}
+
+// Release a flag alias table whose first `filled` rows have been
+// initialized, and reset the caller's bookkeeping to "no table".
+static void discard_flag_alias_table(unsigned short**& rows,
+                                     unsigned short*& lens,
+                                     int& count,
+                                     int filled) {
+  if (rows) {
+    for (int k = 0; k < filled; k++)
+      free(rows[k]);
+    free(rows);
+  }
+  free(lens);
+  rows = NULL;
+  lens = NULL;
+  count = 0;
+}
+
+/* parse in the ALIAS table */
+// `line` is the header line ("AF <count>"); the following <count> lines are
+// read from `af`, each holding "AF <flags>". Every row is decoded with
+// decode_flags() and stored sorted.
+// Returns 0 on success. On any error 1 is returned and no table is left
+// behind (numaliasf == 0, aliasf == NULL, aliasflen == NULL), so that later
+// lookups can never see a partially built table.
+int HashMgr::parse_aliasf(char* line, FileMgr* af) {
+  if (numaliasf != 0) {
+    HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
+                     af->getlinenum());
+    return 1;
+  }
+  char* tp = line;
+  char* piece;
+  int i = 0;
+  int np = 0;
+  piece = mystrsep(&tp, 0);
+  while (piece) {
+    if (*piece != '\0') {
+      switch (i) {
+        case 0: {
+          np++;
+          break;
+        }
+        case 1: {
+          numaliasf = atoi(piece);
+          if (numaliasf < 1) {
+            numaliasf = 0;
+            aliasf = NULL;
+            aliasflen = NULL;
+            HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
+                             af->getlinenum());
+            return 1;
+          }
+          aliasf =
+              (unsigned short**)malloc(numaliasf * sizeof(unsigned short*));
+          aliasflen =
+              (unsigned short*)malloc(numaliasf * sizeof(unsigned short));
+          if (!aliasf || !aliasflen) {
+            discard_flag_alias_table(aliasf, aliasflen, numaliasf, 0);
+            return 1;
+          }
+          np++;
+          break;
+        }
+        default:
+          break;
+      }
+      i++;
+    }
+    piece = mystrsep(&tp, 0);
+  }
+  if (np != 2) {
+    discard_flag_alias_table(aliasf, aliasflen, numaliasf, 0);
+    HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+                     af->getlinenum());
+    return 1;
+  }
+
+  /* now parse the numaliasf lines to read in the remainder of the table */
+  char* nl;
+  for (int j = 0; j < numaliasf; j++) {
+    if ((nl = af->getline()) == NULL) {
+      // The file ended before the announced number of rows. Rows j and up
+      // were never initialized, so the table must not stay installed.
+      discard_flag_alias_table(aliasf, aliasflen, numaliasf, j);
+      return 1;
+    }
+    mychomp(nl);
+    tp = nl;
+    i = 0;
+    aliasf[j] = NULL;
+    aliasflen[j] = 0;
+    piece = mystrsep(&tp, 0);
+    while (piece) {
+      if (*piece != '\0') {
+        switch (i) {
+          case 0: {
+            if (strncmp(piece, "AF", 2) != 0) {
+              // rows 0..j are initialized at this point (row j is NULL)
+              discard_flag_alias_table(aliasf, aliasflen, numaliasf, j + 1);
+              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+                               af->getlinenum());
+              return 1;
+            }
+            break;
+          }
+          case 1: {
+            int n = decode_flags(&(aliasf[j]), piece, af);
+            if (n < 0) {
+              // Allocation failed inside decode_flags(). Casting -1 to
+              // unsigned short would give 65535 and make std::sort walk
+              // from a NULL pointer; leave the row empty instead so the
+              // check after the loop rejects the table.
+              aliasf[j] = NULL;
+              break;
+            }
+            aliasflen[j] = (unsigned short)n;
+            std::sort(aliasf[j], aliasf[j] + aliasflen[j]);
+            break;
+          }
+          default:
+            break;
+        }
+        i++;
+      }
+      piece = mystrsep(&tp, 0);
+    }
+    if (!aliasf[j]) {
+      discard_flag_alias_table(aliasf, aliasflen, numaliasf, j + 1);
+      HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+                       af->getlinenum());
+      return 1;
+    }
+  }
+  return 0;
+}
+
+// Returns 1 when a flag alias table is present, 0 otherwise.
+int HashMgr::is_aliasf() {
+  return aliasf ? 1 : 0;
+}
+
+// Look up the flag vector stored under the 1-based alias `index`.
+// On success *fvec points at the stored vector (not a copy) and its length is
+// returned. For an index outside 1..numaliasf a warning carrying the current
+// line number of `af` is issued, *fvec is set to NULL and 0 is returned.
+int HashMgr::get_aliasf(int index, unsigned short** fvec, FileMgr* af) {
+  const bool in_range = (index > 0) && (index <= numaliasf);
+  if (!in_range) {
+    HUNSPELL_WARNING(stderr, "error: line %d: bad flag alias index: %d\n",
+                     af->getlinenum(), index);
+    *fvec = NULL;
+    return 0;
+  }
+  const int slot = index - 1;
+  *fvec = aliasf[slot];
+  return aliasflen[slot];
+}
+
+// Release a morphological alias table whose first `filled` rows have been
+// initialized, and reset the caller's bookkeeping to "no table".
+static void discard_morph_alias_table(char**& rows, int& count, int filled) {
+  if (rows) {
+    for (int k = 0; k < filled; k++)
+      free(rows[k]);
+    free(rows);
+  }
+  rows = NULL;
+  count = 0;
+}
+
+/* parse morph alias definitions */
+// `line` is the header line ("AM <count>"); the following <count> lines are
+// read from `af`, each holding "AM <description>". The description is the
+// rest of the line and is stored reversed when complexprefixes is set.
+// Returns 0 on success. On any error 1 is returned and no table is left
+// behind (numaliasm == 0, aliasm == NULL), so that later lookups can never
+// see a partially built table.
+int HashMgr::parse_aliasm(char* line, FileMgr* af) {
+  if (numaliasm != 0) {
+    HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
+                     af->getlinenum());
+    return 1;
+  }
+  char* tp = line;
+  char* piece;
+  int i = 0;
+  int np = 0;
+  piece = mystrsep(&tp, 0);
+  while (piece) {
+    if (*piece != '\0') {
+      switch (i) {
+        case 0: {
+          np++;
+          break;
+        }
+        case 1: {
+          numaliasm = atoi(piece);
+          if (numaliasm < 1) {
+            // do not keep a zero or negative count around
+            numaliasm = 0;
+            HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
+                             af->getlinenum());
+            return 1;
+          }
+          aliasm = (char**)malloc(numaliasm * sizeof(char*));
+          if (!aliasm) {
+            numaliasm = 0;
+            return 1;
+          }
+          np++;
+          break;
+        }
+        default:
+          break;
+      }
+      i++;
+    }
+    piece = mystrsep(&tp, 0);
+  }
+  if (np != 2) {
+    discard_morph_alias_table(aliasm, numaliasm, 0);
+    HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+                     af->getlinenum());
+    return 1;
+  }
+
+  /* now parse the numaliasm lines to read in the remainder of the table */
+  char* nl;
+  for (int j = 0; j < numaliasm; j++) {
+    if ((nl = af->getline()) == NULL) {
+      // The file ended before the announced number of rows. Rows j and up
+      // were never initialized, so the table must not stay installed.
+      discard_morph_alias_table(aliasm, numaliasm, j);
+      return 1;
+    }
+    mychomp(nl);
+    tp = nl;
+    i = 0;
+    aliasm[j] = NULL;
+    piece = mystrsep(&tp, ' ');
+    while (piece) {
+      if (*piece != '\0') {
+        switch (i) {
+          case 0: {
+            if (strncmp(piece, "AM", 2) != 0) {
+              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+                               af->getlinenum());
+              // rows 0..j are initialized at this point (row j is NULL)
+              discard_morph_alias_table(aliasm, numaliasm, j + 1);
+              return 1;
+            }
+            break;
+          }
+          case 1: {
+            // add the remaining of the line
+            // (restores the separator that mystrsep() overwrote and moves tp
+            // to the end so that no further pieces are produced)
+            if (*tp) {
+              *(tp - 1) = ' ';
+              tp = tp + strlen(tp);
+            }
+            std::string chunk(piece);
+            if (complexprefixes) {
+              if (utf8)
+                reverseword_utf(chunk);
+              else
+                reverseword(chunk);
+            }
+            aliasm[j] = mystrdup(chunk.c_str());
+            break;
+          }
+          default:
+            break;
+        }
+        i++;
+      }
+      piece = mystrsep(&tp, ' ');
+    }
+    if (!aliasm[j]) {
+      discard_morph_alias_table(aliasm, numaliasm, j + 1);
+      HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+                       af->getlinenum());
+      return 1;
+    }
+  }
+  return 0;
+}
+
+// Returns 1 when a morphological alias table is present, 0 otherwise.
+int HashMgr::is_aliasm() {
+  return aliasm ? 1 : 0;
+}
+
+// Look up the morphological description stored under the 1-based alias
+// `index`. Returns the stored string itself (not a copy); for an index
+// outside 1..numaliasm a warning is issued and NULL is returned.
+char* HashMgr::get_aliasm(int index) {
+  if (index < 1 || index > numaliasm) {
+    HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index);
+    return NULL;
+  }
+  return aliasm[index - 1];
+}
diff --git a/extensions/spellcheck/hunspell/src/hashmgr.hxx b/extensions/spellcheck/hunspell/src/hashmgr.hxx
new file mode 100644
index 000000000..95b06b13f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/hashmgr.hxx
@@ -0,0 +1,149 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _HASHMGR_HXX_
+#define _HASHMGR_HXX_
+
+#include "hunvisapi.h"
+
+#include <stdio.h>
+#include <string>
+#include <vector>
+
+#include "htypes.hxx"
+#include "filemgr.hxx"
+#include "w_char.hxx"
+
+// Textual encodings of affix flags:
+//   FLAG_CHAR - one-character flags (the initial mode)
+//   FLAG_LONG - two-character flags
+//   FLAG_NUM  - decimal numbers separated by commas
+//   FLAG_UNI  - UTF-8 characters
+enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI };
+
+// Hash table of dictionary words together with the settings of the affix
+// file that are needed to read and decode the word list (flag mode, encoding,
+// language, alias tables).
+class LIBHUNSPELL_DLL_EXPORTED HashMgr {
+ int tablesize; // number of buckets in tableptr
+ struct hentry** tableptr; // bucket array; entries are chained via hentry::next
+ flag flag_mode; // how flags are written in the files (see enum flag)
+ int complexprefixes; // set when the affix file has COMPLEXPREFIXES
+ int utf8; // set when the encoding given by SET is UTF-8
+ unsigned short forbiddenword; // flag id read from FORBIDDENWORD
+ int langnum; // language number derived from LANG
+ char* enc; // encoding name read from SET
+ char* lang; // language name read from LANG
+ struct cs_info* csconv; // character set table used for non-UTF-8 encodings
+ char* ignorechars; // characters read from IGNORE
+ std::vector<w_char> ignorechars_utf16; // filled together with ignorechars
+ int numaliasf; // flag vector `compression' with aliases
+ unsigned short** aliasf; // alias table: one flag vector per alias
+ unsigned short* aliasflen; // length of each vector in aliasf
+ int numaliasm; // morphological description `compression' with aliases
+ char** aliasm; // alias table: one description string per alias
+
+ public:
+ // tpath: dictionary file, apath: affix file, key: handed on to FileMgr
+ HashMgr(const char* tpath, const char* apath, const char* key = NULL);
+ ~HashMgr();
+
+ struct hentry* lookup(const char*) const;
+ // bucket index of a word
+ int hash(const char*) const;
+ // iterate over all entries; start with col = -1 and hp = NULL, ends with NULL
+ struct hentry* walk_hashtable(int& col, struct hentry* hp) const;
+
+ // add a custom word without flags
+ int add(const std::string& word);
+ // add a custom word using the flags of the existing entry `pattern`
+ int add_with_affix(const char* word, const char* pattern);
+ // mark all homonyms of a word as forbidden
+ int remove(const char* word);
+ // decode a flag field into a malloc()ed array; returns its length or -1
+ int decode_flags(unsigned short** result, char* flags, FileMgr* af);
+ // decode a single flag
+ unsigned short decode_flag(const char* flag);
+ // textual form of a flag id, allocated with mystrdup()
+ char* encode_flag(unsigned short flag);
+ int is_aliasf();
+ // flag vector for a 1-based alias index; returns its length, 0 if invalid
+ int get_aliasf(int index, unsigned short** fvec, FileMgr* af);
+ int is_aliasm();
+ // description for a 1-based alias index, NULL if invalid
+ char* get_aliasm(int index);
+
+ private:
+ // word length and capitalization type of a word
+ int get_clen_and_captype(const std::string& word, int* captype);
+ // read the dictionary file and fill the hash table
+ int load_tables(const char* tpath, const char* key);
+ int add_word(const char* word,
+ int wbl,
+ int wcl,
+ unsigned short* ap,
+ int al,
+ const char* desc,
+ bool onlyupcase);
+ // read the settings listed above from the affix file
+ int load_config(const char* affpath, const char* key);
+ // read the AF (flag alias) table
+ int parse_aliasf(char* line, FileMgr* af);
+ // add the hidden init-capitalized variant of a word where needed
+ int add_hidden_capitalized_word(const std::string& word,
+ int wcl,
+ unsigned short* flags,
+ int al,
+ char* dp,
+ int captype);
+ // read the AM (morphological alias) table
+ int parse_aliasm(char* line, FileMgr* af);
+ // clear the forbidden flag of a word; returns 1 if the word is unknown
+ int remove_forbidden_flag(const std::string& word);
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/htypes.hxx b/extensions/spellcheck/hunspell/src/htypes.hxx
new file mode 100644
index 000000000..d24439441
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/htypes.hxx
@@ -0,0 +1,71 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef _HTYPES_HXX_
+#define _HTYPES_HXX_
+
+// number of bits the hash value is rotated by per input byte
+#define ROTATE_LEN 5
+
+// Shift v left by q bits and OR in the q bits found above bit position
+// (32 - q). The expansion assigns to v and already ends in a semicolon.
+// NOTE(review): the constant 32 suggests a 32-bit value was intended; for a
+// wider v this is a shift-and-mix rather than a true rotation - confirm
+// before changing, since it determines the bucket of every word.
+#define ROTATE(v, q) \
+ (v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q)) - 1));
+
+// hentry options
+// (bit flags; presumably stored in hentry::var - TODO confirm)
+#define H_OPT (1 << 0)
+#define H_OPT_ALIASM (1 << 1)
+#define H_OPT_PHON (1 << 2)
+
+// see also csutil.hxx
+// address of the first character of an entry's word
+#define HENTRY_WORD(h) &(h->word[0])
+
+// approx. number of user defined words
+#define USERWORD 1000
+
+// One dictionary word as stored in the hash table. The structure is
+// variable-sized: `word` is declared with one element, and the word text
+// occupies the memory that follows.
+struct hentry {
+ unsigned char blen; // word length in bytes
+ unsigned char clen; // word length in characters (different for UTF-8 enc.)
+ short alen; // length of affix flag vector
+ unsigned short* astr; // affix flag vector
+ struct hentry* next; // next word with same hash code
+ struct hentry* next_homonym; // next homonym word (with same hash code)
+ char var; // variable fields (only for special pronunciation yet)
+ char word[1]; // variable-length word (8-bit or UTF-8 encoding)
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/hunspell.cxx b/extensions/spellcheck/hunspell/src/hunspell.cxx
new file mode 100644
index 000000000..7ff1e2bcf
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/hunspell.cxx
@@ -0,0 +1,1895 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "hunspell.hxx"
+#include "hunspell.h"
+#ifndef MOZILLA_CLIENT
+#include "config.h"
+#endif
+#include "csutil.hxx"
+
+#include <limits>
+#include <string>
+
+// Input length limits in bytes: spell(), suggest() and analyze() return 0
+// when strlen(word) >= MAXWORDLEN (8-bit encodings) or >= MAXWORDUTF8LEN
+// (UTF-8 dictionaries).
+#define MAXWORDLEN 100
+#define MAXWORDUTF8LEN (MAXWORDLEN * 3)
+
+// Build a spell checker from an affix file and a dictionary file.
+//   affpath - path of the affix file (a copy is kept for add_dic())
+//   dpath   - path of the first dictionary file
+//   key     - passed through unchanged to HashMgr and AffixMgr
+Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key) {
+  // defaults, overwritten below once the affix manager has been loaded
+  encoding = NULL;
+  csconv = NULL;
+  utf8 = 0;
+  complexprefixes = 0;
+  affixpath = mystrdup(affpath);
+
+  // the hash manager comes first ...
+  pHMgr[0] = new HashMgr(dpath, affpath, key);
+  maxdic = pHMgr[0] ? 1 : 0;
+
+  // ... because the affix manager needs its lookup methods
+  pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);
+
+  // pull the dictionary-wide settings out of the affix manager
+  char* trystr = pAMgr->get_try_string();
+  encoding = pAMgr->get_encoding();
+  langnum = pAMgr->get_langnum();
+  utf8 = pAMgr->get_utf8();
+  if (!utf8) {
+    // 8-bit dictionaries need the case conversion table of their charset
+    csconv = get_current_cs(encoding);
+  }
+  complexprefixes = pAMgr->get_complexprefixes();
+  wordbreak = pAMgr->get_breaktable();
+
+  // the suggestion manager is created last
+  pSMgr = new SuggestMgr(trystr, MAXSUGGESTION, pAMgr);
+  if (trystr) {
+    free(trystr);
+  }
+}
+
+// Release the managers, every loaded dictionary and the owned strings.
+Hunspell::~Hunspell() {
+  delete pSMgr;
+  delete pAMgr;
+
+  // dictionaries 0 .. maxdic-1 were created by the constructor / add_dic()
+  int idx = 0;
+  while (idx < maxdic) {
+    delete pHMgr[idx];
+    ++idx;
+  }
+  maxdic = 0;
+
+  pSMgr = NULL;
+  pAMgr = NULL;
+
+#ifdef MOZILLA_CLIENT
+  delete[] csconv;
+#endif
+  csconv = NULL;
+
+  if (encoding) {
+    free(encoding);
+  }
+  encoding = NULL;
+
+  if (affixpath) {
+    free(affixpath);
+  }
+  affixpath = NULL;
+}
+
+// Load one more dictionary file, reusing the affix file given to the
+// constructor. Returns 0 on success and 1 when all MAXDIC slots are in use,
+// no affix path is stored, or the hash manager could not be created.
+int Hunspell::add_dic(const char* dpath, const char* key) {
+  const bool slot_available = (maxdic != MAXDIC) && affixpath;
+  if (!slot_available) {
+    return 1;
+  }
+
+  pHMgr[maxdic] = new HashMgr(dpath, affixpath, key);
+  if (!pHMgr[maxdic]) {
+    return 1;
+  }
+
+  ++maxdic;
+  return 0;
+}
+
+// Copy src into dest without its leading blanks and trailing periods.
+//   dest     - receives the cleaned word
+//   dest_utf - receives the UTF-16 form of dest (filled for UTF-8
+//              dictionaries only)
+//   nc       - receives the character count; left untouched when nothing
+//              remains after cleaning
+//   pcaptype - receives the capitalization type of the cleaned word
+//   pabbrev  - receives the number of trailing periods that were removed
+// Returns the byte length of the cleaned word (0 if nothing is left).
+size_t Hunspell::cleanword2(std::string& dest,
+                            std::vector<w_char>& dest_utf,
+                            const char* src,
+                            int* nc,
+                            int* pcaptype,
+                            size_t* pabbrev) {
+  dest.clear();
+  dest_utf.clear();
+
+  // step over the leading blanks
+  const char* start = src;
+  while (*start == ' ') {
+    ++start;
+  }
+
+  // count the trailing periods and leave them out of the copy
+  *pabbrev = 0;
+  int len = strlen(start);
+  for (; (len > 0) && (start[len - 1] == '.'); --len) {
+    ++(*pabbrev);
+  }
+
+  // an empty word has no capitalization
+  if (len <= 0) {
+    *pcaptype = NOCAP;
+    return 0;
+  }
+
+  dest.append(start, len);
+  len = dest.size();
+
+  if (!utf8) {
+    *pcaptype = get_captype(dest, csconv);
+    *nc = len;
+    return len;
+  }
+
+  *nc = u8_u16(dest_utf, dest);
+  *pcaptype = get_captype_utf8(dest_utf, langnum);
+  return len;
+}
+
+// Copy src into dest without its leading blanks and trailing periods and
+// classify the capitalization of the cleaned word.
+//   dest     - receives the cleaned word
+//   src      - NUL-terminated input word
+//   pcaptype - receives NOCAP, INITCAP, ALLCAP, HUHINITCAP or HUHCAP
+//   pabbrev  - receives the number of trailing periods that were removed
+//
+// Changes from the previous version, UTF-8 branch only:
+//   * nc (the character count) is now set; it used to stay 0, so the ALLCAP
+//     test below could never succeed for a UTF-8 word.
+//   * the conversion now works on the trimmed span, like the 8-bit branch,
+//     instead of on the untrimmed src.
+void Hunspell::cleanword(std::string& dest,
+                         const char* src,
+                         int* pcaptype,
+                         int* pabbrev) {
+  dest.clear();
+  const unsigned char* q = (const unsigned char*)src;
+  int firstcap = 0;
+
+  // first skip over any leading blanks
+  while (*q == ' ')
+    q++;
+
+  // now strip off any trailing periods (recording their presence)
+  *pabbrev = 0;
+  int nl = strlen((const char*)q);
+  while ((nl > 0) && (*(q + nl - 1) == '.')) {
+    nl--;
+    (*pabbrev)++;
+  }
+
+  // if no characters are left it can't be capitalized
+  if (nl <= 0) {
+    *pcaptype = NOCAP;
+    return;
+  }
+
+  // now determine the capitalization type of the first nl letters
+  int ncap = 0;      // characters that are upper case
+  int nneutral = 0;  // characters without a case distinction
+  int nc = 0;        // total number of characters examined
+
+  if (!utf8) {
+    for (int i = 0; i < nl; ++i) {
+      const unsigned char c = q[i];
+      nc++;
+      if (csconv[c].ccase)
+        ncap++;
+      if (csconv[c].cupper == csconv[c].clower)
+        nneutral++;
+      dest.push_back(c);
+    }
+    firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase;
+  } else {
+    std::vector<w_char> t;
+    // convert only the trimmed span [q, q + nl)
+    u8_u16(t, std::string((const char*)q, nl));
+    nc = t.size();
+    for (size_t i = 0; i < t.size(); ++i) {
+      unsigned short idx = (t[i].h << 8) + t[i].l;
+      unsigned short low = unicodetolower(idx, langnum);
+      if (idx != low)
+        ncap++;
+      if (unicodetoupper(idx, langnum) == low)
+        nneutral++;
+    }
+    u16_u8(dest, t);
+    if (ncap) {
+      // ncap > 0 implies t is not empty
+      unsigned short idx = (t[0].h << 8) + t[0].l;
+      firstcap = (idx != unicodetolower(idx, langnum));
+    }
+  }
+
+  // now finally set the captype
+  if (ncap == 0) {
+    *pcaptype = NOCAP;
+  } else if ((ncap == 1) && firstcap) {
+    *pcaptype = INITCAP;
+  } else if ((ncap == nc) || ((ncap + nneutral) == nc)) {
+    *pcaptype = ALLCAP;
+  } else if ((ncap > 1) && firstcap) {
+    *pcaptype = HUHINITCAP;
+  } else {
+    *pcaptype = HUHCAP;
+  }
+}
+
+// Upper-case the whole word in place, using the dictionary's encoding.
+void Hunspell::mkallcap(std::string& u8) {
+  if (!utf8) {
+    ::mkallcap(u8, csconv);
+    return;
+  }
+
+  // UTF-8: round-trip through UTF-16 for the case conversion
+  std::vector<w_char> wide;
+  u8_u16(wide, u8);
+  ::mkallcap_utf(wide, langnum);
+  u16_u8(u8, wide);
+}
+
+// Lower-case the word in place. For UTF-8 dictionaries the conversion is
+// done on u16 and u8 is regenerated from it; otherwise only u8 is touched.
+// Returns the resulting byte length of u8.
+int Hunspell::mkallsmall2(std::string& u8, std::vector<w_char>& u16) {
+  if (!utf8) {
+    ::mkallsmall(u8, csconv);
+    return u8.size();
+  }
+
+  ::mkallsmall_utf(u16, langnum);
+  u16_u8(u8, u16);
+  return u8.size();
+}
+
+// Return a copy of source in which every UTF-8 encoded sharp s (C3 9F) is
+// replaced by its Latin-1 byte (DF).
+std::string Hunspell::sharps_u8_l1(const std::string& source) {
+  std::string latin1 = source;
+  mystrrep(latin1, "\xC3\x9F", "\xDF");
+  return latin1;
+}
+
+// Recursively try the "ss" <-> sharp s permutations of base.
+//   base   - candidate word; modified in place while searching
+//   n_pos  - offset at which the search for the next "ss" starts
+//   n      - number of "ss" pairs handled so far (bounded by MAXSHARPS)
+//   repnum - number of pairs currently replaced by a sharp s
+//   info, root - forwarded to checkword()
+// Returns the entry of the first accepted permutation, or NULL.
+hentry* Hunspell::spellsharps(std::string& base,
+                              size_t n_pos,
+                              int n,
+                              int repnum,
+                              int* info,
+                              char** root) {
+  const size_t hit = base.find("ss", n_pos);
+
+  // no further pair to permute: check the candidate, if anything was replaced
+  if (hit == std::string::npos || n >= MAXSHARPS) {
+    if (repnum <= 0)
+      return NULL;
+    if (utf8)
+      return checkword(base.c_str(), info, root);
+    std::string latin1(sharps_u8_l1(base));
+    return checkword(latin1.c_str(), info, root);
+  }
+
+  // first alternative: this pair becomes a sharp s (UTF-8 bytes C3 9F)
+  base[hit] = '\xC3';
+  base[hit + 1] = '\x9F';
+  hentry* found = spellsharps(base, hit + 2, n + 1, repnum + 1, info, root);
+  if (found)
+    return found;
+
+  // second alternative: restore the pair and continue behind it
+  base[hit] = 's';
+  base[hit + 1] = 's';
+  return spellsharps(base, hit + 2, n + 1, repnum, info, root);
+}
+
+// Non-zero when the entry carries the dictionary's KEEPCASE flag.
+int Hunspell::is_keepcase(const hentry* rv) {
+  if (!pAMgr || !rv->astr)
+    return 0;
+  if (!pAMgr->get_keepcase())
+    return 0;
+  return TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen) ? 1 : 0;
+}
+
+/* Put a copy of word in front of the suggestion list and return the new
+ * number of suggestions. The list is left unchanged (and ns returned as is)
+ * when *slst is NULL or the copy cannot be allocated. A list that already
+ * holds MAXSUGGESTION entries loses its last one. */
+int Hunspell::insert_sug(char*** slst, const char* word, int ns) {
+  char** list = *slst;
+  if (list == NULL)
+    return ns;
+
+  char* copy = mystrdup(word);
+  if (copy == NULL)
+    return ns;
+
+  // full list: drop the tail entry to make room
+  if (ns == MAXSUGGESTION) {
+    --ns;
+    free(list[ns]);
+  }
+
+  // shift everything one slot towards the end
+  int slot = ns;
+  while (slot > 0) {
+    list[slot] = list[slot - 1];
+    --slot;
+  }
+  list[0] = copy;
+  return ns + 1;
+}
+
+// Spell check one word.
+//   word - NUL-terminated word in the dictionary encoding
+//   info - optional; receives a bit mask of SPELL_* flags (reset to 0 first)
+//   root - optional; receives a mystrdup()'ed root of the accepted word, or
+//          NULL
+// Returns 0 for a rejected word, otherwise non-zero (HUNSPELL_OK,
+// HUNSPELL_OK_WARN or 1).
+//
+// Changes from the previous version:
+//   * SPELL_* flags are combined with |= and cleared with &= ~ rather than
+//     += and -=. They are tested as bit masks below, and the ALLCAP case
+//     falls through into INITCAP, so += added SPELL_ORIGCAP twice and
+//     carried into a different bit.
+//   * the apostrophe position is looked up again after mkallsmall2(), as the
+//     comment there always intended; lower-casing can change the byte length
+//     of a UTF-8 string.
+//   * *root is set to NULL before the first early return.
+int Hunspell::spell(const char* word, int* info, char** root) {
+  struct hentry* rv = NULL;
+
+  int info2 = 0;
+  if (!info)
+    info = &info2;
+  else
+    *info = 0;
+
+  // give *root a defined value on every return path
+  if (root)
+    *root = NULL;
+
+  // Hunspell supports XML input of the simplified API (see manual)
+  if (strcmp(word, SPELL_XML) == 0)
+    return 1;
+  int nc = strlen(word);
+  if (utf8) {
+    if (nc >= MAXWORDUTF8LEN)
+      return 0;
+  } else {
+    if (nc >= MAXWORDLEN)
+      return 0;
+  }
+  int captype = NOCAP;
+  size_t abbv = 0;
+  size_t wl = 0;
+
+  std::string scw;
+  std::vector<w_char> sunicw;
+
+  // input conversion
+  RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
+  {
+    std::string wspace;
+
+    int convstatus = rl ? rl->conv(word, wspace) : 0;
+    if (convstatus < 0)
+      return 0;
+    else if (convstatus > 0)
+      wl = cleanword2(scw, sunicw, wspace.c_str(), &nc, &captype, &abbv);
+    else
+      wl = cleanword2(scw, sunicw, word, &nc, &captype, &abbv);
+  }
+
+#ifdef MOZILLA_CLIENT
+  // accept the abbreviated words without dots
+  // workaround for the incomplete tokenization of Mozilla
+  abbv = 1;
+#endif
+
+  if (wl == 0 || maxdic == 0)
+    return 1;
+
+  // allow numbers with dots, dashes and commas (but forbid double separators:
+  // "..", "--" etc.)
+  enum { NBEGIN, NNUM, NSEP };
+  int nstate = NBEGIN;
+  size_t i;
+
+  for (i = 0; (i < wl); i++) {
+    if ((scw[i] <= '9') && (scw[i] >= '0')) {
+      nstate = NNUM;
+    } else if ((scw[i] == ',') || (scw[i] == '.') || (scw[i] == '-')) {
+      if ((nstate == NSEP) || (i == 0))
+        break;
+      nstate = NSEP;
+    } else
+      break;
+  }
+  if ((i == wl) && (nstate == NNUM))
+    return 1;
+
+  switch (captype) {
+    case HUHCAP:
+    /* FALLTHROUGH */
+    case HUHINITCAP:
+      *info |= SPELL_ORIGCAP;
+    /* FALLTHROUGH */
+    case NOCAP:
+      rv = checkword(scw.c_str(), info, root);
+      if ((abbv) && !(rv)) {
+        std::string u8buffer(scw);
+        u8buffer.push_back('.');
+        rv = checkword(u8buffer.c_str(), info, root);
+      }
+      break;
+    case ALLCAP: {
+      *info |= SPELL_ORIGCAP;
+      rv = checkword(scw.c_str(), info, root);
+      if (rv)
+        break;
+      if (abbv) {
+        std::string u8buffer(scw);
+        u8buffer.push_back('.');
+        rv = checkword(u8buffer.c_str(), info, root);
+        if (rv)
+          break;
+      }
+      // Spec. prefix handling for Catalan, French, Italian:
+      // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
+      size_t apos = pAMgr ? scw.find('\'') : std::string::npos;
+      if (apos != std::string::npos) {
+        mkallsmall2(scw, sunicw);
+        // conversion may result in string with different len to
+        // pre-mkallsmall2, so re-scan
+        apos = scw.find('\'');
+        if (apos != std::string::npos && apos < scw.size() - 1) {
+          std::string part1 = scw.substr(0, apos + 1);
+          std::string part2 = scw.substr(apos + 1);
+          if (utf8) {
+            std::vector<w_char> part1u, part2u;
+            u8_u16(part1u, part1);
+            u8_u16(part2u, part2);
+            mkinitcap2(part2, part2u);
+            scw = part1 + part2;
+            sunicw = part1u;
+            sunicw.insert(sunicw.end(), part2u.begin(), part2u.end());
+            rv = checkword(scw.c_str(), info, root);
+            if (rv)
+              break;
+          } else {
+            mkinitcap2(part2, sunicw);
+            scw = part1 + part2;
+            rv = checkword(scw.c_str(), info, root);
+            if (rv)
+              break;
+          }
+          mkinitcap2(scw, sunicw);
+          rv = checkword(scw.c_str(), info, root);
+          if (rv)
+            break;
+        }
+      }
+      if (pAMgr && pAMgr->get_checksharps() &&
+          scw.find("SS") != std::string::npos) {
+
+        mkallsmall2(scw, sunicw);
+        std::string u8buffer(scw);
+        rv = spellsharps(u8buffer, 0, 0, 0, info, root);
+        if (!rv) {
+          mkinitcap2(scw, sunicw);
+          rv = spellsharps(scw, 0, 0, 0, info, root);
+        }
+        if ((abbv) && !(rv)) {
+          u8buffer.push_back('.');
+          rv = spellsharps(u8buffer, 0, 0, 0, info, root);
+          if (!rv) {
+            u8buffer = std::string(scw);
+            u8buffer.push_back('.');
+            rv = spellsharps(u8buffer, 0, 0, 0, info, root);
+          }
+        }
+        if (rv)
+          break;
+      }
+    }
+    /* FALLTHROUGH */
+    case INITCAP: {
+
+      // idempotent: also reached by falling through from ALLCAP
+      *info |= SPELL_ORIGCAP;
+      mkallsmall2(scw, sunicw);
+      std::string u8buffer(scw);
+      mkinitcap2(scw, sunicw);
+      if (captype == INITCAP)
+        *info |= SPELL_INITCAP;
+      rv = checkword(scw.c_str(), info, root);
+      if (captype == INITCAP)
+        *info &= ~SPELL_INITCAP;
+      // forbid bad capitalization
+      // (for example, ijs -> Ijs instead of IJs in Dutch)
+      // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
+      if (*info & SPELL_FORBIDDEN) {
+        rv = NULL;
+        break;
+      }
+      if (rv && is_keepcase(rv) && (captype == ALLCAP))
+        rv = NULL;
+      if (rv)
+        break;
+
+      rv = checkword(u8buffer.c_str(), info, root);
+      if (abbv && !rv) {
+        u8buffer.push_back('.');
+        rv = checkword(u8buffer.c_str(), info, root);
+        if (!rv) {
+          u8buffer = scw;
+          u8buffer.push_back('.');
+          if (captype == INITCAP)
+            *info |= SPELL_INITCAP;
+          rv = checkword(u8buffer.c_str(), info, root);
+          if (captype == INITCAP)
+            *info &= ~SPELL_INITCAP;
+          if (rv && is_keepcase(rv) && (captype == ALLCAP))
+            rv = NULL;
+          break;
+        }
+      }
+      if (rv && is_keepcase(rv) &&
+          ((captype == ALLCAP) ||
+           // if CHECKSHARPS: KEEPCASE words with \xDF are allowed
+           // in INITCAP form, too.
+           !(pAMgr->get_checksharps() &&
+             ((utf8 && u8buffer.find("\xC3\x9F") != std::string::npos) ||
+              (!utf8 && u8buffer.find('\xDF') != std::string::npos)))))
+        rv = NULL;
+      break;
+    }
+  }
+
+  if (rv) {
+    if (pAMgr && pAMgr->get_warn() && rv->astr &&
+        TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
+      *info |= SPELL_WARN;
+      if (pAMgr->get_forbidwarn())
+        return 0;
+      return HUNSPELL_OK_WARN;
+    }
+    return HUNSPELL_OK;
+  }
+
+  // recursive breaking at break points
+  if (wordbreak) {
+
+    int nbr = 0;
+    wl = scw.size();
+    int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;
+
+    // calculate break points for recursion limit
+    for (int j = 0; j < numbreak; j++) {
+      size_t len = strlen(wordbreak[j]);
+      size_t pos = 0;
+      while ((pos = scw.find(wordbreak[j], pos, len)) != std::string::npos) {
+        ++nbr;
+        pos += len;
+      }
+    }
+    if (nbr >= 10)
+      return 0;
+
+    // check boundary patterns (^begin and end$)
+    for (int j = 0; j < numbreak; j++) {
+      size_t plen = strlen(wordbreak[j]);
+      if (plen == 1 || plen > wl)
+        continue;
+
+      if (wordbreak[j][0] == '^' &&
+          scw.compare(0, plen - 1, wordbreak[j] + 1, plen - 1) == 0 &&
+          spell(scw.c_str() + plen - 1))
+        return 1;
+
+      if (wordbreak[j][plen - 1] == '$' &&
+          scw.compare(wl - plen + 1, plen - 1, wordbreak[j], plen - 1) == 0) {
+        // temporarily cut the word in front of the pattern
+        char r = scw[wl - plen + 1];
+        scw[wl - plen + 1] = '\0';
+        if (spell(scw.c_str()))
+          return 1;
+        scw[wl - plen + 1] = r;
+      }
+    }
+
+    // other patterns
+    for (int j = 0; j < numbreak; j++) {
+      size_t plen = strlen(wordbreak[j]);
+      size_t found = scw.find(wordbreak[j]);
+      if ((found > 0) && (found < wl - plen)) {
+        if (!spell(scw.c_str() + found + plen))
+          continue;
+        char r = scw[found];
+        scw[found] = '\0';
+        // examine 2 sides of the break point
+        if (spell(scw.c_str()))
+          return 1;
+        scw[found] = r;
+
+        // LANG_hu: spec. dash rule
+        if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) {
+          r = scw[found + 1];
+          scw[found + 1] = '\0';
+          if (spell(scw.c_str()))
+            return 1;  // check the first part with dash
+          scw[found + 1] = r;
+        }
+        // end of LANG specific region
+      }
+    }
+  }
+
+  return 0;
+}
+
+// Look one word form up: first as a dictionary entry, then by affix
+// stripping, then as a compound.
+//   w    - NUL-terminated word form
+//   info - optional bit mask; SPELL_FORBIDDEN and SPELL_COMPOUND are set in
+//          it, and SPELL_INITCAP is read from it
+//   root - optional; receives a mystrdup()'ed copy of the matching entry's
+//          word when the match comes from affix or compound analysis
+// Returns the matching entry, or NULL if the form is unknown or forbidden.
+//
+// Change from the previous version: flags are set with |= instead of +=.
+// spell() calls this function several times with the same info, and adding
+// an already set flag a second time cleared it and set a different bit.
+struct hentry* Hunspell::checkword(const char* w, int* info, char** root) {
+  struct hentry* he = NULL;
+  bool usebuffer = false;
+  int len, i;
+  std::string w2;
+  const char* word;
+
+  // drop the characters listed as ignorable by the affix file
+  char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL;
+  if (ignoredchars != NULL) {
+    w2.assign(w);
+    if (utf8) {
+      const std::vector<w_char>& ignoredchars_utf16 =
+          pAMgr->get_ignore_utf16();
+      remove_ignored_chars_utf(w2, ignoredchars_utf16);
+    } else {
+      remove_ignored_chars(w2, ignoredchars);
+    }
+    word = w2.c_str();
+    usebuffer = true;
+  } else
+    word = w;
+
+  len = strlen(word);
+
+  if (!len)
+    return NULL;
+
+  // word reversing wrapper for complex prefixes
+  if (complexprefixes) {
+    if (!usebuffer) {
+      w2.assign(word);
+      usebuffer = true;
+    }
+    if (utf8)
+      reverseword_utf(w2);
+    else
+      reverseword(w2);
+  }
+
+  if (usebuffer) {
+    word = w2.c_str();
+  }
+
+  // look word in hash table
+  for (i = 0; (i < maxdic) && !he; i++) {
+    he = (pHMgr[i])->lookup(word);
+
+    // check forbidden and onlyincompound words
+    if ((he) && (he->astr) && (pAMgr) &&
+        TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
+      if (info)
+        *info |= SPELL_FORBIDDEN;
+      // LANG_hu section: set dash information for suggestions
+      if (langnum == LANG_hu) {
+        if (pAMgr->get_compoundflag() &&
+            TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
+          if (info)
+            *info |= SPELL_COMPOUND;
+        }
+      }
+      return NULL;
+    }
+
+    // he = next not needaffix, onlyincompound homonym or onlyupcase word
+    while (he && (he->astr) && pAMgr &&
+           ((pAMgr->get_needaffix() &&
+             TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
+            (pAMgr->get_onlyincompound() &&
+             TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
+            (info && (*info & SPELL_INITCAP) &&
+             TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))))
+      he = he->next_homonym;
+  }
+
+  // check with affixes
+  if (!he && pAMgr) {
+    // try stripping off affixes
+    he = pAMgr->affix_check(word, len, 0);
+
+    // check compound restriction and onlyupcase
+    if (he && he->astr &&
+        ((pAMgr->get_onlyincompound() &&
+          TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
+         (info && (*info & SPELL_INITCAP) &&
+          TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
+      he = NULL;
+    }
+
+    if (he) {
+      if ((he->astr) && (pAMgr) &&
+          TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
+        if (info)
+          *info |= SPELL_FORBIDDEN;
+        return NULL;
+      }
+      if (root) {
+        std::string word_root(he->word);
+        if (complexprefixes) {
+          if (utf8)
+            reverseword_utf(word_root);
+          else
+            reverseword(word_root);
+        }
+        *root = mystrdup(word_root.c_str());
+      }
+      // try check compound word
+    } else if (pAMgr->get_compound()) {
+      struct hentry* rwords[100];  // buffer for COMPOUND pattern checking
+      he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL,
+                                 (hentry**)&rwords, 0, 0, info);
+      // LANG_hu section: `moving rule' with last dash
+      if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) {
+        char* dup = mystrdup(word);
+        if (!dup)
+          return NULL;
+        dup[len - 1] = '\0';
+        he = pAMgr->compound_check(dup, len - 1, -5, 0, 100, 0, NULL,
+                                   (hentry**)&rwords, 1, 0, info);
+        free(dup);
+      }
+      // end of LANG specific region
+      if (he) {
+        if (root) {
+          std::string word_root(he->word);
+          if (complexprefixes) {
+            if (utf8)
+              reverseword_utf(word_root);
+            else
+              reverseword(word_root);
+          }
+          *root = mystrdup(word_root.c_str());
+        }
+        if (info)
+          *info |= SPELL_COMPOUND;
+      }
+    }
+  }
+
+  return he;
+}
+
+// Build the list of suggestions for a (presumably misspelled) word.
+//   slst - receives a malloc'ed array of malloc'ed strings, or NULL when no
+//          suggestion is returned
+//   word - NUL-terminated input word
+// Returns the number of suggestions stored in *slst. 0 is also returned when
+// there is no suggestion manager or dictionary, when the word is too long,
+// when input conversion fails or when nothing is left after cleaning; -1 is
+// returned when the FORCEUCASE list cannot be allocated.
+int Hunspell::suggest(char*** slst, const char* word) {
+ int onlycmpdsug = 0;
+ if (!pSMgr || maxdic == 0)
+ return 0;
+ *slst = NULL;
+ // process XML input of the simplified API (see manual)
+ if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
+ return spellml(slst, word);
+ }
+ int nc = strlen(word);
+ if (utf8) {
+ if (nc >= MAXWORDUTF8LEN)
+ return 0;
+ } else {
+ if (nc >= MAXWORDLEN)
+ return 0;
+ }
+ int captype = NOCAP;
+ size_t abbv = 0;
+ size_t wl = 0;
+
+ std::string scw;
+ std::vector<w_char> sunicw;
+
+ // input conversion
+ RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
+ {
+ std::string wspace;
+
+ int convstatus = rl ? rl->conv(word, wspace) : 0;
+ if (convstatus < 0)
+ return 0;
+ else if (convstatus > 0)
+ wl = cleanword2(scw, sunicw, wspace.c_str(), &nc, &captype, &abbv);
+ else
+ wl = cleanword2(scw, sunicw, word, &nc, &captype, &abbv);
+
+ if (wl == 0)
+ return 0;
+ }
+
+ int ns = 0;
+ int capwords = 0;
+
+ // check capitalized form for FORCEUCASE
+ if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
+ int info = SPELL_ORIGCAP;
+ if (checkword(scw.c_str(), &info, NULL)) {
+ std::string form(scw);
+ mkinitcap(form);
+
+ char** wlst = (char**)malloc(MAXSUGGESTION * sizeof(char*));
+ if (wlst == NULL)
+ return -1;
+ *slst = wlst;
+ // NOTE(review): the mystrdup() result is not checked; on allocation
+ // failure the single returned suggestion would be NULL.
+ wlst[0] = mystrdup(form.c_str());
+ for (int i = 1; i < MAXSUGGESTION; ++i) {
+ wlst[i] = NULL;
+ }
+
+ return 1;
+ }
+ }
+
+ switch (captype) {
+ case NOCAP: {
+ ns = pSMgr->suggest(slst, scw.c_str(), ns, &onlycmpdsug);
+ break;
+ }
+
+ case INITCAP: {
+ capwords = 1;
+ ns = pSMgr->suggest(slst, scw.c_str(), ns, &onlycmpdsug);
+ if (ns == -1)
+ break;
+ std::string wspace(scw);
+ mkallsmall2(wspace, sunicw);
+ ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug);
+ break;
+ }
+ case HUHINITCAP:
+ capwords = 1;
+ /* FALLTHROUGH */
+ case HUHCAP: {
+ ns = pSMgr->suggest(slst, scw.c_str(), ns, &onlycmpdsug);
+ if (ns != -1) {
+ // something.The -> something. The
+ size_t dot_pos = scw.find('.');
+ if (dot_pos != std::string::npos) {
+ std::string postdot = scw.substr(dot_pos + 1);
+ int captype_;
+ if (utf8) {
+ std::vector<w_char> postdotu;
+ u8_u16(postdotu, postdot);
+ captype_ = get_captype_utf8(postdotu, langnum);
+ } else {
+ captype_ = get_captype(postdot, csconv);
+ }
+ if (captype_ == INITCAP) {
+ std::string str(scw);
+ str.insert(dot_pos + 1, 1, ' ');
+ ns = insert_sug(slst, str.c_str(), ns);
+ }
+ }
+
+ std::string wspace;
+
+ if (captype == HUHINITCAP) {
+ // TheOpenOffice.org -> The OpenOffice.org
+ wspace = scw;
+ mkinitsmall2(wspace, sunicw);
+ ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug);
+ }
+ wspace = scw;
+ mkallsmall2(wspace, sunicw);
+ if (spell(wspace.c_str()))
+ ns = insert_sug(slst, wspace.c_str(), ns);
+ int prevns = ns;
+ ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug);
+ if (captype == HUHINITCAP) {
+ mkinitcap2(wspace, sunicw);
+ if (spell(wspace.c_str()))
+ ns = insert_sug(slst, wspace.c_str(), ns);
+ ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug);
+ }
+ // aNew -> "a New" (instead of "a new")
+ for (int j = prevns; j < ns; j++) {
+ char* space = strchr((*slst)[j], ' ');
+ if (space) {
+ size_t slen = strlen(space + 1);
+ // different case after space (need capitalisation)
+ if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) {
+ std::string first((*slst)[j], space + 1);
+ std::string second(space + 1);
+ std::vector<w_char> w;
+ if (utf8)
+ u8_u16(w, second);
+ mkinitcap2(second, w);
+ // set as first suggestion
+ // (entries 0..j-1 move one slot down, the old entry j is freed)
+ char* r = (*slst)[j];
+ for (int k = j; k > 0; k--)
+ (*slst)[k] = (*slst)[k - 1];
+ free(r);
+ (*slst)[0] = mystrdup((first + second).c_str());
+ }
+ }
+ }
+ }
+ break;
+ }
+
+ case ALLCAP: {
+ std::string wspace(scw);
+ mkallsmall2(wspace, sunicw);
+ ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug);
+ if (ns == -1)
+ break;
+ if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str()))
+ ns = insert_sug(slst, wspace.c_str(), ns);
+ mkinitcap2(wspace, sunicw);
+ ns = pSMgr->suggest(slst, wspace.c_str(), ns, &onlycmpdsug);
+ // the input was all upper case, so upper-case every suggestion as well
+ for (int j = 0; j < ns; j++) {
+ std::string form((*slst)[j]);
+ mkallcap(form);
+
+ if (pAMgr && pAMgr->get_checksharps()) {
+ if (utf8) {
+ mystrrep(form, "\xC3\x9F", "SS");
+ } else {
+ mystrrep(form, "\xDF", "SS");
+ }
+ }
+
+ free((*slst)[j]);
+ (*slst)[j] = mystrdup(form.c_str());
+
+ }
+ break;
+ }
+ }
+
+ // LANG_hu section: replace '-' with ' ' in Hungarian
+ if (langnum == LANG_hu) {
+ for (int j = 0; j < ns; j++) {
+ char* pos = strchr((*slst)[j], '-');
+ if (pos) {
+ // info needs no initializer: spell() resets *info to 0 on entry
+ int info;
+ *pos = '\0';
+ std::string w((*slst)[j]);
+ w.append(pos + 1);
+ (void)spell(w.c_str(), &info, NULL);
+ if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
+ *pos = ' ';
+ } else
+ *pos = '-';
+ }
+ }
+ }
+ // END OF LANG_hu section
+
+ // try ngram approach since found nothing or only compound words
+ if (pAMgr && (ns == 0 || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0) &&
+ (*slst)) {
+ switch (captype) {
+ case NOCAP: {
+ ns = pSMgr->ngsuggest(*slst, scw.c_str(), ns, pHMgr, maxdic);
+ break;
+ }
+ case HUHINITCAP:
+ capwords = 1;
+ /* FALLTHROUGH */
+ case HUHCAP: {
+ std::string wspace(scw);
+ mkallsmall2(wspace, sunicw);
+ ns = pSMgr->ngsuggest(*slst, wspace.c_str(), ns, pHMgr, maxdic);
+ break;
+ }
+ case INITCAP: {
+ capwords = 1;
+ std::string wspace(scw);
+ mkallsmall2(wspace, sunicw);
+ ns = pSMgr->ngsuggest(*slst, wspace.c_str(), ns, pHMgr, maxdic);
+ break;
+ }
+ case ALLCAP: {
+ std::string wspace(scw);
+ mkallsmall2(wspace, sunicw);
+ int oldns = ns;
+ ns = pSMgr->ngsuggest(*slst, wspace.c_str(), ns, pHMgr, maxdic);
+ for (int j = oldns; j < ns; j++) {
+ std::string form((*slst)[j]);
+ mkallcap(form);
+ free((*slst)[j]);
+ (*slst)[j] = mystrdup(form.c_str());
+ }
+ break;
+ }
+ }
+ }
+
+ // try dash suggestion (Afo-American -> Afro-American)
+ size_t dash_pos = scw.find('-');
+ if (dash_pos != std::string::npos) {
+ int nodashsug = 1;
+ for (int j = 0; j < ns && nodashsug == 1; j++) {
+ if (strchr((*slst)[j], '-'))
+ nodashsug = 0;
+ }
+
+ size_t prev_pos = 0;
+ bool last = false;
+
+ // walk over the dash-separated chunks; the first misspelled chunk is
+ // replaced by each of its own suggestions and the walk stops
+ while (nodashsug && !last) {
+ if (dash_pos == scw.size())
+ last = 1;
+ std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos);
+ if (!spell(chunk.c_str())) {
+ char** nlst = NULL;
+ int nn = suggest(&nlst, chunk.c_str());
+ for (int j = nn - 1; j >= 0; j--) {
+ std::string wspace = scw.substr(0, prev_pos);
+ wspace.append(nlst[j]);
+ if (!last) {
+ wspace.append("-");
+ wspace.append(scw.substr(dash_pos + 1));
+ }
+ ns = insert_sug(slst, wspace.c_str(), ns);
+ free(nlst[j]);
+ }
+ if (nlst != NULL)
+ free(nlst);
+ nodashsug = 0;
+ }
+ if (!last) {
+ prev_pos = dash_pos + 1;
+ dash_pos = scw.find('-', prev_pos);
+ }
+ if (dash_pos == std::string::npos)
+ dash_pos = scw.size();
+ }
+ }
+
+ // word reversing wrapper for complex prefixes
+ if (complexprefixes) {
+ for (int j = 0; j < ns; j++) {
+ std::string root((*slst)[j]);
+ free((*slst)[j]);
+ if (utf8)
+ reverseword_utf(root);
+ else
+ reverseword(root);
+ (*slst)[j] = mystrdup(root.c_str());
+ }
+ }
+
+ // capitalize
+ if (capwords)
+ for (int j = 0; j < ns; j++) {
+ std::string form((*slst)[j]);
+ free((*slst)[j]);
+ mkinitcap(form);
+ (*slst)[j] = mystrdup(form.c_str());
+ }
+
+ // expand suggestions with dot(s)
+ if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
+ for (int j = 0; j < ns; j++) {
+ // NOTE(review): the realloc() result is not checked; if it fails the
+ // old buffer is lost and strcat() is handed a NULL destination.
+ (*slst)[j] = (char*)realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
+ strcat((*slst)[j], word + strlen(word) - abbv);
+ }
+ }
+
+ // remove bad capitalized and forbidden forms
+ if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
+ switch (captype) {
+ case INITCAP:
+ case ALLCAP: {
+ // l is the write index of the compacted list
+ int l = 0;
+ for (int j = 0; j < ns; j++) {
+ if (!strchr((*slst)[j], ' ') && !spell((*slst)[j])) {
+ std::string s;
+ std::vector<w_char> w;
+ if (utf8) {
+ u8_u16(w, (*slst)[j]);
+ } else {
+ s = (*slst)[j];
+ }
+ mkallsmall2(s, w);
+ free((*slst)[j]);
+ if (spell(s.c_str())) {
+ (*slst)[l] = mystrdup(s.c_str());
+ if ((*slst)[l])
+ l++;
+ } else {
+ mkinitcap2(s, w);
+ if (spell(s.c_str())) {
+ (*slst)[l] = mystrdup(s.c_str());
+ if ((*slst)[l])
+ l++;
+ }
+ }
+ } else {
+ (*slst)[l] = (*slst)[j];
+ l++;
+ }
+ }
+ ns = l;
+ }
+ }
+ }
+
+ // remove duplications
+ // (entry j is kept at index l unless it equals an entry already kept)
+ int l = 0;
+ for (int j = 0; j < ns; j++) {
+ (*slst)[l] = (*slst)[j];
+ for (int k = 0; k < l; k++) {
+ if (strcmp((*slst)[k], (*slst)[j]) == 0) {
+ free((*slst)[j]);
+ l--;
+ break;
+ }
+ }
+ l++;
+ }
+ ns = l;
+
+ // output conversion
+ rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
+ for (int j = 0; rl && j < ns; j++) {
+ std::string wspace;
+ if (rl->conv((*slst)[j], wspace) > 0) {
+ free((*slst)[j]);
+ (*slst)[j] = mystrdup(wspace.c_str());
+ }
+ }
+
+ // if suggestions removed by nosuggest, onlyincompound parameters
+ if (l == 0 && *slst) {
+ free(*slst);
+ *slst = NULL;
+ }
+ return l;
+}
+
+// Release a string list of n entries by forwarding it to freelist().
+void Hunspell::free_list(char*** slst, int n) {
+ freelist(slst, n);
+}
+
+// Return the encoding name obtained from the affix manager in the
+// constructor. The string stays owned by this object (the destructor frees
+// it).
+char* Hunspell::get_dic_encoding() {
+ return encoding;
+}
+
+// Derive stems from n morphological description strings.
+//   slst - receives the resulting string list (set to NULL first)
+//   desc - the n description strings (presumably produced by analyze() --
+//          see the overload taking a word)
+//   n    - number of entries in desc
+// Returns the number of entries in *slst as reported by uniqlist(); returns
+// 0 when n is 0.
+int Hunspell::stem(char*** slst, char** desc, int n) {
+
+ // all results, each one preceded by MSEP_REC; split into a list at the end
+ std::string result2;
+ *slst = NULL;
+ if (n == 0)
+ return 0;
+ for (int i = 0; i < n; i++) {
+
+ // collected fields of the leading compound parts of this description
+ std::string result;
+
+ // add compound word parts (except the last one)
+ char* s = (char*)desc[i];
+ char* part = strstr(s, MORPH_PART);
+ if (part) {
+ char* nextpart = strstr(part + 1, MORPH_PART);
+ while (nextpart) {
+ std::string field;
+ copy_field(field, part, MORPH_PART);
+ result.append(field);
+ part = nextpart;
+ nextpart = strstr(part + 1, MORPH_PART);
+ }
+ s = part;
+ }
+
+ char** pl;
+ std::string tok(s);
+ size_t alt = 0;
+ // turn every " | " into " <MSEP_ALT> " so line_tok() can split on it
+ while ((alt = tok.find(" | ", alt)) != std::string::npos) {
+ tok[alt + 1] = MSEP_ALT;
+ }
+ int pln = line_tok(tok.c_str(), &pl, MSEP_ALT);
+ for (int k = 0; k < pln; k++) {
+ // add derivational suffixes
+ if (strstr(pl[k], MORPH_DERI_SFX)) {
+ // remove inflectional suffixes
+ // (pl[k] is truncated in place at the first MORPH_INFL_SFX)
+ char* is = strstr(pl[k], MORPH_INFL_SFX);
+ if (is)
+ *is = '\0';
+ char* sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]);
+ if (sg) {
+ char** gen;
+ int genl = line_tok(sg, &gen, MSEP_REC);
+ free(sg);
+ for (int j = 0; j < genl; j++) {
+ result2.push_back(MSEP_REC);
+ result2.append(result);
+ result2.append(gen[j]);
+ }
+ freelist(&gen, genl);
+ }
+ } else {
+ result2.push_back(MSEP_REC);
+ result2.append(result);
+ if (strstr(pl[k], MORPH_SURF_PFX)) {
+ std::string field;
+ copy_field(field, pl[k], MORPH_SURF_PFX);
+ result2.append(field);
+ }
+ std::string field;
+ copy_field(field, pl[k], MORPH_STEM);
+ result2.append(field);
+ }
+ }
+ freelist(&pl, pln);
+ }
+ int sln = line_tok(result2.c_str(), slst, MSEP_REC);
+ return uniqlist(*slst, sln);
+}
+
+// Stem a word: run analyze() on it, pass the analyses to the list-based
+// stem() overload and free the intermediate list. Returns that overload's
+// result.
+int Hunspell::stem(char*** slst, const char* word) {
+ char** pl;
+ int pln = analyze(&pl, word);
+ int pln2 = stem(slst, pl, pln);
+ freelist(&pl, pln);
+ return pln2;
+}
+
+// Forward to AffixMgr::get_wordchars().
+// NOTE(review): pAMgr is dereferenced without the NULL check used elsewhere
+// in this file.
+const char* Hunspell::get_wordchars() {
+ return pAMgr->get_wordchars();
+}
+
+// Forward to AffixMgr::get_wordchars_utf16().
+// NOTE(review): pAMgr is dereferenced without the NULL check used elsewhere
+// in this file.
+const std::vector<w_char>& Hunspell::get_wordchars_utf16() {
+ return pAMgr->get_wordchars_utf16();
+}
+
+// Capitalize the word in place, using the dictionary's encoding.
+void Hunspell::mkinitcap(std::string& u8) {
+  if (!utf8) {
+    ::mkinitcap(u8, csconv);
+    return;
+  }
+
+  // UTF-8: round-trip through UTF-16 for the case conversion
+  std::vector<w_char> wide;
+  u8_u16(wide, u8);
+  ::mkinitcap_utf(wide, langnum);
+  u16_u8(u8, wide);
+}
+
+// Capitalizes the first letter of a word held both as `u8` and as `u16`.
+// With utf8 set, u16 is modified and u8 is regenerated from it; otherwise
+// only u8 is modified through csconv. Returns the size of u8 afterwards.
+int Hunspell::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {
+  if (!utf8) {
+    ::mkinitcap(u8, csconv);
+  } else {
+    ::mkinitcap_utf(u16, langnum);
+    u16_u8(u8, u16);
+  }
+  return u8.size();
+}
+
+// Lower-cases the first letter of a word held both as `u8` and as `u16`.
+// With utf8 set, u16 is modified and u8 is regenerated from it; otherwise
+// only u8 is modified through csconv. Returns the size of u8 afterwards.
+int Hunspell::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) {
+  if (!utf8) {
+    ::mkinitsmall(u8, csconv);
+  } else {
+    ::mkinitsmall_utf(u16, langnum);
+    u16_u8(u8, u16);
+  }
+  return u8.size();
+}
+
+// Adds `word` through the first hash manager; returns 0 when that slot is
+// empty, otherwise whatever HashMgr::add() returns.
+int Hunspell::add(const char* word) {
+  HashMgr* first = pHMgr[0];
+  return first ? first->add(word) : 0;
+}
+
+// Adds `word` together with `example` through the first hash manager;
+// returns 0 when that slot is empty, otherwise the result of
+// HashMgr::add_with_affix().
+int Hunspell::add_with_affix(const char* word, const char* example) {
+  HashMgr* first = pHMgr[0];
+  return first ? first->add_with_affix(word, example) : 0;
+}
+
+// Removes `word` through the first hash manager; returns 0 when that slot is
+// empty, otherwise whatever HashMgr::remove() returns.
+int Hunspell::remove(const char* word) {
+  HashMgr* first = pHMgr[0];
+  return first ? first->remove(word) : 0;
+}
+
+// Forwards to the affix manager's get_version().
+const char* Hunspell::get_version() {
+  const char* version = pAMgr->get_version();
+  return version;
+}
+
+// Accessor for the csconv member (the table handed to the 8-bit case
+// conversion helpers in this file).
+struct cs_info* Hunspell::get_csconv() {
+  return this->csconv;
+}
+
+// Appends the C string `st` to `result`, putting a newline between it and
+// any text already collected, then frees `st`. A NULL `st` is ignored.
+void Hunspell::cat_result(std::string& result, char* st) {
+  if (!st)
+    return;
+  if (!result.empty())
+    result.append("\n");
+  result.append(st);
+  free(st);
+}
+/* analyze(slst, word) - morphological analysis of `word`.
+ *
+ * The word is passed through the affix manager's input conversion table
+ * (when there is one) and cleanword2(); pSMgr->suggest_morph() is then asked
+ * for the analysis of the cleaned word and of its capitalization variants,
+ * and the collected text is split into *slst with line_tok(.., MSEP_REC).
+ *
+ * Returns the value returned by line_tok(), or 0 with *slst left NULL when
+ * there is no suggest manager or no dictionary, the word is too long, the
+ * input conversion reports a negative status, nothing is left after
+ * cleaning, or no analysis is found. */
+int Hunspell::analyze(char*** slst, const char* word) {
+ *slst = NULL;  // stays NULL on every explicit "return 0" below
+ if (!pSMgr || maxdic == 0)
+ return 0;
+ int nc = strlen(word);
+ if (utf8) {
+ if (nc >= MAXWORDUTF8LEN)
+ return 0;
+ } else {
+ if (nc >= MAXWORDLEN)
+ return 0;
+ }
+ int captype = NOCAP;  // filled in by cleanword2()
+ size_t abbv = 0;  // filled in by cleanword2(); used below as a count of dots
+ size_t wl = 0;  // length reported by cleanword2()
+
+ std::string scw;  // cleaned word
+ std::vector<w_char> sunicw;  // second output of cleanword2()
+
+ // input conversion: use the converted text only when conv() reports > 0
+ RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
+ {
+ std::string wspace;
+
+ int convstatus = rl ? rl->conv(word, wspace) : 0;
+ if (convstatus < 0)
+ return 0;
+ else if (convstatus > 0)
+ wl = cleanword2(scw, sunicw, wspace.c_str(), &nc, &captype, &abbv);
+ else
+ wl = cleanword2(scw, sunicw, word, &nc, &captype, &abbv);
+ }
+ // nothing left after cleaning: analyze a run of `abbv` dots, or give up
+ if (wl == 0) {
+ if (abbv) {
+ scw.clear();
+ for (wl = 0; wl < abbv; wl++)
+ scw.push_back('.');
+ abbv = 0;
+ } else
+ return 0;
+ }
+
+ std::string result;  // analyses collected by cat_result(), newline-separated
+
+ size_t n = 0;  // scan position in scw
+ size_t n2 = 0;  // number of '.'/',' separators accepted so far
+ size_t n3 = 0;  // position of the last accepted separator
+
+ // test numbers
+ // LANG_hu section: set dash information for suggestions
+ if (langnum == LANG_hu) {
+ while ((n < wl) && (((scw[n] <= '9') && (scw[n] >= '0')) ||
+ (((scw[n] == '.') || (scw[n] == ',')) && (n > 0)))) {
+ n++;
+ if ((scw[n] == '.') || (scw[n] == ',')) {
+ if (((n2 == 0) && (n > 3)) ||
+ ((n2 > 0) && ((scw[n - 1] == '.') || (scw[n - 1] == ','))))
+ break;
+ n2++;
+ n3 = n;
+ }
+ }
+ // whole word consumed, but the tail starting at the last separator spans more than 3 characters: no analysis
+ if ((n == wl) && (n3 > 0) && (n - n3 > 3))
+ return 0;
+ if ((n == wl) || ((n > 0) && ((scw[n] == '%') || (scw[n] == '\xB0')) &&
+ checkword(scw.c_str() + n, NULL, NULL))) {
+ result.append(scw);
+ result.resize(n - 1);  // keep everything before the last scanned character
+ if (n == wl)
+ cat_result(result, pSMgr->suggest_morph(scw.c_str() + n - 1));
+ else {
+ char sign = scw[n];
+ scw[n] = '\0';  // temporarily end the C string before the sign; restored below
+ cat_result(result, pSMgr->suggest_morph(scw.c_str() + n - 1));
+ result.push_back('+'); // XXX SPEC. MORPHCODE
+ scw[n] = sign;
+ cat_result(result, pSMgr->suggest_morph(scw.c_str() + n));
+ }
+ return line_tok(result.c_str(), slst, MSEP_REC);
+ }
+ }
+ // END OF LANG_hu section
+ // analyze the cleaned word in the variants selected by its capitalization type
+ switch (captype) {
+ case HUHCAP:
+ case HUHINITCAP:
+ case NOCAP: {
+ cat_result(result, pSMgr->suggest_morph(scw.c_str()));
+ if (abbv) {
+ std::string u8buffer(scw);
+ u8buffer.push_back('.');  // also try the word with a trailing dot
+ cat_result(result, pSMgr->suggest_morph(u8buffer.c_str()));
+ }
+ break;
+ }
+ case INITCAP: {
+ wl = mkallsmall2(scw, sunicw);
+ std::string u8buffer(scw);  // copy taken after mkallsmall2()
+ mkinitcap2(scw, sunicw);  // scw is now the mkinitcap2() form
+ cat_result(result, pSMgr->suggest_morph(u8buffer.c_str()));
+ cat_result(result, pSMgr->suggest_morph(scw.c_str()));
+ if (abbv) {
+ u8buffer.push_back('.');
+ cat_result(result, pSMgr->suggest_morph(u8buffer.c_str()));
+
+ u8buffer = scw;
+ u8buffer.push_back('.');
+
+ cat_result(result, pSMgr->suggest_morph(u8buffer.c_str()));
+ }
+ break;
+ }
+ case ALLCAP: {
+ cat_result(result, pSMgr->suggest_morph(scw.c_str()));  // the word as cleaned
+ if (abbv) {
+ std::string u8buffer(scw);
+ u8buffer.push_back('.');
+ cat_result(result, pSMgr->suggest_morph(u8buffer.c_str()));
+ }
+ mkallsmall2(scw, sunicw);
+ std::string u8buffer(scw);  // copy taken after mkallsmall2()
+ mkinitcap2(scw, sunicw);  // scw is now the mkinitcap2() form
+
+ cat_result(result, pSMgr->suggest_morph(u8buffer.c_str()));
+ cat_result(result, pSMgr->suggest_morph(scw.c_str()));
+ if (abbv) {
+ u8buffer.push_back('.');
+ cat_result(result, pSMgr->suggest_morph(u8buffer.c_str()));
+
+ u8buffer = scw;
+ u8buffer.push_back('.');
+
+ cat_result(result, pSMgr->suggest_morph(u8buffer.c_str()));
+ }
+ break;
+ }
+ }
+
+ if (!result.empty()) {
+ // word reversing wrapper for complex prefixes
+ if (complexprefixes) {
+ if (utf8)
+ reverseword_utf(result);
+ else
+ reverseword(result);
+ }
+ return line_tok(result.c_str(), slst, MSEP_REC);
+ }
+
+ // compound word with dash (HU) I18n
+ // LANG_hu section: set dash information for suggestions
+
+ size_t dash_pos = langnum == LANG_hu ? scw.find('-') : std::string::npos;
+ int nresult = 0;  // spell() result for "part1 " in the two-sided case below
+ if (dash_pos != std::string::npos) {
+ std::string part1 = scw.substr(0, dash_pos);  // text before the first dash
+ std::string part2 = scw.substr(dash_pos+1);  // text after the first dash
+
+ // examine 2 sides of the dash
+ if (part2.empty()) { // base word ending with dash
+ if (spell(part1.c_str())) {
+ char* p = pSMgr->suggest_morph(part1.c_str());
+ if (p) {
+ int ret = line_tok(p, slst, MSEP_REC);
+ free(p);
+ return ret;
+ }
+ }
+ } else if (part2.size() == 1 && part2[0] == 'e') { // XXX (HU) -e hat.
+ if (spell(part1.c_str()) && (spell("-e"))) {
+ char* st = pSMgr->suggest_morph(part1.c_str());
+ if (st) {
+ result.append(st);
+ free(st);
+ }
+ result.push_back('+'); // XXX spec. separator in MORPHCODE
+ st = pSMgr->suggest_morph("-e");
+ if (st) {
+ result.append(st);
+ free(st);
+ }
+ return line_tok(result.c_str(), slst, MSEP_REC);
+ }
+ } else {
+ // first word ending with dash: word- XXX ???
+ part1.push_back(' ');  // the space is only there for the spell() call
+ nresult = spell(part1.c_str());
+ part1.erase(part1.size() - 1);
+ if (nresult && spell(part2.c_str()) &&
+ ((part2.size() > 1) || ((part2[0] > '0') && (part2[0] < '9')))) {
+ char* st = pSMgr->suggest_morph(part1.c_str());
+ if (st) {
+ result.append(st);
+ free(st);
+ result.push_back('+'); // XXX spec. separator in MORPHCODE
+ }
+ st = pSMgr->suggest_morph(part2.c_str());
+ if (st) {
+ result.append(st);
+ free(st);
+ }
+ return line_tok(result.c_str(), slst, MSEP_REC);
+ }
+ }
+ // affixed number in correct word
+ if (nresult && (dash_pos > 0) &&
+ (((scw[dash_pos - 1] <= '9') && (scw[dash_pos - 1] >= '0')) ||
+ (scw[dash_pos - 1] == '.'))) {
+ n = 1;  // from here on n counts characters to the left of the dash
+ if (scw[dash_pos - n] == '.')
+ n++;
+ // search first not a number character to left from dash
+ while ((dash_pos >= n) && ((scw[dash_pos - n] == '0') || (n < 3)) &&
+ (n < 6)) {
+ n++;
+ }
+ if (dash_pos < n)
+ n--;
+ // numbers: valami1000000-hoz
+ // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
+ // 56-hoz, 6-hoz
+ for (; n >= 1; n--) {
+ if (scw[dash_pos - n] < '0' || scw[dash_pos - n] > '9') {
+ continue;
+ }
+ std::string chunk = scw.substr(dash_pos - n);  // the n characters before the dash plus the rest of the word
+ if (checkword(chunk.c_str(), NULL, NULL)) {
+ result.append(chunk);
+ char* st = pSMgr->suggest_morph(chunk.c_str());
+ if (st) {
+ result.append(st);
+ free(st);
+ }
+ return line_tok(result.c_str(), slst, MSEP_REC);
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+// Morphological generation by description(s): analyzes `word`, asks the
+// suggest manager to generate a form for each of the `pln` descriptions in
+// `pl`, restores the capitalization of `word` and keeps only the forms that
+// pass spell(). Returns the number of forms kept at the front of *slst, or
+// 0 with *slst == NULL when nothing survives.
+int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) {
+  *slst = NULL;
+  if (!pSMgr || !pln)
+    return 0;
+
+  char** analyses;
+  int analysis_count = analyze(&analyses, word);
+
+  int captype = NOCAP;
+  int abbv = 0;
+  std::string cleaned;
+  cleanword(cleaned, word, &captype, &abbv);
+
+  std::string generated;
+  for (int i = 0; i < pln; ++i) {
+    char* forms = pSMgr->suggest_gen(analyses, analysis_count, pl[i]);
+    cat_result(generated, forms);
+  }
+  freelist(&analyses, analysis_count);
+
+  if (generated.empty())
+    return 0;
+
+  // allcap
+  if (captype == ALLCAP)
+    mkallcap(generated);
+
+  // line split
+  const int total = line_tok(generated.c_str(), slst, MSEP_REC);
+
+  // capitalize
+  if (captype == INITCAP || captype == HUHINITCAP) {
+    for (int j = 0; j < total; ++j) {
+      char*& slot = (*slst)[j];
+      std::string form(slot);
+      free(slot);
+      mkinitcap(form);
+      slot = mystrdup(form.c_str());
+    }
+  }
+
+  // temporary filtering of prefix related errors (eg.
+  // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
+  int kept = 0;
+  for (int j = 0; j < total; ++j) {
+    char* candidate = (*slst)[j];
+    if (spell(candidate)) {
+      // compact accepted forms towards the front of the list
+      if (kept < j)
+        (*slst)[kept] = candidate;
+      ++kept;
+    } else {
+      free(candidate);
+      (*slst)[j] = NULL;
+    }
+  }
+  if (kept > 0)
+    return kept;
+
+  free(*slst);
+  *slst = NULL;
+  return 0;
+}
+
+// Morphological generation by example: analyzes `pattern`, generates forms
+// of `word` from those analyses and passes the list through uniqlist().
+int Hunspell::generate(char*** slst, const char* word, const char* pattern) {
+  char** pattern_analyses;
+  const int pattern_count = analyze(&pattern_analyses, pattern);
+  const int generated = generate(slst, word, pattern_analyses, pattern_count);
+  freelist(&pattern_analyses, pattern_count);
+  return uniqlist(*slst, generated);
+}
+
+// minimal XML parser functions
+// Extracts an XML attribute value or element text.
+//   par: points at the opening delimiter - a single or double quote
+//        (attribute value, ends at the matching quote) or '>' (element
+//        text, ends at the next '<'). May be NULL.
+// Returns the text between the delimiters with "&lt;" and "&amp;" decoded.
+// An empty string is returned when par is NULL or does not start with one
+// of the delimiters above.
+std::string Hunspell::get_xml_par(const char* par) {
+  std::string dest;
+  if (!par)
+    return dest;
+  char end = *par;
+  if (end == '>')
+    end = '<';
+  else if (end != '\'' && end != '"')
+    // bad XML. This used to be "return 0", which builds a std::string from
+    // a null pointer (undefined behaviour); callers test the result with
+    // empty(), so the empty string is the right failure value.
+    return dest;
+  for (par++; *par != '\0' && *par != end; ++par) {
+    dest.push_back(*par);
+  }
+  mystrrep(dest, "&lt;", "<");
+  mystrrep(dest, "&amp;", "&");
+  return dest;
+}
+
+// Accessor for the langnum member.
+int Hunspell::get_langnum() const {
+  return this->langnum;
+}
+
+// Runs `word` through the affix manager's input conversion table into
+// `dest` (capacity `destsize`). Returns 1 when a table exists and its
+// conv() reports a positive status, 0 otherwise.
+int Hunspell::input_conv(const char* word, char* dest, size_t destsize) {
+  if (!pAMgr)
+    return 0;
+  RepList* table = pAMgr->get_iconvtable();
+  if (!table)
+    return 0;
+  return table->conv(word, dest, destsize) > 0;
+}
+
+// Locates a position inside the XML tag that starts at `s`.
+//   attr == NULL: returns the position of the tag's closing '>' (NULL when
+//                 there is none).
+//   attr != NULL: returns the position just after the first occurrence of
+//                 `attr` that lies before the closing '>' and is preceded
+//                 by a space or a newline; NULL when there is no such
+//                 occurrence or the tag is not closed.
+const char* Hunspell::get_xml_pos(const char* s, const char* attr) {
+  if (!s)
+    return NULL;
+  const char* end = strchr(s, '>');
+  if (attr == NULL)
+    return end;
+  if (!end)
+    return NULL;  // unterminated tag: nothing can lie "before" its end
+  const size_t attr_len = strlen(attr);
+  // Resume the search one character past a rejected match. Searching again
+  // from the same position would find the same match forever.
+  for (const char* p = strstr(s, attr); p && p < end;
+       p = strstr(p + 1, attr)) {
+    // p > s: never look at the byte in front of the buffer
+    if (p > s && (p[-1] == ' ' || p[-1] == '\n'))
+      return p + attr_len;
+  }
+  return NULL;
+}
+
+// Returns 1 when attribute `attr` of the XML tag starting at `q` has exactly
+// the value `value`, 0 otherwise.
+int Hunspell::check_xml_par(const char* q,
+                            const char* attr,
+                            const char* value) {
+  const char* value_pos = get_xml_pos(q, attr);
+  const std::string actual = get_xml_par(value_pos);
+  return (actual == value) ? 1 : 0;
+}
+
+// Collects the text of every `tag` element found in `list`.
+//   slst: receives a malloc'ed array of mystrdup'ed strings; it is set to
+//         NULL whenever 0 is returned, so callers may hand it to freelist()
+//         unconditionally.
+//   list: text to scan (may be NULL).
+//   tag:  opening tag including its final '>', e.g. "<a>".
+// Returns the number of strings stored. Collection stops at the first
+// element whose text is empty.
+int Hunspell::get_xml_list(char*** slst, const char* list, const char* tag) {
+  *slst = NULL;  // was left unassigned on the early returns below
+  if (!list)
+    return 0;
+  const size_t tag_len = strlen(tag);
+  int count = 0;
+  for (const char* p = list; (p = strstr(p, tag)) != NULL; p++)
+    count++;
+  if (count == 0)
+    return 0;
+  *slst = (char**)malloc(sizeof(char*) * count);
+  if (!*slst)
+    return 0;
+  int n = 0;
+  for (const char* p = list; (p = strstr(p, tag)) != NULL; p++, n++) {
+    // the tag's final '>' is the opening delimiter get_xml_par() expects
+    std::string cw = get_xml_par(p + tag_len - 1);
+    if (cw.empty()) {
+      break;
+    }
+    (*slst)[n] = mystrdup(cw.c_str());
+  }
+  if (n == 0) {
+    // nothing stored: do not hand back an empty array with a 0 count
+    free(*slst);
+    *slst = NULL;
+  }
+  return n;
+}
+
+// Handles an XML request embedded in `word`. The <query> tag's type
+// attribute selects the operation:
+//   "analyze"  - analyze the first <word>; the result is a single string
+//                "<code><a>ana1</a><a>ana2</a></code>" in (*slst)[0]
+//   "stem"     - stem the first <word>
+//   "generate" - generate forms of the first <word>, either by the example
+//                in a second <word> or by the descriptions listed in
+//                <code><a>..</a></code>
+// Returns the number of entries in *slst, or 0 for malformed input or when
+// there is no result.
+int Hunspell::spellml(char*** slst, const char* word) {
+  const char* q = strstr(word, "<query");
+  if (!q)
+    return 0;  // bad XML input
+  const char* q2 = strchr(q, '>');
+  if (!q2)
+    return 0;  // bad XML input
+  q2 = strstr(q2, "<word");
+  if (!q2)
+    return 0;  // bad XML input
+  if (check_xml_par(q, "type=", "analyze")) {
+    int n = 0;
+    std::string cw = get_xml_par(strchr(q2, '>'));
+    if (!cw.empty())
+      n = analyze(slst, cw.c_str());
+    if (n == 0)
+      return 0;
+    // convert the result to <code><a>ana1</a><a>ana2</a></code> format
+    std::string r;
+    r.append("<code>");
+    for (int i = 0; i < n; i++) {
+      r.append("<a>");
+
+      std::string entry((*slst)[i]);
+      free((*slst)[i]);
+      mystrrep(entry, "\t", " ");
+      mystrrep(entry, "&", "&amp;");
+      mystrrep(entry, "<", "&lt;");
+      r.append(entry);
+
+      r.append("</a>");
+    }
+    r.append("</code>");
+    // every entry was freed above; slot 0 is reused for the combined string
+    (*slst)[0] = mystrdup(r.c_str());
+    return 1;
+  } else if (check_xml_par(q, "type=", "stem")) {
+    std::string cw = get_xml_par(strchr(q2, '>'));
+    if (!cw.empty())
+      return stem(slst, cw.c_str());
+  } else if (check_xml_par(q, "type=", "generate")) {
+    std::string cw = get_xml_par(strchr(q2, '>'));
+    if (cw.empty())
+      return 0;
+    const char* q3 = strstr(q2 + 1, "<word");
+    if (q3) {
+      std::string cw2 = get_xml_par(strchr(q3, '>'));
+      if (!cw2.empty()) {
+        return generate(slst, cw.c_str(), cw2.c_str());
+      }
+    } else {
+      if ((q2 = strstr(q2 + 1, "<code")) != NULL) {
+        // Initialized because get_xml_list() can return 0 without storing
+        // anything, and the list is passed to freelist() on that path too.
+        char** slst2 = NULL;
+        int n = get_xml_list(&slst2, strchr(q2, '>'), "<a>");
+        if (n != 0) {
+          int n2 = generate(slst, cw.c_str(), slst2, n);
+          freelist(&slst2, n);
+          return uniqlist(*slst, n2);
+        }
+        freelist(&slst2, n);
+      }
+    }
+  }
+  return 0;
+}
+
+/* C API: creates a Hunspell object from the affix and dictionary paths and
+ * returns it as an opaque handle. */
+Hunhandle* Hunspell_create(const char* affpath, const char* dpath) {
+  Hunspell* instance = new Hunspell(affpath, dpath);
+  return reinterpret_cast<Hunhandle*>(instance);
+}
+
+/* C API: like Hunspell_create(), additionally passing `key` to the Hunspell
+ * constructor. */
+Hunhandle* Hunspell_create_key(const char* affpath,
+                               const char* dpath,
+                               const char* key) {
+  Hunspell* instance = new Hunspell(affpath, dpath, key);
+  return reinterpret_cast<Hunhandle*>(instance);
+}
+
+/* C API: deletes the Hunspell object behind the handle. */
+void Hunspell_destroy(Hunhandle* pHunspell) {
+  Hunspell* instance = reinterpret_cast<Hunspell*>(pHunspell);
+  delete instance;
+}
+
+/* C API: forwards to Hunspell::add_dic(). */
+int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) {
+  Hunspell* instance = reinterpret_cast<Hunspell*>(pHunspell);
+  return instance->add_dic(dpath);
+}
+
+/* C API: forwards to Hunspell::spell() with its default info/root
+ * arguments. */
+int Hunspell_spell(Hunhandle* pHunspell, const char* word) {
+  Hunspell* instance = reinterpret_cast<Hunspell*>(pHunspell);
+  return instance->spell(word);
+}
+
+/* C API: forwards to Hunspell::get_dic_encoding(). */
+char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) {
+  Hunspell* instance = reinterpret_cast<Hunspell*>(pHunspell);
+  return instance->get_dic_encoding();
+}
+
+/* C API: forwards to Hunspell::suggest(). */
+int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) {
+  Hunspell* instance = reinterpret_cast<Hunspell*>(pHunspell);
+  return instance->suggest(slst, word);
+}
+
+/* C API: forwards to Hunspell::analyze(). */
+int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) {
+  Hunspell* instance = reinterpret_cast<Hunspell*>(pHunspell);
+  return instance->analyze(slst, word);
+}
+
+/* C API: forwards to the word-based Hunspell::stem() overload. */
+int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) {
+  Hunspell* instance = reinterpret_cast<Hunspell*>(pHunspell);
+  return instance->stem(slst, word);
+}
+
+/* C API: forwards to the Hunspell::stem() overload that takes a list of `n`
+ * morphological descriptions. */
+int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) {
+  Hunspell* instance = reinterpret_cast<Hunspell*>(pHunspell);
+  return instance->stem(slst, desc, n);
+}
+
+/* C API: forwards to the by-example Hunspell::generate() overload. */
+int Hunspell_generate(Hunhandle* pHunspell,
+                      char*** slst,
+                      const char* word,
+                      const char* word2) {
+  Hunspell* instance = reinterpret_cast<Hunspell*>(pHunspell);
+  return instance->generate(slst, word, word2);
+}
+
+/* C API: forwards to the Hunspell::generate() overload that takes a list of
+ * `n` morphological descriptions. */
+int Hunspell_generate2(Hunhandle* pHunspell,
+                       char*** slst,
+                       const char* word,
+                       char** desc,
+                       int n) {
+  Hunspell* instance = reinterpret_cast<Hunspell*>(pHunspell);
+  return instance->generate(slst, word, desc, n);
+}
+
+/* functions for run-time modification of the dictionary */
+
+/* C API: add word to the run-time dictionary (forwards to
+ * Hunspell::add()). */
+
+int Hunspell_add(Hunhandle* pHunspell, const char* word) {
+  Hunspell* instance = reinterpret_cast<Hunspell*>(pHunspell);
+  return instance->add(word);
+}
+
+/* C API: add word to the run-time dictionary with the affix flags of
+ * the example (a dictionary word), so that Hunspell recognizes
+ * affixed forms of the new word, too. Forwards to
+ * Hunspell::add_with_affix().
+ */
+
+int Hunspell_add_with_affix(Hunhandle* pHunspell,
+                            const char* word,
+                            const char* example) {
+  Hunspell* instance = reinterpret_cast<Hunspell*>(pHunspell);
+  return instance->add_with_affix(word, example);
+}
+
+/* C API: remove word from the run-time dictionary (forwards to
+ * Hunspell::remove()). */
+
+int Hunspell_remove(Hunhandle* pHunspell, const char* word) {
+  Hunspell* instance = reinterpret_cast<Hunspell*>(pHunspell);
+  return instance->remove(word);
+}
+
+/* C API: releases a list of `n` strings through freelist(). The handle is
+ * not used. */
+void Hunspell_free_list(Hunhandle* /* pHunspell */, char*** slst, int n) {
+  freelist(slst, n);
+}
+
+// Suggests words derived from `root_word` by the suffix rules of its
+// dictionary entry.
+//   slst: receives a newly allocated array with room for MAXSUGGESTION
+//         strings, filled in by AffixMgr::get_suffix_words(); it is set to
+//         NULL whenever the function returns 0 or -1.
+//   root_word: the word to look up; characters listed by the affix manager's
+//         get_ignore() are removed before the lookup.
+// Returns the value of get_suffix_words(), 0 when the word is empty or has
+// no dictionary entry, and -1 when the array cannot be allocated.
+int Hunspell::suffix_suggest(char*** slst, const char* root_word) {
+  // The header documents that *slst is NULL when there are no suggestions.
+  // Previously it was left unset for an empty word and pointed to an
+  // allocated, unused array when no entry was found.
+  *slst = NULL;
+
+  std::string w2;
+  const char* word;
+  char* ignoredchars = pAMgr->get_ignore();
+  if (ignoredchars != NULL) {
+    w2.assign(root_word);
+    if (utf8) {
+      const std::vector<w_char>& ignoredchars_utf16 =
+          pAMgr->get_ignore_utf16();
+      remove_ignored_chars_utf(w2, ignoredchars_utf16);
+    } else {
+      remove_ignored_chars(w2, ignoredchars);
+    }
+    word = w2.c_str();
+  } else
+    word = root_word;
+
+  if (*word == '\0')
+    return 0;
+
+  // look the word up first, so nothing is allocated when there is no entry
+  struct hentry* he = NULL;
+  for (int i = 0; (i < maxdic) && !he; i++) {
+    he = (pHMgr[i])->lookup(word);
+  }
+  if (!he)
+    return 0;
+
+  char** wlst = (char**)malloc(MAXSUGGESTION * sizeof(char*));
+  if (wlst == NULL)
+    return -1;
+  for (int i = 0; i < MAXSUGGESTION; i++) {
+    wlst[i] = NULL;
+  }
+  *slst = wlst;
+
+  int count = pAMgr->get_suffix_words(he->astr, he->alen, root_word, wlst);
+  if (count == 0) {
+    // no suggestion produced: keep the "NULL when empty" contract
+    free(wlst);
+    *slst = NULL;
+  }
+  return count;
+}
diff --git a/extensions/spellcheck/hunspell/src/hunspell.h b/extensions/spellcheck/hunspell/src/hunspell.h
new file mode 100644
index 000000000..726bbe207
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/hunspell.h
@@ -0,0 +1,162 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef _MYSPELLMGR_H_
+#define _MYSPELLMGR_H_
+
+#include "hunvisapi.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct Hunhandle Hunhandle;
+
+LIBHUNSPELL_DLL_EXPORTED Hunhandle* Hunspell_create(const char* affpath,
+ const char* dpath);
+
+LIBHUNSPELL_DLL_EXPORTED Hunhandle* Hunspell_create_key(const char* affpath,
+ const char* dpath,
+ const char* key);
+
+LIBHUNSPELL_DLL_EXPORTED void Hunspell_destroy(Hunhandle* pHunspell);
+
+/* load extra dictionaries (only dic files)
+ * output: 0 = additional dictionary slots available, 1 = slots are now full*/
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_dic(Hunhandle* pHunspell,
+ const char* dpath);
+
+/* spell(word) - spellcheck word
+ * output: 0 = bad word, not 0 = good word
+ */
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_spell(Hunhandle* pHunspell, const char*);
+
+LIBHUNSPELL_DLL_EXPORTED char* Hunspell_get_dic_encoding(Hunhandle* pHunspell);
+
+/* suggest(suggestions, word) - search suggestions
+ * input: pointer to an array of string pointers and the (bad) word;
+ * the array of string pointers (here *slst) need not be initialized
+ * output: number of suggestions in the string array, and suggestions in
+ * a newly allocated array of strings (*slst will be NULL when the number
+ * of suggestions equals 0.)
+ */
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_suggest(Hunhandle* pHunspell,
+ char*** slst,
+ const char* word);
+
+/* morphological functions */
+
+/* analyze(result, word) - morphological analysis of the word */
+
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_analyze(Hunhandle* pHunspell,
+ char*** slst,
+ const char* word);
+
+/* stem(result, word) - stemmer function */
+
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem(Hunhandle* pHunspell,
+ char*** slst,
+ const char* word);
+
+/* stem(result, analysis, n) - get stems from a morph. analysis
+ * example:
+ * char **result, **result2;
+ * int n1 = Hunspell_analyze(handle, &result, "words");
+ * int n2 = Hunspell_stem2(handle, &result2, result, n1);
+ */
+
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem2(Hunhandle* pHunspell,
+ char*** slst,
+ char** desc,
+ int n);
+
+/* generate(result, word, word2) - morphological generation by example(s) */
+
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate(Hunhandle* pHunspell,
+ char*** slst,
+ const char* word,
+ const char* word2);
+
+/* generate(result, word, desc, n) - generation by morph. description(s)
+ * example:
+ * char ** result;
+ * char * affix = "is:plural"; // description depends on the dictionary, too
+ * int n = Hunspell_generate2(handle, &result, "word", &affix, 1);
+ * for (int i = 0; i < n; i++) printf("%s\n", result[i]);
+ */
+
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate2(Hunhandle* pHunspell,
+ char*** slst,
+ const char* word,
+ char** desc,
+ int n);
+
+/* functions for run-time modification of the dictionary */
+
+/* add word to the run-time dictionary */
+
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_add(Hunhandle* pHunspell,
+ const char* word);
+
+/* add word to the run-time dictionary with affix flags of
+ * the example (a dictionary word): Hunspell will recognize
+ * affixed forms of the new word, too.
+ */
+
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_with_affix(Hunhandle* pHunspell,
+ const char* word,
+ const char* example);
+
+/* remove word from the run-time dictionary */
+
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_remove(Hunhandle* pHunspell,
+ const char* word);
+
+/* free suggestion lists */
+
+LIBHUNSPELL_DLL_EXPORTED void Hunspell_free_list(Hunhandle* pHunspell,
+ char*** slst,
+ int n);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/hunspell.hxx b/extensions/spellcheck/hunspell/src/hunspell.hxx
new file mode 100644
index 000000000..401475309
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/hunspell.hxx
@@ -0,0 +1,258 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "hunvisapi.h"
+
+#include "hashmgr.hxx"
+#include "affixmgr.hxx"
+#include "suggestmgr.hxx"
+#include "langnum.hxx"
+#include <vector>
+
+#define SPELL_XML "<?xml?>"
+
+#define MAXDIC 20
+#define MAXSUGGESTION 15
+#define MAXSHARPS 5
+
+#define HUNSPELL_OK (1 << 0)
+#define HUNSPELL_OK_WARN (1 << 1)
+
+#ifndef _MYSPELLMGR_HXX_
+#define _MYSPELLMGR_HXX_
+/* Hunspell - C++ interface of the library: spell checking, suggestions,
+ * morphological analysis / stemming / generation and run-time modification
+ * of the dictionary. Owns an affix manager, up to MAXDIC hash managers and
+ * a suggest manager. */
+class LIBHUNSPELL_DLL_EXPORTED Hunspell {
+ private:
+ Hunspell(const Hunspell&);  // private: not copyable by outside code
+ Hunspell& operator=(const Hunspell&);  // private: not assignable by outside code
+
+ private:
+ AffixMgr* pAMgr;
+ HashMgr* pHMgr[MAXDIC];  // entries 0 .. maxdic-1 are searched on lookup
+ int maxdic;
+ SuggestMgr* pSMgr;
+ char* affixpath;
+ char* encoding;
+ struct cs_info* csconv;  // table passed to the 8-bit case conversion helpers
+ int langnum;
+ int utf8;  // non-zero selects the UTF-8/UTF-16 code paths
+ int complexprefixes;  // non-zero: analyze() reverses its result text
+ char** wordbreak;
+
+ public:
+ /* Hunspell(aff, dic) - constructor of Hunspell class
+ * input: path of affix file and dictionary file
+ *
+ * In WIN32 environment, use UTF-8 encoded paths started with the long path
+ * prefix \\\\?\\ to handle system-independent character encoding and very
+ * long path names (without the long path prefix Hunspell will use fopen()
+ * with system-dependent character encoding instead of _wfopen()).
+ */
+
+ Hunspell(const char* affpath, const char* dpath, const char* key = NULL);
+ ~Hunspell();
+
+ /* load extra dictionaries (only dic files) */
+ int add_dic(const char* dpath, const char* key = NULL);
+
+ /* spell(word) - spellcheck word
+ * output: 0 = bad word, not 0 = good word
+ *
+ * plus output:
+ * info: information bit array, fields:
+ * SPELL_COMPOUND = a compound word
+ * SPELL_FORBIDDEN = an explicit forbidden word
+ * root: root (stem), when input is a word with affix(es)
+ */
+
+ int spell(const char* word, int* info = NULL, char** root = NULL);
+
+ /* suggest(suggestions, word) - search suggestions
+ * input: pointer to an array of string pointers and the (bad) word;
+ * the array of string pointers (here *slst) need not be initialized
+ * output: number of suggestions in the string array, and suggestions in
+ * a newly allocated array of strings (*slst will be NULL when the number
+ * of suggestions equals 0.)
+ */
+
+ int suggest(char*** slst, const char* word);
+
+ /* Suggest words from suffix rules
+ * suffix_suggest(suggestions, root_word)
+ * input: pointer to an array of string pointers and the word;
+ * the array of string pointers (here *slst) need not be initialized
+ * output: number of suggestions in the string array, and suggestions in
+ * a newly allocated array of strings (*slst will be NULL when the number
+ * of suggestions equals 0.)
+ */
+ int suffix_suggest(char*** slst, const char* root_word);
+
+ /* deallocate suggestion lists */
+
+ void free_list(char*** slst, int n);
+
+ char* get_dic_encoding();
+
+ /* morphological functions */
+
+ /* analyze(result, word) - morphological analysis of the word */
+
+ int analyze(char*** slst, const char* word);
+
+ /* stem(result, word) - stemmer function */
+
+ int stem(char*** slst, const char* word);
+
+ /* stem(result, analysis, n) - get stems from a morph. analysis
+ * example:
+ * char **result, **result2;
+ * int n1 = analyze(&result, "words");
+ * int n2 = stem(&result2, result, n1);
+ */
+
+ int stem(char*** slst, char** morph, int n);
+
+ /* generate(result, word, word2) - morphological generation by example(s) */
+
+ int generate(char*** slst, const char* word, const char* word2);
+
+ /* generate(result, word, desc, n) - generation by morph. description(s)
+ * example:
+ * char ** result;
+ * char * affix = "is:plural"; // description depends on the dictionary, too
+ * int n = generate(&result, "word", &affix, 1);
+ * for (int i = 0; i < n; i++) printf("%s\n", result[i]);
+ */
+
+ int generate(char*** slst, const char* word, char** desc, int n);
+
+ /* functions for run-time modification of the dictionary */
+
+ /* add word to the run-time dictionary */
+
+ int add(const char* word);
+
+ /* add word to the run-time dictionary with affix flags of
+ * the example (a dictionary word): Hunspell will recognize
+ * affixed forms of the new word, too.
+ */
+
+ int add_with_affix(const char* word, const char* example);
+
+ /* remove word from the run-time dictionary */
+
+ int remove(const char* word);
+
+ /* other */
+
+ /* get extra word characters defined in affix file for tokenization */
+ const char* get_wordchars();
+ const std::vector<w_char>& get_wordchars_utf16();
+
+ struct cs_info* get_csconv();
+ const char* get_version();
+
+ int get_langnum() const;
+
+ /* need for putdic */
+ int input_conv(const char* word, char* dest, size_t destsize);
+
+ private:
+ void cleanword(std::string& dest, const char*, int* pcaptype, int* pabbrev);
+ size_t cleanword2(std::string& dest,
+ std::vector<w_char>& dest_u,
+ const char*,
+ int* w_len,
+ int* pcaptype,
+ size_t* pabbrev);
+ void mkinitcap(std::string& u8);
+ int mkinitcap2(std::string& u8, std::vector<w_char>& u16);  // returns u8.size()
+ int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);  // returns u8.size()
+ void mkallcap(std::string& u8);
+ int mkallsmall2(std::string& u8, std::vector<w_char>& u16);
+ struct hentry* checkword(const char*, int* info, char** root);
+ std::string sharps_u8_l1(const std::string& source);
+ hentry*
+ spellsharps(std::string& base, size_t start_pos, int, int, int* info, char** root);
+ int is_keepcase(const hentry* rv);
+ int insert_sug(char*** slst, const char* word, int ns);
+ void cat_result(std::string& result, char* st);  // appends st (newline-separated) and frees it
+ char* stem_description(const char* desc);
+ int spellml(char*** slst, const char* word);  // XML request front end
+ std::string get_xml_par(const char* par);  // minimal XML helpers used by spellml()
+ const char* get_xml_pos(const char* s, const char* attr);
+ int get_xml_list(char*** slst, const char* list, const char* tag);
+ int check_xml_par(const char* q, const char* attr, const char* value);
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/hunvisapi.h b/extensions/spellcheck/hunspell/src/hunvisapi.h
new file mode 100644
index 000000000..503c20f66
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/hunvisapi.h
@@ -0,0 +1,18 @@
+#ifndef _HUNSPELL_VISIBILITY_H_
+#define _HUNSPELL_VISIBILITY_H_
+
+#if defined(HUNSPELL_STATIC)
+# define LIBHUNSPELL_DLL_EXPORTED
+#elif defined(_MSC_VER)
+# if defined(BUILDING_LIBHUNSPELL)
+# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllexport)
+# else
+# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport)
+# endif
+#elif defined(BUILDING_LIBHUNSPELL) && 1
+# define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default")))
+#else
+# define LIBHUNSPELL_DLL_EXPORTED
+#endif
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/hunzip.cxx b/extensions/spellcheck/hunspell/src/hunzip.cxx
new file mode 100644
index 000000000..b2788a105
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/hunzip.cxx
@@ -0,0 +1,263 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "hunzip.hxx"
+#include "csutil.hxx"
+
+#define CODELEN 65536 /* not referenced anywhere else in hunzip.cxx */
+#define BASEBITREC 5000 /* initial size and growth step (in records) of the dec code table */
+
+#define UNCOMPRESSED '\002' /* not referenced anywhere else in hunzip.cxx */
+#define MAGIC "hz0" /* file signature accepted by getcode() without a key */
+#define MAGIC_ENCRYPT "hz1" /* file signature for which getcode() requires and verifies a key */
+#define MAGICLEN (sizeof(MAGIC) - 1) /* signature length, terminating NUL excluded */
+
+// Prints a diagnostic to stderr and yields the failure code.
+//   err: printf-style format taking a single %s (callers in this file pass
+//        the MSG_* strings)
+//   par: string substituted for that %s (callers pass the file name)
+// Always returns -1 so callers can write `return fail(...)`.
+int Hunzip::fail(const char* err, const char* par) {
+  const int failure = -1;
+  fprintf(stderr, err, par);
+  return failure;
+}
+
+// Remembers a copy of `file`, loads the code table through getcode() (with
+// `key` for encrypted files) and decodes the first output buffer.
+// If the header cannot be loaded, bufsiz stays at -1 and getline() will
+// return NULL.
+Hunzip::Hunzip(const char* file, const char* key)
+    : fin(NULL), bufsiz(0), lastbit(0), inc(0), inbits(0), outc(0), dec(NULL) {
+  line[0] = '\0';
+  out[0] = '\0';
+  in[0] = '\0';
+  filename = mystrdup(file);
+  const bool header_ok = (getcode(key) != -1);
+  bufsiz = header_ok ? getbuf() : -1;
+}
+
+// Opens the file named by `filename`, validates its header and builds the
+// Huffman decoding tree in `dec`.
+//
+//   key: password for "hz1" (encrypted) files; ignored for "hz0" files.
+//
+// Returns 0 on success and -1 on failure: no file name, file cannot be
+// opened, bad signature, missing or wrong key, truncated data, or out of
+// memory. On failure `fin` and `dec` are left as they are; the destructor
+// releases them.
+int Hunzip::getcode(const char* key) {
+  unsigned char c[2];
+  int i, j, n, p;
+  int allocatedbit = BASEBITREC;
+  const char* enc = key;
+
+  if (!filename)
+    return -1;
+
+  fin = myfopen(filename, "rb");
+  if (!fin)
+    return -1;
+
+  // read magic number
+  if ((fread(in, 1, 3, fin) < MAGICLEN) ||
+      !(strncmp(MAGIC, in, MAGICLEN) == 0 ||
+        strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0)) {
+    return fail(MSG_FORMAT, filename);
+  }
+
+  // check encryption
+  if (strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0) {
+    unsigned char cs;
+    if (!key)
+      return fail(MSG_KEY, filename);
+    if (fread(&c, 1, 1, fin) < 1)
+      return fail(MSG_FORMAT, filename);
+    // the stored check byte is the XOR of all key bytes
+    for (cs = 0; *enc; enc++)
+      cs ^= *enc;
+    if (cs != c[0])
+      return fail(MSG_KEY, filename);
+    enc = key;
+  } else
+    key = NULL;  // plain file: disables every decryption step below
+
+  // read record count
+  if (fread(&c, 1, 2, fin) < 2)
+    return fail(MSG_FORMAT, filename);
+
+  if (key) {
+    c[0] ^= *enc;
+    if (*(++enc) == '\0')
+      enc = key;
+    c[1] ^= *enc;
+  }
+
+  // the count is stored high byte first
+  n = ((int)c[0] << 8) + c[1];
+  dec = (struct bit*)malloc(BASEBITREC * sizeof(struct bit));
+  if (!dec)
+    return fail(MSG_MEMORY, filename);
+  dec[0].v[0] = 0;
+  dec[0].v[1] = 0;
+
+  // read codes
+  for (i = 0; i < n; i++) {
+    unsigned char l;
+    // two output bytes of this record
+    if (fread(c, 1, 2, fin) < 2)
+      return fail(MSG_FORMAT, filename);
+    if (key) {
+      if (*(++enc) == '\0')
+        enc = key;
+      c[0] ^= *enc;
+      if (*(++enc) == '\0')
+        enc = key;
+      c[1] ^= *enc;
+    }
+    // code length in bits
+    if (fread(&l, 1, 1, fin) < 1)
+      return fail(MSG_FORMAT, filename);
+    if (key) {
+      if (*(++enc) == '\0')
+        enc = key;
+      l ^= *enc;
+    }
+    // the code bits themselves, l / 8 + 1 bytes
+    if (fread(in, 1, l / 8 + 1, fin) < (size_t)l / 8 + 1)
+      return fail(MSG_FORMAT, filename);
+    if (key)
+      for (j = 0; j <= l / 8; j++) {
+        if (*(++enc) == '\0')
+          enc = key;
+        in[j] ^= *enc;
+      }
+    // walk the tree along the code bits, adding missing nodes on the way
+    p = 0;
+    for (j = 0; j < l; j++) {
+      int b = (in[j / 8] & (1 << (7 - (j % 8)))) ? 1 : 0;
+      int oldp = p;
+      p = dec[p].v[b];
+      if (p == 0) {
+        lastbit++;
+        if (lastbit == allocatedbit) {
+          // Grow through a temporary: if realloc fails the old table stays
+          // owned by `dec` (the destructor frees it) and is never
+          // dereferenced as NULL.
+          struct bit* grown = (struct bit*)realloc(
+              dec, (allocatedbit + BASEBITREC) * sizeof(struct bit));
+          if (!grown)
+            return fail(MSG_MEMORY, filename);
+          dec = grown;
+          allocatedbit += BASEBITREC;
+        }
+        dec[lastbit].v[0] = 0;
+        dec[lastbit].v[1] = 0;
+        dec[oldp].v[b] = lastbit;
+        p = lastbit;
+      }
+    }
+    dec[p].c[0] = c[0];
+    dec[p].c[1] = c[1];
+  }
+  return 0;
+}
+
+// Frees the code table, closes the input stream if it is still open and
+// frees the copy of the file name.
+Hunzip::~Hunzip() {
+  if (dec != NULL) {
+    free(dec);
+  }
+  if (fin != NULL) {
+    fclose(fin);
+  }
+  if (filename != NULL) {
+    free(filename);
+  }
+}
+
+// Huffman-decodes the next stretch of input into `out`.
+//
+// Returns the number of bytes stored in `out`: BUFSIZE when the buffer was
+// filled, or fewer when the end-of-data code (the node with index `lastbit`)
+// was reached, in which case the stream is closed and `fin` is set to NULL.
+// Returns -1 through fail() when the input ends without that code.
+int Hunzip::getbuf() {
+  int node = 0;     // current position in the code table
+  int written = 0;  // bytes stored in `out` so far
+  do {
+    if (inc == 0)
+      inbits = fread(in, 1, BUFSIZE, fin) * 8;
+    while (inc < inbits) {
+      const int bit = (in[inc / 8] & (1 << (7 - (inc % 8)))) ? 1 : 0;
+      const int previous = node;
+      node = dec[node].v[bit];
+      if (node == 0) {
+        // no child for this bit: `previous` is a complete code
+        if (previous == lastbit) {
+          fclose(fin);
+          fin = NULL;
+          // add last odd byte
+          if (dec[lastbit].c[0])
+            out[written++] = dec[lastbit].c[1];
+          return written;
+        }
+        out[written++] = dec[previous].c[0];
+        out[written++] = dec[previous].c[1];
+        // `inc` is deliberately not advanced here: the next call starts at
+        // the root and consumes this same bit again
+        if (written == BUFSIZE)
+          return written;
+        // restart at the root (node is 0 here) with the current bit
+        node = dec[node].v[bit];
+      }
+      inc++;
+    }
+    inc = 0;
+  } while (inbits == BUFSIZE * 8);
+  return fail(MSG_FORMAT, filename);
+}
+
+// Decodes the next line of the prefix/suffix-compressed text.
+//
+// Returns a pointer to the member buffer `line` (overwritten by the next
+// call), or NULL when decoding has failed or finished (bufsiz == -1) or when
+// the data describes a line that does not fit the buffers.
+//
+// Each encoded line ends with a control byte below 47: a byte above 32 gives
+// the number of trailing characters shared with the previous line (`right`),
+// and the byte that follows (or the control byte itself) gives the number of
+// leading characters shared with it (`left`, where 30 stands for 9).
+const char* Hunzip::getline() {
+  // Sized like `line`: leaves room for the terminator when a full buffer
+  // holds no end-of-line code (index BUFSIZE) and for the copied suffix.
+  char linebuf[BUFSIZE + 50];
+  int l = 0, eol = 0, left = 0, right = 0;
+  if (bufsiz == -1)
+    return NULL;
+  while (l < bufsiz && !eol) {
+    linebuf[l++] = out[outc];
+    switch (out[outc]) {
+      case '\t':
+        break;
+      case 31: {  // escape
+        if (++outc == bufsiz) {
+          // fin is NULL once getbuf() has decoded the end-of-data code;
+          // never hand a NULL stream to fread()
+          bufsiz = fin ? getbuf() : -1;
+          outc = 0;
+        }
+        linebuf[l - 1] = out[outc];
+        break;
+      }
+      case ' ':
+        break;
+      default:
+        if (((unsigned char)out[outc]) < 47) {
+          if (out[outc] > 32) {
+            right = out[outc] - 31;
+            if (++outc == bufsiz) {
+              bufsiz = fin ? getbuf() : -1;
+              outc = 0;
+            }
+          }
+          if (out[outc] == 30)
+            left = 9;
+          else
+            left = out[outc];
+          linebuf[l - 1] = '\n';
+          eol = 1;
+        }
+    }
+    if (++outc == bufsiz) {
+      outc = 0;
+      bufsiz = fin ? getbuf() : -1;
+    }
+  }
+  if (right) {
+    // the shared suffix is the last right + 1 bytes of the previous line
+    const size_t prevlen = strlen(line);
+    if (prevlen < (size_t)right + 1) {
+      // malformed data: the suffix would start before the buffer
+      bufsiz = fail(MSG_FORMAT, filename);
+      return NULL;
+    }
+    strcpy(linebuf + l - 1, line + prevlen - right - 1);
+  } else
+    linebuf[l] = '\0';
+  if (left < 0 || (size_t)left + strlen(linebuf) >= sizeof(line)) {
+    // malformed data: the rebuilt line would not fit into `line`
+    bufsiz = fail(MSG_FORMAT, filename);
+    return NULL;
+  }
+  strcpy(line + left, linebuf);
+  return line;
+}
diff --git a/extensions/spellcheck/hunspell/src/hunzip.hxx b/extensions/spellcheck/hunspell/src/hunzip.hxx
new file mode 100644
index 000000000..5082adddb
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/hunzip.hxx
@@ -0,0 +1,87 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+/* hunzip: file decompression for sorted dictionaries with optional encryption,
+ * algorithm: prefix-suffix encoding and 16-bit Huffman encoding */
+
+#ifndef _HUNZIP_HXX_
+#define _HUNZIP_HXX_
+
+#include "hunvisapi.h"
+
+#include <stdio.h>
+
+#define BUFSIZE 65536 /* size in bytes of Hunzip's in/out buffers */
+#define HZIP_EXTENSION ".hz"
+
+#define MSG_OPEN "error: %s: cannot open\n" /* MSG_*: printf formats with one %s; Hunzip::fail() callers pass the file name */
+#define MSG_FORMAT "error: %s: not in hzip format\n"
+#define MSG_MEMORY "error: %s: missing memory\n"
+#define MSG_KEY "error: %s: missing or bad password\n"
+
+struct bit { // one node of the decoding tree built by Hunzip::getcode()
+ unsigned char c[2]; // the two output bytes attached to the code that ends at this node
+ int v[2]; // index of the next node for input bit 0 and 1; 0 means no child
+};
+
+class LIBHUNSPELL_DLL_EXPORTED Hunzip { // line reader for hzip-compressed, optionally encrypted files
+ private:
+ Hunzip(const Hunzip&); // private and only declared here, which prevents copying
+ Hunzip& operator=(const Hunzip&);
+
+ protected:
+ char* filename; // copy of the path made by the constructor; freed in the destructor
+ FILE* fin; // input stream; getbuf() closes it and sets it to NULL at the end-of-data code
+ int bufsiz, lastbit, inc, inbits, outc; // bufsiz: valid bytes in out (-1: failed/finished); lastbit: index of the newest dec node; inc/inbits: bit position/bit count in in; outc: read position in out
+ struct bit* dec; // code table
+ char in[BUFSIZE]; // input buffer
+ char out[BUFSIZE + 1]; // Huffman-decoded buffer
+ char line[BUFSIZE + 50]; // decoded line
+ int getcode(const char* key); // opens the file and builds dec; 0 on success, -1 on failure
+ int getbuf(); // decodes into out; returns the byte count or -1
+ int fail(const char* err, const char* par); // prints err (a format with one %s) with par to stderr; returns -1
+
+ public:
+ Hunzip(const char* filename, const char* key = NULL); // key is only needed for encrypted ("hz1") files
+ ~Hunzip();
+ const char* getline(); // next line in an internal buffer, or NULL after failure/end
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/langnum.hxx b/extensions/spellcheck/hunspell/src/langnum.hxx
new file mode 100644
index 000000000..af5c86e4f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/langnum.hxx
@@ -0,0 +1,78 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef _LANGNUM_HXX_
+#define _LANGNUM_HXX_
+
+/*
+ language numbers for language specific codes
+ see http://l10n.openoffice.org/languages.html
+*/
+
+// Language numbers used to select language-specific code.
+// All values are written as plain decimal literals (a leading zero would
+// turn them into octal literals).
+enum {
+  LANG_ar = 96,
+  LANG_az = 100,  // custom number
+  LANG_bg = 41,
+  LANG_ca = 37,
+  LANG_cs = 42,
+  LANG_da = 45,
+  LANG_de = 49,
+  LANG_el = 30,
+  LANG_en = 1,
+  LANG_es = 34,
+  LANG_eu = 10,
+  LANG_fr = 2,
+  LANG_gl = 38,
+  LANG_hr = 78,
+  LANG_hu = 36,
+  LANG_it = 39,
+  LANG_la = 99,   // custom number
+  LANG_lv = 101,  // custom number
+  LANG_nl = 31,
+  LANG_pl = 48,
+  LANG_pt = 3,
+  LANG_ru = 7,
+  LANG_sv = 50,
+  LANG_tr = 90,
+  LANG_uk = 80,
+  LANG_xx = 999
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/license.hunspell b/extensions/spellcheck/hunspell/src/license.hunspell
new file mode 100644
index 000000000..dc2ce9c1e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/license.hunspell
@@ -0,0 +1,61 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Laszlo Nemeth (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s):
+ * David Einstein
+ * Davide Prina
+ * Giuseppe Modugno
+ * Gianluca Turconi
+ * Simon Brouwer
+ * Noll Janos
+ * Biro Arpad
+ * Goldman Eleonora
+ * Sarlos Tamas
+ * Bencsath Boldizsar
+ * Halacsy Peter
+ * Dvornik Laszlo
+ * Gefferth Andras
+ * Nagy Viktor
+ * Varga Daniel
+ * Chris Halls
+ * Rene Engelhard
+ * Bram Moolenaar
+ * Dafydd Jones
+ * Harri Pitkanen
+ * Andras Timar
+ * Tor Lillqvist
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef MOZILLA_CLIENT
+# include "config.h"
+#endif
diff --git a/extensions/spellcheck/hunspell/src/license.myspell b/extensions/spellcheck/hunspell/src/license.myspell
new file mode 100644
index 000000000..2da533075
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/license.myspell
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *
+ * NOTE: A special thanks and credit goes to Geoff Kuenning
+ * the creator of ispell. MySpell's affix algorithms were
+ * based on those of ispell which should be noted is
+ * copyright Geoff Kuenning et.al. and now available
+ * under a BSD style license. For more information on ispell
+ * and affix compression in general, please see:
+ * http://www.cs.ucla.edu/ficus-members/geoff/ispell.html
+ * (the home page for ispell)
+ *
+ * An almost complete rewrite of MySpell for use by
+ * the Mozilla project has been developed by David Einstein
+ * (Deinst@world.std.com). David and I are now
+ * working on parallel development tracks to help
+ * our respective projects (Mozilla and OpenOffice.org
+ * and we will maintain full affix file and dictionary
+ * file compatibility and work on merging our versions
+ * of MySpell back into a single tree. David has been
+ * a significant help in improving MySpell.
+ *
+ * Special thanks also go to La'szlo' Ne'meth
+ * <nemethl@gyorsposta.hu> who is the author of the
+ * Hungarian dictionary and who developed and contributed
+ * the code to support compound words in MySpell
+ * and fixed numerous problems with the encoding
+ * case conversion tables.
+ *
+ */
diff --git a/extensions/spellcheck/hunspell/src/moz.build b/extensions/spellcheck/hunspell/src/moz.build
new file mode 100644
index 000000000..017264643
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/moz.build
@@ -0,0 +1,38 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+UNIFIED_SOURCES += [
+ 'affentry.cxx',
+ 'affixmgr.cxx',
+ 'csutil.cxx',
+ 'filemgr.cxx',
+ 'hashmgr.cxx',
+ 'hunspell.cxx',
+ 'hunzip.cxx',
+ 'phonet.cxx',
+ 'replist.cxx',
+ 'suggestmgr.cxx',
+]
+
+# This variable is referenced in configure.in. Make sure to change that file
+# too if you need to change this variable.
+DEFINES['HUNSPELL_STATIC'] = True
+
+FINAL_LIBRARY = 'xul'
+
+LOCAL_INCLUDES += [
+ '../glue',
+]
+
+# We allow warnings for third-party code that can be updated from upstream.
+ALLOW_COMPILER_WARNINGS = True
+
+include('/ipc/chromium/chromium-config.mozbuild')
+
+if CONFIG['CLANG_CXX'] or CONFIG['CLANG_CL']:
+ CXXFLAGS += [
+ '-Wno-implicit-fallthrough',
+ ]
diff --git a/extensions/spellcheck/hunspell/src/patches/1322666 b/extensions/spellcheck/hunspell/src/patches/1322666
new file mode 100644
index 000000000..16db1fbe6
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/patches/1322666
@@ -0,0 +1,24 @@
+Bug 1322666 - Change MAXWORDLEN to 100
+
+diff --git a/extensions/spellcheck/hunspell/src/hunspell.cxx b/extensions/spellcheck/hunspell/src/hunspell.cxx
+--- a/extensions/spellcheck/hunspell/src/hunspell.cxx
++++ b/extensions/spellcheck/hunspell/src/hunspell.cxx
+@@ -80,17 +80,17 @@
+ #ifndef MOZILLA_CLIENT
+ #include "config.h"
+ #endif
+ #include "csutil.hxx"
+
+ #include <limits>
+ #include <string>
+
+-#define MAXWORDLEN 176
++#define MAXWORDLEN 100
+ #define MAXWORDUTF8LEN (MAXWORDLEN * 3)
+
+ Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key) {
+ encoding = NULL;
+ csconv = NULL;
+ utf8 = 0;
+ complexprefixes = 0;
+ affixpath = mystrdup(affpath);
diff --git a/extensions/spellcheck/hunspell/src/phonet.cxx b/extensions/spellcheck/hunspell/src/phonet.cxx
new file mode 100644
index 000000000..17350e74a
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/phonet.cxx
@@ -0,0 +1,274 @@
+/* phonetic.c - generic replacement algorithms for phonetic transformation
+ Copyright (C) 2000 Bjoern Jacke
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation;
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; If not, see
+ <http://www.gnu.org/licenses/>.
+
+ Changelog:
+
+ 2000-01-05 Bjoern Jacke <bjoern at j3e.de>
+ Initial Release inspired by the article about phonetic
+ transformations out of c't 25/1999
+
+ 2007-07-26 Bjoern Jacke <bjoern at j3e.de>
+ Released under MPL/GPL/LGPL tri-license for Hunspell
+
+ 2007-08-23 Laszlo Nemeth <nemeth at OOo>
+ Porting from Aspell to Hunspell using C-like structs
+*/
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#include "csutil.hxx"
+#include "phonet.hxx"
+
+// Fills parms.hash, the first-byte index into the rule list: hash[b] becomes
+// the index of the first rule whose pattern starts with byte b, or -1 when no
+// rule starts with it. Rules are visited at every second index (pattern
+// entries), and the list ends at the first pattern that is an empty string.
+void init_phonet_hash(phonetable& parms) {
+  for (int slot = 0; slot < HASHSIZE; ++slot)
+    parms.hash[slot] = -1;
+
+  for (int rule = 0; parms.rules[rule][0] != '\0'; rule += 2) {
+    /** set hash value **/
+    const int first = (unsigned char)parms.rules[rule][0];
+    if (parms.hash[first] < 0)
+      parms.hash[first] = rule;
+  }
+}
+
+// Copies the NUL-terminated string at src to dest, terminator included.
+// Unlike strcpy the two ranges may overlap, provided that dest < src.
+static inline void strmove(char* dest, char* src) {
+  for (; *src != '\0'; ++src, ++dest)
+    *dest = *src;
+  *dest = '\0';
+}
+
+// Letter test used by the rule matcher: bytes below 128 are classified by
+// isalpha(), every byte from 128 upwards counts as a letter.
+// Returns non-zero for letters and 0 otherwise.
+static int myisalpha(char ch) {
+  const bool below_128 = (unsigned char)ch < 128;
+  return below_128 ? isalpha(ch) : 1;
+}
+
+/*  Do phonetic transformation.        */
+/*  phonetic transcription algorithm   */
+/*  see: http://aspell.net/man-html/Phonetic-Code.html */
+/*  convert string to uppercase before this call */
+/*                                                                    */
+/*  inword: the word to transcribe                                    */
+/*  parms:  rule table; parms.rules holds (pattern, replacement)      */
+/*          pairs and parms.hash must have been filled by             */
+/*          init_phonet_hash()                                        */
+/*  Returns the transcription, or an empty string when inword is      */
+/*  longer than MAXPHONETUTF8LEN bytes.                               */
+std::string phonet(const std::string& inword, phonetable& parms) {
+
+  int i, k = 0, p, z;
+  int k0, n0, p0 = -333, z0;
+  char c;
+  const char* s;
+  typedef unsigned char uchar;
+
+  size_t len = inword.size();
+  if (len > MAXPHONETUTF8LEN)
+    return std::string();
+  /** work on a private, modifiable copy: '<' rules rewrite the word **/
+  char word[MAXPHONETUTF8LEN + 1];
+  strncpy(word, inword.c_str(), MAXPHONETUTF8LEN);
+  word[MAXPHONETUTF8LEN] = '\0';
+
+  std::string target;
+  /**  check word  **/
+  i = z = 0;
+  while ((c = word[i]) != '\0') {
+    int n = parms.hash[(uchar)c];
+    z0 = 0;
+
+    if (n >= 0) {
+      /**  check all rules for the same letter  **/
+      while (parms.rules[n][0] == c) {
+        /**  check whole string  **/
+        k = 1; /** number of found letters  **/
+        p = 5; /** default priority  **/
+        s = parms.rules[n];
+        s++; /**  important for (see below)  "*(s-1)"  **/
+
+        while (*s != '\0' && word[i + k] == *s && !isdigit((unsigned char)*s) &&
+               strchr("(-<^$", *s) == NULL) {
+          k++;
+          s++;
+        }
+        if (*s == '(') {
+          /**  check letters in "(..)"  **/
+          if (myisalpha(word[i + k])  // ...could be implied?
+              && strchr(s + 1, word[i + k]) != NULL) {
+            k++;
+            /** stop at the terminator as well, so that a rule with an  **/
+            /** unclosed "(" cannot move s past the end of the rule;    **/
+            /** same handling as in the follow-up rule scan below       **/
+            while (*s != ')' && *s != '\0')
+              s++;
+            if (*s == ')')
+              s++;
+          }
+        }
+        p0 = (int)*s;
+        k0 = k;
+        while (*s == '-' && k > 1) {
+          k--;
+          s++;
+        }
+        if (*s == '<')
+          s++;
+        if (isdigit((unsigned char)*s)) {
+          /**  determine priority  **/
+          p = *s - '0';
+          s++;
+        }
+        if (*s == '^' && *(s + 1) == '^')
+          s++;
+
+        if (*s == '\0' || (*s == '^' && (i == 0 || !myisalpha(word[i - 1])) &&
+                           (*(s + 1) != '$' || (!myisalpha(word[i + k0])))) ||
+            (*s == '$' && i > 0 && myisalpha(word[i - 1]) &&
+             (!myisalpha(word[i + k0])))) {
+          /**  search for followup rules, if:     **/
+          /**  parms.followup and k > 1  and  NO '-' in searchstring **/
+          char c0 = word[i + k - 1];
+          n0 = parms.hash[(uchar)c0];
+
+          //            if (parms.followup  &&  k > 1  &&  n0 >= 0
+          if (k > 1 && n0 >= 0 && p0 != (int)'-' && word[i + k] != '\0') {
+            /**  test follow-up rule for "word[i+k]"  **/
+            while (parms.rules[n0][0] == c0) {
+              /**  check whole string  **/
+              k0 = k;
+              p0 = 5;
+              s = parms.rules[n0];
+              s++;
+              while (*s != '\0' && word[i + k0] == *s &&
+                     !isdigit((unsigned char)*s) &&
+                     strchr("(-<^$", *s) == NULL) {
+                k0++;
+                s++;
+              }
+              if (*s == '(') {
+                /**  check letters  **/
+                if (myisalpha(word[i + k0]) &&
+                    strchr(s + 1, word[i + k0]) != NULL) {
+                  k0++;
+                  while (*s != ')' && *s != '\0')
+                    s++;
+                  if (*s == ')')
+                    s++;
+                }
+              }
+              while (*s == '-') {
+                /**  "k0" gets NOT reduced   **/
+                /**  because "if (k0 == k)"  **/
+                s++;
+              }
+              if (*s == '<')
+                s++;
+              if (isdigit((unsigned char)*s)) {
+                p0 = *s - '0';
+                s++;
+              }
+
+              if (*s == '\0'
+                  /**  *s == '^' cuts  **/
+                  || (*s == '$' && !myisalpha(word[i + k0]))) {
+                if (k0 == k) {
+                  /**  this is just a piece of the string  **/
+                  n0 += 2;
+                  continue;
+                }
+
+                if (p0 < p) {
+                  /**  priority too low  **/
+                  n0 += 2;
+                  continue;
+                }
+                /**  rule fits; stop search  **/
+                break;
+              }
+              n0 += 2;
+            } /**  End of "while (parms.rules[n0][0] == c0)"  **/
+
+            if (p0 >= p && parms.rules[n0][0] == c0) {
+              n += 2;
+              continue;
+            }
+          } /** end of follow-up stuff **/
+
+          /**  replace string  **/
+          s = parms.rules[n + 1];
+          p0 = (parms.rules[n][0] != '\0' &&
+                strchr(parms.rules[n] + 1, '<') != NULL)
+                   ? 1
+                   : 0;
+          if (p0 == 1 && z == 0) {
+            /**  rule with '<' is used  **/
+            if (!target.empty() && *s != '\0' &&
+                (target[target.size()-1] == c || target[target.size()-1] == *s)) {
+              target.erase(target.size() - 1);
+            }
+            z0 = 1;
+            z = 1;
+            k0 = 0;
+            while (*s != '\0' && word[i + k0] != '\0') {
+              word[i + k0] = *s;
+              k0++;
+              s++;
+            }
+            if (k > k0)
+              strmove(&word[0] + i + k0, &word[0] + i + k);
+
+            /**  new "actual letter"  **/
+            c = word[i];
+          } else { /** no '<' rule used **/
+            i += k - 1;
+            z = 0;
+            while (*s != '\0' && *(s + 1) != '\0' && target.size() < len) {
+              if (target.empty() || target[target.size()-1] != *s) {
+                target.push_back(*s);
+              }
+              s++;
+            }
+            /**  new "actual letter"  **/
+            c = *s;
+            if (parms.rules[n][0] != '\0' &&
+                strstr(parms.rules[n] + 1, "^^") != NULL) {
+              if (c != '\0') {
+                target.push_back(c);
+              }
+              strmove(&word[0], &word[0] + i + 1);
+              i = 0;
+              z0 = 1;
+            }
+          }
+          break;
+        } /** end of follow-up stuff **/
+        n += 2;
+      } /**  end of while (parms.rules[n][0] == c)  **/
+    }   /**  end of if (n >= 0)  **/
+    if (z0 == 0) {
+      if (k && !p0 && target.size() < len && c != '\0' &&
+          (1 || target.empty() || target[target.size()-1] != c)) {
+        /**  condense only double letters  **/
+        target.push_back(c);
+        /// printf("\n setting \n");
+      }
+
+      i++;
+      z = 0;
+      k = 0;
+    }
+  } /**  end of   while ((c = word[i]) != '\0')  **/
+
+  return target;
+} /**  end of function "phonet"  **/
diff --git a/extensions/spellcheck/hunspell/src/phonet.hxx b/extensions/spellcheck/hunspell/src/phonet.hxx
new file mode 100644
index 000000000..eb9fd0c62
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/phonet.hxx
@@ -0,0 +1,52 @@
+/* phonetic.c - generic replacement algorithms for phonetic transformation
+ Copyright (C) 2000 Bjoern Jacke
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation;
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; If not, see
+ <http://www.gnu.org/licenses/>.
+
+ Changelog:
+
+ 2000-01-05 Bjoern Jacke <bjoern at j3e.de>
+ Initial Release inspired by the article about phonetic
+ transformations out of c't 25/1999
+
+ 2007-07-26 Bjoern Jacke <bjoern at j3e.de>
+ Released under MPL/GPL/LGPL tri-license for Hunspell
+
+ 2007-08-23 Laszlo Nemeth <nemeth at OOo>
+ Porting from Aspell to Hunspell using C-like structs
+*/
+
+#ifndef __PHONETHXX__
+#define __PHONETHXX__
+
+#define HASHSIZE 256
+#define MAXPHONETLEN 256
+#define MAXPHONETUTF8LEN (MAXPHONETLEN * 4)
+
+#include "hunvisapi.h"
+
+struct phonetable { // rule table consumed by init_phonet_hash() and phonet()
+ char utf8; // presumably a UTF-8 flag; not read by phonet()/init_phonet_hash() - TODO confirm against the code that fills the table
+ cs_info* lang; // not read by phonet()/init_phonet_hash(); cs_info is declared outside this header
+ int num; // presumably the number of rules; not read by phonet()/init_phonet_hash() - TODO confirm
+ char** rules; // strings in pairs: even index = pattern, odd index = replacement; ends at an empty pattern
+ int hash[HASHSIZE]; // per first byte: index of the first rule starting with it, or -1 (set by init_phonet_hash())
+};
+
+LIBHUNSPELL_DLL_EXPORTED void init_phonet_hash(phonetable& parms);
+
+LIBHUNSPELL_DLL_EXPORTED std::string phonet(const std::string& inword,
+ phonetable& phone);
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/replist.cxx b/extensions/spellcheck/hunspell/src/replist.cxx
new file mode 100644
index 000000000..b3e6b37d2
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/replist.cxx
@@ -0,0 +1,193 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <limits>
+
+#include "replist.hxx"
+#include "csutil.hxx"
+
+// Reserve room for up to n replacement entries; the list starts out empty.
+RepList::RepList(int n) {
+  pos = 0;
+  dat = (replentry**)malloc(sizeof(replentry*) * n);
+  // a failed allocation leaves a zero capacity, so add() rejects every entry
+  size = (dat != 0) ? n : 0;
+}
+
+// Release every stored entry (both pattern strings, then the entry itself)
+// and finally the pointer array.
+RepList::~RepList() {
+  int remaining = pos;
+  while (remaining-- > 0) {
+    replentry* entry = dat[remaining];
+    free(entry->pattern);
+    free(entry->pattern2);
+    free(entry);
+  }
+  free(dat);
+}
+
+// Number of entries currently stored in the list (incremented by add()).
+int RepList::get_pos() {
+ return pos;
+}
+
+// Return the entry stored at index n, or NULL when n is outside [0, pos).
+// Slots at or beyond pos were never written by add(), so they must not be
+// handed out.
+replentry* RepList::item(int n) {
+  if (n < 0 || n >= pos)
+    return NULL;
+  return dat[n];
+}
+
+// Binary search over the entries, which add() keeps sorted by pattern.
+// Returns the index of an entry whose pattern equals `word` when the search
+// probes one; otherwise the lower bound p1 left when the interval collapses.
+// Index 0 is never compared (the midpoint is always >= 1), and an empty list
+// also yields 0, so the result is only a hint that callers verify with
+// match().
+int RepList::near(const char* word) {
+ int p1 = 0;
+ int p2 = pos;
+ while ((p2 - p1) > 1) {
+ int m = (p1 + p2) / 2;
+ int c = strcmp(word, dat[m]->pattern);
+ if (c <= 0) {
+ if (c < 0)
+ p2 = m;
+ else
+ p1 = p2 = m;  // exact hit: collapse the interval so the loop ends with p1 == m
+ } else
+ p1 = m;
+ }
+ return p1;
+}
+
+// Return the length of entry n's pattern when `word` starts with that
+// pattern, otherwise 0.
+// An index outside [0, pos) also yields 0: near() returns 0 for an empty
+// list, and dat[0] holds no valid entry in that case.
+int RepList::match(const char* word, int n) {
+  if (n < 0 || n >= pos)
+    return 0;
+  const char* pat = dat[n]->pattern;
+  // measure the pattern once instead of once per use
+  size_t patlen = strlen(pat);
+  if (strncmp(word, pat, patlen) == 0)
+    return patlen;
+  return 0;
+}
+
+// Append the replacement pat1 -> pat2 and keep the list ordered by pattern.
+// Underscores in both strings are turned into spaces through mystrrep().
+// Returns 0 on success, 1 when an argument is NULL, the list is full or the
+// entry cannot be allocated.
+int RepList::add(char* pat1, char* pat2) {
+  if (pat1 == NULL || pat2 == NULL || pos >= size)
+    return 1;
+  replentry* entry = (replentry*)malloc(sizeof(replentry));
+  if (!entry)
+    return 1;
+  entry->pattern = mystrrep(pat1, "_", " ");
+  entry->pattern2 = mystrrep(pat2, "_", " ");
+  entry->start = false;
+  entry->end = false;
+  dat[pos++] = entry;
+  // sink the new entry towards the front until its predecessor is not larger
+  int slot = pos - 1;
+  while (slot > 0 && strcmp(dat[slot]->pattern, dat[slot - 1]->pattern) < 0) {
+    replentry* moved = dat[slot];
+    dat[slot] = dat[slot - 1];
+    dat[slot - 1] = moved;
+    --slot;
+  }
+  return 0;
+}
+
+// Copy `word` into `dest`, substituting pattern2 wherever an entry's pattern
+// matches at the current position.
+//   dest     - output buffer, always NUL-terminated on success
+//   destsize - capacity of dest in bytes, including the terminator
+// Returns 1 when at least one replacement was made, 0 when the word was
+// copied unchanged, and -1 when the result does not fit into dest.
+int RepList::conv(const char* word, char* dest, size_t destsize) {
+  // even the terminating NUL needs one byte; without this check an empty
+  // word would write dest[0] into a zero-sized buffer
+  if (destsize == 0)
+    return -1;
+  // the input length is loop-invariant (word and dest are expected not to
+  // overlap), so measure it once
+  const size_t wordlen = strlen(word);
+  size_t stl = 0;
+  int change = 0;
+  for (size_t i = 0; i < wordlen; i++) {
+    int n = near(word + i);
+    int l = match(word + i, n);
+    if (l) {
+      size_t replen = strlen(dat[n]->pattern2);
+      if (stl + replen >= destsize)
+        return -1;
+      strcpy(dest + stl, dat[n]->pattern2);
+      stl += replen;
+      i += l - 1;  // skip the rest of the matched pattern
+      change = 1;
+    } else {
+      if (stl + 1 >= destsize)
+        return -1;
+      dest[stl++] = word[i];
+    }
+  }
+  dest[stl] = '\0';
+  return change;
+}
+
+// std::string variant of conv(): rebuilds `dest` from `word`, substituting
+// pattern2 wherever an entry's pattern matches at the current position.
+// Returns true when at least one replacement was made.
+bool RepList::conv(const char* word, std::string& dest) {
+  dest.clear();
+
+  // measured once, after dest has been reset, instead of on every iteration
+  const size_t wordlen = strlen(word);
+  bool change = false;
+  for (size_t i = 0; i < wordlen; i++) {
+    int n = near(word + i);
+    int l = match(word + i, n);
+    if (l) {
+      dest.append(dat[n]->pattern2);
+      i += l - 1;  // skip the rest of the matched pattern
+      change = true;
+    } else {
+      dest.push_back(word[i]);
+    }
+  }
+  return change;
+}
diff --git a/extensions/spellcheck/hunspell/src/replist.hxx b/extensions/spellcheck/hunspell/src/replist.hxx
new file mode 100644
index 000000000..0c5153625
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/replist.hxx
@@ -0,0 +1,107 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* string replacement list class */
+#ifndef _REPLIST_HXX_
+#define _REPLIST_HXX_
+
+#include "hunvisapi.h"
+
+#include "w_char.hxx"
+
+#include <string>
+#include <vector>
+
+// Fixed-capacity list of string replacements (pattern -> pattern2), kept
+// sorted by pattern so that near() can binary-search it.
+class LIBHUNSPELL_DLL_EXPORTED RepList {
+ private:
+ // declared private to forbid copying: the list owns malloc'ed entries
+ RepList(const RepList&);
+ RepList& operator=(const RepList&);
+
+ protected:
+ replentry** dat;  // array of entry pointers, ordered by pattern
+ int size;  // capacity of dat (0 when the allocation failed)
+ int pos;  // number of entries currently stored
+
+ public:
+ RepList(int n);  // n: maximum number of entries
+ ~RepList();
+
+ int get_pos();  // number of stored entries
+ int add(char* pat1, char* pat2);  // 0 on success, 1 on failure
+ replentry* item(int n);  // entry at index n
+ int near(const char* word);  // index hint for the entry closest to word
+ int match(const char* word, int n);  // matched pattern length, or 0
+ // both conv() overloads apply the replacements to `word`
+ int conv(const char* word, char* dest, size_t destsize);
+ bool conv(const char* word, std::string& dest);
+};
+#endif
diff --git a/extensions/spellcheck/hunspell/src/suggestmgr.cxx b/extensions/spellcheck/hunspell/src/suggestmgr.cxx
new file mode 100644
index 000000000..17becd758
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/suggestmgr.cxx
@@ -0,0 +1,2192 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#include "suggestmgr.hxx"
+#include "htypes.hxx"
+#include "csutil.hxx"
+
+// Wide-character '|', compared against keyboard-string entries in
+// badcharkey_utf().
+// NOTE(review): this aggregate initializer depends on the member order of
+// w_char (declared elsewhere) -- confirm that '|' ends up in the low byte.
+const w_char W_VLINE = {'\0', '|'};
+
+// Build a suggestion generator.
+//   tryme - characters to try when building candidate suggestions (may be
+//           NULL)
+//   maxn  - maximum number of suggestions kept in a list
+//   aptr  - affix manager that supplies the language settings (may be NULL,
+//           in which case the defaults below stay in effect)
+SuggestMgr::SuggestMgr(const char* tryme, int maxn, AffixMgr* aptr) {
+  // register affix manager and check in string of chars to
+  // try when building candidate suggestions
+  pAMgr = aptr;
+
+  csconv = NULL;
+
+  ckeyl = 0;
+  ckey = NULL;
+  ckey_utf = NULL;
+
+  ctryl = 0;
+  ctry = NULL;
+  ctry_utf = NULL;
+
+  utf8 = 0;
+  langnum = 0;
+  complexprefixes = 0;
+
+  maxSug = maxn;
+  nosplitsugs = 0;
+  maxngramsugs = MAXNGRAMSUGS;
+  maxcpdsugs = MAXCOMPOUNDSUGS;
+
+  if (pAMgr) {
+    langnum = pAMgr->get_langnum();
+    ckey = pAMgr->get_key_string();
+    nosplitsugs = pAMgr->get_nosplitsugs();
+    // negative limits mean "not configured": keep the defaults
+    if (pAMgr->get_maxngramsugs() >= 0)
+      maxngramsugs = pAMgr->get_maxngramsugs();
+    utf8 = pAMgr->get_utf8();
+    if (pAMgr->get_maxcpdsugs() >= 0)
+      maxcpdsugs = pAMgr->get_maxcpdsugs();
+    if (!utf8) {
+      // 8-bit encodings need the case conversion table of the encoding
+      char* enc = pAMgr->get_encoding();
+      csconv = get_current_cs(enc);
+      free(enc);
+    }
+    complexprefixes = pAMgr->get_complexprefixes();
+  }
+
+  if (ckey) {
+    if (utf8) {
+      std::vector<w_char> t;
+      ckeyl = u8_u16(t, ckey);
+      // Allocate and copy only when the conversion produced characters:
+      // `&t[0]` must not be evaluated on an empty vector, and a zero or
+      // negative count must not reach malloc().
+      if (ckeyl > 0 && !t.empty())
+        ckey_utf = (w_char*)malloc(ckeyl * sizeof(w_char));
+      if (ckey_utf)
+        memcpy(ckey_utf, &t[0], ckeyl * sizeof(w_char));
+      else
+        ckeyl = 0;
+    } else {
+      ckeyl = strlen(ckey);
+    }
+  }
+
+  if (tryme) {
+    ctry = mystrdup(tryme);
+    if (ctry)
+      ctryl = strlen(ctry);
+    if (ctry && utf8) {
+      std::vector<w_char> t;
+      ctryl = u8_u16(t, tryme);
+      // same guard as for the keyboard string above
+      if (ctryl > 0 && !t.empty())
+        ctry_utf = (w_char*)malloc(ctryl * sizeof(w_char));
+      if (ctry_utf)
+        memcpy(ctry_utf, &t[0], ctryl * sizeof(w_char));
+      else
+        ctryl = 0;
+    }
+  }
+}
+
+// Release the keyboard and try-character strings (8-bit and wide copies)
+// and reset the bookkeeping members.
+SuggestMgr::~SuggestMgr() {
+  pAMgr = NULL;
+
+  // free(NULL) is a no-op, so no explicit NULL tests are needed
+  free(ckey);
+  free(ckey_utf);
+  ckey = NULL;
+  ckey_utf = NULL;
+  ckeyl = 0;
+
+  free(ctry);
+  free(ctry_utf);
+  ctry = NULL;
+  ctry_utf = NULL;
+  ctryl = 0;
+
+  maxSug = 0;
+#ifdef MOZILLA_CLIENT
+  delete[] csconv;
+#endif
+}
+
+// Append `candidate` to wlst when it is not listed yet and checkword()
+// accepts it.
+//   wl               - length of candidate
+//   ns               - number of suggestions already stored in wlst
+//   timer, timelimit - passed through to checkword(); may be NULL
+// Returns the new suggestion count (maxSug, unchanged, when the list is
+// already full) or -1 when the copy cannot be allocated. In that case the
+// entries 0..ns-1 are freed and their slots are reset to NULL, because
+// suggest() later frees every non-NULL slot and would otherwise free them a
+// second time.
+int SuggestMgr::testsug(char** wlst,
+                        const char* candidate,
+                        int wl,
+                        int ns,
+                        int cpdsuggest,
+                        int* timer,
+                        clock_t* timelimit) {
+  int cwrd = 1;
+  if (ns == maxSug)
+    return maxSug;
+  // skip candidates that are already in the list
+  for (int k = 0; k < ns; k++) {
+    if (strcmp(candidate, wlst[k]) == 0) {
+      cwrd = 0;
+      break;
+    }
+  }
+  if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) {
+    wlst[ns] = mystrdup(candidate);
+    if (wlst[ns] == NULL) {
+      for (int j = 0; j < ns; j++) {
+        free(wlst[j]);
+        wlst[j] = NULL;  // no dangling pointer for the caller's cleanup
+      }
+      return -1;
+    }
+    ns++;
+  }
+  return ns;
+}
+
+// Generate suggestions for a misspelled word.
+//   slst  - address of the suggestion array; when *slst is NULL an array of
+//           maxSug slots (all NULL) is allocated here
+//   w     - the misspelled word
+//   nsug  - number of suggestions already present in *slst
+//   onlycompoundsug - optional output, set to 1 when suggestions exist but
+//           the first (non-compound) pass found none from the
+//           capitalization/REP/MAP steps: probably bad suggestions
+//           (needed for ngram suggestions, too)
+// Returns the number of suggestions, or -1 after an allocation failure (the
+// list is then freed and *slst is set to NULL).
+
+int SuggestMgr::suggest(char*** slst,
+ const char* w,
+ int nsug,
+ int* onlycompoundsug) {
+ int nocompoundtwowords = 0;
+ char** wlst;
+ std::vector<w_char> word_utf;
+ int wl = 0;
+ int nsugorig = nsug;
+ std::string w2;
+ const char* word = w;
+ int oldSug = 0;
+
+ // word reversing wrapper for complex prefixes
+ if (complexprefixes) {
+ w2.assign(w);
+ if (utf8)
+ reverseword_utf(w2);
+ else
+ reverseword(w2);
+ word = w2.c_str();
+ }
+
+ if (*slst) {
+ wlst = *slst;
+ } else {
+ wlst = (char**)malloc(maxSug * sizeof(char*));
+ if (wlst == NULL)
+ return -1;
+ for (int i = 0; i < maxSug; i++) {
+ wlst[i] = NULL;
+ }
+ }
+
+ if (utf8) {
+ wl = u8_u16(word_utf, word);
+ // conversion failed: hand the list back unchanged
+ if (wl == -1) {
+ *slst = wlst;
+ return nsug;
+ }
+ }
+
+ // pass 0 checks plain words, pass 1 (cpdsuggest == 1) is only run when
+ // nocompoundtwowords stayed 0 during pass 0
+ for (int cpdsuggest = 0; (cpdsuggest < 2) && (nocompoundtwowords == 0);
+ cpdsuggest++) {
+ // limit compound suggestion
+ if (cpdsuggest > 0)
+ oldSug = nsug;
+
+ // suggestions for an uppercase word (html -> HTML)
+ if ((nsug < maxSug) && (nsug > -1)) {
+ nsug = (utf8) ? capchars_utf(wlst, &word_utf[0], wl, nsug, cpdsuggest)
+ : capchars(wlst, word, nsug, cpdsuggest);
+ }
+
+ // perhaps we made a typical fault of spelling
+ if ((nsug < maxSug) && (nsug > -1) &&
+ (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
+ nsug = replchars(wlst, word, nsug, cpdsuggest);
+ }
+
+ // perhaps we chose the wrong char from a related set
+ if ((nsug < maxSug) && (nsug > -1) &&
+ (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
+ nsug = mapchars(wlst, word, nsug, cpdsuggest);
+ }
+
+ // only suggest compound words when no other suggestion
+ if ((cpdsuggest == 0) && (nsug > nsugorig))
+ nocompoundtwowords = 1;
+
+ // did we swap the order of chars by mistake
+ if ((nsug < maxSug) && (nsug > -1) &&
+ (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
+ nsug = (utf8) ? swapchar_utf(wlst, &word_utf[0], wl, nsug, cpdsuggest)
+ : swapchar(wlst, word, nsug, cpdsuggest);
+ }
+
+ // did we swap the order of non adjacent chars by mistake
+ if ((nsug < maxSug) && (nsug > -1) &&
+ (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
+ nsug = (utf8) ? longswapchar_utf(wlst, &word_utf[0], wl, nsug, cpdsuggest)
+ : longswapchar(wlst, word, nsug, cpdsuggest);
+ }
+
+ // did we just hit the wrong key in place of a good char (case and keyboard)
+ if ((nsug < maxSug) && (nsug > -1) &&
+ (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
+ nsug = (utf8) ? badcharkey_utf(wlst, &word_utf[0], wl, nsug, cpdsuggest)
+ : badcharkey(wlst, word, nsug, cpdsuggest);
+ }
+
+ // did we add a char that should not be there
+ if ((nsug < maxSug) && (nsug > -1) &&
+ (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
+ nsug = (utf8) ? extrachar_utf(wlst, &word_utf[0], wl, nsug, cpdsuggest)
+ : extrachar(wlst, word, nsug, cpdsuggest);
+ }
+
+ // did we forget a char
+ if ((nsug < maxSug) && (nsug > -1) &&
+ (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
+ nsug = (utf8) ? forgotchar_utf(wlst, &word_utf[0], wl, nsug, cpdsuggest)
+ : forgotchar(wlst, word, nsug, cpdsuggest);
+ }
+
+ // did we move a char
+ if ((nsug < maxSug) && (nsug > -1) &&
+ (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
+ nsug = (utf8) ? movechar_utf(wlst, &word_utf[0], wl, nsug, cpdsuggest)
+ : movechar(wlst, word, nsug, cpdsuggest);
+ }
+
+ // did we just hit the wrong key in place of a good char
+ if ((nsug < maxSug) && (nsug > -1) &&
+ (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
+ nsug = (utf8) ? badchar_utf(wlst, &word_utf[0], wl, nsug, cpdsuggest)
+ : badchar(wlst, word, nsug, cpdsuggest);
+ }
+
+ // did we double two characters
+ if ((nsug < maxSug) && (nsug > -1) &&
+ (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
+ nsug = (utf8) ? doubletwochars_utf(wlst, &word_utf[0], wl, nsug, cpdsuggest)
+ : doubletwochars(wlst, word, nsug, cpdsuggest);
+ }
+
+ // perhaps we forgot to hit space and two words ran together
+ if (!nosplitsugs && (nsug < maxSug) && (nsug > -1) &&
+ (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
+ nsug = twowords(wlst, word, nsug, cpdsuggest);
+ }
+
+ } // repeating ``for'' statement compounding support
+
+ if (nsug < 0) {
+ // we ran out of memory - we should free up as much as possible
+ for (int i = 0; i < maxSug; i++)
+ if (wlst[i] != NULL)
+ free(wlst[i]);
+ free(wlst);
+ wlst = NULL;
+ }
+
+ if (!nocompoundtwowords && (nsug > 0) && onlycompoundsug)
+ *onlycompoundsug = 1;
+
+ *slst = wlst;
+ return nsug;
+}
+
+// Offer the all-capitals form of the word (html -> HTML), UTF-16 input.
+int SuggestMgr::capchars_utf(char** wlst,
+                             const w_char* word,
+                             int wl,
+                             int ns,
+                             int cpdsuggest) {
+  std::vector<w_char> shouted_utf(word, word + wl);
+  mkallcap_utf(shouted_utf, langnum);
+
+  // testsug() works on the UTF-8 form
+  std::string shouted;
+  u16_u8(shouted, shouted_utf);
+  return testsug(wlst, shouted.c_str(), shouted.size(), ns, cpdsuggest, NULL,
+                 NULL);
+}
+
+// Offer the all-capitals form of the word (html -> HTML), 8-bit input.
+int SuggestMgr::capchars(char** wlst,
+                         const char* word,
+                         int ns,
+                         int cpdsuggest) {
+  std::string shouted(word);
+  mkallcap(shouted, csconv);
+  const char* text = shouted.c_str();
+  return testsug(wlst, text, shouted.size(), ns, cpdsuggest, NULL, NULL);
+}
+
+// Suggestions for a wrong character chosen out of a set of related
+// characters (the affix manager's map table).
+int SuggestMgr::mapchars(char** wlst,
+                         const char* word,
+                         int ns,
+                         int cpdsuggest) {
+  const int len = strlen(word);
+  if (!pAMgr || len < 2)
+    return ns;
+
+  const int map_count = pAMgr->get_nummap();
+  struct mapentry* table = pAMgr->get_maptable();
+  if (!table)
+    return ns;
+
+  // the recursive search builds its candidates in this scratch string and is
+  // bounded by the timer / start time pair
+  std::string scratch;
+  clock_t started = clock();
+  int budget = MINTIMER;
+  return map_related(word, scratch, 0, wlst, cpdsuggest, ns, table, map_count,
+                     &budget, &started);
+}
+
+// Recursive helper of mapchars(): extends `candidate` from position wn of
+// `word`, substituting every member of a map set for a matching member, and
+// tests each completed candidate.
+//   candidate        - prefix built so far (modified in place)
+//   wn               - current offset into word
+//   ns               - number of suggestions already stored in wlst
+//   timer, timelimit - passed to checkword(); the search stops once *timer
+//                      reaches 0
+// Returns the new suggestion count, or -1 when a suggestion could not be
+// allocated. The -1 is propagated immediately: continuing with ns == -1
+// would let a later leaf write to wlst[-1].
+int SuggestMgr::map_related(const char* word,
+                            std::string& candidate,
+                            int wn,
+                            char** wlst,
+                            int cpdsuggest,
+                            int ns,
+                            const mapentry* maptable,
+                            int nummap,
+                            int* timer,
+                            clock_t* timelimit) {
+  if (ns < 0)
+    return -1;
+  if (*(word + wn) == '\0') {
+    // whole word consumed: test the finished candidate unless already listed
+    int cwrd = 1;
+    for (int m = 0; m < ns; m++) {
+      if (candidate == wlst[m]) {
+        cwrd = 0;
+        break;
+      }
+    }
+    if ((cwrd) && checkword(candidate.c_str(), candidate.size(), cpdsuggest,
+                            timer, timelimit)) {
+      if (ns < maxSug) {
+        wlst[ns] = mystrdup(candidate.c_str());
+        if (wlst[ns] == NULL)
+          return -1;
+        ns++;
+      }
+    }
+    return ns;
+  }
+  int in_map = 0;
+  for (int j = 0; j < nummap; j++) {
+    for (int k = 0; k < maptable[j].len; k++) {
+      int len = strlen(maptable[j].set[k]);
+      if (strncmp(maptable[j].set[k], word + wn, len) == 0) {
+        in_map = 1;
+        size_t cn = candidate.size();
+        // try every member of the set in place of the matched one
+        for (int l = 0; l < maptable[j].len; l++) {
+          candidate.resize(cn);
+          candidate.append(maptable[j].set[l]);
+          ns = map_related(word, candidate, wn + len, wlst, cpdsuggest, ns,
+                           maptable, nummap, timer, timelimit);
+          if (ns == -1)
+            return -1;
+          if (!(*timer))
+            return ns;
+        }
+      }
+    }
+  }
+  if (!in_map) {
+    // no set covers this position: keep the original character
+    candidate.push_back(*(word + wn));
+    ns = map_related(word, candidate, wn + 1, wlst, cpdsuggest, ns, maptable,
+                     nummap, timer, timelimit);
+  }
+  return ns;
+}
+
+// Suggestions for a typical spelling fault that differs from the right form
+// by more than one letter, driven by the affix manager's replacement table.
+// For every table entry each occurrence of `pattern` in the word is replaced
+// by `pattern2` and the result is tested. Returns the new suggestion count,
+// or -1 on allocation failure.
+int SuggestMgr::replchars(char** wlst,
+ const char* word,
+ int ns,
+ int cpdsuggest) {
+ std::string candidate;
+ int wl = strlen(word);
+ if (wl < 2 || !pAMgr)
+ return ns;
+ int numrep = pAMgr->get_numrep();
+ struct replentry* reptable = pAMgr->get_reptable();
+ if (reptable == NULL)
+ return ns;
+ for (int i = 0; i < numrep; i++) {
+ const char* r = word;
+ // search every occurrence of the pattern in the word; the scan for this
+ // entry stops at the first occurrence that violates its end/start anchor
+ while ((r = strstr(r, reptable[i].pattern)) != NULL &&
+ (!reptable[i].end || strlen(r) == strlen(reptable[i].pattern)) &&
+ (!reptable[i].start || r == word)) {
+ // candidate = text before the match + pattern2 + text after the match
+ candidate.assign(word);
+ candidate.resize(r - word);
+ candidate.append(reptable[i].pattern2);
+ int lenp = strlen(reptable[i].pattern);
+ candidate.append(r + lenp);
+ ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL,
+ NULL);
+ if (ns == -1)
+ return -1;
+ // check REP suggestions with space
+ size_t sp = candidate.find(' ');
+ if (sp != std::string::npos) {
+ size_t prev = 0;
+ while (sp != std::string::npos) {
+ // the chunk between the previous space and this one must be a word
+ std::string prev_chunk = candidate.substr(prev, sp - prev);
+ if (checkword(prev_chunk.c_str(), prev_chunk.size(), 0, NULL, NULL)) {
+ int oldns = ns;
+ std::string post_chunk = candidate.substr(sp + 1);
+ ns = testsug(wlst, post_chunk.c_str(), post_chunk.size(), ns, cpdsuggest, NULL,
+ NULL);
+ if (ns == -1)
+ return -1;
+ // the remainder was accepted: store the whole candidate in its place
+ if (oldns < ns) {
+ free(wlst[ns - 1]);
+ wlst[ns - 1] = mystrdup(candidate.c_str());
+ if (!wlst[ns - 1])
+ return -1;
+ }
+ }
+ prev = sp + 1;
+ sp = candidate.find(' ', prev);
+ }
+ }
+ r++; // search for the next letter
+ }
+ }
+ return ns;
+}
+
+// Perhaps two characters were doubled (pattern aba -> ababa, for example
+// vacation -> vacacation); 8-bit input.
+int SuggestMgr::doubletwochars(char** wlst,
+                               const char* word,
+                               int ns,
+                               int cpdsuggest) {
+  const int len = strlen(word);
+  if (!pAMgr || len < 5)
+    return ns;
+
+  // counts consecutive positions whose character equals the one two before
+  int run = 0;
+  for (int i = 2; i < len; ++i) {
+    if (word[i] != word[i - 2]) {
+      run = 0;
+      continue;
+    }
+    if (++run != 3)
+      continue;
+    // three hits in a row: drop the two characters at i-1 and i
+    std::string candidate(word, word + i - 1);
+    candidate.insert(candidate.end(), word + i + 1, word + len);
+    ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest,
+                 NULL, NULL);
+    if (ns == -1)
+      return -1;
+    run = 0;
+  }
+  return ns;
+}
+
+// Perhaps two characters were doubled (pattern aba -> ababa, for example
+// vacation -> vacacation); UTF-16 input.
+int SuggestMgr::doubletwochars_utf(char** wlst,
+                                   const w_char* word,
+                                   int wl,
+                                   int ns,
+                                   int cpdsuggest) {
+  if (!pAMgr || wl < 5)
+    return ns;
+
+  // counts consecutive positions whose character equals the one two before
+  int run = 0;
+  for (int i = 2; i < wl; ++i) {
+    if (word[i] == word[i - 2]) {
+      if (++run == 3) {
+        // three hits in a row: drop the two characters at i-1 and i
+        std::vector<w_char> shortened(word, word + i - 1);
+        shortened.insert(shortened.end(), word + i + 1, word + wl);
+        std::string candidate;
+        u16_u8(candidate, shortened);
+        ns = testsug(wlst, candidate.c_str(), candidate.size(), ns,
+                     cpdsuggest, NULL, NULL);
+        if (ns == -1)
+          return -1;
+        run = 0;
+      }
+    } else {
+      run = 0;
+    }
+  }
+  return ns;
+}
+
+// Error is a wrong char in place of the correct one (case and keyboard
+// related version), 8-bit input. Returns the new suggestion count or -1 on
+// allocation failure.
+// NOTE(review): csconv is indexed without a NULL check; the constructor
+// leaves it NULL when there is no affix manager -- confirm callers.
+int SuggestMgr::badcharkey(char** wlst,
+ const char* word,
+ int ns,
+ int cpdsuggest) {
+ std::string candidate(word);
+
+ // swap out each char one by one and try uppercase and neighbor
+ // keyboard chars in its place to see if that makes a good word
+ for (size_t i = 0; i < candidate.size(); ++i) {
+ char tmpc = candidate[i];
+ // check with uppercase letters
+ candidate[i] = csconv[((unsigned char)tmpc)].cupper;
+ if (tmpc != candidate[i]) {
+ ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL, NULL);
+ if (ns == -1)
+ return -1;
+ candidate[i] = tmpc;
+ }
+ // check neighbor characters in keyboard string
+ if (!ckey)
+ continue;
+ // visit every occurrence of the original char in the keyboard string
+ char* loc = strchr(ckey, tmpc);
+ while (loc) {
+ // left neighbor, unless at the start or next to a '|' separator
+ if ((loc > ckey) && (*(loc - 1) != '|')) {
+ candidate[i] = *(loc - 1);
+ ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL, NULL);
+ if (ns == -1)
+ return -1;
+ }
+ // right neighbor, unless at the end or next to a '|' separator
+ if ((*(loc + 1) != '|') && (*(loc + 1) != '\0')) {
+ candidate[i] = *(loc + 1);
+ ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL, NULL);
+ if (ns == -1)
+ return -1;
+ }
+ loc = strchr(loc + 1, tmpc);
+ }
+ // restore the original char before moving to the next position
+ candidate[i] = tmpc;
+ }
+ return ns;
+}
+
+// Error is a wrong char in place of the correct one (case and keyboard
+// related version), UTF-16 input. Returns the new suggestion count or -1 on
+// allocation failure.
+int SuggestMgr::badcharkey_utf(char** wlst,
+ const w_char* word,
+ int wl,
+ int ns,
+ int cpdsuggest) {
+ std::string candidate;
+ std::vector<w_char> candidate_utf(word, word + wl);
+ // swap out each char one by one and try its uppercase form and its
+ // keyboard neighbors in its place to see if that makes a good word
+ for (int i = 0; i < wl; i++) {
+ w_char tmpc = candidate_utf[i];
+ // check with uppercase letters
+ candidate_utf[i] = upper_utf(candidate_utf[i], 1);
+ if (tmpc != candidate_utf[i]) {
+ u16_u8(candidate, candidate_utf);
+ ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL,
+ NULL);
+ if (ns == -1)
+ return -1;
+ candidate_utf[i] = tmpc;
+ }
+ // check neighbor characters in keyboard string
+ if (!ckey)
+ continue;
+ // find the first occurrence of the original char in the wide keyboard string
+ w_char* loc = ckey_utf;
+ while ((loc < (ckey_utf + ckeyl)) && *loc != tmpc)
+ loc++;
+ while (loc < (ckey_utf + ckeyl)) {
+ // left neighbor, unless at the start or next to a '|' separator
+ if ((loc > ckey_utf) && *(loc - 1) != W_VLINE) {
+ candidate_utf[i] = *(loc - 1);
+ u16_u8(candidate, candidate_utf);
+ ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL,
+ NULL);
+ if (ns == -1)
+ return -1;
+ }
+ // right neighbor, unless at the end or next to a '|' separator
+ if (((loc + 1) < (ckey_utf + ckeyl)) && (*(loc + 1) != W_VLINE)) {
+ candidate_utf[i] = *(loc + 1);
+ u16_u8(candidate, candidate_utf);
+ ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest, NULL,
+ NULL);
+ if (ns == -1)
+ return -1;
+ }
+ // advance to the next occurrence of the original char
+ do {
+ loc++;
+ } while ((loc < (ckey_utf + ckeyl)) && *loc != tmpc);
+ }
+ // restore the original char before moving to the next position
+ candidate_utf[i] = tmpc;
+ }
+ return ns;
+}
+
+// Error is a wrong char in place of the correct one, 8-bit input: every try
+// character is substituted at every position, last position first.
+int SuggestMgr::badchar(char** wlst, const char* word, int ns, int cpdsuggest) {
+  std::string candidate(word);
+  clock_t timelimit = clock();
+  int timer = MINTIMER;
+  for (int j = 0; j < ctryl; j++) {
+    for (size_t at = candidate.size(); at-- > 0;) {
+      const char saved = candidate[at];
+      if (ctry[j] == saved)
+        continue;  // substituting the same char would recreate the word
+      candidate[at] = ctry[j];
+      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest,
+                   &timer, &timelimit);
+      if (ns == -1)
+        return -1;
+      if (!timer)
+        return ns;  // time budget used up
+      candidate[at] = saved;
+    }
+  }
+  return ns;
+}
+
+// Error is a wrong char in place of the correct one, UTF-16 input: every try
+// character is substituted at every position, last position first.
+int SuggestMgr::badchar_utf(char** wlst,
+                            const w_char* word,
+                            int wl,
+                            int ns,
+                            int cpdsuggest) {
+  std::vector<w_char> candidate_utf(word, word + wl);
+  std::string candidate;
+  clock_t timelimit = clock();
+  int timer = MINTIMER;
+  for (int j = 0; j < ctryl; j++) {
+    int at = wl;
+    while (--at >= 0) {
+      w_char saved = candidate_utf[at];
+      if (saved == ctry_utf[j])
+        continue;  // substituting the same char would recreate the word
+      candidate_utf[at] = ctry_utf[j];
+      u16_u8(candidate, candidate_utf);
+      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest,
+                   &timer, &timelimit);
+      if (ns == -1)
+        return -1;
+      if (!timer)
+        return ns;  // time budget used up
+      candidate_utf[at] = saved;
+    }
+  }
+  return ns;
+}
+
+// Insertion errors, wide-character variant: the word contains one letter too
+// many. Each character is left out in turn (last to first) and the shortened
+// word, converted with u16_u8(), is handed to testsug().
+// Words shorter than two characters are not processed.
+// Returns the updated suggestion count, or -1 when testsug() returns -1.
+int SuggestMgr::extrachar_utf(char** wlst,
+                              const w_char* word,
+                              int wl,
+                              int ns,
+                              int cpdsuggest) {
+  const std::vector<w_char> full(word, word + wl);
+  if (full.size() < 2)
+    return ns;
+  for (size_t drop = full.size(); drop-- > 0;) {
+    // copy of the word with the character at `drop` removed
+    std::vector<w_char> shorter(full);
+    shorter.erase(shorter.begin() + drop);
+    std::string candidate;
+    u16_u8(candidate, shorter);
+    ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest,
+                 NULL, NULL);
+    if (ns == -1)
+      return -1;
+  }
+  return ns;
+}
+
+// Insertion errors: the word contains one letter too many.
+// Each byte is left out in turn (last to first) and the shortened word is
+// handed to testsug(). Words shorter than two bytes are not processed.
+// Returns the updated suggestion count, or -1 when testsug() returns -1.
+int SuggestMgr::extrachar(char** wlst,
+                          const char* word,
+                          int ns,
+                          int cpdsuggest) {
+  const std::string full(word);
+  if (full.size() < 2)
+    return ns;
+  for (size_t drop = full.size(); drop-- > 0;) {
+    // copy of the word with the byte at `drop` removed
+    std::string candidate(full);
+    candidate.erase(drop, 1);
+    ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest,
+                 NULL, NULL);
+    if (ns == -1)
+      return -1;
+  }
+  return ns;
+}
+
+// Omission errors: the word is missing a letter.
+// Each TRY character is inserted at every position, starting after the last
+// byte and moving towards the front, and the result is handed to testsug().
+// Returns the updated suggestion count; propagates -1 from testsug() and
+// stops early with the current count once the timer passed to testsug()
+// reads zero.
+int SuggestMgr::forgotchar(char** wlst,
+                           const char* word,
+                           int ns,
+                           int cpdsuggest) {
+  std::string candidate(word);
+  clock_t timelimit = clock();
+  int timer = MINTIMER;
+
+  for (int t = 0; t < ctryl; ++t) {
+    // size() + 1 insertion points: the very end and before every byte
+    for (size_t pos = candidate.size() + 1; pos-- > 0;) {
+      candidate.insert(pos, 1, ctry[t]);
+      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest,
+                   &timer, &timelimit);
+      if (ns == -1)
+        return -1;
+      if (!timer)
+        return ns;
+      candidate.erase(pos, 1);  // back to the original word
+    }
+  }
+  return ns;
+}
+
+// Omission errors, wide-character variant: the word is missing a letter.
+// Each entry of ctry_utf is inserted at every position, starting after the
+// last character and moving towards the front; the result is converted with
+// u16_u8() and handed to testsug().
+// Returns the updated suggestion count; propagates -1 from testsug() and
+// stops early with the current count once the timer passed to testsug()
+// reads zero.
+int SuggestMgr::forgotchar_utf(char** wlst,
+                               const w_char* word,
+                               int wl,
+                               int ns,
+                               int cpdsuggest) {
+  std::vector<w_char> candidate_utf(word, word + wl);
+  clock_t timelimit = clock();
+  int timer = MINTIMER;
+
+  for (int t = 0; t < ctryl; ++t) {
+    // size() + 1 insertion points: the very end and before every character
+    for (size_t pos = candidate_utf.size() + 1; pos-- > 0;) {
+      candidate_utf.insert(candidate_utf.begin() + pos, ctry_utf[t]);
+      std::string candidate;
+      u16_u8(candidate, candidate_utf);
+      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest,
+                   &timer, &timelimit);
+      if (ns == -1)
+        return -1;
+      if (!timer)
+        return ns;
+      candidate_utf.erase(candidate_utf.begin() + pos);  // restore the word
+    }
+  }
+  return ns;
+}
+
+// Split errors: the word should have been two words.
+// The word is cut after every character (after every complete UTF-8 sequence
+// when utf8 is set); when checkword() accepts both halves, "left right" is
+// appended to wlst, and "left-right" as well when the TRY string contains
+// 'a' or '-' and both halves are longer than one character.
+// Words shorter than three bytes are not processed.
+// Returns the updated suggestion count, or -1 when memory allocation fails.
+// Returns as soon as wlst already holds maxSug entries.
+int SuggestMgr::twowords(char** wlst,
+                         const char* word,
+                         int ns,
+                         int cpdsuggest) {
+  int c1, c2;
+  int forbidden = 0;
+  int cwrd;
+
+  int wl = strlen(word);
+  if (wl < 3)
+    return ns;
+
+  if (langnum == LANG_hu)
+    forbidden = check_forbidden(word, wl);
+
+  // The word is stored one byte into the buffer: the loop below shifts the
+  // left half one byte towards the front, which frees the byte at the split
+  // point for the separator.
+  char* candidate = (char*)malloc(wl + 2);
+  if (!candidate)
+    return -1;  // out of memory; same error value as a failed mystrdup()
+  strcpy(candidate + 1, word);
+
+  // split the string into two pieces after every char
+  // if both pieces are good words make them a suggestion
+  for (char* p = candidate + 1; p[1] != '\0'; p++) {
+    p[-1] = *p;
+    // go to end of the UTF-8 character
+    while (utf8 && ((p[1] & 0xc0) == 0x80)) {
+      *p = p[1];
+      p++;
+    }
+    if (utf8 && p[1] == '\0')
+      break;  // last UTF-8 character
+    *p = '\0';
+    c1 = checkword(candidate, strlen(candidate), cpdsuggest, NULL, NULL);
+    if (c1) {
+      c2 = checkword((p + 1), strlen(p + 1), cpdsuggest, NULL, NULL);
+      if (c2) {
+        *p = ' ';
+
+        // spec. Hungarian code (need a better compound word support)
+        if ((langnum == LANG_hu) && !forbidden &&
+            // if 3 repeating letter, use - instead of space
+            (((p[-1] == p[1]) &&
+              (((p > candidate + 1) && (p[-1] == p[-2])) || (p[-1] == p[2]))) ||
+             // or multiple compounding, with more, than 6 syllables
+             ((c1 == 3) && (c2 >= 2))))
+          *p = '-';
+
+        // skip the candidate when it is already in the list
+        cwrd = 1;
+        for (int k = 0; k < ns; k++) {
+          if (strcmp(candidate, wlst[k]) == 0) {
+            cwrd = 0;
+            break;
+          }
+        }
+        if (ns < maxSug) {
+          if (cwrd) {
+            wlst[ns] = mystrdup(candidate);
+            if (wlst[ns] == NULL) {
+              free(candidate);
+              return -1;
+            }
+            ns++;
+          }
+        } else {
+          free(candidate);
+          return ns;
+        }
+        // add two word suggestion with dash, if TRY string contains
+        // "a" or "-"
+        // NOTE: cwrd is not reset here, so the dashed form is only added
+        // when the spaced form above was not a duplicate.
+        if (ctry && (strchr(ctry, 'a') || strchr(ctry, '-')) &&
+            mystrlen(p + 1) > 1 && mystrlen(candidate) - mystrlen(p) > 1) {
+          *p = '-';
+          for (int k = 0; k < ns; k++) {
+            if (strcmp(candidate, wlst[k]) == 0) {
+              cwrd = 0;
+              break;
+            }
+          }
+          if (ns < maxSug) {
+            if (cwrd) {
+              wlst[ns] = mystrdup(candidate);
+              if (wlst[ns] == NULL) {
+                free(candidate);
+                return -1;
+              }
+              ns++;
+            }
+          } else {
+            free(candidate);
+            return ns;
+          }
+        }
+      }
+    }
+  }
+  free(candidate);
+  return ns;
+}
+
+// Transposition errors: two adjacent letters were swapped.
+// Every adjacent pair is exchanged in turn and handed to testsug(). For words
+// of 4 or 5 bytes two double-swap patterns are tried in addition
+// (ahev -> have, owudl -> would).
+// Words shorter than two bytes are not processed.
+// Returns the updated suggestion count, or -1 when testsug() returns -1.
+int SuggestMgr::swapchar(char** wlst,
+                         const char* word,
+                         int ns,
+                         int cpdsuggest) {
+  std::string candidate(word);
+  const size_t len = candidate.size();
+  if (len < 2)
+    return ns;
+
+  // single swaps: candidate equals word at the top of every iteration
+  for (size_t left = 0; left + 1 < len; ++left) {
+    const size_t right = left + 1;
+    candidate[left] = word[right];
+    candidate[right] = word[left];
+    ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest,
+                 NULL, NULL);
+    if (ns == -1)
+      return -1;
+    candidate[left] = word[left];
+    candidate[right] = word[right];
+  }
+
+  if (len != 4 && len != 5)
+    return ns;
+
+  // double swap: first pair and last pair exchanged at the same time
+  candidate[0] = word[1];
+  candidate[1] = word[0];
+  candidate[2] = word[2];
+  candidate[len - 2] = word[len - 1];
+  candidate[len - 1] = word[len - 2];
+  ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest,
+               NULL, NULL);
+  if (ns == -1)
+    return -1;
+
+  if (len == 5) {
+    // keep the swapped tail, exchange the second and third letter instead
+    candidate[0] = word[0];
+    candidate[1] = word[2];
+    candidate[2] = word[1];
+    ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest,
+                 NULL, NULL);
+    if (ns == -1)
+      return -1;
+  }
+
+  return ns;
+}
+
+// Transposition errors, wide-character variant: two adjacent letters were
+// swapped. Every adjacent pair is exchanged in turn, converted with u16_u8()
+// and handed to testsug(). For words of 4 or 5 characters two double-swap
+// patterns are tried in addition (ahev -> have, owudl -> would,
+// suodn -> sound).
+// Words shorter than two characters are not processed.
+// Returns the updated suggestion count, or -1 when testsug() returns -1.
+int SuggestMgr::swapchar_utf(char** wlst,
+                             const w_char* word,
+                             int wl,
+                             int ns,
+                             int cpdsuggest) {
+  std::vector<w_char> candidate_utf(word, word + wl);
+  const size_t len = candidate_utf.size();
+  if (len < 2)
+    return ns;
+
+  std::string candidate;
+  // single swaps: candidate_utf equals word at the top of every iteration
+  for (size_t left = 0; left + 1 < len; ++left) {
+    const size_t right = left + 1;
+    candidate_utf[left] = word[right];
+    candidate_utf[right] = word[left];
+    u16_u8(candidate, candidate_utf);
+    ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest,
+                 NULL, NULL);
+    if (ns == -1)
+      return -1;
+    candidate_utf[left] = word[left];
+    candidate_utf[right] = word[right];
+  }
+
+  if (len != 4 && len != 5)
+    return ns;
+
+  // double swap: first pair and last pair exchanged at the same time
+  candidate_utf[0] = word[1];
+  candidate_utf[1] = word[0];
+  candidate_utf[2] = word[2];
+  candidate_utf[len - 2] = word[len - 1];
+  candidate_utf[len - 1] = word[len - 2];
+  u16_u8(candidate, candidate_utf);
+  ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest,
+               NULL, NULL);
+  if (ns == -1)
+    return -1;
+
+  if (len == 5) {
+    // keep the swapped tail, exchange the second and third letter instead
+    candidate_utf[0] = word[0];
+    candidate_utf[1] = word[2];
+    candidate_utf[2] = word[1];
+    u16_u8(candidate, candidate_utf);
+    ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest,
+                 NULL, NULL);
+    if (ns == -1)
+      return -1;
+  }
+  return ns;
+}
+
+// Transposition errors: two letters that are NOT adjacent were swapped.
+// Every ordered pair of positions more than one byte apart is exchanged and
+// handed to testsug(); each unordered pair is therefore visited twice.
+// Returns the updated suggestion count, or -1 when testsug() returns -1.
+int SuggestMgr::longswapchar(char** wlst,
+                             const char* word,
+                             int ns,
+                             int cpdsuggest) {
+  std::string candidate(word);
+  const size_t len = candidate.size();
+  for (size_t a = 0; a < len; ++a) {
+    for (size_t b = 0; b < len; ++b) {
+      const size_t gap = (a > b) ? a - b : b - a;
+      if (gap <= 1)
+        continue;  // same position or neighbours: handled by swapchar()
+      std::swap(candidate[a], candidate[b]);
+      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest,
+                   NULL, NULL);
+      if (ns == -1)
+        return -1;
+      std::swap(candidate[a], candidate[b]);  // undo
+    }
+  }
+  return ns;
+}
+
+// Transposition errors, wide-character variant: two letters that are NOT
+// adjacent were swapped. Every ordered pair of positions more than one
+// character apart is exchanged, converted with u16_u8() and handed to
+// testsug(); each unordered pair is therefore visited twice.
+// Returns the updated suggestion count, or -1 when testsug() returns -1.
+int SuggestMgr::longswapchar_utf(char** wlst,
+                                 const w_char* word,
+                                 int wl,
+                                 int ns,
+                                 int cpdsuggest) {
+  std::vector<w_char> candidate_utf(word, word + wl);
+  const size_t len = candidate_utf.size();
+  for (size_t a = 0; a < len; ++a) {
+    for (size_t b = 0; b < len; ++b) {
+      const size_t gap = (a > b) ? a - b : b - a;
+      if (gap <= 1)
+        continue;  // same position or neighbours: handled by swapchar_utf()
+      std::swap(candidate_utf[a], candidate_utf[b]);
+      std::string candidate;
+      u16_u8(candidate, candidate_utf);
+      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest,
+                   NULL, NULL);
+      if (ns == -1)
+        return -1;
+      std::swap(candidate_utf[a], candidate_utf[b]);  // undo
+    }
+  }
+  return ns;
+}
+
+// Displacement errors: one letter was moved to another place in the word.
+// First pass: each byte is carried to the right, one step at a time, up to
+// 9 positions. Second pass: each byte is carried to the left in the same
+// way. A move by a single position is skipped because it equals a plain
+// adjacent swap. Every other intermediate word is handed to testsug().
+// Words shorter than two bytes are not processed.
+// Returns the updated suggestion count, or -1 when testsug() returns -1.
+int SuggestMgr::movechar(char** wlst,
+                         const char* word,
+                         int ns,
+                         int cpdsuggest) {
+  std::string candidate(word);
+  const int len = (int)candidate.size();
+  if (len < 2)
+    return ns;
+
+  // carry the byte at `from` towards the end of the word
+  for (int from = 0; from < len; ++from) {
+    for (int to = from + 1; to < len && to - from < 10; ++to) {
+      std::swap(candidate[to], candidate[to - 1]);
+      if (to - from < 2)
+        continue;  // omit swap char
+      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest,
+                   NULL, NULL);
+      if (ns == -1)
+        return -1;
+    }
+    candidate.assign(word, len);  // start the next byte from the input word
+  }
+
+  // carry the byte at `from` towards the start of the word
+  for (int from = len - 1; from > 0; --from) {
+    for (int to = from - 1; to >= 0 && from - to < 10; --to) {
+      std::swap(candidate[to], candidate[to + 1]);
+      if (from - to < 2)
+        continue;  // omit swap char
+      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest,
+                   NULL, NULL);
+      if (ns == -1)
+        return -1;
+    }
+    candidate.assign(word, len);  // start the next byte from the input word
+  }
+
+  return ns;
+}
+
+// Displacement errors, wide-character variant: one letter was moved to
+// another place in the word.
+// First pass: each character is carried to the right, one step at a time, up
+// to 9 positions. Second pass: each character is carried to the left in the
+// same way. A move by a single position is skipped because it equals a plain
+// adjacent swap. Every other intermediate word is converted with u16_u8()
+// and handed to testsug().
+// Words shorter than two characters are not processed.
+// Returns the updated suggestion count, or -1 when testsug() returns -1.
+int SuggestMgr::movechar_utf(char** wlst,
+                             const w_char* word,
+                             int wl,
+                             int ns,
+                             int cpdsuggest) {
+  std::vector<w_char> candidate_utf(word, word + wl);
+  const int len = (int)candidate_utf.size();
+  if (len < 2)
+    return ns;
+
+  // carry the character at `from` towards the end of the word
+  for (int from = 0; from < len; ++from) {
+    for (int to = from + 1; to < len && to - from < 10; ++to) {
+      std::swap(candidate_utf[to], candidate_utf[to - 1]);
+      if (to - from < 2)
+        continue;  // omit swap char
+      std::string candidate;
+      u16_u8(candidate, candidate_utf);
+      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest,
+                   NULL, NULL);
+      if (ns == -1)
+        return -1;
+    }
+    std::copy(word, word + len, candidate_utf.begin());
+  }
+
+  // carry the character at `from` towards the start of the word.
+  // Plain indices are used here: counting an iterator down past begin()
+  // (as a `q >= begin(); --q` loop does on its last step) is undefined
+  // behaviour for std::vector iterators.
+  for (int from = len - 1; from > 0; --from) {
+    for (int to = from - 1; to >= 0 && from - to < 10; --to) {
+      std::swap(candidate_utf[to], candidate_utf[to + 1]);
+      if (from - to < 2)
+        continue;  // omit swap char
+      std::string candidate;
+      u16_u8(candidate, candidate_utf);
+      ns = testsug(wlst, candidate.c_str(), candidate.size(), ns, cpdsuggest,
+                   NULL, NULL);
+      if (ns == -1)
+        return -1;
+    }
+    std::copy(word, word + len, candidate_utf.begin());
+  }
+
+  return ns;
+}
+
+// generate a set of suggestions for very poorly spelled words
+//
+// Walks every entry of the md hash tables in pHMgr, keeps the MAX_ROOTS
+// entries with the best 3-gram score (and, when a phone table is available,
+// the MAX_ROOTS best phonetic matches), expands the affixes of the selected
+// roots, re-scores the expanded forms and appends the best of them to wlst.
+//
+//   wlst   suggestion list; entries [0, ns) are already filled
+//   w      the misspelled word
+//   ns     number of suggestions already in wlst
+//   pHMgr  array of md hash managers to search
+//   md     number of entries in pHMgr
+//
+// Returns the new number of suggestions in wlst.
+//
+// NOTE: when the word cannot be converted by u8_u16() (reported as -1) the
+// member utf8 is switched off for the duration of the call; every exit path
+// must switch it back on.
+int SuggestMgr::ngsuggest(char** wlst,
+                          const char* w,
+                          int ns,
+                          HashMgr** pHMgr,
+                          int md) {
+  int i, j;
+  int lval;
+  int sc;
+  int lp, lpphon;
+  int nonbmp = 0;
+
+  // exhaustively search through all root words
+  // keeping track of the MAX_ROOTS most similar root words
+  struct hentry* roots[MAX_ROOTS];
+  char* rootsphon[MAX_ROOTS];
+  int scores[MAX_ROOTS];
+  int scoresphon[MAX_ROOTS];
+  for (i = 0; i < MAX_ROOTS; i++) {
+    roots[i] = NULL;
+    scores[i] = -100 * i;
+    rootsphon[i] = NULL;
+    scoresphon[i] = -100 * i;
+  }
+  // lp / lpphon always index the slot with the lowest score, i.e. the one
+  // to be replaced next
+  lp = MAX_ROOTS - 1;
+  lpphon = MAX_ROOTS - 1;
+  int low = NGRAM_LOWERING;
+
+  std::string w2;
+  const char* word = w;
+
+  // word reversing wrapper for complex prefixes
+  if (complexprefixes) {
+    w2.assign(w);
+    if (utf8)
+      reverseword_utf(w2);
+    else
+      reverseword(w2);
+    word = w2.c_str();
+  }
+
+  std::vector<w_char> u8;
+  int nc = strlen(word);
+  int n = (utf8) ? u8_u16(u8, word) : nc;
+
+  // set character based ngram suggestion for words with non-BMP Unicode
+  // characters
+  if (n == -1) {
+    utf8 = 0;  // XXX not state-free
+    n = nc;
+    nonbmp = 1;
+    low = 0;
+  }
+
+  struct hentry* hp = NULL;
+  int col = -1;
+  phonetable* ph = (pAMgr) ? pAMgr->get_phonetable() : NULL;
+  std::string target;
+  std::string candidate;
+  if (ph) {
+    if (utf8) {
+      std::vector<w_char> _w;
+      u8_u16(_w, word);
+      mkallcap_utf(_w, langnum);
+      u16_u8(candidate, _w);
+    } else {
+      candidate.assign(word);
+      if (!nonbmp)
+        mkallcap(candidate, csconv);
+    }
+    target = phonet(candidate, *ph);  // XXX phonet() is 8-bit (nc, not n)
+  }
+
+  FLAG forbiddenword = pAMgr ? pAMgr->get_forbiddenword() : FLAG_NULL;
+  FLAG nosuggest = pAMgr ? pAMgr->get_nosuggest() : FLAG_NULL;
+  FLAG nongramsuggest = pAMgr ? pAMgr->get_nongramsuggest() : FLAG_NULL;
+  FLAG onlyincompound = pAMgr ? pAMgr->get_onlyincompound() : FLAG_NULL;
+
+  for (i = 0; i < md; i++) {
+    while (0 != (hp = (pHMgr[i])->walk_hashtable(col, hp))) {
+      // skip entries that must never be offered as a suggestion
+      if ((hp->astr) && (pAMgr) &&
+          (TESTAFF(hp->astr, forbiddenword, hp->alen) ||
+           TESTAFF(hp->astr, ONLYUPCASEFLAG, hp->alen) ||
+           TESTAFF(hp->astr, nosuggest, hp->alen) ||
+           TESTAFF(hp->astr, nongramsuggest, hp->alen) ||
+           TESTAFF(hp->astr, onlyincompound, hp->alen)))
+        continue;
+
+      sc = ngram(3, word, HENTRY_WORD(hp), NGRAM_LONGER_WORSE + low) +
+           leftcommonsubstring(word, HENTRY_WORD(hp));
+
+      // check special pronunciation
+      std::string f;
+      if ((hp->var & H_OPT_PHON) &&
+          copy_field(f, HENTRY_DATA(hp), MORPH_PHON)) {
+        int sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE + low) +
+                  leftcommonsubstring(word, f.c_str());
+        if (sc2 > sc)
+          sc = sc2;
+      }
+
+      int scphon = -20000;
+      if (ph && (sc > 2) && (abs(n - (int)hp->clen) <= 3)) {
+        if (utf8) {
+          std::vector<w_char> _w;
+          u8_u16(_w, HENTRY_WORD(hp));
+          mkallcap_utf(_w, langnum);
+          u16_u8(candidate, _w);
+        } else {
+          candidate.assign(HENTRY_WORD(hp));
+          mkallcap(candidate, csconv);
+        }
+        std::string target2 = phonet(candidate, *ph);
+        scphon = 2 * ngram(3, target, target2, NGRAM_LONGER_WORSE);
+      }
+
+      // replace the currently worst root, then find the new worst slot
+      if (sc > scores[lp]) {
+        scores[lp] = sc;
+        roots[lp] = hp;
+        lval = sc;
+        for (j = 0; j < MAX_ROOTS; j++)
+          if (scores[j] < lval) {
+            lp = j;
+            lval = scores[j];
+          }
+      }
+
+      // same bookkeeping for the phonetic candidates
+      if (scphon > scoresphon[lpphon]) {
+        scoresphon[lpphon] = scphon;
+        rootsphon[lpphon] = HENTRY_WORD(hp);
+        lval = scphon;
+        for (j = 0; j < MAX_ROOTS; j++)
+          if (scoresphon[j] < lval) {
+            lpphon = j;
+            lval = scoresphon[j];
+          }
+      }
+    }
+  }
+
+  // find minimum threshold for a passable suggestion
+  // mangle original word three different ways
+  // and score them to generate a minimum acceptable score
+  int thresh = 0;
+  for (int sp = 1; sp < 4; sp++) {
+    if (utf8) {
+      for (int k = sp; k < n; k += 4) {
+        u8[k].l = '*';
+        u8[k].h = 0;
+      }
+      std::string mw;
+      u16_u8(mw, u8);
+      thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH + low);
+    } else {
+      std::string mw(word);
+      for (int k = sp; k < n; k += 4)
+        mw[k] = '*';
+      thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH + low);
+    }
+  }
+  thresh = thresh / 3;
+  thresh--;
+
+  // now expand affixes on each of these root words and
+  // and use length adjusted ngram scores to select
+  // possible suggestions
+  char* guess[MAX_GUESS];
+  char* guessorig[MAX_GUESS];
+  int gscore[MAX_GUESS];
+  for (i = 0; i < MAX_GUESS; i++) {
+    guess[i] = NULL;
+    guessorig[i] = NULL;
+    gscore[i] = -100 * i;
+  }
+
+  lp = MAX_GUESS - 1;
+
+  struct guessword* glst;
+  glst = (struct guessword*)calloc(MAX_WORDS, sizeof(struct guessword));
+  if (!glst) {
+    if (nonbmp)
+      utf8 = 1;
+    return ns;
+  }
+
+  for (i = 0; i < MAX_ROOTS; i++) {
+    if (roots[i]) {
+      struct hentry* rp = roots[i];
+
+      std::string f;
+      const char* field = NULL;
+      if ((rp->var & H_OPT_PHON) && copy_field(f, HENTRY_DATA(rp), MORPH_PHON))
+        field = f.c_str();
+      int nw = pAMgr->expand_rootword(
+          glst, MAX_WORDS, HENTRY_WORD(rp), rp->blen, rp->astr, rp->alen, word,
+          nc, field);
+
+      for (int k = 0; k < nw; k++) {
+        sc = ngram(n, word, glst[k].word, NGRAM_ANY_MISMATCH + low) +
+             leftcommonsubstring(word, glst[k].word);
+
+        if (sc > thresh) {
+          if (sc > gscore[lp]) {
+            // the new guess takes over the worst slot; the strings that
+            // occupied it are released
+            if (guess[lp]) {
+              free(guess[lp]);
+              if (guessorig[lp]) {
+                free(guessorig[lp]);
+                guessorig[lp] = NULL;
+              }
+            }
+            gscore[lp] = sc;
+            guess[lp] = glst[k].word;
+            guessorig[lp] = glst[k].orig;
+            lval = sc;
+            for (j = 0; j < MAX_GUESS; j++)
+              if (gscore[j] < lval) {
+                lp = j;
+                lval = gscore[j];
+              }
+          } else {
+            free(glst[k].word);
+            if (glst[k].orig)
+              free(glst[k].orig);
+          }
+        } else {
+          free(glst[k].word);
+          if (glst[k].orig)
+            free(glst[k].orig);
+        }
+      }
+    }
+  }
+  free(glst);
+
+  // now we are done generating guesses
+  // sort in order of decreasing score
+
+  bubblesort(&guess[0], &guessorig[0], &gscore[0], MAX_GUESS);
+  if (ph)
+    bubblesort(&rootsphon[0], NULL, &scoresphon[0], MAX_ROOTS);
+
+  // weight suggestions with a similarity index, based on
+  // the longest common subsequence algorithm and resort
+
+  int is_swap = 0;
+  int re = 0;
+  double fact = 1.0;
+  if (pAMgr) {
+    int maxd = pAMgr->get_maxdiff();
+    if (maxd >= 0)
+      fact = (10.0 - maxd) / 5.0;
+  }
+
+  for (i = 0; i < MAX_GUESS; i++) {
+    if (guess[i]) {
+      // lowering guess[i]
+      std::string gl;
+      int len;
+      if (utf8) {
+        std::vector<w_char> _w;
+        len = u8_u16(_w, guess[i]);
+        mkallsmall_utf(_w, langnum);
+        u16_u8(gl, _w);
+      } else {
+        gl.assign(guess[i]);
+        if (!nonbmp)
+          mkallsmall(gl, csconv);
+        len = strlen(guess[i]);
+      }
+
+      int _lcs = lcslen(word, gl.c_str());
+
+      // same characters with different casing
+      if ((n == len) && (n == _lcs)) {
+        gscore[i] += 2000;
+        break;
+      }
+      // using 2-gram instead of 3, and other weighting
+
+      re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) +
+           ngram(2, gl, word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED);
+
+      gscore[i] =
+          // length of longest common subsequence minus length difference
+          2 * _lcs - abs((int)(n - len)) +
+          // weight length of the left common substring
+          leftcommonsubstring(word, gl.c_str()) +
+          // weight equal character positions
+          (!nonbmp && commoncharacterpositions(word, gl.c_str(), &is_swap)
+               ? 1
+               : 0) +
+          // swap character (not neighboring)
+          ((is_swap) ? 10 : 0) +
+          // ngram
+          ngram(4, word, gl, NGRAM_ANY_MISMATCH + low) +
+          // weighted ngrams
+          re +
+          // different limit for dictionaries with PHONE rules
+          (ph ? (re < len * fact ? -1000 : 0)
+              : (re < (n + len) * fact ? -1000 : 0));
+    }
+  }
+
+  bubblesort(&guess[0], &guessorig[0], &gscore[0], MAX_GUESS);
+
+  // phonetic version
+  if (ph)
+    for (i = 0; i < MAX_ROOTS; i++) {
+      if (rootsphon[i]) {
+        // lowering rootphon[i]
+        std::string gl;
+        int len;
+        if (utf8) {
+          std::vector<w_char> _w;
+          len = u8_u16(_w, rootsphon[i]);
+          mkallsmall_utf(_w, langnum);
+          u16_u8(gl, _w);
+        } else {
+          gl.assign(rootsphon[i]);
+          if (!nonbmp)
+            mkallsmall(gl, csconv);
+          len = strlen(rootsphon[i]);
+        }
+
+        // heuristic weighting of ngram scores
+        scoresphon[i] += 2 * lcslen(word, gl) - abs((int)(n - len)) +
+                         // weight length of the left common substring
+                         leftcommonsubstring(word, gl.c_str());
+      }
+    }
+
+  if (ph)
+    bubblesort(&rootsphon[0], NULL, &scoresphon[0], MAX_ROOTS);
+
+  // copy over
+  int oldns = ns;
+
+  int same = 0;
+  for (i = 0; i < MAX_GUESS; i++) {
+    if (guess[i]) {
+      if ((ns < oldns + maxngramsugs) && (ns < maxSug) &&
+          (!same || (gscore[i] > 1000))) {
+        int unique = 1;
+        // leave only excellent suggestions, if exists
+        if (gscore[i] > 1000)
+          same = 1;
+        else if (gscore[i] < -100) {
+          same = 1;
+          // keep the best ngram suggestions, unless in ONLYMAXDIFF mode
+          if (ns > oldns || (pAMgr && pAMgr->get_onlymaxdiff())) {
+            free(guess[i]);
+            if (guessorig[i])
+              free(guessorig[i]);
+            continue;
+          }
+        }
+        for (j = 0; j < ns; j++) {
+          // don't suggest previous suggestions or a previous suggestion with
+          // prefixes or affixes
+          if ((!guessorig[i] && strstr(guess[i], wlst[j])) ||
+              (guessorig[i] && strstr(guessorig[i], wlst[j])) ||
+              // check forbidden words
+              !checkword(guess[i], strlen(guess[i]), 0, NULL, NULL)) {
+            unique = 0;
+            break;
+          }
+        }
+        if (unique) {
+          // ownership of the string moves to wlst; when an original form
+          // exists it is stored instead of the guess
+          wlst[ns++] = guess[i];
+          if (guessorig[i]) {
+            free(guess[i]);
+            wlst[ns - 1] = guessorig[i];
+          }
+        } else {
+          free(guess[i]);
+          if (guessorig[i])
+            free(guessorig[i]);
+        }
+      } else {
+        free(guess[i]);
+        if (guessorig[i])
+          free(guessorig[i]);
+      }
+    }
+  }
+
+  oldns = ns;
+  if (ph)
+    for (i = 0; i < MAX_ROOTS; i++) {
+      if (rootsphon[i]) {
+        if ((ns < oldns + MAXPHONSUGS) && (ns < maxSug)) {
+          int unique = 1;
+          for (j = 0; j < ns; j++) {
+            // don't suggest previous suggestions or a previous suggestion with
+            // prefixes or affixes
+            if (strstr(rootsphon[i], wlst[j]) ||
+                // check forbidden words
+                !checkword(rootsphon[i], strlen(rootsphon[i]), 0, NULL, NULL)) {
+              unique = 0;
+              break;
+            }
+          }
+          if (unique) {
+            wlst[ns++] = mystrdup(rootsphon[i]);
+            if (!wlst[ns - 1]) {
+              // out of memory: undo the temporary switch to 8-bit mode
+              // before leaving, exactly as the other exits do
+              if (nonbmp)
+                utf8 = 1;
+              return ns - 1;
+            }
+          }
+        }
+      }
+    }
+
+  if (nonbmp)
+    utf8 = 1;
+  return ns;
+}
+
+// see if a candidate suggestion is spelled correctly
+// needs to check both root words and words with affixes; returns 0 when the word is rejected (or pAMgr is not set) and a non-zero code (1, 2 or 3) when it is accepted
+
+// obsolete MySpell-HU modifications:
+// return value 2 and 3 marks compounding with hyphen (-)
+// `3' marks roots without suffix
+int SuggestMgr::checkword(const char* word,
+ int len,
+ int cpdsuggest,
+ int* timer,
+ clock_t* timelimit) {
+ struct hentry* rv = NULL;
+ struct hentry* rv2 = NULL;
+ int nosuffix = 0; // becomes 1 when the word is found by lookup() or prefix_check(), i.e. before suffix_check() is tried
+
+ // check time limit
+ if (timer) {
+ (*timer)--; // one unit of the caller's budget is used per call
+ if (!(*timer) && timelimit) {
+ if ((clock() - *timelimit) > TIMELIMIT)
+ return 0; // out of time: *timer is left at 0 so the caller can detect it
+ *timer = MAXPLUSTIMER; // still within the time limit: grant a further batch of calls
+ }
+ }
+
+ if (pAMgr) {
+ if (cpdsuggest == 1) { // compound mode: only compound_check() is consulted
+ if (pAMgr->get_compound()) {
+ struct hentry* rwords[100]; // buffer for COMPOUND pattern checking
+ rv = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 1,
+ 0); // EXT
+ if (rv &&
+ (!(rv2 = pAMgr->lookup(word)) || !rv2->astr ||
+ !(TESTAFF(rv2->astr, pAMgr->get_forbiddenword(), rv2->alen) ||
+ TESTAFF(rv2->astr, pAMgr->get_nosuggest(), rv2->alen))))
+ return 3; // XXX obsolete categorisation + only ICONV needs affix
+ // flag check?
+ }
+ return 0;
+ }
+
+ rv = pAMgr->lookup(word);
+
+ if (rv) {
+ if ((rv->astr) &&
+ (TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) ||
+ TESTAFF(rv->astr, pAMgr->get_nosuggest(), rv->alen)))
+ return 0;
+ while (rv) { // skip homonyms flagged needaffix, ONLYUPCASE or onlyincompound; rv ends up NULL if all are
+ if (rv->astr &&
+ (TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) ||
+ TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
+ TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen))) {
+ rv = rv->next_homonym;
+ } else
+ break;
+ }
+ } else
+ rv = pAMgr->prefix_check(word, len,
+ 0); // only prefix, and prefix + suffix XXX
+
+ if (rv) {
+ nosuffix = 1;
+ } else {
+ rv = pAMgr->suffix_check(word, len, 0, NULL, NULL, 0,
+ NULL); // only suffix
+ }
+
+ if (!rv && pAMgr->have_contclass()) { // last resort: the two-suffix checks
+ rv = pAMgr->suffix_check_twosfx(word, len, 0, NULL, FLAG_NULL);
+ if (!rv)
+ rv = pAMgr->prefix_check_twosfx(word, len, 1, FLAG_NULL);
+ }
+
+ // check forbidden words
+ if ((rv) && (rv->astr) &&
+ (TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) ||
+ TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
+ TESTAFF(rv->astr, pAMgr->get_nosuggest(), rv->alen) ||
+ TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen)))
+ return 0;
+
+ if (rv) { // XXX obsolete
+ if ((pAMgr->get_compoundflag()) &&
+ TESTAFF(rv->astr, pAMgr->get_compoundflag(), rv->alen))
+ return 2 + nosuffix; // 2, or 3 when the word was found before suffix_check()
+ return 1;
+ }
+ }
+ return 0;
+}
+
+// Returns 1 when the entry found for `word` carries the forbiddenword flag,
+// 0 otherwise (also when no affix manager is set).
+// The entry is taken from lookup(), discarded when it is flagged needaffix
+// or onlyincompound, and replaced by the result of suffix_check() whenever
+// prefix_check() finds nothing.
+int SuggestMgr::check_forbidden(const char* word, int len) {
+  if (!pAMgr)
+    return 0;
+
+  struct hentry* entry = pAMgr->lookup(word);
+  if (entry && entry->astr &&
+      (TESTAFF(entry->astr, pAMgr->get_needaffix(), entry->alen) ||
+       TESTAFF(entry->astr, pAMgr->get_onlyincompound(), entry->alen)))
+    entry = NULL;
+
+  // prefix+suffix, suffix
+  if (!(pAMgr->prefix_check(word, len, 1)))
+    entry = pAMgr->suffix_check(word, len, 0, NULL, NULL, 0, NULL);
+
+  // check forbidden words
+  const bool is_forbidden =
+      (entry) && (entry->astr) &&
+      TESTAFF(entry->astr, pAMgr->get_forbiddenword(), entry->alen);
+  return is_forbidden ? 1 : 0;
+}
+
+char* SuggestMgr::suggest_morph(const char* w) { // collects morphological data for w; returns a mystrdup() copy, or NULL when pAMgr is unset or nothing was collected
+ char result[MAXLNLEN]; // fixed-size output buffer; every append below goes through mystrcat(..., MAXLNLEN)
+ char* r = (char*)result; // alias of result, passed by address to compound_check_morph() below
+ char* st;
+
+ struct hentry* rv = NULL;
+
+ *result = '\0';
+
+ if (!pAMgr)
+ return NULL;
+
+ std::string w2;
+ const char* word = w;
+
+ // word reversing wrapper for complex prefixes
+ if (complexprefixes) {
+ w2.assign(w);
+ if (utf8)
+ reverseword_utf(w2);
+ else
+ reverseword(w2);
+ word = w2.c_str();
+ }
+
+ rv = pAMgr->lookup(word);
+
+ while (rv) { // one output record per homonym that is not forbidden / needaffix / onlyincompound
+ if ((!rv->astr) ||
+ !(TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) ||
+ TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) ||
+ TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen))) {
+ if (!HENTRY_FIND(rv, MORPH_STEM)) { // no stem field found for the entry: emit the word itself as stem
+ mystrcat(result, " ", MAXLNLEN);
+ mystrcat(result, MORPH_STEM, MAXLNLEN);
+ mystrcat(result, word, MAXLNLEN);
+ }
+ if (HENTRY_DATA(rv)) {
+ mystrcat(result, " ", MAXLNLEN);
+ mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
+ }
+ mystrcat(result, "\n", MAXLNLEN);
+ }
+ rv = rv->next_homonym;
+ }
+
+ st = pAMgr->affix_check_morph(word, strlen(word));
+ if (st) {
+ mystrcat(result, st, MAXLNLEN);
+ free(st); // st is released here after being copied into result
+ }
+
+ if (pAMgr->get_compound() && (*result == '\0')) { // compound analysis only when nothing has been collected so far
+ struct hentry* rwords[100]; // buffer for COMPOUND pattern checking
+ pAMgr->compound_check_morph(word, strlen(word), 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, &r,
+ NULL);
+ }
+
+ return (*result) ? mystrdup(line_uniq(result, MSEP_REC)) : NULL;
+}
+
+// Affixation: generate word forms of the dictionary entry rv that match the
+// morphological pattern.
+// The entry itself is tried first via pAMgr->morphgen(); afterwards every
+// allomorph listed in the entry's morphological data is looked up, and each
+// homonym whose stem field names this entry is tried the same way. Generated
+// forms are collected one per line.
+// Returns a mystrdup() copy of the collected text, or NULL when the entry
+// has more suffix fields than the pattern or nothing was generated.
+char* SuggestMgr::suggest_hentry_gen(hentry* rv, const char* pattern) {
+  char result[MAXLNLEN];
+  *result = '\0';
+  int sfxcount = get_sfxcount(pattern);
+
+  if (get_sfxcount(HENTRY_DATA(rv)) > sfxcount)
+    return NULL;
+
+  if (HENTRY_DATA(rv)) {
+    char* aff = pAMgr->morphgen(HENTRY_WORD(rv), rv->blen, rv->astr, rv->alen,
+                                HENTRY_DATA(rv), pattern, 0);
+    if (aff) {
+      mystrcat(result, aff, MAXLNLEN);
+      mystrcat(result, "\n", MAXLNLEN);
+      free(aff);
+    }
+  }
+
+  // check all allomorphs
+  char* p = NULL;
+  if (HENTRY_DATA(rv))
+    p = (char*)strstr(HENTRY_DATA2(rv), MORPH_ALLOMORPH);
+  while (p) {
+    struct hentry* rv2 = NULL;
+    p += MORPH_TAG_LEN;
+    int plen = fieldlen(p);
+    // A std::string sized to the field replaces the former fixed
+    // char[MAXLNLEN] buffer, which was filled with strncpy() without
+    // checking plen against the buffer size.
+    std::string allomorph(p, plen);
+    rv2 = pAMgr->lookup(allomorph.c_str());
+    while (rv2) {
+      //            if (HENTRY_DATA(rv2) && get_sfxcount(HENTRY_DATA(rv2)) <=
+      //            sfxcount) {
+      if (HENTRY_DATA(rv2)) {
+        char* st = (char*)strstr(HENTRY_DATA2(rv2), MORPH_STEM);
+        // only homonyms whose stem field starts with this entry's word
+        if (st && (strncmp(st + MORPH_TAG_LEN, HENTRY_WORD(rv),
+                           fieldlen(st + MORPH_TAG_LEN)) == 0)) {
+          char* aff = pAMgr->morphgen(HENTRY_WORD(rv2), rv2->blen, rv2->astr,
+                                      rv2->alen, HENTRY_DATA(rv2), pattern, 0);
+          if (aff) {
+            mystrcat(result, aff, MAXLNLEN);
+            mystrcat(result, "\n", MAXLNLEN);
+            free(aff);
+          }
+        }
+      }
+      rv2 = rv2->next_homonym;
+    }
+    p = strstr(p + plen, MORPH_ALLOMORPH);
+  }
+
+  return (*result) ? mystrdup(result) : NULL;
+}
+
+char* SuggestMgr::suggest_gen(char** desc, int n, const char* pattern) { // generates forms from the n descriptions in desc for pattern; returns a mystrdup() copy or NULL
+ if (n == 0 || !pAMgr)
+ return NULL;
+
+ std::string result2; // accumulated output; each generated form is preceded by MSEP_REC
+ std::string newpattern; // owns the rewritten pattern used by the second pass
+ struct hentry* rv = NULL;
+
+ // search affixed forms with and without derivational suffixes
+ while (1) {
+ for (int k = 0; k < n; k++) {
+ std::string result; // compound parts preceding the last one, prepended to every form generated from desc[k]
+
+ // add compound word parts (except the last one)
+ char* s = (char*)desc[k];
+ char* part = strstr(s, MORPH_PART);
+ if (part) {
+ char* nextpart = strstr(part + 1, MORPH_PART);
+ while (nextpart) {
+ std::string field;
+ copy_field(field, part, MORPH_PART);
+ result.append(field);
+ part = nextpart;
+ nextpart = strstr(part + 1, MORPH_PART);
+ }
+ s = part; // continue with the last part only
+ }
+
+ char** pl;
+ std::string tok(s);
+ size_t pos = tok.find(" | ");
+ while (pos != std::string::npos) { // turn every " | " into " <MSEP_ALT> " so line_tok() can split the alternatives
+ tok[pos + 1] = MSEP_ALT;
+ pos = tok.find(" | ", pos);
+ }
+ int pln = line_tok(tok.c_str(), &pl, MSEP_ALT);
+ for (int i = 0; i < pln; i++) {
+ // remove inflectional and terminal suffixes
+ char* is = strstr(pl[i], MORPH_INFL_SFX);
+ if (is)
+ *is = '\0'; // cut the alternative at its first inflectional suffix field
+ char* ts = strstr(pl[i], MORPH_TERM_SFX);
+ while (ts) {
+ *ts = '_'; // overwrite the first char of the tag so it no longer matches
+ ts = strstr(pl[i], MORPH_TERM_SFX);
+ }
+ char* st = strstr(s, MORPH_STEM); // NOTE(review): searched in s, not in pl[i] - confirm this is intended
+ if (st) {
+ copy_field(tok, st, MORPH_STEM); // tok is reused here to hold the stem
+ rv = pAMgr->lookup(tok.c_str());
+ while (rv) {
+ std::string newpat(pl[i]);
+ newpat.append(pattern);
+ char* sg = suggest_hentry_gen(rv, newpat.c_str());
+ if (!sg)
+ sg = suggest_hentry_gen(rv, pattern); // fall back to the bare pattern
+ if (sg) {
+ char** gen;
+ int genl = line_tok(sg, &gen, MSEP_REC);
+ free(sg);
+ sg = NULL;
+ for (int j = 0; j < genl; j++) {
+ result2.push_back(MSEP_REC);
+ result2.append(result);
+ if (strstr(pl[i], MORPH_SURF_PFX)) {
+ std::string field;
+ copy_field(field, pl[i], MORPH_SURF_PFX);
+ result2.append(field);
+ }
+ result2.append(gen[j]);
+ }
+ freelist(&gen, genl);
+ }
+ rv = rv->next_homonym;
+ }
+ }
+ }
+ freelist(&pl, pln);
+ }
+
+ if (!result2.empty() || !strstr(pattern, MORPH_DERI_SFX))
+ break; // done when something was generated or the pattern has no derivational suffix left to rewrite
+
+ newpattern.assign(pattern);
+ mystrrep(newpattern, MORPH_DERI_SFX, MORPH_TERM_SFX);
+ pattern = newpattern.c_str(); // second pass: derivational suffix tags replaced by terminal suffix tags
+ }
+ return (!result2.empty() ? mystrdup(result2.c_str()) : NULL);
+}
+
+// generate an n-gram score comparing s1 and s2
+//
+// For every gram length j = 1..n the score is increased by one for each
+// j-gram of s1 that also occurs somewhere in s2. Works on wide characters
+// when utf8 is set and on bytes otherwise.
+//
+// opt is a combination of:
+//   NGRAM_LOWERING      s2 is lowercased before the comparison
+//   NGRAM_WEIGHTED      a missing gram costs one point, two when it is the
+//                       first or last gram of s1; without this flag the scan
+//                       stops at the first gram length with fewer than two
+//                       hits
+//   NGRAM_LONGER_WORSE  subtract (l2 - l1) - 2 when that is positive
+//   NGRAM_ANY_MISMATCH  subtract |l2 - l1| - 2 when that is positive
+//
+// Returns 0 when s2 is empty (or, in UTF-8 mode, when u8_u16() reports
+// nothing usable for either string).
+int SuggestMgr::ngram(int n,
+                      const std::string& s1,
+                      const std::string& s2,
+                      int opt) {
+  int nscore = 0;
+  int ns;
+  int l1;
+  int l2;
+
+  if (utf8) {
+    std::vector<w_char> su1;
+    std::vector<w_char> su2;
+    l1 = u8_u16(su1, s1);
+    l2 = u8_u16(su2, s2);
+    if ((l2 <= 0) || (l1 == -1))
+      return 0;
+    // lowering dictionary word
+    if (opt & NGRAM_LOWERING)
+      mkallsmall_utf(su2, langnum);
+    for (int j = 1; j <= n; j++) {
+      ns = 0;
+      for (int i = 0; i <= (l1 - j); i++) {
+        int k = 0;
+        // look for the j-gram starting at su1[i] anywhere in su2
+        for (int l = 0; l <= (l2 - j); l++) {
+          for (k = 0; k < j; k++) {
+            w_char& c1 = su1[i + k];
+            w_char& c2 = su2[l + k];
+            if ((c1.l != c2.l) || (c1.h != c2.h))
+              break;
+          }
+          if (k == j) {
+            ns++;
+            break;
+          }
+        }
+        if (k != j && opt & NGRAM_WEIGHTED) {
+          ns--;
+          if (i == 0 || i == l1 - j)
+            ns--;  // side weight
+        }
+      }
+      nscore = nscore + ns;
+      if (ns < 2 && !(opt & NGRAM_WEIGHTED))
+        break;
+    }
+  } else {
+    l2 = s2.size();
+    if (l2 == 0)
+      return 0;
+    l1 = s1.size();
+    std::string t(s2);
+    if (opt & NGRAM_LOWERING)
+      mkallsmall(t, csconv);
+    for (int j = 1; j <= n; j++) {
+      ns = 0;
+      for (int i = 0; i <= (l1 - j); i++) {
+        std::string temp(s1.substr(i, j));
+        if (t.find(temp) != std::string::npos) {
+          ns++;
+        } else if (opt & NGRAM_WEIGHTED) {
+          ns--;
+          if (i == 0 || i == l1 - j)
+            ns--;  // side weight
+        }
+      }
+      nscore = nscore + ns;
+      if (ns < 2 && !(opt & NGRAM_WEIGHTED))
+        break;
+    }
+  }
+
+  // length penalty; NGRAM_ANY_MISMATCH wins when both flags are given
+  ns = 0;
+  if (opt & NGRAM_LONGER_WORSE)
+    ns = (l2 - l1) - 2;
+  if (opt & NGRAM_ANY_MISMATCH)
+    ns = abs(l2 - l1) - 2;
+  ns = (nscore - ((ns > 0) ? ns : 0));
+  return ns;
+}
+
+// length of the left common substring of s1 and (decapitalised) s2
+//
+// With complexprefixes set only the LAST characters of the two words are
+// compared and the result is 1 or 0. Otherwise the first character of s2 is
+// also accepted in its lowercase form and the length of the common prefix is
+// returned. In 8-bit mode without a csconv table the result is 0.
+int SuggestMgr::leftcommonsubstring(const char* s1, const char* s2) {
+  if (utf8) {
+    std::vector<w_char> su1;
+    std::vector<w_char> su2;
+    int l1 = u8_u16(su1, s1);
+    int l2 = u8_u16(su2, s2);
+    // decapitalize dictionary word
+    if (complexprefixes) {
+      // l1 / l2 are 0 for an empty string and -1 when the conversion
+      // fails; indexing [l - 1] is only valid for positive lengths
+      if (l1 > 0 && l2 > 0 && su1[l1 - 1] == su2[l2 - 1])
+        return 1;
+    } else {
+      unsigned short idx = su2.empty() ? 0 : (su2[0].h << 8) + su2[0].l;
+      unsigned short otheridx = su1.empty() ? 0 : (su1[0].h << 8) + su1[0].l;
+      if (otheridx != idx && (otheridx != unicodetolower(idx, langnum)))
+        return 0;
+      int i;
+      for (i = 1; (i < l1) && (i < l2) && (su1[i].l == su2[i].l) &&
+                  (su1[i].h == su2[i].h);
+           i++)
+        ;
+      return i;
+    }
+  } else {
+    if (complexprefixes) {
+      int l1 = strlen(s1);
+      int l2 = strlen(s2);
+      // compare the last character of s1 with the last character of s2,
+      // as the UTF-8 branch does (this used to compare s2 with itself);
+      // an empty s1 has no last character
+      if (l1 > 0 && l1 <= l2 && s1[l1 - 1] == s2[l2 - 1])
+        return 1;
+    } else if (csconv) {
+      const char* olds = s1;
+      // decapitalise dictionary word
+      if ((*s1 != *s2) && (*s1 != csconv[((unsigned char)*s2)].clower))
+        return 0;
+      do {
+        s1++;
+        s2++;
+      } while ((*s1 == *s2) && (*s1 != '\0'));
+      return (int)(s1 - olds);
+    }
+  }
+  return 0;
+}
+
+// Count the positions at which s1 and the decapitalised s2 hold the same
+// character; only positions present in both words are compared.
+// *is_swap is set to 1 when both words have the same length and differ in
+// exactly two positions whose characters are exchanged, otherwise to 0.
+// Returns the number of matching positions (0 for empty UTF-8 input).
+int SuggestMgr::commoncharacterpositions(const char* s1,
+                                         const char* s2,
+                                         int* is_swap) {
+  int num = 0;
+  int diff = 0;
+  // positions of the first two mismatches; only read when diff == 2
+  int diffpos[2] = {0, 0};
+  *is_swap = 0;
+  if (utf8) {
+    std::vector<w_char> su1;
+    std::vector<w_char> su2;
+    int l1 = u8_u16(su1, s1);
+    int l2 = u8_u16(su2, s2);
+
+    if (l1 <= 0 || l2 <= 0)
+      return 0;
+
+    // decapitalize dictionary word: last character with complex prefixes,
+    // first character otherwise
+    if (complexprefixes) {
+      su2[l2 - 1] = lower_utf(su2[l2 - 1], langnum);
+    } else {
+      su2[0] = lower_utf(su2[0], langnum);
+    }
+    for (int i = 0; (i < l1) && (i < l2); i++) {
+      if (su1[i] == su2[i]) {
+        num++;
+      } else {
+        if (diff < 2)
+          diffpos[diff] = i;
+        diff++;
+      }
+    }
+    if ((diff == 2) && (l1 == l2) &&
+        (su1[diffpos[0]] == su2[diffpos[1]]) &&
+        (su1[diffpos[1]] == su2[diffpos[0]]))
+      *is_swap = 1;
+  } else {
+    size_t i;
+    std::string t(s2);
+    // decapitalize dictionary word
+    if (complexprefixes) {
+      // an empty dictionary word has no last character: t[l2 - 1] would index
+      // far outside the string
+      if (!t.empty()) {
+        size_t l2 = t.size();
+        t[l2 - 1] = csconv[(unsigned char)t[l2 - 1]].clower;
+      }
+    } else {
+      mkallsmall(t, csconv);
+    }
+    for (i = 0; (*(s1 + i) != 0) && i < t.size(); i++) {
+      if (*(s1 + i) == t[i]) {
+        num++;
+      } else {
+        if (diff < 2)
+          diffpos[diff] = (int)i;
+        diff++;
+      }
+    }
+    // both words ended at the same position, i.e. they have equal length
+    if ((diff == 2) && (*(s1 + i) == 0) && i == t.size() &&
+        (*(s1 + diffpos[0]) == t[diffpos[1]]) &&
+        (*(s1 + diffpos[1]) == t[diffpos[0]]))
+      *is_swap = 1;
+  }
+  return num;
+}
+
+// Length of word: strlen() in 8-bit mode, otherwise the value u8_u16()
+// returns after converting the word into a scratch w_char vector.
+int SuggestMgr::mystrlen(const char* word) {
+  if (!utf8)
+    return strlen(word);
+
+  std::vector<w_char> converted;
+  return u8_u16(converted, word);
+}
+
+// Reorder the first n entries so that the scores in rsc are in decreasing
+// order. rword is permuted together with rsc, and so is rword2 when it is
+// not NULL. Neighbours are exchanged only when the left score is strictly
+// smaller, so entries with equal scores keep their relative order.
+void SuggestMgr::bubblesort(char** rword, char** rword2, int* rsc, int n) {
+  for (int next = 1; next < n; ++next) {
+    // move entry `next` towards the front while it outscores its left
+    // neighbour
+    for (int pos = next; pos > 0 && rsc[pos - 1] < rsc[pos]; --pos) {
+      const int score = rsc[pos];
+      rsc[pos] = rsc[pos - 1];
+      rsc[pos - 1] = score;
+
+      char* word = rword[pos];
+      rword[pos] = rword[pos - 1];
+      rword[pos - 1] = word;
+
+      if (rword2) {
+        char* word2 = rword2[pos];
+        rword2[pos] = rword2[pos - 1];
+        rword2[pos - 1] = word2;
+      }
+    }
+  }
+}
+
+// Longest common subsequence of s and s2 (dynamic programming table).
+// On success *result receives a malloc()ed, row-major (m + 1) x (n + 1) table
+// of LCS_UP / LCS_LEFT / LCS_UPLEFT steps which the caller must free(); row 0
+// and column 0 of that table are not written. *l1 and *l2 receive m and n,
+// the lengths of s and s2 as reported by u8_u16() (utf8) or strlen().
+// On failure (negative length or out of memory) *result is NULL and *l1 / *l2
+// are left untouched.
+void SuggestMgr::lcs(const char* s,
+                     const char* s2,
+                     int* l1,
+                     int* l2,
+                     char** result) {
+  int n, m;
+  std::vector<w_char> su;
+  std::vector<w_char> su2;
+  int i;
+  int j;
+  if (utf8) {
+    m = u8_u16(su, s);
+    n = u8_u16(su2, s2);
+  } else {
+    m = strlen(s);
+    n = strlen(s2);
+  }
+  // a negative length would make the table size and the loops meaningless
+  if (m < 0 || n < 0) {
+    *result = NULL;
+    return;
+  }
+  // size arithmetic in size_t so that (m + 1) * (n + 1) cannot overflow int
+  const size_t width = (size_t)n + 1;
+  const size_t cells = ((size_t)m + 1) * width;
+  // subsequence lengths are kept in int: a plain char counter overflows as
+  // soon as the common subsequence is longer than CHAR_MAX
+  int* c = (int*)malloc(cells * sizeof(int));
+  char* b = (char*)malloc(cells);
+  if (!c || !b) {
+    free(c);  // free(NULL) is a no-op
+    free(b);
+    *result = NULL;
+    return;
+  }
+  for (i = 1; i <= m; i++)
+    c[i * width] = 0;
+  for (j = 0; j <= n; j++)
+    c[j] = 0;
+  for (i = 1; i <= m; i++) {
+    for (j = 1; j <= n; j++) {
+      const size_t here = i * width + j;
+      const size_t up = here - width;
+      const size_t left = here - 1;
+      if (((utf8) && (su[i - 1] == su2[j - 1])) ||
+          ((!utf8) && (s[i - 1] == s2[j - 1]))) {
+        // characters match: extend the subsequence of both shorter prefixes
+        c[here] = c[up - 1] + 1;
+        b[here] = LCS_UPLEFT;
+      } else if (c[up] >= c[left]) {
+        c[here] = c[up];
+        b[here] = LCS_UP;
+      } else {
+        c[here] = c[left];
+        b[here] = LCS_LEFT;
+      }
+    }
+  }
+  *result = b;
+  free(c);
+  *l1 = m;
+  *l2 = n;
+}
+
+// Length of the longest common subsequence of s and s2: builds the step
+// table with lcs() and walks it back from the bottom-right corner, counting
+// the LCS_UPLEFT steps. Returns 0 when lcs() delivers no table.
+int SuggestMgr::lcslen(const char* s, const char* s2) {
+  int rows;
+  int cols;
+  char* steps;
+  lcs(s, s2, &rows, &cols, &steps);
+  if (!steps)
+    return 0;
+
+  int common = 0;
+  int i = rows;
+  int j = cols;
+  while ((i != 0) && (j != 0)) {
+    switch (steps[i * (cols + 1) + j]) {
+      case LCS_UPLEFT:
+        common++;
+        i--;
+        j--;
+        break;
+      case LCS_UP:
+        i--;
+        break;
+      default:
+        j--;
+        break;
+    }
+  }
+  free(steps);
+  return common;
+}
+
+// std::string convenience overload: forwards to the C-string version.
+int SuggestMgr::lcslen(const std::string& s, const std::string& s2) {
+  const char* first = s.c_str();
+  const char* second = s2.c_str();
+  return lcslen(first, second);
+}
diff --git a/extensions/spellcheck/hunspell/src/suggestmgr.hxx b/extensions/spellcheck/hunspell/src/suggestmgr.hxx
new file mode 100644
index 000000000..675d98eb8
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/suggestmgr.hxx
@@ -0,0 +1,198 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _SUGGESTMGR_HXX_
+#define _SUGGESTMGR_HXX_
+
+// Fixed limits used by the suggestion code.
+// NOTE(review): the places where these are consumed are not part of this
+// header; the MAX*SUGS values are presumably defaults that the affix file
+// can override (e.g. MAXNGRAMSUGS) - confirm in suggestmgr.cxx.
+#define MAX_ROOTS 100
+#define MAX_WORDS 100
+#define MAX_GUESS 200
+#define MAXNGRAMSUGS 4
+#define MAXPHONSUGS 2
+#define MAXCOMPOUNDSUGS 3
+
+// timelimit: max ~1/4 sec (process time on Linux) for a time consuming function
+// (CLOCKS_PER_SEC >> 2 is a quarter of the clock() ticks per second)
+#define TIMELIMIT (CLOCKS_PER_SEC >> 2)
+#define MINTIMER 100
+#define MAXPLUSTIMER 100
+
+// Bit flags for the opt argument of SuggestMgr::ngram() (may be or-ed):
+// NGRAM_LONGER_WORSE: subtract (len(s2) - len(s1)) - 2 from the score when
+// that value is positive
+// NGRAM_ANY_MISMATCH: subtract abs(len(s2) - len(s1)) - 2 from the score
+// when that value is positive
+// NGRAM_LOWERING: lowercase s2 (the dictionary word) before comparing
+// NGRAM_WEIGHTED: n-grams of s1 missing from s2 lower the score, the
+// first and last n-gram twice; disables the early exit
+#define NGRAM_LONGER_WORSE (1 << 0)
+#define NGRAM_ANY_MISMATCH (1 << 1)
+#define NGRAM_LOWERING (1 << 2)
+#define NGRAM_WEIGHTED (1 << 3)
+
+#include "hunvisapi.h"
+
+#include "atypes.hxx"
+#include "affixmgr.hxx"
+#include "hashmgr.hxx"
+#include "langnum.hxx"
+#include <time.h>
+
+// Steps stored in the table built by SuggestMgr::lcs(): LCS_UPLEFT marks a
+// position where both characters match, LCS_UP / LCS_LEFT tell the walk in
+// lcslen() to drop a character of the first / second word.
+enum { LCS_UP, LCS_LEFT, LCS_UPLEFT };
+
+// Suggestion manager: declares the public suggest*() entry points and the
+// private helpers that build and score candidate words.
+class LIBHUNSPELL_DLL_EXPORTED SuggestMgr {
+ private:
+ // copy constructor and assignment are private: objects are not meant to be
+ // copied from outside the class
+ SuggestMgr(const SuggestMgr&);
+ SuggestMgr& operator=(const SuggestMgr&);
+
+ private:
+ // NOTE(review): ckey* / ctry* presumably hold the KEY and TRY character
+ // sets of the affix file (8-bit and w_char form plus length) - confirm in
+ // the constructor.
+ char* ckey;
+ int ckeyl;
+ w_char* ckey_utf;
+
+ char* ctry;
+ int ctryl;
+ w_char* ctry_utf;
+
+ AffixMgr* pAMgr;
+ int maxSug;
+ // 8-bit case conversion table; the helpers read its clower member
+ struct cs_info* csconv;
+ // non-zero: words are converted to w_char vectors with u8_u16() before
+ // they are compared; zero: words are compared byte by byte
+ int utf8;
+ // passed to the Unicode lowercasing helpers
+ int langnum;
+ int nosplitsugs;
+ int maxngramsugs;
+ int maxcpdsugs;
+ // non-zero: the comparison helpers look at / lowercase the last character
+ // of a word instead of the first one
+ int complexprefixes;
+
+ public:
+ SuggestMgr(const char* tryme, int maxn, AffixMgr* aptr);
+ ~SuggestMgr();
+
+ int suggest(char*** slst, const char* word, int nsug, int* onlycmpdsug);
+ int ngsuggest(char** wlst, const char* word, int ns, HashMgr** pHMgr, int md);
+ int suggest_auto(char*** slst, const char* word, int nsug);
+ int suggest_stems(char*** slst, const char* word, int nsug);
+ int suggest_pos_stems(char*** slst, const char* word, int nsug);
+
+ char* suggest_morph(const char* word);
+ char* suggest_gen(char** pl, int pln, const char* pattern);
+ char* suggest_morph_for_spelling_error(const char* word);
+
+ private:
+ int testsug(char** wlst,
+ const char* candidate,
+ int wl,
+ int ns,
+ int cpdsuggest,
+ int* timer,
+ clock_t* timelimit);
+ int checkword(const char*, int, int, int*, clock_t*);
+ int check_forbidden(const char*, int);
+
+ // candidate generators for 8-bit words
+ // NOTE(review): parameter meaning is not visible here; presumably
+ // (suggestion list, word, current suggestion count, compound flag) -
+ // confirm in suggestmgr.cxx
+ int capchars(char**, const char*, int, int);
+ int replchars(char**, const char*, int, int);
+ int doubletwochars(char**, const char*, int, int);
+ int forgotchar(char**, const char*, int, int);
+ int swapchar(char**, const char*, int, int);
+ int longswapchar(char**, const char*, int, int);
+ int movechar(char**, const char*, int, int);
+ int extrachar(char**, const char*, int, int);
+ int badcharkey(char**, const char*, int, int);
+ int badchar(char**, const char*, int, int);
+ int twowords(char**, const char*, int, int);
+ int fixstems(char**, const char*, int);
+
+ // w_char counterparts of the generators above (word plus its length wl)
+ int capchars_utf(char**, const w_char*, int wl, int, int);
+ int doubletwochars_utf(char**, const w_char*, int wl, int, int);
+ int forgotchar_utf(char**, const w_char*, int wl, int, int);
+ int extrachar_utf(char**, const w_char*, int wl, int, int);
+ int badcharkey_utf(char**, const w_char*, int wl, int, int);
+ int badchar_utf(char**, const w_char*, int wl, int, int);
+ int swapchar_utf(char**, const w_char*, int wl, int, int);
+ int longswapchar_utf(char**, const w_char*, int, int, int);
+ int movechar_utf(char**, const w_char*, int, int, int);
+
+ int mapchars(char**, const char*, int, int);
+ int map_related(const char*,
+ std::string&,
+ int,
+ char** wlst,
+ int,
+ int,
+ const mapentry*,
+ int,
+ int*,
+ clock_t*);
+ // n-gram similarity score of s1 against s2; opt is a set of NGRAM_* flags
+ int ngram(int n, const std::string& s1, const std::string& s2, int opt);
+ // length of word: u8_u16() result in UTF-8 mode, strlen() otherwise
+ int mystrlen(const char* word);
+ // length of the left common substring of s1 and (decapitalised) s2
+ int leftcommonsubstring(const char* s1, const char* s2);
+ // number of positions holding the same character; *is_swap is set to 1
+ // when the words differ only by two exchanged characters
+ int commoncharacterpositions(const char* s1, const char* s2, int* is_swap);
+ // sorts rsc in decreasing order and permutes rwd (and rwd2 if not NULL)
+ // along with it
+ void bubblesort(char** rwd, char** rwd2, int* rsc, int n);
+ // builds the longest-common-subsequence step table; *result is malloc()ed
+ // and must be free()d by the caller, NULL on failure
+ void lcs(const char* s, const char* s2, int* l1, int* l2, char** result);
+ // length of the longest common subsequence of s and s2
+ int lcslen(const char* s, const char* s2);
+ int lcslen(const std::string& s, const std::string& s2);
+ char* suggest_hentry_gen(hentry* rv, const char* pattern);
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/w_char.hxx b/extensions/spellcheck/hunspell/src/w_char.hxx
new file mode 100644
index 000000000..336c454f7
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/w_char.hxx
@@ -0,0 +1,75 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef __WCHARHXX__
+#define __WCHARHXX__
+
+// 16-bit character value stored as two bytes: l is the low byte, h the high
+// byte. The struct is declared packed when GCC is defined.
+#ifndef GCC
+struct w_char {
+#else
+struct __attribute__((packed)) w_char {
+#endif
+  unsigned char l;
+  unsigned char h;
+
+  // orders by the combined value (h << 8) + l
+  friend bool operator<(const w_char a, const w_char b) {
+    return ((a.h << 8) + a.l) < ((b.h << 8) + b.l);
+  }
+
+  // equal when both bytes are equal
+  friend bool operator==(const w_char a, const w_char b) {
+    return (a.h == b.h) && (a.l == b.l);
+  }
+
+  friend bool operator!=(const w_char a, const w_char b) {
+    return (a.l != b.l) || (a.h != b.h);
+  }
+};
+
+// two character arrays
+// NOTE(review): only the layout is declared here. Presumably pattern is the
+// text to look for and pattern2 its replacement (REP table of the affix
+// file), with start / end restricting the match to the beginning / end of
+// the word - confirm against the code that fills and reads these entries.
+struct replentry {
+ char* pattern;
+ char* pattern2;
+ bool start;
+ bool end;
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.aff b/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.aff
new file mode 100644
index 000000000..0a11404fd
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.aff
@@ -0,0 +1,4 @@
+# capitalized ngram suggestion test data (Unicode version) for
+# Sf.net Bug ID 1463589, reported by Frederik Fouvry.
+SET UTF-8
+MAXNGRAMSUGS 1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.dic b/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.dic
new file mode 100644
index 000000000..8cec60603
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.dic
@@ -0,0 +1,2 @@
+1
+Kühlschrank
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.sug b/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.sug
new file mode 100644
index 000000000..8a72f1e21
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.sug
@@ -0,0 +1,5 @@
+Kühlschrank
+Kühlschrank
+Kühlschrank
+Kühlschrank
+Kühlschrank
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.test b/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.test
new file mode 100644
index 000000000..cde7c5410
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+# Run test.sh from this script's directory for the test named after this
+# file (file name without .test); -i utf-8 is passed through to test.sh.
+# All expansions are quoted so that paths containing spaces keep working.
+DIR="$(dirname "$0")"
+NAME="$(basename "$0" .test)"
+"$DIR/test.sh" "$NAME" -i utf-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.wrong
new file mode 100644
index 000000000..9de6c63cd
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.wrong
@@ -0,0 +1,5 @@
+kuhlschrank
+kuehlschrank
+kühlschrank
+Kuhlschrank
+Kuehlschrank
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1463589.aff b/extensions/spellcheck/hunspell/tests/unit/data/1463589.aff
new file mode 100644
index 000000000..8ecf4594e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1463589.aff
@@ -0,0 +1,3 @@
+# capitalized ngram suggestion test data for
+# Sf.net Bug ID 1463589, reported by Frederik Fouvry.
+MAXNGRAMSUGS 1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1463589.dic b/extensions/spellcheck/hunspell/tests/unit/data/1463589.dic
new file mode 100644
index 000000000..a3caab802
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1463589.dic
@@ -0,0 +1,2 @@
+1
+Khlschrank
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1463589.sug b/extensions/spellcheck/hunspell/tests/unit/data/1463589.sug
new file mode 100644
index 000000000..2961eddd2
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1463589.sug
@@ -0,0 +1,5 @@
+Khlschrank
+Khlschrank
+Khlschrank
+Khlschrank
+Khlschrank
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1463589.test b/extensions/spellcheck/hunspell/tests/unit/data/1463589.test
new file mode 100644
index 000000000..dc295077f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1463589.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+# Run test.sh from this script's directory for the test named after this
+# file (file name without .test); -i ISO8859-1 is passed through to test.sh.
+# All expansions are quoted so that paths containing spaces keep working.
+DIR="$(dirname "$0")"
+NAME="$(basename "$0" .test)"
+"$DIR/test.sh" "$NAME" -i ISO8859-1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1463589.wrong b/extensions/spellcheck/hunspell/tests/unit/data/1463589.wrong
new file mode 100644
index 000000000..0f3f48969
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1463589.wrong
@@ -0,0 +1,5 @@
+kuhlschrank
+kuehlschrank
+khlschrank
+Kuhlschrank
+Kuehlschrank
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1592880.aff b/extensions/spellcheck/hunspell/tests/unit/data/1592880.aff
new file mode 100644
index 000000000..0aa064e37
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1592880.aff
@@ -0,0 +1,20 @@
+# fix homonym handling for German dictionary project,
+# reported by Björn Jacke (sf.net Bug ID 1592880).
+SET ISO8859-1
+
+SFX N Y 1
+SFX N 0 n .
+
+SFX S Y 1
+SFX S 0 s .
+
+SFX P Y 1
+SFX P 0 en .
+
+SFX Q Y 2
+SFX Q 0 e .
+SFX Q 0 en .
+
+COMPOUNDEND z
+COMPOUNDPERMITFLAG c
+ONLYINCOMPOUND o
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1592880.dic b/extensions/spellcheck/hunspell/tests/unit/data/1592880.dic
new file mode 100644
index 000000000..8b0fef814
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1592880.dic
@@ -0,0 +1,4 @@
+3
+weg/Qoz
+weg/P
+wege
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1592880.good b/extensions/spellcheck/hunspell/tests/unit/data/1592880.good
new file mode 100644
index 000000000..aa00a58b1
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1592880.good
@@ -0,0 +1,3 @@
+weg
+wege
+wegen
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1592880.test b/extensions/spellcheck/hunspell/tests/unit/data/1592880.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1592880.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+# Run test.sh from this script's directory for the test named after this
+# file (file name without .test).
+# All expansions are quoted so that paths containing spaces keep working.
+DIR="$(dirname "$0")"
+NAME="$(basename "$0" .test)"
+"$DIR/test.sh" "$NAME"
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1695964.aff b/extensions/spellcheck/hunspell/tests/unit/data/1695964.aff
new file mode 100644
index 000000000..359a25f3a
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1695964.aff
@@ -0,0 +1,10 @@
+# fix NEEDAFFIX homonym suggestion.
+# Sf.net Bug ID 1695964, reported by Björn Jacke.
+TRY esianrtolcdugmphbyfvkwESIANRTOLCDUGMPHBYFVKW
+MAXNGRAMSUGS 0
+NEEDAFFIX h
+SFX S Y 1
+SFX S 0 s .
+
+SFX e Y 1
+SFX e 0 e .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1695964.dic b/extensions/spellcheck/hunspell/tests/unit/data/1695964.dic
new file mode 100644
index 000000000..ff6d110cc
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1695964.dic
@@ -0,0 +1,3 @@
+2
+Mull/he
+Mull/S
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1695964.sug b/extensions/spellcheck/hunspell/tests/unit/data/1695964.sug
new file mode 100644
index 000000000..35aedff7c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1695964.sug
@@ -0,0 +1,3 @@
+Mull
+Mulle
+Mulls
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1695964.test b/extensions/spellcheck/hunspell/tests/unit/data/1695964.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1695964.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+# Run test.sh from this script's directory for the test named after this
+# file (file name without .test).
+# All expansions are quoted so that paths containing spaces keep working.
+DIR="$(dirname "$0")"
+NAME="$(basename "$0" .test)"
+"$DIR/test.sh" "$NAME"
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1695964.wrong b/extensions/spellcheck/hunspell/tests/unit/data/1695964.wrong
new file mode 100644
index 000000000..fd13dc8ca
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1695964.wrong
@@ -0,0 +1,3 @@
+Mall
+Malle
+Malls
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1706659.aff b/extensions/spellcheck/hunspell/tests/unit/data/1706659.aff
new file mode 100644
index 000000000..66a676efa
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1706659.aff
@@ -0,0 +1,13 @@
+# test COMPOUNDRULE bug reported by Björn Jacke
+SET ISO8859-1
+TRY esijanrtolcdugmphbyfvkwqxz
+
+SFX A Y 5
+SFX A 0 e .
+SFX A 0 er .
+SFX A 0 en .
+SFX A 0 em .
+SFX A 0 es .
+
+COMPOUNDRULE 1
+COMPOUNDRULE vw
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1706659.dic b/extensions/spellcheck/hunspell/tests/unit/data/1706659.dic
new file mode 100644
index 000000000..32d461f7a
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1706659.dic
@@ -0,0 +1,4 @@
+3
+arbeits/v
+scheu/Aw
+farbig/A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1706659.test b/extensions/spellcheck/hunspell/tests/unit/data/1706659.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1706659.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+# Run test.sh from this script's directory for the test named after this
+# file (file name without .test).
+# All expansions are quoted so that paths containing spaces keep working.
+DIR="$(dirname "$0")"
+NAME="$(basename "$0" .test)"
+"$DIR/test.sh" "$NAME"
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1706659.wrong b/extensions/spellcheck/hunspell/tests/unit/data/1706659.wrong
new file mode 100644
index 000000000..799dd3111
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1706659.wrong
@@ -0,0 +1,3 @@
+arbeitsfarbig
+arbeitsfarbige
+arbeitsfarbiger
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1975530.aff b/extensions/spellcheck/hunspell/tests/unit/data/1975530.aff
new file mode 100644
index 000000000..0912050d1
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1975530.aff
@@ -0,0 +1,6 @@
+SET UTF-8
+IGNORE ٌٍَُِّْـ
+
+PFX x N 1
+PFX x أ ت أ[^ي]
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1975530.dic b/extensions/spellcheck/hunspell/tests/unit/data/1975530.dic
new file mode 100644
index 000000000..b1b455df5
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1975530.dic
@@ -0,0 +1,3 @@
+2
+أرى/x
+أيار/x
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1975530.good b/extensions/spellcheck/hunspell/tests/unit/data/1975530.good
new file mode 100644
index 000000000..89212a57e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1975530.good
@@ -0,0 +1,3 @@
+أرى
+أيار
+ترى
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1975530.test b/extensions/spellcheck/hunspell/tests/unit/data/1975530.test
new file mode 100644
index 000000000..4d59c4212
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1975530.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+# Run test.sh from this script's directory for the test named after this
+# file (file name without .test); -i UTF-8 is passed through to test.sh.
+# All expansions are quoted so that paths containing spaces keep working.
+DIR="$(dirname "$0")"
+NAME="$(basename "$0" .test)"
+"$DIR/test.sh" "$NAME" -i UTF-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1975530.wrong b/extensions/spellcheck/hunspell/tests/unit/data/1975530.wrong
new file mode 100644
index 000000000..24cb57627
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/1975530.wrong
@@ -0,0 +1 @@
+تيار
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2970240.aff b/extensions/spellcheck/hunspell/tests/unit/data/2970240.aff
new file mode 100644
index 000000000..6ef95161d
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/2970240.aff
@@ -0,0 +1,5 @@
+# test words with three parts
+CHECKCOMPOUNDPATTERN 1
+CHECKCOMPOUNDPATTERN le fi
+COMPOUNDFLAG c
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2970240.dic b/extensions/spellcheck/hunspell/tests/unit/data/2970240.dic
new file mode 100644
index 000000000..f0b630569
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/2970240.dic
@@ -0,0 +1,4 @@
+3
+first/c
+middle/c
+last/c
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2970240.good b/extensions/spellcheck/hunspell/tests/unit/data/2970240.good
new file mode 100644
index 000000000..a8d3a593b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/2970240.good
@@ -0,0 +1 @@
+firstmiddlelast
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2970240.test b/extensions/spellcheck/hunspell/tests/unit/data/2970240.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/2970240.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+# Run test.sh from this script's directory for the test named after this
+# file (file name without .test).
+# All expansions are quoted so that paths containing spaces keep working.
+DIR="$(dirname "$0")"
+NAME="$(basename "$0" .test)"
+"$DIR/test.sh" "$NAME"
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2970240.wrong b/extensions/spellcheck/hunspell/tests/unit/data/2970240.wrong
new file mode 100644
index 000000000..32cead611
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/2970240.wrong
@@ -0,0 +1 @@
+lastmiddlefirst
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2970242.aff b/extensions/spellcheck/hunspell/tests/unit/data/2970242.aff
new file mode 100644
index 000000000..909f0fbc3
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/2970242.aff
@@ -0,0 +1,4 @@
+CHECKCOMPOUNDPATTERN 1
+CHECKCOMPOUNDPATTERN /a /b
+COMPOUNDFLAG c
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2970242.dic b/extensions/spellcheck/hunspell/tests/unit/data/2970242.dic
new file mode 100644
index 000000000..da0d05f92
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/2970242.dic
@@ -0,0 +1,4 @@
+3
+foo/ac
+bar/c
+baz/bc
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2970242.good b/extensions/spellcheck/hunspell/tests/unit/data/2970242.good
new file mode 100644
index 000000000..90ecb182f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/2970242.good
@@ -0,0 +1,5 @@
+foobar
+barfoo
+bazfoo
+barbaz
+bazbar
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2970242.test b/extensions/spellcheck/hunspell/tests/unit/data/2970242.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/2970242.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2970242.wrong b/extensions/spellcheck/hunspell/tests/unit/data/2970242.wrong
new file mode 100644
index 000000000..9dabfec91
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/2970242.wrong
@@ -0,0 +1 @@
+foobaz
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2999225.aff b/extensions/spellcheck/hunspell/tests/unit/data/2999225.aff
new file mode 100644
index 000000000..ea9d0b07b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/2999225.aff
@@ -0,0 +1,6 @@
+COMPOUNDRULE 1
+COMPOUNDRULE ab
+
+COMPOUNDBEGIN A
+COMPOUNDEND B
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2999225.dic b/extensions/spellcheck/hunspell/tests/unit/data/2999225.dic
new file mode 100644
index 000000000..249860362
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/2999225.dic
@@ -0,0 +1,4 @@
+3
+foo/aA
+bar/b
+baz/B
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2999225.good b/extensions/spellcheck/hunspell/tests/unit/data/2999225.good
new file mode 100644
index 000000000..865e15452
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/2999225.good
@@ -0,0 +1,2 @@
+foobar
+foobaz
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2999225.test b/extensions/spellcheck/hunspell/tests/unit/data/2999225.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/2999225.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+# Run test.sh from this script's directory for the test named after this
+# file (file name without .test).
+# All expansions are quoted so that paths containing spaces keep working.
+DIR="$(dirname "$0")"
+NAME="$(basename "$0" .test)"
+"$DIR/test.sh" "$NAME"
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/IJ.aff b/extensions/spellcheck/hunspell/tests/unit/data/IJ.aff
new file mode 100644
index 000000000..c817c2e91
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/IJ.aff
@@ -0,0 +1,8 @@
+# check bad capitalisation of Dutch letter IJ.
+TRY i
+FORBIDDENWORD *
+PFX i N 1
+PFX i ij IJ ij
+
+REP 1
+REP ij IJ
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/IJ.dic b/extensions/spellcheck/hunspell/tests/unit/data/IJ.dic
new file mode 100644
index 000000000..ecaf91d21
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/IJ.dic
@@ -0,0 +1,3 @@
+1
+ijs/i
+Ijs/*
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/IJ.good b/extensions/spellcheck/hunspell/tests/unit/data/IJ.good
new file mode 100644
index 000000000..5f888f057
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/IJ.good
@@ -0,0 +1,2 @@
+ijs
+IJs
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/IJ.sug b/extensions/spellcheck/hunspell/tests/unit/data/IJ.sug
new file mode 100644
index 000000000..582b7956b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/IJ.sug
@@ -0,0 +1 @@
+IJs, ijs
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/IJ.test b/extensions/spellcheck/hunspell/tests/unit/data/IJ.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/IJ.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+# Run test.sh from this script's directory for the test named after this
+# file (file name without .test).
+# All expansions are quoted so that paths containing spaces keep working.
+DIR="$(dirname "$0")"
+NAME="$(basename "$0" .test)"
+"$DIR/test.sh" "$NAME"
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/IJ.wrong b/extensions/spellcheck/hunspell/tests/unit/data/IJ.wrong
new file mode 100644
index 000000000..54bbb475a
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/IJ.wrong
@@ -0,0 +1 @@
+Ijs
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/Makefile.am b/extensions/spellcheck/hunspell/tests/unit/data/Makefile.am
new file mode 100644
index 000000000..8018ccf7b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/Makefile.am
@@ -0,0 +1,693 @@
+## Process this file with automake to create Makefile.in
+
+SUBDIRS = suggestiontest
+
+XFAIL_TESTS = @XFAILED@
+
+TESTS = \
+affixes.test \
+condition.test \
+condition_utf.test \
+base.test \
+base_utf.test \
+allcaps.test \
+allcaps_utf.test \
+allcaps2.test \
+allcaps3.test \
+keepcase.test \
+i58202.test \
+map.test \
+rep.test \
+sug.test \
+sugutf.test \
+phone.test \
+flag.test \
+flaglong.test \
+flagnum.test \
+flagutf8.test \
+slash.test \
+forbiddenword.test \
+nosuggest.test \
+alias.test \
+alias2.test \
+alias3.test \
+breakdefault.test \
+break.test \
+needaffix.test \
+needaffix2.test \
+needaffix3.test \
+needaffix4.test \
+needaffix5.test \
+circumfix.test \
+fogemorpheme.test \
+onlyincompound.test \
+complexprefixes.test \
+complexprefixes2.test \
+complexprefixesutf.test \
+conditionalprefix.test \
+zeroaffix.test \
+utf8.test \
+utf8_bom.test \
+utf8_bom2.test \
+utf8_nonbmp.test \
+compoundflag.test \
+compoundrule.test \
+compoundrule2.test \
+compoundrule3.test \
+compoundrule4.test \
+compoundrule5.test \
+compoundrule6.test \
+compoundrule7.test \
+compoundrule8.test \
+compoundaffix.test \
+compoundaffix2.test \
+compoundaffix3.test \
+checkcompounddup.test \
+checkcompoundtriple.test \
+simplifiedtriple.test \
+checkcompoundrep.test \
+checkcompoundcase2.test \
+checkcompoundcaseutf.test \
+checkcompoundpattern.test \
+checkcompoundpattern2.test \
+checkcompoundpattern3.test \
+checkcompoundpattern4.test \
+utfcompound.test \
+checksharps.test \
+checksharpsutf.test \
+germancompounding.test \
+germancompoundingold.test \
+i35725.test \
+i53643.test \
+i54633.test \
+i54980.test \
+maputf.test \
+reputf.test \
+ignore.test \
+ignoreutf.test \
+1592880.test \
+1695964.test \
+1463589.test \
+1463589_utf.test \
+IJ.test \
+i68568.test \
+i68568utf.test \
+1706659.test \
+digits_in_words.test \
+colons_in_words.test \
+ngram_utf_fix.test \
+morph.test \
+1975530.test \
+fullstrip.test \
+iconv.test \
+oconv.test \
+encoding.test \
+korean.test \
+opentaal_forbiddenword1.test \
+opentaal_forbiddenword2.test \
+opentaal_keepcase.test \
+arabic.test \
+2970240.test \
+2970242.test \
+breakoff.test \
+opentaal_cpdpat.test \
+opentaal_cpdpat2.test \
+2999225.test \
+onlyincompound2.test \
+forceucase.test \
+warn.test
+
+# infixes.test
+
+distclean-local:
+ -rm -rf testSubDir
+
+EXTRA_DIST = \
+test.sh \
+affixes.aff \
+affixes.dic \
+affixes.good \
+affixes.test \
+condition.aff \
+condition.dic \
+condition.good \
+condition.test \
+condition.wrong \
+condition_utf.aff \
+condition_utf.dic \
+condition_utf.good \
+condition_utf.test \
+condition_utf.wrong \
+base.aff \
+base.dic \
+base.good \
+base.sug \
+base.test \
+base.wrong \
+base_utf.aff \
+base_utf.dic \
+base_utf.good \
+base_utf.sug \
+base_utf.test \
+base_utf.wrong \
+allcaps.aff \
+allcaps.dic \
+allcaps.good \
+allcaps.sug \
+allcaps.test \
+allcaps.wrong \
+allcaps2.aff \
+allcaps2.dic \
+allcaps2.good \
+allcaps2.sug \
+allcaps2.test \
+allcaps2.wrong \
+allcaps3.aff \
+allcaps3.dic \
+allcaps3.good \
+allcaps3.test \
+allcaps3.wrong \
+allcaps_utf.aff \
+allcaps_utf.dic \
+allcaps_utf.good \
+allcaps_utf.sug \
+allcaps_utf.test \
+allcaps_utf.wrong \
+keepcase.aff \
+keepcase.dic \
+keepcase.good \
+keepcase.sug \
+keepcase.test \
+keepcase.wrong \
+map.aff \
+map.dic \
+map.sug \
+map.test \
+map.wrong \
+rep.aff \
+rep.dic \
+rep.sug \
+rep.test \
+rep.wrong \
+sug.aff \
+sug.dic \
+sug.sug \
+sug.test \
+sug.wrong \
+sugutf.aff \
+sugutf.dic \
+sugutf.sug \
+sugutf.test \
+sugutf.wrong \
+phone.aff \
+phone.dic \
+phone.sug \
+phone.test \
+phone.wrong \
+alias.aff \
+alias.dic \
+alias.good \
+alias.test \
+alias2.aff \
+alias2.dic \
+alias2.good \
+alias2.morph \
+alias2.test \
+alias3.aff \
+alias3.dic \
+alias3.good \
+alias3.morph \
+alias3.test \
+break.aff \
+break.dic \
+break.good \
+break.test \
+break.wrong \
+breakdefault.aff \
+breakdefault.dic \
+breakdefault.good \
+breakdefault.sug \
+breakdefault.test \
+breakdefault.wrong \
+circumfix.aff \
+circumfix.dic \
+circumfix.good \
+circumfix.morph \
+circumfix.test \
+circumfix.wrong \
+fogemorpheme.aff \
+fogemorpheme.dic \
+fogemorpheme.good \
+fogemorpheme.test \
+fogemorpheme.wrong \
+onlyincompound.aff \
+onlyincompound.dic \
+onlyincompound.good \
+onlyincompound.sug \
+onlyincompound.test \
+onlyincompound.wrong \
+forbiddenword.aff \
+forbiddenword.dic \
+forbiddenword.good \
+forbiddenword.test \
+forbiddenword.wrong \
+nosuggest.aff \
+nosuggest.dic \
+nosuggest.good \
+nosuggest.sug \
+nosuggest.test \
+nosuggest.wrong \
+germancompounding.aff \
+germancompounding.dic \
+germancompounding.good \
+germancompounding.test \
+germancompounding.wrong \
+germancompoundingold.aff \
+germancompoundingold.dic \
+germancompoundingold.good \
+germancompoundingold.test \
+germancompoundingold.wrong \
+needaffix2.aff \
+needaffix2.dic \
+needaffix2.good \
+needaffix2.morph \
+needaffix2.test \
+needaffix3.aff \
+needaffix3.dic \
+needaffix3.good \
+needaffix3.test \
+needaffix3.wrong \
+needaffix4.aff \
+needaffix4.dic \
+needaffix4.good \
+needaffix4.test \
+needaffix5.aff \
+needaffix5.dic \
+needaffix5.good \
+needaffix5.test \
+needaffix5.wrong \
+needaffix.aff \
+needaffix.dic \
+needaffix.good \
+needaffix.test \
+needaffix.wrong \
+zeroaffix.aff \
+zeroaffix.dic \
+zeroaffix.good \
+zeroaffix.morph \
+zeroaffix.test \
+utf8.aff \
+utf8.dic \
+utf8.good \
+utf8.test \
+utf8_bom.aff \
+utf8_bom.dic \
+utf8_bom.good \
+utf8_bom.test \
+utf8_bom2.aff \
+utf8_bom2.dic \
+utf8_bom2.good \
+utf8_bom2.test \
+utf8_nonbmp.aff \
+utf8_nonbmp.dic \
+utf8_nonbmp.good \
+utf8_nonbmp.sug \
+utf8_nonbmp.test \
+utf8_nonbmp.wrong \
+utfcompound.aff \
+utfcompound.dic \
+utfcompound.good \
+utfcompound.test \
+utfcompound.wrong \
+compoundflag.aff \
+compoundflag.dic \
+compoundflag.good \
+compoundflag.test \
+compoundflag.wrong \
+compoundrule.aff \
+compoundrule.dic \
+compoundrule.good \
+compoundrule.test \
+compoundrule.wrong \
+compoundrule2.aff \
+compoundrule2.dic \
+compoundrule2.good \
+compoundrule2.test \
+compoundrule2.wrong \
+compoundrule3.aff \
+compoundrule3.dic \
+compoundrule3.good \
+compoundrule3.test \
+compoundrule3.wrong \
+compoundrule4.aff \
+compoundrule4.dic \
+compoundrule4.good \
+compoundrule4.test \
+compoundrule4.wrong \
+compoundrule5.aff \
+compoundrule5.dic \
+compoundrule5.good \
+compoundrule5.morph \
+compoundrule5.test \
+compoundrule5.wrong \
+compoundrule6.aff \
+compoundrule6.dic \
+compoundrule6.good \
+compoundrule6.test \
+compoundrule6.wrong \
+compoundrule7.aff \
+compoundrule7.dic \
+compoundrule7.good \
+compoundrule7.test \
+compoundrule7.wrong \
+compoundrule8.aff \
+compoundrule8.dic \
+compoundrule8.good \
+compoundrule8.test \
+compoundrule8.wrong \
+compoundaffix.aff \
+compoundaffix.dic \
+compoundaffix.good \
+compoundaffix.test \
+compoundaffix.wrong \
+compoundaffix2.aff \
+compoundaffix2.dic \
+compoundaffix2.good \
+compoundaffix2.test \
+compoundaffix3.aff \
+compoundaffix3.dic \
+compoundaffix3.good \
+compoundaffix3.test \
+compoundaffix3.wrong \
+checkcompounddup.aff \
+checkcompounddup.dic \
+checkcompounddup.good \
+checkcompounddup.test \
+checkcompounddup.wrong \
+checkcompoundcase.aff \
+checkcompoundcase.dic \
+checkcompoundcase.good \
+checkcompoundcase.test \
+checkcompoundcase.wrong \
+checkcompoundcase2.aff \
+checkcompoundcase2.dic \
+checkcompoundcase2.good \
+checkcompoundcase2.test \
+checkcompoundcase2.wrong \
+checkcompoundcaseutf.aff \
+checkcompoundcaseutf.dic \
+checkcompoundcaseutf.good \
+checkcompoundcaseutf.test \
+checkcompoundcaseutf.wrong \
+checkcompoundrep.aff \
+checkcompoundrep.dic \
+checkcompoundrep.good \
+checkcompoundrep.test \
+checkcompoundrep.wrong \
+checkcompoundtriple.aff \
+checkcompoundtriple.dic \
+checkcompoundtriple.good \
+checkcompoundtriple.test \
+checkcompoundtriple.wrong \
+simplifiedtriple.aff \
+simplifiedtriple.dic \
+simplifiedtriple.good \
+simplifiedtriple.test \
+simplifiedtriple.wrong \
+checkcompoundpattern.aff \
+checkcompoundpattern.dic \
+checkcompoundpattern.good \
+checkcompoundpattern.test \
+checkcompoundpattern.wrong \
+checkcompoundpattern2.aff \
+checkcompoundpattern2.dic \
+checkcompoundpattern2.good \
+checkcompoundpattern2.test \
+checkcompoundpattern2.wrong \
+checkcompoundpattern3.aff \
+checkcompoundpattern3.dic \
+checkcompoundpattern3.good \
+checkcompoundpattern3.test \
+checkcompoundpattern3.wrong \
+checkcompoundpattern4.aff \
+checkcompoundpattern4.dic \
+checkcompoundpattern4.good \
+checkcompoundpattern4.test \
+checkcompoundpattern4.wrong \
+checksharps.aff \
+checksharps.dic \
+checksharps.good \
+checksharps.sug \
+checksharps.test \
+checksharps.wrong \
+checksharpsutf.aff \
+checksharpsutf.dic \
+checksharpsutf.good \
+checksharpsutf.sug \
+checksharpsutf.test \
+checksharpsutf.wrong \
+conditionalprefix.aff \
+conditionalprefix.dic \
+conditionalprefix.good \
+conditionalprefix.morph \
+conditionalprefix.test \
+conditionalprefix.wrong \
+flaglong.aff \
+flaglong.dic \
+flaglong.good \
+flaglong.test \
+flagnum.aff \
+flagnum.dic \
+flagnum.good \
+flagnum.test \
+flag.aff \
+flag.dic \
+flag.good \
+flag.test \
+flagutf8.aff \
+flagutf8.dic \
+flagutf8.good \
+flagutf8.test \
+complexprefixes.aff \
+complexprefixes.dic \
+complexprefixes.good \
+complexprefixes.wrong \
+complexprefixes.test \
+complexprefixes2.aff \
+complexprefixes2.dic \
+complexprefixes2.good \
+complexprefixes2.test \
+complexprefixesutf.aff \
+complexprefixesutf.dic \
+complexprefixesutf.good \
+complexprefixesutf.wrong \
+complexprefixesutf.test \
+i35725.aff \
+i35725.dic \
+i35725.good \
+i35725.sug \
+i35725.test \
+i35725.wrong \
+i53643.aff \
+i53643.dic \
+i53643.good \
+i53643.test \
+i53643.wrong \
+i54633.aff \
+i54633.dic \
+i54633.good \
+i54633.sug \
+i54633.test \
+i54633.wrong \
+i54980.aff \
+i54980.dic \
+i54980.good \
+i54980.test \
+i58202.aff \
+i58202.dic \
+i58202.good \
+i58202.sug \
+i58202.test \
+i58202.wrong \
+maputf.aff \
+maputf.dic \
+maputf.sug \
+maputf.wrong \
+maputf.test \
+reputf.aff \
+reputf.dic \
+reputf.sug \
+reputf.wrong \
+reputf.test \
+slash.aff \
+slash.dic \
+slash.good \
+slash.test \
+ignore.aff \
+ignore.dic \
+ignore.good \
+ignore.test \
+ignoreutf.aff \
+ignoreutf.dic \
+ignoreutf.good \
+ignoreutf.test \
+1592880.aff \
+1592880.dic \
+1592880.good \
+1592880.test \
+1695964.aff \
+1695964.dic \
+1695964.sug \
+1695964.test \
+1695964.wrong \
+1463589.aff \
+1463589.dic \
+1463589.sug \
+1463589.test \
+1463589.wrong \
+1463589_utf.aff \
+1463589_utf.dic \
+1463589_utf.sug \
+1463589_utf.test \
+1463589_utf.wrong \
+IJ.aff \
+IJ.dic \
+IJ.good \
+IJ.sug \
+IJ.test \
+IJ.wrong \
+i68568.aff \
+i68568.dic \
+i68568.test \
+i68568.wrong \
+i68568utf.aff \
+i68568utf.dic \
+i68568utf.test \
+i68568utf.wrong \
+1706659.aff \
+1706659.dic \
+1706659.test \
+1706659.wrong \
+digits_in_words.aff \
+digits_in_words.dic \
+digits_in_words.test \
+digits_in_words.wrong \
+colons_in_words.aff \
+colons_in_words.dic \
+colons_in_words.test \
+ngram_utf_fix.aff \
+ngram_utf_fix.dic \
+ngram_utf_fix.good \
+ngram_utf_fix.sug \
+ngram_utf_fix.test \
+ngram_utf_fix.wrong \
+morph.aff \
+morph.dic \
+morph.good \
+morph.morph \
+morph.test \
+1975530.aff \
+1975530.dic \
+1975530.good \
+1975530.test \
+1975530.wrong \
+fullstrip.aff \
+fullstrip.dic \
+fullstrip.good \
+fullstrip.test \
+iconv.aff \
+iconv.dic \
+iconv.good \
+iconv.test \
+oconv.aff \
+oconv.dic \
+oconv.good \
+oconv.sug \
+oconv.test \
+oconv.wrong \
+encoding.aff \
+encoding.dic \
+encoding.good \
+encoding.test \
+opentaal_forbiddenword1.aff \
+opentaal_forbiddenword1.dic \
+opentaal_forbiddenword1.good \
+opentaal_forbiddenword1.sug \
+opentaal_forbiddenword1.test \
+opentaal_forbiddenword1.wrong \
+opentaal_forbiddenword2.aff \
+opentaal_forbiddenword2.dic \
+opentaal_forbiddenword2.good \
+opentaal_forbiddenword2.sug \
+opentaal_forbiddenword2.test \
+opentaal_forbiddenword2.wrong \
+opentaal_forbiddenword2.aff \
+opentaal_forbiddenword2.dic \
+opentaal_forbiddenword2.good \
+opentaal_forbiddenword2.sug \
+opentaal_forbiddenword2.test \
+opentaal_forbiddenword2.wrong \
+opentaal_keepcase.aff \
+opentaal_keepcase.dic \
+opentaal_keepcase.good \
+opentaal_keepcase.sug \
+opentaal_keepcase.test \
+opentaal_keepcase.wrong \
+arabic.aff \
+arabic.dic \
+arabic.wrong \
+arabic.test \
+2970240.aff \
+2970240.dic \
+2970240.good \
+2970240.wrong \
+2970240.test \
+2970242.aff \
+2970242.dic \
+2970242.good \
+2970242.wrong \
+2970242.test \
+breakoff.aff \
+breakoff.dic \
+breakoff.good \
+breakoff.wrong \
+breakoff.test \
+opentaal_cpdpat.aff \
+opentaal_cpdpat.dic \
+opentaal_cpdpat.good \
+opentaal_cpdpat.wrong \
+opentaal_cpdpat.test \
+opentaal_cpdpat2.aff \
+opentaal_cpdpat2.dic \
+opentaal_cpdpat2.good \
+opentaal_cpdpat2.wrong \
+opentaal_cpdpat2.test \
+2999225.aff \
+2999225.dic \
+2999225.good \
+2999225.test \
+korean.aff \
+korean.dic \
+korean.good \
+korean.wrong \
+korean.test \
+onlyincompound2.aff \
+onlyincompound2.dic \
+onlyincompound2.good \
+onlyincompound2.test \
+onlyincompound2.wrong \
+forceucase.aff \
+forceucase.dic \
+forceucase.good \
+forceucase.sug \
+forceucase.wrong \
+forceucase.test \
+warn.aff \
+warn.dic \
+warn.good \
+warn.test
+
+# infixes.aff
+# infixes.dic
+# infixes.good
+# infixes.test
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/Makefile.in b/extensions/spellcheck/hunspell/tests/unit/data/Makefile.in
new file mode 100644
index 000000000..a27e04875
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/Makefile.in
@@ -0,0 +1,1416 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+subdir = tests
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/codeset.m4 \
+ $(top_srcdir)/m4/gettext.m4 $(top_srcdir)/m4/glibc2.m4 \
+ $(top_srcdir)/m4/glibc21.m4 $(top_srcdir)/m4/iconv.m4 \
+ $(top_srcdir)/m4/intdiv0.m4 $(top_srcdir)/m4/intl.m4 \
+ $(top_srcdir)/m4/intlmacosx.m4 $(top_srcdir)/m4/intmax.m4 \
+ $(top_srcdir)/m4/inttypes-pri.m4 \
+ $(top_srcdir)/m4/inttypes_h.m4 $(top_srcdir)/m4/lcmessage.m4 \
+ $(top_srcdir)/m4/lib-ld.m4 $(top_srcdir)/m4/lib-link.m4 \
+ $(top_srcdir)/m4/lib-prefix.m4 $(top_srcdir)/m4/libtool.m4 \
+ $(top_srcdir)/m4/lock.m4 $(top_srcdir)/m4/longlong.m4 \
+ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+ $(top_srcdir)/m4/nls.m4 $(top_srcdir)/m4/po.m4 \
+ $(top_srcdir)/m4/printf-posix.m4 $(top_srcdir)/m4/progtest.m4 \
+ $(top_srcdir)/m4/size_max.m4 $(top_srcdir)/m4/stdint_h.m4 \
+ $(top_srcdir)/m4/uintmax_t.m4 $(top_srcdir)/m4/visibility.m4 \
+ $(top_srcdir)/m4/wchar_t.m4 $(top_srcdir)/m4/wint_t.m4 \
+ $(top_srcdir)/m4/xsize.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+SOURCES =
+DIST_SOURCES =
+RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
+ html-recursive info-recursive install-data-recursive \
+ install-dvi-recursive install-exec-recursive \
+ install-html-recursive install-info-recursive \
+ install-pdf-recursive install-ps-recursive install-recursive \
+ installcheck-recursive installdirs-recursive pdf-recursive \
+ ps-recursive uninstall-recursive
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
+ distclean-recursive maintainer-clean-recursive
+AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
+ $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \
+ distdir
+ETAGS = etags
+CTAGS = ctags
+am__tty_colors = \
+red=; grn=; lgn=; blu=; std=
+DIST_SUBDIRS = $(SUBDIRS)
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+am__relativize = \
+ dir0=`pwd`; \
+ sed_first='s,^\([^/]*\)/.*$$,\1,'; \
+ sed_rest='s,^[^/]*/*,,'; \
+ sed_last='s,^.*/\([^/]*\)$$,\1,'; \
+ sed_butlast='s,/*[^/]*$$,,'; \
+ while test -n "$$dir1"; do \
+ first=`echo "$$dir1" | sed -e "$$sed_first"`; \
+ if test "$$first" != "."; then \
+ if test "$$first" = ".."; then \
+ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
+ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
+ else \
+ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
+ if test "$$first2" = "$$first"; then \
+ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
+ else \
+ dir2="../$$dir2"; \
+ fi; \
+ dir0="$$dir0"/"$$first"; \
+ fi; \
+ fi; \
+ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
+ done; \
+ reldir="$$dir2"
+ACLOCAL = @ACLOCAL@
+ALLOCA = @ALLOCA@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BUILD_INCLUDED_LIBINTL = @BUILD_INCLUDED_LIBINTL@
+CATOBJEXT = @CATOBJEXT@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CFLAG_VISIBILITY = @CFLAG_VISIBILITY@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CURSESLIB = @CURSESLIB@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DATADIRNAME = @DATADIRNAME@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GENCAT = @GENCAT@
+GETTEXT_MACRO_VERSION = @GETTEXT_MACRO_VERSION@
+GLIBC2 = @GLIBC2@
+GLIBC21 = @GLIBC21@
+GMSGFMT = @GMSGFMT@
+GMSGFMT_015 = @GMSGFMT_015@
+GREP = @GREP@
+HAVE_ASPRINTF = @HAVE_ASPRINTF@
+HAVE_POSIX_PRINTF = @HAVE_POSIX_PRINTF@
+HAVE_SNPRINTF = @HAVE_SNPRINTF@
+HAVE_VISIBILITY = @HAVE_VISIBILITY@
+HAVE_WPRINTF = @HAVE_WPRINTF@
+HUNSPELL_VERSION_MAJOR = @HUNSPELL_VERSION_MAJOR@
+HUNSPELL_VERSION_MINOR = @HUNSPELL_VERSION_MINOR@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+INSTOBJEXT = @INSTOBJEXT@
+INTLBISON = @INTLBISON@
+INTLLIBS = @INTLLIBS@
+INTLOBJS = @INTLOBJS@
+INTL_LIBTOOL_SUFFIX_PREFIX = @INTL_LIBTOOL_SUFFIX_PREFIX@
+INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBICONV = @LIBICONV@
+LIBINTL = @LIBINTL@
+LIBMULTITHREAD = @LIBMULTITHREAD@
+LIBOBJS = @LIBOBJS@
+LIBPTH = @LIBPTH@
+LIBPTH_PREFIX = @LIBPTH_PREFIX@
+LIBS = @LIBS@
+LIBTHREAD = @LIBTHREAD@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBC = @LTLIBC@
+LTLIBICONV = @LTLIBICONV@
+LTLIBINTL = @LTLIBINTL@
+LTLIBMULTITHREAD = @LTLIBMULTITHREAD@
+LTLIBOBJS = @LTLIBOBJS@
+LTLIBPTH = @LTLIBPTH@
+LTLIBTHREAD = @LTLIBTHREAD@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MSGFMT = @MSGFMT@
+MSGFMT_015 = @MSGFMT_015@
+MSGMERGE = @MSGMERGE@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+POSUB = @POSUB@
+PRI_MACROS_BROKEN = @PRI_MACROS_BROKEN@
+RANLIB = @RANLIB@
+READLINELIB = @READLINELIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+USE_INCLUDED_LIBINTL = @USE_INCLUDED_LIBINTL@
+USE_NLS = @USE_NLS@
+VERSION = @VERSION@
+WINDRES = @WINDRES@
+WOE32 = @WOE32@
+WOE32DLL = @WOE32DLL@
+XFAILED = @XFAILED@
+XGETTEXT = @XGETTEXT@
+XGETTEXT_015 = @XGETTEXT_015@
+XGETTEXT_EXTRA_OPTIONS = @XGETTEXT_EXTRA_OPTIONS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lt_ECHO = @lt_ECHO@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+SUBDIRS = suggestiontest
+XFAIL_TESTS = @XFAILED@
+TESTS = \
+affixes.test \
+condition.test \
+condition_utf.test \
+base.test \
+base_utf.test \
+allcaps.test \
+allcaps_utf.test \
+allcaps2.test \
+allcaps3.test \
+keepcase.test \
+i58202.test \
+map.test \
+rep.test \
+sug.test \
+sugutf.test \
+phone.test \
+flag.test \
+flaglong.test \
+flagnum.test \
+flagutf8.test \
+slash.test \
+forbiddenword.test \
+nosuggest.test \
+alias.test \
+alias2.test \
+alias3.test \
+breakdefault.test \
+break.test \
+needaffix.test \
+needaffix2.test \
+needaffix3.test \
+needaffix4.test \
+needaffix5.test \
+circumfix.test \
+fogemorpheme.test \
+onlyincompound.test \
+complexprefixes.test \
+complexprefixes2.test \
+complexprefixesutf.test \
+conditionalprefix.test \
+zeroaffix.test \
+utf8.test \
+utf8_bom.test \
+utf8_bom2.test \
+utf8_nonbmp.test \
+compoundflag.test \
+compoundrule.test \
+compoundrule2.test \
+compoundrule3.test \
+compoundrule4.test \
+compoundrule5.test \
+compoundrule6.test \
+compoundrule7.test \
+compoundrule8.test \
+compoundaffix.test \
+compoundaffix2.test \
+compoundaffix3.test \
+checkcompounddup.test \
+checkcompoundtriple.test \
+simplifiedtriple.test \
+checkcompoundrep.test \
+checkcompoundcase2.test \
+checkcompoundcaseutf.test \
+checkcompoundpattern.test \
+checkcompoundpattern2.test \
+checkcompoundpattern3.test \
+checkcompoundpattern4.test \
+utfcompound.test \
+checksharps.test \
+checksharpsutf.test \
+germancompounding.test \
+germancompoundingold.test \
+i35725.test \
+i53643.test \
+i54633.test \
+i54980.test \
+maputf.test \
+reputf.test \
+ignore.test \
+ignoreutf.test \
+1592880.test \
+1695964.test \
+1463589.test \
+1463589_utf.test \
+IJ.test \
+i68568.test \
+i68568utf.test \
+1706659.test \
+digits_in_words.test \
+colons_in_words.test \
+ngram_utf_fix.test \
+morph.test \
+1975530.test \
+fullstrip.test \
+iconv.test \
+oconv.test \
+encoding.test \
+korean.test \
+opentaal_forbiddenword1.test \
+opentaal_forbiddenword2.test \
+opentaal_keepcase.test \
+arabic.test \
+2970240.test \
+2970242.test \
+breakoff.test \
+opentaal_cpdpat.test \
+opentaal_cpdpat2.test \
+2999225.test \
+onlyincompound2.test \
+forceucase.test \
+warn.test
+
+EXTRA_DIST = \
+test.sh \
+affixes.aff \
+affixes.dic \
+affixes.good \
+affixes.test \
+condition.aff \
+condition.dic \
+condition.good \
+condition.test \
+condition.wrong \
+condition_utf.aff \
+condition_utf.dic \
+condition_utf.good \
+condition_utf.test \
+condition_utf.wrong \
+base.aff \
+base.dic \
+base.good \
+base.sug \
+base.test \
+base.wrong \
+base_utf.aff \
+base_utf.dic \
+base_utf.good \
+base_utf.sug \
+base_utf.test \
+base_utf.wrong \
+allcaps.aff \
+allcaps.dic \
+allcaps.good \
+allcaps.sug \
+allcaps.test \
+allcaps.wrong \
+allcaps2.aff \
+allcaps2.dic \
+allcaps2.good \
+allcaps2.sug \
+allcaps2.test \
+allcaps2.wrong \
+allcaps3.aff \
+allcaps3.dic \
+allcaps3.good \
+allcaps3.test \
+allcaps3.wrong \
+allcaps_utf.aff \
+allcaps_utf.dic \
+allcaps_utf.good \
+allcaps_utf.sug \
+allcaps_utf.test \
+allcaps_utf.wrong \
+keepcase.aff \
+keepcase.dic \
+keepcase.good \
+keepcase.sug \
+keepcase.test \
+keepcase.wrong \
+map.aff \
+map.dic \
+map.sug \
+map.test \
+map.wrong \
+rep.aff \
+rep.dic \
+rep.sug \
+rep.test \
+rep.wrong \
+sug.aff \
+sug.dic \
+sug.sug \
+sug.test \
+sug.wrong \
+sugutf.aff \
+sugutf.dic \
+sugutf.sug \
+sugutf.test \
+sugutf.wrong \
+phone.aff \
+phone.dic \
+phone.sug \
+phone.test \
+phone.wrong \
+alias.aff \
+alias.dic \
+alias.good \
+alias.test \
+alias2.aff \
+alias2.dic \
+alias2.good \
+alias2.morph \
+alias2.test \
+alias3.aff \
+alias3.dic \
+alias3.good \
+alias3.morph \
+alias3.test \
+break.aff \
+break.dic \
+break.good \
+break.test \
+break.wrong \
+breakdefault.aff \
+breakdefault.dic \
+breakdefault.good \
+breakdefault.sug \
+breakdefault.test \
+breakdefault.wrong \
+circumfix.aff \
+circumfix.dic \
+circumfix.good \
+circumfix.morph \
+circumfix.test \
+circumfix.wrong \
+fogemorpheme.aff \
+fogemorpheme.dic \
+fogemorpheme.good \
+fogemorpheme.test \
+fogemorpheme.wrong \
+onlyincompound.aff \
+onlyincompound.dic \
+onlyincompound.good \
+onlyincompound.sug \
+onlyincompound.test \
+onlyincompound.wrong \
+forbiddenword.aff \
+forbiddenword.dic \
+forbiddenword.good \
+forbiddenword.test \
+forbiddenword.wrong \
+nosuggest.aff \
+nosuggest.dic \
+nosuggest.good \
+nosuggest.sug \
+nosuggest.test \
+nosuggest.wrong \
+germancompounding.aff \
+germancompounding.dic \
+germancompounding.good \
+germancompounding.test \
+germancompounding.wrong \
+germancompoundingold.aff \
+germancompoundingold.dic \
+germancompoundingold.good \
+germancompoundingold.test \
+germancompoundingold.wrong \
+needaffix2.aff \
+needaffix2.dic \
+needaffix2.good \
+needaffix2.morph \
+needaffix2.test \
+needaffix3.aff \
+needaffix3.dic \
+needaffix3.good \
+needaffix3.test \
+needaffix3.wrong \
+needaffix4.aff \
+needaffix4.dic \
+needaffix4.good \
+needaffix4.test \
+needaffix5.aff \
+needaffix5.dic \
+needaffix5.good \
+needaffix5.test \
+needaffix5.wrong \
+needaffix.aff \
+needaffix.dic \
+needaffix.good \
+needaffix.test \
+needaffix.wrong \
+zeroaffix.aff \
+zeroaffix.dic \
+zeroaffix.good \
+zeroaffix.morph \
+zeroaffix.test \
+utf8.aff \
+utf8.dic \
+utf8.good \
+utf8.test \
+utf8_bom.aff \
+utf8_bom.dic \
+utf8_bom.good \
+utf8_bom.test \
+utf8_bom2.aff \
+utf8_bom2.dic \
+utf8_bom2.good \
+utf8_bom2.test \
+utf8_nonbmp.aff \
+utf8_nonbmp.dic \
+utf8_nonbmp.good \
+utf8_nonbmp.sug \
+utf8_nonbmp.test \
+utf8_nonbmp.wrong \
+utfcompound.aff \
+utfcompound.dic \
+utfcompound.good \
+utfcompound.test \
+utfcompound.wrong \
+compoundflag.aff \
+compoundflag.dic \
+compoundflag.good \
+compoundflag.test \
+compoundflag.wrong \
+compoundrule.aff \
+compoundrule.dic \
+compoundrule.good \
+compoundrule.test \
+compoundrule.wrong \
+compoundrule2.aff \
+compoundrule2.dic \
+compoundrule2.good \
+compoundrule2.test \
+compoundrule2.wrong \
+compoundrule3.aff \
+compoundrule3.dic \
+compoundrule3.good \
+compoundrule3.test \
+compoundrule3.wrong \
+compoundrule4.aff \
+compoundrule4.dic \
+compoundrule4.good \
+compoundrule4.test \
+compoundrule4.wrong \
+compoundrule5.aff \
+compoundrule5.dic \
+compoundrule5.good \
+compoundrule5.morph \
+compoundrule5.test \
+compoundrule5.wrong \
+compoundrule6.aff \
+compoundrule6.dic \
+compoundrule6.good \
+compoundrule6.test \
+compoundrule6.wrong \
+compoundrule7.aff \
+compoundrule7.dic \
+compoundrule7.good \
+compoundrule7.test \
+compoundrule7.wrong \
+compoundrule8.aff \
+compoundrule8.dic \
+compoundrule8.good \
+compoundrule8.test \
+compoundrule8.wrong \
+compoundaffix.aff \
+compoundaffix.dic \
+compoundaffix.good \
+compoundaffix.test \
+compoundaffix.wrong \
+compoundaffix2.aff \
+compoundaffix2.dic \
+compoundaffix2.good \
+compoundaffix2.test \
+compoundaffix3.aff \
+compoundaffix3.dic \
+compoundaffix3.good \
+compoundaffix3.test \
+compoundaffix3.wrong \
+checkcompounddup.aff \
+checkcompounddup.dic \
+checkcompounddup.good \
+checkcompounddup.test \
+checkcompounddup.wrong \
+checkcompoundcase.aff \
+checkcompoundcase.dic \
+checkcompoundcase.good \
+checkcompoundcase.test \
+checkcompoundcase.wrong \
+checkcompoundcase2.aff \
+checkcompoundcase2.dic \
+checkcompoundcase2.good \
+checkcompoundcase2.test \
+checkcompoundcase2.wrong \
+checkcompoundcaseutf.aff \
+checkcompoundcaseutf.dic \
+checkcompoundcaseutf.good \
+checkcompoundcaseutf.test \
+checkcompoundcaseutf.wrong \
+checkcompoundrep.aff \
+checkcompoundrep.dic \
+checkcompoundrep.good \
+checkcompoundrep.test \
+checkcompoundrep.wrong \
+checkcompoundtriple.aff \
+checkcompoundtriple.dic \
+checkcompoundtriple.good \
+checkcompoundtriple.test \
+checkcompoundtriple.wrong \
+simplifiedtriple.aff \
+simplifiedtriple.dic \
+simplifiedtriple.good \
+simplifiedtriple.test \
+simplifiedtriple.wrong \
+checkcompoundpattern.aff \
+checkcompoundpattern.dic \
+checkcompoundpattern.good \
+checkcompoundpattern.test \
+checkcompoundpattern.wrong \
+checkcompoundpattern2.aff \
+checkcompoundpattern2.dic \
+checkcompoundpattern2.good \
+checkcompoundpattern2.test \
+checkcompoundpattern2.wrong \
+checkcompoundpattern3.aff \
+checkcompoundpattern3.dic \
+checkcompoundpattern3.good \
+checkcompoundpattern3.test \
+checkcompoundpattern3.wrong \
+checkcompoundpattern4.aff \
+checkcompoundpattern4.dic \
+checkcompoundpattern4.good \
+checkcompoundpattern4.test \
+checkcompoundpattern4.wrong \
+checksharps.aff \
+checksharps.dic \
+checksharps.good \
+checksharps.sug \
+checksharps.test \
+checksharps.wrong \
+checksharpsutf.aff \
+checksharpsutf.dic \
+checksharpsutf.good \
+checksharpsutf.sug \
+checksharpsutf.test \
+checksharpsutf.wrong \
+conditionalprefix.aff \
+conditionalprefix.dic \
+conditionalprefix.good \
+conditionalprefix.morph \
+conditionalprefix.test \
+conditionalprefix.wrong \
+flaglong.aff \
+flaglong.dic \
+flaglong.good \
+flaglong.test \
+flagnum.aff \
+flagnum.dic \
+flagnum.good \
+flagnum.test \
+flag.aff \
+flag.dic \
+flag.good \
+flag.test \
+flagutf8.aff \
+flagutf8.dic \
+flagutf8.good \
+flagutf8.test \
+complexprefixes.aff \
+complexprefixes.dic \
+complexprefixes.good \
+complexprefixes.wrong \
+complexprefixes.test \
+complexprefixes2.aff \
+complexprefixes2.dic \
+complexprefixes2.good \
+complexprefixes2.test \
+complexprefixesutf.aff \
+complexprefixesutf.dic \
+complexprefixesutf.good \
+complexprefixesutf.wrong \
+complexprefixesutf.test \
+i35725.aff \
+i35725.dic \
+i35725.good \
+i35725.sug \
+i35725.test \
+i35725.wrong \
+i53643.aff \
+i53643.dic \
+i53643.good \
+i53643.test \
+i53643.wrong \
+i54633.aff \
+i54633.dic \
+i54633.good \
+i54633.sug \
+i54633.test \
+i54633.wrong \
+i54980.aff \
+i54980.dic \
+i54980.good \
+i54980.test \
+i58202.aff \
+i58202.dic \
+i58202.good \
+i58202.sug \
+i58202.test \
+i58202.wrong \
+maputf.aff \
+maputf.dic \
+maputf.sug \
+maputf.wrong \
+maputf.test \
+reputf.aff \
+reputf.dic \
+reputf.sug \
+reputf.wrong \
+reputf.test \
+slash.aff \
+slash.dic \
+slash.good \
+slash.test \
+ignore.aff \
+ignore.dic \
+ignore.good \
+ignore.test \
+ignoreutf.aff \
+ignoreutf.dic \
+ignoreutf.good \
+ignoreutf.test \
+1592880.aff \
+1592880.dic \
+1592880.good \
+1592880.test \
+1695964.aff \
+1695964.dic \
+1695964.sug \
+1695964.test \
+1695964.wrong \
+1463589.aff \
+1463589.dic \
+1463589.sug \
+1463589.test \
+1463589.wrong \
+1463589_utf.aff \
+1463589_utf.dic \
+1463589_utf.sug \
+1463589_utf.test \
+1463589_utf.wrong \
+IJ.aff \
+IJ.dic \
+IJ.good \
+IJ.sug \
+IJ.test \
+IJ.wrong \
+i68568.aff \
+i68568.dic \
+i68568.test \
+i68568.wrong \
+i68568utf.aff \
+i68568utf.dic \
+i68568utf.test \
+i68568utf.wrong \
+1706659.aff \
+1706659.dic \
+1706659.test \
+1706659.wrong \
+digits_in_words.aff \
+digits_in_words.dic \
+digits_in_words.test \
+digits_in_words.wrong \
+colons_in_words.aff \
+colons_in_words.dic \
+colons_in_words.test \
+ngram_utf_fix.aff \
+ngram_utf_fix.dic \
+ngram_utf_fix.good \
+ngram_utf_fix.sug \
+ngram_utf_fix.test \
+ngram_utf_fix.wrong \
+morph.aff \
+morph.dic \
+morph.good \
+morph.morph \
+morph.test \
+1975530.aff \
+1975530.dic \
+1975530.good \
+1975530.test \
+1975530.wrong \
+fullstrip.aff \
+fullstrip.dic \
+fullstrip.good \
+fullstrip.test \
+iconv.aff \
+iconv.dic \
+iconv.good \
+iconv.test \
+oconv.aff \
+oconv.dic \
+oconv.good \
+oconv.sug \
+oconv.test \
+oconv.wrong \
+encoding.aff \
+encoding.dic \
+encoding.good \
+encoding.test \
+opentaal_forbiddenword1.aff \
+opentaal_forbiddenword1.dic \
+opentaal_forbiddenword1.good \
+opentaal_forbiddenword1.sug \
+opentaal_forbiddenword1.test \
+opentaal_forbiddenword1.wrong \
+opentaal_forbiddenword2.aff \
+opentaal_forbiddenword2.dic \
+opentaal_forbiddenword2.good \
+opentaal_forbiddenword2.sug \
+opentaal_forbiddenword2.test \
+opentaal_forbiddenword2.wrong \
+opentaal_forbiddenword2.aff \
+opentaal_forbiddenword2.dic \
+opentaal_forbiddenword2.good \
+opentaal_forbiddenword2.sug \
+opentaal_forbiddenword2.test \
+opentaal_forbiddenword2.wrong \
+opentaal_keepcase.aff \
+opentaal_keepcase.dic \
+opentaal_keepcase.good \
+opentaal_keepcase.sug \
+opentaal_keepcase.test \
+opentaal_keepcase.wrong \
+arabic.aff \
+arabic.dic \
+arabic.wrong \
+arabic.test \
+2970240.aff \
+2970240.dic \
+2970240.good \
+2970240.wrong \
+2970240.test \
+2970242.aff \
+2970242.dic \
+2970242.good \
+2970242.wrong \
+2970242.test \
+breakoff.aff \
+breakoff.dic \
+breakoff.good \
+breakoff.wrong \
+breakoff.test \
+opentaal_cpdpat.aff \
+opentaal_cpdpat.dic \
+opentaal_cpdpat.good \
+opentaal_cpdpat.wrong \
+opentaal_cpdpat.test \
+opentaal_cpdpat2.aff \
+opentaal_cpdpat2.dic \
+opentaal_cpdpat2.good \
+opentaal_cpdpat2.wrong \
+opentaal_cpdpat2.test \
+2999225.aff \
+2999225.dic \
+2999225.good \
+2999225.test \
+korean.aff \
+korean.dic \
+korean.good \
+korean.wrong \
+korean.test \
+onlyincompound2.aff \
+onlyincompound2.dic \
+onlyincompound2.good \
+onlyincompound2.test \
+onlyincompound2.wrong \
+forceucase.aff \
+forceucase.dic \
+forceucase.good \
+forceucase.sug \
+forceucase.wrong \
+forceucase.test \
+warn.aff \
+warn.dic \
+warn.good \
+warn.test
+
+all: all-recursive
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu tests/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --gnu tests/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run `make' without going through this Makefile.
+# To change the values of `make' variables: instead of editing Makefiles,
+# (1) if the variable is set in `config.status', edit `config.status'
+# (which will cause the Makefiles to be regenerated when you run `make');
+# (2) otherwise, pass the desired values on the `make' command line.
+$(RECURSIVE_TARGETS):
+ @fail= failcom='exit 1'; \
+ for f in x $$MAKEFLAGS; do \
+ case $$f in \
+ *=* | --[!k]*);; \
+ *k*) failcom='fail=yes';; \
+ esac; \
+ done; \
+ dot_seen=no; \
+ target=`echo $@ | sed s/-recursive//`; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ dot_seen=yes; \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || eval $$failcom; \
+ done; \
+ if test "$$dot_seen" = "no"; then \
+ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+ fi; test -z "$$fail"
+
+$(RECURSIVE_CLEAN_TARGETS):
+ @fail= failcom='exit 1'; \
+ for f in x $$MAKEFLAGS; do \
+ case $$f in \
+ *=* | --[!k]*);; \
+ *k*) failcom='fail=yes';; \
+ esac; \
+ done; \
+ dot_seen=no; \
+ case "$@" in \
+ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+ *) list='$(SUBDIRS)' ;; \
+ esac; \
+ rev=''; for subdir in $$list; do \
+ if test "$$subdir" = "."; then :; else \
+ rev="$$subdir $$rev"; \
+ fi; \
+ done; \
+ rev="$$rev ."; \
+ target=`echo $@ | sed s/-recursive//`; \
+ for subdir in $$rev; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || eval $$failcom; \
+ done && test -z "$$fail"
+tags-recursive:
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
+ done
+ctags-recursive:
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
+ done
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+ include_option=--etags-include; \
+ empty_fix=.; \
+ else \
+ include_option=--include; \
+ empty_fix=; \
+ fi; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ test ! -f $$subdir/TAGS || \
+ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
+ fi; \
+ done; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+check-TESTS: $(TESTS)
+ @failed=0; all=0; xfail=0; xpass=0; skip=0; \
+ srcdir=$(srcdir); export srcdir; \
+ list=' $(TESTS) '; \
+ $(am__tty_colors); \
+ if test -n "$$list"; then \
+ for tst in $$list; do \
+ if test -f ./$$tst; then dir=./; \
+ elif test -f $$tst; then dir=; \
+ else dir="$(srcdir)/"; fi; \
+ if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \
+ all=`expr $$all + 1`; \
+ case " $(XFAIL_TESTS) " in \
+ *[\ \ ]$$tst[\ \ ]*) \
+ xpass=`expr $$xpass + 1`; \
+ failed=`expr $$failed + 1`; \
+ col=$$red; res=XPASS; \
+ ;; \
+ *) \
+ col=$$grn; res=PASS; \
+ ;; \
+ esac; \
+ elif test $$? -ne 77; then \
+ all=`expr $$all + 1`; \
+ case " $(XFAIL_TESTS) " in \
+ *[\ \ ]$$tst[\ \ ]*) \
+ xfail=`expr $$xfail + 1`; \
+ col=$$lgn; res=XFAIL; \
+ ;; \
+ *) \
+ failed=`expr $$failed + 1`; \
+ col=$$red; res=FAIL; \
+ ;; \
+ esac; \
+ else \
+ skip=`expr $$skip + 1`; \
+ col=$$blu; res=SKIP; \
+ fi; \
+ echo "$${col}$$res$${std}: $$tst"; \
+ done; \
+ if test "$$all" -eq 1; then \
+ tests="test"; \
+ All=""; \
+ else \
+ tests="tests"; \
+ All="All "; \
+ fi; \
+ if test "$$failed" -eq 0; then \
+ if test "$$xfail" -eq 0; then \
+ banner="$$All$$all $$tests passed"; \
+ else \
+ if test "$$xfail" -eq 1; then failures=failure; else failures=failures; fi; \
+ banner="$$All$$all $$tests behaved as expected ($$xfail expected $$failures)"; \
+ fi; \
+ else \
+ if test "$$xpass" -eq 0; then \
+ banner="$$failed of $$all $$tests failed"; \
+ else \
+ if test "$$xpass" -eq 1; then passes=pass; else passes=passes; fi; \
+ banner="$$failed of $$all $$tests did not behave as expected ($$xpass unexpected $$passes)"; \
+ fi; \
+ fi; \
+ dashes="$$banner"; \
+ skipped=""; \
+ if test "$$skip" -ne 0; then \
+ if test "$$skip" -eq 1; then \
+ skipped="($$skip test was not run)"; \
+ else \
+ skipped="($$skip tests were not run)"; \
+ fi; \
+ test `echo "$$skipped" | wc -c` -le `echo "$$banner" | wc -c` || \
+ dashes="$$skipped"; \
+ fi; \
+ report=""; \
+ if test "$$failed" -ne 0 && test -n "$(PACKAGE_BUGREPORT)"; then \
+ report="Please report to $(PACKAGE_BUGREPORT)"; \
+ test `echo "$$report" | wc -c` -le `echo "$$banner" | wc -c` || \
+ dashes="$$report"; \
+ fi; \
+ dashes=`echo "$$dashes" | sed s/./=/g`; \
+ if test "$$failed" -eq 0; then \
+ echo "$$grn$$dashes"; \
+ else \
+ echo "$$red$$dashes"; \
+ fi; \
+ echo "$$banner"; \
+ test -z "$$skipped" || echo "$$skipped"; \
+ test -z "$$report" || echo "$$report"; \
+ echo "$$dashes$$std"; \
+ test "$$failed" -eq 0; \
+ else :; fi
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+ @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ test -d "$(distdir)/$$subdir" \
+ || $(MKDIR_P) "$(distdir)/$$subdir" \
+ || exit 1; \
+ fi; \
+ done
+ @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
+ $(am__relativize); \
+ new_distdir=$$reldir; \
+ dir1=$$subdir; dir2="$(top_distdir)"; \
+ $(am__relativize); \
+ new_top_distdir=$$reldir; \
+ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
+ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
+ ($(am__cd) $$subdir && \
+ $(MAKE) $(AM_MAKEFLAGS) \
+ top_distdir="$$new_top_distdir" \
+ distdir="$$new_distdir" \
+ am__remove_distdir=: \
+ am__skip_length_check=: \
+ am__skip_mode_fix=: \
+ distdir) \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+ $(MAKE) $(AM_MAKEFLAGS) check-TESTS
+check: check-recursive
+all-am: Makefile
+installdirs: installdirs-recursive
+installdirs-am:
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-recursive
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-recursive
+ -rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-local \
+ distclean-tags
+
+dvi: dvi-recursive
+
+dvi-am:
+
+html: html-recursive
+
+html-am:
+
+info: info-recursive
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-recursive
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-recursive
+
+install-html-am:
+
+install-info: install-info-recursive
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-recursive
+
+install-pdf-am:
+
+install-ps: install-ps-recursive
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-recursive
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-recursive
+
+pdf-am:
+
+ps: ps-recursive
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) check-am \
+ ctags-recursive install-am install-strip tags-recursive
+
+.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
+ all all-am check check-TESTS check-am clean clean-generic \
+ clean-libtool ctags ctags-recursive distclean \
+ distclean-generic distclean-libtool distclean-local \
+ distclean-tags distdir dvi dvi-am html html-am info info-am \
+ install install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am install-man \
+ install-pdf install-pdf-am install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ installdirs-am maintainer-clean maintainer-clean-generic \
+ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
+ ps ps-am tags tags-recursive uninstall uninstall-am
+
+
+# infixes.test
+
+distclean-local:
+ -rm -rf testSubDir
+
+# infixes.aff
+# infixes.dic
+# infixes.good
+# infixes.test
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/affixes.aff b/extensions/spellcheck/hunspell/tests/unit/data/affixes.aff
new file mode 100644
index 000000000..cf3c50021
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/affixes.aff
@@ -0,0 +1,7 @@
+# simple example for affix compression (see Hunspell(4))
+PFX A Y 1
+PFX A 0 re .
+
+SFX B Y 2
+SFX B 0 ed [^y]
+SFX B y ied y
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/affixes.dic b/extensions/spellcheck/hunspell/tests/unit/data/affixes.dic
new file mode 100644
index 000000000..e228043ef
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/affixes.dic
@@ -0,0 +1,4 @@
+3
+hello
+try/B
+work/AB
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/affixes.good b/extensions/spellcheck/hunspell/tests/unit/data/affixes.good
new file mode 100644
index 000000000..20097e8e6
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/affixes.good
@@ -0,0 +1,7 @@
+hello
+try
+tried
+work
+worked
+rework
+reworked
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/affixes.test b/extensions/spellcheck/hunspell/tests/unit/data/affixes.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/affixes.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="$(dirname "$0")"          # directory holding this script; quoted so paths with spaces survive
+NAME="$(basename "$0" .test)"  # test case name: this script's file name without the .test suffix
+"$DIR/test.sh" "$NAME"         # run test.sh from the same directory for this test case
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias.aff b/extensions/spellcheck/hunspell/tests/unit/data/alias.aff
new file mode 100644
index 000000000..3fbce0ac4
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/alias.aff
@@ -0,0 +1,12 @@
+# aliases for flag vectors (AF)
+# AB -> 1
+# A -> 2
+AF 2
+AF AB
+AF A
+
+SFX A Y 1
+SFX A 0 x .
+
+SFX B Y 1
+SFX B 0 y/2 .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias.dic b/extensions/spellcheck/hunspell/tests/unit/data/alias.dic
new file mode 100644
index 000000000..e0af3c918
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/alias.dic
@@ -0,0 +1,2 @@
+1
+foo/1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias.good b/extensions/spellcheck/hunspell/tests/unit/data/alias.good
new file mode 100644
index 000000000..71702f231
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/alias.good
@@ -0,0 +1,4 @@
+foo
+foox
+fooy
+fooyx
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias.test b/extensions/spellcheck/hunspell/tests/unit/data/alias.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/alias.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="$(dirname "$0")"          # directory holding this script; quoted so paths with spaces survive
+NAME="$(basename "$0" .test)"  # test case name: this script's file name without the .test suffix
+"$DIR/test.sh" "$NAME"         # run test.sh from the same directory for this test case
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias2.aff b/extensions/spellcheck/hunspell/tests/unit/data/alias2.aff
new file mode 100644
index 000000000..66a183833
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/alias2.aff
@@ -0,0 +1,17 @@
+# aliases for flag vectors (AF) and morphological descriptions (AM)
+# AB -> 1
+# A -> 2
+AF 2
+AF AB
+AF A
+
+AM 3
+AM is:affix_x
+AM ds:affix_y
+AM po:noun xx:other_data
+
+SFX A Y 1
+SFX A 0 x . 1
+
+SFX B Y 1
+SFX B 0 y/2 . 2
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias2.dic b/extensions/spellcheck/hunspell/tests/unit/data/alias2.dic
new file mode 100644
index 000000000..60300acee
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/alias2.dic
@@ -0,0 +1,2 @@
+1
+foo/1 3
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias2.good b/extensions/spellcheck/hunspell/tests/unit/data/alias2.good
new file mode 100644
index 000000000..71702f231
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/alias2.good
@@ -0,0 +1,4 @@
+foo
+foox
+fooy
+fooyx
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias2.morph b/extensions/spellcheck/hunspell/tests/unit/data/alias2.morph
new file mode 100644
index 000000000..01f983d57
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/alias2.morph
@@ -0,0 +1,12 @@
+> foo
+analyze(foo) = st:foo po:noun xx:other_data
+stem(foo) = foo
+> foox
+analyze(foox) = st:foo po:noun xx:other_data is:affix_x
+stem(foox) = foo
+> fooy
+analyze(fooy) = st:foo po:noun xx:other_data ds:affix_y
+stem(fooy) = fooy
+> fooyx
+analyze(fooyx) = st:foo po:noun xx:other_data ds:affix_y is:affix_x
+stem(fooyx) = fooy
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias2.test b/extensions/spellcheck/hunspell/tests/unit/data/alias2.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/alias2.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="$(dirname "$0")"          # directory holding this script; quoted so paths with spaces survive
+NAME="$(basename "$0" .test)"  # test case name: this script's file name without the .test suffix
+"$DIR/test.sh" "$NAME"         # run test.sh from the same directory for this test case
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias3.aff b/extensions/spellcheck/hunspell/tests/unit/data/alias3.aff
new file mode 100644
index 000000000..a32818500
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/alias3.aff
@@ -0,0 +1,18 @@
+# morph. aliases with complex prefixes
+COMPLEXPREFIXES
+WORDCHARS _
+
+AM 4
+AM affix_1/
+AM affix_2/
+AM /suffix_1
+AM [stem_1]
+
+PFX A Y 1
+PFX A 0 tek . 1
+
+PFX B Y 1
+PFX B 0 met/A . 2
+
+SFX C Y 1
+SFX C 0 _test_ . 3
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias3.dic b/extensions/spellcheck/hunspell/tests/unit/data/alias3.dic
new file mode 100644
index 000000000..f22567cbe
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/alias3.dic
@@ -0,0 +1,2 @@
+1
+ouro/BC 4
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias3.good b/extensions/spellcheck/hunspell/tests/unit/data/alias3.good
new file mode 100644
index 000000000..6bf822826
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/alias3.good
@@ -0,0 +1,4 @@
+ouro
+metouro
+tekmetouro
+ouro_test_
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias3.morph b/extensions/spellcheck/hunspell/tests/unit/data/alias3.morph
new file mode 100644
index 000000000..33edf5cee
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/alias3.morph
@@ -0,0 +1,8 @@
+> ouro
+analyze(ouro) = [stem_1] ouro:ts
+> metouro
+analyze(metouro) = affix_2/ ouro:ts [stem_1]
+> tekmetouro
+analyze(tekmetouro) = affix_1/ affix_2/ ouro:ts [stem_1]
+> ouro_test_
+analyze(ouro_test_) = [stem_1] ouro:ts /suffix_1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias3.test b/extensions/spellcheck/hunspell/tests/unit/data/alias3.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/alias3.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="$(dirname "$0")"          # directory holding this script; quoted so paths with spaces survive
+NAME="$(basename "$0" .test)"  # test case name: this script's file name without the .test suffix
+"$DIR/test.sh" "$NAME"         # run test.sh from the same directory for this test case
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.aff b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.aff
new file mode 100644
index 000000000..a11762568
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.aff
@@ -0,0 +1,6 @@
+SET UTF-8
+WORDCHARS '.
+
+SFX S N 1
+SFX S 0 's .
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.dic b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.dic
new file mode 100644
index 000000000..7d3cdcc04
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.dic
@@ -0,0 +1,3 @@
+2
+OpenOffice.org
+UNICEF/S
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.good b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.good
new file mode 100644
index 000000000..3afd877d9
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.good
@@ -0,0 +1,4 @@
+OpenOffice.org
+OPENOFFICE.ORG
+UNICEF's
+UNICEF'S
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.sug b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.sug
new file mode 100644
index 000000000..d372ff23d
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.sug
@@ -0,0 +1,3 @@
+OpenOffice.org
+UNICEF
+UNICEF's
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.test b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.test
new file mode 100644
index 000000000..cde7c5410
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="$(dirname "$0")"           # directory holding this script; quoted so paths with spaces survive
+NAME="$(basename "$0" .test)"   # test case name: this script's file name without the .test suffix
+"$DIR/test.sh" "$NAME" -i utf-8 # run test.sh from the same directory, passing "-i utf-8" through
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.wrong
new file mode 100644
index 000000000..668194906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.wrong
@@ -0,0 +1,3 @@
+Openoffice.org
+Unicef
+Unicef's
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps.aff b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.aff
new file mode 100644
index 000000000..57e916bf5
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.aff
@@ -0,0 +1,5 @@
+# check uppercase forms of allcaps word + affix and words with mixed casing
+WORDCHARS '.
+
+SFX S N 1
+SFX S 0 's .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps.dic b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.dic
new file mode 100644
index 000000000..7d3cdcc04
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.dic
@@ -0,0 +1,3 @@
+2
+OpenOffice.org
+UNICEF/S
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps.good b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.good
new file mode 100644
index 000000000..3afd877d9
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.good
@@ -0,0 +1,4 @@
+OpenOffice.org
+OPENOFFICE.ORG
+UNICEF's
+UNICEF'S
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps.sug b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.sug
new file mode 100644
index 000000000..d372ff23d
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.sug
@@ -0,0 +1,3 @@
+OpenOffice.org
+UNICEF
+UNICEF's
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps.test b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="$(dirname "$0")"          # directory holding this script; quoted so paths with spaces survive
+NAME="$(basename "$0" .test)"  # test case name: this script's file name without the .test suffix
+"$DIR/test.sh" "$NAME"         # run test.sh from the same directory for this test case
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps.wrong b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.wrong
new file mode 100644
index 000000000..668194906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.wrong
@@ -0,0 +1,3 @@
+Openoffice.org
+Unicef
+Unicef's
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.aff b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.aff
new file mode 100644
index 000000000..67022d6eb
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.aff
@@ -0,0 +1,6 @@
+# forbidden all caps words are case sensitive
+# iPod -> ipodos ("iPodic" in Hungarian)
+FORBIDDENWORD *
+SFX s N 1
+SFX s 0 os .
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.dic b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.dic
new file mode 100644
index 000000000..be21bfb40
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.dic
@@ -0,0 +1,4 @@
+3
+iPod/s
+iPodos/*
+ipodos
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.good b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.good
new file mode 100644
index 000000000..5fd2f82ce
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.good
@@ -0,0 +1,4 @@
+iPod
+IPOD
+ipodos
+IPODOS
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.sug b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.sug
new file mode 100644
index 000000000..5c312d7b5
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.sug
@@ -0,0 +1,2 @@
+iPod
+ipodos
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.test b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="$(dirname "$0")"          # directory holding this script; quoted so paths with spaces survive
+NAME="$(basename "$0" .test)"  # test case name: this script's file name without the .test suffix
+"$DIR/test.sh" "$NAME"         # run test.sh from the same directory for this test case
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.wrong b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.wrong
new file mode 100644
index 000000000..010967be6
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.wrong
@@ -0,0 +1,2 @@
+ipod
+iPodos
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.aff b/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.aff
new file mode 100644
index 000000000..789818e1a
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.aff
@@ -0,0 +1,10 @@
+# homonym support
+WORDCHARS '
+
+SFX s N 1
+SFX s 0 s .
+
+SFX S N 1
+SFX S 0 's .
+
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.dic b/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.dic
new file mode 100644
index 000000000..e903a0fa9
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.dic
@@ -0,0 +1,7 @@
+6
+UNESCO/S
+Unesco/S
+Nasa/S
+NASA/S
+ACTS
+act/s
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.good b/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.good
new file mode 100644
index 000000000..b9930a24d
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.good
@@ -0,0 +1,13 @@
+UNESCO
+Unesco
+UNESCO's
+Unesco's
+UNESCO'S
+NASA
+Nasa
+NASA's
+Nasa's
+NASA'S
+ACTS
+acts
+Acts
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.test b/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="$(dirname "$0")"          # directory holding this script; quoted so paths with spaces survive
+NAME="$(basename "$0" .test)"  # test case name: this script's file name without the .test suffix
+"$DIR/test.sh" "$NAME"         # run test.sh from the same directory for this test case
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.wrong b/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.wrong
new file mode 100644
index 000000000..89172b824
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.wrong
@@ -0,0 +1,4 @@
+unesco
+unesco's
+nasa
+nasa's
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/arabic.aff b/extensions/spellcheck/hunspell/tests/unit/data/arabic.aff
new file mode 100644
index 000000000..f8dd5cf24
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/arabic.aff
@@ -0,0 +1,6 @@
+SET UTF-8
+TRY أ
+IGNORE ٌٍَُِّْ
+
+PFX Aa Y 1
+PFX Aa 0 0/X0 أ[^ي]
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/arabic.dic b/extensions/spellcheck/hunspell/tests/unit/data/arabic.dic
new file mode 100644
index 000000000..9a2035def
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/arabic.dic
@@ -0,0 +1,2 @@
+1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/arabic.test b/extensions/spellcheck/hunspell/tests/unit/data/arabic.test
new file mode 100644
index 000000000..4d59c4212
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/arabic.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i UTF-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/arabic.wrong b/extensions/spellcheck/hunspell/tests/unit/data/arabic.wrong
new file mode 100644
index 000000000..9b566c364
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/arabic.wrong
@@ -0,0 +1 @@
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base-utf.aff b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.aff
new file mode 100644
index 000000000..493157b30
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.aff
@@ -0,0 +1,198 @@
+# OpenOffice.org’s en_US.aff file
+# with Unicode apostrophe: ’
+
+SET UTF-8
+TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ'
+
+MAXNGRAMSUGS 1
+WORDCHARS .'’
+
+PFX A Y 1
+PFX A 0 re .
+
+PFX I Y 1
+PFX I 0 in .
+
+PFX U Y 1
+PFX U 0 un .
+
+PFX C Y 1
+PFX C 0 de .
+
+PFX E Y 1
+PFX E 0 dis .
+
+PFX F Y 1
+PFX F 0 con .
+
+PFX K Y 1
+PFX K 0 pro .
+
+SFX V N 2
+SFX V e ive e
+SFX V 0 ive [^e]
+
+SFX N Y 3
+SFX N e ion e
+SFX N y ication y
+SFX N 0 en [^ey]
+
+SFX X Y 3
+SFX X e ions e
+SFX X y ications y
+SFX X 0 ens [^ey]
+
+SFX H N 2
+SFX H y ieth y
+SFX H 0 th [^y]
+
+SFX Y Y 1
+SFX Y 0 ly .
+
+SFX G Y 2
+SFX G e ing e
+SFX G 0 ing [^e]
+
+SFX J Y 2
+SFX J e ings e
+SFX J 0 ings [^e]
+
+SFX D Y 4
+SFX D 0 d e
+SFX D y ied [^aeiou]y
+SFX D 0 ed [^ey]
+SFX D 0 ed [aeiou]y
+
+SFX T N 4
+SFX T 0 st e
+SFX T y iest [^aeiou]y
+SFX T 0 est [aeiou]y
+SFX T 0 est [^ey]
+
+SFX R Y 4
+SFX R 0 r e
+SFX R y ier [^aeiou]y
+SFX R 0 er [aeiou]y
+SFX R 0 er [^ey]
+
+SFX Z Y 4
+SFX Z 0 rs e
+SFX Z y iers [^aeiou]y
+SFX Z 0 ers [aeiou]y
+SFX Z 0 ers [^ey]
+
+SFX S Y 4
+SFX S y ies [^aeiou]y
+SFX S 0 s [aeiou]y
+SFX S 0 es [sxzh]
+SFX S 0 s [^sxzhy]
+
+SFX P Y 3
+SFX P y iness [^aeiou]y
+SFX P 0 ness [aeiou]y
+SFX P 0 ness [^y]
+
+SFX M Y 1
+SFX M 0 's .
+
+SFX B Y 3
+SFX B 0 able [^aeiou]
+SFX B 0 able ee
+SFX B e able [^aeiou]e
+
+SFX L Y 1
+SFX L 0 ment .
+
+REP 88
+REP a ei
+REP ei a
+REP a ey
+REP ey a
+REP ai ie
+REP ie ai
+REP are air
+REP are ear
+REP are eir
+REP air are
+REP air ere
+REP ere air
+REP ere ear
+REP ere eir
+REP ear are
+REP ear air
+REP ear ere
+REP eir are
+REP eir ere
+REP ch te
+REP te ch
+REP ch ti
+REP ti ch
+REP ch tu
+REP tu ch
+REP ch s
+REP s ch
+REP ch k
+REP k ch
+REP f ph
+REP ph f
+REP gh f
+REP f gh
+REP i igh
+REP igh i
+REP i uy
+REP uy i
+REP i ee
+REP ee i
+REP j di
+REP di j
+REP j gg
+REP gg j
+REP j ge
+REP ge j
+REP s ti
+REP ti s
+REP s ci
+REP ci s
+REP k cc
+REP cc k
+REP k qu
+REP qu k
+REP kw qu
+REP o eau
+REP eau o
+REP o ew
+REP ew o
+REP oo ew
+REP ew oo
+REP ew ui
+REP ui ew
+REP oo ui
+REP ui oo
+REP ew u
+REP u ew
+REP oo u
+REP u oo
+REP u oe
+REP oe u
+REP u ieu
+REP ieu u
+REP ue ew
+REP ew ue
+REP uff ough
+REP oo ieu
+REP ieu oo
+REP ier ear
+REP ear ier
+REP ear air
+REP air ear
+REP w qu
+REP qu w
+REP z ss
+REP ss z
+REP shun tion
+REP shun sion
+REP shun cion
+McDonalds’sá/w
+McDonald’sszá/g3) st:McDonald’s po:noun_prs is:TRANS
+McDonald’sszal/g4) st:McDonald’s po:noun_prs is:INSTR
+McDonald’ssal/w
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base-utf.dic b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.dic
new file mode 100644
index 000000000..b2b536d28
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.dic
@@ -0,0 +1,29 @@
+28
+created/U
+create/XKVNGADS
+imply/GNSDX
+natural/PUY
+like/USPBY
+convey/BDGS
+look/GZRDS
+text
+hello
+said
+sawyer
+NASA
+rotten
+day
+tomorrow
+seven
+FAQ/SM
+can’t
+doesn’t
+etc
+won’t
+lip
+text
+horrifying
+speech
+suggest
+uncreate/V
+Hunspell
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base-utf.good b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.good
new file mode 100644
index 000000000..4c73e42b8
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.good
@@ -0,0 +1,27 @@
+created
+uncreate
+uncreated
+imply
+implied
+unnatural
+conveyed
+sawyer
+NASA
+FAQs
+can’t
+doesn’t
+won’t
+Created
+Hello
+HELLO
+NASA
+etc.
+etc
+HELLO
+lip.
+text.
+NASA.
+Text.
+TEXT.
+Hunspell.
+HUNSPELL.
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base-utf.sug b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.sug
new file mode 100644
index 000000000..990b640cf
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.sug
@@ -0,0 +1,11 @@
+looked, look
+text, create
+hello
+said
+rotten day, rotten-day, rotten
+tomorrow, rotten
+seven
+NASA
+horrifying
+speech, Hunspell
+suggest
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base-utf.test b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.test
new file mode 100644
index 000000000..4d59c4212
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i UTF-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base-utf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.wrong
new file mode 100644
index 000000000..88a6e2520
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.wrong
@@ -0,0 +1,11 @@
+loooked
+texxt
+hlelo
+seid
+rottenday
+tomorow
+seeeven
+Nasa
+horrorfying
+peech
+sugesst
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base.aff b/extensions/spellcheck/hunspell/tests/unit/data/base.aff
new file mode 100644
index 000000000..632f04b96
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/base.aff
@@ -0,0 +1,192 @@
+# OpenOffice.org's en_US.aff file
+
+SET ISO8859-1
+TRY esianrtolcdugmphbyfvkwz'
+
+WORDCHARS .'
+
+PFX A Y 1
+PFX A 0 re .
+
+PFX I Y 1
+PFX I 0 in .
+
+PFX U Y 1
+PFX U 0 un .
+
+PFX C Y 1
+PFX C 0 de .
+
+PFX E Y 1
+PFX E 0 dis .
+
+PFX F Y 1
+PFX F 0 con .
+
+PFX K Y 1
+PFX K 0 pro .
+
+SFX V N 2
+SFX V e ive e
+SFX V 0 ive [^e]
+
+SFX N Y 3
+SFX N e ion e
+SFX N y ication y
+SFX N 0 en [^ey]
+
+SFX X Y 3
+SFX X e ions e
+SFX X y ications y
+SFX X 0 ens [^ey]
+
+SFX H N 2
+SFX H y ieth y
+SFX H 0 th [^y]
+
+SFX Y Y 1
+SFX Y 0 ly .
+
+SFX G Y 2
+SFX G e ing e
+SFX G 0 ing [^e]
+
+SFX J Y 2
+SFX J e ings e
+SFX J 0 ings [^e]
+
+SFX D Y 4
+SFX D 0 d e
+SFX D y ied [^aeiou]y
+SFX D 0 ed [^ey]
+SFX D 0 ed [aeiou]y
+
+SFX T N 4
+SFX T 0 st e
+SFX T y iest [^aeiou]y
+SFX T 0 est [aeiou]y
+SFX T 0 est [^ey]
+
+SFX R Y 4
+SFX R 0 r e
+SFX R y ier [^aeiou]y
+SFX R 0 er [aeiou]y
+SFX R 0 er [^ey]
+
+SFX Z Y 4
+SFX Z 0 rs e
+SFX Z y iers [^aeiou]y
+SFX Z 0 ers [aeiou]y
+SFX Z 0 ers [^ey]
+
+SFX S Y 4
+SFX S y ies [^aeiou]y
+SFX S 0 s [aeiou]y
+SFX S 0 es [sxzh]
+SFX S 0 s [^sxzhy]
+
+SFX P Y 3
+SFX P y iness [^aeiou]y
+SFX P 0 ness [aeiou]y
+SFX P 0 ness [^y]
+
+SFX M Y 1
+SFX M 0 's .
+
+SFX B Y 3
+SFX B 0 able [^aeiou]
+SFX B 0 able ee
+SFX B e able [^aeiou]e
+
+SFX L Y 1
+SFX L 0 ment .
+
+REP 88
+REP a ei
+REP ei a
+REP a ey
+REP ey a
+REP ai ie
+REP ie ai
+REP are air
+REP are ear
+REP are eir
+REP air are
+REP air ere
+REP ere air
+REP ere ear
+REP ere eir
+REP ear are
+REP ear air
+REP ear ere
+REP eir are
+REP eir ere
+REP ch te
+REP te ch
+REP ch ti
+REP ti ch
+REP ch tu
+REP tu ch
+REP ch s
+REP s ch
+REP ch k
+REP k ch
+REP f ph
+REP ph f
+REP gh f
+REP f gh
+REP i igh
+REP igh i
+REP i uy
+REP uy i
+REP i ee
+REP ee i
+REP j di
+REP di j
+REP j gg
+REP gg j
+REP j ge
+REP ge j
+REP s ti
+REP ti s
+REP s ci
+REP ci s
+REP k cc
+REP cc k
+REP k qu
+REP qu k
+REP kw qu
+REP o eau
+REP eau o
+REP o ew
+REP ew o
+REP oo ew
+REP ew oo
+REP ew ui
+REP ui ew
+REP oo ui
+REP ui oo
+REP ew u
+REP u ew
+REP oo u
+REP u oo
+REP u oe
+REP oe u
+REP u ieu
+REP ieu u
+REP ue ew
+REP ew ue
+REP uff ough
+REP oo ieu
+REP ieu oo
+REP ier ear
+REP ear ier
+REP ear air
+REP air ear
+REP w qu
+REP qu w
+REP z ss
+REP ss z
+REP shun tion
+REP shun sion
+REP shun cion
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base.dic b/extensions/spellcheck/hunspell/tests/unit/data/base.dic
new file mode 100644
index 000000000..5d9b8a28b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/base.dic
@@ -0,0 +1,29 @@
+28
+created/U
+create/XKVNGADS
+imply/GNSDX
+natural/PUY
+like/USPBY
+convey/BDGS
+look/GZRDS
+text
+hello
+said
+sawyer
+NASA
+rotten
+day
+tomorrow
+seven
+FAQ/SM
+can't
+doesn't
+etc
+won't
+lip
+text
+horrifying
+speech
+suggest
+uncreate/V
+Hunspell
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base.good b/extensions/spellcheck/hunspell/tests/unit/data/base.good
new file mode 100644
index 000000000..8e7f88e2b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/base.good
@@ -0,0 +1,27 @@
+created
+uncreate
+uncreated
+imply
+implied
+unnatural
+conveyed
+sawyer
+NASA
+FAQs
+can't
+doesn't
+won't
+Created
+Hello
+HELLO
+NASA
+etc.
+etc
+HELLO
+lip.
+text.
+NASA.
+Text.
+TEXT.
+Hunspell.
+HUNSPELL.
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base.sug b/extensions/spellcheck/hunspell/tests/unit/data/base.sug
new file mode 100644
index 000000000..553280a6a
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/base.sug
@@ -0,0 +1,11 @@
+looked, look
+text
+hello
+said
+rotten day, rotten-day, rotten
+tomorrow
+seven
+NASA
+horrifying
+speech
+suggest
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base.test b/extensions/spellcheck/hunspell/tests/unit/data/base.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/base.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base.wrong b/extensions/spellcheck/hunspell/tests/unit/data/base.wrong
new file mode 100644
index 000000000..88a6e2520
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/base.wrong
@@ -0,0 +1,11 @@
+loooked
+texxt
+hlelo
+seid
+rottenday
+tomorow
+seeeven
+Nasa
+horrorfying
+peech
+sugesst
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/break.aff b/extensions/spellcheck/hunspell/tests/unit/data/break.aff
new file mode 100644
index 000000000..47b8f6b7b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/break.aff
@@ -0,0 +1,8 @@
+# word break points test, recursive break at dash and n-dash
+SET UTF-8
+
+BREAK 2
+BREAK -
+BREAK –
+
+WORDCHARS -–
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/break.dic b/extensions/spellcheck/hunspell/tests/unit/data/break.dic
new file mode 100644
index 000000000..f3d2aa02f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/break.dic
@@ -0,0 +1,4 @@
+3
+foo
+bar
+fox-bax
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/break.good b/extensions/spellcheck/hunspell/tests/unit/data/break.good
new file mode 100644
index 000000000..5f08bfd2f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/break.good
@@ -0,0 +1,7 @@
+foo
+bar
+fox-bax
+foo-bar
+foo–bar
+foo-bar-foo-bar
+foo-bar–foo-bar
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/break.test b/extensions/spellcheck/hunspell/tests/unit/data/break.test
new file mode 100644
index 000000000..cde7c5410
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/break.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i utf-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/break.wrong b/extensions/spellcheck/hunspell/tests/unit/data/break.wrong
new file mode 100644
index 000000000..599ed9f7f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/break.wrong
@@ -0,0 +1,12 @@
+fox
+bax
+-foo
+bar-
+fox-bar
+foo-bax
+foo–bax
+fox–bar
+foo-bar-fox-bar
+foo-bax-foo-bar
+foo-bar–fox-bar
+foo-bax–foo-bar
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.aff b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.aff
new file mode 100644
index 000000000..a13f464a6
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.aff
@@ -0,0 +1,6 @@
+# default word break at hyphens and n-dashes
+
+SET UTF-8
+MAXNGRAMSUGS 0
+WORDCHARS -
+TRY ot
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.dic b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.dic
new file mode 100644
index 000000000..bf2996035
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.dic
@@ -0,0 +1,6 @@
+3
+foo
+bar
+free
+scott
+scot-free
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.good b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.good
new file mode 100644
index 000000000..8d8125457
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.good
@@ -0,0 +1,7 @@
+foo
+bar
+foo-
+-foo
+scot-free
+foo-bar
+foo-bar-foo-bar
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.sug b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.sug
new file mode 100644
index 000000000..8bfc69d93
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.sug
@@ -0,0 +1,3 @@
+scott
+scot-free
+foo-bar
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.test b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.test
new file mode 100644
index 000000000..cde7c5410
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i utf-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.wrong b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.wrong
new file mode 100644
index 000000000..c3b203a7f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.wrong
@@ -0,0 +1,3 @@
+scot
+sco-free
+fo-bar
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakoff.aff b/extensions/spellcheck/hunspell/tests/unit/data/breakoff.aff
new file mode 100644
index 000000000..2e83d3802
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/breakoff.aff
@@ -0,0 +1,7 @@
+# switch off default word break at hyphens and n-dashes by BREAK 0
+SET UTF-8
+MAXNGRAMSUGS 0
+WORDCHARS -
+TRY ot
+
+BREAK 0
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakoff.dic b/extensions/spellcheck/hunspell/tests/unit/data/breakoff.dic
new file mode 100644
index 000000000..bf2996035
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/breakoff.dic
@@ -0,0 +1,6 @@
+3
+foo
+bar
+free
+scott
+scot-free
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakoff.good b/extensions/spellcheck/hunspell/tests/unit/data/breakoff.good
new file mode 100644
index 000000000..854b39efa
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/breakoff.good
@@ -0,0 +1,3 @@
+foo
+bar
+scot-free
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakoff.test b/extensions/spellcheck/hunspell/tests/unit/data/breakoff.test
new file mode 100644
index 000000000..cde7c5410
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/breakoff.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i utf-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakoff.wrong b/extensions/spellcheck/hunspell/tests/unit/data/breakoff.wrong
new file mode 100644
index 000000000..a6fcf7f1e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/breakoff.wrong
@@ -0,0 +1,5 @@
+foo-
+-foo
+foo-bar
+foo-bar-foo-bar
+scot
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.aff b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.aff
new file mode 100644
index 000000000..7ac46eeab
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.aff
@@ -0,0 +1,3 @@
+# forbid upper case letters at word bounds in compounding
+CHECKCOMPOUNDCASE
+COMPOUNDFLAG A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.dic b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.dic
new file mode 100644
index 000000000..80f65d38f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.dic
@@ -0,0 +1,5 @@
+4
+foo/A
+Bar/A
+BAZ/A
+-/A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.good b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.good
new file mode 100644
index 000000000..9cbd79064
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.good
@@ -0,0 +1,5 @@
+Barfoo
+foo-Bar
+foo-BAZ
+BAZ-foo
+BAZ-Bar
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.test b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.test
new file mode 100644
index 000000000..dc295077f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i ISO8859-1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.wrong
new file mode 100644
index 000000000..0714c22e5
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.wrong
@@ -0,0 +1,3 @@
+fooBar
+BAZBar
+BAZfoo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.aff b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.aff
new file mode 100644
index 000000000..fea046b19
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.aff
@@ -0,0 +1,3 @@
+# check extended ascii
+CHECKCOMPOUNDCASE
+COMPOUNDFLAG A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.dic b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.dic
new file mode 100644
index 000000000..086de0aed
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.dic
@@ -0,0 +1,3 @@
+2
+o/A
+o/A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.good b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.good
new file mode 100644
index 000000000..b38fd0c6c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.good
@@ -0,0 +1,2 @@
+oo
+oo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.test b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.test
new file mode 100644
index 000000000..dc295077f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i ISO8859-1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.wrong
new file mode 100644
index 000000000..94786e95b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.wrong
@@ -0,0 +1 @@
+oo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.aff b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.aff
new file mode 100644
index 000000000..546f478a5
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.aff
@@ -0,0 +1,3 @@
+SET UTF-8
+CHECKCOMPOUNDCASE
+COMPOUNDFLAG A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.dic b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.dic
new file mode 100644
index 000000000..0b7fbc9ac
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.dic
@@ -0,0 +1,3 @@
+2
+áoó/A
+Óoá/A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.good b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.good
new file mode 100644
index 000000000..32ae1353c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.good
@@ -0,0 +1,2 @@
+áoóáoó
+Óoááoó
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.test b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.test
new file mode 100644
index 000000000..cde7c5410
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i utf-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.wrong
new file mode 100644
index 000000000..07434ccae
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.wrong
@@ -0,0 +1 @@
+áoóÓoá
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.aff b/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.aff
new file mode 100644
index 000000000..5cd357a5a
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.aff
@@ -0,0 +1,3 @@
+# Forbid word duplication in compounds (e.g. foofoo)
+CHECKCOMPOUNDDUP
+COMPOUNDFLAG A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.dic b/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.dic
new file mode 100644
index 000000000..8ac75f4fc
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.dic
@@ -0,0 +1,3 @@
+2
+foo/A
+bar/A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.good b/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.good
new file mode 100644
index 000000000..3866f24ca
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.good
@@ -0,0 +1,5 @@
+barfoo
+foobar
+foofoobar
+foobarfoo
+barfoobarfoo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.test b/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.wrong
new file mode 100644
index 000000000..5e809b3d8
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.wrong
@@ -0,0 +1,3 @@
+foofoo
+foofoofoo
+foobarbar
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.aff b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.aff
new file mode 100644
index 000000000..dfda51af2
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.aff
@@ -0,0 +1,5 @@
+# forbid compounds with spec. pattern at word bounds
+COMPOUNDFLAG A
+CHECKCOMPOUNDPATTERN 2
+CHECKCOMPOUNDPATTERN nny ny
+CHECKCOMPOUNDPATTERN ssz sz
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.dic b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.dic
new file mode 100644
index 000000000..09300f0bc
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.dic
@@ -0,0 +1,5 @@
+4
+knny/A
+nyels/A
+hossz/A
+szmts/A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.good b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.good
new file mode 100644
index 000000000..0f99c52d2
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.good
@@ -0,0 +1,2 @@
+knnyszmts
+hossznyels
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.test b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.test
new file mode 100644
index 000000000..dc295077f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i ISO8859-1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.wrong
new file mode 100644
index 000000000..5edd11534
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.wrong
@@ -0,0 +1,4 @@
+knnynyels
+hosszszmts
+hosszknnynyels
+knnynyelshossz
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.aff b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.aff
new file mode 100644
index 000000000..fdf6560b4
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.aff
@@ -0,0 +1,7 @@
+# forbid compounds with spec. pattern at word bound and allow modified form
+# (for German and Indian languages)
+COMPOUNDFLAG A
+CHECKCOMPOUNDPATTERN 2
+CHECKCOMPOUNDPATTERN o b z
+CHECKCOMPOUNDPATTERN oo ba u
+COMPOUNDMIN 1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.dic b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.dic
new file mode 100644
index 000000000..8ac75f4fc
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.dic
@@ -0,0 +1,3 @@
+2
+foo/A
+bar/A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.good b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.good
new file mode 100644
index 000000000..eaad4f902
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.good
@@ -0,0 +1,3 @@
+barfoo
+fozar
+fur
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.test b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.test
new file mode 100644
index 000000000..dc295077f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i ISO8859-1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.wrong
new file mode 100644
index 000000000..323fae03f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.wrong
@@ -0,0 +1 @@
+foobar
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.aff b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.aff
new file mode 100644
index 000000000..6c2cfa4aa
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.aff
@@ -0,0 +1,6 @@
+# forbid compounds with spec. pattern at word bound and allow modified form
+# (for Indian languages)
+COMPOUNDFLAG A
+CHECKCOMPOUNDPATTERN 1
+CHECKCOMPOUNDPATTERN o/X b/Y z
+COMPOUNDMIN 1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.dic b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.dic
new file mode 100644
index 000000000..6bd1b7fc9
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.dic
@@ -0,0 +1,5 @@
+4
+foo/A
+boo/AX
+bar/A
+ban/AY
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.good b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.good
new file mode 100644
index 000000000..6070eff5c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.good
@@ -0,0 +1,9 @@
+bozan
+barfoo
+banfoo
+banbar
+foobar
+fooban
+foobanbar
+boobar
+boobarfoo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.test b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.test
new file mode 100644
index 000000000..dc295077f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i ISO8859-1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.wrong
new file mode 100644
index 000000000..41d8d3747
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.wrong
@@ -0,0 +1,8 @@
+booban
+boobanfoo
+fozar
+fozarfoo
+fozan
+fozanfoo
+bozar
+bozarfoo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.aff b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.aff
new file mode 100644
index 000000000..ef2566308
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.aff
@@ -0,0 +1,8 @@
+# sandhi in Telugu writing system, based on Kiran Chittella's example
+
+COMPOUNDFLAG x
+COMPOUNDMIN 1
+CHECKCOMPOUNDPATTERN 2
+CHECKCOMPOUNDPATTERN a/A u/A O
+CHECKCOMPOUNDPATTERN u/B u/B u
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.dic b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.dic
new file mode 100644
index 000000000..d245ef019
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.dic
@@ -0,0 +1,6 @@
+4
+sUrya/Ax
+udayaM/Ax
+pEru/Bx
+unna/Bx
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.good b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.good
new file mode 100644
index 000000000..48761b6ee
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.good
@@ -0,0 +1,2 @@
+sUryOdayaM
+pErunna
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.test b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.test
new file mode 100644
index 000000000..dc295077f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i ISO8859-1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.wrong
new file mode 100644
index 000000000..a357fec52
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.wrong
@@ -0,0 +1,2 @@
+sUryaudayaM
+pEruunna
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.aff b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.aff
new file mode 100644
index 000000000..4fb7ff55e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.aff
@@ -0,0 +1,8 @@
+// forbid compound word, if it is also a non compound word with a REP fault
+// For example: Hungarian `szervíz' (szer+víz) compound word is forbidden, because
+// this word is also a dictionary word (szerviz) with typical fault (i->í)
+CHECKCOMPOUNDREP
+COMPOUNDFLAG A
+
+REP 1
+REP i
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.dic b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.dic
new file mode 100644
index 000000000..030bda916
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.dic
@@ -0,0 +1,5 @@
+3
+szer/A
+vz/A
+szerviz
+kocsi/A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.good b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.good
new file mode 100644
index 000000000..c95c03c87
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.good
@@ -0,0 +1,2 @@
+vzszer
+szerkocsi
\ No newline at end of file
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.test b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.test
new file mode 100644
index 000000000..dc295077f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i ISO8859-1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.wrong
new file mode 100644
index 000000000..8c8701d47
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.wrong
@@ -0,0 +1,3 @@
+szervz
+szervzkocsi
+kocsiszervz
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.aff b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.aff
new file mode 100644
index 000000000..7159cf55d
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.aff
@@ -0,0 +1,3 @@
+# Forbid compound word with triple letters
+CHECKCOMPOUNDTRIPLE
+COMPOUNDFLAG A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.dic b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.dic
new file mode 100644
index 000000000..607c489e8
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.dic
@@ -0,0 +1,5 @@
+4
+foo/A
+opera/A
+eel/A
+bare/A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.good b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.good
new file mode 100644
index 000000000..1293f749a
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.good
@@ -0,0 +1,6 @@
+operafoo
+operaeel
+operabare
+eelbare
+eelfoo
+eelopera
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.test b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.wrong
new file mode 100644
index 000000000..ae2d02b20
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.wrong
@@ -0,0 +1,2 @@
+fooopera
+bareeel
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharps.aff b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.aff
new file mode 100644
index 000000000..6b22c7390
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.aff
@@ -0,0 +1,4 @@
+# test - SS special capitalizing
+CHECKSHARPS
+WORDCHARS .
+KEEPCASE k
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharps.dic b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.dic
new file mode 100644
index 000000000..91d14ab9e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.dic
@@ -0,0 +1,7 @@
+6
+mig/k
+Aussto
+Absto.
+Auenabmessung
+Prozessionsstrae
+Auenmae
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharps.good b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.good
new file mode 100644
index 000000000..e9be8c5c7
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.good
@@ -0,0 +1,13 @@
+mig
+Mig
+MSSIG
+Aussto
+Absto.
+Auenabmessung
+Prozessionsstrae
+Auenmae
+AUSSTOSS
+ABSTOSS.
+AUSSENABMESSUNG
+PROZESSIONSSTRASSE
+AUSSENMASSE
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharps.sug b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.sug
new file mode 100644
index 000000000..52c6a943b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.sug
@@ -0,0 +1 @@
+MSSIG, mig
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharps.test b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.test
new file mode 100644
index 000000000..dc295077f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i ISO8859-1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharps.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.wrong
new file mode 100644
index 000000000..96eb8aea7
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.wrong
@@ -0,0 +1 @@
+MIG
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.aff b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.aff
new file mode 100644
index 000000000..86c0fc426
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.aff
@@ -0,0 +1,5 @@
+# test - SS special capitalizing in UTF-8
+SET UTF-8
+CHECKSHARPS
+WORDCHARS ß.
+KEEPCASE k
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.dic b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.dic
new file mode 100644
index 000000000..9cc364eec
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.dic
@@ -0,0 +1,7 @@
+6
+müßig/k
+Ausstoß
+Abstoß.
+Außenabmessung
+Prozessionsstraße
+Außenmaße
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.good b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.good
new file mode 100644
index 000000000..a61c24319
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.good
@@ -0,0 +1,13 @@
+müßig
+Müßig
+MÜSSIG
+Ausstoß
+Abstoß.
+Außenabmessung
+Prozessionsstraße
+Außenmaße
+AUSSTOSS
+ABSTOSS.
+AUSSENABMESSUNG
+PROZESSIONSSTRASSE
+AUSSENMASSE
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.sug b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.sug
new file mode 100644
index 000000000..ab68568e5
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.sug
@@ -0,0 +1 @@
+MÜSSIG, müßig
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.test b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.test
new file mode 100644
index 000000000..cde7c5410
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i utf-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.wrong
new file mode 100644
index 000000000..25eb03dce
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.wrong
@@ -0,0 +1 @@
+MÜßIG
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/circumfix.aff b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.aff
new file mode 100644
index 000000000..1eecc644b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.aff
@@ -0,0 +1,16 @@
+# circumfixes: ~ obligate prefix/suffix combinations
+# superlative in Hungarian: leg- (prefix) AND -bb (suffix)
+
+CIRCUMFIX X
+
+PFX A Y 1
+PFX A 0 leg/X .
+
+PFX B Y 1
+PFX B 0 legesleg/X .
+
+SFX C Y 3
+SFX C 0 obb . is:COMPARATIVE
+SFX C 0 obb/AX . is:SUPERLATIVE
+SFX C 0 obb/BX . is:SUPERSUPERLATIVE
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/circumfix.dic b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.dic
new file mode 100644
index 000000000..ba96f046d
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.dic
@@ -0,0 +1,2 @@
+1
+nagy/C po:adj
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/circumfix.good b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.good
new file mode 100644
index 000000000..65049d9f0
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.good
@@ -0,0 +1,4 @@
+nagy
+nagyobb
+legnagyobb
+legeslegnagyobb
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/circumfix.morph b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.morph
new file mode 100644
index 000000000..62e6c5371
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.morph
@@ -0,0 +1,12 @@
+> nagy
+analyze(nagy) = st:nagy po:adj
+stem(nagy) = nagy
+> nagyobb
+analyze(nagyobb) = st:nagy po:adj is:COMPARATIVE
+stem(nagyobb) = nagy
+> legnagyobb
+analyze(legnagyobb) = fl:A st:nagy po:adj is:SUPERLATIVE
+stem(legnagyobb) = nagy
+> legeslegnagyobb
+analyze(legeslegnagyobb) = fl:B st:nagy po:adj is:SUPERSUPERLATIVE
+stem(legeslegnagyobb) = nagy
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/circumfix.test b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/circumfix.wrong b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.wrong
new file mode 100644
index 000000000..bab8084ee
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.wrong
@@ -0,0 +1,2 @@
+legnagy
+legeslegnagy
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/colons-in-words.aff b/extensions/spellcheck/hunspell/tests/unit/data/colons-in-words.aff
new file mode 100644
index 000000000..d08022694
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/colons-in-words.aff
@@ -0,0 +1,3 @@
+# Colons in Finnish and Swedish words. Problem reported by Lars Aronsson.
+# Parsing test (src/parsers)
+WORDCHARS :
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/colons-in-words.dic b/extensions/spellcheck/hunspell/tests/unit/data/colons-in-words.dic
new file mode 100644
index 000000000..bfea1ccc7
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/colons-in-words.dic
@@ -0,0 +1,4 @@
+2
+c:a
+S:t
+foo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/colons-in-words.test b/extensions/spellcheck/hunspell/tests/unit/data/colons-in-words.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/colons-in-words.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.aff b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.aff
new file mode 100644
index 000000000..7ddb497a6
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.aff
@@ -0,0 +1,9 @@
+# set twofold prefix stripping
+# Coptic example by Moheb Mekhaiel
+COMPLEXPREFIXES
+
+PFX A Y 1
+PFX A 0 tek .
+
+PFX B Y 1
+PFX B 0 met/A .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.dic b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.dic
new file mode 100644
index 000000000..2618c7cf4
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.dic
@@ -0,0 +1,3 @@
+1
+ouro/B
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.good b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.good
new file mode 100644
index 000000000..eed87a774
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.good
@@ -0,0 +1,3 @@
+ouro
+metouro
+tekmetouro
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.test b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.wrong b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.wrong
new file mode 100644
index 000000000..fb1c8b483
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.wrong
@@ -0,0 +1,2 @@
+tekouro
+mettekouro
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.aff b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.aff
new file mode 100644
index 000000000..b4fe1dca6
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.aff
@@ -0,0 +1,12 @@
+# complex prefixes with morphological analysis
+COMPLEXPREFIXES
+WORDCHARS _
+
+PFX A Y 1
+PFX A 0 tek . affix_1/
+
+PFX B Y 1
+PFX B 0 met/A . affix_2/
+
+SFX C Y 1
+SFX C 0 _test_ . /suffix_1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.dic b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.dic
new file mode 100644
index 000000000..7e4baf06c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.dic
@@ -0,0 +1,3 @@
+1
+ouro/BC [stem_1]
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.good b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.good
new file mode 100644
index 000000000..6bf822826
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.good
@@ -0,0 +1,4 @@
+ouro
+metouro
+tekmetouro
+ouro_test_
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.test b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.aff b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.aff
new file mode 100644
index 000000000..3991e9f5c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.aff
@@ -0,0 +1,12 @@
+# Coptic example by Moheb Mekhaiel
+# Encoded with the new Coptic character encoding of Unicode 4.1
+SET UTF-8
+
+# set twofold prefix stripping
+COMPLEXPREFIXES
+
+PFX A Y 1
+PFX A 0 ⲧⲉⲕ .
+
+PFX B Y 1
+PFX B 0 ⲙⲉⲧ/A .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.dic b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.dic
new file mode 100644
index 000000000..bd0eb6df0
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.dic
@@ -0,0 +1,2 @@
+1
+ⲟⲩⲣⲟ/B
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.good b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.good
new file mode 100644
index 000000000..7eb956619
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.good
@@ -0,0 +1,3 @@
+ⲟⲩⲣⲟ
+ⲙⲉⲧⲟⲩⲣⲟ
+ⲧⲉⲕⲙⲉⲧⲟⲩⲣⲟ
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.test b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.test
new file mode 100644
index 000000000..cde7c5410
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i utf-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.wrong
new file mode 100644
index 000000000..d8021fc44
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.wrong
@@ -0,0 +1,2 @@
+ⲧⲉⲕⲟⲩⲣⲟ
+ⲙⲉⲧⲧⲉⲕⲟⲩⲣⲟ
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.aff
new file mode 100644
index 000000000..cae5669c2
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.aff
@@ -0,0 +1,7 @@
+COMPOUNDFLAG X
+
+PFX P Y 1
+PFX P 0 pre .
+
+SFX S Y 1
+SFX S 0 suf .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.dic
new file mode 100644
index 000000000..eba6b83fb
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.dic
@@ -0,0 +1,3 @@
+2
+foo/XPS
+bar/XPS
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.good
new file mode 100644
index 000000000..af1f0019a
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.good
@@ -0,0 +1,6 @@
+foo
+foofoo
+prefoo
+foosuf
+prefoosuf
+prefoobarsuf
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.wrong
new file mode 100644
index 000000000..b7e4067bc
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.wrong
@@ -0,0 +1,3 @@
+foosufbar
+fooprebarsuf
+prefooprebarsuf
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.aff
new file mode 100644
index 000000000..1cac16e11
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.aff
@@ -0,0 +1,8 @@
+COMPOUNDFLAG X
+COMPOUNDPERMITFLAG Y
+
+PFX P Y 1
+PFX P 0 pre/Y .
+
+SFX S Y 1
+SFX S 0 suf/Y .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.dic
new file mode 100644
index 000000000..eba6b83fb
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.dic
@@ -0,0 +1,3 @@
+2
+foo/XPS
+bar/XPS
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.good
new file mode 100644
index 000000000..9f3020da0
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.good
@@ -0,0 +1,8 @@
+foo
+prefoo
+foosuf
+prefoosuf
+prefoobarsuf
+foosufbar
+fooprebarsuf
+prefooprebarsuf
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.aff
new file mode 100644
index 000000000..98a12b56c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.aff
@@ -0,0 +1,8 @@
+COMPOUNDFLAG X
+COMPOUNDFORBIDFLAG Z
+
+PFX P Y 1
+PFX P 0 pre/Z .
+
+SFX S Y 1
+SFX S 0 suf/Z .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.dic
new file mode 100644
index 000000000..eba6b83fb
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.dic
@@ -0,0 +1,3 @@
+2
+foo/XPS
+bar/XPS
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.good
new file mode 100644
index 000000000..76cc08eae
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.good
@@ -0,0 +1,5 @@
+foo
+foofoo
+prefoo
+foosuf
+prefoosuf
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.wrong
new file mode 100644
index 000000000..d92b90b28
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.wrong
@@ -0,0 +1,6 @@
+prefoobarsuf
+foosufbar
+fooprebar
+foosufprebar
+fooprebarsuf
+prefooprebarsuf
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.aff
new file mode 100644
index 000000000..bc8369ceb
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.aff
@@ -0,0 +1,3 @@
+COMPOUNDMIN 3
+COMPOUNDFLAG A
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.dic
new file mode 100644
index 000000000..d1ea8e96e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.dic
@@ -0,0 +1,5 @@
+4
+foo/A
+bar/A
+xy/A
+yz/A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.good
new file mode 100644
index 000000000..21cc29f2f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.good
@@ -0,0 +1,3 @@
+foobar
+barfoo
+foobarfoo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.wrong
new file mode 100644
index 000000000..c185bf150
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.wrong
@@ -0,0 +1,4 @@
+xyyz
+fooxy
+xyfoo
+fooxybar
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.aff
new file mode 100644
index 000000000..09309e0aa
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.aff
@@ -0,0 +1,3 @@
+COMPOUNDMIN 1
+COMPOUNDRULE 1
+COMPOUNDRULE ABC
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.dic
new file mode 100644
index 000000000..b11e8291e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.dic
@@ -0,0 +1,5 @@
+3
+a/A
+b/B
+c/BC
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.good
new file mode 100644
index 000000000..c7a0763bb
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.good
@@ -0,0 +1,2 @@
+abc
+acc
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.wrong
new file mode 100644
index 000000000..bc151ea02
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.wrong
@@ -0,0 +1,39 @@
+ba
+aaabaaa
+bbaaa
+aaaaba
+bbbbbaa
+aa
+aaa
+aaaa
+ab
+aab
+aaab
+aaaab
+abb
+aabb
+aaabbb
+bb
+bbb
+bbbb
+aaab
+abcc
+abbc
+abbcc
+aabc
+aabcc
+aabbc
+aabbcc
+aaabbbccc
+ac
+aac
+aacc
+aaaccc
+bc
+bcc
+bbc
+bbcc
+bbbccc
+cc
+ccc
+cccccc
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.aff
new file mode 100644
index 000000000..e4b86a53b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.aff
@@ -0,0 +1,3 @@
+COMPOUNDMIN 1
+COMPOUNDRULE 1
+COMPOUNDRULE A*B*C*
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.dic
new file mode 100644
index 000000000..7d07bbc89
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.dic
@@ -0,0 +1,5 @@
+3
+a/A
+b/B
+c/C
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.good
new file mode 100644
index 000000000..de743bb06
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.good
@@ -0,0 +1,37 @@
+aa
+aaa
+aaaa
+ab
+aab
+aaab
+aaaab
+abb
+aabb
+aaabbb
+bb
+bbb
+bbbb
+aaab
+abc
+abcc
+abbc
+abbcc
+aabc
+aabcc
+aabbc
+aabbcc
+aaabbbccc
+ac
+acc
+aac
+aacc
+aaaccc
+bc
+bcc
+bbc
+bbcc
+bbbccc
+cc
+ccc
+cccccc
+abcc
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.wrong
new file mode 100644
index 000000000..9e5d38d35
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.wrong
@@ -0,0 +1,8 @@
+ba
+aaabaaa
+bbaaa
+aaaaba
+bbbbbaa
+cba
+cab
+acb
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.aff
new file mode 100644
index 000000000..005314586
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.aff
@@ -0,0 +1,3 @@
+COMPOUNDMIN 1
+COMPOUNDRULE 1
+COMPOUNDRULE A?B?C?
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.dic
new file mode 100644
index 000000000..7d07bbc89
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.dic
@@ -0,0 +1,5 @@
+3
+a/A
+b/B
+c/C
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.good
new file mode 100644
index 000000000..7f518893e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.good
@@ -0,0 +1,7 @@
+a
+b
+c
+ab
+abc
+ac
+bc
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.wrong
new file mode 100644
index 000000000..6bd1d8004
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.wrong
@@ -0,0 +1,41 @@
+aa
+aaa
+aaaa
+aab
+aaab
+aaaab
+abb
+aabb
+aaabbb
+bb
+bbb
+bbbb
+aaab
+abcc
+abbc
+abbcc
+aabc
+aabcc
+aabbc
+aabbcc
+aaabbbccc
+acc
+aac
+aacc
+aaaccc
+bcc
+bbc
+bbcc
+bbbccc
+cc
+ccc
+cccccc
+abcc
+ba
+aaabaaa
+bbaaa
+aaaaba
+bbbbbaa
+cba
+cab
+acb
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.aff
new file mode 100644
index 000000000..8a9996cb3
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.aff
@@ -0,0 +1,7 @@
+# English ordinal numbers
+WORDCHARS 0123456789
+COMPOUNDMIN 1
+ONLYINCOMPOUND c
+COMPOUNDRULE 2
+COMPOUNDRULE n*1t
+COMPOUNDRULE n*mp
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.dic
new file mode 100644
index 000000000..ced0735ec
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.dic
@@ -0,0 +1,24 @@
+22
+0/nm
+1/n1
+2/nm
+3/nm
+4/nm
+5/nm
+6/nm
+7/nm
+8/nm
+9/nm
+0th/pt
+1st/p
+1th/tc
+2nd/p
+2th/tc
+3rd/p
+3th/tc
+4th/pt
+5th/pt
+6th/pt
+7th/pt
+8th/pt
+9th/pt
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.good
new file mode 100644
index 000000000..fafe64a5c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.good
@@ -0,0 +1,29 @@
+1st
+2nd
+3rd
+4th
+5th
+6th
+7th
+8th
+9th
+10th
+11th
+12th
+13th
+14th
+15th
+16th
+17th
+18th
+19th
+20th
+21st
+22nd
+23rd
+24th
+25th
+100th
+1000th
+10001st
+10011th
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.test
new file mode 100644
index 000000000..52e144cb8
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.test
@@ -0,0 +1,6 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
+
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.wrong
new file mode 100644
index 000000000..99f28e7cc
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.wrong
@@ -0,0 +1,5 @@
+1th
+2th
+3th
+10001th
+10011st
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.aff
new file mode 100644
index 000000000..46502460b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.aff
@@ -0,0 +1,7 @@
+# number + percent
+SET UTF-8
+COMPOUNDMIN 1
+COMPOUNDRULE 2
+COMPOUNDRULE N*%?
+COMPOUNDRULE NN*.NN*%?
+WORDCHARS 0123456789‰.
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.dic
new file mode 100644
index 000000000..eeeffdac5
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.dic
@@ -0,0 +1,14 @@
+13
+0/N po:num
+1/N po:num
+2/N po:num
+3/N po:num
+4/N po:num
+5/N po:num
+6/N po:num
+7/N po:num
+8/N po:num
+9/N po:num
+./. po:sign_dot
+%/% po:sign_percent
+‰/% po:sign_per_mille
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.good
new file mode 100644
index 000000000..691fca1fb
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.good
@@ -0,0 +1,7 @@
+10%
+0.2%
+0.20%
+123.4561‰
+10
+0000
+10.25
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.morph b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.morph
new file mode 100644
index 000000000..107a80859
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.morph
@@ -0,0 +1,21 @@
+> 10%
+analyze(10%) = pa:1 st:1 po:num pa:0 st:0 po:num pa:% st:% po:sign_percent
+stem(10%) = 10%
+> 0.2%
+analyze(0.2%) = pa:0 st:0 po:num pa:. st:. po:sign_dot pa:2 st:2 po:num pa:% st:% po:sign_percent
+stem(0.2%) = 0.2%
+> 0.20%
+analyze(0.20%) = pa:0 st:0 po:num pa:. st:. po:sign_dot pa:2 st:2 po:num pa:0 st:0 po:num pa:% st:% po:sign_percent
+stem(0.20%) = 0.20%
+> 123.4561‰
+analyze(123.4561‰) = pa:1 st:1 po:num pa:2 st:2 po:num pa:3 st:3 po:num pa:. st:. po:sign_dot pa:4 st:4 po:num pa:5 st:5 po:num pa:6 st:6 po:num pa:1 st:1 po:num pa:‰ st:‰ po:sign_per_mille
+stem(123.4561‰) = 123.4561‰
+> 10
+analyze(10) = pa:1 st:1 po:num pa:0 st:0 po:num
+stem(10) = 10
+> 0000
+analyze(0000) = pa:0 st:0 po:num pa:0 st:0 po:num pa:0 st:0 po:num pa:0 st:0 po:num
+stem(0000) = 0000
+> 10.25
+analyze(10.25) = pa:1 st:1 po:num pa:0 st:0 po:num pa:. st:. po:sign_dot pa:2 st:2 po:num pa:5 st:5 po:num
+stem(10.25) = 10.25
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.test
new file mode 100644
index 000000000..cde7c5410
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i utf-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.wrong
new file mode 100644
index 000000000..ba1fe3290
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.wrong
@@ -0,0 +1 @@
+.25
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.aff
new file mode 100644
index 000000000..e8a088d5a
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.aff
@@ -0,0 +1,4 @@
+COMPOUNDMIN 1
+COMPOUNDRULE 2
+COMPOUNDRULE A*A
+COMPOUNDRULE A*AAB*BBBC*C
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.dic
new file mode 100644
index 000000000..7d07bbc89
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.dic
@@ -0,0 +1,5 @@
+3
+a/A
+b/B
+c/C
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.good
new file mode 100644
index 000000000..55a8f8bc5
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.good
@@ -0,0 +1,4 @@
+aa
+aaaaaa
+aabbbc
+aaaaabbbbbbcccccc
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.wrong
new file mode 100644
index 000000000..48b376dac
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.wrong
@@ -0,0 +1,4 @@
+abc
+abbbbbccccccc
+aabbccccccc
+aabbbbbbb
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.aff
new file mode 100644
index 000000000..3ae1fc784
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.aff
@@ -0,0 +1,8 @@
+# English ordinal numbers (parenthesized long flags)
+FLAG long
+WORDCHARS 0123456789
+COMPOUNDMIN 1
+ONLYINCOMPOUND cc
+COMPOUNDRULE 2
+COMPOUNDRULE (nn)*(11)(tt)
+COMPOUNDRULE (nn)*(mm)(pp)
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.dic
new file mode 100644
index 000000000..ad4bb4d28
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.dic
@@ -0,0 +1,24 @@
+22
+0/nnmm
+1/nn11
+2/nnmm
+3/nnmm
+4/nnmm
+5/nnmm
+6/nnmm
+7/nnmm
+8/nnmm
+9/nnmm
+0th/pptt
+1st/pp
+1th/ttcc
+2nd/pp
+2th/ttcc
+3rd/pp
+3th/ttcc
+4th/pptt
+5th/pptt
+6th/pptt
+7th/pptt
+8th/pptt
+9th/pptt
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.good
new file mode 100644
index 000000000..fafe64a5c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.good
@@ -0,0 +1,29 @@
+1st
+2nd
+3rd
+4th
+5th
+6th
+7th
+8th
+9th
+10th
+11th
+12th
+13th
+14th
+15th
+16th
+17th
+18th
+19th
+20th
+21st
+22nd
+23rd
+24th
+25th
+100th
+1000th
+10001st
+10011th
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.test
new file mode 100644
index 000000000..52e144cb8
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.test
@@ -0,0 +1,6 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
+
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.wrong
new file mode 100644
index 000000000..99f28e7cc
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.wrong
@@ -0,0 +1,5 @@
+1th
+2th
+3th
+10001th
+10011st
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.aff
new file mode 100644
index 000000000..03a423d48
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.aff
@@ -0,0 +1,8 @@
+# English ordinal numbers (parenthesized numerical flags)
+FLAG num
+WORDCHARS 0123456789
+COMPOUNDMIN 1
+ONLYINCOMPOUND 1000
+COMPOUNDRULE 2
+COMPOUNDRULE (1001)*(1002)(2001)
+COMPOUNDRULE (1001)*(2002)(2000)
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.dic
new file mode 100644
index 000000000..e156e95fe
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.dic
@@ -0,0 +1,24 @@
+22
+0/1001,2002
+1/1001,1002
+2/1001,2002
+3/1001,2002
+4/1001,2002
+5/1001,2002
+6/1001,2002
+7/1001,2002
+8/1001,2002
+9/1001,2002
+0th/2000,2001
+1st/2000
+1th/2001,1000
+2nd/2000
+2th/2001,1000
+3rd/2000
+3th/2001,1000
+4th/2000,2001
+5th/2000,2001
+6th/2000,2001
+7th/2000,2001
+8th/2000,2001
+9th/2000,2001
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.good
new file mode 100644
index 000000000..fafe64a5c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.good
@@ -0,0 +1,29 @@
+1st
+2nd
+3rd
+4th
+5th
+6th
+7th
+8th
+9th
+10th
+11th
+12th
+13th
+14th
+15th
+16th
+17th
+18th
+19th
+20th
+21st
+22nd
+23rd
+24th
+25th
+100th
+1000th
+10001st
+10011th
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.test
new file mode 100644
index 000000000..52e144cb8
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.test
@@ -0,0 +1,6 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
+
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.wrong
new file mode 100644
index 000000000..99f28e7cc
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.wrong
@@ -0,0 +1,5 @@
+1th
+2th
+3th
+10001th
+10011st
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.aff b/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.aff
new file mode 100644
index 000000000..62a1ce5e5
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.aff
@@ -0,0 +1,42 @@
+SET UTF-8
+WORDCHARS 0123456789
+
+SFX S N 18
+SFX S 0 suf1 .
+SFX S 0 suf2 ó
+SFX S 0 suf3 [áéóú]
+SFX S 0 suf4 [^ó]
+SFX S 0 suf5 [^áéóú]
+SFX S 0 suf6 őó
+SFX S 0 suf7 ő[áéóú]
+SFX S 0 suf8 ő[^ó]
+SFX S 0 suf9 ő[^áéóú]
+SFX S 0 suf10 [áéóőú]ó
+SFX S 0 suf11 [^ő]ó
+SFX S 0 suf12 [^áéóőú]ó
+SFX S 0 suf13 [áéőú][^ú]
+SFX S 0 suf14 [^ú][áéóú]
+SFX S 0 suf15 [áéóú][^áéőú]
+SFX S 0 suf16 [^áéóú][^áéőú]
+SFX S 0 suf17 [áéóú][bcdfgkmnóprstvz]
+SFX S 0 suf18 [áéóú]ó
+
+PFX P N 18
+PFX P 0 pre1 .
+PFX P 0 pre2 ó
+PFX P 0 pre3 [áéóú]
+PFX P 0 pre4 [^ó]
+PFX P 0 pre5 [^áéóú]
+PFX P 0 pre6 óő
+PFX P 0 pre7 ó[áéőú]
+PFX P 0 pre8 ó[^ő]
+PFX P 0 pre9 ó[^áéóőú]
+PFX P 0 pre10 [áéóőú]ő
+PFX P 0 pre11 [^ó]ő
+PFX P 0 pre12 [^áéóőú]ő
+PFX P 0 pre13 [áéóú][áéőú]
+PFX P 0 pre14 [áéóú][^áéóú]
+PFX P 0 pre15 [áéóú][^áéőú]
+PFX P 0 pre16 [^áéőú][^áéóú]
+PFX P 0 pre17 [bcdfgkmnóprstvz][áéóú]
+PFX P 0 pre18 ó[áéóú]
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.dic b/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.dic
new file mode 100644
index 000000000..f03ce4ea2
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.dic
@@ -0,0 +1,2 @@
+1
+óőó/SP
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.good b/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.good
new file mode 100644
index 000000000..6c6203737
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.good
@@ -0,0 +1,19 @@
+óőó
+óőósuf1
+pre1óőó
+óőósuf2
+pre2óőó
+óőósuf3
+pre3óőó
+óőósuf6
+pre6óőó
+óőósuf7
+pre7óőó
+óőósuf10
+pre10óőó
+óőósuf13
+pre13óőó
+óőósuf14
+pre14óőó
+óőósuf16
+pre16óőó
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.test b/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.test
new file mode 100644
index 000000000..cde7c5410
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i utf-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.wrong
new file mode 100644
index 000000000..f1022132c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.wrong
@@ -0,0 +1,18 @@
+óőósuf4
+pre4óőó
+óőósuf5
+pre5óőó
+óőósuf8
+pre8óőó
+óőósuf9
+pre9óőó
+óőósuf11
+pre11óőó
+óőósuf12
+pre12óőó
+óőósuf15
+pre15óőó
+óőósuf17
+óőósuf18
+pre17óőó
+pre18óőó
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/condition.aff b/extensions/spellcheck/hunspell/tests/unit/data/condition.aff
new file mode 100644
index 000000000..62157421a
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/condition.aff
@@ -0,0 +1,62 @@
+SET ISO8859-2
+WORDCHARS 0123456789
+
+SFX S N 18
+SFX S 0 suf1 .
+SFX S 0 suf2 o
+SFX S 0 suf3 [aeou]
+SFX S 0 suf4 [^o]
+SFX S 0 suf5 [^aeou]
+SFX S 0 suf6 fo
+SFX S 0 suf7 f[aeou]
+SFX S 0 suf8 f[^o]
+SFX S 0 suf9 f[^aeou]
+SFX S 0 suf10 [aefu]o
+SFX S 0 suf11 [^f]o
+SFX S 0 suf12 [^aefu]o
+SFX S 0 suf13 [aefu][^aefu]
+SFX S 0 suf14 [^aeou][aeou]
+SFX S 0 suf15 [aeou][^aefu]
+SFX S 0 suf16 [^aeou][^aefu]
+SFX S 0 suf17 [aeou][bcdfgkmnoprstvz]
+SFX S 0 suf18 [aeou]o
+
+SFX Q N 2
+SFX Q 0 ning [^aeio][aeiou]n
+SFX Q 0 ing [aeio][aeiou][bcdfgkmnprstvz]
+
+SFX T N 1
+SFX T y ies .[^aeiou]y
+
+PFX U N 1
+PFX U 0 un wr.
+
+SFX Z Y 3
+SFX Z 0 ch [].a
+SFX Z 0 m [].a
+SFX Z a 0 [].a
+
+PFX P N 18
+PFX P 0 pre1 .
+PFX P 0 pre2 o
+PFX P 0 pre3 [aeou]
+PFX P 0 pre4 [^o]
+PFX P 0 pre5 [^aeou]
+PFX P 0 pre6 of
+PFX P 0 pre7 o[aefou]
+PFX P 0 pre8 o[^f]
+PFX P 0 pre9 o[^aefu]
+PFX P 0 pre10 [aefu]o
+PFX P 0 pre11 [^f]o
+PFX P 0 pre12 [^aefou]o
+PFX P 0 pre13 [aeou][aefu]
+PFX P 0 pre14 [aeou][^aeou]
+PFX P 0 pre15 [aeou][^aefu]
+PFX P 0 pre16 [^aefu][^aeou]
+PFX P 0 pre17 [bcdfgkmnoprstvz][aeou]
+PFX P 0 pre18 o[aeou]
+
+
+PFX R N 2
+PFX R 0 gnin n[aeiou][^aeio]
+PFX R 0 gni [bcdfgkmnprstvz][aeiou][aeio]
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/condition.dic b/extensions/spellcheck/hunspell/tests/unit/data/condition.dic
new file mode 100644
index 000000000..40ebd5588
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/condition.dic
@@ -0,0 +1,6 @@
+5
+ofo/SP
+entertain/Q
+nianretne/R
+ra/Z
+wry/TU
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/condition.good b/extensions/spellcheck/hunspell/tests/unit/data/condition.good
new file mode 100644
index 000000000..8fef4a747
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/condition.good
@@ -0,0 +1,26 @@
+ofo
+ofosuf1
+pre1ofo
+ofosuf2
+pre2ofo
+ofosuf3
+pre3ofo
+ofosuf6
+pre6ofo
+ofosuf7
+pre7ofo
+ofosuf10
+ofosuf13
+pre13ofo
+ofosuf14
+pre14ofo
+ofosuf16
+pre16ofo
+entertain
+entertaining
+gninianretne
+r
+ram
+rach
+wries
+unwry
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/condition.test b/extensions/spellcheck/hunspell/tests/unit/data/condition.test
new file mode 100644
index 000000000..c95329532
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/condition.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i ISO8859-2
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/condition.wrong b/extensions/spellcheck/hunspell/tests/unit/data/condition.wrong
new file mode 100644
index 000000000..7b83d828d
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/condition.wrong
@@ -0,0 +1,21 @@
+ofosuf4
+pre4ofo
+ofosuf5
+pre5ofo
+ofosuf8
+pre8ofo
+ofosuf9
+pre9ofo
+ofosuf11
+pre10ofo
+pre11ofo
+ofosuf12
+pre12ofo
+ofosuf15
+pre15ofo
+ofosuf17
+pre17ofo
+ofosuf18
+pre18ofo
+entertainning
+gninnianretne
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.aff b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.aff
new file mode 100644
index 000000000..e7a9bf749
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.aff
@@ -0,0 +1,11 @@
+PFX P Y 1
+PFX P 0 un . ip:un
+
+SFX S Y 1
+SFX S 0 s . is:PL
+
+SFX Q Y 1
+SFX Q 0 s . is:3SGV
+
+SFX R Y 1
+SFX R 0 able/PS . ds:DER_V_ADJ_ABLE
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.dic b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.dic
new file mode 100644
index 000000000..2f6d45615
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.dic
@@ -0,0 +1,3 @@
+2
+drink/RQ po:verb
+drink/S po:noun
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.good b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.good
new file mode 100644
index 000000000..01438d0eb
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.good
@@ -0,0 +1,6 @@
+drink
+drinks
+drinkable
+drinkables
+undrinkable
+undrinkables
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.morph b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.morph
new file mode 100644
index 000000000..95d544389
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.morph
@@ -0,0 +1,20 @@
+> drink
+analyze(drink) = st:drink po:verb
+analyze(drink) = st:drink po:noun
+stem(drink) = drink
+> drinks
+analyze(drinks) = st:drink po:verb is:3SGV
+analyze(drinks) = st:drink po:noun is:PL
+stem(drinks) = drink
+> drinkable
+analyze(drinkable) = st:drink po:verb ds:DER_V_ADJ_ABLE
+stem(drinkable) = drinkable
+> drinkables
+analyze(drinkables) = st:drink po:verb ds:DER_V_ADJ_ABLE is:PL
+stem(drinkables) = drinkable
+> undrinkable
+analyze(undrinkable) = ip:un st:drink po:verb ds:DER_V_ADJ_ABLE
+stem(undrinkable) = drinkable
+> undrinkables
+analyze(undrinkables) = ip:un st:drink po:verb ds:DER_V_ADJ_ABLE is:PL
+stem(undrinkables) = drinkable
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.test b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.wrong b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.wrong
new file mode 100644
index 000000000..70262d940
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.wrong
@@ -0,0 +1,2 @@
+undrink
+undrinks
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.aff b/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.aff
new file mode 100644
index 000000000..18a42f6fd
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.aff
@@ -0,0 +1,9 @@
+# Digits in words, handled by COMPOUNDRULE.
+# 1-jährig, 2-jährig, 100-jährig etc.
+SET UTF-8
+COMPOUNDMIN 1
+# recognize ab, aab, aaab etc. compounds (a=digits, b=-jährig, see dic file)
+COMPOUNDRULE 1
+COMPOUNDRULE a*b
+ONLYINCOMPOUND c
+WORDCHARS 0123456789-
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.dic b/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.dic
new file mode 100644
index 000000000..deeaece05
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.dic
@@ -0,0 +1,12 @@
+11
+0/a
+1/a
+2/a
+3/a
+4/a
+5/a
+6/a
+7/a
+8/a
+9/a
+-jährig/bc
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.test b/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.test
new file mode 100644
index 000000000..cde7c5410
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i utf-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.wrong b/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.wrong
new file mode 100644
index 000000000..aeaf6ce34
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.wrong
@@ -0,0 +1 @@
+-jährig
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/encoding.aff b/extensions/spellcheck/hunspell/tests/unit/data/encoding.aff
new file mode 100644
index 000000000..1f560d262
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/encoding.aff
@@ -0,0 +1 @@
+SET ISO-8859-15
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/encoding.dic b/extensions/spellcheck/hunspell/tests/unit/data/encoding.dic
new file mode 100644
index 000000000..414f9b8d3
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/encoding.dic
@@ -0,0 +1,3 @@
+2
+cur
+uvre
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/encoding.good b/extensions/spellcheck/hunspell/tests/unit/data/encoding.good
new file mode 100644
index 000000000..fc41c90aa
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/encoding.good
@@ -0,0 +1,4 @@
+cur
+uvre
+CUR
+UVRE
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/encoding.test b/extensions/spellcheck/hunspell/tests/unit/data/encoding.test
new file mode 100644
index 000000000..09619572e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/encoding.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i ISO8859-15
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flag.aff b/extensions/spellcheck/hunspell/tests/unit/data/flag.aff
new file mode 100644
index 000000000..ac105c11f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/flag.aff
@@ -0,0 +1,13 @@
+# base 1-character flags
+
+SFX A Y 1
+SFX A 0 s/123 .
+
+SFX 1 Y 1
+SFX 1 0 bar .
+
+SFX 2 Y 1
+SFX 2 0 baz .
+
+PFX 3 Y 1
+PFX 3 0 un .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flag.dic b/extensions/spellcheck/hunspell/tests/unit/data/flag.dic
new file mode 100644
index 000000000..b1b237106
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/flag.dic
@@ -0,0 +1,2 @@
+1
+foo/A3
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flag.good b/extensions/spellcheck/hunspell/tests/unit/data/flag.good
new file mode 100644
index 000000000..d5c27b1a6
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/flag.good
@@ -0,0 +1,8 @@
+foo
+foos
+foosbar
+foosbaz
+unfoo
+unfoos
+unfoosbar
+unfoosbaz
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flag.test b/extensions/spellcheck/hunspell/tests/unit/data/flag.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/flag.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flaglong.aff b/extensions/spellcheck/hunspell/tests/unit/data/flaglong.aff
new file mode 100644
index 000000000..437f13b3a
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/flaglong.aff
@@ -0,0 +1,14 @@
+# 2-character flags
+FLAG long
+
+SFX zx Y 1
+SFX zx 0 s/g?1G09 .
+
+SFX g? Y 1
+SFX g? 0 bar .
+
+SFX 1G Y 1
+SFX 1G 0 baz .
+
+PFX 09 Y 1
+PFX 09 0 un .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flaglong.dic b/extensions/spellcheck/hunspell/tests/unit/data/flaglong.dic
new file mode 100644
index 000000000..46c601286
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/flaglong.dic
@@ -0,0 +1,2 @@
+1
+foo/zx09
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flaglong.good b/extensions/spellcheck/hunspell/tests/unit/data/flaglong.good
new file mode 100644
index 000000000..d5c27b1a6
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/flaglong.good
@@ -0,0 +1,8 @@
+foo
+foos
+foosbar
+foosbaz
+unfoo
+unfoos
+unfoosbar
+unfoosbaz
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flaglong.test b/extensions/spellcheck/hunspell/tests/unit/data/flaglong.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/flaglong.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flagnum.aff b/extensions/spellcheck/hunspell/tests/unit/data/flagnum.aff
new file mode 100644
index 000000000..823cee4cd
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/flagnum.aff
@@ -0,0 +1,14 @@
+# numerical flags
+FLAG num
+
+SFX 999 Y 1
+SFX 999 0 s/214,216,54321 .
+
+SFX 214 Y 1
+SFX 214 0 bar .
+
+SFX 216 Y 1
+SFX 216 0 baz .
+
+PFX 54321 Y 1
+PFX 54321 0 un .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flagnum.dic b/extensions/spellcheck/hunspell/tests/unit/data/flagnum.dic
new file mode 100644
index 000000000..927c45f2f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/flagnum.dic
@@ -0,0 +1,2 @@
+1
+foo/999,54321
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flagnum.good b/extensions/spellcheck/hunspell/tests/unit/data/flagnum.good
new file mode 100644
index 000000000..d5c27b1a6
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/flagnum.good
@@ -0,0 +1,8 @@
+foo
+foos
+foosbar
+foosbaz
+unfoo
+unfoos
+unfoosbar
+unfoosbaz
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flagnum.test b/extensions/spellcheck/hunspell/tests/unit/data/flagnum.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/flagnum.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.aff b/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.aff
new file mode 100644
index 000000000..d0f75c185
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.aff
@@ -0,0 +1,15 @@
+# UTF-8 flags
+FLAG UTF-8
+
+SFX A Y 1
+SFX A 0 s/ÖüÜ .
+#SFX A 0 s/ÖüÖÜ .
+
+SFX Ö Y 1
+SFX Ö 0 bar .
+
+SFX ü Y 1
+SFX ü 0 baz .
+
+PFX Ü Y 1
+PFX Ü 0 un .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.dic b/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.dic
new file mode 100644
index 000000000..2944490c9
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.dic
@@ -0,0 +1,2 @@
+1
+foo/AÜ
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.good b/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.good
new file mode 100644
index 000000000..d5c27b1a6
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.good
@@ -0,0 +1,8 @@
+foo
+foos
+foosbar
+foosbaz
+unfoo
+unfoos
+unfoosbar
+unfoosbaz
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.test b/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.test
new file mode 100644
index 000000000..cde7c5410
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i utf-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.aff b/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.aff
new file mode 100644
index 000000000..56cdabe5a
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.aff
@@ -0,0 +1,12 @@
+# fogemorphemes: special morphemes in compounds
+#
+# Swedish example:
+# gata + kontoret = gatukontoret
+
+COMPOUNDFLAG X
+COMPOUNDBEGIN Y
+ONLYINCOMPOUND Z
+COMPOUNDPERMITFLAG P
+
+SFX A Y 1
+SFX A a u/YPZ .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.dic b/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.dic
new file mode 100644
index 000000000..1b76380d1
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.dic
@@ -0,0 +1,3 @@
+2
+gata/A
+kontoret/X
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.good b/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.good
new file mode 100644
index 000000000..01e77d561
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.good
@@ -0,0 +1,3 @@
+gata
+kontoret
+gatukontoret
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.test b/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.wrong b/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.wrong
new file mode 100644
index 000000000..f920745c7
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.wrong
@@ -0,0 +1,3 @@
+gatu
+gatakontoret
+kontoretgatu
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.aff b/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.aff
new file mode 100644
index 000000000..de7f8ad9a
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.aff
@@ -0,0 +1,11 @@
+# FORBIDDENWORD flag
+# The signed word and its suffixed forms are all forbidden,
+# except with root homonyms.
+# Useful for forbidding bad suffixed forms or compounds.
+
+
+FORBIDDENWORD X
+COMPOUNDFLAG Y
+
+SFX A Y 1
+SFX A 0 s .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.dic b/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.dic
new file mode 100644
index 000000000..78f2ee3d1
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.dic
@@ -0,0 +1,8 @@
+5
+foo/S [1]
+foo/YX [2]
+foo/Y [3]
+foo/S [4]
+bar/YS [5]
+bars/X
+foos/X
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.good b/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.good
new file mode 100644
index 000000000..7bd112e9e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.good
@@ -0,0 +1,3 @@
+foo
+bar
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.test b/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.wrong b/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.wrong
new file mode 100644
index 000000000..5752c1e44
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.wrong
@@ -0,0 +1,4 @@
+bars
+foos
+foobar
+barfoo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forceucase.aff b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.aff
new file mode 100644
index 000000000..5eebcbdab
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.aff
@@ -0,0 +1,4 @@
+# force capitalized compound
+TRY F
+FORCEUCASE A
+COMPOUNDFLAG C
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forceucase.dic b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.dic
new file mode 100644
index 000000000..82fd93b30
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.dic
@@ -0,0 +1,4 @@
+3
+foo/C
+bar/C
+baz/CA
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forceucase.good b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.good
new file mode 100644
index 000000000..37ecf4957
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.good
@@ -0,0 +1,7 @@
+foo
+bar
+baz
+foobar
+Foobaz
+foobazbar
+Foobarbaz
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forceucase.sug b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.sug
new file mode 100644
index 000000000..6a77cbd06
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.sug
@@ -0,0 +1,2 @@
+Foobaz
+Foobarbaz
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forceucase.test b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forceucase.wrong b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.wrong
new file mode 100644
index 000000000..1503e42dd
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.wrong
@@ -0,0 +1,2 @@
+foobaz
+foobarbaz
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.aff b/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.aff
new file mode 100644
index 000000000..d60cb74d7
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.aff
@@ -0,0 +1,15 @@
+# FULLSTRIP option: Hunspell can strip full words by affix rules
+# see OpenOffice.org Issue #80145
+# test data from Davide Prina
+
+FULLSTRIP
+
+SET ISO8859-15
+TRY aioertnsclmdpgubzfvhàq'ACMSkBGPLxEyRTVòIODNwFéùèìjUZKHWJYQX
+
+SFX A Y 3 # verbo andare (verb to go)
+SFX A andare vado andare # io vado (I go)
+SFX A andare va andare # lui va (he goes)
+SFX A are iamo andare # noi andiamo (we go)
+
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.dic b/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.dic
new file mode 100644
index 000000000..553113d44
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.dic
@@ -0,0 +1,4 @@
+2
+andare/A
+riandare/A
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.good b/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.good
new file mode 100644
index 000000000..1240e71f5
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.good
@@ -0,0 +1,9 @@
+andare
+vado
+va
+andiamo
+riandare
+rivado
+riva
+riandiamo
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.test b/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.test
new file mode 100644
index 000000000..4d59c4212
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i UTF-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.aff b/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.aff
new file mode 100644
index 000000000..5ff25872c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.aff
@@ -0,0 +1,91 @@
+# German compounding
+
+# handle special casing of German sharp s
+
+CHECKSHARPS
+
+# compound flags
+
+COMPOUNDBEGIN U
+COMPOUNDMIDDLE V
+COMPOUNDEND W
+
+# Prefixes are allowed at the beginning of compounds,
+# suffixes are allowed at the end of compounds by default:
+# (prefix)?(root)+(affix)?
+# Affixes with COMPOUNDPERMITFLAG may be inside of compounds.
+COMPOUNDPERMITFLAG P
+
+# for German linking morphemes (Fugenelemente)
+# Hint: ONLYINCOMPOUND is not required everywhere, but
+# checking will be a little faster with it.
+
+ONLYINCOMPOUND X
+
+# forbid uppercase characters at compound word bounds
+CHECKCOMPOUNDCASE
+
+# for handling Fuge-elements with dashes (Arbeits-)
+# dash will be a special word
+
+COMPOUNDMIN 1
+WORDCHARS -
+
+# compound settings and linking morpheme (Fugenelement) for `Arbeit'
+
+SFX A Y 3
+SFX A 0 s/UPX .
+SFX A 0 s/VPDX .
+SFX A 0 0/WXD .
+
+SFX B Y 2
+SFX B 0 0/UPX .
+SFX B 0 0/VWXDP .
+
+# a suffix for `Computer'
+
+SFX C Y 1
+SFX C 0 n/WD .
+
+# for forbidding exceptions (*Arbeitsnehmer)
+
+FORBIDDENWORD Z
+
+# dash prefix for compounds with dash (Arbeits-Computer)
+
+PFX - Y 1
+PFX - 0 -/P .
+
+# decapitalizing prefix
+# circumfix for positioning in compounds
+
+PFX D Y 29
+PFX D A a/PX A
+PFX D Ä ä/PX Ä
+PFX D B b/PX B
+PFX D C c/PX C
+PFX D D d/PX D
+PFX D E e/PX E
+PFX D F f/PX F
+PFX D G g/PX G
+PFX D H h/PX H
+PFX D I i/PX I
+PFX D J j/PX J
+PFX D K k/PX K
+PFX D L l/PX L
+PFX D M m/PX M
+PFX D N n/PX N
+PFX D O o/PX O
+PFX D Ö ö/PX Ö
+PFX D P p/PX P
+PFX D Q q/PX Q
+PFX D R r/PX R
+PFX D S s/PX S
+PFX D T t/PX T
+PFX D U u/PX U
+PFX D Ü ü/PX Ü
+PFX D V v/PX V
+PFX D W w/PX W
+PFX D X x/PX X
+PFX D Y y/PX Y
+PFX D Z z/PX Z
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.dic b/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.dic
new file mode 100644
index 000000000..5db6783a4
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.dic
@@ -0,0 +1,5 @@
+4
+Arbeit/A-
+Computer/BC-
+-/W
+Arbeitsnehmer/Z
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.good b/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.good
new file mode 100644
index 000000000..e4945553c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.good
@@ -0,0 +1,20 @@
+Computer
+Computern
+Arbeit
+Arbeits-
+Computerarbeit
+Computerarbeits-
+Arbeitscomputer
+Computercomputer
+Computercomputern
+Arbeitscomputern
+Computerarbeitscomputer
+Computerarbeitscomputern
+Arbeitscomputercomputer
+Computercomputerarbeit
+Arbeitscomputerarbeit
+Arbeitsarbeitsarbeit
+Computerarbeitsarbeit
+Computerarbeits-Computer
+Computerarbeits-Computern
+Computer-Arbeit
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.test b/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.test
new file mode 100644
index 000000000..dc295077f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i ISO8859-1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.wrong b/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.wrong
new file mode 100644
index 000000000..c5f2ba115
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.wrong
@@ -0,0 +1,50 @@
+computer
+computern
+arbeit
+Arbeits
+arbeits
+ComputerArbeit
+ComputernArbeit
+Computernarbeit
+ComputerArbeits
+Arbeitcomputer
+Arbeitcomputern
+ArbeitsComputer
+ArbeitsComputern
+Computerarbeitcomputer
+ComputerArbeitcomputer
+ComputerArbeitscomputer
+Computerarbeitcomputern
+ComputerArbeitcomputern
+ComputerArbeitscomputern
+Arbeitscomputerarbeits
+Arbeitscomputernarbeits
+Computerarbeits-computer
+Arbeitsnehmer
+computers
+computern
+computernarbeit
+computernArbeit
+computerArbeit
+computerArbeits
+arbeitcomputer
+arbeitsComputer
+computerarbeitcomputer
+computerArbeitcomputer
+computerArbeitscomputer
+arbeitscomputerarbeits
+computerarbeits-computer
+arbeitsnehmer
+computernarbeit
+computernArbeit
+arbeits-
+computerarbeit
+computerarbeits-
+arbeitscomputer
+arbeitscomputern
+computerarbeitscomputer
+computerarbeitscomputern
+computerarbeitscomputers
+arbeitscomputerarbeit
+computerarbeits-Computer
+computerarbeits-Computern
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.aff b/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.aff
new file mode 100644
index 000000000..3e06f0647
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.aff
@@ -0,0 +1,96 @@
+# German compounding
+
+# handle special casing of German sharp s
+
+CHECKSHARPS
+
+# compound flags
+
+COMPOUNDBEGIN U
+COMPOUNDMIDDLE V
+COMPOUNDEND W
+
+# Prefixes are allowed at the beginning of compounds,
+# suffixes are allowed at the end of compounds by default:
+# (prefix)?(root)+(affix)?
+# Affixes with COMPOUNDPERMITFLAG may be inside of compounds.
+COMPOUNDPERMITFLAG P
+
+# for German linking morphemes (Fugenelemente)
+# Hint: ONLYINCOMPOUND is not required everywhere, but
+# checking will be a little faster with it.
+
+ONLYINCOMPOUND X
+
+# for decapitalizing nouns with linking morphemes (Fugenelemente)
+
+CIRCUMFIX Y
+
+# for handling Fuge-elements with dashes (Arbeits-)
+# dash will be a special word
+
+COMPOUNDMIN 1
+WORDCHARS -
+
+# compound settings and linking morpheme (Fugenelement) for `Arbeit'
+
+SFX A Y 3
+SFX A 0 s/UPX .
+SFX A 0 s/VPXDY .
+SFX A 0 0/WXDY .
+
+# compound settings for `Computer'
+
+SFX B Y 2
+SFX B 0 0/UPX .
+SFX B 0 0/VWPXDY .
+
+# a suffix for `Computer'
+
+SFX C Y 2
+SFX C 0 n .
+SFX C 0 n/WXDY .
+
+# for forbidding exceptions (*Arbeitsnehmer)
+
+FORBIDDENWORD Z
+
+# dash prefix for compounds with dash (Arbeits-Computer)
+
+PFX - Y 2
+PFX - 0 -/PUVW .
+PFX - 0 -/PY .
+
+# decapitalizing prefix
+# circumfix for positioning in compounds
+
+PFX D Y 29
+PFX D A a/PXY A
+PFX D Ä ä/PXY Ä
+PFX D B b/PXY B
+PFX D C c/PXY C
+PFX D D d/PXY D
+PFX D E e/PXY E
+PFX D F f/PXY F
+PFX D G g/PXY G
+PFX D H h/PXY H
+PFX D I i/PXY I
+PFX D J j/PXY J
+PFX D K k/PXY K
+PFX D L l/PXY L
+PFX D M m/PXY M
+PFX D N n/PXY N
+PFX D O o/PXY O
+PFX D Ö ö/PXY Ö
+PFX D P p/PXY P
+PFX D Q q/PXY Q
+PFX D R r/PXY R
+PFX D S s/PXY S
+PFX D T t/PXY T
+PFX D U u/PXY U
+PFX D Ü ü/PXY Ü
+PFX D V v/PXY V
+PFX D W w/PXY W
+PFX D X x/PXY X
+PFX D Y y/PXY Y
+PFX D Z z/PXY Z
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.dic b/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.dic
new file mode 100644
index 000000000..5db6783a4
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.dic
@@ -0,0 +1,5 @@
+4
+Arbeit/A-
+Computer/BC-
+-/W
+Arbeitsnehmer/Z
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.good b/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.good
new file mode 100644
index 000000000..5357bff16
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.good
@@ -0,0 +1,14 @@
+Computer
+Computern
+Arbeit
+Arbeits-
+Computerarbeit
+Computerarbeits-
+Arbeitscomputer
+Arbeitscomputern
+Computerarbeitscomputer
+Computerarbeitscomputern
+Arbeitscomputerarbeit
+Computerarbeits-Computer
+Computerarbeits-Computern
+Computer-Arbeit
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.test b/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.wrong b/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.wrong
new file mode 100644
index 000000000..c5f2ba115
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.wrong
@@ -0,0 +1,50 @@
+computer
+computern
+arbeit
+Arbeits
+arbeits
+ComputerArbeit
+ComputernArbeit
+Computernarbeit
+ComputerArbeits
+Arbeitcomputer
+Arbeitcomputern
+ArbeitsComputer
+ArbeitsComputern
+Computerarbeitcomputer
+ComputerArbeitcomputer
+ComputerArbeitscomputer
+Computerarbeitcomputern
+ComputerArbeitcomputern
+ComputerArbeitscomputern
+Arbeitscomputerarbeits
+Arbeitscomputernarbeits
+Computerarbeits-computer
+Arbeitsnehmer
+computers
+computern
+computernarbeit
+computernArbeit
+computerArbeit
+computerArbeits
+arbeitcomputer
+arbeitsComputer
+computerarbeitcomputer
+computerArbeitcomputer
+computerArbeitscomputer
+arbeitscomputerarbeits
+computerarbeits-computer
+arbeitsnehmer
+computernarbeit
+computernArbeit
+arbeits-
+computerarbeit
+computerarbeits-
+arbeitscomputer
+arbeitscomputern
+computerarbeitscomputer
+computerarbeitscomputern
+computerarbeitscomputers
+arbeitscomputerarbeit
+computerarbeits-Computer
+computerarbeits-Computern
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i35725.aff b/extensions/spellcheck/hunspell/tests/unit/data/i35725.aff
new file mode 100644
index 000000000..96755c7ec
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i35725.aff
@@ -0,0 +1,203 @@
+# Ngram suggestions
+# - fix case problem
+# - detect character swapping (keep only these suggestions)
+# - fewer suggestions
+# - weight with common subsequence algorithm
+# - suggest uppercased words
+
+# 2007-02-05:
+# replacements of non-adjacent characters and character moves are now
+# detected by the non-ngram suggestions, too.
+
+# OpenOffice.org's en_US.aff file
+
+SET ISO8859-1
+TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ'
+
+WORDCHARS '
+
+PFX A Y 1
+PFX A 0 re .
+
+PFX I Y 1
+PFX I 0 in .
+
+PFX U Y 1
+PFX U 0 un .
+
+PFX C Y 1
+PFX C 0 de .
+
+PFX E Y 1
+PFX E 0 dis .
+
+PFX F Y 1
+PFX F 0 con .
+
+PFX K Y 1
+PFX K 0 pro .
+
+SFX V N 2
+SFX V e ive e
+SFX V 0 ive [^e]
+
+SFX N Y 3
+SFX N e ion e
+SFX N y ication y
+SFX N 0 en [^ey]
+
+SFX X Y 3
+SFX X e ions e
+SFX X y ications y
+SFX X 0 ens [^ey]
+
+SFX H N 2
+SFX H y ieth y
+SFX H 0 th [^y]
+
+SFX Y Y 1
+SFX Y 0 ly .
+
+SFX G Y 2
+SFX G e ing e
+SFX G 0 ing [^e]
+
+SFX J Y 2
+SFX J e ings e
+SFX J 0 ings [^e]
+
+SFX D Y 4
+SFX D 0 d e
+SFX D y ied [^aeiou]y
+SFX D 0 ed [^ey]
+SFX D 0 ed [aeiou]y
+
+SFX T N 4
+SFX T 0 st e
+SFX T y iest [^aeiou]y
+SFX T 0 est [aeiou]y
+SFX T 0 est [^ey]
+
+SFX R Y 4
+SFX R 0 r e
+SFX R y ier [^aeiou]y
+SFX R 0 er [aeiou]y
+SFX R 0 er [^ey]
+
+SFX Z Y 4
+SFX Z 0 rs e
+SFX Z y iers [^aeiou]y
+SFX Z 0 ers [aeiou]y
+SFX Z 0 ers [^ey]
+
+SFX S Y 4
+SFX S y ies [^aeiou]y
+SFX S 0 s [aeiou]y
+SFX S 0 es [sxzh]
+SFX S 0 s [^sxzhy]
+
+SFX P Y 3
+SFX P y iness [^aeiou]y
+SFX P 0 ness [aeiou]y
+SFX P 0 ness [^y]
+
+SFX M Y 1
+SFX M 0 's .
+
+SFX B Y 3
+SFX B 0 able [^aeiou]
+SFX B 0 able ee
+SFX B e able [^aeiou]e
+
+SFX L Y 1
+SFX L 0 ment .
+
+REP 88
+REP a ei
+REP ei a
+REP a ey
+REP ey a
+REP ai ie
+REP ie ai
+REP are air
+REP are ear
+REP are eir
+REP air are
+REP air ere
+REP ere air
+REP ere ear
+REP ere eir
+REP ear are
+REP ear air
+REP ear ere
+REP eir are
+REP eir ere
+REP ch te
+REP te ch
+REP ch ti
+REP ti ch
+REP ch tu
+REP tu ch
+REP ch s
+REP s ch
+REP ch k
+REP k ch
+REP f ph
+REP ph f
+REP gh f
+REP f gh
+REP i igh
+REP igh i
+REP i uy
+REP uy i
+REP i ee
+REP ee i
+REP j di
+REP di j
+REP j gg
+REP gg j
+REP j ge
+REP ge j
+REP s ti
+REP ti s
+REP s ci
+REP ci s
+REP k cc
+REP cc k
+REP k qu
+REP qu k
+REP kw qu
+REP o eau
+REP eau o
+REP o ew
+REP ew o
+REP oo ew
+REP ew oo
+REP ew ui
+REP ui ew
+REP oo ui
+REP ui oo
+REP ew u
+REP u ew
+REP oo u
+REP u oo
+REP u oe
+REP oe u
+REP u ieu
+REP ieu u
+REP ue ew
+REP ew ue
+REP uff ough
+REP oo ieu
+REP ieu oo
+REP ier ear
+REP ear ier
+REP ear air
+REP air ear
+REP w qu
+REP qu w
+REP z ss
+REP ss z
+REP shun tion
+REP shun sion
+REP shun cion
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i35725.dic b/extensions/spellcheck/hunspell/tests/unit/data/i35725.dic
new file mode 100644
index 000000000..0c61f0031
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i35725.dic
@@ -0,0 +1,15 @@
+15
+endangerment/SM
+ferment/FSCM
+preferment/SM
+impermanent/Y
+permanent/YSP
+semipermanent/Y
+empowerment/MS
+supermen
+tournament/MS
+ornamental/SY
+ornament/GSDM
+supernatant
+pimpernel
+UNESCO/M
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i35725.good b/extensions/spellcheck/hunspell/tests/unit/data/i35725.good
new file mode 100644
index 000000000..052ba8418
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i35725.good
@@ -0,0 +1 @@
+permanent
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i35725.sug b/extensions/spellcheck/hunspell/tests/unit/data/i35725.sug
new file mode 100644
index 000000000..a8bf1d980
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i35725.sug
@@ -0,0 +1,10 @@
+permanent, preferment
+permanent, ornament
+permanent
+Permanent, Preferment
+Permanent, Ornament
+Permanent
+UNESCO
+UNESCO
+UNESCO's
+UNESCO's
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i35725.test b/extensions/spellcheck/hunspell/tests/unit/data/i35725.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i35725.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i35725.wrong b/extensions/spellcheck/hunspell/tests/unit/data/i35725.wrong
new file mode 100644
index 000000000..573e195d8
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i35725.wrong
@@ -0,0 +1,10 @@
+permenant
+pernament
+pernemant
+Permenant
+Pernament
+Pernemant
+unesco
+Unesco
+unesco's
+Unesco's
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i53643.aff b/extensions/spellcheck/hunspell/tests/unit/data/i53643.aff
new file mode 100644
index 000000000..9fac6d84c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i53643.aff
@@ -0,0 +1,2 @@
+# check numbers with separators
+WORDCHARS 0123456789.-,
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i53643.dic b/extensions/spellcheck/hunspell/tests/unit/data/i53643.dic
new file mode 100644
index 000000000..aec5d506b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i53643.dic
@@ -0,0 +1,2 @@
+1
+foo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i53643.good b/extensions/spellcheck/hunspell/tests/unit/data/i53643.good
new file mode 100644
index 000000000..116333452
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i53643.good
@@ -0,0 +1,19 @@
+1
+12
+123
+1234
+12345
+123456
+1234567
+1.1
+1.12
+1.123
+1.1234
+1.12345
+1.123456
+12.1
+123.12
+1234.123
+12345.1234
+123456.12345
+1234567.123456
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i53643.test b/extensions/spellcheck/hunspell/tests/unit/data/i53643.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i53643.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i53643.wrong b/extensions/spellcheck/hunspell/tests/unit/data/i53643.wrong
new file mode 100644
index 000000000..45c61d298
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i53643.wrong
@@ -0,0 +1,4 @@
+1..2
+1,,2
+1.,2
+1,.2
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i54633.aff b/extensions/spellcheck/hunspell/tests/unit/data/i54633.aff
new file mode 100644
index 000000000..46281e1c5
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i54633.aff
@@ -0,0 +1,2 @@
+# Missing capitalized suggestion for capitalized bad words
+SET ISO8859-1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i54633.dic b/extensions/spellcheck/hunspell/tests/unit/data/i54633.dic
new file mode 100644
index 000000000..e26d6f9c8
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i54633.dic
@@ -0,0 +1,2 @@
+1
+diter
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i54633.good b/extensions/spellcheck/hunspell/tests/unit/data/i54633.good
new file mode 100644
index 000000000..a115f67ed
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i54633.good
@@ -0,0 +1,2 @@
+diter
+diter
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i54633.sug b/extensions/spellcheck/hunspell/tests/unit/data/i54633.sug
new file mode 100644
index 000000000..a115f67ed
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i54633.sug
@@ -0,0 +1,2 @@
+diter
+diter
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i54633.test b/extensions/spellcheck/hunspell/tests/unit/data/i54633.test
new file mode 100644
index 000000000..dc295077f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i54633.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i ISO8859-1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i54633.wrong b/extensions/spellcheck/hunspell/tests/unit/data/i54633.wrong
new file mode 100644
index 000000000..579a45dab
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i54633.wrong
@@ -0,0 +1,2 @@
+editer
+Editer
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i54980.aff b/extensions/spellcheck/hunspell/tests/unit/data/i54980.aff
new file mode 100644
index 000000000..37cc5c53d
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i54980.aff
@@ -0,0 +1,2 @@
+# ISO-8859-15 (extended latin-1) support for French, Finnish and EURO symbol
+SET ISO8859-15
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i54980.dic b/extensions/spellcheck/hunspell/tests/unit/data/i54980.dic
new file mode 100644
index 000000000..414f9b8d3
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i54980.dic
@@ -0,0 +1,3 @@
+2
+cœur
+œuvre
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i54980.good b/extensions/spellcheck/hunspell/tests/unit/data/i54980.good
new file mode 100644
index 000000000..fc41c90aa
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i54980.good
@@ -0,0 +1,4 @@
+cœur
+œuvre
+CŒUR
+ŒUVRE
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i54980.test b/extensions/spellcheck/hunspell/tests/unit/data/i54980.test
new file mode 100644
index 000000000..09619572e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i54980.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i ISO8859-15
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i58202.aff b/extensions/spellcheck/hunspell/tests/unit/data/i58202.aff
new file mode 100644
index 000000000..11249d4f2
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i58202.aff
@@ -0,0 +1,4 @@
+# case suggestions
+MAXNGRAMSUGS 0
+# capitalise baz->Baz
+TRY B
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i58202.dic b/extensions/spellcheck/hunspell/tests/unit/data/i58202.dic
new file mode 100644
index 000000000..19e1980ba
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i58202.dic
@@ -0,0 +1,5 @@
+4
+foo
+bar
+Baz
+Boo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i58202.good b/extensions/spellcheck/hunspell/tests/unit/data/i58202.good
new file mode 100644
index 000000000..88a079a55
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i58202.good
@@ -0,0 +1,10 @@
+foo
+bar
+Foo
+Bar
+Baz
+Boo
+FOO
+BAR
+BAZ
+BOO
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i58202.sug b/extensions/spellcheck/hunspell/tests/unit/data/i58202.sug
new file mode 100644
index 000000000..bc784acef
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i58202.sug
@@ -0,0 +1,13 @@
+foo, Boo
+Bar
+Baz
+Boo
+foo bar
+foo Bar
+Foo bar
+Foo Bar
+foo Baz
+Foo Baz
+Baz foo
+Baz Foo
+Baz Boo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i58202.test b/extensions/spellcheck/hunspell/tests/unit/data/i58202.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i58202.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i58202.wrong b/extensions/spellcheck/hunspell/tests/unit/data/i58202.wrong
new file mode 100644
index 000000000..886584d80
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i58202.wrong
@@ -0,0 +1,13 @@
+fOO
+BAr
+baz
+BOo
+foobar
+fooBar
+Foobar
+FooBar
+fooBaz
+FooBaz
+Bazfoo
+BazFoo
+BazBoo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i68568.aff b/extensions/spellcheck/hunspell/tests/unit/data/i68568.aff
new file mode 100644
index 000000000..f0c639e8d
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i68568.aff
@@ -0,0 +1,7 @@
+# Sant'Elia -> SANT'ELIA (Italian)
+# OpenOffice.org Issue 68658
+
+PFX a Y 1
+PFX a 0 Sant' E
+
+WORDCHARS '
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i68568.dic b/extensions/spellcheck/hunspell/tests/unit/data/i68568.dic
new file mode 100644
index 000000000..966010835
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i68568.dic
@@ -0,0 +1,2 @@
+1
+Elia/a
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i68568.test b/extensions/spellcheck/hunspell/tests/unit/data/i68568.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i68568.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i68568.wrong b/extensions/spellcheck/hunspell/tests/unit/data/i68568.wrong
new file mode 100644
index 000000000..998e9f4e4
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i68568.wrong
@@ -0,0 +1,5 @@
+sant'elia
+sant'Elia
+Sant'elia
+Sant'
+SANT'
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.aff b/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.aff
new file mode 100644
index 000000000..7076ee938
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.aff
@@ -0,0 +1,8 @@
+# Sant'Elia -> SANT'ELIA (Italian)
+# OpenOffice.org Issue 68658
+SET UTF-8
+
+PFX a Y 1
+PFX a 0 Foó' B
+
+WORDCHARS '
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.dic b/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.dic
new file mode 100644
index 000000000..bc38229fa
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.dic
@@ -0,0 +1,2 @@
+1
+Bár/a
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.test b/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.test
new file mode 100644
index 000000000..4d59c4212
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i UTF-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.wrong
new file mode 100644
index 000000000..0713c1369
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.wrong
@@ -0,0 +1,5 @@
+foó'bár
+foó'Bár
+Foó'bár
+foó'
+FOÓ'
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/iconv.aff b/extensions/spellcheck/hunspell/tests/unit/data/iconv.aff
new file mode 100644
index 000000000..36cf7a223
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/iconv.aff
@@ -0,0 +1,10 @@
+# input conversion (also accept the comma-below letters written with cedilla,
+# as the de facto replacement of the Romanian standard)
+SET UTF-8
+
+ICONV 4
+ICONV ş ș
+ICONV ţ ț
+ICONV Ş Ș
+ICONV Ţ Ț
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/iconv.dic b/extensions/spellcheck/hunspell/tests/unit/data/iconv.dic
new file mode 100644
index 000000000..8326eee2d
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/iconv.dic
@@ -0,0 +1,5 @@
+4
+Chișinău
+Țepes
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/iconv.good b/extensions/spellcheck/hunspell/tests/unit/data/iconv.good
new file mode 100644
index 000000000..746cf1e53
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/iconv.good
@@ -0,0 +1,6 @@
+Chișinău
+Chişinău
+Țepes
+Ţepes
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/iconv.test b/extensions/spellcheck/hunspell/tests/unit/data/iconv.test
new file mode 100644
index 000000000..cde7c5410
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/iconv.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i utf-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ignore.aff b/extensions/spellcheck/hunspell/tests/unit/data/ignore.aff
new file mode 100644
index 000000000..238dc15e5
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/ignore.aff
@@ -0,0 +1,5 @@
+# ignore characters in words (for Arabic Harakat or Hebrew niqqud)
+IGNORE aeiou
+
+PFX A Y 1
+PFX A 0 re .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ignore.dic b/extensions/spellcheck/hunspell/tests/unit/data/ignore.dic
new file mode 100644
index 000000000..846983b7d
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/ignore.dic
@@ -0,0 +1,3 @@
+2
+xmpl
+expression/A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ignore.good b/extensions/spellcheck/hunspell/tests/unit/data/ignore.good
new file mode 100644
index 000000000..d7dd645c2
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/ignore.good
@@ -0,0 +1,6 @@
+example
+expression
+xmpl
+xprssn
+reexpression
+rxprssn
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ignore.test b/extensions/spellcheck/hunspell/tests/unit/data/ignore.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/ignore.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.aff b/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.aff
new file mode 100644
index 000000000..8646676d0
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.aff
@@ -0,0 +1,6 @@
+# Arabic test for feature ignoring diacritics
+SET UTF-8
+# Arabic diacritics (harakat):
+# sukun, shadda, kasra, damma, fatha, kasratan, dammatan, fathatan (left to right)
+IGNORE ًٌٍَُِّْ
+WORDCHARS ًٌٍَُِّْ
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.dic b/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.dic
new file mode 100644
index 000000000..d4a2a81e7
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.dic
@@ -0,0 +1,10 @@
+9
+طِير
+فَتحة
+ضُمة
+كِسرة
+فتحًتان
+ضمتانٌ
+كسرتاٍن
+شدّة
+سكوْن
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.good b/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.good
new file mode 100644
index 000000000..d463cd59f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.good
@@ -0,0 +1,9 @@
+طير
+فتحة
+ضمة
+كسرة
+فتحتان
+ضمتان
+كسرتان
+شدة
+سكون
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.test b/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.test
new file mode 100644
index 000000000..cde7c5410
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i utf-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/keepcase.aff b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.aff
new file mode 100644
index 000000000..b08006bf7
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.aff
@@ -0,0 +1,3 @@
+# keep case in words marked with the KEEPCASE flag
+KEEPCASE A
+WORDCHARS .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/keepcase.dic b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.dic
new file mode 100644
index 000000000..bf9992acf
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.dic
@@ -0,0 +1,5 @@
+4
+foo/A
+Bar/A
+baz./A
+Quux./A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/keepcase.good b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.good
new file mode 100644
index 000000000..e6ff1817d
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.good
@@ -0,0 +1,4 @@
+foo
+Bar
+baz.
+Quux.
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/keepcase.sug b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.sug
new file mode 100644
index 000000000..551dd8bb3
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.sug
@@ -0,0 +1,8 @@
+foo
+foo
+Bar
+Bar
+baz.
+baz.
+Quux.
+Quux.
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/keepcase.test b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/keepcase.wrong b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.wrong
new file mode 100644
index 000000000..3b7914291
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.wrong
@@ -0,0 +1,8 @@
+Foo
+FOO
+BAR
+bar
+Baz.
+BAZ.
+quux.
+QUUX.
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/korean.aff b/extensions/spellcheck/hunspell/tests/unit/data/korean.aff
new file mode 100644
index 000000000..979e3c228
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/korean.aff
@@ -0,0 +1 @@
+SET UTF-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/korean.dic b/extensions/spellcheck/hunspell/tests/unit/data/korean.dic
new file mode 100644
index 000000000..95cb4508e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/korean.dic
@@ -0,0 +1,3 @@
+2
+들어오세요
+안녕하세요
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/korean.good b/extensions/spellcheck/hunspell/tests/unit/data/korean.good
new file mode 100644
index 000000000..660d506bb
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/korean.good
@@ -0,0 +1,2 @@
+들어오세요
+안녕하세요
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/korean.test b/extensions/spellcheck/hunspell/tests/unit/data/korean.test
new file mode 100644
index 000000000..4d59c4212
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/korean.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i UTF-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/korean.wrong b/extensions/spellcheck/hunspell/tests/unit/data/korean.wrong
new file mode 100644
index 000000000..5ea85cead
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/korean.wrong
@@ -0,0 +1 @@
+들어오세
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/map.aff b/extensions/spellcheck/hunspell/tests/unit/data/map.aff
new file mode 100644
index 000000000..3e78baba6
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/map.aff
@@ -0,0 +1,9 @@
+# With MAP suggestion, Hunspell can add missing accents to a word.
+
+# switch off ngram suggestion for testing
+MAXNGRAMSUGS 0
+
+MAP 3
+MAP u
+MAP o
+MAP (ss)
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/map.dic b/extensions/spellcheck/hunspell/tests/unit/data/map.dic
new file mode 100644
index 000000000..744394f0c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/map.dic
@@ -0,0 +1,4 @@
+3
+Frhstck
+tkrfr
+gro
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/map.sug b/extensions/spellcheck/hunspell/tests/unit/data/map.sug
new file mode 100644
index 000000000..cadb75422
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/map.sug
@@ -0,0 +1,3 @@
+Frhstck
+tkrfr
+gro
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/map.test b/extensions/spellcheck/hunspell/tests/unit/data/map.test
new file mode 100644
index 000000000..dc295077f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/map.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i ISO8859-1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/map.wrong b/extensions/spellcheck/hunspell/tests/unit/data/map.wrong
new file mode 100644
index 000000000..251c8a1e9
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/map.wrong
@@ -0,0 +1,3 @@
+Fruhstuck
+tukorfuro
+gross
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/maputf.aff b/extensions/spellcheck/hunspell/tests/unit/data/maputf.aff
new file mode 100644
index 000000000..30edb2a78
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/maputf.aff
@@ -0,0 +1,11 @@
+# With MAP suggestion, Hunspell can add missing accents to a word.
+
+SET UTF-8
+
+# switch off ngram suggestion for testing
+MAXNGRAMSUGS 0
+
+MAP 3
+MAP uúü
+MAP öóo
+MAP ß(ss)
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/maputf.dic b/extensions/spellcheck/hunspell/tests/unit/data/maputf.dic
new file mode 100644
index 000000000..1c6fa8d05
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/maputf.dic
@@ -0,0 +1,4 @@
+3
+Frühstück
+tükörfúró
+groß
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/maputf.sug b/extensions/spellcheck/hunspell/tests/unit/data/maputf.sug
new file mode 100644
index 000000000..81d09e021
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/maputf.sug
@@ -0,0 +1,3 @@
+Frühstück
+tükörfúró
+groß
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/maputf.test b/extensions/spellcheck/hunspell/tests/unit/data/maputf.test
new file mode 100644
index 000000000..cde7c5410
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/maputf.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i utf-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/maputf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/maputf.wrong
new file mode 100644
index 000000000..251c8a1e9
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/maputf.wrong
@@ -0,0 +1,3 @@
+Fruhstuck
+tukorfuro
+gross
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/morph.aff b/extensions/spellcheck/hunspell/tests/unit/data/morph.aff
new file mode 100644
index 000000000..608085860
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/morph.aff
@@ -0,0 +1,12 @@
+# example for morphological analysis, stemming and generation
+PFX P Y 1
+PFX P 0 un . dp:pfx_un sp:un
+
+SFX S Y 1
+SFX S 0 s . is:plur
+
+SFX Q Y 1
+SFX Q 0 s . is:sg_3
+
+SFX R Y 1
+SFX R 0 able/PS . ds:der_able
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/morph.dic b/extensions/spellcheck/hunspell/tests/unit/data/morph.dic
new file mode 100644
index 000000000..f8d58a6d4
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/morph.dic
@@ -0,0 +1,10 @@
+9
+drink/S po:noun
+drink/RQ po:verb al:drank al:drunk ts:present
+drank po:verb st:drink is:past_1
+drunk po:verb st:drink is:past_2
+eat/RQ po:verb al:ate al:eaten ts:present
+ate po:verb st:eat is:past_1
+eaten po:verb st:eat is:past_2
+phenomenon po:noun al:phenomena
+phenomena po:noun st:phenomenon is:plur
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/morph.good b/extensions/spellcheck/hunspell/tests/unit/data/morph.good
new file mode 100644
index 000000000..9f0d24768
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/morph.good
@@ -0,0 +1,26 @@
+drink
+drinks
+drinkable
+drinkables
+undrinkable
+undrinkables
+drank
+drunk
+phenomenon
+phenomena
+drink eat
+drink eats
+drink ate
+drink eaten
+drink eatable
+drink eatables
+drink phenomena
+drinks eat
+drinks eats
+drinks ate
+drinks eaten
+drinks eatable
+drinks eatables
+drinks phenomena
+undrinkable phenomena
+phenomenon drinks
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/morph.morph b/extensions/spellcheck/hunspell/tests/unit/data/morph.morph
new file mode 100644
index 000000000..9965d7ea8
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/morph.morph
@@ -0,0 +1,48 @@
+> drink
+analyze(drink) = st:drink po:noun
+analyze(drink) = st:drink po:verb al:drank al:drunk ts:present
+stem(drink) = drink
+> drinks
+analyze(drinks) = st:drink po:verb al:drank al:drunk ts:present is:sg_3
+analyze(drinks) = st:drink po:noun is:plur
+stem(drinks) = drink
+> drinkable
+analyze(drinkable) = st:drink po:verb al:drank al:drunk ts:present ds:der_able
+stem(drinkable) = drinkable
+> drinkables
+analyze(drinkables) = st:drink po:verb al:drank al:drunk ts:present ds:der_able is:plur
+stem(drinkables) = drinkable
+> undrinkable
+analyze(undrinkable) = dp:pfx_un sp:un st:drink po:verb al:drank al:drunk ts:present ds:der_able
+stem(undrinkable) = undrinkable
+> undrinkables
+analyze(undrinkables) = dp:pfx_un sp:un st:drink po:verb al:drank al:drunk ts:present ds:der_able is:plur
+stem(undrinkables) = undrinkable
+> drank
+analyze(drank) = po:verb st:drink is:past_1
+stem(drank) = drink
+> drunk
+analyze(drunk) = po:verb st:drink is:past_2
+stem(drunk) = drink
+> phenomenon
+analyze(phenomenon) = st:phenomenon po:noun al:phenomena
+stem(phenomenon) = phenomenon
+> phenomena
+analyze(phenomena) = po:noun st:phenomenon is:plur
+stem(phenomena) = phenomenon
+generate(drink, eat) = drink
+generate(drink, eats) = drinks
+generate(drink, ate) = drank
+generate(drink, eaten) = drunk
+generate(drink, eatable) = drinkable
+generate(drink, eatables) = drinkables
+generate(drink, phenomena) = drinks
+generate(drinks, eat) = drink
+generate(drinks, eats) = drinks
+generate(drinks, ate) = drank
+generate(drinks, eaten) = drunk
+generate(drinks, eatable) = drinkable
+generate(drinks, eatables) = drinkables
+generate(drinks, phenomena) = drinks
+generate(undrinkable, phenomena) = undrinkables
+generate(phenomenon, drinks) = phenomena
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/morph.test b/extensions/spellcheck/hunspell/tests/unit/data/morph.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/morph.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix.aff b/extensions/spellcheck/hunspell/tests/unit/data/needaffix.aff
new file mode 100644
index 000000000..a5981ef69
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix.aff
@@ -0,0 +1,5 @@
+NEEDAFFIX X
+COMPOUNDFLAG Y
+
+SFX A Y 1
+SFX A 0 s/Y .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix.dic b/extensions/spellcheck/hunspell/tests/unit/data/needaffix.dic
new file mode 100644
index 000000000..b5792765e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix.dic
@@ -0,0 +1,3 @@
+2
+foo/YXA
+bar/Y
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix.good b/extensions/spellcheck/hunspell/tests/unit/data/needaffix.good
new file mode 100644
index 000000000..f9e0663f3
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix.good
@@ -0,0 +1,3 @@
+bar
+foos
+barfoos
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix.test b/extensions/spellcheck/hunspell/tests/unit/data/needaffix.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix.wrong b/extensions/spellcheck/hunspell/tests/unit/data/needaffix.wrong
new file mode 100644
index 000000000..257cc5642
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix.wrong
@@ -0,0 +1 @@
+foo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.aff b/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.aff
new file mode 100644
index 000000000..c434dac66
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.aff
@@ -0,0 +1,2 @@
+NEEDAFFIX X
+COMPOUNDFLAG Y
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.dic b/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.dic
new file mode 100644
index 000000000..ff32e878b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.dic
@@ -0,0 +1,5 @@
+4
+foo st:foo id:1
+foo/YX st:foo id:2
+foo/Y st:foo id:3
+bar/Y
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.good b/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.good
new file mode 100644
index 000000000..7e4b098ef
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.good
@@ -0,0 +1,5 @@
+foo
+bar
+foobar
+barfoo
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.morph b/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.morph
new file mode 100644
index 000000000..0f3e47431
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.morph
@@ -0,0 +1,13 @@
+> foo
+analyze(foo) = st:foo id:1
+analyze(foo) = st:foo id:3
+stem(foo) = foo
+> bar
+analyze(bar) = st:bar
+stem(bar) = bar
+> foobar
+analyze(foobar) = pa:foo st:foo id:3 pa:bar
+stem(foobar) = foo
+> barfoo
+analyze(barfoo) = pa:bar st:bar pa:foo st:foo id:3
+stem(barfoo) = barfoo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.test b/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.aff b/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.aff
new file mode 100644
index 000000000..5d55d38e9
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.aff
@@ -0,0 +1,8 @@
+# needaffix on affixes
+NEEDAFFIX X
+
+SFX A Y 1
+SFX A 0 s/XB .
+
+SFX B Y 1
+SFX B 0 baz .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.dic b/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.dic
new file mode 100644
index 000000000..001d95e77
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.dic
@@ -0,0 +1,2 @@
+2
+foo/A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.good b/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.good
new file mode 100644
index 000000000..dc9a6a97d
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.good
@@ -0,0 +1,2 @@
+foo
+foosbaz
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.test b/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.wrong b/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.wrong
new file mode 100644
index 000000000..c09c408f2
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.wrong
@@ -0,0 +1 @@
+foos
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.aff b/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.aff
new file mode 100644
index 000000000..c434dac66
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.aff
@@ -0,0 +1,2 @@
+NEEDAFFIX X
+COMPOUNDFLAG Y
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.dic b/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.dic
new file mode 100644
index 000000000..96f80c12b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.dic
@@ -0,0 +1,5 @@
+4
+foo/X [1]
+foo/Y [2]
+foo/YX [3]
+bar/Y [4]
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.good b/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.good
new file mode 100644
index 000000000..7e4b098ef
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.good
@@ -0,0 +1,5 @@
+foo
+bar
+foobar
+barfoo
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.test b/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.aff b/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.aff
new file mode 100644
index 000000000..6399a3e98
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.aff
@@ -0,0 +1,13 @@
+# NEEDAFFIX on affixes
+NEEDAFFIX X
+
+SFX A Y 2
+SFX A 0 suf/B .
+SFX A 0 pseudosuf/XB .
+
+SFX B Y 1
+SFX B 0 bar .
+
+PFX C Y 2
+PFX C 0 pre .
+PFX C 0 pseudopre/X .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.dic b/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.dic
new file mode 100644
index 000000000..83131e27a
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.dic
@@ -0,0 +1,2 @@
+1
+foo/AC
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.good b/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.good
new file mode 100644
index 000000000..d1b86bf83
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.good
@@ -0,0 +1,11 @@
+foo
+prefoo
+foosuf
+prefoosuf
+foosufbar
+prefoosufbar
+pseudoprefoosuf
+pseudoprefoosufbar
+pseudoprefoopseudosufbar
+prefoopseudosuf
+prefoopseudosufbar
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.test b/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.wrong b/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.wrong
new file mode 100644
index 000000000..fdd1797fd
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.wrong
@@ -0,0 +1,3 @@
+pseudoprefoo
+foopseudosuf
+pseudoprefoopseudosuf
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.aff b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.aff
new file mode 100644
index 000000000..19e698121
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.aff
@@ -0,0 +1,21 @@
+# Test fix of suffixed ngram suggestions with UTF-8 encoding and long flags.
+# Based on Vitaly Piryatinsky's bug report and example.
+SET UTF-8
+FLAG num
+
+PFX 101 Y 1
+PFX 101 0 пред .
+
+SFX 1381 Y 1
+SFX 1381 0 о .
+
+SFX 2000 Y 3
+SFX 2000 0 ам .
+SFX 2000 0 ами .
+SFX 2000 0 ах .
+
+SFX 2022 Y 4
+SFX 2022 0 а .
+SFX 2022 0 у .
+SFX 2022 0 ом .
+SFX 2022 0 е .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.dic b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.dic
new file mode 100644
index 000000000..27ce413ae
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.dic
@@ -0,0 +1,2 @@
+1
+человек/2022,2000,101
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.good b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.good
new file mode 100644
index 000000000..366d92a9b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.good
@@ -0,0 +1 @@
+человек
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.sug b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.sug
new file mode 100644
index 000000000..58ab09b53
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.sug
@@ -0,0 +1,2 @@
+человек
+человек
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.test b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.test
new file mode 100644
index 000000000..cde7c5410
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i utf-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.wrong b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.wrong
new file mode 100644
index 000000000..97de996e0
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.wrong
@@ -0,0 +1,2 @@
+времячко
+человеко
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.aff b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.aff
new file mode 100644
index 000000000..c9361da4c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.aff
@@ -0,0 +1,5 @@
+# don't suggest word with NOSUGGEST flag (for example vulgar or obscene words)
+# See OpenOffice.org Issue #55498
+# (nosuggest.sug is an empty file)
+NOSUGGEST A
+COMPOUNDFLAG B
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.dic b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.dic
new file mode 100644
index 000000000..dc80c916d
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.dic
@@ -0,0 +1,3 @@
+1
+foo/AB
+bar/B
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.good b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.good
new file mode 100644
index 000000000..ad91a5e31
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.good
@@ -0,0 +1,3 @@
+foo
+foobar
+barfoo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.sug b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.sug
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.sug
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.test b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.wrong b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.wrong
new file mode 100644
index 000000000..89c7a1a9c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.wrong
@@ -0,0 +1,3 @@
+foox
+foobarx
+barfoox
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/oconv.aff b/extensions/spellcheck/hunspell/tests/unit/data/oconv.aff
new file mode 100644
index 000000000..13a3d9b20
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/oconv.aff
@@ -0,0 +1,12 @@
+# output conversion
+SET UTF-8
+
+OCONV 7
+OCONV a A
+OCONV á Á
+OCONV b B
+OCONV c C
+OCONV d D
+OCONV e E
+OCONV é É
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/oconv.dic b/extensions/spellcheck/hunspell/tests/unit/data/oconv.dic
new file mode 100644
index 000000000..359186cac
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/oconv.dic
@@ -0,0 +1,4 @@
+3
+bébé
+dádá
+aábcdeé
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/oconv.good b/extensions/spellcheck/hunspell/tests/unit/data/oconv.good
new file mode 100644
index 000000000..6cdaab16e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/oconv.good
@@ -0,0 +1,2 @@
+bébé
+dádá
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/oconv.sug b/extensions/spellcheck/hunspell/tests/unit/data/oconv.sug
new file mode 100644
index 000000000..a191c629d
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/oconv.sug
@@ -0,0 +1,3 @@
+BÉBÉ
+DÁDÁ
+AÁBCDEÉ
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/oconv.test b/extensions/spellcheck/hunspell/tests/unit/data/oconv.test
new file mode 100644
index 000000000..cde7c5410
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/oconv.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i utf-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/oconv.wrong b/extensions/spellcheck/hunspell/tests/unit/data/oconv.wrong
new file mode 100644
index 000000000..73dcc895a
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/oconv.wrong
@@ -0,0 +1,3 @@
+béb
+dád
+aábcde
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.aff b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.aff
new file mode 100644
index 000000000..e700b0e54
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.aff
@@ -0,0 +1,5 @@
+# words only in compounds (see also fogemorpheme example)
+ONLYINCOMPOUND O
+COMPOUNDFLAG A
+SFX B Y 1
+SFX B 0 s .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.dic b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.dic
new file mode 100644
index 000000000..dc742f7ab
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.dic
@@ -0,0 +1,3 @@
+2
+foo/A
+pseudo/OAB
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.good b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.good
new file mode 100644
index 000000000..151d59734
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.good
@@ -0,0 +1,4 @@
+foo
+pseudofoo
+foopseudo
+foopseudos
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.sug b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.sug
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.sug
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.test b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.wrong b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.wrong
new file mode 100644
index 000000000..115d0c617
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.wrong
@@ -0,0 +1,2 @@
+pseudo
+pseudos
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.aff b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.aff
new file mode 100644
index 000000000..5d0ac5e69
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.aff
@@ -0,0 +1,12 @@
+# affixes only in compounds (see also fogemorpheme example)
+ONLYINCOMPOUND O
+COMPOUNDFLAG A
+COMPOUNDPERMITFLAG P
+
+SFX B Y 1
+SFX B 0 s/OP .
+
+# obligate fogemorpheme by forbidding the stem (0) in compounds
+
+CHECKCOMPOUNDPATTERN 1
+CHECKCOMPOUNDPATTERN 0/B /A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.dic b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.dic
new file mode 100644
index 000000000..1adab653b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.dic
@@ -0,0 +1,3 @@
+2
+foo/A
+pseudo/AB
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.good b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.good
new file mode 100644
index 000000000..a31ce34ac
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.good
@@ -0,0 +1,3 @@
+foo
+foopseudo
+pseudosfoo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.test b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.wrong b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.wrong
new file mode 100644
index 000000000..29a71a3c3
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.wrong
@@ -0,0 +1,3 @@
+pseudos
+foopseudos
+pseudofoo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.aff b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.aff
new file mode 100644
index 000000000..413aca404
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.aff
@@ -0,0 +1,13 @@
+FLAG long
+COMPOUNDBEGIN Ca
+COMPOUNDMIDDLE Cb
+COMPOUNDEND Cc
+COMPOUNDPERMITFLAG Cp
+ONLYINCOMPOUND Cx
+
+CHECKCOMPOUNDPATTERN 1
+CHECKCOMPOUNDPATTERN /Ch /Xs
+
+SFX Ch Y 2
+SFX Ch 0 s/CaCbCxCp .
+SFX Ch 0 s-/CaCbCcCp .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.dic b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.dic
new file mode 100644
index 000000000..e7831b704
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.dic
@@ -0,0 +1,4 @@
+3
+schoonheid/Ch
+port/CcXs
+sport/Cc
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.good b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.good
new file mode 100644
index 000000000..fbaf830be
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.good
@@ -0,0 +1 @@
+schoonheidssport
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.test b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.wrong b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.wrong
new file mode 100644
index 000000000..3f9e8949b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.wrong
@@ -0,0 +1 @@
+schoonheidsport
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.aff b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.aff
new file mode 100644
index 000000000..22dfe69d8
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.aff
@@ -0,0 +1,27 @@
+# Test file based on OpenTaal's Dutch dictionary, coded by Ruud Baars
+
+WORDCHARS -
+NOSPLITSUGS
+FLAG long
+
+COMPOUNDBEGIN Ca
+COMPOUNDMIDDLE Cb
+COMPOUNDEND Cc
+COMPOUNDPERMITFLAG Cp
+ONLYINCOMPOUND Cx
+
+CHECKCOMPOUNDPATTERN 2
+CHECKCOMPOUNDPATTERN 0/Ch /Xs
+CHECKCOMPOUNDPATTERN 0/Xm /Xm
+
+SFX CA Y 2
+SFX CA 0 /CaCp .
+SFX CA 0 -/CaCp .
+
+SFX CB Y 2
+SFX CB 0 /CbCp .
+SFX CB 0 -/CbCp .
+
+SFX Ch Y 2
+SFX Ch 0 s/CaCbCxCp .
+SFX Ch 0 s-/CaCbCcCp .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.dic b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.dic
new file mode 100644
index 000000000..52581e942
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.dic
@@ -0,0 +1,4 @@
+100
+test/CACBCc
+zout/CACBXm
+suiker/CACBXm
\ No newline at end of file
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.good b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.good
new file mode 100644
index 000000000..e604d6e2f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.good
@@ -0,0 +1 @@
+zout-suikertest
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.test b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.wrong b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.wrong
new file mode 100644
index 000000000..d8ddb16a5
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.wrong
@@ -0,0 +1 @@
+zoutsuikertest
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.aff b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.aff
new file mode 100644
index 000000000..fa073432f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.aff
@@ -0,0 +1,9 @@
+TRY r
+
+FORBIDDENWORD F
+COMPOUNDRULE 2
+COMPOUNDRULE WW
+COMPOUNDRULE WWW
+
+SFX S Y 1
+SFX S 0 s .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.dic b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.dic
new file mode 100644
index 000000000..44375948f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.dic
@@ -0,0 +1,5 @@
+4
+foo/W
+word/W
+bar/WS
+foowordbar/FS
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.good b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.good
new file mode 100644
index 000000000..73a96a784
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.good
@@ -0,0 +1,3 @@
+fooword
+wordbar
+barwordfoo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.sug b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.sug
new file mode 100644
index 000000000..60111a417
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.sug
@@ -0,0 +1 @@
+barwordfoo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.test b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.wrong b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.wrong
new file mode 100644
index 000000000..59dfddfb2
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.wrong
@@ -0,0 +1,5 @@
+foowordbar
+foowordbars
+foowordba
+foowordbas
+barwodfoo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.aff b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.aff
new file mode 100644
index 000000000..441354d6b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.aff
@@ -0,0 +1,7 @@
+TRY r
+
+FORBIDDENWORD F
+COMPOUNDFLAG W
+
+SFX S Y 1
+SFX S 0 s .
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.dic b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.dic
new file mode 100644
index 000000000..895dd6230
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.dic
@@ -0,0 +1,5 @@
+3
+foo/WS
+word/W
+bar/WS
+foowordbar/FS
\ No newline at end of file
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.good b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.good
new file mode 100644
index 000000000..17cf47de3
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.good
@@ -0,0 +1,4 @@
+fooword
+wordbar
+barwordfoo
+barwordfoos
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.sug b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.sug
new file mode 100644
index 000000000..60111a417
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.sug
@@ -0,0 +1 @@
+barwordfoo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.test b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.wrong b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.wrong
new file mode 100644
index 000000000..59dfddfb2
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.wrong
@@ -0,0 +1,5 @@
+foowordbar
+foowordbars
+foowordba
+foowordbas
+barwodfoo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.aff b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.aff
new file mode 100644
index 000000000..15c914bec
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.aff
@@ -0,0 +1,8 @@
+KEEPCASE K
+COMPOUNDBEGIN B
+COMPOUNDEND E
+COMPOUNDFLAG C
+COMPOUNDMIN 1
+WORDCHARS -
+BREAK 1
+BREAK #
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.dic b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.dic
new file mode 100644
index 000000000..b05ec131a
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.dic
@@ -0,0 +1,7 @@
+5
+tv-/KB
+-tv/KE
+word/C
+NATO-/B
+-NATO/E
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.good b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.good
new file mode 100644
index 000000000..e1c112910
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.good
@@ -0,0 +1,4 @@
+tv-word
+word-tv
+NATO-word
+word-NATO
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.sug b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.sug
new file mode 100644
index 000000000..07dde3fe6
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.sug
@@ -0,0 +1,8 @@
+Tv-word, Tv- word, Word
+Tv- word, Word
+word -tv, word-tv, word
+word -tv, word-tv, word
+wordword-tv, word
+Tv-word-tv
+NATO-
+-NATO
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.test b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.wrong b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.wrong
new file mode 100644
index 000000000..b15752ed2
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.wrong
@@ -0,0 +1,8 @@
+TV-word
+Tv-word
+word-TV
+word-Tv
+wordword-TV
+TV-word-TV
+Nato-word
+word-nato
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/phone.aff b/extensions/spellcheck/hunspell/tests/unit/data/phone.aff
new file mode 100644
index 000000000..5a27c14d7
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/phone.aff
@@ -0,0 +1,255 @@
+# phonetic suggestions by PHONE and optional ph field of dictionary words
+# Documentation of PHONE: http://aspell.net/man-html/Phonetic-Code.html
+
+# phonetic_english.h - phonetic transformation rules for use with phonetic.c
+# Copyright (C) 2000 Björn Jacke
+#
+# This rule set is based on Lawrence Phillips original metaphone
+# algorithm with modifications made by Michael Kuhn in his
+# C implementation, more modifications by Björn Jacke when
+# converting the algorithm to a rule set and minor
+# touch ups by Kevin Atkinson
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License version 2.1 as published by the Free Software Foundation;
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# Björn Jacke may be reached by email at bjoern.jacke@gmx.de
+#
+# Changelog:
+#
+# 2000-01-05 Björn Jacke <bjoern.jacke@gmx.de>
+# - first version with translation rules derived from
+# metaphone.cc distributed with aspell 0.28.3
+# - "TH" is now represented as "@" because "0" is a
+# meta character
+# - removed TH(!vowel) --> T; always use TH --> # instead
+# - dropped "^AE" -> "E" (redundant)
+# - "ing" is transformed to "N", not "NK"
+# - "SCH(EO)" transforms to "SK" now
+# - added R --> SILENT if (after a vowel) and no (vowel or
+# "y" follows) like in "Marcy" or "abort"
+# - H is SILENT in RH at beginning of words
+# - H is SILENT if vowel leads and "Y" follows
+# - some ".OUGH.." --> ...F exceptions added
+# - "^V" transforms to "W"
+# 2000-01-07 Kevin Atkinson <kevinatk@home.com>
+# Converted from header to data file.
+#
+# 2007-08-23 László Németh <nemeth AT OOo>
+# Add PHONE header and PHONE keywords
+#
+# version 1.1
+
+PHONE 105
+PHONE AH(AEIOUY)-^ *H
+PHONE AR(AEIOUY)-^ *R
+PHONE A(HR)^ *
+PHONE A^ *
+PHONE AH(AEIOUY)- H
+PHONE AR(AEIOUY)- R
+PHONE A(HR) _
+PHONE BB- _
+PHONE B B
+PHONE CQ- _
+PHONE CIA X
+PHONE CH X
+PHONE C(EIY)- S
+PHONE CK K
+PHONE COUGH^ KF
+PHONE CC< C
+PHONE C K
+PHONE DG(EIY) K
+PHONE DD- _
+PHONE D T
+PHONE < E
+PHONE EH(AEIOUY)-^ *H
+PHONE ER(AEIOUY)-^ *R
+PHONE E(HR)^ *
+PHONE ENOUGH^$ *NF
+PHONE E^ *
+PHONE EH(AEIOUY)- H
+PHONE ER(AEIOUY)- R
+PHONE E(HR) _
+PHONE FF- _
+PHONE F F
+PHONE GN^ N
+PHONE GN$ N
+PHONE GNS$ NS
+PHONE GNED$ N
+PHONE GH(AEIOUY)- K
+PHONE GH _
+PHONE GG9 K
+PHONE G K
+PHONE H H
+PHONE IH(AEIOUY)-^ *H
+PHONE IR(AEIOUY)-^ *R
+PHONE I(HR)^ *
+PHONE I^ *
+PHONE ING6 N
+PHONE IH(AEIOUY)- H
+PHONE IR(AEIOUY)- R
+PHONE I(HR) _
+PHONE J K
+PHONE KN^ N
+PHONE KK- _
+PHONE K K
+PHONE LAUGH^ LF
+PHONE LL- _
+PHONE L L
+PHONE MB$ M
+PHONE MM M
+PHONE M M
+PHONE NN- _
+PHONE N N
+PHONE OH(AEIOUY)-^ *H
+PHONE OR(AEIOUY)-^ *R
+PHONE O(HR)^ *
+PHONE O^ *
+PHONE OH(AEIOUY)- H
+PHONE OR(AEIOUY)- R
+PHONE O(HR) _
+PHONE PH F
+PHONE PN^ N
+PHONE PP- _
+PHONE P P
+PHONE Q K
+PHONE RH^ R
+PHONE ROUGH^ RF
+PHONE RR- _
+PHONE R R
+PHONE SCH(EOU)- SK
+PHONE SC(IEY)- S
+PHONE SH X
+PHONE SI(AO)- X
+PHONE SS- _
+PHONE S S
+PHONE TI(AO)- X
+PHONE TH @
+PHONE TCH-- _
+PHONE TOUGH^ TF
+PHONE TT- _
+PHONE T T
+PHONE UH(AEIOUY)-^ *H
+PHONE UR(AEIOUY)-^ *R
+PHONE U(HR)^ *
+PHONE U^ *
+PHONE UH(AEIOUY)- H
+PHONE UR(AEIOUY)- R
+PHONE U(HR) _
+PHONE V^ W
+PHONE V F
+PHONE WR^ R
+PHONE WH^ W
+PHONE W(AEIOU)- W
+PHONE X^ S
+PHONE X KS
+PHONE Y(AEIOU)- Y
+PHONE ZZ- _
+PHONE Z S
+
+#The rules in a different view:
+#
+# Exceptions:
+#
+# Beginning of word: "gn", "kn-", "pn-", "wr-" ----> drop first letter
+# "Aebersold", "Gnagy", "Knuth", "Pniewski", "Wright"
+#
+# Beginning of word: "x" ----> change to "s"
+# as in "Deng Xiaopeng"
+#
+# Beginning of word: "wh-" ----> change to "w"
+# as in "Whalen"
+# Beginning of word: leading vowels are transformed to "*"
+#
+# "[crt]ough" and "enough" are handled separately because of "F" sound
+#
+#
+# A --> A at beginning
+# _ otherwise
+#
+# B --> B unless at the end of word after "m", as in "dumb", "McComb"
+#
+# C --> X (sh) if "-cia-" or "-ch-"
+# S if "-ci-", "-ce-", or "-cy-"
+# SILENT if "-sci-", "-sce-", or "-scy-", or "-cq-"
+# K otherwise, including in "-sch-"
+#
+# D --> K if in "-dge-", "-dgy-", or "-dgi-"
+# T otherwise
+#
+# E --> A at beginning
+# _ SILENT otherwise
+#
+# F --> F
+#
+# G --> SILENT if in "-gh-" and not at end or before a vowel
+# in "-gn" or "-gned" or "-gns"
+# in "-dge-" etc., as in above rule
+# K if before "i", or "e", or "y" if not double "gg"
+#
+# K otherwise (incl. "GG"!)
+#
+# H --> SILENT if after vowel and no vowel or "Y" follows
+# or after "-ch-", "-sh-", "-ph-", "-th-", "-gh-"
+# or after "rh-" at beginning
+# H otherwise
+#
+# I --> A at beginning
+# _ SILENT otherwise
+#
+# J --> K
+#
+# K --> SILENT if after "c"
+# K otherwise
+#
+# L --> L
+#
+# M --> M
+#
+# N --> N
+#
+# O --> A at beginning
+# _ SILENT otherwise
+#
+# P --> F if before "h"
+# P otherwise
+#
+# Q --> K
+#
+# R --> SILENT if after vowel and no vowel or "Y" follows
+# R otherwise
+#
+# S --> X (sh) if before "h" or in "-sio-" or "-sia-"
+# SK if followed by "ch(eo)" (SCH(EO))
+# S otherwise
+#
+# T --> X (sh) if "-tia-" or "-tio-"
+# @ (th) if before "h"
+# silent if in "-tch-"
+# T otherwise
+#
+# U --> A at beginning
+# _ SILENT otherwise
+#
+# V --> W if first letter of word
+# F otherwise
+#
+# W --> SILENT if not followed by a vowel
+# W if followed by a vowel
+#
+# X --> KS
+#
+# Y --> SILENT if not followed by a vowel
+# Y if followed by a vowel
+#
+# Z --> S
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/phone.dic b/extensions/spellcheck/hunspell/tests/unit/data/phone.dic
new file mode 100644
index 000000000..51b0743d0
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/phone.dic
@@ -0,0 +1,11 @@
+10
+Brasilia
+brassily
+Brazilian
+brilliance
+brilliancy
+brilliant
+brain
+brass
+Churchillian
+xxxxxxxxxx ph:Brasilia
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/phone.sug b/extensions/spellcheck/hunspell/tests/unit/data/phone.sug
new file mode 100644
index 000000000..cc22e3798
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/phone.sug
@@ -0,0 +1 @@
+Brasilia, Xxxxxxxxxx, Brilliant, Brazilian, Brassily, Brilliance
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/phone.test b/extensions/spellcheck/hunspell/tests/unit/data/phone.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/phone.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/phone.wrong b/extensions/spellcheck/hunspell/tests/unit/data/phone.wrong
new file mode 100644
index 000000000..ca9db395e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/phone.wrong
@@ -0,0 +1 @@
+Brasillian
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/rep.aff b/extensions/spellcheck/hunspell/tests/unit/data/rep.aff
new file mode 100644
index 000000000..485755c89
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/rep.aff
@@ -0,0 +1,21 @@
+# With REP suggestions, we can fix typical language specific misspellings.
+
+# switch off ngram suggestion for testing
+MAXNGRAMSUGS 0
+
+REP 8
+REP f ph
+REP ph f
+REP shun$ tion
+REP ^alot$ a_lot # add the highest priority for "a lot" suggestion to "alot"
+REP ^foo$ bar
+REP ' _ # "un'alunno" -> "un alunno"
+REP ^vinten$ vinte_e_un
+REP s 's
+
+
+SFX A Y 1
+SFX A 0 's .
+
+
+WORDCHARS '
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/rep.dic b/extensions/spellcheck/hunspell/tests/unit/data/rep.dic
new file mode 100644
index 000000000..f9a4c008b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/rep.dic
@@ -0,0 +1,15 @@
+10
+form
+phantom
+vacation
+vacations
+a
+lot
+un
+alunno
+bar
+barbars
+vinte
+e
+un
+auto/A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/rep.sug b/extensions/spellcheck/hunspell/tests/unit/data/rep.sug
new file mode 100644
index 000000000..b48a5b80e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/rep.sug
@@ -0,0 +1,8 @@
+form
+phantom
+vacation
+a lot, lot
+un alunno
+bar
+vinte e un
+auto's, auto
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/rep.test b/extensions/spellcheck/hunspell/tests/unit/data/rep.test
new file mode 100644
index 000000000..dc295077f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/rep.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i ISO8859-1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/rep.wrong b/extensions/spellcheck/hunspell/tests/unit/data/rep.wrong
new file mode 100644
index 000000000..cd9699c4c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/rep.wrong
@@ -0,0 +1,11 @@
+phorm
+fantom
+vacashun
+vacashuns
+alot
+un'alunno
+foo
+foobars
+barfoos
+vinten
+autos
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/reputf.aff b/extensions/spellcheck/hunspell/tests/unit/data/reputf.aff
new file mode 100644
index 000000000..ac434a426
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/reputf.aff
@@ -0,0 +1,9 @@
+# With REP suggestions, we can fix typical language specific misspellings.
+
+SET UTF-8
+
+# switch off ngram suggestion for testing
+MAXNGRAMSUGS 0
+
+REP 1
+REP oo őő
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/reputf.dic b/extensions/spellcheck/hunspell/tests/unit/data/reputf.dic
new file mode 100644
index 000000000..1890fcb8e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/reputf.dic
@@ -0,0 +1,2 @@
+1
+főő
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/reputf.sug b/extensions/spellcheck/hunspell/tests/unit/data/reputf.sug
new file mode 100644
index 000000000..8a00bc371
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/reputf.sug
@@ -0,0 +1 @@
+főő
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/reputf.test b/extensions/spellcheck/hunspell/tests/unit/data/reputf.test
new file mode 100644
index 000000000..cde7c5410
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/reputf.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i utf-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/reputf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/reputf.wrong
new file mode 100644
index 000000000..257cc5642
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/reputf.wrong
@@ -0,0 +1 @@
+foo
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.aff b/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.aff
new file mode 100644
index 000000000..3ab347319
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.aff
@@ -0,0 +1,8 @@
+# Forbid compound word with triple letters
+CHECKCOMPOUNDTRIPLE
+# Allow simplified forms
+SIMPLIFIEDTRIPLE
+
+COMPOUNDMIN 2
+
+COMPOUNDFLAG A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.dic b/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.dic
new file mode 100644
index 000000000..cfe7a35dc
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.dic
@@ -0,0 +1,3 @@
+2
+glass/A
+sko/A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.good b/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.good
new file mode 100644
index 000000000..23a4815e8
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.good
@@ -0,0 +1,3 @@
+glass
+sko
+glassko
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.test b/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.wrong b/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.wrong
new file mode 100644
index 000000000..281128768
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.wrong
@@ -0,0 +1 @@
+glasssko
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/slash.aff b/extensions/spellcheck/hunspell/tests/unit/data/slash.aff
new file mode 100644
index 000000000..6ab104b9e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/slash.aff
@@ -0,0 +1,4 @@
+# slashes in words (\/)
+
+# (only for tokenization)
+WORDCHARS /:
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/slash.dic b/extensions/spellcheck/hunspell/tests/unit/data/slash.dic
new file mode 100644
index 000000000..478276df6
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/slash.dic
@@ -0,0 +1,5 @@
+4
+/
+1\/2
+http:\/\/
+\/usr\/share\/myspell\/
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/slash.good b/extensions/spellcheck/hunspell/tests/unit/data/slash.good
new file mode 100644
index 000000000..4a25e205f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/slash.good
@@ -0,0 +1,4 @@
+/
+1/2
+http://
+/usr/share/myspell/
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/slash.test b/extensions/spellcheck/hunspell/tests/unit/data/slash.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/slash.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/sug.aff b/extensions/spellcheck/hunspell/tests/unit/data/sug.aff
new file mode 100644
index 000000000..b1f2adba6
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/sug.aff
@@ -0,0 +1,15 @@
+# new suggestion methods of Hunspell 1.5:
+# capitalization: nasa -> NASA
+# long swap: permenant -> permanent
+# long mov: Ghandi -> Gandhi
+# double two characters: vacacation -> vacation
+# space with REP: "alot" -> "a lot" ("a lot" need to be in the dic file.)
+
+# switch off ngram suggestion for testing
+MAXNGRAMSUGS 0
+REP 1
+REP alot a_lot
+KEY qwertzuiop|asdfghjkl|yxcvbnm|aq
+WORDCHARS .
+FORBIDDENWORD ?
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/sug.dic b/extensions/spellcheck/hunspell/tests/unit/data/sug.dic
new file mode 100644
index 000000000..0c22cedf4
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/sug.dic
@@ -0,0 +1,11 @@
+1
+NASA
+Gandhi
+grateful
+permanent
+vacation
+a
+lot
+have
+which
+McDonald
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/sug.sug b/extensions/spellcheck/hunspell/tests/unit/data/sug.sug
new file mode 100644
index 000000000..e277bdb77
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/sug.sug
@@ -0,0 +1,12 @@
+NASA
+Gandhi
+grateful
+permanent
+vacation
+a lot, lot
+permanent. Vacation
+have
+which
+Gandhi
+McDonald
+permanent
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/sug.test b/extensions/spellcheck/hunspell/tests/unit/data/sug.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/sug.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/sug.wrong b/extensions/spellcheck/hunspell/tests/unit/data/sug.wrong
new file mode 100644
index 000000000..4d184d5a6
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/sug.wrong
@@ -0,0 +1,12 @@
+nasa
+Ghandi
+greatful
+permenant
+vacacation
+alot
+permanent.Vacation
+ahev
+hwihc
+GAndhi
+Mcdonald
+permqnent
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/List_of_common_misspellings.txt b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/List_of_common_misspellings.txt
new file mode 100644
index 000000000..571f3796a
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/List_of_common_misspellings.txt
@@ -0,0 +1,4020 @@
+# source: http://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
+abandonned abandoned
+aberation aberration
+abilties abilities
+abilty ability
+abondon abandon
+abondoned abandoned
+abondoning abandoning
+abondons abandons
+aborigene aborigine
+abortificant abortifacient
+abreviated abbreviated
+abreviation abbreviation
+abritrary arbitrary
+absense absence
+absolutly absolutely
+absorbsion absorption
+absorbtion absorption
+abundacies abundances
+abundancies abundances
+abundunt abundant
+abutts abuts
+acadamy academy
+acadmic academic
+accademic academic
+accademy academy
+acccused accused
+accelleration acceleration
+accension accession, ascension
+acceptence acceptance
+acceptible acceptable
+accessable accessible
+accidentaly accidentally
+accidently accidentally
+acclimitization acclimatization
+acommodate accommodate
+accomadate accommodate
+accomadated accommodated
+accomadates accommodates
+accomadating accommodating
+accomadation accommodation
+accomadations accommodations
+accomdate accommodate
+accomodate accommodate
+accomodated accommodated
+accomodates accommodates
+accomodating accommodating
+accomodation accommodation
+accomodations accommodations
+accompanyed accompanied
+accordeon accordion
+accordian accordion
+accoring according
+accoustic acoustic
+accquainted acquainted
+accross across
+accussed accused
+acedemic academic
+acheive achieve
+acheived achieved
+acheivement achievement
+acheivements achievements
+acheives achieves
+acheiving achieving
+acheivment achievement
+acheivments achievements
+achievment achievement
+achievments achievements
+achive achieve, archive
+achived achieved, archived
+achivement achievement
+achivements achievements
+acknowldeged acknowledged
+acknowledgeing acknowledging
+ackward awkward, backward
+acomplish accomplish
+acomplished accomplished
+acomplishment accomplishment
+acomplishments accomplishments
+acording according
+acordingly accordingly
+acquaintence acquaintance
+acquaintences acquaintances
+acquiantence acquaintance
+acquiantences acquaintances
+acquited acquitted
+activites activities
+activly actively
+actualy actually
+acuracy accuracy
+acused accused
+acustom accustom
+acustommed accustomed
+adavanced advanced
+adbandon abandon
+additinally additionally
+additionaly additionally
+addmission admission
+addopt adopt
+addopted adopted
+addoptive adoptive
+addres address, adders
+addresable addressable
+addresed addressed
+addresing addressing
+addressess addresses
+addtion addition
+addtional additional
+adecuate adequate
+adhearing adhering
+adherance adherence
+admendment amendment
+admininistrative administrative
+adminstered administered
+adminstrate administrate
+adminstration administration
+adminstrative administrative
+adminstrator administrator
+admissability admissibility
+admissable admissible
+admited admitted
+admitedly admittedly
+adn and
+adolecent adolescent
+adquire acquire
+adquired acquired
+adquires acquires
+adquiring acquiring
+adres address
+adresable addressable
+adresing addressing
+adress address
+adressable addressable
+adressed addressed
+adressing addressing, dressing
+adventrous adventurous
+advertisment advertisement
+advertisments advertisements
+advesary adversary
+adviced advised
+aeriel aerial
+aeriels aerials
+afair affair
+afficianados aficionados
+afficionado aficionado
+afficionados aficionados
+affilate affiliate
+affilliate affiliate
+affort afford, effort
+aforememtioned aforementioned
+againnst against
+agains against
+agaisnt against
+aganist against
+aggaravates aggravates
+aggreed agreed
+aggreement agreement
+aggregious egregious
+aggresive aggressive
+agian again
+agianst against
+agin again
+agina again, angina
+aginst against
+agravate aggravate
+agre agree
+agred agreed
+agreeement agreement
+agreemnt agreement
+agregate aggregate
+agregates aggregates
+agreing agreeing
+agression aggression
+agressive aggressive
+agressively aggressively
+agressor aggressor
+agricuture agriculture
+agrieved aggrieved
+ahev have
+ahppen happen
+ahve have
+aicraft aircraft
+aiport airport
+airbourne airborne
+aircaft aircraft
+aircrafts aircraft
+airporta airports
+airrcraft aircraft
+aisian asian
+albiet albeit
+alchohol alcohol
+alchoholic alcoholic
+alchol alcohol
+alcholic alcoholic
+alcohal alcohol
+alcoholical alcoholic
+aledge allege
+aledged alleged
+aledges alleges
+alege allege
+aleged alleged
+alegience allegiance
+algebraical algebraic
+algorhitms algorithms
+algoritm algorithm
+algoritms algorithms
+alientating alienating
+alledge allege
+alledged alleged
+alledgedly allegedly
+alledges alleges
+allegedely allegedly
+allegedy allegedly
+allegely allegedly
+allegence allegiance
+allegience allegiance
+allign align
+alligned aligned
+alliviate alleviate
+allopone allophone
+allopones allophones
+allready already
+allthough although
+alltime all-time
+alltogether altogether
+almsot almost
+alochol alcohol
+alomst almost
+alot a lot, allot
+alotted allotted
+alowed allowed
+alowing allowing
+alreayd already
+alse else
+alsot also
+alternitives alternatives
+altho although
+althought although
+altough although
+alusion allusion, illusion
+alwasy always
+alwyas always
+amalgomated amalgamated
+amatuer amateur
+amature armature, amateur
+amendmant amendment
+amerliorate ameliorate
+amke make
+amking making
+ammend amend
+ammended amended
+ammendment amendment
+ammendments amendments
+ammount amount
+ammused amused
+amoung among
+amoungst amongst
+amung among
+analagous analogous
+analitic analytic
+analogeous analogous
+anarchim anarchism
+anarchistm anarchism
+anbd and
+ancestory ancestry
+ancilliary ancillary
+androgenous androgynous
+androgeny androgyny
+anihilation annihilation
+aniversary anniversary
+annoint anoint
+annointed anointed
+annointing anointing
+annoints anoints
+annouced announced
+annualy annually
+annuled annulled
+anohter another
+anomolies anomalies
+anomolous anomalous
+anomoly anomaly
+anonimity anonymity
+anounced announced
+ansalisation nasalisation
+ansalization nasalization
+ansestors ancestors
+antartic antarctic
+anthromorphization anthropomorphization
+anual annual, anal
+anulled annulled
+anwsered answered
+anyhwere anywhere
+anyother any other
+anytying anything
+aparent apparent
+aparment apartment
+apenines apennines, Apennines
+aplication application
+aplied applied
+apolegetics apologetics
+apon upon, apron
+apparant apparent
+apparantly apparently
+appart apart
+appartment apartment
+appartments apartments
+appealling appealing, appalling
+appeareance appearance
+appearence appearance
+appearences appearances
+appenines apennines, Apennines
+apperance appearance
+apperances appearances
+applicaiton application
+applicaitons applications
+appologies apologies
+appology apology
+apprearance appearance
+apprieciate appreciate
+approachs approaches
+appropiate appropriate
+appropraite appropriate
+appropropiate appropriate
+approproximate approximate
+approxamately approximately
+approxiately approximately
+approximitely approximately
+aprehensive apprehensive
+apropriate appropriate
+aproximate approximate
+aproximately approximately
+aquaintance acquaintance
+aquainted acquainted
+aquiantance acquaintance
+aquire acquire
+aquired acquired
+aquiring acquiring
+aquisition acquisition
+aquitted acquitted
+aranged arranged
+arangement arrangement
+arbitarily arbitrarily
+arbitary arbitrary
+archaelogists archaeologists
+archaelogy archaeology
+archaoelogy archeology, archaeology
+archaology archeology, archaeology
+archeaologist archeologist, archaeologist
+archeaologists archeologists, archaeologists
+archetect architect
+archetects architects
+archetectural architectural
+archetecturally architecturally
+archetecture architecture
+archiac archaic
+archictect architect
+archimedian archimedean
+architechturally architecturally
+architechture architecture
+architechtures architectures
+architectual architectural
+archtype archetype
+archtypes archetypes
+aready already
+areodynamics aerodynamics
+argubly arguably
+arguement argument
+arguements arguments
+arised arose
+arival arrival
+armamant armament
+armistace armistice
+aroud around
+arrangment arrangement
+arrangments arrangements
+arround around
+artical article
+artice article
+articel article
+artifical artificial
+artifically artificially
+artillary artillery
+arund around
+asetic ascetic
+asign assign
+aslo also
+asociated associated
+asorbed absorbed
+asphyxation asphyxiation
+assasin assassin
+assasinate assassinate
+assasinated assassinated
+assasinates assassinates
+assasination assassination
+assasinations assassinations
+assasined assassinated
+assasins assassins
+assassintation assassination
+assemple assemble
+assertation assertion
+asside aside
+assisnate assassinate
+assit assist
+assitant assistant
+assocation association
+assoicate associate
+assoicated associated
+assoicates associates
+assosication assassination
+asssassans assassins
+assualt assault
+assualted assaulted
+assymetric asymmetric
+assymetrical asymmetrical
+asteriod asteroid
+asthetic aesthetic
+asthetical aesthetical
+asthetically aesthetically
+asume assume
+aswell as well
+atain attain
+atempting attempting
+atheistical atheistic
+athenean athenian
+atheneans athenians
+athiesm atheism
+athiest atheist
+atorney attorney
+atribute attribute
+atributed attributed
+atributes attributes
+attaindre attainder, attained
+attemp attempt
+attemped attempted
+attemt attempt
+attemted attempted
+attemting attempting
+attemts attempts
+attendence attendance
+attendent attendant
+attendents attendants
+attened attended
+attension attention
+attitide attitude
+attributred attributed
+attrocities atrocities
+audeince audience
+auromated automated
+austrailia Australia
+austrailian Australian
+auther author
+authobiographic autobiographic
+authobiography autobiography
+authorative authoritative
+authorites authorities
+authorithy authority
+authoritiers authorities
+authoritive authoritative
+authrorities authorities
+autochtonous autochthonous
+autoctonous autochthonous
+automaticly automatically
+automibile automobile
+automonomous autonomous
+autor author
+autority authority
+auxilary auxiliary
+auxillaries auxiliaries
+auxillary auxiliary
+auxilliaries auxiliaries
+auxilliary auxiliary
+availablity availability
+availaible available
+availble available
+availiable available
+availible available
+avalable available
+avalance avalanche
+avaliable available
+avation aviation
+avengence a vengeance
+averageed averaged
+avilable available
+awared awarded
+awya away
+baceause because
+backgorund background
+backrounds backgrounds
+bakc back
+banannas bananas
+bandwith bandwidth
+bankrupcy bankruptcy
+banruptcy bankruptcy
+baout about, bout
+basicaly basically
+basicly basically
+bcak back
+beachead beachhead
+beacuse because
+beastiality bestiality
+beatiful beautiful
+beaurocracy bureaucracy
+beaurocratic bureaucratic
+beautyfull beautiful
+becamae became
+becasue because
+beccause because
+becomeing becoming
+becomming becoming
+becouse because
+becuase because
+bedore before
+befoer before
+beggin begin, begging
+begginer beginner
+begginers beginners
+beggining beginning
+begginings beginnings
+beggins begins
+begining beginning
+beginnig beginning
+behavour behavior, behaviour
+beleagured beleaguered
+beleif belief
+beleive believe
+beleived believed
+beleives believes
+beleiving believing
+beligum Belgium
+belive believe
+belived believed
+belives believes, beliefs
+belligerant belligerent
+bellweather bellwether
+bemusemnt bemusement
+beneficary beneficiary
+beng being
+benificial beneficial
+benifit benefit
+benifits benefits
+bergamont bergamot
+Bernouilli Bernoulli
+beseige besiege
+beseiged besieged
+beseiging besieging
+betwen between
+beween between
+bewteen between
+bilateraly bilaterally
+billingualism bilingualism
+binominal binomial
+bizzare bizarre
+blaim blame
+blaimed blamed
+blessure blessing
+Blitzkreig Blitzkrieg
+boaut bout, boat, about
+bodydbuilder bodybuilder
+bombardement bombardment
+bombarment bombardment
+bondary boundary
+Bonnano Bonanno
+borke broke
+boundry boundary
+bouyancy buoyancy
+bouyant buoyant
+boyant buoyant
+Brasillian Brazilian
+breakthough breakthrough
+breakthroughts breakthroughs
+breif brief
+breifly briefly
+brethen brethren
+bretheren brethren
+briliant brilliant
+brillant brilliant
+brimestone brimstone
+Britian Britain
+Brittish British
+broacasted broadcast
+broadacasting broadcasting
+broady broadly
+Buddah Buddha
+buisness business
+buisnessman businessman
+buoancy buoyancy
+buring burying, burning, burin, during
+burried buried
+busineses business, businesses
+busness business
+bussiness business
+cacuses caucuses
+cahracters characters
+calaber caliber
+calander calendar, calender, colander
+calculs calculus
+calenders calendars
+caligraphy calligraphy
+caluclate calculate
+caluclated calculated
+caluculate calculate
+caluculated calculated
+calulate calculate
+calulated calculated
+Cambrige Cambridge
+camoflage camouflage
+campain campaign
+campains campaigns
+candadate candidate
+candiate candidate
+candidiate candidate
+cannister canister
+cannisters canisters
+cannnot cannot
+cannonical canonical
+cannotation connotation
+cannotations connotations
+cant cannot, can not, can't
+caost coast
+caperbility capability
+Capetown Cape Town
+capible capable
+captial capital
+captued captured
+capturd captured
+carachter character
+caracterized characterized
+carcas carcass, Caracas
+carefull careful
+careing caring
+carismatic charismatic
+Carmalite Carmelite
+carmel caramel, Carmel-by-the-Sea
+carniverous carnivorous
+carreer career
+carrers careers
+Carribbean Caribbean
+Carribean Caribbean
+cartdridge cartridge
+Carthagian Carthaginian
+carthographer cartographer
+cartilege cartilage
+cartilidge cartilage
+cartrige cartridge
+casette cassette
+casion caisson
+cassawory cassowary
+cassowarry cassowary
+casulaties casualties
+casulaty casualty
+catagories categories
+catagorized categorized
+catagory category
+catergorize categorize
+catergorized categorized
+Cataline Catiline, Catalina
+cathlic catholic
+catholocism catholicism
+catterpilar caterpillar
+catterpilars caterpillars
+cattleship battleship
+causalities casualties
+Ceasar Caesar
+Celcius Celsius
+cellpading cellpadding
+cementary cemetery
+cemetarey cemetery
+cemetaries cemeteries
+cemetary cemetery
+cencus census
+censur censor, censure
+cententenial centennial
+centruies centuries
+centruy century
+ceratin certain, keratin
+cerimonial ceremonial
+cerimonies ceremonies
+cerimonious ceremonious
+cerimony ceremony
+ceromony ceremony
+certainity certainty
+certian certain
+cervial cervical, servile, serval
+chalenging challenging
+challange challenge
+challanged challenged
+challege challenge
+Champange Champagne
+changable changeable
+charachter character
+charactor character
+charachters characters
+charactersistic characteristic
+charactors characters
+charasmatic charismatic
+charaterized characterized
+chariman chairman
+charistics characteristics
+chasr chaser, chase
+cheif chief
+chemcial chemical
+chemcially chemically
+chemestry chemistry
+chemicaly chemically
+childbird childbirth
+childen children
+choosen chosen
+chracter character
+chuch church
+churchs churches
+Cincinatti Cincinnati
+Cincinnatti Cincinnati
+circulaton circulation
+circumsicion circumcision
+circut circuit
+ciricuit circuit
+ciriculum curriculum
+civillian civilian
+claer clear
+claerer clearer
+claerly clearly
+claimes claims
+clas class
+clasic classic
+clasical classical
+clasically classically
+cleareance clearance
+clera clear, sclera
+clincial clinical
+clinicaly clinically
+cmo com
+cmoputer computer
+co-incided coincided
+coctail cocktail
+coform conform
+cognizent cognizant
+coincedentally coincidentally
+colaborations collaborations
+colateral collateral
+colelctive collective
+collaberative collaborative
+collecton collection
+collegue colleague
+collegues colleagues
+collonade colonnade
+collonies colonies
+collony colony
+collosal colossal
+colonizators colonizers
+comander commander, commandeer
+comando commando
+comandos commandos
+comany company
+comapany company
+comback comeback
+combanations combinations
+combinatins combinations
+combusion combustion
+comdemnation condemnation
+comemmorates commemorates
+comemoretion commemoration
+comision commission
+comisioned commissioned
+comisioner commissioner
+comisioning commissioning
+comisions commissions
+comission commission
+comissioned commissioned
+comissioner commissioner
+comissioning commissioning
+comissions commissions
+comited committed
+comiting committing
+comitted committed
+comittee committee
+comitting committing
+commandoes commandos
+commedic comedic
+commemerative commemorative
+commemmorate commemorate
+commemmorating commemorating
+commerical commercial
+commerically commercially
+commericial commercial
+commericially commercially
+commerorative commemorative
+comming coming
+comminication communication
+commision commission
+commisioned commissioned
+commisioner commissioner
+commisioning commissioning
+commisions commissions
+commited committed
+commitee committee
+commiting committing
+committe committee
+committment commitment
+committments commitments
+commmemorated commemorated
+commongly commonly
+commonweath commonwealth
+commuications communications
+commuinications communications
+communciation communication
+communiation communication
+communites communities
+compability compatibility
+comparision comparison
+comparisions comparisons
+comparitive comparative
+comparitively comparatively
+compatabilities compatibilities
+compatability compatibility
+compatable compatible
+compatablities compatibilities
+compatablity compatibility
+compatiable compatible
+compatiblities compatibilities
+compatiblity compatibility
+compeitions competitions
+compensantion compensation
+competance competence
+competant competent
+competative competitive
+competion competition, completion
+competitiion competition
+competive competitive
+competiveness competitiveness
+comphrehensive comprehensive
+compitent competent
+completedthe completed the
+completelyl completely
+completetion completion
+complier compiler
+componant component
+comprable comparable
+comprimise compromise
+compulsary compulsory
+compulsery compulsory
+computarized computerized
+concensus consensus
+concider consider
+concidered considered
+concidering considering
+conciders considers
+concieted conceited
+concieved conceived
+concious conscious
+conciously consciously
+conciousness consciousness
+condamned condemned
+condemmed condemned
+condidtion condition
+condidtions conditions
+conditionsof conditions of
+conected connected
+conection connection
+conesencus consensus
+confidental confidential
+confidentally confidentially
+confids confides
+configureable configurable
+confortable comfortable
+congradulations congratulations
+congresional congressional
+conived connived
+conjecutre conjecture
+conjuction conjunction
+Conneticut Connecticut
+conotations connotations
+conquerd conquered
+conquerer conqueror
+conquerers conquerors
+conqured conquered
+conscent consent
+consciouness consciousness
+consdider consider
+consdidered considered
+consdiered considered
+consectutive consecutive
+consenquently consequently
+consentrate concentrate
+consentrated concentrated
+consentrates concentrates
+consept concept
+consequentually consequently
+consequeseces consequences
+consern concern
+conserned concerned
+conserning concerning
+conservitive conservative
+consiciousness consciousness
+consicousness consciousness
+considerd considered
+consideres considered
+consious conscious
+consistant consistent
+consistantly consistently
+consituencies constituencies
+consituency constituency
+consituted constituted
+consitution constitution
+consitutional constitutional
+consolodate consolidate
+consolodated consolidated
+consonent consonant
+consonents consonants
+consorcium consortium
+conspiracys conspiracies
+conspiriator conspirator
+constaints constraints
+constanly constantly
+constarnation consternation
+constatn constant
+constinually continually
+constituant constituent
+constituants constituents
+constituion constitution
+constituional constitutional
+consttruction construction
+constuction construction
+consulant consultant
+consumate consummate
+consumated consummated
+contaiminate contaminate
+containes contains
+contamporaries contemporaries
+contamporary contemporary
+contempoary contemporary
+contemporaneus contemporaneous
+contempory contemporary
+contendor contender
+contined continued
+continous continuous
+continously continuously
+continueing continuing
+contravercial controversial
+contraversy controversy
+contributer contributor
+contributers contributors
+contritutions contributions
+controled controlled
+controling controlling
+controll control
+controlls controls
+controvercial controversial
+controvercy controversy
+controveries controversies
+controversal controversial
+controversey controversy
+controvertial controversial
+controvery controversy
+contruction construction
+conveinent convenient
+convenant covenant
+convential conventional
+convertables convertibles
+convertion conversion
+conveyer conveyor
+conviced convinced
+convienient convenient
+coordiantion coordination
+coorperation cooperation, corporation
+coorperations corporations
+copmetitors competitors
+coputer computer
+copywrite copyright
+coridal cordial
+cornmitted committed
+corosion corrosion
+corparate corporate
+corperations corporations
+correcters correctors
+correponding corresponding
+correposding corresponding
+correspondant correspondent
+correspondants correspondents
+corridoors corridors
+corrispond correspond
+corrispondant correspondent
+corrispondants correspondents
+corrisponded corresponded
+corrisponding corresponding
+corrisponds corresponds
+costitution constitution
+coucil council
+coudl could, cloud
+councellor councillor, counselor, councilor
+councellors councillors, counselors, councilors
+counries countries
+countains contains
+countires countries
+coururier courier, couturier
+coverted converted, covered, coveted
+cpoy coy, copy
+creaeted created
+creedence credence
+critereon criterion
+criterias criteria
+criticists critics
+critising criticising, criticizing
+critisising criticising
+critisism criticism
+critisisms criticisms
+critisize criticise, criticize
+critisized criticised, criticized
+critisizes criticises, criticizes
+critisizing criticising, criticizing
+critized criticized
+critizing criticizing
+crockodiles crocodiles
+crowm crown
+crtical critical
+crticised criticised
+crucifiction crucifixion
+crusies cruises
+crystalisation crystallisation
+culiminating culminating
+cumulatative cumulative
+curch church
+curcuit circuit
+currenly currently
+curriculem curriculum
+cxan cyan
+cyclinder cylinder
+dael deal, dial, dahl
+dalmation dalmatian
+damenor demeanor
+Dardenelles Dardanelles
+dacquiri daiquiri
+debateable debatable
+decendant descendant
+decendants descendants
+decendent descendant
+decendents descendants
+decideable decidable
+decidely decidedly
+decieved deceived
+decison decision
+decomissioned decommissioned
+decomposit decompose
+decomposited decomposed
+decompositing decomposing
+decomposits decomposes
+decress decrees
+decribe describe
+decribed described
+decribes describes
+decribing describing
+dectect detect
+defendent defendant
+defendents defendants
+deffensively defensively
+deffine define
+deffined defined
+definance defiance
+definate definite
+definately definitely
+definatly definitely
+definetly definitely
+definining defining
+definit definite
+definitly definitely
+definiton definition
+defintion definition
+degrate degrade
+delagates delegates
+delapidated dilapidated
+delerious delirious
+delevopment development
+deliberatly deliberately
+delusionally delusively
+demenor demeanor
+demographical demographic
+demolision demolition
+demorcracy democracy
+demostration demonstration
+denegrating denigrating
+densly densely
+deparment department
+deparments departments
+deparmental departmental
+dependance dependence
+dependancy dependency
+dependant dependent
+deram dram, dream
+deriviated derived
+derivitive derivative
+derogitory derogatory
+descendands descendants
+descibed described
+descision decision
+descisions decisions
+descriibes describes
+descripters descriptors
+descripton description
+desctruction destruction
+descuss discuss
+desgined designed
+deside decide
+desigining designing
+desinations destinations
+desintegrated disintegrated
+desintegration disintegration
+desireable desirable
+desitned destined
+desktiop desktop
+desorder disorder
+desoriented disoriented
+desparate desperate, disparate
+despatched dispatched
+despict depict
+despiration desperation
+dessicated desiccated
+dessigned designed
+destablized destabilized
+destory destroy
+detailled detailed
+detatched detached
+deteoriated deteriorated
+deteriate deteriorate
+deterioriating deteriorating
+determinining determining
+detremental detrimental
+devasted devastated
+develope develop
+developement development
+developped developed
+develpment development
+devels delves
+devestated devastated
+devestating devastating
+devide divide
+devided divided
+devistating devastating
+devolopement development
+diablical diabolical
+diamons diamonds
+diaster disaster
+dichtomy dichotomy
+diconnects disconnects
+dicover discover
+dicovered discovered
+dicovering discovering
+dicovers discovers
+dicovery discovery
+dicussed discussed
+didnt didn't
+diea idea, die
+dieing dying, dyeing
+dieties deities
+diety deity
+diferent different
+diferrent different
+differentiatiations differentiations
+differnt different
+difficulity difficulty
+diffrent different
+dificulties difficulties
+dificulty difficulty
+dimenions dimensions
+dimention dimension
+dimentional dimensional
+dimentions dimensions
+dimesnional dimensional
+diminuitive diminutive
+diosese diocese
+diphtong diphthong
+diphtongs diphthongs
+diplomancy diplomacy
+dipthong diphthong
+dipthongs diphthongs
+dirived derived
+disagreeed disagreed
+disapeared disappeared
+disapointing disappointing
+disappearred disappeared
+disaproval disapproval
+disasterous disastrous
+disatisfaction dissatisfaction
+disatisfied dissatisfied
+disatrous disastrous
+discontentment discontent
+discribe describe
+discribed described
+discribes describes
+discribing describing
+disctinction distinction
+disctinctive distinctive
+disemination dissemination
+disenchanged disenchanted
+disiplined disciplined
+disobediance disobedience
+disobediant disobedient
+disolved dissolved
+disover discover
+dispair despair
+disparingly disparagingly
+dispence dispense
+dispenced dispensed
+dispencing dispensing
+dispicable despicable
+dispite despite
+dispostion disposition
+disproportiate disproportionate
+disputandem disputandum
+disricts districts
+dissagreement disagreement
+dissapear disappear
+dissapearance disappearance
+dissapeared disappeared
+dissapearing disappearing
+dissapears disappears
+dissappear disappear
+dissappears disappears
+dissappointed disappointed
+dissarray disarray
+dissobediance disobedience
+dissobediant disobedient
+dissobedience disobedience
+dissobedient disobedient
+distiction distinction
+distingish distinguish
+distingished distinguished
+distingishes distinguishes
+distingishing distinguishing
+distingquished distinguished
+distrubution distribution
+distruction destruction
+distructive destructive
+ditributed distributed
+diversed diverse, diverged
+divice device
+divison division
+divisons divisions
+doccument document
+doccumented documented
+doccuments documents
+docrines doctrines
+doctines doctrines
+documenatry documentary
+doens does
+doesnt doesn't
+doign doing
+dominaton domination
+dominent dominant
+dominiant dominant
+donig doing
+dosen't doesn't
+doub doubt, daub
+doulbe double
+dowloads downloads
+dramtic dramatic
+draughtman draughtsman
+Dravadian Dravidian
+dreasm dreams
+driectly directly
+drnik drink
+druming drumming
+drummless drumless
+dupicate duplicate
+durig during
+durring during
+duting during
+dyas dryas
+eahc each
+ealier earlier
+earlies earliest
+earnt earned
+ecclectic eclectic
+eceonomy economy
+ecidious deciduous
+eclispe eclipse
+ecomonic economic
+ect etc
+eearly early
+efel evil
+effeciency efficiency
+effecient efficient
+effeciently efficiently
+efficency efficiency
+efficent efficient
+efficently efficiently
+efford effort, afford
+effords efforts, affords
+effulence effluence
+eigth eighth, eight
+eiter either
+elction election
+electic eclectic, electric
+electon election, electron
+electrial electrical
+electricly electrically
+electricty electricity
+elementay elementary
+eleminated eliminated
+eleminating eliminating
+eles eels
+eletricity electricity
+elicided elicited
+eligable eligible
+elimentary elementary
+ellected elected
+elphant elephant
+embarass embarrass
+embarassed embarrassed
+embarassing embarrassing
+embarassment embarrassment
+embargos embargoes
+embarras embarrass
+embarrased embarrassed
+embarrasing embarrassing
+embarrasment embarrassment
+embezelled embezzled
+emblamatic emblematic
+eminate emanate
+eminated emanated
+emision emission
+emited emitted
+emiting emitting
+emition emission, emotion
+emmediately immediately
+emmigrated emigrated
+emminent eminent, imminent
+emminently eminently
+emmisaries emissaries
+emmisarries emissaries
+emmisarry emissary
+emmisary emissary
+emmision emission
+emmisions emissions
+emmited emitted
+emmiting emitting
+emmitted emitted
+emmitting emitting
+emnity enmity
+emperical empirical
+emphaised emphasised
+emphsis emphasis
+emphysyma emphysema
+empirial empirical, imperial
+emprisoned imprisoned
+enameld enameled
+enchancement enhancement
+encouraing encouraging
+encryptiion encryption
+encylopedia encyclopedia
+endevors endeavors
+endevour endeavour
+endig ending
+endolithes endoliths
+enduce induce
+ened need
+enflamed inflamed
+enforceing enforcing
+engagment engagement
+engeneer engineer
+engeneering engineering
+engieneer engineer
+engieneers engineers
+enlargment enlargement
+enlargments enlargements
+Enlish English, enlist
+enourmous enormous
+enourmously enormously
+ensconsed ensconced
+entaglements entanglements
+enteratinment entertainment
+entitity entity
+entitlied entitled
+entrepeneur entrepreneur
+entrepeneurs entrepreneurs
+enviorment environment
+enviormental environmental
+enviormentally environmentally
+enviorments environments
+enviornment environment
+enviornmental environmental
+enviornmentalist environmentalist
+enviornmentally environmentally
+enviornments environments
+enviroment environment
+enviromental environmental
+enviromentalist environmentalist
+enviromentally environmentally
+enviroments environments
+envolutionary evolutionary
+envrionments environments
+enxt next
+epidsodes episodes
+epsiode episode
+equialent equivalent
+equilibium equilibrium
+equilibrum equilibrium
+equiped equipped
+equippment equipment
+equitorial equatorial
+equivelant equivalent
+equivelent equivalent
+equivilant equivalent
+equivilent equivalent
+equivlalent equivalent
+erally orally, really
+eratic erratic
+eratically erratically
+eraticly erratically
+erested arrested, erected
+errupted erupted
+esential essential
+esitmated estimated
+esle else
+especialy especially
+essencial essential
+essense essence
+essentail essential
+essentialy essentially
+essentual essential
+essesital essential
+estabishes establishes
+establising establishing
+ethnocentricm ethnocentrism
+ethose those, ethos
+Europian European
+Europians Europeans
+Eurpean European
+Eurpoean European
+evenhtually eventually
+eventally eventually
+eventially eventually
+eventualy eventually
+everthing everything
+everytime every time
+everyting everything
+eveyr every
+evidentally evidently
+exagerate exaggerate
+exagerated exaggerated
+exagerates exaggerates
+exagerating exaggerating
+exagerrate exaggerate
+exagerrated exaggerated
+exagerrates exaggerates
+exagerrating exaggerating
+examinated examined
+exampt exempt
+exapansion expansion
+excact exact
+excange exchange
+excecute execute
+excecuted executed
+excecutes executes
+excecuting executing
+excecution execution
+excedded exceeded
+excelent excellent
+excell excel
+excellance excellence
+excellant excellent
+excells excels
+excercise exercise
+exchanching exchanging
+excisted existed
+exculsivly exclusively
+execising exercising
+exection execution
+exectued executed
+exeedingly exceedingly
+exelent excellent
+exellent excellent
+exemple example
+exept except
+exeptional exceptional
+exerbate exacerbate
+exerbated exacerbated
+exerciese exercises
+exerpt excerpt
+exerpts excerpts
+exersize exercise
+exerternal external
+exhalted exalted
+exhibtion exhibition
+exibition exhibition
+exibitions exhibitions
+exicting exciting
+exinct extinct
+existance existence
+existant existent
+existince existence
+exliled exiled
+exludes excludes
+exmaple example
+exonorate exonerate
+exoskelaton exoskeleton
+expalin explain
+expeced expected
+expecially especially
+expeditonary expeditionary
+expeiments experiments
+expell expel
+expells expels
+experiance experience
+experianced experienced
+expiditions expeditions
+expierence experience
+explaination explanation
+explaning explaining
+explictly explicitly
+exploititive exploitative
+explotation exploitation
+expropiated expropriated
+expropiation expropriation
+exressed expressed
+extemely extremely
+extention extension
+extentions extensions
+extered exerted
+extermist extremist
+extint extinct, extant
+extradiction extradition
+extraterrestial extraterrestrial
+extraterrestials extraterrestrials
+extravagent extravagant
+extrememly extremely
+extremeophile extremophile
+extremly extremely
+extrordinarily extraordinarily
+extrordinary extraordinary
+eyar year, eyas
+eyars years, eyas
+eyasr years, eyas
+faciliate facilitate
+faciliated facilitated
+faciliates facilitates
+facilites facilities
+facillitate facilitate
+facinated fascinated
+facist fascist
+familes families
+familliar familiar
+famoust famous
+fanatism fanaticism
+Farenheit Fahrenheit
+fatc fact
+faught fought
+favoutrable favourable
+feasable feasible
+Febuary February
+fedreally federally
+feromone pheromone
+fertily fertility
+fianite finite
+fianlly finally
+ficticious fictitious
+fictious fictitious
+fidn find
+fiel feel, field, file, phial
+fiels feels, fields, files, phials
+fiercly fiercely
+fightings fighting
+filiament filament
+fimilies families
+finacial financial
+finaly finally
+financialy financially
+firends friends
+firts flirts, first
+fisionable fissionable
+flamable flammable
+flawess flawless
+fleed fled, freed
+Flemmish Flemish
+florescent fluorescent
+flourescent fluorescent
+fluorish flourish
+follwoing following
+folowing following
+fomed formed
+fomr from, form
+fonetic phonetic
+fontrier frontier
+foootball football
+forbad forbade
+forbiden forbidden
+foreward foreword
+forfiet forfeit
+forhead forehead
+foriegn foreign
+Formalhaut Fomalhaut
+formallize formalize
+formallized formalized
+formaly formally
+formelly formerly
+formidible formidable
+formost foremost
+forsaw foresaw
+forseeable foreseeable
+fortelling foretelling
+forunner forerunner
+foucs focus
+foudn found
+fougth fought
+foundaries foundries
+foundary foundry
+Foundland Newfoundland
+fourties forties
+fourty forty
+fouth fourth
+foward forward
+fucntion function
+fucntioning functioning
+Fransiscan Franciscan
+Fransiscans Franciscans
+freind friend
+freindly friendly
+frequentily frequently
+frome from
+fromed formed
+froniter frontier
+fufill fulfill
+fufilled fulfilled
+fulfiled fulfilled
+fundametal fundamental
+fundametals fundamentals
+funguses fungi
+funtion function
+furuther further
+futher further
+futhermore furthermore
+futhroc futhark, futhorc
+gae game, Gael, gale
+galatic galactic
+Galations Galatians
+gallaxies galaxies
+galvinized galvanized
+Gameboy Game Boy
+ganerate generate
+ganes games
+ganster gangster
+garantee guarantee
+garanteed guaranteed
+garantees guarantees
+garnison garrison
+gauarana guaraná
+gaurantee guarantee
+gauranteed guaranteed
+gaurantees guarantees
+gaurd guard, gourd
+gaurentee guarantee
+gaurenteed guaranteed
+gaurentees guarantees
+geneological genealogical
+geneologies genealogies
+geneology genealogy
+generaly generally
+generatting generating
+genialia genitalia
+geographicial geographical
+geometrician geometer
+geometricians geometers
+gerat great
+Ghandi Gandhi
+glight flight
+gnawwed gnawed
+godess goddess
+godesses goddesses
+Godounov Godunov
+gogin going, Gauguin
+goign going
+gonig going
+Gothenberg Gothenburg
+Gottleib Gottlieb
+gouvener governor
+govement government
+govenment government
+govenrment government
+goverance governance
+goverment government
+govermental governmental
+governer governor
+governmnet government
+govorment government
+govormental governmental
+govornment government
+gracefull graceful
+graet great
+grafitti graffiti
+gramatically grammatically
+grammaticaly grammatically
+grammer grammar
+grat great
+gratuitious gratuitous
+greatful grateful
+greatfully gratefully
+greif grief
+gridles griddles
+gropu group
+grwo grow
+Guaduloupe Guadalupe, Guadeloupe
+Guadulupe Guadalupe, Guadeloupe
+guage gauge
+guarentee guarantee
+guarenteed guaranteed
+guarentees guarantees
+Guatamala Guatemala
+Guatamalan Guatemalan
+guerilla guerrilla
+guerillas guerrillas
+guerrila guerrilla
+guerrilas guerrillas
+guidence guidance
+Guilia Giulia
+Guilio Giulio
+Guiness Guinness
+Guiseppe Giuseppe
+gunanine guanine
+gurantee guarantee
+guranteed guaranteed
+gurantees guarantees
+guttaral guttural
+gutteral guttural
+habaeus habeas
+habeus habeas
+Habsbourg Habsburg
+haemorrage haemorrhage
+haev have, heave
+Hallowean Hallowe'en, Halloween
+halp help
+hapen happen
+hapened happened
+hapening happening
+happend happened
+happended happened
+happenned happened
+harased harassed
+harases harasses
+harasment harassment
+harasments harassments
+harassement harassment
+harras harass
+harrased harassed
+harrases harasses
+harrasing harassing
+harrasment harassment
+harrasments harassments
+harrassed harassed
+harrasses harasses
+harrassing harassing
+harrassment harassment
+harrassments harassments
+hasnt hasn't
+haviest heaviest
+headquater headquarter
+headquarer headquarter
+headquatered headquartered
+headquaters headquarters
+healthercare healthcare
+heared heard
+heathy healthy
+Heidelburg Heidelberg
+heigher higher
+heirarchy hierarchy
+heiroglyphics hieroglyphics
+helment helmet
+helpfull helpful
+helpped helped
+hemmorhage hemorrhage
+herad heard, Hera
+heridity heredity
+heroe hero
+heros heroes
+hertzs hertz
+hesistant hesitant
+heterogenous heterogeneous
+hieght height
+hierachical hierarchical
+hierachies hierarchies
+hierachy hierarchy
+hierarcical hierarchical
+hierarcy hierarchy
+hieroglph hieroglyph
+hieroglphs hieroglyphs
+higer higher
+higest highest
+higway highway
+hillarious hilarious
+himselv himself
+hinderance hindrance
+hinderence hindrance
+hindrence hindrance
+hipopotamus hippopotamus
+hismelf himself
+histocompatability histocompatibility
+historicians historians
+hitsingles hit singles
+holliday holiday
+homestate home state
+homogeneize homogenize
+homogeneized homogenized
+honory honorary
+horrifing horrifying
+hosited hoisted
+hospitible hospitable
+hounour honour
+housr hours, house
+howver however
+hsitorians historians
+hstory history
+hten then, hen, the
+htere there, here
+htey they
+htikn think
+hting thing
+htink think
+htis this
+humer humor, humour
+humerous humorous, humerus
+huminoid humanoid
+humoural humoral
+humurous humorous
+husban husband
+hvae have
+hvaing having
+hvea have, heave
+hwihc which
+hwile while
+hwole whole
+hydogen hydrogen
+hydropile hydrophile
+hydropilic hydrophilic
+hydropobe hydrophobe
+hydropobic hydrophobic
+hygeine hygiene
+hypocracy hypocrisy
+hypocrasy hypocrisy
+hypocricy hypocrisy
+hypocrit hypocrite
+hypocrits hypocrites
+iconclastic iconoclastic
+idaeidae idea
+idaes ideas
+idealogies ideologies
+idealogy ideology
+identicial identical
+identifers identifiers
+ideosyncratic idiosyncratic
+idesa ideas, ides
+idiosyncracy idiosyncrasy
+Ihaca Ithaca
+illegimacy illegitimacy
+illegitmate illegitimate
+illess illness
+illiegal illegal
+illution illusion
+ilness illness
+ilogical illogical
+imagenary imaginary
+imagin imagine
+imaginery imaginary, imagery
+imanent eminent, imminent
+imcomplete incomplete
+imediately immediately
+imense immense
+imigrant emigrant, immigrant
+imigrated emigrated, immigrated
+imigration emigration, immigration
+iminent eminent, imminent, immanent
+immediatley immediately
+immediatly immediately
+immidately immediately
+immidiately immediately
+immitate imitate
+immitated imitated
+immitating imitating
+immitator imitator
+immunosupressant immunosuppressant
+impecabbly impeccably
+impedence impedance
+implamenting implementing
+impliment implement
+implimented implemented
+imploys employs
+importamt important
+imprioned imprisoned
+imprisonned imprisoned
+improvision improvisation
+improvments improvements
+inablility inability
+inaccessable inaccessible
+inadiquate inadequate
+inadquate inadequate
+inadvertant inadvertent
+inadvertantly inadvertently
+inagurated inaugurated
+inaguration inauguration
+inappropiate inappropriate
+inaugures inaugurates
+inbalance imbalance
+inbalanced imbalanced
+inbetween between
+incarcirated incarcerated
+incidentially incidentally
+incidently incidentally
+inclreased increased
+includ include
+includng including
+incompatabilities incompatibilities
+incompatability incompatibility
+incompatable incompatible
+incompatablities incompatibilities
+incompatablity incompatibility
+incompatiblities incompatibilities
+incompatiblity incompatibility
+incompetance incompetence
+incompetant incompetent
+incomptable incompatible
+incomptetent incompetent
+inconsistant inconsistent
+incorperation incorporation
+incorportaed incorporated
+incorprates incorporates
+incorruptable incorruptible
+incramentally incrementally
+increadible incredible
+incredable incredible
+inctroduce introduce
+inctroduced introduced
+incuding including
+incunabla incunabula
+indefinately indefinitely
+indefineable undefinable
+indefinitly indefinitely
+indentical identical
+indepedantly independently
+indepedence independence
+independance independence
+independant independent
+independantly independently
+independece independence
+independendet independent
+indictement indictment
+indigineous indigenous
+indipendence independence
+indipendent independent
+indipendently independently
+indespensible indispensable
+indespensable indispensable
+indispensible indispensable
+indisputible indisputable
+indisputibly indisputably
+indite indict
+individualy individually
+indpendent independent
+indpendently independently
+indulgue indulge
+indutrial industrial
+indviduals individuals
+inefficienty inefficiently
+inevatible inevitable
+inevitible inevitable
+inevititably inevitably
+infalability infallibility
+infallable infallible
+infectuous infectious
+infered inferred
+infilitrate infiltrate
+infilitrated infiltrated
+infilitration infiltration
+infinit infinite
+inflamation inflammation
+influencial influential
+influented influenced
+infomation information
+informtion information
+infrantryman infantryman
+infrigement infringement
+ingenius ingenious
+ingreediants ingredients
+inhabitans inhabitants
+inherantly inherently
+inheritage heritage, inheritance
+inheritence inheritance
+inital initial
+initally initially
+initation initiation
+initiaitive initiative
+inlcuding including
+inmigrant immigrant
+inmigrants immigrants
+innoculated inoculated
+inocence innocence
+inofficial unofficial
+inot into
+inpeach impeach
+inpolite impolite
+inprisonment imprisonment
+inproving improving
+insectiverous insectivorous
+insensative insensitive
+inseperable inseparable
+insistance insistence
+insitution institution
+insitutions institutions
+inspite in spite, inspire
+instade instead
+instatance instance
+institue institute
+instuction instruction
+instuments instruments
+instutionalized institutionalized
+instutions intuitions
+insurence insurance
+intelectual intellectual
+inteligence intelligence
+inteligent intelligent
+intenational international
+intepretation interpretation
+intepretator interpretor
+interational international
+interbread interbreed, interbred
+interchangable interchangeable
+interchangably interchangeably
+intercontinetal intercontinental
+intered interred, interned
+interelated interrelated
+interferance interference
+interfereing interfering
+intergrated integrated
+intergration integration
+interm interim
+internation international
+interpet interpret
+interrim interim
+interrugum interregnum
+intertaining entertaining
+interupt interrupt
+intervines intervenes
+intevene intervene
+intial initial
+intially initially
+intrduced introduced
+intrest interest
+introdued introduced
+intruduced introduced
+intrusted entrusted
+intutive intuitive
+intutively intuitively
+inudstry industry
+inumerable enumerable, innumerable
+inventer inventor
+invertibrates invertebrates
+investingate investigate
+involvment involvement
+irelevent irrelevant
+iresistable irresistible
+iresistably irresistibly
+iresistible irresistible
+iresistibly irresistibly
+iritable irritable
+iritated irritated
+ironicly ironically
+irregardless regardless
+irrelevent irrelevant
+irreplacable irreplaceable
+irresistable irresistible
+irresistably irresistibly
+isnt isn't
+Israelies Israelis
+issueing issuing
+itnroduced introduced
+iunior junior
+iwll will
+iwth with
+Japanes Japanese
+jaques jacques
+jeapardy jeopardy
+jewllery jewellery
+Johanine Johannine
+Jospeh Joseph
+jouney journey
+journied journeyed
+journies journeys
+jstu just
+jsut just
+Juadaism Judaism
+Juadism Judaism
+judical judicial
+judisuary judiciary
+juducial judicial
+juristiction jurisdiction
+juristictions jurisdictions
+kindergarden kindergarten
+klenex kleenex
+knifes knives
+knive knife
+knowlege knowledge
+knowlegeable knowledgeable
+knwo know
+knwos knows
+konw know
+konws knows
+kwno know
+labatory lavatory, laboratory
+labled labelled, labeled
+labratory laboratory
+laguage language
+laguages languages
+larg large
+largst largest
+larrry larry
+lastr last
+lattitude latitude
+launchs launch
+launhed launched
+lavae larvae
+layed laid
+lazyness laziness
+leaded led
+leage league
+leanr lean, learn, leaner
+leathal lethal
+lefted left
+legitamate legitimate
+legitmate legitimate
+leibnitz leibniz
+lenght length
+leran learn
+lerans learns
+lieuenant lieutenant
+leutenant lieutenant
+levetate levitate
+levetated levitated
+levetates levitates
+levetating levitating
+levle level
+liasion liaison
+liason liaison
+liasons liaisons
+libary library
+libell libel
+libguistic linguistic
+libguistics linguistics
+libitarianisn libertarianism
+lible libel, liable
+lieing lying
+liek like
+liekd liked
+liesure leisure
+lieved lived
+liftime lifetime
+lightyear light year
+lightyears light years
+likelyhood likelihood
+linnaena linnaean
+lippizaner lipizzaner
+liquify liquefy
+liscense license, licence
+lisence license, licence
+lisense license, licence
+listners listeners
+litature literature
+literture literature
+littel little
+litterally literally
+liuke like
+livley lively
+lmits limits
+loev love
+lonelyness loneliness
+longitudonal longitudinal
+lonley lonely
+lonly lonely, only
+loosing losing
+lotharingen lothringen
+lsat last
+lukid likud
+lveo love
+lvoe love
+Lybia Libya
+mackeral mackerel
+magasine magazine
+magincian magician
+magnificient magnificent
+magolia magnolia
+mailny mainly
+maintainance maintenance
+maintainence maintenance
+maintance maintenance
+maintenence maintenance
+maintinaing maintaining
+maintioned mentioned
+majoroty majority
+maked marked, made
+makse makes
+Malcom Malcolm
+maltesian Maltese
+mamal mammal
+mamalian mammalian
+managable manageable, manageably
+managment management
+manisfestations manifestations
+manoeuverability maneuverability
+manouver maneuver, manoeuvre
+manouverability maneuverability, manoeuvrability, manoeuverability
+manouverable maneuverable, manoeuvrable
+manouvers maneuvers, manoeuvres
+mantained maintained
+manuever maneuver, manoeuvre
+manuevers maneuvers, manoeuvres
+manufacturedd manufactured
+manufature manufacture
+manufatured manufactured
+manufaturing manufacturing
+manuver maneuver
+mariage marriage
+marjority majority
+markes marks
+marketting marketing
+marmelade marmalade
+marrage marriage
+marraige marriage
+marrtyred martyred
+marryied married
+Massachussets Massachusetts
+Massachussetts Massachusetts
+massmedia mass media
+masterbation masturbation
+mataphysical metaphysical
+materalists materialist
+mathamatics mathematics
+mathematican mathematician
+mathematicas mathematics
+matheticians mathematicians
+mathmatically mathematically
+mathmatician mathematician
+mathmaticians mathematicians
+mccarthyst mccarthyist
+mchanics mechanics
+meaninng meaning
+mear wear, mere, mare
+mechandise merchandise
+medacine medicine
+medeival medieval
+medevial medieval
+mediciney mediciny
+medievel medieval
+mediterainnean mediterranean
+Mediteranean Mediterranean
+meerkrat meerkat
+melieux milieux
+membranaphone membranophone
+memeber member
+menally mentally
+meranda veranda, Miranda
+mercentile mercantile
+messanger messenger
+messenging messaging
+metalic metallic
+metalurgic metallurgic
+metalurgical metallurgical
+metalurgy metallurgy
+metamorphysis metamorphosis
+metaphoricial metaphorical
+meterologist meteorologist
+meterology meteorology
+methaphor metaphor
+methaphors metaphors
+Michagan Michigan
+micoscopy microscopy
+midwifes midwives
+mileau milieu
+milennia millennia
+milennium millennium
+mileu milieu
+miliary military
+milion million
+miliraty military
+millenia millennia
+millenial millennial
+millenialism millennialism
+millenium millennium
+millepede millipede
+millioniare millionaire
+millitary military
+millon million
+miltary military
+minature miniature
+minerial mineral
+miniscule minuscule
+ministery ministry
+minstries ministries
+minstry ministry
+minumum minimum
+mirrorred mirrored
+miscelaneous miscellaneous
+miscellanious miscellaneous
+miscellanous miscellaneous
+mischeivous mischievous
+mischevious mischievous
+mischievious mischievous
+misdameanor misdemeanor
+misdameanors misdemeanors
+misdemenor misdemeanor
+misdemenors misdemeanors
+misfourtunes misfortunes
+misile missile
+Misouri Missouri
+mispell misspell
+mispelled misspelled
+mispelling misspelling
+missen mizzen
+Missisipi Mississippi
+Missisippi Mississippi
+missle missile
+missonary missionary
+misterious mysterious
+mistery mystery
+misteryous mysterious
+mkae make
+mkaes makes
+mkaing making
+mkea make
+moderm modem
+modle model
+moent moment
+moeny money
+mohammedans muslims
+moil mohel
+moleclues molecules
+momento memento
+monestaries monasteries
+monestary monastery, monetary
+monickers monikers
+monolite monolithic
+Monserrat Montserrat
+montains mountains
+montanous mountainous
+monts months
+montypic monotypic
+moreso more, more so
+morgage mortgage
+Morisette Morissette
+Morrisette Morissette
+morroccan moroccan
+morrocco morocco
+morroco morocco
+mosture moisture
+motiviated motivated
+mounth month
+movei movie
+movment movement
+mroe more
+mucuous mucous
+muder murder
+mudering murdering
+muhammadan muslim
+multicultralism multiculturalism
+multipled multiplied
+multiplers multipliers
+munbers numbers
+muncipalities municipalities
+muncipality municipality
+munnicipality municipality
+muscels mussels, muscles
+muscial musical
+muscician musician
+muscicians musicians
+mutiliated mutilated
+myraid myriad
+mysef myself
+mysogynist misogynist
+mysogyny misogyny
+mysterous mysterious
+Mythraic Mithraic
+naieve naive
+Napoleonian Napoleonic
+naturaly naturally
+naturely naturally
+naturual natural
+naturually naturally
+Nazereth Nazareth
+neccesarily necessarily
+neccesary necessary
+neccessarily necessarily
+neccessary necessary
+neccessities necessities
+necesarily necessarily
+necesary necessary
+necessiate necessitate
+neglible negligible
+negligable negligible
+negociate negotiate
+negociation negotiation
+negociations negotiations
+negotation negotiation
+neice niece, nice
+neigborhood neighborhood
+neigbour neighbour, neighbor
+neigbourhood neighbourhood
+neigbouring neighbouring, neighboring
+neigbours neighbours, neighbors
+neolitic neolithic
+nessasarily necessarily
+nessecary necessary
+nestin nesting
+neverthless nevertheless
+newletters newsletters
+Newyorker New Yorker
+nickle nickel
+nightfa;; nightfall
+nightime nighttime
+nineth ninth
+ninteenth nineteenth
+ninties 1990s
+ninty ninety
+nkow know
+nkwo know
+nmae name
+noncombatents noncombatants
+nonsence nonsense
+nontheless nonetheless
+noone no one
+norhern northern
+northen northern
+northereastern northeastern
+notabley notably
+noteable notable
+noteably notably
+noteriety notoriety
+noth north
+nothern northern
+noticable noticeable
+noticably noticeably
+noticeing noticing
+noticible noticeable
+notwhithstanding notwithstanding
+noveau nouveau
+nowdays nowadays
+nowe now
+nto not
+nucular nuclear
+nuculear nuclear
+nuisanse nuisance
+Nullabour Nullarbor
+numberous numerous
+Nuremburg Nuremberg
+nusance nuisance
+nutritent nutrient
+nutritents nutrients
+nuturing nurturing
+obediance obedience
+obediant obedient
+obession obsession
+obssessed obsessed
+obstacal obstacle
+obstancles obstacles
+obstruced obstructed
+ocasion occasion
+ocasional occasional
+ocasionally occasionally
+ocasionaly occasionally
+ocasioned occasioned
+ocasions occasions
+ocassion occasion
+ocassional occasional
+ocassionally occasionally
+ocassionaly occasionally
+ocassioned occasioned
+ocassions occasions
+occaison occasion
+occassion occasion
+occassional occasional
+occassionally occasionally
+occassionaly occasionally
+occassioned occasioned
+occassions occasions
+occationally occasionally
+occour occur
+occurance occurrence
+occurances occurrences
+occured occurred
+occurence occurrence
+occurences occurrences
+occuring occurring
+occurr occur
+occurrance occurrence
+occurrances occurrences
+octohedra octahedra
+octohedral octahedral
+octohedron octahedron
+ocuntries countries
+ocuntry country
+ocurr occur
+ocurrance occurrence
+ocurred occurred
+ocurrence occurrence
+offcers officers
+offcially officially
+offereings offerings
+offical official
+officals officials
+offically officially
+officaly officially
+officialy officially
+offred offered
+oftenly often
+oging going, ogling
+omision omission
+omited omitted
+omiting omitting
+omlette omelette
+ommision omission
+ommited omitted
+ommiting omitting
+ommitted omitted
+ommitting omitting
+omniverous omnivorous
+omniverously omnivorously
+omre more
+onot note, not
+onyl only
+openess openness
+oponent opponent
+oportunity opportunity
+opose oppose
+oposite opposite
+oposition opposition
+oppenly openly
+oppinion opinion
+opponant opponent
+oppononent opponent
+oppositition opposition
+oppossed opposed
+opprotunity opportunity
+opression oppression
+opressive oppressive
+opthalmic ophthalmic
+opthalmologist ophthalmologist
+opthalmology ophthalmology
+opthamologist ophthalmologist
+optmizations optimizations
+optomism optimism
+orded ordered
+organim organism
+organiztion organization
+orgin origin, organ
+orginal original
+orginally originally
+orginize organise
+oridinarily ordinarily
+origanaly originally
+originall original, originally
+originaly originally
+originially originally
+originnally originally
+origional original
+orignally originally
+orignially originally
+otehr other
+ouevre oeuvre
+overshaddowed overshadowed
+overthere over there
+overwelming overwhelming
+overwheliming overwhelming
+owrk work
+owudl would
+oxigen oxygen
+oximoron oxymoron
+paide paid
+paitience patience
+palce place, palace
+paleolitic paleolithic
+paliamentarian parliamentarian
+Palistian Palestinian
+Palistinian Palestinian
+Palistinians Palestinians
+pallete palette
+pamflet pamphlet
+pamplet pamphlet
+pantomine pantomime
+Papanicalou Papanicolaou
+paralel parallel
+paralell parallel
+paralelly parallelly
+paralely parallelly
+parallely parallelly
+paranthesis parenthesis
+paraphenalia paraphernalia
+parellels parallels
+parituclar particular
+parliment parliament
+parrakeets parakeets
+parralel parallel
+parrallel parallel
+parrallell parallel
+parrallelly parallelly
+parrallely parallelly
+partialy partially
+particually particularly
+particualr particular
+particuarly particularly
+particularily particularly
+particulary particularly
+pary party
+pased passed
+pasengers passengers
+passerbys passersby
+pasttime pastime
+pastural pastoral
+paticular particular
+pattented patented
+pavillion pavilion
+payed paid
+peacefuland peaceful and
+peageant pageant
+peculure peculiar
+pedestrain pedestrian
+peice piece
+Peloponnes Peloponnesus
+penatly penalty
+penerator penetrator
+penisula peninsula
+penisular peninsular
+penninsula peninsula
+penninsular peninsular
+pennisula peninsula
+pensinula peninsula
+peom poem
+peoms poems
+peopel people
+peotry poetry
+perade parade
+percepted perceived
+percieve perceive
+percieved perceived
+perenially perennially
+perfomers performers
+performence performance
+performes performed, performs
+perhasp perhaps
+perheaps perhaps
+perhpas perhaps
+peripathetic peripatetic
+peristent persistent
+perjery perjury
+perjorative pejorative
+permanant permanent
+permenant permanent
+permenantly permanently
+permissable permissible
+perogative prerogative
+peronal personal
+perosnality personality
+perphas perhaps
+perpindicular perpendicular
+perseverence perseverance
+persistance persistence
+persistant persistent
+personel personnel, personal
+personell personnel
+personnell personnel
+persuded persuaded
+persue pursue
+persued pursued
+persuing pursuing
+persuit pursuit
+persuits pursuits
+pertubation perturbation
+pertubations perturbations
+pessiary pessary
+petetion petition
+Pharoah Pharaoh
+phenomenom phenomenon
+phenomenonal phenomenal
+phenomenonly phenomenally
+phenomonenon phenomenon
+phenomonon phenomenon
+phenonmena phenomena
+Philipines Philippines
+philisopher philosopher
+philisophical philosophical
+philisophy philosophy
+Phillipine Philippine
+Phillipines Philippines
+Phillippines Philippines
+phillosophically philosophically
+philospher philosopher
+philosphies philosophies
+philosphy philosophy
+Phonecian Phoenecian
+phongraph phonograph
+phylosophical philosophical
+physicaly physically
+pich pitch
+pilgrimmage pilgrimage
+pilgrimmages pilgrimages
+pinapple pineapple
+pinnaple pineapple
+pinoneered pioneered
+plagarism plagiarism
+planation plantation
+planed planned
+plantiff plaintiff
+plateu plateau
+plausable plausible
+playright playwright
+playwrite playwright
+playwrites playwrights
+pleasent pleasant
+plebicite plebiscite
+plesant pleasant
+poeoples peoples
+poety poetry
+poisin poison
+polical political
+polinator pollinator
+polinators pollinators
+politican politician
+politicans politicians
+poltical political
+polute pollute
+poluted polluted
+polutes pollutes
+poluting polluting
+polution pollution
+polyphonyic polyphonic
+polysaccaride polysaccharide
+polysaccharid polysaccharide
+pomegranite pomegranate
+pomotion promotion
+poportional proportional
+popoulation population
+popularaty popularity
+populare popular
+populer popular
+portayed portrayed
+portraing portraying
+Portugese Portuguese
+portuguease portuguese
+posess possess
+posessed possessed
+posesses possesses
+posessing possessing
+posession possession
+posessions possessions
+posion poison
+positon position, positron
+possable possible
+possably possibly
+posseses possesses
+possesing possessing
+possesion possession
+possessess possesses
+possibile possible
+possibilty possibility
+possiblility possibility
+possiblilty possibility
+possiblities possibilities
+possiblity possibility
+possition position
+Postdam Potsdam
+posthomous posthumous
+postion position
+postive positive
+potatos potatoes
+portait portrait
+potrait portrait
+potrayed portrayed
+poulations populations
+poverful powerful
+poweful powerful
+powerfull powerful
+practial practical
+practially practically
+practicaly practically
+practicioner practitioner
+practicioners practitioners
+practicly practically
+practioner practitioner
+practioners practitioners
+prairy prairie
+prarie prairie
+praries prairies
+pratice practice
+preample preamble
+precedessor predecessor
+preceed precede
+preceeded preceded
+preceeding preceding
+preceeds precedes
+precentage percentage
+precice precise
+precisly precisely
+precurser precursor
+predecesors predecessors
+predicatble predictable
+predicitons predictions
+predomiantly predominately
+prefered preferred
+prefering preferring
+preferrably preferably
+pregancies pregnancies
+preiod period
+preliferation proliferation
+premeire premiere
+premeired premiered
+premillenial premillennial
+preminence preeminence
+premission permission
+Premonasterians Premonstratensians
+preocupation preoccupation
+prepair prepare
+prepartion preparation
+prepatory preparatory
+preperation preparation
+preperations preparations
+preriod period
+presedential presidential
+presense presence
+presidenital presidential
+presidental presidential
+presitgious prestigious
+prespective perspective
+prestigeous prestigious
+prestigous prestigious
+presumabely presumably
+presumibly presumably
+pretection protection
+prevelant prevalent
+preverse perverse
+previvous previous
+pricipal principal
+priciple principle
+priestood priesthood
+primarly primarily
+primative primitive
+primatively primitively
+primatives primitives
+primordal primordial
+priveledges privileges
+privelege privilege
+priveleged privileged
+priveleges privileges
+privelige privilege
+priveliged privileged
+priveliges privileges
+privelleges privileges
+privilage privilege
+priviledge privilege
+priviledges privileges
+privledge privilege
+privte private
+probabilaty probability
+probablistic probabilistic
+probablly probably
+probalibity probability
+probaly probably
+probelm problem
+proccess process
+proccessing processing
+procede proceed, precede
+proceded proceeded, preceded
+procedes proceeds, precedes
+procedger procedure
+proceding proceeding, preceding
+procedings proceedings
+proceedure procedure
+proces process
+processer processor
+proclaimation proclamation
+proclamed proclaimed
+proclaming proclaiming
+proclomation proclamation
+profesion profusion, profession
+profesor professor
+professer professor
+proffesed professed
+proffesion profession
+proffesional professional
+proffesor professor
+profilic prolific
+progessed progressed
+programable programmable
+progrom pogrom, program
+progroms pogroms, programs
+prohabition prohibition
+prologomena prolegomena
+prominance prominence
+prominant prominent
+prominantly prominently
+prominately prominently, predominately
+promiscous promiscuous
+promotted promoted
+pronomial pronominal
+pronouced pronounced
+pronounched pronounced
+pronounciation pronunciation
+proove prove
+prooved proved
+prophacy prophecy
+propietary proprietary
+propmted prompted
+propoganda propaganda
+propogate propagate
+propogates propagates
+propogation propagation
+propostion proposition
+propotions proportions
+propper proper
+propperly properly
+proprietory proprietary
+proseletyzing proselytizing
+protaganist protagonist
+protaganists protagonists
+protocal protocol
+protoganist protagonist
+protrayed portrayed
+protruberance protuberance
+protruberances protuberances
+prouncements pronouncements
+provacative provocative
+provded provided
+provicial provincial
+provinicial provincial
+provisonal provisional
+provisiosn provision
+proximty proximity
+pseudononymous pseudonymous
+pseudonyn pseudonym
+psuedo pseudo
+psycology psychology
+psyhic psychic
+publicaly publicly
+puchasing purchasing
+Pucini Puccini
+Puertorrican Puerto Rican
+Puertorricans Puerto Ricans
+pumkin pumpkin
+puritannical puritanical
+purposedly purposely
+purpotedly purportedly
+pursuade persuade
+pursuaded persuaded
+pursuades persuades
+pususading persuading
+puting putting
+pwoer power
+pyscic psychic
+qtuie quite, quiet
+quantaty quantity
+quantitiy quantity
+quarantaine quarantine
+Queenland Queensland
+questonable questionable
+quicklyu quickly
+quinessential quintessential
+quitted quit
+quizes quizzes
+qutie quite, quiet
+rabinnical rabbinical
+racaus raucous
+radiactive radioactive
+radify ratify
+raelly really
+rarified rarefied
+reaccurring recurring
+reacing reaching
+reacll recall
+readmition readmission
+realitvely relatively
+realsitic realistic
+realtions relations
+realy really
+realyl really
+reasearch research
+rebiulding rebuilding
+rebllions rebellions
+rebounce rebound
+reccomend recommend
+reccomendations recommendations
+reccomended recommended
+reccomending recommending
+reccommend recommend
+reccommended recommended
+reccommending recommending
+reccuring recurring
+receeded receded
+receeding receding
+receivedfrom received from
+recepient recipient
+recepients recipients
+receving receiving
+rechargable rechargeable
+reched reached
+recide reside
+recided resided
+recident resident
+recidents residents
+reciding residing
+reciepents recipients
+reciept receipt
+recieve receive
+recieved received
+reciever receiver
+recievers receivers
+recieves receives
+recieving receiving
+recipiant recipient
+recipiants recipients
+recived received
+recivership receivership
+recogise recognise
+recogize recognize
+recomend recommend
+recomended recommended
+recomending recommending
+recomends recommends
+recommedations recommendations
+reconaissance reconnaissance
+reconcilation reconciliation
+reconized recognized
+reconnaissence reconnaissance
+recontructed reconstructed
+recordproducer record producer
+recquired required
+recrational recreational
+recrod record
+recuiting recruiting
+recuring recurring
+recurrance recurrence
+rediculous ridiculous
+reedeming redeeming
+reenforced reinforced
+refect reflect
+refedendum referendum
+referal referral
+refered referred
+referiang referring
+refering referring
+refernces references
+referrence reference
+referrs refers
+reffered referred
+refference reference
+refrence reference
+refrences references
+refrers refers
+refridgeration refrigeration
+refridgerator refrigerator
+refromist reformist
+refusla refusal
+regardes regards
+regluar regular
+reguarly regularly
+regulaion regulation
+regulaotrs regulators
+regularily regularly
+rehersal rehearsal
+reicarnation reincarnation
+reigining reigning
+reknown renown
+reknowned renowned
+rela real
+relaly really
+relatiopnship relationship
+relativly relatively
+relected reelected
+releive relieve
+releived relieved
+releiver reliever
+releses releases
+relevence relevance
+relevent relevant
+reliablity reliability
+relient reliant
+religeous religious
+religous religious
+religously religiously
+relinqushment relinquishment
+relitavely relatively
+relized realised, realized
+relpacement replacement
+remaing remaining
+remeber remember
+rememberable memorable
+rememberance remembrance
+remembrence remembrance
+remenant remnant
+remenicent reminiscent
+reminent remnant
+reminescent reminiscent
+reminscent reminiscent
+reminsicent reminiscent
+rendevous rendezvous
+rendezous rendezvous
+renedered rende
+renewl renewal
+rentors renters
+reoccurrence recurrence
+reorganision reorganisation
+repatition repetition, repartition
+repentence repentance
+repentent repentant
+repeteadly repeatedly
+repetion repetition
+repid rapid
+reponse response
+reponsible responsible
+reportadly reportedly
+represantative representative
+representive representative
+representives representatives
+reproducable reproducible
+reprtoire repertoire
+repsectively respectively
+reptition repetition
+requirment requirement
+requred required
+resaurant restaurant
+resembelance resemblance
+resembes resembles
+resemblence resemblance
+resevoir reservoir
+resignement resignment
+resistable resistible
+resistence resistance
+resistent resistant
+respectivly respectively
+responce response
+responibilities responsibilities
+responisble responsible
+responnsibilty responsibility
+responsability responsibility
+responsibile responsible
+responsibilites responsibilities
+responsiblity responsibility
+ressemblance resemblance
+ressemble resemble
+ressembled resembled
+ressemblence resemblance
+ressembling resembling
+resssurecting resurrecting
+ressurect resurrect
+ressurected resurrected
+ressurection resurrection
+ressurrection resurrection
+restaraunt restaurant
+restaraunteur restaurateur
+restaraunteurs restaurateurs
+restaraunts restaurants
+restauranteurs restaurateurs
+restauration restoration
+restauraunt restaurant
+resteraunt restaurant
+resteraunts restaurants
+resticted restricted
+restraunt restraint, restaurant
+resturant restaurant
+resturaunt restaurant
+resurecting resurrecting
+retalitated retaliated
+retalitation retaliation
+retreive retrieve
+returnd returned
+revaluated reevaluated
+reveral reversal
+reversable reversible
+revolutionar revolutionary
+rewitten rewritten
+rewriet rewrite
+rhymme rhyme
+rhythem rhythm
+rhythim rhythm
+rhytmic rhythmic
+rigeur rigueur, rigour, rigor
+rigourous rigorous
+rininging ringing
+rised rose
+Rockerfeller Rockefeller
+rococco rococo
+rocord record
+roomate roommate
+rougly roughly
+rucuperate recuperate
+rudimentatry rudimentary
+rulle rule
+runing running
+runnung running
+russina Russian
+Russion Russian
+rwite write
+rythem rhythm
+rythim rhythm
+rythm rhythm
+rythmic rhythmic
+rythyms rhythms
+sacrafice sacrifice
+sacreligious sacrilegious
+sacrifical sacrificial
+saftey safety
+safty safety
+salery salary
+sanctionning sanctioning
+sandwhich sandwich
+Sanhedrim Sanhedrin
+santioned sanctioned
+sargant sergeant
+sargeant sergeant
+sasy says, sassy
+satelite satellite
+satelites satellites
+Saterday Saturday
+Saterdays Saturdays
+satisfactority satisfactorily
+satric satiric
+satrical satirical
+satrically satirically
+sattelite satellite
+sattelites satellites
+saught sought
+saveing saving
+saxaphone saxophone
+scaleable scalable
+scandanavia Scandinavia
+scaricity scarcity
+scavanged scavenged
+schedual schedule
+scholarhip scholarship
+scholarstic scholastic, scholarly
+scientfic scientific
+scientifc scientific
+scientis scientist
+scince science
+scinece science
+scirpt script
+scoll scroll
+screenwrighter screenwriter
+scrutinity scrutiny
+scuptures sculptures
+seach search
+seached searched
+seaches searches
+secceeded seceded, succeeded
+seceed succeed, secede
+seceeded succeeded, seceded
+secratary secretary
+secretery secretary
+sedereal sidereal
+seeked sought
+segementation segmentation
+seguoys segues
+seige siege
+seing seeing
+seinor senior
+seldomly seldom
+senarios scenarios
+sence sense
+senstive sensitive
+sensure censure
+seperate separate
+seperated separated
+seperately separately
+seperates separates
+seperating separating
+seperation separation
+seperatism separatism
+seperatist separatist
+sepina subpoena
+sepulchure sepulchre, sepulcher
+sepulcre sepulchre, sepulcher
+sergent sergeant
+settelement settlement
+settlment settlement
+severeal several
+severley severely
+severly severely
+sevice service
+shaddow shadow
+shamen shaman, shamans
+sheat sheath, sheet, cheat
+sheild shield
+sherif sheriff
+shineing shining
+shiped shipped
+shiping shipping
+shopkeeepers shopkeepers
+shorly shortly
+shortwhile short while
+shoudl should
+shoudln should, shouldn't
+shouldnt shouldn't
+shreak shriek
+shrinked shrunk
+sicne since
+sideral sidereal
+sieze seize, size
+siezed seized, sized
+siezing seizing, sizing
+siezure seizure
+siezures seizures
+siginificant significant
+signficant significant
+signficiant significant
+signfies signifies
+signifantly significantly
+significently significantly
+signifigant significant
+signifigantly significantly
+signitories signatories
+signitory signatory
+similarily similarly
+similiar similar
+similiarity similarity
+similiarly similarly
+simmilar similar
+simpley simply
+simplier simpler
+simultanous simultaneous
+simultanously simultaneously
+sincerley sincerely
+singsog singsong
+sinse sines, since
+Sionist Zionist
+Sionists Zionists
+Sixtin Sistine
+Skagerak Skagerrak
+skateing skating
+slaugterhouses slaughterhouses
+slowy slowly
+smae same
+smealting smelting
+smoe some
+sneeks sneaks
+snese sneeze
+socalism socialism
+socities societies
+soem some
+sofware software
+sohw show
+soilders soldiers
+solatary solitary
+soley solely
+soliders soldiers
+soliliquy soliloquy
+soluable soluble
+somene someone
+somtimes sometimes
+somwhere somewhere
+sophicated sophisticated
+sorceror sorcerer
+sorrounding surrounding
+sotry story
+sotyr satyr, story
+soudn sound
+soudns sounds
+sould could, should, sold
+sountrack soundtrack
+sourth south
+sourthern southern
+souvenier souvenir
+souveniers souvenirs
+soveits soviets
+sovereignity sovereignty
+soverign sovereign
+soverignity sovereignty
+soverignty sovereignty
+spainish Spanish
+speach speech
+specfic specific
+speciallized specialised, specialized
+specif specific, specify
+specifiying specifying
+speciman specimen
+spectauclar spectacular
+spectaulars spectaculars
+spects aspects, expects
+spectum spectrum
+speices species
+spendour splendour
+spermatozoan spermatozoon
+spoace space
+sponser sponsor
+sponsered sponsored
+spontanous spontaneous
+sponzored sponsored
+spoonfulls spoonfuls
+sppeches speeches
+spreaded spread
+sprech speech
+spred spread
+spriritual spiritual
+spritual spiritual
+sqaure square
+stablility stability
+stainlees stainless
+staion station
+standars standards
+stange strange
+startegic strategic
+startegies strategies
+startegy strategy
+stateman statesman
+statememts statements
+statment statement
+steriods steroids
+sterotypes stereotypes
+stilus stylus
+stingent stringent
+stiring stirring
+stirrs stirs
+stlye style
+stong strong
+stopry story
+storeis stories
+storise stories
+stornegst strongest
+stoyr story
+stpo stop
+stradegies strategies
+stradegy strategy
+strat start, strata
+stratagically strategically
+streemlining streamlining
+stregth strength
+strenghen strengthen
+strenghened strengthened
+strenghening strengthening
+strenght strength
+strenghten strengthen
+strenghtened strengthened
+strenghtening strengthening
+strengtened strengthened
+strenous strenuous
+strictist strictest
+strikely strikingly
+strnad strand
+stroy story, destroy
+structual structural
+stubborness stubbornness
+stucture structure
+stuctured structured
+studdy study
+studing studying
+stuggling struggling
+sturcture structure
+subcatagories subcategories
+subcatagory subcategory
+subconsiously subconsciously
+subjudgation subjugation
+submachne submachine
+subpecies subspecies
+subsidary subsidiary
+subsiduary subsidiary
+subsquent subsequent
+subsquently subsequently
+substace substance
+substancial substantial
+substatial substantial
+substituded substituted
+substract subtract
+substracted subtracted
+substracting subtracting
+substraction subtraction
+substracts subtracts
+subtances substances
+subterranian subterranean
+suburburban suburban
+succceeded succeeded
+succcesses successes
+succedded succeeded
+succeded succeeded
+succeds succeeds
+succesful successful
+succesfully successfully
+succesfuly successfully
+succesion succession
+succesive successive
+successfull successful
+successully successfully
+succsess success
+succsessfull successful
+suceed succeed
+suceeded succeeded
+suceeding succeeding
+suceeds succeeds
+sucesful successful
+sucesfully successfully
+sucesfuly successfully
+sucesion succession
+sucess success
+sucesses successes
+sucessful successful
+sucessfull successful
+sucessfully successfully
+sucessfuly successfully
+sucession succession
+sucessive successive
+sucessor successor
+sucessot successor
+sucide suicide
+sucidial suicidal
+sufferage suffrage
+sufferred suffered
+sufferring suffering
+sufficent sufficient
+sufficently sufficiently
+sumary summary
+sunglases sunglasses
+suop soup
+superceeded superseded
+superintendant superintendent
+suphisticated sophisticated
+suplimented supplemented
+supose suppose
+suposed supposed
+suposedly supposedly
+suposes supposes
+suposing supposing
+supplamented supplemented
+suppliementing supplementing
+suppoed supposed
+supposingly supposedly
+suppy supply
+supress suppress
+supressed suppressed
+supresses suppresses
+supressing suppressing
+suprise surprise
+suprised surprised
+suprising surprising
+suprisingly surprisingly
+suprize surprise
+suprized surprised
+suprizing surprising
+suprizingly surprisingly
+surfce surface
+surley surly, surely
+suround surround
+surounded surrounded
+surounding surrounding
+suroundings surroundings
+surounds surrounds
+surplanted supplanted
+surpress suppress
+surpressed suppressed
+surprize surprise
+surprized surprised
+surprizing surprising
+surprizingly surprisingly
+surrended surrounded, surrendered
+surrepetitious surreptitious
+surrepetitiously surreptitiously
+surreptious surreptitious
+surreptiously surreptitiously
+surronded surrounded
+surrouded surrounded
+surrouding surrounding
+surrundering surrendering
+surveilence surveillance
+surveill surveil
+surveyer surveyor
+surviver survivor
+survivers survivors
+survivied survived
+suseptable susceptible
+suseptible susceptible
+suspention suspension
+swaer swear
+swaers swears
+swepth swept
+swiming swimming
+syas says
+symetrical symmetrical
+symetrically symmetrically
+symetry symmetry
+symettric symmetric
+symmetral symmetric
+symmetricaly symmetrically
+synagouge synagogue
+syncronization synchronization
+synonomous synonymous
+synonymns synonyms
+synphony symphony
+syphyllis syphilis
+sypmtoms symptoms
+syrap syrup
+sysmatically systematically
+sytem system
+sytle style
+tabacco tobacco
+tahn than
+taht that
+talekd talked
+targetted targeted
+targetting targeting
+tast taste
+tath that
+tattooes tattoos
+taxanomic taxonomic
+taxanomy taxonomy
+teached taught
+techician technician
+techicians technicians
+techiniques techniques
+technitian technician
+technnology technology
+technolgy technology
+teh the
+tehy they
+telelevision television
+televsion television
+telphony telephony
+temerature temperature
+temparate temperate
+temperarily temporarily
+temperment temperament
+tempertaure temperature
+temperture temperature
+temprary temporary
+tenacle tentacle
+tenacles tentacles
+tendacy tendency
+tendancies tendencies
+tendancy tendency
+tennisplayer tennis player
+tepmorarily temporarily
+terrestial terrestrial
+terriories territories
+terriory territory
+territorist terrorist
+territoy territory
+terroist terrorist
+testiclular testicular
+tghe the
+thast that, that's
+theather theater, theatre
+theese these
+theif thief
+theives thieves
+themselfs themselves
+themslves themselves
+ther there, their, the
+therafter thereafter
+therby thereby
+theri their
+thgat that
+thge the
+thier their
+thign thing
+thigns things
+thigsn things
+thikn think
+thikning thinking, thickening
+thikns thinks
+thiunk think
+thn then
+thna than
+thne then
+thnig thing
+thnigs things
+thoughout throughout
+threatend threatened
+threatning threatening
+threee three
+threshhold threshold
+thrid third
+throrough thorough
+throughly thoroughly
+throught thought, through, throughout
+througout throughout
+thru through
+thsi this
+thsoe those
+thta that
+thyat that
+tiem time, Tim
+tihkn think
+tihs this
+timne time
+tiome time, tome
+tje the
+tjhe the
+tjpanishad upanishad
+tkae take
+tkaes takes
+tkaing taking
+tlaking talking
+tobbaco tobacco
+todays today's
+todya today
+toghether together
+tolerence tolerance
+Tolkein Tolkien
+tomatos tomatoes
+tommorow tomorrow
+tommorrow tomorrow
+tongiht tonight
+toriodal toroidal
+tormenters tormentors
+torpeados torpedoes
+torpedos torpedoes
+tothe to the
+toubles troubles
+tounge tongue
+tourch torch, touch
+towords towards
+towrad toward
+tradionally traditionally
+traditionaly traditionally
+traditionnal traditional
+traditition tradition
+tradtionally traditionally
+trafficed trafficked
+trafficing trafficking
+trafic traffic
+trancendent transcendent
+trancending transcending
+tranform transform
+tranformed transformed
+transcendance transcendence
+transcendant transcendent
+transcendentational transcendental
+transcripting transcribing, transcription
+transending transcending
+transesxuals transsexuals
+transfered transferred
+transfering transferring
+transformaton transformation
+transistion transition
+translater translator
+translaters translators
+transmissable transmissible
+transporation transportation
+tremelo tremolo
+tremelos tremolos
+triguered triggered
+triology trilogy
+troling trolling
+troup troupe
+troups troupes, troops
+truely truly
+trustworthyness trustworthiness
+turnk turnkey, trunk
+Tuscon Tucson
+tust trust
+twelth twelfth
+twon town
+twpo two
+tyhat that
+tyhe they
+typcial typical
+typicaly typically
+tyranies tyrannies
+tyrany tyranny
+tyrranies tyrannies
+tyrrany tyranny
+ubiquitious ubiquitous
+uise use
+Ukranian Ukrainian
+ultimely ultimately
+unacompanied unaccompanied
+unahppy unhappy
+unanymous unanimous
+unathorised unauthorised
+unavailible unavailable
+unballance unbalance
+unbeleivable unbelievable
+uncertainity uncertainty
+unchallengable unchallengeable
+unchangable unchangeable
+uncompetive uncompetitive
+unconcious unconscious
+unconciousness unconsciousness
+unconfortability discomfort
+uncontitutional unconstitutional
+unconvential unconventional
+undecideable undecidable
+understoon understood
+undesireable undesirable
+undetecable undetectable
+undoubtely undoubtedly
+undreground underground
+uneccesary unnecessary
+unecessary unnecessary
+unequalities inequalities
+unforetunately unfortunately
+unforgetable unforgettable
+unforgiveable unforgivable
+unfortunatley unfortunately
+unfortunatly unfortunately
+unfourtunately unfortunately
+unihabited uninhabited
+unilateraly unilaterally
+unilatreal unilateral
+unilatreally unilaterally
+uninterruped uninterrupted
+uninterupted uninterrupted
+UnitesStates UnitedStates
+univeral universal
+univeristies universities
+univeristy university
+universtiy university
+univesities universities
+univesity university
+unkown unknown
+unlikey unlikely
+unmanouverable unmaneuverable, unmanoeuvrable
+unmistakeably unmistakably
+unneccesarily unnecessarily
+unneccesary unnecessary
+unneccessarily unnecessarily
+unneccessary unnecessary
+unnecesarily unnecessarily
+unnecesary unnecessary
+unoffical unofficial
+unoperational nonoperational
+unoticeable unnoticeable
+unplease displease
+unplesant unpleasant
+unprecendented unprecedented
+unprecidented unprecedented
+unrepentent unrepentant
+unrepetant unrepentant
+unrepetent unrepentant
+unsed used, unused, unsaid
+unsubstanciated unsubstantiated
+unsuccesful unsuccessful
+unsuccesfully unsuccessfully
+unsuccessfull unsuccessful
+unsucesful unsuccessful
+unsucesfuly unsuccessfully
+unsucessful unsuccessful
+unsucessfull unsuccessful
+unsucessfully unsuccessfully
+unsuprised unsurprised
+unsuprising unsurprising
+unsuprisingly unsurprisingly
+unsuprized unsurprised
+unsuprizing unsurprising
+unsuprizingly unsurprisingly
+unsurprized unsurprised
+unsurprizing unsurprising
+unsurprizingly unsurprisingly
+untill until
+untranslateable untranslatable
+unuseable unusable
+unusuable unusable
+unviersity university
+unwarrented unwarranted
+unweildly unwieldy
+unwieldly unwieldy
+upcomming upcoming
+upgradded upgraded
+usally usually
+useage usage
+usefull useful
+usefuly usefully
+useing using
+usualy usually
+ususally usually
+vaccum vacuum
+vaccume vacuum
+vacinity vicinity
+vaguaries vagaries
+vaieties varieties
+vailidty validity
+valetta valletta
+valuble valuable
+valueable valuable
+varations variations
+varient variant
+variey variety
+varing varying
+varities varieties
+varity variety
+vasall vassal
+vasalls vassals
+vegatarian vegetarian
+vegitable vegetable
+vegitables vegetables
+vegtable vegetable
+vehicule vehicle
+vell well
+venemous venomous
+vengance vengeance
+vengence vengeance
+verfication verification
+verison version
+verisons versions
+vermillion vermilion
+versitilaty versatility
+versitlity versatility
+vetween between
+veyr very
+vigeur vigueur, vigour, vigor
+vigilence vigilance
+vigourous vigorous
+villian villain
+villification vilification
+villify vilify
+villin villi, villain, villein
+vincinity vicinity
+violentce violence
+virutal virtual
+virtualy virtually
+virutally virtually
+visable visible
+visably visibly
+visting visiting
+vistors visitors
+vitories victories
+volcanoe volcano
+voleyball volleyball
+volontary voluntary
+volonteer volunteer
+volonteered volunteered
+volonteering volunteering
+volonteers volunteers
+volounteer volunteer
+volounteered volunteered
+volounteering volunteering
+volounteers volunteers
+vreity variety
+vrey very
+vriety variety
+vulnerablility vulnerability
+vyer very
+vyre very
+waht what
+wanna want to
+warantee warranty
+wardobe wardrobe
+warrent warrant
+warrriors warriors
+wasnt wasn't
+wass was
+watn want
+wayword wayward
+weaponary weaponry
+weas was
+wehn when
+weild wield, wild
+weilded wielded
+wendsay Wednesday
+wensday Wednesday
+wereabouts whereabouts
+whant want
+whants wants
+whcih which
+wheras whereas
+wherease whereas
+whereever wherever
+whic which
+whihc which
+whith with
+whlch which
+whn when
+wholey wholly
+wholy wholly, holy
+whta what
+whther whether
+wich which, witch
+widesread widespread
+wief wife
+wierd weird
+wiew view
+wih with
+wiht with
+wille will
+willingless willingness
+wirting writing
+withdrawl withdrawal, withdraw
+witheld withheld
+withing within
+withold withhold
+witht with
+witn with
+wiull will
+wnat want
+wnated wanted
+wnats wants
+wohle whole
+wokr work
+wokring working
+wonderfull wonderful
+workststion workstation
+worls world
+wordlwide worldwide
+worshipper worshiper
+worshipping worshiping
+worstened worsened
+woudl would
+wresters wrestlers
+wriet write
+writen written
+wroet wrote
+wrok work
+wroking working
+ws was
+wtih with
+wupport support
+xenophoby xenophobia
+yaching yachting
+yatch yacht
+yeasr years
+yeild yield
+yeilding yielding
+Yementite Yemenite, Yemeni
+yearm year
+yera year
+yeras years
+yersa years
+youseff yousef
+youself yourself
+ytou you
+yuo you
+joo you
+zeebra zebra
+
+[[Category:Wikipedia tools]]
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/Makefile.am b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/Makefile.am
new file mode 100644
index 000000000..b8be6c5b6
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/Makefile.am
@@ -0,0 +1,6 @@
+EXTRA_DIST= \
+List_of_common_misspellings.txt \
+Makefile.orig \
+prepare \
+README \
+test
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/Makefile.in b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/Makefile.in
new file mode 100644
index 000000000..11d332705
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/Makefile.in
@@ -0,0 +1,435 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+subdir = tests/suggestiontest
+DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/codeset.m4 \
+ $(top_srcdir)/m4/gettext.m4 $(top_srcdir)/m4/glibc2.m4 \
+ $(top_srcdir)/m4/glibc21.m4 $(top_srcdir)/m4/iconv.m4 \
+ $(top_srcdir)/m4/intdiv0.m4 $(top_srcdir)/m4/intl.m4 \
+ $(top_srcdir)/m4/intlmacosx.m4 $(top_srcdir)/m4/intmax.m4 \
+ $(top_srcdir)/m4/inttypes-pri.m4 \
+ $(top_srcdir)/m4/inttypes_h.m4 $(top_srcdir)/m4/lcmessage.m4 \
+ $(top_srcdir)/m4/lib-ld.m4 $(top_srcdir)/m4/lib-link.m4 \
+ $(top_srcdir)/m4/lib-prefix.m4 $(top_srcdir)/m4/libtool.m4 \
+ $(top_srcdir)/m4/lock.m4 $(top_srcdir)/m4/longlong.m4 \
+ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+ $(top_srcdir)/m4/nls.m4 $(top_srcdir)/m4/po.m4 \
+ $(top_srcdir)/m4/printf-posix.m4 $(top_srcdir)/m4/progtest.m4 \
+ $(top_srcdir)/m4/size_max.m4 $(top_srcdir)/m4/stdint_h.m4 \
+ $(top_srcdir)/m4/uintmax_t.m4 $(top_srcdir)/m4/visibility.m4 \
+ $(top_srcdir)/m4/wchar_t.m4 $(top_srcdir)/m4/wint_t.m4 \
+ $(top_srcdir)/m4/xsize.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+SOURCES =
+DIST_SOURCES =
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+ALLOCA = @ALLOCA@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BUILD_INCLUDED_LIBINTL = @BUILD_INCLUDED_LIBINTL@
+CATOBJEXT = @CATOBJEXT@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CFLAG_VISIBILITY = @CFLAG_VISIBILITY@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CURSESLIB = @CURSESLIB@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DATADIRNAME = @DATADIRNAME@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GENCAT = @GENCAT@
+GETTEXT_MACRO_VERSION = @GETTEXT_MACRO_VERSION@
+GLIBC2 = @GLIBC2@
+GLIBC21 = @GLIBC21@
+GMSGFMT = @GMSGFMT@
+GMSGFMT_015 = @GMSGFMT_015@
+GREP = @GREP@
+HAVE_ASPRINTF = @HAVE_ASPRINTF@
+HAVE_POSIX_PRINTF = @HAVE_POSIX_PRINTF@
+HAVE_SNPRINTF = @HAVE_SNPRINTF@
+HAVE_VISIBILITY = @HAVE_VISIBILITY@
+HAVE_WPRINTF = @HAVE_WPRINTF@
+HUNSPELL_VERSION_MAJOR = @HUNSPELL_VERSION_MAJOR@
+HUNSPELL_VERSION_MINOR = @HUNSPELL_VERSION_MINOR@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+INSTOBJEXT = @INSTOBJEXT@
+INTLBISON = @INTLBISON@
+INTLLIBS = @INTLLIBS@
+INTLOBJS = @INTLOBJS@
+INTL_LIBTOOL_SUFFIX_PREFIX = @INTL_LIBTOOL_SUFFIX_PREFIX@
+INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBICONV = @LIBICONV@
+LIBINTL = @LIBINTL@
+LIBMULTITHREAD = @LIBMULTITHREAD@
+LIBOBJS = @LIBOBJS@
+LIBPTH = @LIBPTH@
+LIBPTH_PREFIX = @LIBPTH_PREFIX@
+LIBS = @LIBS@
+LIBTHREAD = @LIBTHREAD@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBC = @LTLIBC@
+LTLIBICONV = @LTLIBICONV@
+LTLIBINTL = @LTLIBINTL@
+LTLIBMULTITHREAD = @LTLIBMULTITHREAD@
+LTLIBOBJS = @LTLIBOBJS@
+LTLIBPTH = @LTLIBPTH@
+LTLIBTHREAD = @LTLIBTHREAD@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MSGFMT = @MSGFMT@
+MSGFMT_015 = @MSGFMT_015@
+MSGMERGE = @MSGMERGE@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+POSUB = @POSUB@
+PRI_MACROS_BROKEN = @PRI_MACROS_BROKEN@
+RANLIB = @RANLIB@
+READLINELIB = @READLINELIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+USE_INCLUDED_LIBINTL = @USE_INCLUDED_LIBINTL@
+USE_NLS = @USE_NLS@
+VERSION = @VERSION@
+WINDRES = @WINDRES@
+WOE32 = @WOE32@
+WOE32DLL = @WOE32DLL@
+XFAILED = @XFAILED@
+XGETTEXT = @XGETTEXT@
+XGETTEXT_015 = @XGETTEXT_015@
+XGETTEXT_EXTRA_OPTIONS = @XGETTEXT_EXTRA_OPTIONS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lt_ECHO = @lt_ECHO@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+EXTRA_DIST = \
+List_of_common_misspellings.txt \
+Makefile.orig \
+prepare \
+README \
+test
+
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu tests/suggestiontest/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --gnu tests/suggestiontest/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+tags: TAGS
+TAGS:
+
+ctags: CTAGS
+CTAGS:
+
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-generic
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: install-am install-strip
+
+.PHONY: all all-am check check-am clean clean-generic clean-libtool \
+ distclean distclean-generic distclean-libtool distdir dvi \
+ dvi-am html html-am info info-am install install-am \
+ install-data install-data-am install-dvi install-dvi-am \
+ install-exec install-exec-am install-html install-html-am \
+ install-info install-info-am install-man install-pdf \
+ install-pdf-am install-ps install-ps-am install-strip \
+ installcheck installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-generic \
+ mostlyclean-libtool pdf pdf-am ps ps-am uninstall uninstall-am
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/README b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/README
new file mode 100644
index 000000000..c50e05cea
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/README
@@ -0,0 +1,16 @@
+source of text data: Wikipedia
+http://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
+
+To test Hunspell, you need the extended en_US dictionary with a phonetic table:
+http://hunspell.sourceforge.net/en_US.zip
+
+test:
+make -f Makefile.orig
+
+test only with Hunspell:
+
+make -f Makefile.orig single
+
+test with a different input file and different dictionaries:
+
+INPUT=dutchlist.txt HUNSPELL=nl_NL ASPELL=nl make -f Makefile.orig
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/prepare b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/prepare
new file mode 100644
index 000000000..a72d931b8
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/prepare
@@ -0,0 +1,57 @@
+#!/bin/bash
+# Prepare the data files used by the "test" script from a list of common
+# misspellings.
+#
+# input file format:
+# word->word1, ...
+# Source: http://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
+#
+# Environment: HUNSPELL and ASPELL name the dictionaries (default en_US),
+# INPUT names the list (default List_of_common_misspellings.txt).
+#
+# Results: $input.3 holds the tab-separated test rows (misspelling followed
+# by its expected suggestions); $input.4 and $input.5 hold the "aspell -a" and
+# "hunspell -a" answers for those rows with the suggestions tab-separated.
+# $input.1, $input.2, x.1 and x.2 are intermediate files.
+
+hunspell=../../src/tools/hunspell
+hlang=${HUNSPELL:-en_US}
+alang=${ASPELL:-en_US}
+input=${INPUT:-List_of_common_misspellings.txt}
+
+# The field separator is a tab. Name it once instead of embedding literal
+# tab characters in the commands, where they cannot be told from spaces.
+tab=$'\t'
+
+# remove bad words recognised by Hunspell as good
+sed "s/[-]>/${tab}/" "$input" | "$hunspell" -d "$hlang" -1 -L |
+
+# remove items with dash for Aspell
+grep "^[^-]*${tab}" |
+
+# remove spaces from end of lines
+sed 's/ *$//' >"$input.1"
+
+# remove bad words recognised by Aspell as good
+cut -f 1 -d "$tab" "$input.1" | aspell -l "$alang" --list |
+awk 'FILENAME=="-"{a[$1]=1;next}a[$1]{print$0}' - "$input.1" |
+
+# replace commas with tabs
+sed "s/, */${tab}/g" >"$input.2"
+
+# remove lines with unrecognised suggestions (except suggestion with spaces);
+# only the first two suggestions of a row are checked
+cut -d "$tab" -f 2- "$input.2" | tr "\t" "\n" | grep -v ' ' >x.1
+"$hunspell" -l -d "$hlang" <x.1 >x.2
+aspell -l "$alang" --list <x.1 >>x.2
+awk 'BEGIN{FS="\t"}
+FILENAME=="-"{a[$1]=1;next}a[$2]!=1 && a[$3]!=1{print $0}' - "$input.2" <x.2 >"$input.3"
+
+# collect the Aspell answers: drop empty lines and the first remaining line,
+# strip everything up to ": " and separate the suggestions with tabs
+cut -f 1 -d "$tab" "$input.3" | aspell -l "$alang" -a | grep -v '^$' | sed -n '2,$p' |
+sed "s/^.*: //;s/, /${tab}/g" >"$input.4"
+
+# same for Hunspell (-1: only the first field of each row is checked)
+"$hunspell" -d "$hlang" -a -1 <"$input.3" | grep -v '^$' | sed -n '2,$p' |
+sed "s/^.*: //;s/, /${tab}/g" >"$input.5"
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/test b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/test
new file mode 100644
index 000000000..8e6c1cc1f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/test
@@ -0,0 +1,60 @@
+#!/bin/bash
+# Check common misspellings
+# input file format:
+# word->word1, ...
+# Source: http://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
+
+input=${INPUT:-List_of_common_misspellings.txt}
+
+# check SUGGESTIONS EXPECTED
+#
+# Compare the suggestions a spell checker returned (file SUGGESTIONS) with the
+# expected ones (file EXPECTED) and print statistics on standard output.
+# Both files are tab-separated and paired by line number: line N of
+# SUGGESTIONS answers line N of EXPECTED. Only the second and third field of
+# EXPECTED (the first two expected suggestions) are looked up.
+function check() {
+cat "$1" | awk '
+BEGIN { maxord = 0; FS = "\t" }
+
+# first input (stdin): record the position of every returned suggestion
+FILENAME == "-" {
+  for (i = 1; i <= NF; i++) a[NR, $(i)] = i
+  # track the largest suggestion count of a line; maxord used to stay 0
+  if (NF > maxord) maxord = NF
+  max = NR
+  next
+}
+
+# second input: look up the expected suggestions of the paired line
+{
+  x1 = a[NR - max, $2]
+  x2 = a[NR - max, $3]
+  sug++
+  if ($3) sug++
+  if (!x1 && !x2) { mis2++; misrow = misrow "\n" $0 }
+  if (!x1 || ($3 && !x2)) mis++
+  ord += x1 + x2
+}
+
+END {
+  print "Missed rows", misrow;
+  print "======================================="
+  print maxord, "max. suggestion for a word";
+  print max, "input rows";
+  print mis2, "missing rows";
+  print sug, "expected suggestions";
+  print mis, "missing suggestions";
+  # sug - mis is 0 e.g. for empty input; avoid dividing by zero then
+  if (sug > mis) {
+    print ord / (sug - mis), "average ranking";
+  } else {
+    print "n/a", "average ranking";
+  }
+}' - "$2"
+}
+
+test -f "$input.4" && check "$input.4" "$input.3" >result.aspell
+check "$input.5" "$input.3" >result.hunspell
+test -f result.aspell && tail -6 result.aspell
+tail -6 result.hunspell
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/sugutf.aff b/extensions/spellcheck/hunspell/tests/unit/data/sugutf.aff
new file mode 100644
index 000000000..60294d24c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/sugutf.aff
@@ -0,0 +1,15 @@
+# new suggestion methods of Hunspell 1.5:
+# capitalization: nasa -> NASA
+# long swap: permenant -> permanent
+# long mov: Ghandi -> Gandhi
+# double two characters: vacacation -> vacation
+# space with REP: "alot" -> "a lot" ("a lot" needs to be in the dic file.)
+
+SET UTF-8
+# switch off ngram suggestion for testing
+MAXNGRAMSUGS 0
+REP 1
+REP alot a_lot
+KEY qwertzuiop|asdfghjkl|yxcvbnm|aq
+WORDCHARS .
+FORBIDDENWORD ?
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/sugutf.dic b/extensions/spellcheck/hunspell/tests/unit/data/sugutf.dic
new file mode 100644
index 000000000..cf7c9aadb
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/sugutf.dic
@@ -0,0 +1,11 @@
+10
+NASA
+Gandhi
+grateful
+permanent
+vacation
+a
+lot
+have
+which
+McDonald
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/sugutf.sug b/extensions/spellcheck/hunspell/tests/unit/data/sugutf.sug
new file mode 100644
index 000000000..e277bdb77
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/sugutf.sug
@@ -0,0 +1,12 @@
+NASA
+Gandhi
+grateful
+permanent
+vacation
+a lot, lot
+permanent. Vacation
+have
+which
+Gandhi
+McDonald
+permanent
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/sugutf.test b/extensions/spellcheck/hunspell/tests/unit/data/sugutf.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/sugutf.test
@@ -0,0 +1,5 @@
+#!/bin/sh
+# Run test.sh from this script's directory, passing this script's name without ".test".
+DIR="$(dirname "$0")"
+NAME="$(basename "$0" .test)"
+"$DIR/test.sh" "$NAME"
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/sugutf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/sugutf.wrong
new file mode 100644
index 000000000..4d184d5a6
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/sugutf.wrong
@@ -0,0 +1,12 @@
+nasa
+Ghandi
+greatful
+permenant
+vacacation
+alot
+permanent.Vacation
+ahev
+hwihc
+GAndhi
+Mcdonald
+permqnent
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/test.sh b/extensions/spellcheck/hunspell/tests/unit/data/test.sh
new file mode 100644
index 000000000..c89ca9bf7
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/test.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+export LC_ALL="C"
+
+function check_valgrind_log () {
+# $1 names the phase just run; exits 1 when a Valgrind log reports errors or leaks.
+[ "$VALGRIND" != "" ] || return 0
+for log in $TEMPDIR/test.pid*; do # one log at a time: a multi-match glob breaks a single [ -f ]
+  [ -f "$log" ] || continue # an unmatched glob stays literal, so skip anything that is not a file
+  if ! grep -q 'ERROR SUMMARY: 0 error' "$log"; then
+    echo "Fail in $NAME $1 checking detected by Valgrind"
+    echo "$log Valgrind log file moved to $TEMPDIR/badlogs"
+    mv "$log" $TEMPDIR/badlogs
+    exit 1
+  fi
+  if grep -q 'LEAK SUMMARY' "$log"; then
+    echo "Memory leak in $NAME $1 checking detected by Valgrind"
+    echo "$log Valgrind log file moved to $TEMPDIR/badlogs"
+    mv "$log" $TEMPDIR/badlogs
+    exit 1
+  fi
+  rm -f "$log"
+done
+}
+
+TESTDIR=.
+TEMPDIR=$TESTDIR/testSubDir
+NAME="$1"
+shift
+# $1 was the test name; the arguments left after the shift are handed to hunspell through $*
+if [ ! -d $TEMPDIR ]; then
+ mkdir $TEMPDIR
+fi
+
+shopt -s expand_aliases
+# (bash only expands aliases in a non-interactive shell when expand_aliases is set)
+alias hunspell='../libtool --mode=execute -dlopen ../src/hunspell/.libs/libhunspell*.la ../src/tools/hunspell'
+alias analyze='../libtool --mode=execute -dlopen ../src/hunspell/.libs/libhunspell*.la ../src/tools/analyze'
+
+if [ "$VALGRIND" != "" ]; then
+ rm -f $TEMPDIR/test.pid*
+ if [ ! -d $TEMPDIR/badlogs ]; then
+ mkdir $TEMPDIR/badlogs
+ fi
+ # re-point both aliases so the tools run under valgrind (tool = $VALGRIND), logging to $TEMPDIR/test.pid
+ alias hunspell='../libtool --mode=execute -dlopen ../src/hunspell/.libs/libhunspell*.la valgrind --tool=$VALGRIND --leak-check=yes --show-reachable=yes --log-file=$TEMPDIR/test.pid ../src/tools/hunspell'
+ alias analyze='../libtool --mode=execute -dlopen ../src/hunspell/.libs/libhunspell*.la valgrind --tool=$VALGRIND --leak-check=yes --show-reachable=yes --log-file=$TEMPDIR/test.pid ../src/tools/analyze'
+fi
+
+# Tests good words: any output of "hunspell -l" on $NAME.good is reported as good words recognised as wrong
+if test -f $TESTDIR/$NAME.good; then
+ hunspell -l $* -d $TESTDIR/$NAME <$TESTDIR/$NAME.good >$TEMPDIR/$NAME.good
+ if test -s $TEMPDIR/$NAME.good; then
+ echo "============================================="
+ echo "Fail in $NAME.good. Good words recognised as wrong:"
+ cat $TEMPDIR/$NAME.good
+ rm -f $TEMPDIR/$NAME.good
+ exit 1
+ fi
+ rm -f $TEMPDIR/$NAME.good
+fi
+
+check_valgrind_log "good words"
+
+# Tests bad words: "hunspell -l" must echo back every word of $NAME.wrong
+if test -f $TESTDIR/$NAME.wrong; then
+  hunspell -l $* -d $TESTDIR/$NAME <$TESTDIR/$NAME.wrong >$TEMPDIR/$NAME.wrong
+  tr -d '\t' <$TESTDIR/$NAME.wrong >$TEMPDIR/$NAME.wrong.detab # '\t', not a blank: the reference copy is "detabbed"
+  if ! cmp $TEMPDIR/$NAME.wrong $TEMPDIR/$NAME.wrong.detab >/dev/null; then
+    echo "============================================="
+    echo "Fail in $NAME.wrong. Bad words recognised as good:"
+    # $NAME.wrong.detab written above is reused; the '<' lines of the diff are the bad words hunspell accepted
+    diff $TEMPDIR/$NAME.wrong.detab $TEMPDIR/$NAME.wrong | grep '^<' | sed 's/^..//'
+    rm -f $TEMPDIR/$NAME.wrong $TEMPDIR/$NAME.wrong.detab
+    exit 1
+  fi
+  rm -f $TEMPDIR/$NAME.wrong $TEMPDIR/$NAME.wrong.detab
+fi
+
+check_valgrind_log "bad words"
+
+# Tests morphological analysis: the output of "analyze" on $NAME.good must equal $NAME.morph
+if test -f $TESTDIR/$NAME.morph; then
+  sed 's/ $//' $TESTDIR/$NAME.good >$TEMPDIR/$NAME.good
+  analyze $TESTDIR/$NAME.aff $TESTDIR/$NAME.dic $TEMPDIR/$NAME.good >$TEMPDIR/$NAME.morph
+  if ! cmp $TEMPDIR/$NAME.morph $TESTDIR/$NAME.morph >/dev/null; then
+    echo "============================================="
+    echo "Fail in $NAME.morph. Bad analysis?"
+    diff $TESTDIR/$NAME.morph $TEMPDIR/$NAME.morph | grep '^<' | sed 's/^..//'
+    rm -f $TEMPDIR/$NAME.{morph,good} # also drop the temporary .good copy, exactly as the success path does
+    exit 1
+  fi
+  rm -f $TEMPDIR/$NAME.{morph,good}
+fi
+
+check_valgrind_log "morphological analysis"
+
+# Tests suggestions: keep the '&' lines of the "hunspell -a" output, strip everything up to the first ": ", compare with $NAME.sug
+if test -f $TESTDIR/$NAME.sug; then
+ hunspell $* -a -d $TESTDIR/$NAME <$TESTDIR/$NAME.wrong | grep '^&' | \
+ sed 's/^[^:]*: //' >$TEMPDIR/$NAME.sug
+ if ! cmp $TEMPDIR/$NAME.sug $TESTDIR/$NAME.sug >/dev/null; then
+ echo "============================================="
+ echo "Fail in $NAME.sug. Bad suggestion?"
+ diff $TESTDIR/$NAME.sug $TEMPDIR/$NAME.sug
+ rm -f $TEMPDIR/$NAME.sug
+ exit 1
+ fi
+ rm -f $TEMPDIR/$NAME.sug
+fi
+
+check_valgrind_log "suggestion"
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.aff b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.aff
new file mode 100644
index 000000000..f56998b9f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.aff
@@ -0,0 +1,3 @@
+SET UTF-8
+
+# removing byte order mark from affix file
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.dic b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.dic
new file mode 100644
index 000000000..8b10768e5
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.dic
@@ -0,0 +1,2 @@
+1
+apéritif
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.good b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.good
new file mode 100644
index 000000000..c344eaf5a
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.good
@@ -0,0 +1,2 @@
+apéritif
+APÉRITIF
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.test b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.test
new file mode 100644
index 000000000..1d25699aa
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i utf-8 -1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.aff b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.aff
new file mode 100644
index 000000000..784935c84
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.aff
@@ -0,0 +1,3 @@
+SET UTF-8
+
+# removing byte order mark from dic file
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.dic b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.dic
new file mode 100644
index 000000000..b763179a0
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.dic
@@ -0,0 +1,2 @@
+1
+apéritif
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.good b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.good
new file mode 100644
index 000000000..c344eaf5a
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.good
@@ -0,0 +1,2 @@
+apéritif
+APÉRITIF
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.test b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.test
new file mode 100644
index 000000000..1d25699aa
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i utf-8 -1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.aff b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.aff
new file mode 100644
index 000000000..979e3c228
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.aff
@@ -0,0 +1 @@
+SET UTF-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.dic b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.dic
new file mode 100644
index 000000000..4a040eeb0
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.dic
@@ -0,0 +1,5 @@
+4 # Old Persian numbers (1-4), source: Wikipedia
+𐏑
+𐏒
+𐏒𐏑
+𐏒𐏒
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.good b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.good
new file mode 100644
index 000000000..9f989d339
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.good
@@ -0,0 +1,5 @@
+𐏑
+𐏒
+𐏒𐏑
+𐏒𐏒
+
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.sug b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.sug
new file mode 100644
index 000000000..bfe2a539f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.sug
@@ -0,0 +1,2 @@
+𐏒𐏑, 𐏒𐏒
+𐏒𐏑, 𐏒𐏒
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.test b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.test
new file mode 100644
index 000000000..1d25699aa
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i utf-8 -1
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.wrong b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.wrong
new file mode 100644
index 000000000..d18dfa4c2
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.wrong
@@ -0,0 +1,2 @@
+𐏑𐏒𐏒
+𐏑𐏒𐏒
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8.aff b/extensions/spellcheck/hunspell/tests/unit/data/utf8.aff
new file mode 100644
index 000000000..e8934d71b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8.aff
@@ -0,0 +1,10 @@
+SET UTF-8
+
+SFX A Y 7
+SFX A 0 őő .
+SFX A 0 ő o
+SFX A 0 ő ó
+SFX A ó ő ó
+SFX A ó őoo ó
+SFX A o őo o
+SFX A 0 ó [abcdó]
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8.dic b/extensions/spellcheck/hunspell/tests/unit/data/utf8.dic
new file mode 100644
index 000000000..e7cb34daf
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8.dic
@@ -0,0 +1,3 @@
+2
+foo/A
+foó/A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8.good b/extensions/spellcheck/hunspell/tests/unit/data/utf8.good
new file mode 100644
index 000000000..08aa4dadf
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8.good
@@ -0,0 +1,9 @@
+foo
+foó
+fooőő
+fooő
+foóő
+foő
+foőo
+foőoo
+foóó
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8.test b/extensions/spellcheck/hunspell/tests/unit/data/utf8.test
new file mode 100644
index 000000000..cde7c5410
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i utf-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.aff b/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.aff
new file mode 100644
index 000000000..43506afa3
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.aff
@@ -0,0 +1,3 @@
+SET UTF-8
+COMPOUNDMIN 3
+COMPOUNDFLAG A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.dic b/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.dic
new file mode 100644
index 000000000..ab90a1b70
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.dic
@@ -0,0 +1,9 @@
+8
+foo/A
+bar/A
+fóó/A
+áár/A
+xy/A
+yz/A
+éé/A
+őő/A
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.good b/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.good
new file mode 100644
index 000000000..1a1a1b19c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.good
@@ -0,0 +1,5 @@
+foobar
+barfoo
+foobarfoo
+fóóáár
+áárfóó
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.test b/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.test
new file mode 100644
index 000000000..cde7c5410
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME -i utf-8
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.wrong b/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.wrong
new file mode 100644
index 000000000..fa385c1b0
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.wrong
@@ -0,0 +1,7 @@
+xyyz
+fooxy
+xyfoo
+fooxybar
+ééőő
+fóóéé
+őőáár
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/warn.aff b/extensions/spellcheck/hunspell/tests/unit/data/warn.aff
new file mode 100644
index 000000000..d586fa33e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/warn.aff
@@ -0,0 +1,13 @@
+# WARN flag
+# The flagged word and its suffixed forms result in a warning message on the command line
+
+#Use FORBIDWARN to forbid the words with flag WARN
+#FORBIDWARN
+
+WARN W
+
+SFX A Y 1
+SFX A 0 s .
+
+REP 1
+REP foo bar
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/warn.dic b/extensions/spellcheck/hunspell/tests/unit/data/warn.dic
new file mode 100644
index 000000000..d63f6047e
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/warn.dic
@@ -0,0 +1,3 @@
+1
+foo/WA
+bar
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/warn.good b/extensions/spellcheck/hunspell/tests/unit/data/warn.good
new file mode 100644
index 000000000..542f439a4
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/warn.good
@@ -0,0 +1,2 @@
+foo
+foos
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/warn.test b/extensions/spellcheck/hunspell/tests/unit/data/warn.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/warn.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.aff b/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.aff
new file mode 100644
index 000000000..fdb047b0c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.aff
@@ -0,0 +1,12 @@
+PSEUDOROOT X
+COMPOUNDFLAG Y
+
+SFX A Y 1
+SFX A 0 0 . >
+
+SFX B Y 1
+SFX B 0 0 . <ZERO>>
+
+SFX C Y 2
+SFX C 0 0/XAB . <ZERODERIV>
+SFX C 0 baz/XAB . <DERIV>
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.dic b/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.dic
new file mode 100644
index 000000000..72cba8d34
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.dic
@@ -0,0 +1,3 @@
+2
+foo/XA <FOO
+bar/XABC <BAR
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.good b/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.good
new file mode 100644
index 000000000..b1fb3ba5c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.good
@@ -0,0 +1,3 @@
+bar
+foo
+barbaz
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.morph b/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.morph
new file mode 100644
index 000000000..bcb788ad7
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.morph
@@ -0,0 +1,13 @@
+> bar
+analyze(bar) = st:bar <BAR <ZERO>>
+analyze(bar) = st:bar <BAR >
+analyze(bar) = st:bar <BAR <ZERODERIV> <ZERO>>
+analyze(bar) = st:bar <BAR <ZERODERIV> >
+stem(bar) = bar
+> foo
+analyze(foo) = st:foo <FOO >
+stem(foo) = foo
+> barbaz
+analyze(barbaz) = st:bar <BAR <DERIV> <ZERO>>
+analyze(barbaz) = st:bar <BAR <DERIV> >
+stem(barbaz) = bar
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.test b/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.test
new file mode 100644
index 000000000..7f4436906
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME
diff --git a/extensions/spellcheck/hunspell/tests/unit/test_hunspell.js b/extensions/spellcheck/hunspell/tests/unit/test_hunspell.js
new file mode 100644
index 000000000..a67eeaec0
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/test_hunspell.js
@@ -0,0 +1,220 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+var Cc = Components.classes;
+var Ci = Components.interfaces;
+
+const tests = [ // each entry: [dictionary name, charset used to read its .good/.wrong files, optional todo_good map {1-based line number: "todo"}, optional todo_bad map]
+ ["affixes", "iso-8859-1"],
+ ["condition", "iso-8859-1"],
+ ["condition-utf", "UTF-8"],
+ ["base", "iso-8859-1"],
+ ["base-utf", "UTF-8"],
+ ["allcaps", "iso-8859-1"],
+ ["allcaps-utf", "UTF-8"],
+ ["allcaps2", "iso-8859-1"],
+ ["allcaps3", "iso-8859-1"],
+ ["keepcase", "iso-8859-1"],
+ ["i58202", "iso-8859-1"],
+ ["map", "iso-8859-1"],
+ ["rep", "iso-8859-1"],
+ ["sug", "iso-8859-1"],
+ ["sugutf", "UTF-8"],
+ ["phone", "iso-8859-1"],
+ ["flag", "iso-8859-1"],
+ ["flaglong", "iso-8859-1"],
+ ["flagnum", "iso-8859-1"],
+ ["flagutf8", "UTF-8"],
+ ["slash", "iso-8859-1"],
+ ["forbiddenword", "iso-8859-1"],
+ ["nosuggest", "iso-8859-1"],
+ ["alias", "iso-8859-1"],
+ ["alias2", "iso-8859-1"],
+ ["alias3", "iso-8859-1"],
+ ["breakdefault", "iso-8859-1"],
+ ["break", "UTF-8"],
+ ["needaffix", "iso-8859-1"],
+ ["needaffix2", "iso-8859-1"],
+ ["needaffix3", "iso-8859-1"],
+ ["needaffix4", "iso-8859-1"],
+ ["needaffix5", "iso-8859-1"],
+ ["circumfix", "iso-8859-1"],
+ ["fogemorpheme", "iso-8859-1"],
+ ["onlyincompound", "iso-8859-1"],
+ ["complexprefixes", "iso-8859-1"],
+ ["complexprefixes2", "iso-8859-1"],
+ ["complexprefixesutf", "UTF-8"],
+ ["conditionalprefix", "iso-8859-1"],
+ ["zeroaffix", "iso-8859-1"],
+ ["utf8", "UTF-8"],
+ ["utf8-bom", "UTF-8", {1: "todo"}],
+ ["utf8-bom2", "UTF-8", {1: "todo"}],
+ ["utf8-nonbmp", "UTF-8", {1: "todo", 2: "todo", 3: "todo", 4: "todo"}],
+ ["compoundflag", "iso-8859-1"],
+ ["compoundrule", "iso-8859-1"],
+ ["compoundrule2", "iso-8859-1"],
+ ["compoundrule3", "iso-8859-1"],
+ ["compoundrule4", "iso-8859-1"],
+ ["compoundrule5", "UTF-8"],
+ ["compoundrule6", "iso-8859-1"],
+ ["compoundrule7", "iso-8859-1"],
+ ["compoundrule8", "iso-8859-1"],
+ ["compoundaffix", "iso-8859-1"],
+ ["compoundaffix2", "iso-8859-1"],
+ ["compoundaffix3", "iso-8859-1"],
+ ["checkcompounddup", "iso-8859-1"],
+ ["checkcompoundtriple", "iso-8859-1"],
+ ["simplifiedtriple", "iso-8859-1"],
+ ["checkcompoundrep", "iso-8859-1"],
+ ["checkcompoundcase2", "iso-8859-1"],
+ ["checkcompoundcaseutf", "UTF-8"],
+ ["checkcompoundpattern", "iso-8859-1"],
+ ["checkcompoundpattern2", "iso-8859-1"],
+ ["checkcompoundpattern3", "iso-8859-1"],
+ ["checkcompoundpattern4", "iso-8859-1"],
+ ["utfcompound", "UTF-8"],
+ ["checksharps", "iso-8859-1"],
+ ["checksharpsutf", "UTF-8"],
+ ["germancompounding", "iso-8859-1"],
+ ["germancompoundingold", "iso-8859-1"],
+ ["i35725", "iso-8859-1"],
+ ["i53643", "iso-8859-1"],
+ ["i54633", "iso-8859-1"],
+ ["i54980", "iso-8859-1", {1: "todo", 3: "todo"}],
+ ["maputf", "UTF-8"],
+ ["reputf", "UTF-8"],
+ ["ignore", "iso-8859-1"],
+ ["ignoreutf", "UTF-8",
+ {1: "todo", 2: "todo", 3: "todo", 4: "todo", 5: "todo", 6: "todo",
+ 7: "todo", 8: "todo"}],
+ ["1592880", "iso-8859-1"],
+ ["1695964", "iso-8859-1"],
+ ["1463589", "iso-8859-1"],
+ ["1463589-utf", "UTF-8"],
+ ["IJ", "iso-8859-1"],
+ ["i68568", "iso-8859-1"],
+ ["i68568utf", "UTF-8"],
+ ["1706659", "iso-8859-1"],
+ ["digits-in-words", "iso-8859-1"],
+// ["colons-in-words", "iso-8859-1"], Suggestion test only
+ ["ngram-utf-fix", "UTF-8"],
+ ["morph", "us-ascii",
+ {11: "todo", 12: "todo", 13: "todo", 14: "todo", 15: "todo", 16: "todo",
+ 17: "todo", 18: "todo", 19: "todo", 20: "todo", 21: "todo", 22: "todo",
+ 23: "todo", 24: "todo", 25: "todo", 26: "todo", 27: "todo"}],
+ ["1975530", "UTF-8"],
+ ["fullstrip", "iso-8859-1"],
+ ["iconv", "UTF-8"],
+ ["oconv", "UTF-8"],
+ ["encoding", "iso-8859-1", {1: "todo", 3: "todo"}],
+ ["korean", "UTF-8"],
+ ["opentaal-forbiddenword1", "UTF-8"],
+ ["opentaal-forbiddenword2", "UTF-8"],
+ ["opentaal-keepcase", "UTF-8"],
+ ["arabic", "UTF-8"],
+ ["2970240", "iso-8859-1"],
+ ["2970242", "iso-8859-1"],
+ ["breakoff", "iso-8859-1"],
+ ["opentaal-cpdpat", "iso-8859-1"],
+ ["opentaal-cpdpat2", "iso-8859-1"],
+ ["2999225", "iso-8859-1"],
+ ["onlyincompound2", "iso-8859-1"],
+ ["forceucase", "iso-8859-1"],
+ ["warn", "iso-8859-1"]
+];
+
+function do_get_file_by_line(file, charset) {
+ dump("getting file by line for file " + file.path + "\n");
+ dump("using charset " + charset +"\n");
+ let fis = Cc["@mozilla.org/network/file-input-stream;1"].
+ createInstance(Ci.nsIFileInputStream);
+ fis.init(file, 0x1 /* READONLY */,
+ 0o444, Ci.nsIFileInputStream.CLOSE_ON_EOF);
+
+ let lis = Cc["@mozilla.org/intl/converter-input-stream;1"].
+ createInstance(Ci.nsIConverterInputStream);
+ lis.init(fis, charset, 1024, 0);
+ lis.QueryInterface(Ci.nsIUnicharLineInputStream);
+
+ var val = {};
+ while (lis.readLine(val)) {
+ yield val.value;
+ val = {};
+ }
+}
+
+function do_run_test(checker, name, charset, todo_good, todo_bad) {
+ dump("\n\n\n\n");
+ dump("running test for " + name + "\n");
+ if (!checker) {
+ do_throw("Need spell checker here!");
+ }
+
+ let good = do_get_file("data/" + name + ".good", true);
+ let bad = do_get_file("data/" + name + ".wrong", true);
+ let sug = do_get_file("data/" + name + ".sug", true);
+
+ dump("Need some expected output\n")
+ do_check_true(good.exists() || bad.exists() || sug.exists());
+
+ dump("Setting dictionary to " + name + "\n");
+ checker.dictionary = name;
+
+ if (good.exists()) {
+ var good_counter = 0;
+ for (val in do_get_file_by_line(good, charset)) {
+ let todo = false;
+ good_counter++;
+ if (todo_good && todo_good[good_counter]) {
+ todo = true;
+ dump("TODO\n");
+ }
+
+ dump("Expect word " + val + " is spelled correctly\n");
+ if (todo) {
+ todo_check_true(checker.check(val));
+ } else {
+ do_check_true(checker.check(val));
+ }
+ }
+ }
+
+ if (bad.exists()) {
+ var bad_counter = 0;
+ for (val in do_get_file_by_line(bad, charset)) {
+ let todo = false;
+ bad_counter++;
+ if (todo_bad && todo_bad[bad_counter]) {
+ todo = true;
+ dump("TODO\n");
+ }
+
+ dump("Expect word " + val + " is spelled wrong\n");
+ if (todo) {
+ todo_check_false(checker.check(val));
+ } else {
+ do_check_false(checker.check(val));
+ }
+ }
+ }
+
+ // XXXkhuey test suggestions
+}
+
+function run_test() {
+ let spellChecker = Cc["@mozilla.org/spellchecker/engine;1"].
+ getService(Ci.mozISpellCheckingEngine);
+
+ do_check_true(!!spellChecker, "Should have a spell checker");
+ spellChecker.QueryInterface(Ci.mozISpellCheckingEngine);
+ let testdir = do_get_file("data/", false);
+ spellChecker.loadDictionariesFromDir(testdir);
+
+ function do_run_test_closure(test) {
+ [name, charset, todo_good, todo_bad] = test;
+ do_run_test(spellChecker, name, charset, todo_good, todo_bad);
+ }
+
+ tests.forEach(do_run_test_closure);
+}
diff --git a/extensions/spellcheck/hunspell/tests/unit/xpcshell.ini b/extensions/spellcheck/hunspell/tests/unit/xpcshell.ini
new file mode 100644
index 000000000..39e55e51f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/xpcshell.ini
@@ -0,0 +1,7 @@
+[DEFAULT]
+head =
+tail =
+skip-if = toolkit == 'android'
+support-files = data/**
+
+[test_hunspell.js]