spell.h 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  /*-------------------------------------------------------------------------
   *
   * spell.h
   *
   * Declarations for ISpell dictionary
   *
   * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
   *
   * src/include/tsearch/dicts/spell.h
   *
   *-------------------------------------------------------------------------
   */
  13. #ifndef __SPELL_H__
  14. #define __SPELL_H__
  15. #include "regex/regex.h"
  16. #include "tsearch/dicts/regis.h"
  17. #include "tsearch/ts_public.h"
  18. /*
  19. * SPNode and SPNodeData are used to represent prefix tree (Trie) to store
  20. * a words list.
  21. */
  22. struct SPNode;
  23. typedef struct
  24. {
  25. uint32 val:8,
  26. isword:1,
  27. /* Stores compound flags listed below */
  28. compoundflag:4,
  29. /* Reference to an entry of the AffixData field */
  30. affix:19;
  31. struct SPNode *node;
  32. } SPNodeData;
  /*
   * Names of FF_ are correlated with Hunspell options in affix file
   * http://hunspell.sourceforge.net/
   *
   * Each flag is a distinct bit; FF_COMPOUNDFLAG is the union of the
   * begin/middle/last bits and FF_COMPOUNDFLAGMASK covers all four bits.
   */
  #define FF_COMPOUNDONLY     0x01
  #define FF_COMPOUNDBEGIN    0x02
  #define FF_COMPOUNDMIDDLE   0x04
  #define FF_COMPOUNDLAST     0x08
  #define FF_COMPOUNDFLAG     ( FF_COMPOUNDBEGIN | FF_COMPOUNDMIDDLE | \
                                FF_COMPOUNDLAST )
  #define FF_COMPOUNDFLAGMASK 0x0f
  44. typedef struct SPNode
  45. {
  46. uint32 length;
  47. SPNodeData data[FLEXIBLE_ARRAY_MEMBER];
  48. } SPNode;
  49. #define SPNHDRSZ (offsetof(SPNode,data))
  50. /*
  51. * Represents an entry in a words list.
  52. */
  53. typedef struct spell_struct
  54. {
  55. union
  56. {
  57. /*
  58. * flag is filled in by NIImportDictionary(). After
  59. * NISortDictionary(), d is used instead of flag.
  60. */
  61. char *flag;
  62. /* d is used in mkSPNode() */
  63. struct
  64. {
  65. /* Reference to an entry of the AffixData field */
  66. int affix;
  67. /* Length of the word */
  68. int len;
  69. } d;
  70. } p;
  71. char word[FLEXIBLE_ARRAY_MEMBER];
  72. } SPELL;
  73. #define SPELLHDRSZ (offsetof(SPELL, word))
  74. /*
  75. * If an affix uses a regex, we have to store that separately in a struct
  76. * that won't move around when arrays of affixes are enlarged or sorted.
  77. * This is so that it can be found to be cleaned up at context destruction.
  78. */
  79. typedef struct aff_regex_struct
  80. {
  81. regex_t regex;
  82. MemoryContextCallback mcallback;
  83. } aff_regex_struct;
  84. /*
  85. * Represents an entry in an affix list.
  86. */
  87. typedef struct aff_struct
  88. {
  89. char *flag;
  90. /* FF_SUFFIX or FF_PREFIX */
  91. uint32 type:1,
  92. flagflags:7,
  93. issimple:1,
  94. isregis:1,
  95. replen:14;
  96. char *find;
  97. char *repl;
  98. union
  99. {
  100. aff_regex_struct *pregex;
  101. Regis regis;
  102. } reg;
  103. } AFFIX;
  /*
   * Affixes use dictionary flags too.  These values are single bits above the
   * four-bit compound-flag range (0x01 .. 0x08).
   */
  #define FF_COMPOUNDPERMITFLAG   0x10
  #define FF_COMPOUNDFORBIDFLAG   0x20
  #define FF_CROSSPRODUCT         0x40
  /*
   * Don't change the order of these.  Initialization sorts by these,
   * and expects prefixes to come first after sorting (hence FF_PREFIX is the
   * smaller value).
   */
  #define FF_SUFFIX               1
  #define FF_PREFIX               0
  116. /*
  117. * AffixNode and AffixNodeData are used to represent prefix tree (Trie) to store
  118. * an affix list.
  119. */
  120. struct AffixNode;
  121. typedef struct
  122. {
  123. uint32 val:8,
  124. naff:24;
  125. AFFIX **aff;
  126. struct AffixNode *node;
  127. } AffixNodeData;
  128. typedef struct AffixNode
  129. {
  130. uint32 isvoid:1,
  131. length:31;
  132. AffixNodeData data[FLEXIBLE_ARRAY_MEMBER];
  133. } AffixNode;
  134. #define ANHRDSZ (offsetof(AffixNode, data))
  /*
   * Element type of IspellDict.CompoundAffix.
   * NOTE(review): presumably describes one affix usable in compound words;
   * confirm against the code that fills it.
   */
  typedef struct
  {
      /* Affix text */
      char       *affix;
      /* NOTE(review): presumably the length of affix -- confirm */
      int         len;
      /* True if this is a suffix (as opposed to a prefix), per the name */
      bool        issuffix;
  } CMPDAffix;
  /*
   * Type of encoding affix flags in Hunspell dictionaries
   */
  typedef enum
  {
      FM_CHAR,                    /* one character (like ispell) */
      FM_LONG,                    /* two characters */
      FM_NUM                      /* number, >= 0 and < 65536 */
  } FlagMode;
  150. /*
  151. * Structure to store Hunspell options. Flag representation depends on flag
  152. * type. These flags are about support of compound words.
  153. */
  154. typedef struct CompoundAffixFlag
  155. {
  156. union
  157. {
  158. /* Flag name if flagMode is FM_CHAR or FM_LONG */
  159. char *s;
  160. /* Flag name if flagMode is FM_NUM */
  161. uint32 i;
  162. } flag;
  163. /* we don't have a bsearch_arg version, so, copy FlagMode */
  164. FlagMode flagMode;
  165. uint32 value;
  166. } CompoundAffixFlag;
  /* 65536: equals the exclusive upper bound documented for FM_NUM flags */
  #define FLAGNUM_MAXSIZE     (1 << 16)
  168. typedef struct
  169. {
  170. int maffixes;
  171. int naffixes;
  172. AFFIX *Affix;
  173. AffixNode *Suffix;
  174. AffixNode *Prefix;
  175. SPNode *Dictionary;
  176. /* Array of sets of affixes */
  177. char **AffixData;
  178. int lenAffixData;
  179. int nAffixData;
  180. bool useFlagAliases;
  181. CMPDAffix *CompoundAffix;
  182. bool usecompound;
  183. FlagMode flagMode;
  184. /*
  185. * All follow fields are actually needed only for initialization
  186. */
  187. /* Array of Hunspell options in affix file */
  188. CompoundAffixFlag *CompoundAffixFlags;
  189. /* number of entries in CompoundAffixFlags array */
  190. int nCompoundAffixFlag;
  191. /* allocated length of CompoundAffixFlags array */
  192. int mCompoundAffixFlag;
  193. /*
  194. * Remaining fields are only used during dictionary construction; they are
  195. * set up by NIStartBuild and cleared by NIFinishBuild.
  196. */
  197. MemoryContext buildCxt; /* temp context for construction */
  198. /* Temporary array of all words in the dict file */
  199. SPELL **Spell;
  200. int nspell; /* number of valid entries in Spell array */
  201. int mspell; /* allocated length of Spell array */
  202. /* These are used to allocate "compact" data without palloc overhead */
  203. char *firstfree; /* first free address (always maxaligned) */
  204. size_t avail; /* free space remaining at firstfree */
  205. } IspellDict;
  206. extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word);
  207. extern void NIStartBuild(IspellDict *Conf);
  208. extern void NIImportAffixes(IspellDict *Conf, const char *filename);
  209. extern void NIImportDictionary(IspellDict *Conf, const char *filename);
  210. extern void NISortDictionary(IspellDict *Conf);
  211. extern void NISortAffixes(IspellDict *Conf);
  212. extern void NIFinishBuild(IspellDict *Conf);
  213. #endif