  /*-------------------------------------------------------------------------
   *
   * ts_public.h
   *    Public interface to various tsearch modules, such as
   *    parsers and dictionaries.
   *
   * Copyright (c) 1998-2022, PostgreSQL Global Development Group
   *
   * src/include/tsearch/ts_public.h
   *
   *-------------------------------------------------------------------------
   */
  #ifndef _PG_TS_PUBLIC_H_
  #define _PG_TS_PUBLIC_H_

  #include "tsearch/ts_type.h"
  /*
   * Parser's framework
   */

  /*
   * Return type for the prslextype method of a parser.
   *
   * NOTE(review): the field meanings below are inferred from their names
   * (a numeric lexeme-type id, a short alias, and a description) -- confirm
   * against a parser's prslextype implementation.
   */
  typedef struct
  {
      int         lexid;
      char       *alias;
      char       *descr;
  } LexDescr;
  28. /*
  29. * Interface to headline generator (tsparser's prsheadline function)
  30. *
  31. * HeadlineParsedText describes the text that is to be highlighted.
  32. * Some fields are passed from the core code to the prsheadline function,
  33. * while others are output from the prsheadline function.
  34. *
  35. * The principal data is words[], an array of HeadlineWordEntry,
  36. * one entry per token, of length curwords.
  37. * The fields of HeadlineWordEntry are:
  38. *
  39. * in, selected, replace, skip: these flags are initially zero
  40. * and may be set by the prsheadline function. A consecutive group
  41. * of tokens marked "in" form a "fragment" to be output.
  42. * Such tokens may additionally be marked selected, replace, or skip
  43. * to modify how they are shown. (If you set more than one of those
  44. * bits, you get an unspecified one of those behaviors.)
  45. *
  46. * type, len, pos, word: filled by core code to describe the token.
  47. *
  48. * item: if the token matches any operand of the tsquery of interest,
  49. * a pointer to such an operand. (If there are multiple matching
  50. * operands, we generate extra copies of the HeadlineWordEntry to hold
  51. * all the pointers. The extras are marked with repeated = 1 and should
  52. * be ignored except for checking the item pointer.)
  53. */
  54. typedef struct
  55. {
  56. uint32 selected:1, /* token is to be highlighted */
  57. in:1, /* token is part of headline */
  58. replace:1, /* token is to be replaced with a space */
  59. repeated:1, /* duplicate entry to hold item pointer */
  60. skip:1, /* token is to be skipped (not output) */
  61. unused:3, /* available bits */
  62. type:8, /* parser's token category */
  63. len:16; /* length of token */
  64. WordEntryPos pos; /* position of token */
  65. char *word; /* text of token (not null-terminated) */
  66. QueryOperand *item; /* a matching query operand, or NULL if none */
  67. } HeadlineWordEntry;
  68. typedef struct
  69. {
  70. /* Fields filled by core code before calling prsheadline function: */
  71. HeadlineWordEntry *words;
  72. int32 lenwords; /* allocated length of words[] */
  73. int32 curwords; /* current number of valid entries */
  74. int32 vectorpos; /* used by ts_parse.c in filling pos fields */
  75. /* The prsheadline function must fill these fields: */
  76. /* Strings for marking selected tokens and separating fragments: */
  77. char *startsel; /* palloc'd strings */
  78. char *stopsel;
  79. char *fragdelim;
  80. int16 startsellen; /* lengths of strings */
  81. int16 stopsellen;
  82. int16 fragdelimlen;
  83. } HeadlineParsedText;
  /*
   * Common useful things for tsearch subsystem
   */

  /*
   * Takes a base name and an extension and returns a char *.
   *
   * NOTE(review): presumably builds the path of a tsearch configuration
   * file from the two parts; how the result is allocated and who owns it
   * is not visible from this header -- confirm in the implementation.
   */
  extern char *get_tsearch_config_filename(const char *basename,
                                           const char *extension);
  /*
   * Often useful stopword list management
   */

  /* A list of stopwords: len entries stored in the array stop[]. */
  typedef struct
  {
      int         len;
      char      **stop;
  } StopList;

  /*
   * readstoplist takes a file name, the StopList to operate on, and a
   * callback mapping a const char * to a char *.
   * searchstoplist takes a StopList and a key and returns a bool.
   *
   * NOTE(review): presumably readstoplist fills *s from the named file,
   * passing each word through wordop, and searchstoplist reports whether
   * key is present in the list -- confirm in the implementation.
   */
  extern void readstoplist(const char *fname, StopList *s,
                           char *(*wordop) (const char *));
  extern bool searchstoplist(StopList *s, char *key);
  100. /*
  101. * Interface with dictionaries
  102. */
  103. /* return struct for any lexize function */
  104. typedef struct
  105. {
  106. /*----------
  107. * Number of current variant of split word. For example the Norwegian
  108. * word 'fotballklubber' has two variants to split: ( fotball, klubb )
  109. * and ( fot, ball, klubb ). So, dictionary should return:
  110. *
  111. * nvariant lexeme
  112. * 1 fotball
  113. * 1 klubb
  114. * 2 fot
  115. * 2 ball
  116. * 2 klubb
  117. *
  118. * In general, a TSLexeme will be considered to belong to the same split
  119. * variant as the previous one if they have the same nvariant value.
  120. * The exact values don't matter, only changes from one lexeme to next.
  121. *----------
  122. */
  123. uint16 nvariant;
  124. uint16 flags; /* See flag bits below */
  125. char *lexeme; /* C string */
  126. } TSLexeme;
  /*
   * Flag bits that can appear in TSLexeme.flags.
   * Each flag occupies its own bit, so they can be OR'ed together.
   */
  #define TSL_ADDPOS      0x01
  #define TSL_PREFIX      0x02
  #define TSL_FILTER      0x04
  /*
   * Struct for supporting complex dictionaries like thesaurus.
   * The 4th argument of the dictlexize method is a pointer to this.
   */
  typedef struct
  {
      bool        isend;          /* in: signals that the end of the text has
                                   * been reached */
      bool        getnext;        /* out: dict wants next lexeme */
      void       *private_state;  /* internal dict state between calls with
                                   * getnext == true */
  } DictSubState;
  #endif                          /* _PG_TS_PUBLIC_H_ */