psqlscan_int.h 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. /*-------------------------------------------------------------------------
  2. *
  3. * psqlscan_int.h
  4. * lexical scanner internal declarations
  5. *
  6. * This file declares the PsqlScanStateData structure used by psqlscan.l
  7. * and shared by other lexers compatible with it, such as psqlscanslash.l.
  8. *
  9. * One difficult aspect of this code is that we need to work in multibyte
  10. * encodings that are not ASCII-safe. A "safe" encoding is one in which each
  11. * byte of a multibyte character has the high bit set (it's >= 0x80). Since
  12. * all our lexing rules treat all high-bit-set characters alike, we don't
  13. * really need to care whether such a byte is part of a sequence or not.
  14. * In an "unsafe" encoding, we still expect the first byte of a multibyte
  15. * sequence to be >= 0x80, but later bytes might not be. If we scan such
  16. * a sequence as-is, the lexing rules could easily be fooled into matching
  17. * such bytes to ordinary ASCII characters. Our solution for this is to
  18. * substitute 0xFF for each non-first byte within the data presented to flex.
  19. * The flex rules will then pass the FF's through unmolested. The
  20. * psqlscan_emit() subroutine is responsible for looking back to the original
  21. * string and replacing FF's with the corresponding original bytes.
  22. *
  23. * Another interesting thing we do here is scan different parts of the same
  24. * input with physically separate flex lexers (i.e., lexers written in separate
  25. * .l files). We can get away with this because the only part of the
  26. * persistent state of a flex lexer that depends on its parsing rule tables
  27. * is the start state number, which is easy enough to manage --- usually,
  28. * in fact, we just need to set it to INITIAL when changing lexers. But to
  29. * make that work at all, we must use re-entrant lexers, so that all the
  30. * relevant state is in the yyscan_t attached to the PsqlScanState;
  31. * if we were using lexers with separate static state we would soon end up
  32. * with dangling buffer pointers in one or the other. Also note that this
  33. * is unlikely to work very nicely if the lexers aren't all built with the
  34. * same flex version, or if they don't use the same flex options.
  35. *
  36. *
  37. * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
  38. * Portions Copyright (c) 1994, Regents of the University of California
  39. *
  40. * src/include/fe_utils/psqlscan_int.h
  41. *
  42. *-------------------------------------------------------------------------
  43. */
  44. #ifndef PSQLSCAN_INT_H
  45. #define PSQLSCAN_INT_H
  46. #include "fe_utils/psqlscan.h"
  47. /*
  48. * These are just to allow this file to be compilable standalone for header
  49. * validity checking; in actual use, this file should always be included
  50. * from the body of a flex file, where these symbols are already defined.
  51. * Each typedef sits inside an #ifndef on its YY_TYPEDEF_* macro, so it is skipped whenever that macro is already defined.
  52. */
  52. #ifndef YY_TYPEDEF_YY_BUFFER_STATE
  53. #define YY_TYPEDEF_YY_BUFFER_STATE
  54. typedef struct yy_buffer_state *YY_BUFFER_STATE; /* opaque here: struct yy_buffer_state is only named, never defined, in this header */
  55. #endif /* YY_TYPEDEF_YY_BUFFER_STATE */
  56. #ifndef YY_TYPEDEF_YY_SCANNER_T
  57. #define YY_TYPEDEF_YY_SCANNER_T
  58. typedef void *yyscan_t; /* untyped (void *) handle for a scanner instance */
  59. #endif /* YY_TYPEDEF_YY_SCANNER_T */
  60. /*
  61. * We use a stack of flex buffers to handle substitution of psql variables.
  62. * Each stacked buffer contains the as-yet-unread text from one psql variable.
  63. * When we pop the stack all the way, we resume reading from the outer buffer
  64. * identified by the scanbufhandle field of PsqlScanStateData.
  65. */
  66. typedef struct StackElem
  67. {
  68. YY_BUFFER_STATE buf; /* flex input control structure */
  69. char *bufstring; /* data actually being scanned by flex */
  70. char *origstring; /* copy of original data, if needed (presumably for undoing the 0xFF substitution described in the file header; confirm in psqlscan.l) */
  71. char *varname; /* name of variable providing data, or NULL */
  72. struct StackElem *next; /* link to the next entry in the buffer stack */
  73. } StackElem;
  74. /*
  75. * All working state of the lexer must be stored in PsqlScanStateData
  76. * between calls. This allows us to have multiple open lexer operations,
  77. * which is needed for nested include files. The lexer itself is not
  78. * recursive, but it must be re-entrant.
  79. */
  80. typedef struct PsqlScanStateData
  81. {
  82. yyscan_t scanner; /* Flex's state for this PsqlScanState */
  83. PQExpBuffer output_buf; /* current output buffer */
  84. StackElem *buffer_stack; /* stack of variable expansion buffers */
  85. /*
  86. * These variables always refer to the outer buffer, never to any stacked
  87. * variable-expansion buffer.
  88. */
  89. YY_BUFFER_STATE scanbufhandle; /* flex buffer for the outer-level input */
  90. char *scanbuf; /* start of outer-level input buffer */
  91. const char *scanline; /* current input line at outer level */
  92. /* safe_encoding, curline, refline are used by psqlscan_emit() to replace FFs */
  93. int encoding; /* encoding being used now */
  94. bool safe_encoding; /* is current encoding "safe"? (see file header) */
  95. bool std_strings; /* are string literals standard? */
  96. const char *curline; /* string actually given to flex for the current buffer */
  97. const char *refline; /* original data for the current buffer */
  98. /*
  99. * All this state lives across successive input lines, until explicitly
  100. * reset by psql_scan_reset. start_state is adopted by yylex() on entry,
  101. * and updated with its finishing state on exit.
  102. */
  103. int start_state; /* yylex's starting/finishing state */
  104. int state_before_str_stop; /* start cond. before end quote */
  105. int paren_depth; /* depth of nesting in parentheses */
  106. int xcdepth; /* depth of nesting in slash-star comments */
  107. char *dolqstart; /* current $foo$ quote start string */
  108. /*
  109. * State to track boundaries of BEGIN ... END blocks in function
  110. * definitions, so that semicolons do not send query too early.
  111. */
  112. int identifier_count; /* identifiers since start of statement */
  113. char identifiers[4]; /* records the first few identifiers (room for 4, one char apiece) */
  114. int begin_depth; /* depth of begin/end pairs */
  115. /*
  116. * Callback functions provided by the program making use of the lexer,
  117. * plus a void* callback passthrough argument.
  118. */
  119. const PsqlScanCallbacks *callbacks; /* callback functions supplied by the calling program */
  120. void *cb_passthrough; /* void* passthrough argument for the callbacks */
  121. } PsqlScanStateData;
  122. /*
  123. * Functions exported by psqlscan.l, but only meant for use within
  124. * compatible lexers. Their definitions are not in this header; notes marked "presumably" are inferred from names and should be confirmed in psqlscan.l.
  125. */
  126. extern void psqlscan_push_new_buffer(PsqlScanState state,
  127. const char *newstr, const char *varname); /* presumably stacks a new buffer (a StackElem) holding newstr */
  128. extern void psqlscan_pop_buffer_stack(PsqlScanState state); /* presumably removes the top entry of buffer_stack */
  129. extern void psqlscan_select_top_buffer(PsqlScanState state); /* presumably makes the top stacked buffer, or the outer buffer, current */
  130. extern bool psqlscan_var_is_current_source(PsqlScanState state,
  131. const char *varname);
  132. extern YY_BUFFER_STATE psqlscan_prepare_buffer(PsqlScanState state,
  133. const char *txt, int len,
  134. char **txtcopy);
  135. extern void psqlscan_emit(PsqlScanState state, const char *txt, int len); /* per the file header: looks back to the original string and replaces FF's with the original bytes */
  136. extern char *psqlscan_extract_substring(PsqlScanState state,
  137. const char *txt, int len);
  138. extern void psqlscan_escape_variable(PsqlScanState state,
  139. const char *txt, int len,
  140. PsqlScanQuoteType quote);
  141. extern void psqlscan_test_variable(PsqlScanState state,
  142. const char *txt, int len);
  143. #endif /* PSQLSCAN_INT_H */