// tokenizer_states.h
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: [email protected] (Jonathan Tang)
//
// This contains the list of states used in the tokenizer. Although at first
// glance it seems like these could be kept internal to the tokenizer, several
// of the actions in the parser require that it reach into the tokenizer and
// reset the tokenizer state. For that to work, it needs to have the
// definitions of individual states available.
//
// This may also be useful for providing more detailed error messages for parse
// errors, as we can match up states and inputs in a table without having to
// clutter the tokenizer code with lots of precise error messages.
  26. #ifndef GUMBO_TOKENIZER_STATES_H_
  27. #define GUMBO_TOKENIZER_STATES_H_
  28. // The ordering of this enum is also used to build the dispatch table for the
  29. // tokenizer state machine, so if it is changed, be sure to update that too.
  30. typedef enum {
  31. GUMBO_LEX_DATA,
  32. GUMBO_LEX_CHAR_REF_IN_DATA,
  33. GUMBO_LEX_RCDATA,
  34. GUMBO_LEX_CHAR_REF_IN_RCDATA,
  35. GUMBO_LEX_RAWTEXT,
  36. GUMBO_LEX_SCRIPT,
  37. GUMBO_LEX_PLAINTEXT,
  38. GUMBO_LEX_TAG_OPEN,
  39. GUMBO_LEX_END_TAG_OPEN,
  40. GUMBO_LEX_TAG_NAME,
  41. GUMBO_LEX_RCDATA_LT,
  42. GUMBO_LEX_RCDATA_END_TAG_OPEN,
  43. GUMBO_LEX_RCDATA_END_TAG_NAME,
  44. GUMBO_LEX_RAWTEXT_LT,
  45. GUMBO_LEX_RAWTEXT_END_TAG_OPEN,
  46. GUMBO_LEX_RAWTEXT_END_TAG_NAME,
  47. GUMBO_LEX_SCRIPT_LT,
  48. GUMBO_LEX_SCRIPT_END_TAG_OPEN,
  49. GUMBO_LEX_SCRIPT_END_TAG_NAME,
  50. GUMBO_LEX_SCRIPT_ESCAPED_START,
  51. GUMBO_LEX_SCRIPT_ESCAPED_START_DASH,
  52. GUMBO_LEX_SCRIPT_ESCAPED,
  53. GUMBO_LEX_SCRIPT_ESCAPED_DASH,
  54. GUMBO_LEX_SCRIPT_ESCAPED_DASH_DASH,
  55. GUMBO_LEX_SCRIPT_ESCAPED_LT,
  56. GUMBO_LEX_SCRIPT_ESCAPED_END_TAG_OPEN,
  57. GUMBO_LEX_SCRIPT_ESCAPED_END_TAG_NAME,
  58. GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_START,
  59. GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED,
  60. GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_DASH,
  61. GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_DASH_DASH,
  62. GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_LT,
  63. GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_END,
  64. GUMBO_LEX_BEFORE_ATTR_NAME,
  65. GUMBO_LEX_ATTR_NAME,
  66. GUMBO_LEX_AFTER_ATTR_NAME,
  67. GUMBO_LEX_BEFORE_ATTR_VALUE,
  68. GUMBO_LEX_ATTR_VALUE_DOUBLE_QUOTED,
  69. GUMBO_LEX_ATTR_VALUE_SINGLE_QUOTED,
  70. GUMBO_LEX_ATTR_VALUE_UNQUOTED,
  71. GUMBO_LEX_CHAR_REF_IN_ATTR_VALUE,
  72. GUMBO_LEX_AFTER_ATTR_VALUE_QUOTED,
  73. GUMBO_LEX_SELF_CLOSING_START_TAG,
  74. GUMBO_LEX_BOGUS_COMMENT,
  75. GUMBO_LEX_MARKUP_DECLARATION,
  76. GUMBO_LEX_COMMENT_START,
  77. GUMBO_LEX_COMMENT_START_DASH,
  78. GUMBO_LEX_COMMENT,
  79. GUMBO_LEX_COMMENT_END_DASH,
  80. GUMBO_LEX_COMMENT_END,
  81. GUMBO_LEX_COMMENT_END_BANG,
  82. GUMBO_LEX_DOCTYPE,
  83. GUMBO_LEX_BEFORE_DOCTYPE_NAME,
  84. GUMBO_LEX_DOCTYPE_NAME,
  85. GUMBO_LEX_AFTER_DOCTYPE_NAME,
  86. GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_KEYWORD,
  87. GUMBO_LEX_BEFORE_DOCTYPE_PUBLIC_ID,
  88. GUMBO_LEX_DOCTYPE_PUBLIC_ID_DOUBLE_QUOTED,
  89. GUMBO_LEX_DOCTYPE_PUBLIC_ID_SINGLE_QUOTED,
  90. GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_ID,
  91. GUMBO_LEX_BETWEEN_DOCTYPE_PUBLIC_SYSTEM_ID,
  92. GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_KEYWORD,
  93. GUMBO_LEX_BEFORE_DOCTYPE_SYSTEM_ID,
  94. GUMBO_LEX_DOCTYPE_SYSTEM_ID_DOUBLE_QUOTED,
  95. GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED,
  96. GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_ID,
  97. GUMBO_LEX_BOGUS_DOCTYPE,
  98. GUMBO_LEX_CDATA
  99. } GumboTokenizerEnum;
  100. #endif // GUMBO_TOKENIZER_STATES_H_