Scanner.h 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354
  1. #ifndef ANKI_UTIL_SCANNER_H
  2. #define ANKI_UTIL_SCANNER_H
  #include <array>
  #include <exception>
  #include <fstream>
  #include <iosfwd>
  #include <string>
  7. namespace anki { namespace scanner {
  /// Scanner exception
  ///
  /// Carries an error string and error number together with the name and line
  /// number of the script being scanned, all supplied at construction.
  class Exception: public std::exception
  {
  public:
      /// Constructor
      /// @param err The error string
      /// @param errNo The error number
      /// @param scriptFilename The name of the script the error refers to
      /// @param scriptLineNmbr The script line the error refers to
      Exception(const std::string& err, int errNo,
          const std::string& scriptFilename, int scriptLineNmbr);

      /// Copy constructor
      Exception(const Exception& e);

      /// Destructor. Does nothing.
      /// Declared noexcept instead of the deprecated throw() specification;
      /// the two are compatible, so existing out-of-line definitions still
      /// match
      ~Exception() noexcept override
      {}

      /// Return the error description as a C string
      /// (overrides std::exception::what)
      const char* what() const noexcept override;

  private:
      std::string error; ///< The error string given to the constructor
      int errNo; ///< Error number
      std::string scriptFilename; ///< Name of the offending script
      int scriptLineNmbr; ///< Line number inside the script
      /// Mutable so that the const what() can modify it. NOTE(review):
      /// presumably caches the message returned by what() - confirm in the
      /// implementation file
      mutable std::string errWhat;
  };
  /// Upper bound, in chars, on the length of a single script line.
  /// Also used as the size of the fixed buffers that hold a line or a token
  constexpr int MAX_SCRIPT_LINE_LEN = 1024;
  /// Identifies the type of a Token.
  /// The enumerators are numbered implicitly from zero, so their order is
  /// significant and must not change
  enum TokenCode
  {
      // general codes
      TC_ERROR,
      TC_END,
      TC_COMMENT,
      TC_NUMBER,
      TC_CHARACTER,
      TC_STRING,
      TC_IDENTIFIER,
      TC_NEWLINE,

      // keywords listed by strlen (dummy keywords at the moment)
      TC_KE,
      TC_KEY,
      TC_KEYW,
      TC_KEYWO,
      TC_KEYWOR,
      TC_KEYWORD,

      // operators
      TC_SCOPE_RESOLUTION,
      TC_L_SQ_BRACKET,
      TC_R_SQ_BRACKET,
      TC_L_PAREN,
      TC_R_PAREN,
      TC_DOT,
      TC_POINTER_TO_MEMBER,
      TC_L_BRACKET,
      TC_R_BRACKET,
      TC_COMMA,
      TC_PERIOD,
      TC_UPDOWNDOT,
      TC_QUESTIONMARK,
      TC_SHARP,
      TC_EQUAL,
      TC_NOT_EQUAL,
      TC_LESS,
      TC_GREATER,
      TC_LESS_EQUAL,
      TC_GREATER_EQUAL,
      TC_LOGICAL_OR,
      TC_LOGICAL_AND,
      TC_PLUS,
      TC_MINUS,
      TC_STAR,
      TC_BSLASH,
      TC_NOT,
      TC_BITWISE_AND,
      TC_BITWISE_OR,
      TC_UNARAY_COMPLEMENT,
      TC_MOD,
      TC_XOR,
      TC_INC,
      TC_DEC,
      TC_SHL,
      TC_SHR,
      TC_ASSIGN,
      TC_ASSIGN_ADD,
      TC_ASSIGN_SUB,
      TC_ASSIGN_MUL,
      TC_ASSIGN_DIV,
      TC_ASSIGN_MOD,
      TC_ASSIGN_SHL,
      TC_ASSIGN_SHR,
      TC_ASSIGN_AND,
      TC_ASSIGN_XOR,
      TC_ASSIGN_OR,
      TC_BACK_SLASH
  }; // end enum TokenCode
  /// The possible values of Token::dataType.
  /// The numeric values are spelled out; they match the implicit numbering
  enum DataType
  {
      DT_FLOAT = 0,
      DT_INT = 1,
      DT_CHAR = 2,
      DT_STR = 3
  };
  /// A variant that holds the data of a Token. Used inside the Token.
  ///
  /// All alternatives share storage (an unnamed union), so only the accessor
  /// matching the value that was last written gives a meaningful result.
  /// Only the friend classes Scanner and Token can write to it
  class TokenDataVal
  {
  public:
      /// @name Accessors
      /// @{

      /// Access the data as C char
      char getChar() const { return char_; }

      /// Access the data as unsigned long
      unsigned long getInt() const { return int_; }

      /// Access the data as double
      double getFloat() const { return float_; }

      /// Access the data as C string
      const char* getString() const { return string; }
      /// @}

  private:
      friend class Scanner;
      friend class Token;

      /// The data as unnamed union
      union
      {
          char char_;
          unsigned long int_;
          double float_;
          /// Points to @ref Token::asString if the token is string or
          /// identifier
          char* string;
      };
  };
  106. /// The Token class
  107. class Token
  108. {
  109. friend class Scanner;
  110. public:
  111. Token()
  112. : code(TC_ERROR)
  113. {}
  114. Token(const Token& b);
  115. /// @name accessors
  116. /// @{
  117. const char* getString() const
  118. {
  119. return &asString[0];
  120. }
  121. TokenCode getCode() const
  122. {
  123. return code;
  124. }
  125. DataType getDataType() const
  126. {
  127. return dataType;
  128. }
  129. const TokenDataVal& getValue() const
  130. {
  131. return value;
  132. }
  133. /// @}
  134. std::string getInfoString() const;
  135. friend std::ostream& operator<<(std::ostream& s,
  136. const Token& x);
  137. private:
  138. std::array<char, MAX_SCRIPT_LINE_LEN> asString;
  139. TokenCode code; ///< The first thing you should know about a token
  140. /// Additional info in case @ref code is @ref TC_NUMBER
  141. DataType dataType;
  142. TokenDataVal value; ///< A value variant
  143. };
  144. /// C++ Tokenizer
  145. ///
  146. /// The Scanner loads a file or an already loaded iostream and extracts the
  147. /// tokens. The script must be in C++ format. The class does not make any kind
  148. /// of memory allocations so it can be fast.
  149. class Scanner
  150. {
  151. public:
  152. /// Constructor #1
  153. /// @param newlinesAsWhitespace @see newlinesAsWhitespace
  154. Scanner(bool newlinesAsWhitespace = true);
  155. /// Constructor #2
  156. /// @see loadFile
  157. /// @param newlinesAsWhitespace @see newlinesAsWhitespace
  158. /// @exception Exception
  159. Scanner(const char* filename, bool newlinesAsWhitespace = true);
  160. /// Constructor #3
  161. /// @see loadIstream
  162. /// @param newlinesAsWhitespace @see newlinesAsWhitespace
  163. /// @exception Exception
  164. Scanner(std::istream& istream_,
  165. const char* scriptName_ = "unamed-istream",
  166. bool newlinesAsWhitespace = true);
  167. /// It only unloads the file if file is chosen
  168. ~Scanner()
  169. {
  170. unload();
  171. }
  172. /// Load a file to extract tokens
  173. /// @param filename The filename of the file to read
  174. /// @exception Exception
  175. void loadFile(const char* filename);
  176. /// Load a STL istream to extract tokens
  177. /// @param istream_ The stream from where to read
  178. /// @param scriptName_ The name of the stream. For error reporting
  179. /// @exception Exception
  180. void loadIstream(std::istream& istream_,
  181. const char* scriptName_ = "unamed-istream");
  182. /// Extracts all tokens and prints them. Used for debugging
  183. void getAllPrintAll();
  184. /// Get the next token from the stream. Its virtual and you can
  185. /// override it
  186. /// @return The next Token
  187. /// @exception Exception
  188. virtual const Token& getNextToken();
  189. /// Accessor for the current token
  190. /// @return The current Token
  191. const Token& getCrntToken() const
  192. {
  193. return crntToken;
  194. }
  195. /// Get the name of the input stream
  196. const char* getScriptName() const
  197. {
  198. return scriptName;
  199. }
  200. /// Get the current line the Scanner is processing
  201. int getLineNumber() const
  202. {
  203. return lineNmbr;
  204. }
  205. protected:
  206. /// Every char in the Ascii table is binded with one characteristic
  207. /// code type. This helps the scanning
  208. enum AsciiFlag
  209. {
  210. AC_ERROR = 0,
  211. AC_EOF = 1,
  212. AC_LETTER = 2,
  213. AC_DIGIT = 4,
  214. AC_SPECIAL = 8,
  215. AC_WHITESPACE = 16,
  216. AC_QUOTE = 32,
  217. AC_DOUBLEQUOTE = 64,
  218. AC_ACCEPTABLE_IN_COMMENTS = 128 ///< Only accepted in comments
  219. };
  220. /// Reserved words like "int" "const" etc. Currently the reserved words
  221. /// list is being populated with dummy data
  222. struct ResWord
  223. {
  224. const char* string;
  225. TokenCode code;
  226. };
  227. static char eofChar; ///< Special end of file character
  228. /// The array contains one AsciiFlag for every symbol of the ASCII table
  229. static AsciiFlag asciiLookupTable[];
  230. /// @name Reserved words
  231. /// Groups of ResWord grouped by the length of the ResWord::string
  232. /// @{
  233. static ResWord rw2[], rw3[], rw4[], rw5[], rw6[], rw7[];
  234. /// @}
  235. /// The array contains all the groups of ResWord
  236. static ResWord* rwTable[];
  237. Token crntToken; ///< The current token
  238. /// In contains the current line's text
  239. char line[MAX_SCRIPT_LINE_LEN];
  240. char* pchar; ///< Points somewhere to @ref line
  241. int lineNmbr; ///< The number of the current line
  242. /// Treat newlines as whitespace. If false means that the Scanner
  243. /// returns (among others) newline tokens
  244. bool newlinesAsWhitespace;
  245. /// Commented lines number
  246. /// Used to keep track of the newlines in multiline comments so we can
  247. /// then return the correct number of newlines
  248. /// in case of newlinesAsWhitespace is false
  249. int commentedLines;
  250. /// @name Input
  251. /// @{
  252. /// The file stream. Used if the @ref Scanner is initiated using
  253. /// @ref loadFile
  254. std::ifstream inFstream;
  255. /// Points to either @ref inFstream or an external std::istream
  256. std::istream* inStream;
  257. /// The name of the input stream. Mainly used for error messaging
  258. char scriptName[512];
  259. /// @}
  260. /// @name Checkers
  261. /// @{
  262. void checkWord();
  263. void checkComment();
  264. void checkNumber();
  265. void checkString();
  266. void checkChar();
  267. void checkSpecial();
  268. /// @}
  269. /// It reads a new line from the iostream and it points @ref pchar to
  270. /// the beginning of that line
  271. void getLine();
  272. /// Get the next char from the @ref line. If @ref line empty then get
  273. /// new line. It returns '\\0' if we are in the
  274. /// end of the line
  275. char getNextChar();
  276. /// Put the char that @ref getNextChar got back to the current line
  277. char putBackChar();
  278. /// Initializes the asciiLookupTable. It runs only once in the
  279. /// construction of the first Scanner @see Scanner()
  280. static void initAsciiMap();
  281. /// A function to save us from typing
  282. static AsciiFlag& lookupAscii(char c)
  283. {
  284. return asciiLookupTable[static_cast<int>(c)];
  285. }
  286. /// Common initialization code
  287. void init(bool newlinesAsWhitespace_);
  288. /// Unloads the file
  289. void unload();
  290. };
  291. }} // end namespaces
  292. #endif