tokenstream.cpp 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. // ======================================================================== //
  2. // Copyright 2009-2017 Intel Corporation //
  3. // //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); //
  5. // you may not use this file except in compliance with the License. //
  6. // You may obtain a copy of the License at //
  7. // //
  8. // http://www.apache.org/licenses/LICENSE-2.0 //
  9. // //
  10. // Unless required by applicable law or agreed to in writing, software //
  11. // distributed under the License is distributed on an "AS IS" BASIS, //
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
  13. // See the License for the specific language governing permissions and //
  14. // limitations under the License. //
  15. // ======================================================================== //
  16. #include "tokenstream.h"
  17. #include "../math/math.h"
  18. namespace embree
  19. {
  20. /* shorthands for common sets of characters */
  21. const std::string TokenStream::alpha = "abcdefghijklmnopqrstuvwxyz";
  22. const std::string TokenStream::ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  23. const std::string TokenStream::numbers = "0123456789";
  24. const std::string TokenStream::separators = "\n\t\r ";
  /* Builds a 256-entry lookup table for fast character classification:
   * after the call map[c] is true exactly for those byte values c that
   * occur in chrs, and false for every other entry. */
  static void createCharMap(bool map[256], const std::string& chrs)
  {
    /* start from an all-false table */
    for (bool* entry = map; entry != map + 256; ++entry)
      *entry = false;

    /* mark each listed character; index by its unsigned byte value so that
     * characters above 127 do not produce a negative index */
    for (std::string::const_iterator it = chrs.begin(); it != chrs.end(); ++it)
      map[uint8_t(*it)] = true;
  }
  30. /* build full tokenizer that takes list of valid characters and keywords */
  31. TokenStream::TokenStream(const Ref<Stream<int> >& cin, //< stream to read from
  32. const std::string& alpha, //< valid characters for identifiers
  33. const std::string& seps, //< characters that act as separators
  34. const std::vector<std::string>& symbols) //< symbols
  35. : cin(cin), symbols(symbols)
  36. {
  37. createCharMap(isAlphaMap,alpha);
  38. createCharMap(isSepMap,seps);
  39. }
  40. bool TokenStream::decDigits(std::string& str_o)
  41. {
  42. bool ok = false;
  43. std::string str;
  44. if (cin->peek() == '+' || cin->peek() == '-') str += (char)cin->get();
  45. while (isDigit(cin->peek())) { ok = true; str += (char)cin->get(); }
  46. if (ok) str_o += str;
  47. else cin->unget(str.size());
  48. return ok;
  49. }
  50. bool TokenStream::decDigits1(std::string& str_o)
  51. {
  52. bool ok = false;
  53. std::string str;
  54. while (isDigit(cin->peek())) { ok = true; str += (char)cin->get(); }
  55. if (ok) str_o += str; else cin->unget(str.size());
  56. return ok;
  57. }
  58. bool TokenStream::trySymbol(const std::string& symbol)
  59. {
  60. size_t pos = 0;
  61. while (pos < symbol.size()) {
  62. if (symbol[pos] != cin->peek()) { cin->unget(pos); return false; }
  63. cin->drop(); pos++;
  64. }
  65. return true;
  66. }
  67. bool TokenStream::trySymbols(Token& token, const ParseLocation& loc)
  68. {
  69. for (size_t i=0; i<symbols.size(); i++) {
  70. if (!trySymbol(symbols[i])) continue;
  71. token = Token(symbols[i],Token::TY_SYMBOL,loc);
  72. return true;
  73. }
  74. return false;
  75. }
  76. bool TokenStream::tryFloat(Token& token, const ParseLocation& loc)
  77. {
  78. bool ok = false;
  79. std::string str;
  80. if (trySymbol("nan")) {
  81. token = Token(float(nan));
  82. return true;
  83. }
  84. if (trySymbol("+inf")) {
  85. token = Token(float(pos_inf));
  86. return true;
  87. }
  88. if (trySymbol("-inf")) {
  89. token = Token(float(neg_inf));
  90. return true;
  91. }
  92. if (decDigits(str))
  93. {
  94. if (cin->peek() == '.') {
  95. str += (char)cin->get();
  96. decDigits(str);
  97. if (cin->peek() == 'e' || cin->peek() == 'E') {
  98. str += (char)cin->get();
  99. if (decDigits(str)) ok = true; // 1.[2]E2
  100. }
  101. else ok = true; // 1.[2]
  102. }
  103. else if (cin->peek() == 'e' || cin->peek() == 'E') {
  104. str += (char)cin->get();
  105. if (decDigits(str)) ok = true; // 1E2
  106. }
  107. }
  108. else
  109. {
  110. if (cin->peek() == '.') {
  111. str += (char)cin->get();
  112. if (decDigits(str)) {
  113. if (cin->peek() == 'e' || cin->peek() == 'E') {
  114. str += (char)cin->get();
  115. if (decDigits(str)) ok = true; // .3E2
  116. }
  117. else ok = true; // .3
  118. }
  119. }
  120. }
  121. if (ok) {
  122. token = Token((float)atof(str.c_str()),loc);
  123. }
  124. else cin->unget(str.size());
  125. return ok;
  126. }
  127. bool TokenStream::tryInt(Token& token, const ParseLocation& loc) {
  128. std::string str;
  129. if (decDigits(str)) {
  130. token = Token(atoi(str.c_str()),loc);
  131. return true;
  132. }
  133. return false;
  134. }
  135. bool TokenStream::tryString(Token& token, const ParseLocation& loc)
  136. {
  137. std::string str;
  138. if (cin->peek() != '\"') return false;
  139. cin->drop();
  140. while (cin->peek() != '\"') str += (char)cin->get();
  141. cin->drop();
  142. token = Token(str,Token::TY_STRING,loc);
  143. return true;
  144. }
  145. bool TokenStream::tryIdentifier(Token& token, const ParseLocation& loc)
  146. {
  147. std::string str;
  148. if (!isAlpha(cin->peek())) return false;
  149. str += (char)cin->get();
  150. while (isAlphaNum(cin->peek())) str += (char)cin->get();
  151. token = Token(str,Token::TY_IDENTIFIER,loc);
  152. return true;
  153. }
  154. void TokenStream::skipSeparators()
  155. {
  156. /* skip separators */
  157. while (cin->peek() != EOF && isSeparator(cin->peek()))
  158. cin->drop();
  159. }
  160. Token TokenStream::next()
  161. {
  162. Token token;
  163. skipSeparators();
  164. ParseLocation loc = cin->loc();
  165. if (trySymbols (token,loc)) return token; /**< try to parse a symbol */
  166. if (tryFloat (token,loc)) return token; /**< try to parse float */
  167. if (tryInt (token,loc)) return token; /**< try to parse integer */
  168. if (tryString (token,loc)) return token; /**< try to parse string */
  169. if (tryIdentifier(token,loc)) return token; /**< try to parse identifier */
  170. if (cin->peek() == EOF ) return Token(loc); /**< return EOF token */
  171. return Token((char)cin->get(),loc); /**< return invalid character token */
  172. }
  173. }