// Compiler2Pass.h
/*
-----------------------------------------------------------------------------
This source file is part of OGRE
(Object-oriented Graphics Rendering Engine)
For the latest info, see http://www.ogre3d.org/
Copyright (c) 2000-2011 Torus Knot Software Ltd
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
-----------------------------------------------------------------------------
*/
  24. #ifndef COMPILER2PASS_H
  25. #define COMPILER2PASS_H
  26. #include <vector>
  27. #ifdef _WIN32
  28. #define WIN32_LEAN_AND_MEAN
  29. #if !defined(NOMINMAX) && defined(_MSC_VER)
  30. # define NOMINMAX // required to stop windows.h messing up std::min
  31. #endif
  32. #include <windows.h>
  33. #endif
  34. // FIX ME - should not be hard coded
  35. #define BAD_TOKEN 999
  36. typedef unsigned int UINT32;
  37. /** Compiler2Pass is a generic compiler/assembler
  38. @remarks
  39. provides a tokenizer in pass 1 and relies on the subclass to provide the virtual method for pass 2
  40. PASS 1 - tokenize source: this is a simple brute force lexical scanner/analyzer that also parses
  41. the formed token for proper semantics and context in one pass
  42. it uses Look Ahead Left-Right (LALR) ruling based on Backus - Naur From notation for semantic
  43. checking and also performs context checking allowing for language dialects
  44. PASS 2 - generate application specific instructions ie native instructions
  45. @par
  46. this class must be subclassed with the subclass providing implementation for Pass 2. The subclass
  47. is responsible for setting up the token libraries along with defining the language syntax.
  48. */
  49. class Compiler2Pass {
  50. protected:
  51. // BNF operation types
  52. enum OperationType {otRULE, otAND, otOR, otOPTIONAL, otREPEAT, otEND};
  53. /** structure used to build rule paths
  54. */
  55. struct TokenRule {
  56. OperationType mOperation;
  57. UINT32 mTokenID;
  58. const char* mSymbol;
  59. UINT32 mErrorID;
  60. };
  61. /** structure used to build Symbol Type library */
  62. struct SymbolDef {
  63. UINT32 mID; // Token ID which is the index into the Token Type library
  64. UINT32 mPass2Data; // data used by pass 2 to build native instructions
  65. UINT32 mContextKey; // context key to fit the Active Context
  66. UINT32 mContextPatternSet; // new pattern to set for Active Context bits
  67. UINT32 mContextPatternClear;// Contexts bits to clear Active Context bits
  68. int mDefTextID; // index into text table for default name : set at runtime
  69. UINT32 mRuleID; // index into Rule database for non-terminal toke rulepath
  70. // if RuleID is zero the token is terminal
  71. };
  72. /** structure for Token instructions */
  73. struct TokenInst {
  74. UINT32 mNTTRuleID; // Non-Terminal Token Rule ID that generated Token
  75. UINT32 mID; // Token ID
  76. int mLine; // line number in source code where Token was found
  77. int mPos; // Character position in source where Token was found
  78. };
  79. typedef std::vector<TokenInst> TokenInstContainer;
  80. //typedef TokenInstContainer::iterator TokenInstIterator;
  81. /// container for Tokens extracted from source
  82. TokenInstContainer mTokenInstructions;
  83. /// pointer to the source to be compiled
  84. const char* mSource;
  85. int mEndOfSource;
  86. /// pointers to Text and Token Type libraries setup by subclass
  87. SymbolDef* mSymbolTypeLib;
  88. /// pointer to root rule path - has to be set by subclass constructor
  89. TokenRule* mRootRulePath;
  90. /// number of entries in Text and Token Type libraries
  91. int mRulePathLibCnt;
  92. int mSymbolTypeLibCnt;
  93. /// mVauleID needs to be initialized by the subclass before compiling occurs
  94. /// it defines the token ID used in the symbol type library
  95. UINT32 mValueID;
  96. /// storage container for constants defined in source
  97. std::vector<float> mConstants;
  98. /// Active Contexts pattern used in pass 1 to determine which tokens are valid for a certain context
  99. UINT32 mActiveContexts;
  100. /** check token semantics between ID1 and ID2 using left/right semantic data in Token Type library
  101. @param ID1 token ID on the left
  102. @param ID2 token ID on the right
  103. @return
  104. true if both will bind to each other
  105. false if either fails the semantic bind test
  106. */
  107. //bool checkTokenSemantics(UINT32 ID1, UINT32 ID2);
  108. /** perform pass 1 of compile process
  109. scans source for symbols that can be tokenized and then
  110. performs general semantic and context verification on each symbol before it is tokenized.
  111. A tokenized instruction list is built to be used by Pass 2.
  112. */
  113. bool doPass1();
  114. /** pure virtual method that must be set up by subclass to perform Pass 2 of compile process
  115. @remark
  116. Pass 2 is for the subclass to take the token instructions generated in Pass 1 and
  117. build the application specific instructions along with verifying
  118. symantic and context rules that could not be checked in Pass 1
  119. */
  120. virtual bool doPass2() = 0;
  121. void findEOL();
  122. /** get the text symbol for this token
  123. @remark
  124. mainly used for debugging and in test routines
  125. @param sid is the token ID
  126. @return a pointer to the string text
  127. */
  128. const char* getTypeDefText(const UINT32 sid);
  129. /** check to see if the text at the present position in the source is a numerical constant
  130. @param fvalue is a reference that will receive the float value that is in the source
  131. @param charsize reference to receive number of characters that make of the value in the source
  132. @return
  133. true if characters form a valid float representation
  134. false if a number value could not be extracted
  135. */
  136. bool isFloatValue(float & fvalue, int & charsize);
  137. /** check to see if the text is in the symbol text library
  138. @param symbol points to begining of text where a symbol token might exist
  139. @param symbolsize reference that will receive the size value of the symbol found
  140. @return
  141. true if a matching token could be found in the token type library
  142. false if could not be tokenized
  143. */
  144. bool isSymbol(const char* symbol, int & symbolsize);
  145. /// position to the next possible valid sysmbol
  146. bool positionToNextSymbol();
  147. /** process input source text using rulepath to determine allowed tokens
  148. @remarks
  149. the method is reentrant and recursive
  150. if a non-terminal token is encountered in the current rule path then the method is
  151. called using the new rule path referenced by the non-terminal token
  152. Tokens can have the following operation states which effects the flow path of the rule
  153. RULE: defines a rule path for the non-terminal token
  154. AND: the token is required for the rule to pass
  155. OR: if the previous tokens failed then try these ones
  156. OPTIONAL: the token is optional and does not cause the rule to fail if the token is not found
  157. REPEAT: the token is required but there can be more than one in a sequence
  158. END: end of the rule path - the method returns the succuss of the rule
  159. @param rulepathIDX index into to array of Token Rules that define a rule path to be processed
  160. @return
  161. true if rule passed - all required tokens found
  162. false if one or more tokens required to complete the rule were not found
  163. */
  164. bool processRulePath( UINT32 rulepathIDX);
  165. // setup ActiveContexts - should be called by subclass to setup initial language contexts
  166. void setActiveContexts(const UINT32 contexts){ mActiveContexts = contexts; }
  167. /// comment specifiers are hard coded
  168. void skipComments();
  169. /// find end of line marker and move past it
  170. void skipEOL();
  171. /// skip all the white space which includes spaces and tabs
  172. void skipWhiteSpace();
  173. /** check if current position in source has the symbol text equivalent to the TokenID
  174. @param rulepathIDX index into rule path database of token to validate
  175. @param activeRuleID index of non-terminal rule that generated the token
  176. @return
  177. true if token was found
  178. false if token symbol text does not match the source text
  179. if token is non-terminal then processRulePath is called
  180. */
  181. bool ValidateToken(const UINT32 rulepathIDX, const UINT32 activeRuleID);
  182. public:
  183. // ** these probably should not be public
  184. int mCurrentLine;
  185. int mCharPos;
  186. /// constructor
  187. Compiler2Pass();
  188. virtual ~Compiler2Pass() {}
  189. /** compile the source - performs 2 passes
  190. first pass is to tokinize, check semantics and context
  191. second pass is performed by subclass and converts tokens to application specific instructions
  192. @remark
  193. Pass 2 only gets executed if Pass 1 has no errors
  194. @param source a pointer to the source text to be compiled
  195. @return
  196. true if Pass 1 and Pass 2 are successfull
  197. false if any errors occur in Pass 1 or Pass 2
  198. */
  199. bool compile(const char* source);
  200. /** Initialize the type library with matching symbol text found in symbol text library
  201. find a default text for all Symbol Types in library
  202. scan through all the rules and initialize TypeLib with index to text and index to rules for non-terminal tokens
  203. must be called by subclass after libraries and rule database setup
  204. */
  205. void InitSymbolTypeLib();
  206. };
  207. #endif