/*
-----------------------------------------------------------------------------
This source file is part of OGRE
(Object-oriented Graphics Rendering Engine)
For the latest info, see http://www.ogre3d.org/

Copyright (c) 2000-2011 Torus Knot Software Ltd

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
-----------------------------------------------------------------------------
*/
#ifndef COMPILER2PASS_H
#define COMPILER2PASS_H

#include <vector>

#ifdef _WIN32
    // only define the macro when the build has not already supplied it, so a
    // command-line definition does not trigger a redefinition diagnostic
#   ifndef WIN32_LEAN_AND_MEAN
#       define WIN32_LEAN_AND_MEAN
#   endif
#   if !defined(NOMINMAX) && defined(_MSC_VER)
#       define NOMINMAX // required to stop windows.h messing up std::min
#   endif
#   include <windows.h>
#endif

// FIXME - should not be hard coded
#define BAD_TOKEN 999

typedef unsigned int UINT32;

/** Compiler2Pass is a generic compiler/assembler
@remarks
    provides a tokenizer in pass 1 and relies on the subclass to provide the
    virtual method for pass 2

    PASS 1 - tokenize source: this is a simple brute force lexical
        scanner/analyzer that also parses the formed token for proper
        semantics and context in one pass.
        It uses Look Ahead Left-Right (LALR) ruling based on Backus-Naur Form
        notation for semantic checking and also performs context checking,
        allowing for language dialects.

    PASS 2 - generate application specific instructions, ie native
        instructions
@par
    this class must be subclassed, with the subclass providing the
    implementation for Pass 2. The subclass is responsible for setting up the
    token libraries along with defining the language syntax.
*/
class Compiler2Pass {

protected:

    /// BNF operation types
    enum OperationType {otRULE, otAND, otOR, otOPTIONAL, otREPEAT, otEND};

    /** structure used to build rule paths */
    struct TokenRule {
        OperationType mOperation;
        UINT32 mTokenID;
        const char* mSymbol;
        UINT32 mErrorID;
    };

    /** structure used to build the Symbol Type library */
    struct SymbolDef {
        UINT32 mID;                  // Token ID which is the index into the Token Type library
        UINT32 mPass2Data;           // data used by pass 2 to build native instructions
        UINT32 mContextKey;          // context key to fit the Active Context
        UINT32 mContextPatternSet;   // new pattern to set for Active Context bits
        UINT32 mContextPatternClear; // context bits to clear in the Active Context bits
        int mDefTextID;              // index into text table for default name : set at runtime
        UINT32 mRuleID;              // index into Rule database for non-terminal token rule path
                                     // if RuleID is zero the token is terminal
    };

    /** structure for Token instructions */
    struct TokenInst {
        UINT32 mNTTRuleID; // Non-Terminal Token Rule ID that generated the Token
        UINT32 mID;        // Token ID
        int mLine;         // line number in source code where the Token was found
        int mPos;          // character position in source where the Token was found
    };

    typedef std::vector<TokenInst> TokenInstContainer;
    //typedef TokenInstContainer::iterator TokenInstIterator;

    /// container for Tokens extracted from source
    TokenInstContainer mTokenInstructions;

    /// pointer to the source to be compiled
    const char* mSource;
    // NOTE(review): undocumented in the original; presumably the end offset of
    // mSource - confirm against the implementation file
    int mEndOfSource;

    /// pointers to Text and Token Type libraries set up by the subclass
    SymbolDef* mSymbolTypeLib;

    /// pointer to root rule path - has to be set by the subclass constructor
    TokenRule* mRootRulePath;

    /// number of entries in Text and Token Type libraries
    int mRulePathLibCnt;
    int mSymbolTypeLibCnt;

    /// mValueID needs to be initialized by the subclass before compiling occurs;
    /// it defines the token ID used in the symbol type library
    UINT32 mValueID;

    /// storage container for constants defined in source
    std::vector<float> mConstants;

    /// Active Contexts pattern used in pass 1 to determine which tokens are
    /// valid for a certain context
    UINT32 mActiveContexts;

    /** check token semantics between ID1 and ID2 using left/right semantic
        data in the Token Type library
    @remark
        the declaration below is commented out, so this check is currently
        not part of the class
    @param ID1 token ID on the left
    @param ID2 token ID on the right
    @return
        true if both will bind to each other
        false if either fails the semantic bind test
    */
    //bool checkTokenSemantics(UINT32 ID1, UINT32 ID2);

    /** perform pass 1 of the compile process
        scans source for symbols that can be tokenized and then performs
        general semantic and context verification on each symbol before it is
        tokenized.
        A tokenized instruction list is built to be used by Pass 2.
    */
    bool doPass1();

    /** pure virtual method that must be set up by the subclass to perform
        Pass 2 of the compile process
    @remark
        Pass 2 is for the subclass to take the token instructions generated in
        Pass 1 and build the application specific instructions, along with
        verifying semantic and context rules that could not be checked in
        Pass 1
    */
    virtual bool doPass2() = 0;

    // NOTE(review): undocumented in the original; by its name it locates the
    // end of the current line - confirm against the implementation file
    void findEOL();

    /** get the text symbol for this token
    @remark
        mainly used for debugging and in test routines
    @param sid is the token ID
    @return a pointer to the string text
    */
    const char* getTypeDefText(const UINT32 sid);

    /** check to see if the text at the present position in the source is a
        numerical constant
    @param fvalue is a reference that will receive the float value that is in
        the source
    @param charsize reference to receive the number of characters that make
        up the value in the source
    @return
        true if characters form a valid float representation
        false if a number value could not be extracted
    */
    bool isFloatValue(float & fvalue, int & charsize);

    /** check to see if the text is in the symbol text library
    @param symbol points to the beginning of text where a symbol token might
        exist
    @param symbolsize reference that will receive the size value of the
        symbol found
    @return
        true if a matching token could be found in the token type library
        false if it could not be tokenized
    */
    bool isSymbol(const char* symbol, int & symbolsize);

    /// position to the next possible valid symbol
    bool positionToNextSymbol();

    /** process input source text using a rule path to determine allowed tokens
    @remarks
        the method is reentrant and recursive
        if a non-terminal token is encountered in the current rule path then
        the method is called using the new rule path referenced by the
        non-terminal token
        Tokens can have the following operation states which affect the flow
        path of the rule
            RULE: defines a rule path for the non-terminal token
            AND: the token is required for the rule to pass
            OR: if the previous tokens failed then try these ones
            OPTIONAL: the token is optional and does not cause the rule to
                fail if the token is not found
            REPEAT: the token is required but there can be more than one in a
                sequence
            END: end of the rule path - the method returns the success of the
                rule
    @param rulepathIDX index into the array of Token Rules that define a rule
        path to be processed
    @return
        true if rule passed - all required tokens found
        false if one or more tokens required to complete the rule were not
        found
    */
    bool processRulePath( UINT32 rulepathIDX);

    /// set up ActiveContexts - should be called by the subclass to set up the
    /// initial language contexts; overwrites the current pattern
    void setActiveContexts(const UINT32 contexts){ mActiveContexts = contexts; }

    /// comment specifiers are hard coded
    void skipComments();

    /// find end of line marker and move past it
    void skipEOL();

    /// skip all the white space which includes spaces and tabs
    void skipWhiteSpace();

    /** check if the current position in source has the symbol text equivalent
        to the TokenID
    @param rulepathIDX index into rule path database of the token to validate
    @param activeRuleID index of the non-terminal rule that generated the token
    @return
        true if token was found
        false if token symbol text does not match the source text
        if the token is non-terminal then processRulePath is called
    */
    bool ValidateToken(const UINT32 rulepathIDX, const UINT32 activeRuleID);

public:
    // ** these probably should not be public
    int mCurrentLine;
    int mCharPos;

    /// constructor
    Compiler2Pass();
    virtual ~Compiler2Pass() {}

    /** compile the source - performs 2 passes
        first pass is to tokenize, check semantics and context
        second pass is performed by the subclass and converts tokens to
        application specific instructions
    @remark
        Pass 2 only gets executed if Pass 1 has no errors
    @param source a pointer to the source text to be compiled
    @return
        true if Pass 1 and Pass 2 are successful
        false if any errors occur in Pass 1 or Pass 2
    */
    bool compile(const char* source);

    /** Initialize the type library with matching symbol text found in the
        symbol text library
        find a default text for all Symbol Types in the library
        scan through all the rules and initialize TypeLib with index to text
        and index to rules for non-terminal tokens
        must be called by the subclass after libraries and rule database setup
    */
    void InitSymbolTypeLib();

};

#endif