/*
-----------------------------------------------------------------------------
This source file is part of OGRE
(Object-oriented Graphics Rendering Engine)
For the latest info, see http://www.ogre3d.org/

Copyright (c) 2000-2011 Torus Knot Software Ltd

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
-----------------------------------------------------------------------------
*/

#ifndef COMPILER2PASS_H
#define COMPILER2PASS_H

// std::vector backs the token instruction list and the constants table below.
#include <vector>

#ifdef _WIN32
  #define WIN32_LEAN_AND_MEAN
  #if !defined(NOMINMAX) && defined(_MSC_VER)
  #   define NOMINMAX // required to stop windows.h messing up std::min
  #endif
  #include <windows.h>
#endif

// FIX ME - should not be hard coded
#define BAD_TOKEN 999

typedef unsigned int UINT32;

/** Compiler2Pass is a generic compiler/assembler
@remarks
    provides a tokenizer in pass 1 and relies on the subclass to provide the
    virtual method for pass 2

    PASS 1 - tokenize source: this is a simple brute force lexical
    scanner/analyzer that also parses the formed token for proper semantics
    and context in one pass.
    It uses Look Ahead Left-Right (LALR) ruling based on Backus-Naur Form
    (BNF) notation for semantic checking and also performs context checking,
    allowing for language dialects.

    PASS 2 - generate application specific instructions, i.e. native
    instructions
@par
    this class must be subclassed with the subclass providing the
    implementation for Pass 2. The subclass is responsible for setting up the
    token libraries along with defining the language syntax.
*/
class Compiler2Pass
{
protected:

    /// BNF operation types
    enum OperationType {otRULE, otAND, otOR, otOPTIONAL, otREPEAT, otEND};

    /** structure used to build rule paths */
    struct TokenRule
    {
        OperationType mOperation;
        UINT32 mTokenID;
        const char* mSymbol;
        UINT32 mErrorID;
    };

    /** structure used to build Symbol Type library */
    struct SymbolDef
    {
        UINT32 mID;                  ///< Token ID which is the index into the Token Type library
        UINT32 mPass2Data;           ///< data used by pass 2 to build native instructions

        UINT32 mContextKey;          ///< context key to fit the Active Context
        UINT32 mContextPatternSet;   ///< new pattern to set for Active Context bits
        UINT32 mContextPatternClear; ///< Context bits to clear from the Active Context bits

        int mDefTextID;              ///< index into text table for default name : set at runtime
        UINT32 mRuleID;              ///< index into Rule database for non-terminal token rule path;
                                     ///< if RuleID is zero the token is terminal
    };

    /** structure for Token instructions */
    struct TokenInst
    {
        UINT32 mNTTRuleID; ///< Non-Terminal Token Rule ID that generated Token
        UINT32 mID;        ///< Token ID
        int mLine;         ///< line number in source code where Token was found
        int mPos;          ///< character position in source where Token was found
    };

    typedef std::vector<TokenInst> TokenInstContainer;
    //typedef TokenInstContainer::iterator TokenInstIterator;

    /// container for Tokens extracted from source
    TokenInstContainer mTokenInstructions;

    /// pointer to the source to be compiled
    const char* mSource;
    /// NOTE(review): undocumented in the original; presumably the offset of the
    /// end of mSource - confirm against the implementation
    int mEndOfSource;

    /// pointers to Text and Token Type libraries setup by subclass
    SymbolDef* mSymbolTypeLib;

    /// pointer to root rule path - has to be set by subclass constructor
    TokenRule* mRootRulePath;

    /// number of entries in Text and Token Type libraries
    int mRulePathLibCnt;
    int mSymbolTypeLibCnt;

    /// mValueID needs to be initialized by the subclass before compiling occurs
    /// it defines the token ID used in the symbol type library
    UINT32 mValueID;

    /// storage container for constants defined in source
    /// (float elements, matching the value type produced by isFloatValue)
    std::vector<float> mConstants;

    /// Active Contexts pattern used in pass 1 to determine which tokens are
    /// valid for a certain context
    UINT32 mActiveContexts;

    /** check token semantics between ID1 and ID2 using left/right semantic
        data in Token Type library (declaration currently disabled)
    @param ID1 token ID on the left
    @param ID2 token ID on the right
    @return
        true if both will bind to each other
        false if either fails the semantic bind test
    */
    //bool checkTokenSemantics(UINT32 ID1, UINT32 ID2);

    /** perform pass 1 of compile process
        scans source for symbols that can be tokenized and then
        performs general semantic and context verification on each symbol
        before it is tokenized.
        A tokenized instruction list is built to be used by Pass 2.
    */
    bool doPass1();

    /** pure virtual method that must be set up by subclass to perform Pass 2
        of compile process
    @remark
        Pass 2 is for the subclass to take the token instructions generated in
        Pass 1 and build the application specific instructions along with
        verifying semantic and context rules that could not be checked in
        Pass 1
    */
    virtual bool doPass2() = 0;

    /// NOTE(review): undocumented in the original; by its name this presumably
    /// advances to the end-of-line marker - confirm against the implementation
    void findEOL();

    /** get the text symbol for this token
    @remark
        mainly used for debugging and in test routines
    @param sid is the token ID
    @return a pointer to the string text
    */
    const char* getTypeDefText(const UINT32 sid);

    /** check to see if the text at the present position in the source is a
        numerical constant
    @param fvalue is a reference that will receive the float value that is in
        the source
    @param charsize reference to receive number of characters that make up the
        value in the source
    @return
        true if characters form a valid float representation
        false if a number value could not be extracted
    */
    bool isFloatValue(float & fvalue, int & charsize);

    /** check to see if the text is in the symbol text library
    @param symbol points to beginning of text where a symbol token might exist
    @param symbolsize reference that will receive the size value of the symbol
        found
    @return
        true if a matching token could be found in the token type library
        false if could not be tokenized
    */
    bool isSymbol(const char* symbol, int & symbolsize);

    /// position to the next possible valid symbol
    bool positionToNextSymbol();

    /** process input source text using rulepath to determine allowed tokens
    @remarks
        the method is reentrant and recursive
        if a non-terminal token is encountered in the current rule path then
        the method is called using the new rule path referenced by the
        non-terminal token
        Tokens can have the following operation states which affect the flow
        path of the rule
            RULE: defines a rule path for the non-terminal token
            AND: the token is required for the rule to pass
            OR: if the previous tokens failed then try these ones
            OPTIONAL: the token is optional and does not cause the rule to
                      fail if the token is not found
            REPEAT: the token is required but there can be more than one in a
                    sequence
            END: end of the rule path - the method returns the success of the
                 rule
    @param rulepathIDX index into the array of Token Rules that define a rule
        path to be processed
    @return
        true if rule passed - all required tokens found
        false if one or more tokens required to complete the rule were not
        found
    */
    bool processRulePath( UINT32 rulepathIDX);

    /// setup ActiveContexts - should be called by subclass to setup initial
    /// language contexts
    void setActiveContexts(const UINT32 contexts){ mActiveContexts = contexts; }

    /// comment specifiers are hard coded
    void skipComments();

    /// find end of line marker and move past it
    void skipEOL();

    /// skip all the white space which includes spaces and tabs
    void skipWhiteSpace();

    /** check if current position in source has the symbol text equivalent to
        the TokenID
    @param rulepathIDX index into rule path database of token to validate
    @param activeRuleID index of non-terminal rule that generated the token
    @return
        true if token was found
        false if token symbol text does not match the source text
        if token is non-terminal then processRulePath is called
    */
    bool ValidateToken(const UINT32 rulepathIDX, const UINT32 activeRuleID);

public:
    // ** these probably should not be public
    int mCurrentLine;
    int mCharPos;

    /// constructor
    Compiler2Pass();
    virtual ~Compiler2Pass() {}

    /** compile the source - performs 2 passes
        first pass is to tokenize, check semantics and context
        second pass is performed by subclass and converts tokens to
        application specific instructions
    @remark
        Pass 2 only gets executed if Pass 1 has no errors
    @param source a pointer to the source text to be compiled
    @return
        true if Pass 1 and Pass 2 are successful
        false if any errors occur in Pass 1 or Pass 2
    */
    bool compile(const char* source);

    /** Initialize the type library with matching symbol text found in symbol
        text library
        find a default text for all Symbol Types in library

        scan through all the rules and initialize TypeLib with index to text
        and index to rules for non-terminal tokens

        must be called by subclass after libraries and rule database setup
    */
    void InitSymbolTypeLib();

};

#endif