// CE Compiler.h
  1. /******************************************************************************/
  2. #if EE_PRIVATE
  3. namespace Edit{
  4. /******************************************************************************/
  5. enum CMD_TYPE : Byte
  6. {
  7. CMD_NONE ,
  8. CMD_INSTRUCT , // raw instruction, sample: "int x=0;", "x+=15;", "obj.func(123);" - 'raw_range' specifies instruction range of tokens (excluding semicolon)
  9. CMD_GROUP , // group of commands, sample; "{ .. }" - 'raw_range' specifies range of { .. } excluding brackets, 'cmds' specifies all commands inside
  10. CMD_IF , // if(cond)true;else false; - 'cond_range' specifies range of tokens (excluding brackets), 'cmds' specifies all commands for true, 'cmds_false' specifies all commands for false
  11. CMD_FOR , // for(init; cond; step)cmds; - 'init_range' specifies range of tokens (excluding semicolon), 'cond_range' specifies range of tokens excluding semicolon, 'step_range' specifies range of tokens, 'cmds' specifies all commands inside loop
  12. CMD_WHILE , // while(cond)cmds; - 'cond_range' specifies range of tokens (excluding brackets), 'cmds' specifies all commands inside loop
  13. CMD_DO , // do cmds while(cond); - 'cmds' specifies all commands inside loop, 'cond_range' specifies range of tokens (excluding brackets)
  14. CMD_SWITCH , // switch(expr){cmds} - 'cmds' specifies all commands inside switch, 'raw_range' specifies range of tokens (excluding brackets)
  15. CMD_RETURN , // return expr; - 'raw_range' specifies range of tokens after return (excluding semicolon)
  16. CMD_BREAK , // break; - 'raw_range' specifies range of tokens after break (excluding semicolon)
  17. CMD_CONTINUE , // continue; - 'raw_range' specifies range of tokens after continue (excluding semicolon)
  18. CMD_GOTO , // goto label; - 'raw_range' specifies range of tokens after goto (excluding semicolon), 'label_index'=index of the label to jump to (-1 if unknown)
  19. CMD_GOTO_COND , // if( cond)goto label; - 'cond_range' specifies range of tokens (excluding brackets), 'label_index'=index of the label to jump to (-1 if unknown)
  20. CMD_GOTO_COND_N, // if(!cond)goto label; - 'cond_range' specifies range of tokens (excluding brackets), 'label_index'=index of the label to jump to (-1 if unknown)
  21. CMD_LABEL , // label: - 'raw_range' specifies range of tokens for label (excluding colon), 'label_index'=index of the label (-1 if unknown)
  22. };
  23. struct Command
  24. {
  25. struct For
  26. {
  27. VecI2 cond_range, init_range, step_range;
  28. void reset() {cond_range.set(0,-1); init_range.set(0,-1); step_range.set(0,-1);}
  29. };
  30. CMD_TYPE type;
  31. Memc<Command> cmds, cmds_false; // 'cmds_false' is used for "if(..).. else cmds_false;"
  32. union // all ranges may be invalid "max<min", ranges are described as "VecI2.x=min, VecI2.y=max"
  33. {
  34. VecI2 raw_range;
  35. For _for ;
  36. };
  37. Int label_index, // label index
  38. scope_label, // label index which specifies end of the block scope
  39. code_pos ; // byte code raw position of the start of the command
  40. Int startTokenIndex(); // return the token index of the start of the command
  41. Command() {type=CMD_NONE; raw_range.set(0,-1); _for.reset(); label_index=scope_label=-1; code_pos=0;}
  42. };
  43. /******************************************************************************/
  44. struct Message
  45. {
  46. enum TYPE : Byte
  47. {
  48. NONE ,
  49. WARNING,
  50. ERROR ,
  51. };
  52. TYPE type;
  53. Str text;
  54. Token *token;
  55. Source *source;
  56. Memc<Message> children;
  57. Message& set(C Str &text, TYPE type=NONE) {T.type=type; T.text=text; T.token= null ; T.source= null ; return T;}
  58. Message& set(C Str &text, Token *token , TYPE type=NONE) {T.type=type; T.text=text; T.token= token ; T.source=((token && token->line) ? token->line->source : null); return T;}
  59. Message& set(C Str &text, Source *source, Int token, TYPE type=NONE) {T.type=type; T.text=text; T.token=(source ? source->getToken(token) : null); T.source= source ; return T;}
  60. Message& set(C Str &text, Symbol *symbol, TYPE type=NONE) {return set(text, symbol ? symbol->source : null, symbol ? symbol->token_index : -1, type);}
  61. Message& error (C Str &text ) {return set(text, ERROR );}
  62. Message& error (C Str &text, Token *token ) {return set(text, token , ERROR );}
  63. Message& error (C Str &text, Source *source, Int token) {return set(text, source, token, ERROR );}
  64. Message& error (C Str &text, Symbol *symbol ) {return set(text, symbol, ERROR );}
  65. Message& warning(C Str &text ) {return set(text, WARNING);}
  66. Message& warning(C Str &text, Token *token ) {return set(text, token , WARNING);}
  67. Message& warning(C Str &text, Source *source, Int token) {return set(text, source, token, WARNING);}
  68. Message& warning(C Str &text, Symbol *symbol ) {return set(text, symbol, WARNING);}
  69. Message() {type=NONE; token=null; source=null;}
  70. };
  71. /******************************************************************************/
  72. struct CompilerContext
  73. {
  74. struct Constant
  75. {
  76. Int const_offset, // offset in the 'const_data'
  77. heap_offset, // offset in the 'heap'
  78. size; // size of the data
  79. };
  80. Bool store_known_global_var_on_heap;
  81. Memc<Constant> constants, var_constants;
  82. Memc<Byte > const_data;
  83. CodeEnvironment &env;
  84. Int heapConstant(CPtr data, Int size, Int align ); // create new constant on the heap and return its raw offset in the heap
  85. void varHeapConstant(Int heap_offset, CPtr data, Int size); // set constant in the specified position of the heap
  86. explicit CompilerContext(CodeEnvironment &env) : env(env) {store_known_global_var_on_heap=false;}
  87. };
  88. /******************************************************************************/
  // Outcome of compiling a series of expressions.
  enum COMPILE_RESULT
  {
     // no expression was returned
     COMPILE_FAILED = 0,

     // expression was returned, however there still remained others which weren't processed
     COMPILE_PARTIAL = 1,

     // expression was returned, all expressions were processed and the returned one is the only one that's left
     COMPILE_FULL = 2,
  };
  95. struct Compiler // Function Compiler
  96. {
  97. Bool strict, // if strict compilation
  98. quiet, // if silence all compilation messages
  99. allow_cast, // if allow casting
  100. allow_func_lists; // if allow FUNC_LIST when calculating final
  101. Int final, // token index we want to calculate as final
  102. labels, // current number of labels in a function
  103. cmd_index, // current command index (increased during compilation)
  104. scope_label, // index of the label at which current scope ends (changed during compilation)
  105. stack_size, // current stack size (increased during compilation by local variables)
  106. recursive; // recursive function depth during compiling
  107. Source *source; // source file of the tokens to compile
  108. Memc<Local > locals; // all detected local variables (named and temporaries)
  109. Memc<Int > live_locals; // index of locals that are live and need to be destroyed
  110. Memc<Message> &msgs; // compilation messages
  111. Memc<Token* > &tokens; // source tokens
  112. CompilerContext *ctx;
  113. Compiler(Memc<Message> &msgs, Memc<Token*> &tokens, Source *source, CompilerContext *ctx) : msgs(msgs), tokens(tokens), source(source), ctx(ctx) {strict=true; quiet=false; allow_cast=true; allow_func_lists=false; final=-1; labels=0; cmd_index=0; scope_label=-1; stack_size=0; recursive=0; source=null;}
  114. Compiler& relax ( ) {strict=false; return T;}
  115. Compiler& setFinal(Int token_index, Bool allow_func_lists) {T.final=token_index; T.allow_func_lists=allow_func_lists; return T;}
  116. void expand (Memc<Command> &src, Memc<Command> &dest, Int label_break, Int label_continue, Int label_return, Int label_scope);
  117. void compile(Memc<Command> &cmds, Mems<Byte> &code, Symbol &func, Symbol::Modif *result_value);
  118. Int heapConstant(CPtr data, Int size, Int align ); // create new constant on the heap and return its raw offset in the heap
  119. void varHeapConstant(Int heap_offset, CPtr data, Int size); // set constant in the specified position of the heap
  120. Int newLocal (Symbol::Modif &type, Bool block_scope, Token *token); // create new local variable of the 'type' type and return its index in the 'locals' container, 'token'=token at which the variable gets created
  121. void mapLocalInStack(Expr::Memory &mem, Bool error_on_fail=true ); // map the local variable in fixed stack position
  122. void mapVar (Expr::Memory &mem ); // map the variable if it's a local
  123. Bool unmappedLocal (Expr::Memory &mem );
  124. COMPILE_RESULT compileExpr (Memc<Expr> &expr, Symbol *space, Expr &out); // compile series of expressions, 'space'=where are the expressions located, store the result in 'out'
  125. COMPILE_RESULT compileTokens(Int from, Int to, Expr &out, Memc<Token*> *tokens=null);
  126. Bool appendTokens(Memc<Expr> &expr, Int from, Int to, Memc<Token*> *tokens=null); // add tokens in the range of 'from..to' to the 'expr' container, false on fail
  127. };
  128. /******************************************************************************/
  129. //inline Bool ValueFitsInInstruction(Int size) {return size<=PtrSize && size<=SIZE(Ptr)*/;} // check pointer size of target configuration (so it can fit in target variables) and current configuration (so it can fit in temporary variables used in compilation process)
  130. inline Bool ValueFitsInInstruction(Int size) {return size<=MEMBER_SIZE(Call::Param, raw);} // currently U64 is always used for 'raw' so it can always fit that kind of data
  131. void ReadCommands(Source &source, Int &token_index, Symbol &set_parent, Memc<Command> &cmds, Memc<Message> &msgs); // recursive space assign + organize into commands
  132. void CompileCommands(Source &source, Memc<Command> &cmds, Memc<Message> &msgs);
  133. CChar8* FindFuncCall(Call::Func func);
  134. Call::Func FindFuncCall(C Str &func_name);
  135. Call::Func GetFuncCall(C Str &func_name);
  136. CPtr FindGlobalVar(C Str & var_name);
  137. Call::Func GetIgnoreCall ();
  138. Call::Func GetCallFunc (Bool has_this, Bool has_result);
  139. Call::Func GetIndirectionCall ();
  140. Call::Func GetAddressOfCall ();
  141. Call::Func GetOffsetCall (Symbol::Modif &offset);
  142. Call::Func GetAddPtrCall (Symbol::Modif &offset);
  143. Call::Func GetPtrAddCall (Symbol::Modif &offset);
  144. Call::Func GetPtrDiffCall ();
  145. Call::Func GetGotoCall ();
  146. Call::Func GetGotoCondCall (Symbol::Modif &cond);
  147. Call::Func GetGotoCondNCall (Symbol::Modif &cond);
  148. Call::Func GetSetConstCall (Int size, Bool heap);
  149. Call::Func GetSetAddrHeapCall ();
  150. Call::Func GetSetAddrStackCall ();
  151. Call::Func GetSetAddrResultCall();
  152. Str FuncName(Symbol &func );
  153. Str DtorName(Symbol &Class);
  154. Str NativeOperatorName(C Str &op, Symbol::Modif &result, Symbol::Modif &a ); // this function skips the 'const and '&' REF symbol for result and parameters
  155. Str NativeOperatorName(C Str &op, Symbol::Modif &result, Symbol::Modif &a, Symbol::Modif &b); // this function skips the 'const and '&' REF symbol for result and parameters
  156. /******************************************************************************/
  157. } // namespace
  158. /******************************************************************************/
  159. Int Compare(C Edit::Message &a, C Edit::Message &b);
  160. /******************************************************************************/
  161. #endif
  162. /******************************************************************************/