toker.cpp.bak 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. #include "std.h"
  2. #include "toker.h"
  // Case-insensitive "less than" for NUL-terminated C strings.
  // Used as the ordering predicate of TokeMap so keyword lookups ignore case.
  struct Stricmp{
    // Lower-cases the character at *s. The detour through unsigned char keeps
    // the argument inside the range the <ctype.h> functions are defined for;
    // passing a negative plain char (any byte >= 0x80 where char is signed)
    // is undefined behaviour.
    static int fold( const char *s ){
      return tolower( static_cast<unsigned char>(*s) );
    }

    // Returns true when x sorts strictly before y, ignoring case.
    bool operator()( const char *x,const char *y )const{
      // advance while both strings agree and x has not ended
      while( *x && fold(x)==fold(y) ){++x;++y;}
      return fold(x)<fold(y);
    }
  };
  9. typedef map<const char*,int,Stricmp> TokeMap;
  10. static TokeMap _tokes;
  11. static void initTokes(){
  12. if( _tokes.size() ) return;
  13. _tokes["Strict"]=T_STRICT;
  14. _tokes["SuperStrict"]=T_SUPERSTRICT;
  15. _tokes["Module"]=T_MODULE;
  16. _tokes["Framework"]=T_FRAMEWORK;
  17. _tokes["Import"]=T_IMPORT;
  18. _tokes["ModuleInfo"]=T_MODULEINFO;
  19. _tokes["DefData"]=T_DEFDATA;
  20. _tokes["ReadData"]=T_READDATA;
  21. _tokes["RestoreData"]=T_RESTOREDATA;
  22. _tokes["Rem"]=T_REM;
  23. _tokes["EndRem"]=T_ENDREM;
  24. _tokes["Try"]=T_TRY;
  25. _tokes["Catch"]=T_CATCH;
  26. _tokes["EndTry"]=T_ENDTRY;
  27. _tokes["Throw"]=T_THROW;
  28. _tokes["Goto"]=T_GOTO;
  29. _tokes["True"]=T_TRUE;
  30. _tokes["False"]=T_FALSE;
  31. _tokes["Pi"]=T_PI;
  32. _tokes["Byte"]=T_BYTE;
  33. _tokes["Short"]=T_SHORT;
  34. _tokes["Int"]=T_INT;
  35. _tokes["Long"]=T_LONG;
  36. _tokes["Float"]=T_FLOAT;
  37. _tokes["Double"]=T_DOUBLE;
  38. _tokes["Object"]=T_OBJECT;
  39. _tokes["String"]=T_STRING;
  40. _tokes["Var"]=T_VAR;
  41. _tokes["Ptr"]=T_PTR;
  42. _tokes["VarPtr"]=T_VARPTR;
  43. _tokes["Chr"]=T_CHR;
  44. _tokes["Len"]=T_LEN;
  45. _tokes["Asc"]=T_ASC;
  46. _tokes["SizeOf"]=T_SIZEOF;
  47. _tokes["Sgn"]=T_SGN;
  48. _tokes["Abs"]=T_ABS;
  49. _tokes["Min"]=T_MIN;
  50. _tokes["Max"]=T_MAX;
  51. _tokes["Mod"]=T_MOD;
  52. _tokes["Shl"]=T_SHL;
  53. _tokes["Shr"]=T_SHR;
  54. _tokes["Sar"]=T_SAR;
  55. _tokes["Not"]=T_NOT;
  56. _tokes["And"]=T_AND;
  57. _tokes["Or"]=T_OR;
  58. _tokes["Return"]=T_RETURN;
  59. _tokes["Local"]=T_LOCAL;
  60. _tokes["Global"]=T_GLOBAL;
  61. _tokes["Const"]=T_CONST;
  62. _tokes["Field"]=T_FIELD;
  63. _tokes["Alias"]=T_ALIAS;
  64. _tokes["End"]=T_END;
  65. _tokes["Type"]=T_TYPE;
  66. _tokes["EndType"]=T_ENDTYPE;
  67. _tokes["Extends"]=T_EXTENDS;
  68. _tokes["Method"]=T_METHOD;
  69. _tokes["EndMethod"]=T_ENDMETHOD;
  70. _tokes["Abstract"]=T_ABSTRACT;
  71. _tokes["Final"]=T_FINAL;
  72. _tokes["Function"]=T_FUNCTION;
  73. _tokes["EndFunction"]=T_ENDFUNCTION;
  74. _tokes["New"]=T_NEW;
  75. _tokes["Release"]=T_RELEASE;
  76. _tokes["Delete"]=T_DELETE;
  77. _tokes["Null"]=T_NULL;
  78. _tokes["Self"]=T_SELF;
  79. _tokes["Super"]=T_SUPER;
  80. _tokes["Incbin"]=T_INCBIN;
  81. _tokes["IncbinPtr"]=T_INCBINPTR;
  82. _tokes["IncbinLen"]=T_INCBINLEN;
  83. _tokes["Include"]=T_INCLUDE;
  84. _tokes["Extern"]=T_EXTERN;
  85. _tokes["EndExtern"]=T_ENDEXTERN;
  86. _tokes["Public"]=T_PUBLIC;
  87. _tokes["Private"]=T_PRIVATE;
  88. _tokes["If"]=T_IF;
  89. _tokes["Then"]=T_THEN;
  90. _tokes["Else"]=T_ELSE;
  91. _tokes["ElseIf"]=T_ELSEIF;
  92. _tokes["EndIf"]=T_ENDIF;
  93. _tokes["For"]=T_FOR;
  94. _tokes["To"]=T_TO;
  95. _tokes["Step"]=T_STEP;
  96. _tokes["Next"]=T_NEXT;
  97. _tokes["EachIn"]=T_EACHIN;
  98. _tokes["While"]=T_WHILE;
  99. _tokes["EndWhile"]=T_WEND;
  100. _tokes["Wend"]=T_WEND;
  101. _tokes["Repeat"]=T_REPEAT;
  102. _tokes["Until"]=T_UNTIL;
  103. _tokes["Forever"]=T_FOREVER;
  104. _tokes["Select"]=T_SELECT;
  105. _tokes["Case"]=T_CASE;
  106. _tokes["Default"]=T__DEFAULT;
  107. _tokes["EndSelect"]=T_ENDSELECT;
  108. _tokes["Continue"]=T_CONTINUE;
  109. _tokes["Exit"]=T_EXIT;
  110. _tokes["Assert"]=T_ASSERT;
  111. _tokes["NoDebug"]=T_NODEBUG;
  112. }
  113. static Toke nextToke( const vector<char> &line,int &p ){
  114. int b=p;
  115. int c=line[p++];
  116. int cur=c;
  117. if( isalpha(c) || c=='_' ){
  118. while( isalnum( c=line[p] ) || c=='_' ) ++p;
  119. cur=T_IDENT;
  120. string t( &line[b],p-b );
  121. TokeMap::iterator it=_tokes.find(t.c_str());
  122. if( it!=_tokes.end() ){
  123. cur=it->second;
  124. if( cur==T_END && line[p]==' ' && isalpha(line[p+1]) ){
  125. int st=p+1,en=p+2;
  126. while( isalpha(line[en]) ) ++en;
  127. string t="end"+string( &line[st],en-st );
  128. it=_tokes.find(t.c_str());
  129. if( it!=_tokes.end() ){
  130. cur=it->second;
  131. p=en;
  132. }
  133. }
  134. }
  135. }else if( isdigit(c) || (c=='.' && isdigit(line[p])) ){
  136. cur=T_INTCONST;
  137. if( c=='.' ){
  138. ++p;cur=T_FLOATCONST;
  139. }
  140. while( isdigit(line[p]) ) ++p;
  141. if( cur==T_INTCONST && line[p]=='.' && isdigit(line[p+1]) ){
  142. p+=2;cur=T_FLOATCONST;
  143. while( isdigit(line[p]) ) ++p;
  144. }
  145. if( tolower(line[p])=='e' && (line[p+1]=='+'||line[p+1]=='-'||isdigit(line[p+1])) ){
  146. ++p;cur=T_FLOATCONST;
  147. if( !isdigit(line[p]) ) ++p;
  148. while( isdigit(line[p]) ) ++p;
  149. }
  150. }else if( c=='$' && isxdigit(line[p]) ){
  151. ++p;cur=T_INTCONST;
  152. while( isxdigit(line[p]) ) ++p;
  153. }else if( c=='%' && (line[p]=='0'||line[p]=='1') ){
  154. ++p;cur=T_INTCONST;
  155. while( line[p]=='0' || line[p]=='1' ) ++p;
  156. }else if( c=='$' && tolower(line[p])=='z' ){
  157. ++p;cur=T_CSTRING;
  158. }else if( c=='$' && tolower(line[p])=='w' ){
  159. ++p;cur=T_WSTRING;
  160. }else if( c=='\"' ){ //string const
  161. cur=T_STRINGCONST;
  162. while( line[p]!='\"' && line[p]!='\n' ) ++p;
  163. if( line[p++]!='\"' ) cur=T_BADSTRINGCONST;
  164. }else if( c=='<' ){ //comparison
  165. switch( line[p++] ){
  166. case '=':cur=T_LE;break;
  167. case '>':cur=T_NE;break;
  168. default:cur=T_LT;--p;
  169. }
  170. }else if( c=='=' ){ //comparison
  171. switch( line[p++] ){
  172. case '>':cur=T_GE;break;
  173. case '<':cur=T_LE;break;
  174. default:cur=T_EQ;--p;
  175. }
  176. }else if( c=='>' ){ //comparison
  177. switch( line[p++] ){
  178. case '=':cur=T_GE;break;
  179. case '<':cur=T_NE;break;
  180. default:cur=T_GT;--p;
  181. }
  182. }else if( c==':' ){
  183. Toke t=nextToke( line,p );
  184. switch( t.toke ){
  185. case '+':cur=T_ADDASSIGN;break;
  186. case '-':cur=T_SUBASSIGN;break;
  187. case '*':cur=T_MULASSIGN;break;
  188. case '/':cur=T_DIVASSIGN;break;
  189. case '|':cur=T_ORASSIGN;break;
  190. case '&':cur=T_ANDASSIGN;break;
  191. case '~':cur=T_XORASSIGN;break;
  192. case T_MOD:cur=T_MODASSIGN;break;
  193. case T_SHL:cur=T_SHLASSIGN;break;
  194. case T_SHR:cur=T_SHRASSIGN;break;
  195. case T_SAR:cur=T_SARASSIGN;break;
  196. default:p=b+1;
  197. }
  198. }else if( c=='.' && line[p]=='.' ){
  199. ++p;cur=T_DOTDOT;
  200. }else if( c=='[' ){ //allow spaces in [,] type tokes
  201. while( line[p]==' ' || line[p]==',' ) ++p;
  202. if( line[p]==']' ){
  203. ++p;cur=T_ARRAYDECL;
  204. }else{
  205. p=b+1;
  206. }
  207. }
  208. return Toke( cur,b,p );
  209. }
  210. Toker::Toker( string f ):fh(0),toke_index(0),line_num(0),file_name(f){
  211. initTokes();
  212. fh=fopen( file_name.c_str(),"rb" );
  213. if( !fh ) fail( "Unable to open file '%s'",file_name.c_str() );
  214. encoding=UNK;
  215. next();
  216. }
  217. void Toker::close(){
  218. if( fh ){
  219. fclose( fh );
  220. fh=0;
  221. }
  222. }
  223. string Toker::sourceFile(){
  224. return file_name;
  225. }
  226. string Toker::sourceInfo(){
  227. return file_name+";"+fromint(line_num)+";"+fromint(curr_toke.begin+1);
  228. }
  229. int Toker::curr(){
  230. return curr_toke.toke;
  231. }
  232. string Toker::text(){
  233. return string( &line[curr_toke.begin],curr_toke.end-curr_toke.begin );
  234. }
  235. bstring Toker::wtext(){
  236. return bstring( &wline[curr_toke.begin],curr_toke.end-curr_toke.begin );
  237. }
  238. int Toker::peek( int n ){
  239. assert( toke_index+n<tokes.size() );
  240. return tokes[toke_index+n].toke;
  241. }
  242. int Toker::tgetc(){
  243. int c=fgetc(fh),d,e;
  244. if( c==EOF ) return c;
  245. switch( encoding ){
  246. case UNK:
  247. d=fgetc(fh);
  248. if( c==0xfe && d==0xff ){
  249. encoding=UTF16BE;
  250. }else if( c==0xff && d==0xfe ){
  251. encoding=UTF16LE;
  252. }else if( c==0xef && d==0xbb ){
  253. e=fgetc(fh);
  254. if( e==0xbf ){
  255. encoding=UTF8;
  256. }else{
  257. ungetc( e,fh );
  258. }
  259. }
  260. if( encoding==UNK ){
  261. encoding=LATIN1;
  262. ungetc( d,fh );
  263. ungetc( c,fh );
  264. }
  265. return tgetc();
  266. case LATIN1:
  267. return c;
  268. case UTF8:
  269. if( c<128 ){
  270. return c;
  271. }
  272. d=fgetc(fh);
  273. if( c<224 ){
  274. return (c-192)*64+(d-128);
  275. }
  276. e=fgetc(fh);
  277. if( c<240 ){
  278. return (c-224)*4096+(d-128)*64+(e-128);
  279. }
  280. return 0;
  281. case UTF16BE:
  282. return ((c&0xff)<<8)|(fgetc(fh)&0xff);
  283. case UTF16LE:
  284. return ((fgetc(fh)&0xff)<<8)|(c&0xff);
  285. }
  286. cout<<"Here!"<<endl;
  287. return ' ';
  288. }
  289. void Toker::nextLine(){
  290. ++line_num;
  291. line.clear();
  292. wline.clear();
  293. tokes.clear();
  294. if( !fh ){
  295. tokes.push_back( Toke(EOF,0,0) );
  296. return;
  297. }
  298. for(;;){
  299. int c=tgetc();
  300. if( c=='\n' || c==EOF ){
  301. if( c==EOF ) close();
  302. line.push_back( '\n' );
  303. wline.push_back( '\n' );
  304. break;
  305. }
  306. line.push_back( (c>32 && c<127) ? c : ' ' );
  307. wline.push_back(c);
  308. }
  309. int p=0;
  310. for(;;){
  311. int c=line[p];
  312. if( c=='\'' || c=='\n' ){
  313. if( tokes.size() && tokes.back().toke==T_DOTDOT ){
  314. tokes.pop_back();
  315. break;
  316. }
  317. tokes.push_back( Toke('\n',p,line.size()) );
  318. break;
  319. }else if( isgraph(c) ){
  320. tokes.push_back( nextToke(line,p) );
  321. }else{
  322. ++p;
  323. }
  324. }
  325. }
  326. int Toker::next(){
  327. if( curr()==EOF ) return EOF;
  328. while( toke_index==tokes.size() ){
  329. nextLine();
  330. toke_index=0;
  331. for(;;){
  332. if( !tokes.size() ){
  333. nextLine();
  334. }else if( tokes[0].toke=='?' ){
  335. ++toke_index;
  336. bool cc=true,cNot=false;
  337. if( toke_index<tokes.size() && tokes[toke_index].toke==T_NOT ){
  338. ++toke_index;
  339. cNot=true;
  340. }
  341. if( toke_index<tokes.size() && tokes[toke_index].toke==T_IDENT ){
  342. string id=string( &line[tokes[toke_index].begin],tokes[toke_index].end-tokes[toke_index].begin );
  343. ++toke_index;
  344. cc=env_config.count( tolower(id) );
  345. }
  346. if( cNot ) cc=!cc;
  347. if( cc ) break;
  348. do{
  349. nextLine();
  350. }while( tokes[0].toke!=EOF && tokes[0].toke!='?' );
  351. toke_index=0;
  352. }else if( tokes[0].toke==T_REM ){
  353. do{
  354. nextLine();
  355. }while( tokes[0].toke!=EOF && tokes[0].toke!=T_ENDREM );
  356. if( tokes[0].toke==EOF ) break;
  357. nextLine();
  358. }else{
  359. break;
  360. }
  361. }
  362. }
  363. curr_toke=tokes[toke_index++];
  364. return curr();
  365. }
  366. string Toker::toString( int n ){
  367. switch( n ){
  368. case '\n':return "end-of-line";
  369. case EOF:return "end-of-file";
  370. case T_LT:return "'<'";
  371. case T_GT:return "'>'";
  372. case T_LE:return "'<='";
  373. case T_GE:return "'>='";
  374. case T_EQ:return "'='";
  375. case T_NE:return "'<>'";
  376. case T_DOTDOT:return "'..'";
  377. case T_IDENT:return "identifier";
  378. case T_INTCONST:return "integer literal";
  379. case T_FLOATCONST:return "floating point literal";
  380. case T_STRINGCONST:return "string literal";
  381. case T_CSTRING:return "cstring tag";
  382. case T_WSTRING:return "wstring tag";
  383. case T_ARRAYDECL:return "array declaration";
  384. case T_BADSTRINGCONST:return "malformed string literal";
  385. case T_ADDASSIGN:return "add assign";
  386. case T_SUBASSIGN:return "subtract assign";
  387. case T_MULASSIGN:return "multiply assign";
  388. case T_DIVASSIGN:return "divide assign";
  389. case T_MODASSIGN:return "remainder assign";
  390. case T_ORASSIGN:return "or assign";
  391. case T_ANDASSIGN:return "and assign";
  392. case T_XORASSIGN:return "exclusive or assign";
  393. case T_SHLASSIGN:return "shift left assign";
  394. case T_SHRASSIGN:return "shift right assign";
  395. case T_SARASSIGN:return "Shift arithmetic right assign";
  396. }
  397. TokeMap::iterator it;
  398. for( it=_tokes.begin();it!=_tokes.end();++it ){
  399. if( n==it->second ) return it->first;
  400. }
  401. if( isgraph(n) ){
  402. char c=n;
  403. return "'"+string(&c,1)+"'";
  404. }
  405. char buf[8];
  406. sprintf( buf,"%i",n );
  407. return "<chr:"+string(buf)+">";
  408. }